
    '}h+8                        d dl Z d dl mZ ddlmZmZmZmZmZmZm	Z	 d dl
mZmZ ddgZ G d de      Zd	d
e de de dz   e_        	 	 	 	 ddee   dee   dee   dee   dee   dee   dededededededededefdZdee   dee   dee   dee   dee   dededededededededefdZdee   dee   dee   dee   dee   dededededededededefdZy)     N)Tensor   )	Optimizer_default_to_fused_or_foreach_use_grad_for_differentiable_differentiable_doc_foreach_doc_maximize_doc_view_as_real)ListOptionalRMSproprmspropc                   h     e Zd Z	 	 	 	 	 	 	 	 	 ddee   dedef fdZ fdZd Zed	d       Z	 xZ
S )
r   foreachmaximizedifferentiablec                    d|k  st        d|       d|k  st        d|       d|k  st        d|       d|k  st        d|       d|k  st        d|       t        ||||||||	|
	      }t        |   ||       y )Ng        zInvalid learning rate: zInvalid epsilon value: zInvalid momentum value: zInvalid weight_decay value: zInvalid alpha value: )	lrmomentumalphaepscenteredweight_decayr   r   r   )
ValueErrordictsuper__init__)selfparamsr   r   r   r   r   r   r   r   r   defaults	__class__s               R/var/www/html/test/engine/venv/lib/python3.12/site-packages/torch/optim/rmsprop.pyr   zRMSprop.__init__   s     by6rd;<<cz6se<==h7zBCCl";L>JKKe|4UG<==%)

 	*    c                     t         |   |       | j                  D ]\  }|j                  dd       |j                  dd       |j                  dd        |j                  dd       |j                  dd       ^ y )Nr   r   r   Fr   r   r   )r   __setstate__param_groups
setdefault)r   stategroupr"   s      r#   r&   zRMSprop.__setstate__0   sv    U#&& 	6EZ+Z/Y-Z/-u5	6r$   c                 F   d}|d   D ]  }|j                   |t        j                  |      z  }|j                  |       |j                   j                  rt        d      |j                  |j                          | j                  |   }	t        |	      dk(  rd|	d<   t        j                  |t        j                        |	d<   |d   dkD  r(t        j                  |t        j                        |	d	<   |d
   r(t        j                  |t        j                        |	d<   |j                  |	d          |d   dkD  r|j                  |	d	          |d
   r|j                  |	d          |d   rt        |	d   t              rt        d      |	dxx   dz  cc<    |S )NFr    z)RMSprop does not support sparse gradientsr   step)memory_format
square_avgr   momentum_bufferr   grad_avgr   z`step` can't be a tensorr   )gradtorch
is_complexappend	is_sparseRuntimeErrorr)   len
zeros_likepreserve_format
isinstancer   )
r   r*   params_with_gradgradssquare_avgsmomentum_buffer_list	grad_avgshas_complexpr)   s
             r#   _init_groupzRMSprop._init_group9   s   x $	Avv~5++A..K##A&vv"#NOOLL JJqME 5zQ !f&+&6&6U%:%:'l# $q(/4/?/?)>)>0E+, $(-(8(8)>)>)E*% u\23Z 1$$++E2C,DEZ   z!23%&:eFmV+L"#=>>&MQMI$	J r$   c                 :   d}|$t        j                         5   |       }ddd       | j                  D ]W  }g }g }g }g }g }| j                  ||||||      }	t	        ||||||d   |d   |d   |d   |d   |d   |d   |d	   |d
   |	       Y |S # 1 sw Y   qxY w)zPerforms a single optimization step.

        Args:
            closure (Callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   r   r   r   r   r   r   r   )
r   r   r   r   r   r   r   r   r   r@   )r2   enable_gradr'   rB   r   )
r   closurelossr*   r;   r<   r=   r?   r>   r@   s
             r#   r,   zRMSprop.stepb   s     ""$ !y! && 	E!EKI#% **52BE;XlnwxK $;Gn%L">2z*z*i(z*$%56'	6 =! !s   BB)	g{Gz?gGz?g:0yE>r   r   FNFF)N)__name__
__module____qualname__r   boolr   r&   rB   r   r,   __classcell__)r"   s   @r#   r   r   
   sg     "&$#+ $#+ #+ #+J6'R "' "'r$   a  Implements RMSprop algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \alpha \text{ (alpha)},\: \gamma \text{ (lr)},
                \: \theta_0 \text{ (params)}, \: f(\theta) \text{ (objective)}                   \\
            &\hspace{13mm}   \lambda \text{ (weight decay)},\: \mu \text{ (momentum)},\: centered\\
            &\textbf{initialize} : v_0 \leftarrow 0 \text{ (square average)}, \:
                \textbf{b}_0 \leftarrow 0 \text{ (buffer)}, \: g^{ave}_0 \leftarrow 0     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm}if \: \lambda \neq 0                                                    \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda  \theta_{t-1}                            \\
            &\hspace{5mm}v_t           \leftarrow   \alpha v_{t-1} + (1 - \alpha) g^2_t
                \hspace{8mm}                                                                     \\
            &\hspace{5mm} \tilde{v_t} \leftarrow v_t                                             \\
            &\hspace{5mm}if \: centered                                                          \\
            &\hspace{10mm} g^{ave}_t \leftarrow g^{ave}_{t-1} \alpha + (1-\alpha) g_t            \\
            &\hspace{10mm} \tilde{v_t} \leftarrow \tilde{v_t} -  \big(g^{ave}_{t} \big)^2        \\
            &\hspace{5mm}if \: \mu > 0                                                           \\
            &\hspace{10mm} \textbf{b}_t\leftarrow \mu \textbf{b}_{t-1} +
                g_t/ \big(\sqrt{\tilde{v_t}} +  \epsilon \big)                                   \\
            &\hspace{10mm} \theta_t \leftarrow \theta_{t-1} - \gamma \textbf{b}_t                \\
            &\hspace{5mm} else                                                                   \\
            &\hspace{10mm}\theta_t      \leftarrow   \theta_{t-1} -
                \gamma  g_t/ \big(\sqrt{\tilde{v_t}} + \epsilon \big)  \hspace{3mm}              \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to
    `lecture notes <https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf>`_ by G. Hinton.
    and centered version `Generating Sequences
    With Recurrent Neural Networks <https://arxiv.org/pdf/1308.0850v5.pdf>`_.
    The implementation here takes the square root of the gradient average before
    adding epsilon (note that TensorFlow interchanges these two operations). The effective
    learning rate is thus :math:`\gamma/(\sqrt{v} + \epsilon)` where :math:`\gamma`
    is the scheduled learning rate and :math:`v` is the weighted moving average
    of the squared gradient.
    a  
    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-2)
        momentum (float, optional): momentum factor (default: 0)
        alpha (float, optional): smoothing constant (default: 0.99)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        centered (bool, optional) : if ``True``, compute the centered RMSProp,
            the gradient is normalized by an estimation of its variance
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        z	
        z

    r    r<   r=   r?   r>   r   r   r   r@   r   r   r   r   r   r   c	                   |t        | |d      \  }}|r)t        j                  j                         rt	        d      |r%t        j                  j                         st
        }nt        } || |||||	|
|||||||       y)zsFunctional API that performs rmsprop algorithm computation.
    See :class:`~torch.optim.RMSProp` for details.
    NF)	use_fusedz6torch.jit.script not supported with foreach optimizers)	r   r   r   r   r   r   r   r   r@   )r   r2   jitis_scriptingr6   _multi_tensor_rmsprop_single_tensor_rmsprop)r    r<   r=   r?   r>   r   r   r   r@   r   r   r   r   r   r   _funcs                    r#   r   r      s    0 1&.TYZ
7599))+STTuyy--/$%!%r$   c       	         h   t        |       D ]  \  }}||   }|s|n| }||   }|dk7  r|j                  ||      }t        j                  |      }|r?t        j                  |      }t        j                  |      }t        j                  |      }|j                  |      j                  ||d|z
         |
rT||   }|rt        j                  |      }|j                  |d|z
         |j                  ||d      j                         }n|j                         }|r|j                  |      }n|j                  |      }|	dkD  rS||   }|rt        j                  |      }|j                  |	      j                  ||       |j                  ||        |j                  |||         y )Nr   r   r   value)	enumerateaddr2   r3   view_as_realmul_addcmul_lerp_addcmulsqrt_sqrtadd_addcdiv_)r    r<   r=   r?   r>   r   r   r   r   r   r   r   r   r@   iparamr1   r.   is_complex_paramr0   avgbufs                         r#   rQ   rQ      s   $ f% %15Qx#t$ ^
188E86D ++E2&&u-E%%d+D++J7J''d!e)'D |H --h7NN4U+$$Xxr$BHHJC//#C''#,C((3-Ca<&q)C((-HHX''c2JJs2#J&NN4RCN0K%1r$   c       	            t        |       dk(  ry |rJ d       t        j                  | ||||g      }|j                         D ]  \  \  }}}}}}|r9||g}|	dkD  r|j	                  |       |
r|j	                  |       t        |g|  |rt        j                  |      }|dk7  r3|rt        j                  |||       nt        j                  |||      }t        j                  ||       t        j                  |||d|z
         |
r_t        j                  ||d|z
         t        j                  |||d      }t        j                  |       t        j                  ||       n+t        j                  |      }t        j                  ||       |	dkD  rHt        j                  ||	       t        j                   |||       t        j                  |||        t        j                   ||||         y )Nr   z#_foreach ops don't support autogradrU   r   rV   rX   )r7   r   "_group_tensors_by_device_and_dtypevaluesr4   r   r2   _foreach_neg_foreach_add__foreach_add_foreach_mul__foreach_addcmul__foreach_lerp__foreach_addcmul_foreach_sqrt__foreach_sqrt_foreach_addcdiv_)r    r<   r=   r?   r>   r   r   r   r   r   r   r   r   r@   grouped_tensorsgrouped_paramsgrouped_gradsgrouped_square_avgsgrouped_grad_avgsgrouped_momentum_buffer_listrR   state_and_gradsrg   s                          r#   rP   rP   9  s   $ 6{aDDDBBFES^`ik  DA  BO/>/E/E/G%S 	, '>=*=?P	%,.ABO!|&&'CD&&'89.;?;!..}=M1##M>V % 2 2=.Xd e/7 3]MYZ]bYbc  !2M1u9M(()<>OQbjlmC  %S)%%&9:CS)a< <hG##$@-QTU0LUWTWX##NM3rcRK%Sr$   )NFFF)r2   r   	optimizerr   r   r   r   r	   r
   r   typingr   r   __all__r   __doc__rJ   floatr   rQ   rP    r$   r#   <module>r      s    Y Y Y !i
 @i @F*T	 
 		 		 U:J # 2L2<2 f2 F|	2
 v,2 d^2 2 2 2 	2 2 
2  !2" #2$ %2j71L71<71 f71 F|	71
 v,71 	71 71 
71 71 71 71 71 71 71t=SL=S<=S f=S F|	=S
 v,=S 	=S =S 
=S =S =S =S =S =S =Sr$   