
"""Adamax optimizer implementation."""

import tensorflow.compat.v2 as tf

from tf_keras.src.optimizers import optimizer
from tf_keras.src.saving.object_registration import register_keras_serializable
from tensorflow.python.util.tf_export import keras_export


@register_keras_serializable()
@keras_export(
    "keras.optimizers.Adamax", "keras.optimizers.experimental.Adamax", v1=[]
)
class Adamax(optimizer.Optimizer):
    """Optimizer that implements the Adamax algorithm.

    Adamax, a variant of Adam based on the infinity norm, is a first-order
    gradient-based optimization method. Due to its capability of adjusting the
    learning rate based on data characteristics, it is suited to learning
    time-variant processes, e.g., speech data with dynamically changing noise
    conditions. Default parameters follow those provided in the paper (see
    references below).

    Initialization:

    ```python
    m = 0  # Initialize the 1st moment vector
    u = 0  # Initialize the exponentially weighted infinity norm
    t = 0  # Initialize timestep
    ```
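
    In this implementation, `m` and `u` are not Python scalars but per-variable
    slot variables (one pair per trainable variable) that the optimizer creates
    in `build()`.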

    The update rule for parameter `w` with gradient `g` is described at the end
    of section 7.1 of the paper (see the reference section):

    ```python
    t += 1
    m = beta1 * m + (1 - beta1) * g
    u = max(beta2 * u, abs(g))
    current_lr = learning_rate / (1 - beta1 ** t)
    w = w - current_lr * m / (u + epsilon)
    ```
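
    For example, starting from `m = u = 0` with `beta1 = 0.9`, `beta2 = 0.999`,
    `learning_rate = 0.001` and a first gradient `g = 1.0`, one step gives
    `m = 0.1`, `u = 1.0` and `current_lr = 0.001 / (1 - 0.9) = 0.01`, so the
    parameter moves by `0.01 * 0.1 / 1.0 = 0.001`; the first step is roughly one
    `learning_rate` in size regardless of the gradient's magnitude.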

    Args:
        learning_rate: A `tf.Tensor`, floating point value, a schedule that is a
            `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable
            that takes no arguments and returns the actual value to use. The
            learning rate. Defaults to `0.001`.
        beta_1: A float value or a constant float tensor. The exponential decay
            rate for the 1st moment estimates.
        beta_2: A float value or a constant float tensor. The exponential decay
            rate for the exponentially weighted infinity norm.
        epsilon: A small constant for numerical stability.
        {{base_optimizer_keyword_args}}

    Reference:
        - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
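
    A minimal usage sketch (the scalar variable and quadratic loss below are
    illustrative only, not part of the optimizer API):

    ```python
    opt = tf.keras.optimizers.Adamax(learning_rate=0.001)
    var1 = tf.Variable(10.0)
    for _ in range(3):
        with tf.GradientTape() as tape:
            loss = (var1 ** 2) / 2.0  # d(loss)/d(var1) == var1
        grads = tape.gradient(loss, [var1])
        opt.apply_gradients(zip(grads, [var1]))
    ```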
    """

    def __init__(
        self,
        learning_rate=0.001,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7,
        weight_decay=None,
        clipnorm=None,
        clipvalue=None,
        global_clipnorm=None,
        use_ema=False,
        ema_momentum=0.99,
        ema_overwrite_frequency=None,
        jit_compile=True,
        name="Adamax",
        **kwargs,
    ):
        super().__init__(
            name=name,
            weight_decay=weight_decay,
            clipnorm=clipnorm,
            clipvalue=clipvalue,
            global_clipnorm=global_clipnorm,
            use_ema=use_ema,
            ema_momentum=ema_momentum,
            ema_overwrite_frequency=ema_overwrite_frequency,
            jit_compile=jit_compile,
            **kwargs,
        )
        self._learning_rate = self._build_learning_rate(learning_rate)
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon

    def build(self, var_list):
        """Initialize optimizer variables.

        Adamax optimizer has 2 types of variables: momentums (denoted as m),
        exponentially weighted infinity norm (denoted as u).

        Args:
            var_list: list of model variables to build Adamax variables on.
        """
        super().build(var_list)
        if hasattr(self, "_built") and self._built:
            return
        self._built = True
        self._m = []
        self._u = []
        for var in var_list:
            self._m.append(
                self.add_variable_from_reference(
                    model_variable=var, variable_name="m"
                )
            )
            self._u.append(
                self.add_variable_from_reference(
                    model_variable=var, variable_name="u"
                )
            )

    def update_step(self, gradient, variable):
        """Update step given gradient and the associated model variable."""
        lr = tf.cast(self.learning_rate, variable.dtype)
        local_step = tf.cast(self.iterations + 1, variable.dtype)
        beta_1_power = tf.pow(tf.cast(self.beta_1, variable.dtype), local_step)

        var_key = self._var_key(variable)
        m = self._m[self._index_dict[var_key]]
        u = self._u[self._index_dict[var_key]]

        if isinstance(gradient, tf.IndexedSlices):
            # Sparse gradients.
            indices = gradient.indices
            m.assign_add(-m * (1 - self.beta_1))
            m.scatter_add(
                tf.IndexedSlices(gradient.values * (1 - self.beta_1), indices)
            )
            u.assign(u * self.beta_2)
            u_slice = tf.gather(u, indices)
            u_slice_incremental = (
                tf.maximum(u_slice, tf.abs(gradient.values)) - u_slice
            )
            u.scatter_add(tf.IndexedSlices(u_slice_incremental, indices))
            variable.assign_sub(
                (lr * m) / ((1 - beta_1_power) * (u + self.epsilon))
            )
        else:
            # Dense gradients.
            m.assign_add((gradient - m) * (1 - self.beta_1))
            u.assign(tf.maximum(self.beta_2 * u, tf.abs(gradient)))
            variable.assign_sub(
                (lr * m) / ((1 - beta_1_power) * (u + self.epsilon))
            )

    def get_config(self):
        config = super().get_config()

        config.update(
            {
                "learning_rate": self._serialize_hyperparameter(
                    self._learning_rate
                ),
                "beta_1": self.beta_1,
                "beta_2": self.beta_2,
                "epsilon": self.epsilon,
            }
        )
        return config


Adamax.__doc__ = Adamax.__doc__.replace(
    "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args
)