from keras.src.api_export import keras_export
from keras.src.optimizers import adam
from keras.src.optimizers import optimizer


@keras_export(["keras.optimizers.AdamW"])
class AdamW(adam.Adam):
    """Optimizer that implements the AdamW algorithm.

    AdamW optimization is a stochastic gradient descent method that is based on
    adaptive estimation of first-order and second-order moments with an added
    method to decay weights per the techniques discussed in the paper,
    'Decoupled Weight Decay Regularization' by
    [Loshchilov & Hutter, 2019](https://arxiv.org/abs/1711.05101).

    According to
    [Kingma et al., 2014](http://arxiv.org/abs/1412.6980),
    the underlying Adam method is "*computationally
    efficient, has little memory requirement, invariant to diagonal rescaling of
    gradients, and is well suited for problems that are large in terms of
    data/parameters*".

    Args:
        learning_rate: A float, a
            `keras.optimizers.schedules.LearningRateSchedule` instance, or
            a callable that takes no arguments and returns the actual value to
            use. The learning rate. Defaults to `0.001`.
        beta_1: A float value or a constant float tensor, or a callable
            that takes no arguments and returns the actual value to use. The
            exponential decay rate for the 1st moment estimates.
            Defaults to `0.9`.
        beta_2: A float value or a constant float tensor, or a callable
            that takes no arguments and returns the actual value to use. The
            exponential decay rate for the 2nd moment estimates.
            Defaults to `0.999`.
        epsilon: A small constant for numerical stability. This epsilon is
            "epsilon hat" in the Kingma and Ba paper (in the formula just
            before Section 2.1), not the epsilon in Algorithm 1 of the paper.
            Defaults to 1e-7.
        amsgrad: Boolean. Whether to apply AMSGrad variant of this algorithm
            from the paper "On the Convergence of Adam and beyond".
            Defaults to `False`.
        {{base_optimizer_keyword_args}}

    References:

    - [Loshchilov et al., 2019](https://arxiv.org/abs/1711.05101)
    - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) for `adam`
    - [Reddi et al., 2018](
        https://openreview.net/pdf?id=ryQu7f-RZ) for `amsgrad`.
    """

    def __init__(
        self,
        learning_rate=0.001,
        weight_decay=0.004,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-7,
        amsgrad=False,
        clipnorm=None,
        clipvalue=None,
        global_clipnorm=None,
        use_ema=False,
        ema_momentum=0.99,
        ema_overwrite_frequency=None,
        loss_scale_factor=None,
        gradient_accumulation_steps=None,
        name="adamw",
        **kwargs,
    ):
        super().__init__(
            learning_rate=learning_rate,
            beta_1=beta_1,
            beta_2=beta_2,
            epsilon=epsilon,
            amsgrad=amsgrad,
            name=name,
            weight_decay=weight_decay,
            clipnorm=clipnorm,
            clipvalue=clipvalue,
            global_clipnorm=global_clipnorm,
            use_ema=use_ema,
            ema_momentum=ema_momentum,
            ema_overwrite_frequency=ema_overwrite_frequency,
            loss_scale_factor=loss_scale_factor,
            gradient_accumulation_steps=gradient_accumulation_steps,
            **kwargs,
        )
        if self.weight_decay is None:
            raise ValueError(
                "Argument `weight_decay` must be a float. "
                "Received: weight_decay=None"
            )


AdamW.__doc__ = AdamW.__doc__.replace(
    "{{base_optimizer_keyword_args}}", optimizer.base_optimizer_keyword_args
)
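

# --- Illustrative usage sketch (not part of the upstream module) ---
# A minimal, hedged example of constructing the optimizer and applying one
# update step; it assumes only the public Keras 3 API. The decoupled weight
# decay of Loshchilov & Hutter is handled by the base optimizer, which
# subtracts `variable * weight_decay * learning_rate` directly from the
# variable, separately from the Adam moment-based step.
if __name__ == "__main__":
    import numpy as np

    import keras

    opt = keras.optimizers.AdamW(learning_rate=1e-3, weight_decay=4e-3)

    # One trainable variable and a hand-picked gradient for demonstration.
    var = keras.Variable(np.array([1.0, -2.0], dtype="float32"))
    grad = keras.ops.convert_to_tensor(np.array([0.1, 0.3], dtype="float32"))

    # Single optimizer step: Adam moment update plus the decoupled decay term.
    opt.apply_gradients([(grad, var)])
    print(keras.ops.convert_to_numpy(var))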