"""A class for Tensorflow specific optimizer logic.

The major behavior change for this class is for tf.distribute.

It will override methods from base Keras core Optimizer,
which provide distribute specific functionality, e.g. variable
creation, loss reduction, etc.
"""

import warnings

import tensorflow as tf

from keras.src import backend
from keras.src.backend.tensorflow.trackable import KerasAutoTrackable
from keras.src.optimizers import base_optimizer


class TFOptimizer(KerasAutoTrackable, base_optimizer.BaseOptimizer):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._distribution_strategy = tf.distribute.get_strategy()

    def add_variable_from_reference(
        self, reference_variable, name=None, initializer="zeros"
    ):
        if isinstance(reference_variable, backend.Variable):
            colocate_var = reference_variable.value
        else:
            colocate_var = reference_variable

        with self._distribution_strategy.extended.colocate_vars_with(
            colocate_var
        ):
            return super().add_variable_from_reference(
                reference_variable, name=name, initializer=initializer
            )

    def stateless_apply(self, optimizer_variables, grads, trainable_variables):
        raise ValueError(
            "stateless_apply is not supported with the TensorFlow backend "
            "(as it is incompatible with tf.distribute)."
        )

    def assign(self, variable, value):
        if isinstance(variable, backend.Variable):
            variable = variable.value
        value = tf.cast(value, variable.dtype)
        if isinstance(value, tf.IndexedSlices):
            variable.scatter_update(value)
        else:
            variable.assign(value)

    def assign_add(self, variable, value):
        if isinstance(variable, backend.Variable):
            variable = variable.value
        value = tf.cast(value, variable.dtype)
        if isinstance(value, tf.IndexedSlices):
            variable.scatter_add(value)
        else:
            variable.assign_add(value)

    def assign_sub(self, variable, value):
        if isinstance(variable, backend.Variable):
            variable = variable.value
        value = tf.cast(value, variable.dtype)
        if isinstance(value, tf.IndexedSlices):
            variable.scatter_sub(value)
        else:
            variable.assign_sub(value)
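
    # Note on the three assign helpers above: sparse gradients (e.g. from
    # embedding lookups) arrive as `tf.IndexedSlices`, which describe only a
    # subset of rows, so they are routed to the variable's scatter_* ops
    # rather than the dense assign_* ops. A rough sketch of the sparse path,
    # assuming a 4x2 tf.Variable `var`:
    #
    #     delta = tf.IndexedSlices(
    #         values=tf.ones((1, 2)),
    #         indices=tf.constant([2], dtype=tf.int64),
    #         dense_shape=tf.shape(var, out_type=tf.int64),
    #     )
    #     var.scatter_add(delta)  # only row 2 is incremented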

    def _var_key(self, variable):
        if isinstance(variable, backend.Variable):
            variable = variable.value
        if hasattr(variable, "_distributed_container"):
            variable = variable._distributed_container()
        elif (
            isinstance(variable, tf.__internal__.CompositeTensor)
            and hasattr(variable, "handle")
            and hasattr(variable.handle, "_distributed_container")
        ):
            variable = variable.handle._distributed_container()
        return variable._unique_id
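
    # _var_key deduplicates per-replica copies of one logical variable: under
    # tf.distribute each replica owns a component variable, and resolving it
    # to its distributed container keeps a single slot (e.g. one momentum
    # accumulator) per logical variable across devices.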

    def _apply_weight_decay(self, variables):
        if self.weight_decay is None:
            return

        def distributed_apply_weight_decay(distribution, variables, **kwargs):
            def weight_decay_fn(variable):
                if self._use_weight_decay(variable):
                    lr = tf.cast(self.learning_rate, variable.dtype)
                    wd = tf.cast(self.weight_decay, variable.dtype)
                    variable.assign_sub(variable * wd * lr)

            for variable in variables:
                if isinstance(variable, backend.Variable):
                    variable = variable.value
                distribution.extended.update(
                    variable, weight_decay_fn, group=False
                )

        tf.__internal__.distribute.interim.maybe_merge_call(
            distributed_apply_weight_decay,
            self._distribution_strategy,
            variables,
        )

    def _backend_update_step(self, grads, trainable_variables, learning_rate):
        trainable_variables = [
            v.value if isinstance(v, backend.Variable) else v
            for v in trainable_variables
        ]
        grads_and_vars = list(zip(grads, trainable_variables))
        grads_and_vars = self._all_reduce_sum_gradients(grads_and_vars)
        tf.__internal__.distribute.interim.maybe_merge_call(
            self._distributed_tf_update_step,
            self._distribution_strategy,
            grads_and_vars,
            learning_rate,
        )

    def _distributed_tf_update_step(
        self, distribution, grads_and_vars, learning_rate
    ):
        def apply_grad_to_update_var(var, grad, learning_rate):
            return self.update_step(grad, var, learning_rate)

        for grad, var in grads_and_vars:
            distribution.extended.update(
                var,
                apply_grad_to_update_var,
                args=(grad, learning_rate),
                group=False,
            )
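
    # Summary of the update path defined above (added commentary):
    #   1. _apply_weight_decay applies decoupled weight decay, i.e.
    #      `variable -= variable * weight_decay * learning_rate`, directly on
    #      each replica's copy instead of folding the decay into the gradient.
    #   2. _backend_update_step unwraps Keras variables to tf.Variables, sums
    #      gradients across replicas, and enters cross-replica ("merge")
    #      context via maybe_merge_call.
    #   3. _distributed_tf_update_step then runs update_step for each
    #      variable on the device that owns it, through
    #      distribution.extended.update(..., group=False).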

    def _all_reduce_sum_gradients(self, grads_and_vars):
        """Returns all-reduced gradients aggregated via summation.

        Args:
            grads_and_vars: List of (gradient, variable) pairs.

        Returns:
            List of (gradient, variable) pairs
            where gradients have been all-reduced.
        """
        replica_context = tf.distribute.get_replica_context()
        if not replica_context:
            return grads_and_vars

        grads_and_vars = list(grads_and_vars)
        filtered_grads_and_vars = filter_empty_gradients(grads_and_vars)
        if filtered_grads_and_vars:
            grads = [pair[0] for pair in filtered_grads_and_vars]
            reduced = tf.distribute.get_replica_context().all_reduce(
                tf.distribute.ReduceOp.SUM, grads
            )
        else:
            reduced = []
        reduced_with_nones = []
        reduced_pos = 0
        for g, v in grads_and_vars:
            if g is None:
                reduced_with_nones.append((None, v))
            else:
                reduced_with_nones.append((reduced[reduced_pos], v))
                reduced_pos += 1
        assert reduced_pos == len(reduced), "Failed to add all gradients"
        return reduced_with_nones
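
    # Worked example of the None bookkeeping above, with hypothetical values:
    # given grads_and_vars = [(g0, v0), (None, v1), (g2, v2)], only [g0, g2]
    # are all-reduced, and the result is re-expanded to
    # [(r0, v0), (None, v1), (r1, v2)] so that gradients stay aligned with
    # their original variables.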

    def _overwrite_model_variables_with_average_value(
        self, trainable_variables
    ):
        """Overwrite model variables with their moving average values.

        This function overwrites variables on each device.

        Args:
          var_list: list of model variables.
        """
        trainable_variables = [
            v.value if isinstance(v, backend.Variable) else v
            for v in trainable_variables
        ]
        for var, average_var in zip(
            trainable_variables, self._model_variables_moving_average
        ):
            self._distribution_strategy.extended.update(
                var, lambda a, b: a.assign(b), args=(average_var,)
            )
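
    # This hook backs the exponential moving average (EMA) options on the
    # base optimizer; a sketch of how it gets exercised (assumed usage, the
    # exact call sites live in base Keras code):
    #
    #     optimizer = keras.optimizers.SGD(use_ema=True, ema_momentum=0.99)
    #
    # With `use_ema=True`, Keras tracks _model_variables_moving_average and
    # can swap the averaged values into the model weights on every device.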

    def _backend_increment_gradient_accumulators(self, grads, acc_grads):
        def update_accumulator(var, grad):
            var.assign(var + grad)

        accumulators = [v.value for v in acc_grads]

        def _distributed_tf_increment_grad_acc(
            distribution, grads, accumulators
        ):
            for grad, var in zip(grads, accumulators):
                distribution.extended.update(
                    var, update_accumulator, args=(grad,), group=False
                )

        tf.__internal__.distribute.interim.maybe_merge_call(
            _distributed_tf_increment_grad_acc,
            self._distribution_strategy,
            grads,
            accumulators,
        )
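
    # This supports the base optimizer's `gradient_accumulation_steps`
    # option: on non-application steps, new gradients are only added into the
    # accumulator variables here, and the optimizer applies (then resets) the
    # accumulated gradients once every N steps.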

    def _clip_by_norm(self, values, axes=None):
        return tf.clip_by_norm(values, self.clipnorm, axes)


def filter_empty_gradients(grads_and_vars):
    """Filter out `(grad, var)` pairs that have a gradient equal to `None`."""
    grads_and_vars = tuple(grads_and_vars)
    if not grads_and_vars:
        return grads_and_vars

    filtered = []
    vars_with_empty_grads = []
    for grad, var in grads_and_vars:
        if grad is None:
            vars_with_empty_grads.append(var)
        else:
            filtered.append((grad, var))
    filtered = tuple(filtered)

    if not filtered:
        variable = ([v.name for _, v in grads_and_vars],)
        raise ValueError(
            f"No gradients provided for any variable: {variable}. "
            f"Provided `grads_and_vars` is {grads_and_vars}."
        )
    if vars_with_empty_grads:
        warnings.warn(
            "Gradients do not exist for variables %s when minimizing the "
            "loss. If you're using `model.compile()`, did you forget to "
            "provide a `loss` argument?"
            % ([v.name for v in vars_with_empty_grads],)
        )
    return filtered
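

# Quick illustration of filter_empty_gradients (added commentary with
# hypothetical tensors):
#
#     v1, v2 = tf.Variable([1.0]), tf.Variable([2.0])
#     filter_empty_gradients([(tf.constant([0.5]), v1), (None, v2)])
#     # -> ((<tf.Tensor [0.5]>, v1),) plus a UserWarning naming v2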