o
    2hК                     @   s   d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlm	Z	 d dlm
Z d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d dlmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ e rud dl m!Z! e!j"Z"nej"Z"G dd dej#Z$dddZ%G dd deZ&dS )    N)partial)backend)	callbacks)
optimizers)tree)config)distribution_lib)is_nnx_enabled)trainer)array_slicing)data_adapter_utils)EpochIterator)traceback_utils)nnxc                       sN  e Zd Z fddZ		d4ddZdd Zd	d
 Zdd Zdd Zd5ddZ	d5ddZ
d5ddZd5ddZej																d6ddZej								d7ddZej	d8d d!Z				d9d"d#Z			d:d$d%Zd&d' Zd(d) Zd*d+ Zd,d- Z				d;d.d/Z				d<d0d1Z					d=d2d3Z  ZS )>
JAXTrainerc                    s&   t    d | _d | _d | _d| _d S )NT)super__init__train_functiontest_functionpredict_function_jax_state_syncedself	__class__ X/var/www/html/chatgem/venv/lib/python3.10/site-packages/keras/src/backend/jax/trainer.pyr       s
   

zJAXTrainer.__init__FNc	              
   C   s   i }	| j r	||	d< | j|||fddi|	\}
}}|r#| j  || _| j||||||
||d\}}|r9| j  |\}}}|}|rl| jdurltt| jj|}t	j
|d | j|}W d   n1 sgw   Y  |||
||ffS )z?This method is stateless and is intended for use with jax.grad.trainingreturn_lossesT)xyy_predsample_weightr   Nstate_mapping)_call_has_training_argstateless_call_losses_overrideclearstateless_compute_loss	optimizerlistzip	variablesr   StatelessScope
scale_loss)r   trainable_variablesnon_trainable_variablesmetrics_variablesr   r    r"   r   optimizer_variableskwargsr!   losseslossr-   unscaled_lossmappingr   r   r   compute_loss_and_updates'   sR   



z#JAXTrainer.compute_loss_and_updatesc                 C   s   t jdd t| j|D d }| jj|t|d jd d | 	||||}W d    n1 s2w   Y  g }	| jD ]}
|
|
}|d u rJ|
j}|	| q<||	fS )Nc                 S   s   g | ]\}}||fqS r   r   ).0ref_vvr   r   r   
<listcomp>f   s    z8JAXTrainer._update_metrics_variables.<locals>.<listcomp>r#   r   )r"   )r   r.   r,   r2   _loss_trackerupdate_stater   flattenshapecompute_metricsget_current_valuevalueappend)r   r2   r7   r   r    r!   r"   scopelogsnew_metrics_variablesr;   new_vr   r   r   _update_metrics_variablesb   s$   


z$JAXTrainer._update_metrics_variablesc              
   C   s   |\}}}}t |\}}}	tj| jdd}
|
||||||	d|d\\}}}|\}}}}| j|||\}}| ||||||	\}}| ||||}||fS )NT)has_aux)r   r3   )	r   unpack_x_y_sample_weightjaxvalue_and_gradr9   r*   stateless_applyrJ   _enforce_jax_state_sharding)r   statedatar0   r1   r3   r2   r   r    r"   grad_fnr6   auxgradsr7   r!   rG   r   r   r   
train_stepx   sL   
zJAXTrainer.train_stepc              	   C   s   |\}}}t |\}}}| j||||||dd\}	}
|
\}}}}| ||||||\}}| j||d |d\}}}}|||f}||fS )NF)r   r0   r1   r3   r2   )r   rL   r9   rJ   rP   )r   rQ   rR   r0   r1   r2   r   r    r"   r6   rT   r7   r!   rG   _r   r   r   	test_step   sH   


	zJAXTrainer.test_stepc           	      C   sf   |\}}i }| j rd|d< t|\}}}| j|||fi |\}}| jd |d d d\}}}}||fS )NFr   rW   )r%   r   rL   r&   rP   )	r   rQ   rR   r0   r1   r4   r   rX   outputsr   r   r   predict_step   s,   
zJAXTrainer.predict_stepc                    s`   j dkr(|rdd  jsjrt   fdd}|S fdd}|S fdd}|S )N   c                 S   s0   | d }| dd  D ]}t dd ||}q
|S )Nr   r\   c                 S   s   t j| |gS N)rM   numpyconcatenate)t1t2r   r   r   <lambda>   s    z@JAXTrainer._make_function.<locals>.concatenate.<locals>.<lambda>)r   map_structure)rZ   outputnext_outputr   r   r   r_      s   z.JAXTrainer._make_function.<locals>.concatenatec                    sx   t |}| |\}} |g}ztjd D ]}t |}| |\}} || qW n	 ty3   Y nw  |}|| fS Nr\   )nextrangesteps_per_executionrE   StopIteration)rQ   iteratorrR   rZ   rX   _outputsr_   r   step_functionr   r   iterator_step   s   z0JAXTrainer._make_function.<locals>.iterator_stepc                    sd   t |}| |\}} zt jd D ]}t |}| |\}} qW || fS  ty1   Y || fS w rf   )rg   rh   ri   rj   )rQ   rk   rR   rZ   rX   )r   rn   r   r   ro     s   c                    s    | t |S r]   )rg   )rQ   rk   )rn   r   r   ro     s   )ri   run_eagerlyjit_compilejit)r   rn   concatenate_outputsro   r   rm   r   _make_function   s   

 zJAXTrainer._make_functionc                 C   H   | j d ur	|s	d S | js| jrt| jdd}n| j}| |}|| _ d S Nr   donate_argnums)r   rp   rq   rr   rV   rt   )r   forcerV   rn   r   r   r   make_train_function  s   

zJAXTrainer.make_train_functionc                 C   ru   rv   )r   rp   rq   rr   rY   rt   )r   ry   rY   rn   r   r   r   make_test_function(  s   

zJAXTrainer.make_test_functionc                    s\   j d ur
|s
j S fdd}jsjrt|dd}j|dd  fdd}|_ d S )	Nc                    s      | |\}}|| d |ffS )Nr   )r[   )rQ   rR   rZ   r1   r   r   r   r[   =  s   z6JAXTrainer.make_predict_function.<locals>.predict_stepr   rw   T)rs   c                    s    | |\}} || fS r]   r   )rQ   rk   rZ   )_step_functionr   r   rn   H  s   z7JAXTrainer.make_predict_function.<locals>.step_function)r   rp   rq   rr   rt   )r   ry   r[   rn   r   )r|   r   r   make_predict_function9  s   
z JAXTrainer.make_predict_functionr\   auto        Tr   c           $      C   s`  |  d t }|r||k rtd|  |}d | _|r0|d u r0tj|||f|d\\}}}}|d ur<t	|\}}}t
||||||	|
| jd}| j|d |  t|tjshtj|d|dk|||j| d}|   |   d	| _i }d	}|  | jp|}zt||D ]}|   || d| _| F |D ];\}}}|| | jr| jdddddd
}d	| _| ||\}}|\}}} }!||| |!d| _| || | jr nqW d    n1 sw   Y  | !  t"| #|}"|d ur4| $||r4t%| dd d u rt
||||p|| j|d	d| _| j&||||p|||ddd}#dd |#' D }#|"(|# |)||" |"}| jrB nqd}W | !  t| j*t+j,r^|dkr^| j*-| j. t%| dd d uri| `|rr|j/|d d | _| 0  | j1S | !  t| j*t+j,r|dkr| j*-| j. t%| dd d ur| `|r|j/|d d | _| 0  w )NfitzLimiting epochs to %d)validation_split)r   r    r"   
batch_sizesteps_per_epochshuffleclass_weightri   rk   Tr   )add_historyadd_progbarverboseepochsstepsmodelFr0   r1   r3   r2   purge_model_variablesrW   _eval_epoch_iterator)r   r    r"   r   ri   r   r   )r   r    r"   r   r   r   return_dict_use_cached_eval_datasetc                 S   s   i | ]	\}}d | |qS )val_r   )r:   namevalr   r   r   
<dictcomp>  s    z"JAXTrainer.fit.<locals>.<dictcomp>)rG   )2_assert_compile_calledr   
max_epochswarningswarnr   r   train_validation_splitr   rL   JAXEpochIteratorri   _symbolic_buildreset
isinstancecallbacks_moduleCallbackListnum_batches$_record_training_state_sharding_specrz   stop_trainingon_train_begin_initial_epochrh   reset_metricson_epoch_beginr   catch_stop_iterationon_train_batch_begin_get_jax_stater   
_jax_stateon_train_batch_endjax_state_syncdict_get_metrics_result_or_logs_should_evalgetattrevaluateitemsupdateon_epoch_endr*   optimizers_module	Optimizerfinalize_variable_valuestrainable_weightson_train_end_clear_jax_state_shardinghistory)$r   r   r    r   r   r   r   r   validation_datar   r   r"   initial_epochr   validation_stepsvalidation_batch_sizevalidation_freqr   val_xval_yval_sample_weightepoch_iteratortraining_logstraining_finishedepoch
begin_stepend_steprk   rQ   rG   r0   r1   r3   r2   
epoch_logsval_logsr   r   r   r   N  s  

	



-	




zJAXTrainer.fitc	              	   K   s  |  d |	dd}
|	rtd|	 |
r| j}nt|||||d| jd}| j|d |  t|t	j
sDt	j
||dk|d|j| d	}|   |   d| _|  i }|   d
| _| C |D ]8\}}}|| | jr|| jd
d
d
d
d}d| _| ||\}}|\}}}|||d| _||| | jr nqcW d    n1 sw   Y  |   | |}|| d | _|
s|   |r|S | |S )Nr   r   FzArguments not recognized: )r   r    r"   r   r   r   ri   r   r   r\   r   r   r   r   r   Tr0   r1   r2   r   r0   r1   r2   )r   pop
ValueErrorr   r   ri   r   r   r   r   r   r   r   r{   stop_evaluatingon_test_beginr   r   r   on_test_batch_beginr   r   r   on_test_batch_endr   r   on_test_endr   _flatten_metrics_in_order)r   r   r    r   r   r"   r   r   r   r4   use_cached_eval_datasetr   rG   r   r   rk   rQ   r0   r1   r2   r   r   r   r     s   



	&


zJAXTrainer.evaluatec              	   C   s  t |||d| jd}tdd |  D sK|D ]/\}}}tt|\}}}t r.| | nt	  | | W d    n1 sAw   Y   |
  t|tjs_tj||dk|d|j| d}|   |   d| _|  dd	 }	d
| _d }
d }| G |D ]<\}}}|| | jr| jd
d
d
d}d| _| ||\}}|\}}||d| _|	||
}
||d|i | jr nqW d    n1 sw   Y  |   |  d | _|   t|tj |
S )NF)r   r   r   r   ri   c                 s       | ]}|j V  qd S r]   builtr:   layerr   r   r   	<genexpr>      z%JAXTrainer.predict.<locals>.<genexpr>r   r\   r   c                 S   s4   |d u rt dd | }|S t | dd ||  |S )Nc                 S   s   | gS r]   r   )batch_outputr   r   r   rb     s    z?JAXTrainer.predict.<locals>.append_to_outputs.<locals>.<lambda>c                 S   s
   |  |S r]   )rE   )rd   r   r   r   r   rb        
 )r   rc   map_structure_up_to)batch_outputsrZ   r   r   r   append_to_outputs  s   z-JAXTrainer.predict.<locals>.append_to_outputsT)r0   r1   r   r0   r1   rZ   )!r   ri   all_flatten_layersr   rL   rg   r	   r   r.   r   r   r   r   r   r   r}   stop_predictingon_predict_beginr   r   on_predict_batch_beginr   r   r   on_predict_batch_endr   on_predict_endr   r   r   npr_   )r   r   r   r   r   r   r   rX   rk   r   rZ   r1   r   r   rQ   r   r0   r   r   r   predictv  s   






 zJAXTrainer.predictc                    s   |  d |d ur d urtd  d| t|  fdd}| jt| d |   |   | jdddddd	}d| _	| 
|| \}}|\}	}
}}|	|
||d
| _|   tdd |}|rj|S | |S )Ntrain_on_batchzkArguments `sample_weight` and `class_weight` cannot be specified at the same time. Received: sample_weight=z, class_weight=c                   3       t  fV  d S r]   _distribute_datar   r"   r   r    r   r   rR        z'JAXTrainer.train_on_batch.<locals>.data
data_batchTFr   rW   c                 S   
   t | S r]   r   arrayr   r   r   r   rb     r   z+JAXTrainer.train_on_batch.<locals>.<lambda>)r   r   r   class_weight_to_sample_weightsr   rg   r   rz   r   r   r   r   r   r   rc   r   )r   r   r    r"   r   r   rR   rQ   rG   r0   r1   r3   r2   r   r   r   r     sT   

zJAXTrainer.train_on_batchc                    s   |  d  fdd}| jt| d |   |   | jddddd}d| _| || \}}|\}}	}
||	|
d| _| 	  t
d	d
 |}|rO|S | |S )Ntest_on_batchc                   3   r   r]   r   r   r   r   r   rR     r   z&JAXTrainer.test_on_batch.<locals>.datar   TFr   r   c                 S   r   r]   r   r   r   r   r   rb   ;  r   z*JAXTrainer.test_on_batch.<locals>.<lambda>)r   r   rg   r   r{   r   r   r   r   r   r   rc   r   )r   r   r    r"   r   rR   rQ   rG   r0   r1   r2   r   r   r   r     s.   


zJAXTrainer.test_on_batchc                    s   t dd |  D s#t  |   W d    n1 sw   Y  |   |   | jddddd}d| _ fdd}| || \}}|\}}||d| _	| 
  td	d
 |}|S )Nc                 s   r   r]   r   r   r   r   r   r   A  r   z.JAXTrainer.predict_on_batch.<locals>.<genexpr>TFr   c                   3   s     fV  d S r]   r   r   r   r   r   rR   P  s   z)JAXTrainer.predict_on_batch.<locals>.datar   c                 S   r   r]   r   r   r   r   r   rb   Z  r   z-JAXTrainer.predict_on_batch.<locals>.<lambda>)r   r   r   r.   r   r}   r   r   r   r   r   r   rc   )r   r   rQ   rR   r   r0   r1   r   r   r   predict_on_batch@  s,   

zJAXTrainer.predict_on_batchc                 C   s   t | dd r	| jrd S | jdd }| jdd }| jdd }| jdd }|r9t| j|D ]	\}}|| q/|rKt| j|D ]	\}}|| qA|r^t| jj	|D ]	\}}|| qT|rpt| j
|D ]	\}}|| qfd| _d S )Nr   r0   r1   r3   r2   T)r   r   r   getr,   r0   assignr1   r*   r-   r2   )r   r0   r1   r3   r2   r;   r<   r   r   r   r   ]  s.   
zJAXTrainer.jax_state_syncc                 C   sj   dd | j D | _dd | jD | _t| dr'| jd ur'dd | jjD | _ng | _dd | jD | _	d S )Nc                 S      g | ]}|j jqS r   rD   shardingr:   r<   r   r   r   r=   x      zCJAXTrainer._record_training_state_sharding_spec.<locals>.<listcomp>c                 S   r   r   r  r  r   r   r   r=   {  r  r*   c                 S   r   r   r  r  r   r   r   r=     r  c                 S   r   r   r  r  r   r   r   r=     r  )
r0   _trainable_variable_shardingsr1   !_non_trainable_variable_shardingshasattrr*   r-   _optimizer_variable_shardingsr2   _metrics_variable_shardingsr   r   r   r   r   w  s   
z/JAXTrainer._record_training_state_sharding_specc                 C   s   d | _ d | _d | _d | _d S r]   )r  r  r  r	  r   r   r   r   r     s   
z$JAXTrainer._clear_jax_state_shardingc                 C   s   |pg }|pg }|pg }|pg }t t|D ]}tj|| | j| ||< qt t|D ]}tj|| | j| ||< q-t t|D ]}tj|| | j| ||< qDt t|D ]}tj|| | j| ||< q[||||fS )a%  Enforce the sharding spec constraint for all the training state.

        Since the output of the train/eval step will be used as inputs to next
        step, we need to ensure that they have the same sharding spec, so that
        nnx.jit/jax.jit won't have to recompile the train/eval function.

        Note that this function will also rely on the recorded sharding spec
        for each of states.

        This function is expected to be called within the jitted train/eval
        function, especially around the end of the function.
        )	rh   lenrM   laxwith_sharding_constraintr  r  r  r	  )r   r0   r1   r3   r2   ir   r   r   rP     s4   



z&JAXTrainer._enforce_jax_state_shardingc                 C   sb   |r| j D ]}d|_q|r| jD ]}d|_q|r"| jjD ]}d|_q|r-| jD ]}d|_q'dS dS )a  Remove all the model variable for memory saving.

        During JAX training, since the training function is stateless, we have
        to pass in and get the model weights over and over, during which the
        copy of the weights that attached to the Variable are still and
        occupying extra memory. We remove those variable to save memory (for
        better memory utilization) at the beginning of the epoch, and reattach
        the value back to variables at the end of the epoch, via
        `jax_state_sync()`.
        N)r0   _valuer1   r*   r-   r2   )r   r0   r1   r3   r2   r<   r   r   r   _purge_model_variables  s   


z!JAXTrainer._purge_model_variablesc                 C   s   g }|r| dd | jD  |r| dd | jD  |r*| dd | jjD  |r7| dd | jD  |rB| j||||d t|S )Nc                 S      g | ]}|j qS r   rD   r  r   r   r   r=         z-JAXTrainer._get_jax_state.<locals>.<listcomp>c                 S   r  r   r  r  r   r   r   r=     r  c                 S   r  r   r  r  r   r   r   r=     r  c                 S   r  r   r  r  r   r   r   r=     r  rW   )rE   r0   r1   r*   r-   r2   r  tuple)r   r0   r1   r3   r2   r   rQ   r   r   r   r     s"   zJAXTrainer._get_jax_state)FN)F)NNNr\   r~   Nr   NTNNr   NNNr\   )NNNr~   NNNF)Nr~   NN)NNNF)NNF)NNNN)FFFF)FFFFF)__name__
__module____qualname__r   r9   rJ   rV   rY   r[   rt   rz   r{   r}   r   filter_tracebackr   r   r   r   r   r   r   r   r   rP   r  r   __classcell__r   r   r   r   r      s    
;,*

3

 >ie
@
*
2
 r   c                    sX   t    d ur%|d u rt fdd| }ttj jd}t|| |S ttj	| S )Nc                    s     | jS r]   )get_data_layoutrA   ddistributionr   r   rb     s    z"_distribute_data.<locals>.<lambda>)batch_dim_name)
r   r  r   rc   r   jax_distribution_libdistribute_data_inputr  rM   
device_put)rR   layoutsjax_dist_data_inputr   r  r   r     s   
r   c                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
r   c                 C   s
   t | jS r]   )rg   _epoch_iteratorr   r   r   r   __next__
  s   
zJAXEpochIterator.__next__c                 C   s<   t  }|d ur| |S | jjr| j S | | j S r]   )r   r  _get_distributed_iteratordata_adapterbuiltin_prefetchget_jax_iterator_prefetch_numpy_iterator)r   r  r   r   r   _get_iterator  s   

zJAXEpochIterator._get_iteratorc                 #   sB    d}| j  D ]}|du rt fdd|}t||V  qdS )zALazily compute layouts to reduce host to device transfer latency.Nc                    s     | jjS r]   )r  rA   backend_layoutr  r  r   r   rb     s    z<JAXEpochIterator._get_distributed_iterator.<locals>.<lambda>)r'  r)  r   rc   r   )r   r  r"  rR   r   r  r   r&    s   
z*JAXEpochIterator._get_distributed_iteratorc                 #   sF    t  d fdd	}|dd r! V  |d sdS dS )a  Shard and prefetch batches on device.

        Most of the implementation has been borrowed from
        `flax.jax_utils.prefetch_to_device`

        This utility takes an iterator and returns a new iterator which fills an
        on device prefetch buffer. Eager prefetching can improve the performance
        of training loops significantly by overlapping compute and data
        transfer.
           c                    s$   t  | D ]	}t| qd S r]   )	itertoolsislicerE   r   )nrR   numpy_iteratorqueuer   r   enqueue5  s   z:JAXEpochIterator._prefetch_numpy_iterator.<locals>.enqueue)r0  r\   N)r-  )collectionsdequepopleft)r   r2  r4  r   r1  r   r*  %  s   

z)JAXEpochIterator._prefetch_numpy_iteratorN)r  r  r  r%  r+  r&  r*  r   r   r   r   r   	  s
    r   r]   )'r5  r.  r   	functoolsr   rM   r^   r   	keras.srcr   r   r   r   r   r   keras.src.backendr   r   r  keras.src.backend.configr	   keras.src.distributionkeras.src.trainersr
   base_trainer keras.src.trainers.data_adaptersr   r   !keras.src.trainers.epoch_iteratorr   keras.src.utilsr   flaxr   rr   Trainerr   r   r   r   r   r   r   <module>   sB           
_