from keras.src import activations
from keras.src import backend
from keras.src import constraints
from keras.src import initializers
from keras.src import ops
from keras.src import regularizers
from keras.src import tree
from keras.src.api_export import keras_export
from keras.src.layers.input_spec import InputSpec
from keras.src.layers.layer import Layer
from keras.src.layers.rnn.dropout_rnn_cell import DropoutRNNCell
from keras.src.layers.rnn.rnn import RNN


@keras_export("keras.layers.LSTMCell")
class LSTMCell(Layer, DropoutRNNCell):
    """Cell class for the LSTM layer.

    This class processes one step within the whole time sequence input, whereas
    `keras.layers.LSTM` processes the whole sequence.

    Args:
        units: Positive integer, dimensionality of the output space.
        activation: Activation function to use. Default: hyperbolic tangent
            (`tanh`). If you pass `None`, no activation is applied
            (i.e. "linear" activation: `a(x) = x`).
        recurrent_activation: Activation function to use for the recurrent step.
            Default: sigmoid (`sigmoid`). If you pass `None`, no activation is
            applied (i.e. "linear" activation: `a(x) = x`).
        use_bias: Boolean (default `True`), whether the layer
            should use a bias vector.
        kernel_initializer: Initializer for the `kernel` weights matrix,
            used for the linear transformation of the inputs. Default:
            `"glorot_uniform"`.
        recurrent_initializer: Initializer for the `recurrent_kernel`
            weights matrix, used for the linear transformation
            of the recurrent state. Default: `"orthogonal"`.
        bias_initializer: Initializer for the bias vector. Default: `"zeros"`.
        unit_forget_bias: Boolean (default `True`). If `True`,
            add 1 to the bias of the forget gate at initialization.
            Setting it to `True` will also force `bias_initializer="zeros"`.
            This is recommended in [Jozefowicz et al.](
            https://github.com/mlresearch/v37/blob/gh-pages/jozefowicz15.pdf)
        kernel_regularizer: Regularizer function applied to the `kernel` weights
            matrix. Default: `None`.
        recurrent_regularizer: Regularizer function applied to the
            `recurrent_kernel` weights matrix. Default: `None`.
        bias_regularizer: Regularizer function applied to the bias vector.
            Default: `None`.
        kernel_constraint: Constraint function applied to the `kernel` weights
            matrix. Default: `None`.
        recurrent_constraint: Constraint function applied to the
            `recurrent_kernel` weights matrix. Default: `None`.
        bias_constraint: Constraint function applied to the bias vector.
            Default: `None`.
        dropout: Float between 0 and 1. Fraction of the units to drop for the
            linear transformation of the inputs. Default: 0.
        recurrent_dropout: Float between 0 and 1. Fraction of the units to drop
            for the linear transformation of the recurrent state. Default: 0.
        seed: Random seed for dropout.

    Call arguments:
        inputs: A 2D tensor, with shape `(batch, features)`.
        states: A 2D tensor with shape `(batch, units)`, which is the state
            from the previous time step.
        training: Python boolean indicating whether the layer should behave in
            training mode or in inference mode. Only relevant when `dropout` or
            `recurrent_dropout` is used.

    Example:

    >>> inputs = np.random.random((32, 10, 8))
    >>> rnn = keras.layers.RNN(keras.layers.LSTMCell(4))
    >>> output = rnn(inputs)
    >>> output.shape
    (32, 4)
    >>> rnn = keras.layers.RNN(
    ...    keras.layers.LSTMCell(4),
    ...    return_sequences=True,
    ...    return_state=True)
    >>> whole_sequence_output, final_state = rnn(inputs)
    >>> whole_sequence_output.shape
    (32, 10, 4)
    >>> final_state.shape
    (32, 4)
    tanhsigmoidTglorot_uniform
orthogonalzerosN        c                    sF  |dkrt d| d|dd}t jdi | || _|| _t|| _t|| _	|| _
t|| _t|| _t|| _t|	| _t|
| _t|| _t|| _t|| _t|| _tdtd|| _tdtd|| _| jdkrd| _| jdkrd	| _|| _tjj|d
| _ || _!| j| jg| _"| j| _#d S )Nr   zQReceived an invalid value for argument `units`, expected a positive integer, got .implementation         ?r         )seed )$
ValueErrorpopsuper__init__r   unitsr   get
activationrecurrent_activationuse_biasr   kernel_initializerrecurrent_initializerbias_initializerr   kernel_regularizerrecurrent_regularizerbias_regularizerr   kernel_constraintrecurrent_constraintbias_constraintminmaxdropoutrecurrent_dropoutdropout_mask_countr   r   randomSeedGeneratorseed_generatorunit_forget_bias
state_sizeoutput_size)selfr!   r#   r$   r%   r&   r'   r(   r7   r)   r*   r+   r,   r-   r.   r1   r2   r   kwargsr   	__class__r   T/var/www/html/chatgem/venv/lib/python3.10/site-packages/keras/src/layers/rnn/lstm.pyr    X   sB   

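
    # Implementation modes: `implementation=1` performs one matmul per gate so
    # that an independent dropout mask can be applied to each gate's input
    # (hence `dropout_mask_count = 4`), while `implementation=2` (the default)
    # fuses the four projections into one larger matmul and splits the result,
    # which is usually faster. `call` below dispatches on this attribute.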
zLSTMCell.__init__c                    s   t  | |d } j| jd fd j j jd _ j j jd fd j j	 j
d _ jrT jr> fdd}n j} j jd fd| j jd _d S d  _d S )	Nr   kernel)shapenameinitializerregularizer
constraintrecurrent_kernelc                    s`   t  j jfg|R i |td jfg|R i | j jd fg|R i |gS )Nonesr   )r   concatenater(   r!   r   r"   )_argsr;   r:   r   r>   r(      s0   
z(LSTMCell.build.<locals>.bias_initializerbias)r   build
add_weightr!   r&   r)   r,   r@   r'   r*   r-   rF   r%   r7   r(   r+   r.   rL   )r:   input_shape	input_dimr(   r<   rK   r>   rM      s8   

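
    # Weight layout: `kernel`, `recurrent_kernel`, and `bias` pack the four
    # gates along their last axis in the order (input, forget, candidate,
    # output). The slicing in `_compute_carry_and_output` below and the
    # `ops.split(..., 4, ...)` calls in `call` rely on this order, as does the
    # `unit_forget_bias` initializer above, which places its block of ones in
    # the second (forget-gate) slice.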
zLSTMCell.buildc                 C   s   |\}}}}|\}}	}
}|  |t|| jddd| jf  }|  |t|	| jdd| j| jd f  }|| || |t|
| jdd| jd | jd f    }|  |t|| jdd| jd df  }||fS )z.Computes carry and output using split kernels.Nr      )r$   r   matmulrF   r!   r#   )r:   xh_tm1c_tm1x_ix_fx_cx_oh_tm1_ih_tm1_fh_tm1_ch_tm1_oifcor   r   r>   _compute_carry_and_output   s4   " "z"LSTMCell._compute_carry_and_outputc                 C   sH   |\}}}}|  |}|  |}|| || |  }	|  |}
|	|
fS )z.Computes carry and output using fused kernels.)r$   r#   )r:   zrU   z0z1z2z3r^   r_   r`   ra   r   r   r>   _compute_carry_and_output_fused   s   


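
    # For reference, one step with the fused kernel reduces to the following
    # sketch (the free-standing names `x`, `h`, `c` are illustrative stand-ins
    # for the input batch and previous states, not part of this module's API):
    #
    #   z = x @ kernel + h @ recurrent_kernel + bias    # (batch, 4 * units)
    #   z_i, z_f, z_c, z_o = split(z, 4, axis=1)
    #   i = recurrent_activation(z_i)                   # input gate
    #   f = recurrent_activation(z_f)                   # forget gate
    #   c_next = f * c + i * activation(z_c)            # new carry state
    #   o = recurrent_activation(z_o)                   # output gate
    #   h_next = o * activation(c_next)                 # new hidden state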

    def call(self, inputs, states, training=False):
        h_tm1 = states[0]  # previous memory state
        c_tm1 = states[1]  # previous carry state

        if self.implementation == 1:
            if training and 0.0 < self.dropout < 1.0:
                dp_mask = self.get_dropout_mask(inputs)
                inputs_i = inputs * dp_mask[0]
                inputs_f = inputs * dp_mask[1]
                inputs_c = inputs * dp_mask[2]
                inputs_o = inputs * dp_mask[3]
            else:
                inputs_i = inputs
                inputs_f = inputs
                inputs_c = inputs
                inputs_o = inputs
            k_i, k_f, k_c, k_o = ops.split(self.kernel, 4, axis=1)
            x_i = ops.matmul(inputs_i, k_i)
            x_f = ops.matmul(inputs_f, k_f)
            x_c = ops.matmul(inputs_c, k_c)
            x_o = ops.matmul(inputs_o, k_o)
            if self.use_bias:
                b_i, b_f, b_c, b_o = ops.split(self.bias, 4, axis=0)
                x_i += b_i
                x_f += b_f
                x_c += b_c
                x_o += b_o

            if training and 0.0 < self.recurrent_dropout < 1.0:
                rec_dp_mask = self.get_recurrent_dropout_mask(h_tm1)
                h_tm1_i = h_tm1 * rec_dp_mask[0]
                h_tm1_f = h_tm1 * rec_dp_mask[1]
                h_tm1_c = h_tm1 * rec_dp_mask[2]
                h_tm1_o = h_tm1 * rec_dp_mask[3]
            else:
                h_tm1_i = h_tm1
                h_tm1_f = h_tm1
                h_tm1_c = h_tm1
                h_tm1_o = h_tm1
            x = (x_i, x_f, x_c, x_o)
            h_tm1 = (h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o)
            c, o = self._compute_carry_and_output(x, h_tm1, c_tm1)
        else:
            if training and 0.0 < self.dropout < 1.0:
                dp_mask = self.get_dropout_mask(inputs)
                inputs = inputs * dp_mask
            z = ops.matmul(inputs, self.kernel)
            z += ops.matmul(h_tm1, self.recurrent_kernel)
            if self.use_bias:
                z += self.bias
            z = ops.split(z, 4, axis=1)
            c, o = self._compute_carry_and_output_fused(z, c_tm1)

        h = o * self.activation(c)
        return h, [h, c]
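
    # `get_config` serializes every constructor argument, so the standard
    # `from_config(config)` path inherited from `Layer` can rebuild an
    # equivalent cell from `cell.get_config()`.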
zLSTMCell.callc                    s   i d| j dt| jdt| jd| jd| jdt| jdt| j	dt| j
d	t| jd
t| jdt| jdt| jdt| jdt| jd| jd| jd| j}t  }i ||S )Nr!   r#   r$   r%   r7   r&   r'   r(   r)   r*   r+   r,   r-   r.   r1   r2   r   )r!   r   	serializer#   r$   r%   r7   r   r&   r'   r(   r   r)   r*   r+   r   r,   r-   r.   r1   r2   r   r   
get_configr:   configbase_configr<   r   r>   r   !  sb   
zLSTMCell.get_configc                    s    fddj D S )Nc                    s    g | ]}t j |fjd qS ))dtype)r   r   compute_dtype).0d
batch_sizer:   r   r>   
<listcomp>E  s    z.LSTMCell.get_initial_state.<locals>.<listcomp>)r8   )r:   r   r   r   r>   get_initial_stateD  s   zLSTMCell.get_initial_state)r   r   Tr   r   r   TNNNNNNr   r   NFN)__name__
__module____qualname____doc__r    rM   rb   rh   r   r   r   __classcell__r   r   r<   r>   r      s2    J;0
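
# `unit_forget_bias` note: with the default `unit_forget_bias=True`,
# `LSTMCell.build` initializes the packed bias as
# [bias_initializer(units), ones(units), bias_initializer(2 * units)], so the
# forget gate starts biased toward keeping the carry state and gradients can
# flow through it early in training (Jozefowicz et al., 2015).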


@keras_export("keras.layers.LSTM")
class LSTM(RNN):
    """Long Short-Term Memory layer - Hochreiter 1997.

    Based on available runtime hardware and constraints, this layer
    will choose different implementations (cuDNN-based or backend-native)
    to maximize the performance. If a GPU is available and all
    the arguments to the layer meet the requirement of the cuDNN kernel
    (see below for details), the layer will use a fast cuDNN implementation
    when using the TensorFlow backend.
    The requirements to use the cuDNN implementation are:

    1. `activation` == `tanh`
    2. `recurrent_activation` == `sigmoid`
    3. `recurrent_dropout` == 0
    4. `unroll` is `False`
    5. `use_bias` is `True`
    6. Inputs, if masking is used, are strictly right-padded.
    7. Eager execution is enabled in the outermost context.

    For example:

    >>> inputs = np.random.random((32, 10, 8))
    >>> lstm = keras.layers.LSTM(4)
    >>> output = lstm(inputs)
    >>> output.shape
    (32, 4)
    >>> lstm = keras.layers.LSTM(
    ...     4, return_sequences=True, return_state=True)
    >>> whole_seq_output, final_memory_state, final_carry_state = lstm(inputs)
    >>> whole_seq_output.shape
    (32, 10, 4)
    >>> final_memory_state.shape
    (32, 4)
    >>> final_carry_state.shape
    (32, 4)

    Args:
        units: Positive integer, dimensionality of the output space.
        activation: Activation function to use.
            Default: hyperbolic tangent (`tanh`).
            If you pass `None`, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        recurrent_activation: Activation function to use
            for the recurrent step.
            Default: sigmoid (`sigmoid`).
            If you pass `None`, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        use_bias: Boolean (default `True`), whether the layer
            should use a bias vector.
        kernel_initializer: Initializer for the `kernel` weights matrix,
            used for the linear transformation of the inputs. Default:
            `"glorot_uniform"`.
        recurrent_initializer: Initializer for the `recurrent_kernel`
            weights matrix, used for the linear transformation of the recurrent
            state. Default: `"orthogonal"`.
        bias_initializer: Initializer for the bias vector. Default: `"zeros"`.
        unit_forget_bias: Boolean (default `True`). If `True`,
            add 1 to the bias of the forget gate at initialization.
            Setting it to `True` will also force `bias_initializer="zeros"`.
            This is recommended in [Jozefowicz et al.](
            https://github.com/mlresearch/v37/blob/gh-pages/jozefowicz15.pdf)
        kernel_regularizer: Regularizer function applied to the `kernel` weights
            matrix. Default: `None`.
        recurrent_regularizer: Regularizer function applied to the
            `recurrent_kernel` weights matrix. Default: `None`.
        bias_regularizer: Regularizer function applied to the bias vector.
            Default: `None`.
        activity_regularizer: Regularizer function applied to the output of the
            layer (its "activation"). Default: `None`.
        kernel_constraint: Constraint function applied to the `kernel` weights
            matrix. Default: `None`.
        recurrent_constraint: Constraint function applied to the
            `recurrent_kernel` weights matrix. Default: `None`.
        bias_constraint: Constraint function applied to the bias vector.
            Default: `None`.
        dropout: Float between 0 and 1. Fraction of the units to drop for the
            linear transformation of the inputs. Default: 0.
        recurrent_dropout: Float between 0 and 1. Fraction of the units to drop
            for the linear transformation of the recurrent state. Default: 0.
        seed: Random seed for dropout.
        return_sequences: Boolean. Whether to return the last output
            in the output sequence, or the full sequence. Default: `False`.
        return_state: Boolean. Whether to return the last state in addition
            to the output. Default: `False`.
        go_backwards: Boolean (default: `False`).
            If `True`, process the input sequence backwards and return the
            reversed sequence.
        stateful: Boolean (default: `False`). If `True`, the last state
            for each sample at index i in a batch will be used as initial
            state for the sample of index i in the following batch.
        unroll: Boolean (default False).
            If `True`, the network will be unrolled,
            else a symbolic loop will be used.
            Unrolling can speed-up a RNN,
            although it tends to be more memory-intensive.
            Unrolling is only suitable for short sequences.
        use_cudnn: Whether to use a cuDNN-backed implementation. `"auto"` will
            attempt to use cuDNN when feasible, and will fall back to the
            default implementation if not.

    Call arguments:
        inputs: A 3D tensor, with shape `(batch, timesteps, feature)`.
        mask: Binary tensor of shape `(samples, timesteps)` indicating whether
            a given timestep should be masked (optional).
            An individual `True` entry indicates that the corresponding timestep
            should be utilized, while a `False` entry indicates that the
            corresponding timestep should be ignored. Defaults to `None`.
        training: Python boolean indicating whether the layer should behave in
            training mode or in inference mode. This argument is passed to the
            cell when calling it. This is only relevant if `dropout` or
            `recurrent_dropout` is used (optional). Defaults to `None`.
        initial_state: List of initial state tensors to be passed to the first
            call of the cell (optional, `None` causes creation
            of zero-filled initial state tensors). Defaults to `None`.
    """

    def __init__(
        self,
        units,
        activation="tanh",
        recurrent_activation="sigmoid",
        use_bias=True,
        kernel_initializer="glorot_uniform",
        recurrent_initializer="orthogonal",
        bias_initializer="zeros",
        unit_forget_bias=True,
        kernel_regularizer=None,
        recurrent_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        recurrent_constraint=None,
        bias_constraint=None,
        dropout=0.0,
        recurrent_dropout=0.0,
        seed=None,
        return_sequences=False,
        return_state=False,
        go_backwards=False,
        stateful=False,
        unroll=False,
        use_cudnn="auto",
        **kwargs,
    ):
        cell = LSTMCell(
            units,
            activation=activation,
            recurrent_activation=recurrent_activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            unit_forget_bias=unit_forget_bias,
            recurrent_initializer=recurrent_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            recurrent_regularizer=recurrent_regularizer,
            bias_regularizer=bias_regularizer,
            kernel_constraint=kernel_constraint,
            recurrent_constraint=recurrent_constraint,
            bias_constraint=bias_constraint,
            dropout=dropout,
            recurrent_dropout=recurrent_dropout,
            dtype=kwargs.get("dtype", None),
            trainable=kwargs.get("trainable", True),
            name="lstm_cell",
            seed=seed,
            implementation=kwargs.pop("implementation", 2),
        )
        super().__init__(
            cell,
            return_sequences=return_sequences,
            return_state=return_state,
            go_backwards=go_backwards,
            stateful=stateful,
            unroll=unroll,
            activity_regularizer=activity_regularizer,
            **kwargs,
        )
        self.input_spec = InputSpec(ndim=3)
        if use_cudnn not in ("auto", True, False):
            raise ValueError(
                "Invalid value received for argument `use_cudnn`. "
                "Expected one of {'auto', True, False}. "
                f"Received: use_cudnn={use_cudnn}"
            )
        self.use_cudnn = use_cudnn
        if backend.backend() == "tensorflow":
            if backend.cudnn_ok(
                cell.activation,
                cell.recurrent_activation,
                self.unroll,
                cell.use_bias,
            ) and use_cudnn in (True, "auto"):
                self.supports_jit = False

    def inner_loop(self, sequences, initial_state, mask, training=False):
        if tree.is_nested(mask):
            mask = mask[0]

        if self.use_cudnn in ("auto", True) and not self.recurrent_dropout:
            try:
                if training and self.dropout:
                    dp_mask = self.cell.get_dropout_mask(sequences[:, 0, :])
                    dp_mask = ops.expand_dims(dp_mask, axis=1)
                    dp_mask = ops.broadcast_to(dp_mask, ops.shape(sequences))
                    dp_sequences = sequences * dp_mask
                else:
                    dp_sequences = sequences

                # Backends are allowed to specify (optionally) optimized
                # implementations of the inner LSTM loop. In the case of TF
                # for instance, it will leverage cuDNN when feasible.
                out = backend.lstm(
                    dp_sequences,
                    initial_state[0],
                    initial_state[1],
                    mask,
                    kernel=self.cell.kernel,
                    recurrent_kernel=self.cell.recurrent_kernel,
                    bias=self.cell.bias,
                    activation=self.cell.activation,
                    recurrent_activation=self.cell.recurrent_activation,
                    return_sequences=self.return_sequences,
                    go_backwards=self.go_backwards,
                    unroll=self.unroll,
                )
                # We disable jit_compile for the model in this case,
                # since cuDNN ops aren't XLA compatible.
                if backend.backend() == "tensorflow":
                    self.supports_jit = False
                return out
            except NotImplementedError:
                pass
        if self.use_cudnn is True:
            raise ValueError(
                "use_cudnn=True was specified, "
                "but cuDNN is not supported for this layer configuration "
                "with this backend. Pass use_cudnn='auto' to fall back to a "
                "non-cuDNN implementation."
            )
        return super().inner_loop(
            sequences, initial_state, mask=mask, training=training
        )

    def call(self, sequences, initial_state=None, mask=None, training=False):
        return super().call(
            sequences,
            mask=mask,
            training=training,
            initial_state=initial_state,
        )

    @property
    def units(self):
        return self.cell.units

    @property
    def activation(self):
        return self.cell.activation

    @property
    def recurrent_activation(self):
        return self.cell.recurrent_activation

    @property
    def use_bias(self):
        return self.cell.use_bias

    @property
    def unit_forget_bias(self):
        return self.cell.unit_forget_bias

    @property
    def kernel_initializer(self):
        return self.cell.kernel_initializer

    @property
    def recurrent_initializer(self):
        return self.cell.recurrent_initializer

    @property
    def bias_initializer(self):
        return self.cell.bias_initializer

    @property
    def kernel_regularizer(self):
        return self.cell.kernel_regularizer

    @property
    def recurrent_regularizer(self):
        return self.cell.recurrent_regularizer

    @property
    def bias_regularizer(self):
        return self.cell.bias_regularizer

    @property
    def kernel_constraint(self):
        return self.cell.kernel_constraint

    @property
    def recurrent_constraint(self):
        return self.cell.recurrent_constraint

    @property
    def bias_constraint(self):
        return self.cell.bias_constraint

    @property
    def dropout(self):
        return self.cell.dropout

    @property
    def recurrent_dropout(self):
        return self.cell.recurrent_dropout

    def get_config(self):
        config = {
            "units": self.units,
            "activation": activations.serialize(self.activation),
            "recurrent_activation": activations.serialize(
                self.recurrent_activation
            ),
            "use_bias": self.use_bias,
            "kernel_initializer": initializers.serialize(
                self.kernel_initializer
            ),
            "recurrent_initializer": initializers.serialize(
                self.recurrent_initializer
            ),
            "bias_initializer": initializers.serialize(self.bias_initializer),
            "unit_forget_bias": self.unit_forget_bias,
            "kernel_regularizer": regularizers.serialize(
                self.kernel_regularizer
            ),
            "recurrent_regularizer": regularizers.serialize(
                self.recurrent_regularizer
            ),
            "bias_regularizer": regularizers.serialize(self.bias_regularizer),
            "activity_regularizer": regularizers.serialize(
                self.activity_regularizer
            ),
            "kernel_constraint": constraints.serialize(self.kernel_constraint),
            "recurrent_constraint": constraints.serialize(
                self.recurrent_constraint
            ),
            "bias_constraint": constraints.serialize(self.bias_constraint),
            "dropout": self.dropout,
            "recurrent_dropout": self.recurrent_dropout,
            "seed": self.cell.seed,
        }
        base_config = super().get_config()
        del base_config["cell"]
        return {**base_config, **config}

    @classmethod
    def from_config(cls, config):
        return cls(**config)
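
# Minimal usage sketch, mirroring the doctests in the docstrings above
# (assumes a configured Keras backend; names below are illustrative):
#
#   import numpy as np
#   from keras.src.layers.rnn.lstm import LSTM
#
#   x = np.random.random((32, 10, 8)).astype("float32")
#   layer = LSTM(4, return_sequences=True, return_state=True)
#   whole_seq, final_h, final_c = layer(x)
#   # whole_seq: (32, 10, 4); final_h and final_c: (32, 4)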