from keras.src import ops
from keras.src.api_export import keras_export
from keras.src.layers.attention.attention import Attention


@keras_export("keras.layers.AdditiveAttention")
class AdditiveAttention(Attention):
    """Additive attention layer, a.k.a. Bahdanau-style attention.

    Inputs are a list with 2 or 3 elements:
    1. A `query` tensor of shape `(batch_size, Tq, dim)`.
    2. A `value` tensor of shape `(batch_size, Tv, dim)`.
    3. An optional `key` tensor of shape `(batch_size, Tv, dim)`. If none is
        supplied, `value` will be used as `key`.

    The calculation follows the steps:
    1. Calculate attention scores using `query` and `key` with shape
        `(batch_size, Tq, Tv)` as a non-linear sum
        `scores = reduce_sum(tanh(query + key), axis=-1)`.
    2. Use scores to calculate a softmax distribution with shape
        `(batch_size, Tq, Tv)`.
    3. Use the softmax distribution to create a linear combination of `value`
        with shape `(batch_size, Tq, dim)`.
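
    Expressed with `keras.ops`, the three steps look roughly as follows
    (an illustrative sketch that ignores masking, dropout, and the optional
    learned `scale` weight):

    ```python
    q = ops.expand_dims(query, axis=-2)  # (batch_size, Tq, 1, dim)
    k = ops.expand_dims(key, axis=-3)    # (batch_size, 1, Tv, dim)
    scores = ops.sum(ops.tanh(q + k), axis=-1)  # (batch_size, Tq, Tv)
    weights = ops.softmax(scores, axis=-1)      # softmax over the Tv axis
    outputs = ops.matmul(weights, value)        # (batch_size, Tq, dim)
    ```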

    Args:
        use_scale: If `True`, will create a scalar variable to scale the
            attention scores.
        dropout: Float between 0 and 1. Fraction of the units to drop for the
            attention scores. Defaults to `0.0`.

    Call arguments:
        inputs: List of the following tensors:
            - `query`: Query tensor of shape `(batch_size, Tq, dim)`.
            - `value`: Value tensor of shape `(batch_size, Tv, dim)`.
            - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If
                not given, will use `value` for both `key` and `value`, which is
                the most common case.
        mask: List of the following tensors:
            - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`.
                If given, the output will be zero at the positions where
                `mask==False`.
            - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`.
                If given, will apply the mask such that values at positions
                where `mask==False` do not contribute to the result.
        return_attention_scores: bool, if `True`, returns the attention scores
            (after masking and softmax) as an additional output argument.
        training: Python boolean indicating whether the layer should behave in
            training mode (adding dropout) or in inference mode (no dropout).
        use_causal_mask: Boolean. Set to `True` for decoder self-attention. Adds
            a mask such that position `i` cannot attend to positions `j > i`.
            This prevents the flow of information from the future towards the
            past. Defaults to `False`.

    Output:
        Attention outputs of shape `(batch_size, Tq, dim)`.
        (Optional) Attention scores after masking and softmax with shape
            `(batch_size, Tq, Tv)`.
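
    Example:

    A minimal usage sketch (tensor shapes are arbitrary; assumes `numpy` is
    imported as `np` and `keras` is available):

    ```python
    query = np.random.random((4, 2, 16)).astype("float32")  # (batch_size, Tq, dim)
    value = np.random.random((4, 6, 16)).astype("float32")  # (batch_size, Tv, dim)

    attention = keras.layers.AdditiveAttention()
    outputs = attention([query, value])  # shape (4, 2, 16)

    outputs, scores = attention(
        [query, value], return_attention_scores=True
    )  # scores has shape (4, 2, 6)
    ```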
    """

    def __init__(self, use_scale=True, dropout=0.0, **kwargs):
        super().__init__(use_scale=use_scale, dropout=dropout, **kwargs)

    def build(self, input_shape):
        self._validate_inputs(input_shape)
        dim = input_shape[0][-1]
        self.scale = None
        if self.use_scale:
            self.scale = self.add_weight(
                name="scale",
                shape=[dim],
                initializer="glorot_uniform",
                dtype=self.dtype,
                trainable=True,
            )

    def _calculate_scores(self, query, key):
        """Calculates attention scores as a nonlinear sum of query and key.

        Args:
            query: Query tensor of shape `(batch_size, Tq, dim)`.
            key: Key tensor of shape `(batch_size, Tv, dim)`.

        Returns:
            Tensor of shape `(batch_size, Tq, Tv)`.
        """
        # Reshape into `(batch_size, Tq, 1, dim)` and `(batch_size, 1, Tv, dim)`
        # so that the addition below broadcasts over the Tq and Tv axes.
        q_reshaped = ops.expand_dims(query, axis=-2)
        k_reshaped = ops.expand_dims(key, axis=-3)
        scale = self.scale if self.use_scale else 1.0
        return ops.sum(scale * ops.tanh(q_reshaped + k_reshaped), axis=-1)

    def get_config(self):
        base_config = super().get_config()
        # `score_mode` is not a constructor argument of this layer, so drop
        # the entry inherited from the base `Attention` config.
        del base_config["score_mode"]
        return base_config