o
    2hW                     @   sN  d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dlm
Z
 d dlmZ d dlmZ d d	lmZ 	 ed
dgG dd dZeddde dfddZedG dd deZdd ZG dd deZed			d.ddZ	 edd/d d!Zed"d#d$ Zed%d&d' Zed(d/d)d*Zed+d/d,d-ZdS )0    N)backend)ops)keras_export)KerasTensor)any_symbolic_tensors)canonicalize_axis)standardize_axis_for_numpy)	Operationzkeras.Quantizerzkeras.quantizers.Quantizerc                   @   s2   e Zd ZdddZdd Zedd Zdd	 Zd
S )	Quantizerint8c                 C   s
   || _ d S Noutput_dtype)selfr    r   Z/var/www/html/chatgem/venv/lib/python3.10/site-packages/keras/src/quantizers/quantizers.py__init__   s   
zQuantizer.__init__c                 C   s   |S )z0Compute a quantized output from an input tensor.r   )r   xr   r   r   __call__   s   zQuantizer.__call__c                 C   s   | di |S )a  Creates a quantizer from its config.

        This method is the reverse of `get_config`,
        capable of instantiating the same quantizer from the config
        dictionary.

        This method is used by Keras `model_to_estimator`, saving and
        loading models to HDF5 formats, Keras model cloning, some visualization
        utilities, and exporting models to and from JSON.

        Args:
            config: A Python dictionary, typically the output of get_config.

        Returns:
            A quantizer instance.
        Nr   r   )clsconfigr   r   r   from_config      zQuantizer.from_configc                 C   s   t |  d)a  Returns the config of the quantizer.

        A quantizer config is a Python dictionary (serializable)
        containing all configuration parameters of the quantizer.
        The same quantizer can be reinstantiated later
        (without any saved state) from this configuration.

        This method is optional if you are just training and executing models,
        exporting to and from SavedModels, or using weight checkpoints.

        This method is required for Keras `model_to_estimator`, saving and
        loading models to HDF5 formats, Keras model cloning, some visualization
        utilities, and exporting models to and from JSON.

        Returns:
            Python dictionary.
        z  does not implement get_config())NotImplementedErrorr   r   r   r   
get_config-   r   zQuantizer.get_configN)r   )__name__
__module____qualname__r   r   classmethodr   r   r   r   r   r   r
      s    

r
   z!keras.quantizers.abs_max_quantizei   r   Fc           	   
   C   s  |rKt | j}t| } t|}t|d ttj	t
| |dd|}t| |}tt||d |d }||}t|tj||dfS t| } t|d ttj	t
| |dd|}t|t | j}t| |}tt||d |d }t||}||fS )N   T)axiskeepdimsr   dtype)r   standardize_dtyper&   r   convert_to_numpyr   npdivideaddmaxabsmultiplycliproundastypeconvert_to_tensorcast)	inputsr#   value_ranger&   epsilonto_numpyoriginal_dtypescaleoutputsr   r   r   abs_max_quantizeB   s0   	


r;   z keras.quantizers.AbsMaxQuantizerc                   @   s0   e Zd Zde dfddZdd Zdd Zd	S )
AbsMaxQuantizerr    r   c                 C   s8   t j| |d t|tr|f}t|| _|| _|| _d S )Nr   )r
   r   
isinstanceinttupler#   r5   r6   )r   r#   r5   r6   r   r   r   r   r   i   s   


zAbsMaxQuantizer.__init__c                 C   s$   t || j| j| j| j\}}||fS r   )r;   r#   r5   r   r6   )r   r   quantized_xr9   r   r   r   r   w   s   zAbsMaxQuantizer.__call__c                 C   s   | j | j| j| jdS )Nr#   r5   r6   r   rA   r   r   r   r   r   }   s
   zAbsMaxQuantizer.get_configN)r   r   r   r   r6   r   r   r   r   r   r   r   r<   g   s    
r<   c                 C   s   t | jd}t| |} t||}d|> d }|sdnd}t|| }t||| }t|| |}	|t| | }
t|
||}t|}t	t|||}t	t|||}||||	fS )z>Adjusts and nudges the quantization range for better accuracy.float32r"   r   )
r   result_typer&   r   r3   subtractr*   r/   r0   r.   )	min_range	max_rangenum_bitsnarrow_rangecompute_dtype	quant_max	quant_min
diff_ranger9   	inv_scalezero_point_from_min
zero_pointnudged_zero_point
nudged_min
nudged_maxr   r   r   adjust_and_nudge   s   
rS   c                       s.   e Zd Zd
 fdd	Zdd Zdd	 Z  ZS )FakeQuantWithMinMaxVars   FNc                    s    t    || _|| _|| _d S r   )superr   rG   rH   r#   )r   rG   rH   r#   	__class__r   r   r      s   

z FakeQuantWithMinMaxVars.__init__c                 C   s   t |||| j| j| jdS )N)rG   rH   r#   )fake_quant_with_min_max_varsrG   rH   r#   r   r4   min_valsmax_valsr   r   r   call   s   zFakeQuantWithMinMaxVars.callc                 C   s   t |j|jdS )Nr%   )r   shaper&   rZ   r   r   r   compute_output_spec   s   z+FakeQuantWithMinMaxVars.compute_output_specrU   FN)r   r   r   r   r]   r_   __classcell__r   r   rW   r   rT      s    
rT   z-keras.quantizers.fake_quant_with_min_max_varsrU   c              	      sL  t | frt | ||S t| } t|}t|}t dur*t | j t dkrddl	}t
| j} du rd|jjt| dtt|ddtt|ddd}tj||dS | jd }	t|  |	} |jjt| dt|dt|dd}tj||d}t||	 S tj fd	d
}
|
| ||S )au  Perform per-tensor or per-channel fake quantization.

    `[min_vals, max_vals]` define the clamping range for the `inputs`.

    The `inputs` are quantized into the quantization range:
    - `[0, 2^num_bits - 1]` when `narrow_range=False`
    - `[1, 2^num_bits - 1]` when `narrow_range=True`

    After quantization, the values are dequantized and output as floats within
    the `[min_vals, max_vals]` interval.

    This operation supports gradient computation, allowing `min_vals` and
    `max_vals` to be trained.

    Args:
        inputs: Input Keras tensor of float dtype.
        min_vals: A global minimum scalar or a per-channel minimum tensor.
        max_vals: A global maximum scalar or a per-channel maximum tensor.
        num_bits: Quantization bit width (e.g., `8` for int8). Defaults to `8`.
        narrow_range: Whether to use narrow quantization range. Defaults to
            `False`.
        axis: Axis along which to perform per-channel quantization. If `None`,
              per-tensor quantization is performed. Defaults to `None`.


    Returns:
        Tensor: A Keras tensor with fake quantization applied.
    N
tensorflowr   rB   r   )rG   rH   r%   r"   c                    s   t j}t||\}}ttt |d}tt	jt	j}t
|}tttt
t|||d|}	tj	|	|d}	ttt d d fdd
}
|	|
fS )Ng      ?r%   )upstreamc                    s   | d u r|\} t | d} fddtt|jD }t }t || d} d ur5t j||d}nt |}t }t || d} d urSt j||d}nt |}|||fS )N        c                       g | ]}| kr|qS r   r   .0ir#   r   r   
<listcomp>6      zqfake_quant_with_min_max_vars.<locals>._fake_quant_with_min_max_vars_per_channel.<locals>.grad.<locals>.<listcomp>ri   )r   whererangelenr^   
less_equalsumgreater_equal)rc   argsdxaxesmin_maskgrad_minmax_maskgrad_max)r#   masksrR   rQ   r   r   r   grad0  s   


z]fake_quant_with_min_max_vars.<locals>._fake_quant_with_min_max_vars_per_channel.<locals>.grad)r   r'   r&   rS   r   floorr+   r.   r/   r3   rD   logical_andrq   ro   )r   min_valmax_valr&   r9   rM   
quant_zero	x_clampedx_clamped_shiftedresultrz   r#   rH   rG   )ry   rR   rQ   r   r   )_fake_quant_with_min_max_vars_per_channel  s8   zOfake_quant_with_min_max_vars.<locals>._fake_quant_with_min_max_vars_per_channel)r   rT   symbolic_callr   r2   r>   r   ndimr   rb   r'   r&   quantizationrY   r3   reshapeswapaxes(fake_quant_with_min_max_vars_per_channelcustom_gradient)r4   r[   r\   rG   rH   r#   tfr&   r:   	last_axisr   r   r   r   rY      sH   
%







?rY   z%keras.quantizers.compute_float8_scalec                 C   sR   t |}t t || d| }t | dk||}t t | ||}t |S )N   rd   )r   
reciprocalr*   rl   isfinite)amaxr9   	dtype_maxmarginsfr   r   r   compute_float8_scaleT  s
   

r   z,keras.quantizers.compute_float8_amax_historyc                 C   sD   t t t | |j}t t j|dddggt |dg}|S )N)shiftr   r"   )r   r3   r,   r-   r&   scatter_updaterollr   )r   amax_historyamax_updatenew_amax_historyr   r   r   compute_float8_amax_historya  s   r   z(keras.quantizers.quantize_and_dequantizec                 C   sh   t tt|j|}t | t ||}t || |}t ||}t t ||t ||}|S r   )	r   r3   float	ml_dtypesfinfor,   r*   r/   r.   )r4   r9   quantized_dtyperI   quantized_dtype_maxr   r   r   r   quantize_and_dequantizel  s   r   zkeras.quantizers.pack_int4c                    sx  t | jdkrtd| jt| jddpt| j} dk r$ |7   g fddt|D  fddt|D }t	
| }t	|d }t	t	|d	d
}|dd
df d }t	j||gdd}|t	|d }	|d|	df }
|
ddd	df }|
d
dd	df }t	jddd}t	||}t	||}t	|t	|d}t	|d}t	
||}|}|t	||fS )a	  Pack an int4 tensor into an int8 tensor with packed nibbles.

    The input values must already be int8 in the signed range `[-8, 7]` and
    represent the desired int4 values. Packing is performed along the specified
    axis (default is 0).

    For every two consecutive rows, the **low nibble** of the output byte
    stores the value from the first row, and the **high nibble** stores
    the value from the second row.

    Args:
        arr: An int8 tensor containing int4 values in the range `[-8, 7]`.
        axis: The axis along which to pack the tensor. Defaults to 0.

    Returns:
        tuple: A tuple `(packed, packed_shape, orig_rows)` where `packed` is
            the packed int8 tensor with int4 values stored in nibbles,
            `packed_shape` is the shape of the packed tensor, and `orig_rows`
            is the original (unpacked) row count prior to any padding that may
            have been inserted when an odd number of rows is supplied.

    Example:

    ```python
    >>> import numpy as np
    >>> from keras.quantizers import pack_int4, unpack_int4

    # Example with axis=0
    # Original array has shape (3, 2)
    >>> original_array = np.array([[-3, 7], [2, -8], [1, 0]], dtype=np.int8)

    # Pack the array along axis 0. Since the length of axis 0 (3) is
    # odd, it will be padded to a length of 4. The packed array will
    # have a shape of (ceil(3/2), 2) = (2, 2).
    >>> packed, packed_shape, orig_len = pack_int4(original_array, axis=0)
    >>> print("Packed array:
", packed)
    Packed array:
    [[  45 -121]
    [   1    0]]

    # Now, unpack the array back to its original form
    >>> unpacked = unpack_int4(packed, orig_len, axis=0)
    >>> print("Unpacked array:
", unpacked)
    Unpacked array:
    [[-3  7]
    [ 2 -8]
    [ 1  0]]
    >>> np.allclose(original_array, unpacked)
    True

    # Example with axis=1
    # Original array has shape (2, 3)
    >>> original_array = np.array([[-3, 7, 2], [-8, 1, 0]], dtype=np.int8)

    # Pack along axis 1. Length of axis 1 (3) is padded to 4.
    # The new shape is (2, ceil(3/2)) = (2, 2).
    >>> packed, packed_shape, orig_len = pack_int4(original_array, axis=1)
    >>> print("Packed array:
", packed)
    Packed array:
    [[ 125   2]
    [  24   0]]

    # Unpack the array
    >>> unpacked = unpack_int4(packed, orig_len, axis=1)
    >>> print("Unpacked array:
", unpacked)
    Unpacked array:
    [[-3  7  2]
    [-8  1  0]]
    >>> np.allclose(original_array, unpacked)
    True
    ```
    r   z(Expected int8 tensor for packing, got {}rankNr   c                    re   r   r   rf   ri   r   r   rj     rk   zpack_int4.<locals>.<listcomp>c                       g | ]}  |qS r   indexrf   permr   r   rj         r   r"   .ri   int32   r%      )r   r'   r&   	TypeErrorformatgetattrr^   rn   rm   r   	transposeequalmodconcatenater3   arraybitwise_and
bitwise_or
left_shift)arrr#   r   inv_perm
transposedrows	needs_padzero_rowpadded_fullrows_packedpaddedlowhighmasklow_uhigh_upackedorig_lenr   )r#   r   r   	pack_int4{  s4   J
r   zkeras.quantizers.unpack_int4c                    s  t | jdkrtd| j t| jddpt| j} dk r$ |7   dkr||dkr|tjd| jd}t	| |}t	t
| d	|}t|d
k ||d }t|d
k ||d }tj||gdd}	t|	dtt| dd  }
|
d|df S  g fddt|D  fddt|D }t| }tjddd}t	||}t	t
|d	|}tjd
ddtjdddfdd}||}||}tj||gdd}	t|	dtt|dd  }
|
d|df }
t|
|}
|
S )a
  Unpack a packed int4 back to an int8 tensor in the range [-8, 7].

    This function reverses the packing performed by `pack_int4`, restoring
    the original int8 tensor (values in the range [-8, 7]) from a packed int8
    tensor where each element contains two int4 values (one in the lower nibble,
    one in the upper nibble).

    The function restores the original axis order and removes any
    padding that was added during packing.

    Args:
        packed: An int8 tensor containing packed int4 values along the
            specified axis. Each int8 value encodes two int4 values.
        orig_len: The original (unpadded) length of the axis that was
            packed. This is used to remove any padding that may have
            been added during packing to ensure an even number of rows.
        axis: The axis along which the tensor was packed. Defaults to 0.

    Returns:
        unpacked: An int8 tensor with the same shape as the original
            (unpacked) tensor, with values in the range [-8, 7].

    Example:

    ```python
    >>> import numpy as np
    >>> from keras.quantizers import pack_int4, unpack_int4

    # Example with axis=0
    # Original array has shape (3, 2)
    >>> original_array = np.array([[-3, 7], [2, -8], [1, 0]], dtype=np.int8)

    # Pack the array along axis 0. Since the length of axis 0 (3) is
    # odd, it will be padded to a length of 4. The packed array will
    # have a shape of (ceil(3/2), 2) = (2, 2).
    >>> packed, packed_shape, orig_len = pack_int4(original_array, axis=0)
    >>> print("Packed array:
", packed)
    Packed array:
    [[  45 -121]
    [   1    0]]

    # Now, unpack the array back to its original form
    >>> unpacked = unpack_int4(packed, orig_len, axis=0)
    >>> print("Unpacked array:
", unpacked)
    Unpacked array:
    [[-3  7]
    [ 2 -8]
    [ 1  0]]
    >>> np.allclose(original_array, unpacked)
    True

    # Example with axis=1
    # Original array has shape (2, 3)
    >>> original_array = np.array([[-3, 7, 2], [-8, 1, 0]], dtype=np.int8)

    # Pack along axis 1. Length of axis 1 (3) is padded to 4.
    # The new shape is (2, ceil(3/2)) = (2, 2).
    >>> packed, packed_shape, orig_len = pack_int4(original_array, axis=1)
    >>> print("Packed array:
", packed)
    Packed array:
    [[ 125   2]
    [  24   0]]

    # Unpack the array
    >>> unpacked = unpack_int4(packed, orig_len, axis=1)
    >>> print("Unpacked array:
", unpacked)
    Unpacked array:
    [[-3  7  2]
    [-8  1  0]]
    >>> np.allclose(original_array, unpacked)
    True
    ```
    r   z(Expected int8 tensor for unpacking, got r   Nr   r   r   r%   r   rU      r"   ri   )r   .c                    re   r   r   rf   ri   r   r   rj   b  rk   zunpack_int4.<locals>.<listcomp>c                    r   r   r   rf   r   r   r   rj   c  r   c                    s   t |  k | |  S r   )r   rl   )r   )eightsixteenr   r   	to_signedn  s   zunpack_int4.<locals>.to_signed)r   r'   r&   r   r   r^   rn   r   r   r   right_shiftrl   stackr   r?   rm   r   )r   r   r#   r   r   low_unpackedhigh_unpacked
low_signedhigh_signedstackedunpackedr   r   r   r   r   r   )r#   r   r   r   r   unpack_int4  sH   K
""r   r`   )r   )r   numpyr)   	keras.srcr   r   keras.src.api_exportr   keras.src.backendr   r   &keras.src.backend.common.backend_utilsr   r   keras.src.ops.operationr	   r
   r6   r;   r<   rS   rT   rY   r   r   r   r   r   r   r   r   r   <module>   sP    
1$! 


z