o
    2hv                     @   s   d dl mZ d dl mZ d dl mZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dl
mZ d d	lmZ d d
lmZ d dlmZ d dlmZ G dd deZG dd deZedG dd deZG dd de	ZG dd de	ZdS )    )backend)layers)tree)keras_export)Layer)TFDataLayer)
saving_lib)serialization_lib)KerasSaveable)backend_utils)
tensorflow)	auto_namec                   @   s>   e Zd ZdddZdd Zedd Zdd	 Zed
d Z	dS )Crossone_hotc                 C   s0   |dvrt d| t|| _|| _|| _d S )N>   intr   zdInvalid value for argument `output_mode`. Expected one of {'int', 'one_hot'}. Received: output_mode=)
ValueErrortuplefeature_namescrossing_dimoutput_mode)selfr   r   r    r   g/var/www/html/chatgem/venv/lib/python3.10/site-packages/keras/src/layers/preprocessing/feature_space.py__init__   s   

zCross.__init__c                 C      dS )Nr   r   r   r   r   r   	_obj_type      zCross._obj_typec                 C   s   d | jS )N_X_)joinr   r   r   r   r   name      z
Cross.namec                 C   s   | j | j| jdS )Nr   r   r   r"   r   r   r   r   
get_config"   s   zCross.get_configc                 C      | di |S Nr   r   clsconfigr   r   r   from_config)      zCross.from_configNr   )
__name__
__module____qualname__r   r   propertyr    r#   classmethodr)   r   r   r   r   r      s    

r   c                   @   s0   e Zd Zdd Zdd Zdd Zedd Zd	S )
Featurec                 C   s@   |dvrt d| || _t|trt|}|| _|| _d S )N>   r   floatr   zmInvalid value for argument `output_mode`. Expected one of {'int', 'one_hot', 'float'}. Received: output_mode=)r   dtype
isinstancedictr	   deserialize_keras_objectpreprocessorr   )r   r3   r7   r   r   r   r   r   /   s   

zFeature.__init__c                 C   r   )Nr1   r   r   r   r   r   r   >   r   zFeature._obj_typec                 C   s   | j t| j| jdS )Nr3   r7   r   )r3   r	   serialize_keras_objectr7   r   r   r   r   r   r#   A   s   zFeature.get_configc                 C   r$   r%   r   r&   r   r   r   r)   J   r*   zFeature.from_configN)r,   r-   r.   r   r   r#   r0   r)   r   r   r   r   r1   .   s    	r1   zkeras.utils.FeatureSpacec                       sv  e Zd ZdZedIddZedd ZedJdd	ZedKddZedJddZ	e	dLddZ
e				dMddZe				dMddZedNddZedNddZ						dO fdd	Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zd3d4 Zd5d6 Zd7d8 Zd9d: Zd;d< Zed=d> Zd?d@ Z dAdB Z!dCdD Z"dEdF Z#dGdH Z$  Z%S )PFeatureSpacea  One-stop utility for preprocessing and encoding structured data.

    Arguments:
        feature_names: Dict mapping the names of your features to their
            type specification, e.g. `{"my_feature": "integer_categorical"}`
            or `{"my_feature": FeatureSpace.integer_categorical()}`.
            For a complete list of all supported types, see
            "Available feature types" paragraph below.
        output_mode: One of `"concat"` or `"dict"`. In concat mode, all
            features get concatenated together into a single vector.
            In dict mode, the FeatureSpace returns a dict of individually
            encoded features (with the same keys as the input dict keys).
        crosses: List of features to be crossed together, e.g.
            `crosses=[("feature_1", "feature_2")]`. The features will be
            "crossed" by hashing their combined value into
            a fixed-length vector.
        crossing_dim: Default vector size for hashing crossed features.
            Defaults to `32`.
        hashing_dim: Default vector size for hashing features of type
            `"integer_hashed"` and `"string_hashed"`. Defaults to `32`.
        num_discretization_bins: Default number of bins to be used for
            discretizing features of type `"float_discretized"`.
            Defaults to `32`.

    **Available feature types:**

    Note that all features can be referred to by their string name,
    e.g. `"integer_categorical"`. When using the string name, the default
    argument values are used.

    ```python
    # Plain float values.
    FeatureSpace.float(name=None)

    # Float values to be preprocessed via featurewise standardization
    # (i.e. via a `keras.layers.Normalization` layer).
    FeatureSpace.float_normalized(name=None)

    # Float values to be preprocessed via linear rescaling
    # (i.e. via a `keras.layers.Rescaling` layer).
    FeatureSpace.float_rescaled(scale=1., offset=0., name=None)

    # Float values to be discretized. By default, the discrete
    # representation will then be one-hot encoded.
    FeatureSpace.float_discretized(
        num_bins, bin_boundaries=None, output_mode="one_hot", name=None)

    # Integer values to be indexed. By default, the discrete
    # representation will then be one-hot encoded.
    FeatureSpace.integer_categorical(
        max_tokens=None, num_oov_indices=1, output_mode="one_hot", name=None)

    # String values to be indexed. By default, the discrete
    # representation will then be one-hot encoded.
    FeatureSpace.string_categorical(
        max_tokens=None, num_oov_indices=1, output_mode="one_hot", name=None)

    # Integer values to be hashed into a fixed number of bins.
    # By default, the discrete representation will then be one-hot encoded.
    FeatureSpace.integer_hashed(num_bins, output_mode="one_hot", name=None)

    # String values to be hashed into a fixed number of bins.
    # By default, the discrete representation will then be one-hot encoded.
    FeatureSpace.string_hashed(num_bins, output_mode="one_hot", name=None)
    ```

    Examples:

    **Basic usage with a dict of input data:**

    ```python
    raw_data = {
        "float_values": [0.0, 0.1, 0.2, 0.3],
        "string_values": ["zero", "one", "two", "three"],
        "int_values": [0, 1, 2, 3],
    }
    dataset = tf.data.Dataset.from_tensor_slices(raw_data)

    feature_space = FeatureSpace(
        features={
            "float_values": "float_normalized",
            "string_values": "string_categorical",
            "int_values": "integer_categorical",
        },
        crosses=[("string_values", "int_values")],
        output_mode="concat",
    )
    # Before you start using the FeatureSpace,
    # you must `adapt()` it on some data.
    feature_space.adapt(dataset)

    # You can call the FeatureSpace on a dict of data (batched or unbatched).
    output_vector = feature_space(raw_data)
    ```

    **Basic usage with `tf.data`:**

    ```python
    # Unlabeled data
    preprocessed_ds = unlabeled_dataset.map(feature_space)

    # Labeled data
    preprocessed_ds = labeled_dataset.map(lambda x, y: (feature_space(x), y))
    ```

    **Basic usage with the Keras Functional API:**

    ```python
    # Retrieve a dict Keras Input objects
    inputs = feature_space.get_inputs()
    # Retrieve the corresponding encoded Keras tensors
    encoded_features = feature_space.get_encoded_features()
    # Build a Functional model
    outputs = keras.layers.Dense(1, activation="sigmoid")(encoded_features)
    model = keras.Model(inputs, outputs)
    ```

    **Customizing each feature or feature cross:**

    ```python
    feature_space = FeatureSpace(
        features={
            "float_values": FeatureSpace.float_normalized(),
            "string_values": FeatureSpace.string_categorical(max_tokens=10),
            "int_values": FeatureSpace.integer_categorical(max_tokens=10),
        },
        crosses=[
            FeatureSpace.cross(("string_values", "int_values"), crossing_dim=32)
        ],
        output_mode="concat",
    )
    ```

    **Returning a dict of integer-encoded features:**

    ```python
    feature_space = FeatureSpace(
        features={
            "string_values": FeatureSpace.string_categorical(output_mode="int"),
            "int_values": FeatureSpace.integer_categorical(output_mode="int"),
        },
        crosses=[
            FeatureSpace.cross(
                feature_names=("string_values", "int_values"),
                crossing_dim=32,
                output_mode="int",
            )
        ],
        output_mode="dict",
    )
    ```

    **Specifying your own Keras preprocessing layer:**

    ```python
    # Let's say that one of the features is a short text paragraph that
    # we want to encode as a vector (one vector per paragraph) via TF-IDF.
    data = {
        "text": ["1st string", "2nd string", "3rd string"],
    }

    # There's a Keras layer for this: TextVectorization.
    custom_layer = layers.TextVectorization(output_mode="tf_idf")

    # We can use FeatureSpace.feature to create a custom feature
    # that will use our preprocessing layer.
    feature_space = FeatureSpace(
        features={
            "text": FeatureSpace.feature(
                preprocessor=custom_layer, dtype="string", output_mode="float"
            ),
        },
        output_mode="concat",
    )
    feature_space.adapt(tf.data.Dataset.from_tensor_slices(data))
    output_vector = feature_space(data)
    ```

    **Retrieving the underlying Keras preprocessing layers:**

    ```python
    # The preprocessing layer of each feature is available in `.preprocessors`.
    preprocessing_layer = feature_space.preprocessors["feature1"]

    # The crossing layer of each feature cross is available in `.crossers`.
    # It's an instance of keras.layers.HashedCrossing.
    crossing_layer = feature_space.crossers["feature1_X_feature2"]
    ```

    **Saving and reloading a FeatureSpace:**

    ```python
    feature_space.save("featurespace.keras")
    reloaded_feature_space = keras.models.load_model("featurespace.keras")
    ```
    r   c                 C   s   t |||dS )N)r   )r   )r'   r   r   r   r   r   r   cross  r*   zFeatureSpace.crossc                 C   s   t |||S N)r1   )r'   r3   r7   r   r   r   r   feature  r!   zFeatureSpace.featureNc                 C   s,   |pt d}td| dd}td|ddS )Nr2   float32_preprocessor)r3   r    r8   )r   TFDIdentityr1   r'   r    r7   r   r   r   r2     s
   zFeatureSpace.float      ?        c                 C   s0   |pt d}tj||| dd}td|ddS )Nfloat_rescaledr?   )scaleoffsetr    r>   r2   r8   )r   r   	Rescalingr1   )r'   rE   rF   r    r7   r   r   r   rD   &  s   zFeatureSpace.float_rescaledc                 C   s.   |pt d}tjd| dd}td|ddS )Nfloat_normalizedr?   )axisr    r>   r2   r8   )r   r   Normalizationr1   rA   r   r   r   rH   0     
zFeatureSpace.float_normalizedc                 C   s0   |pt d}tj||| dd}td||dS )Nfloat_discretizedr?   )num_binsbin_boundariesr    r>   r8   )r   r   Discretizationr1   )r'   rN   rO   r   r    r7   r   r   r   rM   :  s   zFeatureSpace.float_discretized   c                 C   0   |pt d}tj| d||d}td||dS )Ninteger_categoricalr?   r    
max_tokensnum_oov_indicesint32r8   )r   r   IntegerLookupr1   r'   rU   rV   r   r    r7   r   r   r   rS   H     z FeatureSpace.integer_categoricalc                 C   rR   )Nstring_categoricalr?   rT   stringr8   )r   r   StringLookupr1   rY   r   r   r   r[   Z  rZ   zFeatureSpace.string_categoricalc                 C   .   |pt d}tj| d|d}td||dS )Nstring_hashedr?   r    rN   r\   r8   r   r   Hashingr1   r'   rN   r   r    r7   r   r   r   r_   l  rL   zFeatureSpace.string_hashedc                 C   r^   )Ninteger_hashedr?   r`   rW   r8   ra   rc   r   r   r   rd   v  rL   zFeatureSpace.integer_hashedconcat    c                    sr  t  j|d |std| _| _| _ fdd| D  _g  _|rht	|
 }|D ]8}	t|	tr;t|	}	t|	trG j|	 q/|sMtd|	D ]}
|
|vr\td|	 qO jt|	|d q/dd  jD  _|d	vr|td
| | _ fdd j D  _dd  j D  _d  _ fdd jD  _i  _d _d  _d  _d  _d _d S )Nr    z0The `features` argument cannot be None or empty.c                       i | ]\}}|  ||qS r   )_standardize_feature.0r    valuer   r   r   
<dictcomp>      z)FeatureSpace.__init__.<locals>.<dictcomp>zzWhen specifying `crosses`, the argument `crossing_dim` (dimensionality of the crossing space) should be specified as well.zwAll features referenced in the `crosses` argument should be present in the `features` dict. Received unknown features: )r   c                 S   s   i | ]}|j |qS r   rg   rk   r;   r   r   r   rm         >   r5   re   zdInvalid value for argument `output_mode`. Expected one of {'dict', 'concat'}. Received: output_mode=c                    rh   r   )_feature_to_inputrj   r   r   r   rm     rn   c                 S   s   i | ]\}}||j qS r   )r7   rj   r   r   r   rm     s    c                    s   i | ]	}|j  |qS r   )r    _cross_to_crosserro   r   r   r   rm     s    F)superr   r   r   hashing_dimnum_discretization_binsitemsfeaturescrossessetkeysr4   r5   r	   r6   r   appendcrosses_by_namer   inputspreprocessorsencoded_featurescrossersone_hot_encoders_is_adaptedre   _preprocessed_features_names_crossed_features_names_sublayers_built)r   rw   r   rx   r   rt   ru   r    feature_setr;   key	__class__r   r   r     sl   







zFeatureSpace.__init__c                 C   s   t jd|j|dS )N)rQ   )shaper3   r    )r   Inputr3   r   r    r=   r   r   r   rq        zFeatureSpace._feature_to_inputc                 C   s   t |tr|S t |trt|S |dkr| j|dS |dkr%| j|dS |dkr/| j|dS |dkr;| j|| j	dS |dkrE| j
|dS |dkrO| j|dS |d	kr[| j| j|dS |d
krg| j| j|dS td| )Nr2   rg   rH   rD   rM   r`   rS   r[   rd   r_   zInvalid feature type: )r4   r1   r5   r	   r6   r2   rH   rD   rM   ru   rS   r[   rd   rt   r_   r   r   r   r   r   ri     s.   


z!FeatureSpace._standardize_featurec                 C   s   t j|j|jdS )Nrg   )r   HashedCrossingr   r    )r   r;   r   r   r   rr     r   zFeatureSpace._cross_to_crosserc                 C   sd   g }| j  D ](}| j| }t|tjr|jd urqn
t|tjr%|jr%qt	|dr/|
| q|S )Nadapt)rw   rz   r~   r4   r   rK   
input_meanTextVectorization_has_input_vocabularyhasattrr{   )r   adaptable_preprocessorsr    r7   r   r   r   _list_adaptable_preprocessors  s   



z*FeatureSpace._list_adaptable_preprocessorsc                    s   t |tjjstd| dt| d|  D ]5 | fdd}| j  }t	t
|}t|jdkr:|d}t|jdv rH|d	d }|| qd
| _|   d
| _d
| _d S )NzE`adapt()` can only be called on a tf.data.Dataset. Received instead: 
 (of type )c                    s   |   S r<   r   xrg   r   r   <lambda>  s    z$FeatureSpace.adapt.<locals>.<lambda>r   rf   >   r   rQ   c                 S   s   t | dS )NrI   )tfexpand_dimsr   r   r   r   r     s    T)r4   r   dataDatasetr   typer   mapr~   nextiterlenr   batchr   r   get_encoded_featuresbuiltr   )r   datasetfeature_datasetr7   r   r   rg   r   r     s.   


zFeatureSpace.adaptc                 C   s   |    | jS r<   )_check_if_builtr}   r   r   r   r   
get_inputs   s   zFeatureSpace.get_inputsc                 C   s@   |    | jd u r| | j}| |}| ||}|| _| jS r<   )_check_if_adaptedr   _preprocess_featuresr}   _cross_features_merge_features)r   preprocessed_featurescrossed_featuresmerged_featuresr   r   r   r   $  s   

z!FeatureSpace.get_encoded_featuresc                    s    fdd   D S )Nc                    s    i | ]}|j |  | qS r   )r~   rk   r    rw   r   r   r   rm   1  s    z5FeatureSpace._preprocess_features.<locals>.<dictcomp>)rz   )r   rw   r   r   r   r   0  s   z!FeatureSpace._preprocess_featuresc                    sB   i }| j D ]} fdd|jD }| j|j |}|||j< q|S )Nc                       g | ]} | qS r   r   r   rw   r   r   
<listcomp>9  rp   z0FeatureSpace._cross_features.<locals>.<listcomp>)rx   r   r   r    )r   rw   all_outputsr;   r}   outputsr   r   r   r   6  s   
zFeatureSpace._cross_featuresc                    s  j st _ t  _j j }fddj D  fddjD  }jdkr3i }ng }jrjt||D ] \}}j|d }	|	rN|	|}jdkrX|||< q=|	| q=jdkre|S 
|S fddj D fddjD  }
t|||
D ]\}}}t|rt|d j}n|j}t|}|jdkrj|pj|}d }|d	std
| d| dt|tjtjfr| }n)t|tjr|j}nt|tjr|j}nt|tjtjfr|j}ntd
| d|d urtj|dd}	|	j|< |	|}jdkr4|j}|d	s#|dkr.td| d| d|	| q|||< qjdkrJt dd_

|S |S )Nc                    r   r   r   r   )r   r   r   r   H  s    z0FeatureSpace._merge_features.<locals>.<listcomp>c                    r   r   r   r   )r   r   r   r   K  rp   r5   c                       g | ]} j | qS r   r   r   r   r   r   r   c      
c                    r   r   )r|   r   r   r   r   r   e  r   r   r   r   z	Feature 'zj' has `output_mode='one_hot'`. Thus its preprocessor should return an integer dtype. Instead it returns a z dtype.z' has `output_mode='one_hot'`. However it isn't a standard feature and the dimensionality of its output space is not known, thus it cannot be one-hot encoded. Try using `output_mode='int'`.	multi_hot)
num_tokensr   re   r\   z-Cannot concatenate features because feature 'z%' has not been encoded (it has dtype z'). Consider using `output_mode='dict'`.rI   rJ   )!r   sortedrz   r   r   r   zipr   getr{   re   r   	is_nestedflattenr3   r   standardize_dtyper~   r   
startswithr   r4   r   rX   r]   vocabulary_sizeCategoryEncodingr   rP   rN   r   rb   	TFDConcat)r   r   r   	all_namesall_featuresoutput_dictfeatures_to_concatr    r=   encoder	all_specsspecr3   r7   cardinalityr   )r   r   r   r   r   >  s   


















zFeatureSpace._merge_featuresc                 C   s$   | j s|  sd| _ d S tdd S )NTzUYou need to call `.adapt(dataset)` on the FeatureSpace before you can start using it.)r   r   r   r   r   r   r   r     s   
zFeatureSpace._check_if_adaptedc                 C   s$   | j s|   |   d| _ d S d S NT)r   r   r   r   r   r   r   r     s
   
zFeatureSpace._check_if_builtc                 C   s@   t |tjtjtjfst |ttttfst	
|}t|}|S r<   )r4   r   TensorSparseTensorRaggedTensorlistr   r   r2   r   convert_to_numpyconvert_to_tensorr   r   r   r   r   _convert_input  s
   

zFeatureSpace._convert_inputc                    s      t|tstd| dt|  fdd| D }d}| D ]%\}}t|jdkr<t	|d||< d}q&t|jd	krKt
|d
||< q&t ,  |}t fdd|} |}t fdd|} ||}W d    n1 sw   Y  |rˈ jdkr|jd d	ksJ t dkrt st|}tj|dd}n | D ]\}}t|jdkr|jd d	krtj|dd||< qt dkrt stdd |}|S )Nz>A FeatureSpace can only be called with a dict. Received: data=r   c                    s   i | ]
\}}|  |qS r   r   )rk   r   rl   r   r   r   rm     s    z)FeatureSpace.__call__.<locals>.<dictcomp>Fr   )rQ   rQ   TrQ   rI   c                    
     | S r<   r   r   r   r   r   r        
 z'FeatureSpace.__call__.<locals>.<lambda>c                    r   r<   r   r   r   r   r   r     r   re   r   r      c                 S   s   t j| | jdS )N)r3   )r   r   r3   r   r   r   r   r     s    )r   r4   r5   r   r   rv   r   r   r   reshaper   r   TFGraphScoper   r   map_structurer   r   r   r   in_tf_graphr   squeeze)r   r   	rebatchedr    r   preprocessed_datacrossed_datamerged_datar   r   r   __call__  sb   





zFeatureSpace.__call__c                 C   s*   t | j| jt | j| j| j| jdS )N)rw   r   rx   r   rt   ru   )r	   r9   rw   r   rx   r   rt   ru   r   r   r   r   r#     s   

zFeatureSpace.get_configc                 C   r$   r%   r   r&   r   r   r   r)     r*   zFeatureSpace.from_configc                 C   s   dd | j  D S )Nc                 S   s   i | ]
\}}||j  qS r   )r7   get_build_config)rk   r    r=   r   r   r   rm     s    
z1FeatureSpace.get_build_config.<locals>.<dictcomp>)rw   rv   r   r   r   r   r     s   zFeatureSpace.get_build_configc                 C   s8   |  D ]}| j| j}|js|||  qd| _d S r   )rz   rw   r7   r   build_from_configr   )r   r(   r    r7   r   r   r   r     s   
zFeatureSpace.build_from_configc                 C   s   t | | dS )a  Save the `FeatureSpace` instance to a `.keras` file.

        You can reload it via `keras.models.load_model()`:

        ```python
        feature_space.save("featurespace.keras")
        reloaded_fs = keras.models.load_model("featurespace.keras")
        ```
        N)r   
save_model)r   filepathr   r   r   save  s   
zFeatureSpace.savec                 C      d S r<   r   r   storer   r   r   save_own_variables$  r   zFeatureSpace.save_own_variablesc                 C   r   r<   r   r   r   r   r   load_own_variables'  r   zFeatureSpace.load_own_variablesr+   r<   )rB   rC   N)Nr   N)NrQ   r   N)r   N)re   Nrf   rf   rf   N)&r,   r-   r.   __doc__r0   r;   r=   r2   rD   rH   rM   rS   r[   r_   rd   r   rq   ri   rr   r   r   r   r   r   r   r   r   r   r   r   r#   r)   r   r   r   r   r   __classcell__r   r   r   r   r:   O   sz     F
			I$l
;

r:   c                       s$   e Zd Z fddZdd Z  ZS )r   c                    s   t  jdi | || _d S r%   )rs   r   rJ   )r   rJ   kwargsr   r   r   r   ,  s   
zTFDConcat.__init__c                 C   s   | j jj|| jdS )Nr   )r   numpyconcatenaterJ   )r   xsr   r   r   call0  s   zTFDConcat.call)r,   r-   r.   r   r   r   r   r   r   r   r   +  s    r   c                   @   s   e Zd Zdd ZdS )r@   c                 C   s   |S r<   r   r   r   r   r   r   5  r   zTFDIdentity.callN)r,   r-   r.   r   r   r   r   r   r@   4  s    r@   N)	keras.srcr   r   r   keras.src.api_exportr   keras.src.layers.layerr   ,keras.src.layers.preprocessing.tf_data_layerr   keras.src.savingr   r	   keras.src.saving.keras_saveabler
   keras.src.utilsr   keras.src.utils.module_utilsr   r   keras.src.utils.namingr   r   r1   r:   r   r@   r   r   r   r   <module>   s.    !     `	