o
    2h_+                     @   s   d Z ddlZddlZddlZddlZddlZddlmZ ed			ddd	Z	ed
				dddZ
ed					dddZedG dd dZeddd ZdS )z0Deprecated text preprocessing APIs from Keras 1.    N)keras_exportz6keras._legacy.preprocessing.text.text_to_word_sequence!!"#$%&()*+,-./:;<=>?@[\]^_`{|}~	
T c                    sJ   |r|   }  fdd|D }t|}| |} |  }dd |D S )DEPRECATED.c                    s   i | ]}| qS  r   ).0csplitr   ^/var/www/html/chatgem/venv/lib/python3.10/site-packages/keras/src/legacy/preprocessing/text.py
<dictcomp>   s    z)text_to_word_sequence.<locals>.<dictcomp>c                 S   s   g | ]}|r|qS r   r   )r   ir   r   r   
<listcomp>       z)text_to_word_sequence.<locals>.<listcomp>)lowerstr	maketrans	translater
   )
input_textfiltersr   r
   translate_dicttranslate_mapseqr   r	   r   text_to_word_sequence   s   


r   z(keras._legacy.preprocessing.text.one_hotc              	   C   s   t | |t||||dS )r   )hash_functionr   r   r
   analyzer)hashing_trickhash)r   nr   r   r
   r   r   r   r   one_hot    s   
r   z.keras._legacy.preprocessing.text.hashing_trickc                    sT    du rt  n dkrdd  |du rt| |||d}n|| } fdd|D S )r   Nmd5c                 S   s   t t|   dS )N   )inthashlibr    encode	hexdigest)wr   r   r   r   D   s   z$hashing_trick.<locals>.hash_functionr   r   r
   c                    s    g | ]} |d   d  qS )   r   )r   r&   r   r   r   r   r   N   s     z!hashing_trick.<locals>.<listcomp>)r   r   )textr   r   r   r   r
   r   r   r   r)   r   r   5   s   r   z*keras._legacy.preprocessing.text.Tokenizerc                   @   s|   e Zd ZdZ							dddZd	d
 Zdd Zdd Zdd Zdd Z	dd Z
dddZdddZdd Zdd ZdS ) 	Tokenizerr   Nr   Tr   Fc           
      K   s   d|v rt d |d}|dd}	|rtdt| t | _tt	| _
|| _|| _|| _|| _|	| _|| _|| _tt	| _i | _i | _|| _d S )Nnb_wordszDThe `nb_words` argument in `Tokenizer` has been renamed `num_words`.document_countr   z Unrecognized keyword arguments: )warningswarnpop	TypeErrorr   collectionsOrderedDictword_countsdefaultdictr"   	word_docsr   r
   r   	num_wordsr-   
char_level	oov_token
index_docs
word_index
index_wordr   )
selfr7   r   r   r
   r8   r9   r   kwargsr-   r   r   r   __init__U   s*   


zTokenizer.__init__c                 C   s  |D ]g}|  j d7  _ | jst|tr*| jr't|tr#dd |D }n| }|}n| jd u r;t|| j| j| jd}n| |}|D ]}|| j	v rS| j	|  d7  < qBd| j	|< qBt
|D ]}| j|  d7  < q]qt| j	 }|jdd dd | jd u rg }n| jg}|d	d
 |D  tt|ttdt|d | _dd | j D | _t| j D ]\}}|| j| j| < qd S )Nr(   c                 S      g | ]}|  qS r   r   r   	text_elemr   r   r   r      r   z*Tokenizer.fit_on_texts.<locals>.<listcomp>r'   c                 S   s   | d S Nr(   r   )xr   r   r   <lambda>   s    z(Tokenizer.fit_on_texts.<locals>.<lambda>T)keyreversec                 s   s    | ]}|d  V  qdS )r   Nr   )r   wcr   r   r   	<genexpr>   s    z)Tokenizer.fit_on_texts.<locals>.<genexpr>c                 S   s   i | ]\}}||qS r   r   )r   r&   r   r   r   r   r      s    z*Tokenizer.fit_on_texts.<locals>.<dictcomp>)r-   r8   
isinstancelistr   r   r   r   r
   r4   setr6   itemssortr9   extenddictziprangelenr;   r<   r:   )r=   textsr*   r   r&   wcounts
sorted_vocr   r   r   r   fit_on_textsy   sH   




zTokenizer.fit_on_textsc                 C   sD   |  j t|7  _ |D ]}t|}|D ]}| j|  d7  < qqd S rD   )r-   rT   rM   r:   )r=   	sequencesr   r   r   r   r   fit_on_sequences   s   zTokenizer.fit_on_sequencesc                 C      t | |S N)rL   texts_to_sequences_generator)r=   rU   r   r   r   texts_to_sequences      zTokenizer.texts_to_sequencesc           	      c   s    | j }| j| j}|D ]k}| jst|tr.| jr+t|tr'dd |D }n| }|}n| jd u r?t	|| j
| j| jd}n| |}g }|D ],}| j|}|d urj|rd||krd|d urc|| qH|| qH| jd urt|| qH|V  qd S )Nc                 S   r@   r   rA   rB   r   r   r   r      r   z:Tokenizer.texts_to_sequences_generator.<locals>.<listcomp>r'   )r7   r;   getr9   r8   rK   rL   r   r   r   r   r
   append)	r=   rU   r7   oov_token_indexr*   r   vectr&   r   r   r   r   r]      s@   





z&Tokenizer.texts_to_sequences_generatorc                 C   r[   r\   )rL   sequences_to_texts_generator)r=   rY   r   r   r   sequences_to_texts   r_   zTokenizer.sequences_to_textsc                 c   s    | j }| j| j}|D ]A}g }|D ]2}| j|}|d ur8|r2||kr2|d ur1|| j|  q|| q| jd urE|| j|  qd|}|V  qd S )Nr   )r7   r;   r`   r9   r<   ra   join)r=   rY   r7   rb   r   rc   numwordr   r   r   rd      s&   

z&Tokenizer.sequences_to_texts_generatorbinaryc                 C   s   |  |}| j||dS )N)mode)r^   sequences_to_matrix)r=   rU   rj   rY   r   r   r   texts_to_matrix   s   
zTokenizer.texts_to_matrixc                 C   sN  | j s| jrt| jd }ntd| j }|dkr | js tdtt||f}t|D ]w\}}|s4q-t	t
}|D ]}||krBq;||  d7  < q;t| D ]R\}}	|dkr`|	|| |< qQ|dkro|	t| || |< qQ|dkrzd|| |< qQ|dkrdt|	 }
td| jd| j|d   }|
| || |< qQtd	|q-|S )
Nr(   zKSpecify a dimension (`num_words` argument), or fit on some text data first.tfidfz7Fit the Tokenizer on some data before using tfidf mode.countfreqri   r   zUnknown vectorization mode:)r7   r;   rT   
ValueErrorr-   npzeros	enumerater2   r5   r"   rL   rN   logr:   r`   )r=   rY   rj   r7   rE   r   r   countsjr   tfidfr   r   r   rk      sL   

zTokenizer.sequences_to_matrixc                 C   sh   t | j}t | j}t | j}t | j}t | j}| j| j| j	| j
| j| j| j|||||dS )N)r7   r   r   r
   r8   r9   r-   r4   r6   r:   r<   r;   )jsondumpsr4   r6   r:   r;   r<   r7   r   r   r
   r8   r9   r-   )r=   json_word_countsjson_word_docsjson_index_docsjson_word_indexjson_index_wordr   r   r   
get_config  s$   zTokenizer.get_configc                 K   s(   |   }| jj|d}tj|fi |S )N)
class_nameconfig)r   	__class____name__ry   rz   )r=   r>   r   tokenizer_configr   r   r   to_json2  s
   zTokenizer.to_json)Nr   Tr   FNN)ri   )r   
__module____qualname____doc__r?   rX   rZ   r^   r]   re   rd   rl   rk   r   r   r   r   r   r   r+   Q   s(    
$0"

.r+   z4keras._legacy.preprocessing.text.tokenizer_from_jsonc           	      C   s   t | }|d}t |d}t |d}t |d}dd | D }t |d}dd | D }t |d	}tdi |}||_||_||_||_	||_
|S )r   r   r4   r6   r:   c                 S      i | ]	\}}t ||qS r   r"   r   kvr   r   r   r   E      z'tokenizer_from_json.<locals>.<dictcomp>r<   c                 S   r   r   r   r   r   r   r   r   G  r   r;   Nr   )ry   loadsr`   r0   rN   r+   r4   r6   r:   r;   r<   )	json_stringr   r   r4   r6   r:   r<   r;   	tokenizerr   r   r   tokenizer_from_json;  s    

r   )r   Tr   )r   Tr   N)Nr   Tr   N)r   r2   r#   ry   r.   numpyrq   keras.src.api_exportr   r   r   r   r+   r   r   r   r   r   <module>   s<     j