o
    ³2úhÛ?  ã                   @  sâ   d Z ddlmZ ddlZddlZddlZddlmZm	Z	 er$ddl
mZ ej d¡Zej e¡Zej e¡ eejd< e d¡e_e d	¡e_e d
¡e_eje_e dej¡e_e dej¡e_e d¡ZG dd„ dejƒZdS )a  
This module imports a copy of [`html.parser.HTMLParser`][] and modifies it heavily through monkey-patches.
A copy is imported rather than the module being directly imported as this ensures that the user can import
and  use the unmodified library for their own needs.
é    )ÚannotationsN)ÚTYPE_CHECKINGÚSequence)ÚMarkdownzhtml.parserÚ
htmlparserz<[a-zA-Z]|</>z\?>z&([a-zA-Z][-.a-zA-Z0-9]*);a”  
  <[a-zA-Z][^`\t\n\r\f />\x00]*       # tag name <= added backtick here
  (?:[\s/]*                           # optional whitespace before attribute name
    (?:(?<=['"\s/])[^`\s/>][^\s/=>]*  # attribute name <= added backtick here
      (?:\s*=+\s*                     # value indicator
        (?:'[^']*'                    # LITA-enclosed value
          |"[^"]*"                    # LIT-enclosed value
          |(?!['"])[^`>\s]*           # bare value <= added backtick here
         )
         (?:\s*,)*                    # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                 # trailing whitespace
a  
  [a-zA-Z][^`\t\n\r\f />]*           # tag name
  [\t\n\r\f /]*                     # optional whitespace before attribute name
  (?:(?<=['"\t\n\r\f /])[^`\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
    (?:=                            # value indicator
      (?:'[^']*'                    # LITA-enclosed value
        |"[^"]*"                    # LIT-enclosed value
        |(?!['"])[^>\t\n\r\f ]*     # bare value
       )
     )?
    [\t\n\r\f /]*                   # possibly followed by a space
   )*
   >?
z^([ ]*\n){2}c                      s&  e Zd ZU dZdC‡ fdd„Z‡ fdd„Z‡ fdd	„ZedDdd„ƒZdEdd„Z	dFdd„Z
dGdd„ZdHdd„ZdIdd„ZdJdd „ZdHd!d"„ZdKd$d%„ZdKd&d'„ZdId(d)„ZdL‡ fd,d-„ZdId.d/„ZdId0d1„ZdId2d3„ZdM‡ fd4d5„ZdM‡ fd6d7„ZdNdO‡ fd:d;„Zd<Zd=ed>< dPd?d@„ZdMdAdB„Z‡  ZS )QÚHTMLExtractorzû
    Extract raw HTML from text.

    The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
    [`Markdown`][markdown.Markdown] instance passed to `md` and the remaining text
    is stored in `cleandoc` as a list of strings.
    Úmdr   c                   sF   d|vrd|d< t dgƒ| _dg| _d| _tƒ j|i |¤Ž || _d S )NÚconvert_charrefsFÚhrr   )ÚsetÚ
empty_tagsÚlineno_start_cacheÚoverride_comment_updateÚsuperÚ__init__r   )Úselfr   ÚargsÚkwargs©Ú	__class__© úN/var/www/html/chatgem/venv/lib/python3.10/site-packages/markdown/htmlparser.pyr   e   s   
zHTMLExtractor.__init__c                   s4   d| _ d| _g | _g | _g | _dg| _tƒ  ¡  dS )z1Reset this instance.  Loses all unprocessed data.Fr   N)ÚinrawÚintailÚstackÚ_cacheÚcleandocr   r   Úreset©r   r   r   r   r   t   s   zHTMLExtractor.resetc                   sv   t ƒ  ¡  t| jƒr | jr| js|  t | j¡¡ n|  | j¡ t| j	ƒr9| j
 | jj d | j	¡¡¡ g | _	dS dS )zHandle any buffered data.Ú N)r   ÚcloseÚlenÚrawdatar	   Ú
cdata_elemÚhandle_datar   Úunescaper   r   Úappendr   Ú	htmlStashÚstoreÚjoinr   r   r   r   r       s   



þzHTMLExtractor.closeÚreturnÚintc                 C  sj   t t| jƒd | jd ƒD ]}| j| }| j d|¡}|dkr$t| jƒ}| j |d ¡ q| j| jd  S )zHReturns char index in `self.rawdata` for the start of the current line. é   Ú
éÿÿÿÿ)Úranger!   r   Úlinenor"   Úfindr&   )r   ÚiiÚlast_line_start_posÚlf_posr   r   r   Úline_offsetŽ   s   

zHTMLExtractor.line_offsetÚboolc                 C  s<   | j dkrdS | j dkrdS | j| j| j| j  …  ¡ dkS )z†
        Returns True if current position is at start of line.

        Allows for up to three blank spaces at start of line.
        r   Té   Fr   )Úoffsetr"   r5   Ústripr   r   r   r   Úat_line_start›   s
   

 zHTMLExtractor.at_line_startÚtagÚstrc                 C  s<   | j | j }tj | j|¡}|r| j|| ¡ … S d |¡S )z™
        Returns the text of the end tag.

        If it fails to extract the actual text from the raw data, it builds a closing tag with `tag`.
        z</{}>)r5   r8   r   Ú	endendtagÚsearchr"   ÚendÚformat)r   r;   ÚstartÚmr   r   r   Úget_endtag_text¨   s
   
zHTMLExtractor.get_endtag_textÚattrsúSequence[tuple[str, str]]c                 C  sœ   || j v r|  ||¡ d S | j |¡r&| js|  ¡ r&| js&d| _| j d¡ |  	¡ }| jr;| j
 |¡ | j |¡ d S | j |¡ || jv rL|  ¡  d S d S )NTr-   )r   Úhandle_startendtagr   Úis_block_levelr   r:   r   r   r&   Úget_starttag_textr   r   ÚCDATA_CONTENT_ELEMENTSÚclear_cdata_mode)r   r;   rD   Útextr   r   r   Úhandle_starttag·   s   
 
þzHTMLExtractor.handle_starttagc                 C  sÖ   |   |¡}| jrc| j |¡ || jv r!| jr!| j ¡ |krn| jst| jƒdkrat | j	| j
| j t|ƒ d … ¡rA| j d¡ nd| _d| _| j | jj d | j¡¡¡ | j d¡ g | _d S d S | j |¡ d S )Nr   r-   TFr   ú

)rC   r   r   r&   r   Úpopr!   Úblank_line_reÚmatchr"   r5   r8   r   r   r   r'   r(   r)   )r   r;   rK   r   r   r   Úhandle_endtagÌ   s$   

þ$
ózHTMLExtractor.handle_endtagÚdatac                 C  s:   | j r
d|v r
d| _ | jr| j |¡ d S | j |¡ d S )Nr-   F)r   r   r   r&   r   ©r   rR   r   r   r   r$   ç   s
   zHTMLExtractor.handle_dataÚis_blockc                 C  sÆ   | j s| jr| j |¡ dS |  ¡ r[|r[t | j| j| j	 t
|ƒ d… ¡r+|d7 }nd| _| jr6| jd nd}| d¡sH| d¡rH| j d¡ | j | jj |¡¡ | j d¡ dS | j |¡ dS )z Handle empty tags (`<data>`). Nr-   Tr.   r   rM   )r   r   r   r&   r:   rO   rP   r"   r5   r8   r!   r   Úendswithr   r'   r(   )r   rR   rT   Úitemr   r   r   Úhandle_empty_tagï   s   $
zHTMLExtractor.handle_empty_tagc                 C  s   | j |  ¡ | j |¡d d S )N©rT   )rW   rH   r   rG   )r   r;   rD   r   r   r   rF     s   z HTMLExtractor.handle_startendtagÚnamec                 C  ó   | j d |¡dd d S )Nz&#{};FrX   ©rW   r@   ©r   rY   r   r   r   Úhandle_charref	  ó   zHTMLExtractor.handle_charrefc                 C  rZ   )Nz&{};FrX   r[   r\   r   r   r   Úhandle_entityref  r^   zHTMLExtractor.handle_entityrefc                 C  sZ   | j | j t|ƒ d }| j||d … dkr!|  d¡ d| _d S | jd |¡dd d S )Né   r7   z-->ú<Tz	<!--{}-->rX   )r5   r8   r!   r"   r$   r   rW   r@   )r   rR   Úir   r   r   Úhandle_comment  s   
zHTMLExtractor.handle_commentrb   Újc                   s"   | j r
d| _ d}d}tƒ  ||¡S )NFr   r,   )r   r   Ú	updatepos)r   rb   rd   r   r   r   re     s
   zHTMLExtractor.updateposc                 C  rZ   )Nz<!{}>TrX   r[   rS   r   r   r   Úhandle_decl  r^   zHTMLExtractor.handle_declc                 C  rZ   )Nz<?{}?>TrX   r[   rS   r   r   r   Ú	handle_pi"  r^   zHTMLExtractor.handle_pic                 C  s,   |  d¡rdnd}| jd ||¡dd d S )NzCDATA[z]]>z]>z<![{}{}TrX   )Ú
startswithrW   r@   )r   rR   r?   r   r   r   Úunknown_decl%  s   zHTMLExtractor.unknown_declc                   s,   |   ¡ s| jrtƒ  |¡S |  d¡ |d S )Nz<?é   )r:   r   r   Úparse_pir$   )r   rb   r   r   r   rk   )  s   
zHTMLExtractor.parse_pic                   sŽ   |   ¡ s| jr>| j||d … dkr8| j||d … dks8|  |¡}|dkr6|  | j||d … ¡ |d S |S tƒ  |¡S |  d¡ |d S )	Nr7   z<![é	   z	<![CDATA[r.   r,   z<!rj   )r:   r   r"   Úparse_bogus_commentr$   r   Úparse_html_declaration)r   rb   Úresultr   r   r   rn   1  s   ,

z$HTMLExtractor.parse_html_declarationr   Úreportc                   s6   t ƒ  ||¡}|dkrdS | j| j||… dd |S )Nr.   FrX   )r   rm   rW   r"   )r   rb   rp   Úposr   r   r   rm   A  s
   z!HTMLExtractor.parse_bogus_commentNz
str | NoneÚ_HTMLExtractor__starttag_textc                 C  s   | j S )z)Return full source of start tag: `<...>`.)rr   r   r   r   r   rH   P  s   zHTMLExtractor.get_starttag_textc                 C  s@  | j ||d … dkr|  | j ||d … ¡ |d S d | _|  |¡}|dk r7|  | j ||d … ¡ |d S | j }|||… | _g }tj ||d ¡}|sRJ dƒ‚| ¡ }| d¡ 	¡  | _
}||k rÂtj ||¡}|snnT| ddd¡\}	}
}|
s}d }n-|d d… d  kr|dd … ks¤n |d d… d	  kr¢|dd … krªn n|dd… }|r±t |¡}| |	 	¡ |f¡ | ¡ }||k sd|||…  ¡ }|d
vrÿ|  ¡ \}}d| jv rí|| j d¡ }t| jƒ| j d¡ }n|t| jƒ }|  |||… ¡ |S | d¡r|  ||¡ |S || jv r|  |¡ |  ||¡ |S )Nr7   z</>r   r,   z#unexpected call to parse_starttag()rj   ú'r.   ú")ú>ú/>r-   rv   )r"   r$   rr   Úcheck_for_whole_start_tagr   Útagfind_tolerantrP   r?   ÚgroupÚlowerÚlasttagÚattrfind_tolerantr%   r&   r9   ÚgetposÚcountr!   ÚrfindrU   rF   rI   Úset_cdata_moderL   )r   rb   Úendposr"   rD   rP   Úkr;   rB   ÚattrnameÚrestÚ	attrvaluer?   r0   r8   r   r   r   Úparse_starttagT  s`   
&(
ó

ÿý
zHTMLExtractor.parse_starttag)r   r   )r*   r+   )r*   r6   )r;   r<   r*   r<   )r;   r<   rD   rE   )r;   r<   )rR   r<   )rR   r<   rT   r6   )rY   r<   )rb   r+   rd   r+   r*   r+   )rb   r+   r*   r+   )r   )rb   r+   rp   r+   r*   r+   )r*   r<   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r    Úpropertyr5   r:   rC   rL   rQ   r$   rW   rF   r]   r_   rc   re   rf   rg   ri   rk   rn   rm   rr   Ú__annotations__rH   r†   Ú__classcell__r   r   r   r   r   \   s6   
 









	



r   )rŠ   Ú
__future__r   ÚreÚimportlib.utilÚ	importlibÚsysÚtypingr   r   Úmarkdownr   ÚutilÚ	find_specÚspecÚmodule_from_specr   ÚloaderÚexec_moduleÚmodulesÚcompileÚstarttagopenÚpicloseÚ	entityrefÚ
incompleteÚVERBOSEÚlocatestarttagend_tolerantÚlocatetagendrO   Ú
HTMLParserr   r   r   r   r   Ú<module>   s0   
òó
