U
    \>e7                     @  s   d Z ddlmZ ddlZddlZddlZddlmZm	Z	 erHddl
mZ ejdZejeZeje eejd< ede_ed	e_eje_ed
eje_edZG dd dejZdS )a  
This module imports a copy of [`html.parser.HTMLParser`][] and modifies it heavily through monkey-patches.
A copy is imported rather than the module being directly imported as this ensures that the user can import
and  use the unmodified library for their own needs.
    )annotationsN)TYPE_CHECKINGSequence)Markdownzhtml.parser
htmlparserz\?>z&([a-zA-Z][-.a-zA-Z0-9]*);a  
  <[a-zA-Z][^`\t\n\r\f />\x00]*       # tag name <= added backtick here
  (?:[\s/]*                           # optional whitespace before attribute name
    (?:(?<=['"\s/])[^`\s/>][^\s/=>]*  # attribute name <= added backtick here
      (?:\s*=+\s*                     # value indicator
        (?:'[^']*'                    # LITA-enclosed value
          |"[^"]*"                    # LIT-enclosed value
          |(?!['"])[^`>\s]*           # bare value <= added backtick here
         )
         (?:\s*,)*                    # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                 # trailing whitespace
z^([ ]*\n){2}c                      sx  e Zd ZU dZdd fddZ fddZ fdd	Zed
dddZddddZ	dddddZ
dddddZddddZddddZdddd d!Zddd"d#Zdd$d%d&Zdd$d'd(Zddd)d*Zddd+d,Zddd-d.Zddd/d0Zd
d
d1 fd2d3Zd
d
d1 fd4d5ZdAd
d
d
d7 fd8d9Zd:Zd;ed<< ddd=d>Zd
d
d1d?d@Z  ZS )BHTMLExtractorz
    Extract raw HTML from text.

    The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
    [`Markdown`][markdown.Markdown] instance passed to `md` and the remaining text
    is stored in `cleandoc` as a list of strings.
    r   )mdc                   s<   d|krd|d< t dg| _dg| _t j|| || _d S )Nconvert_charrefsFhrr   )set
empty_tagslineno_start_cachesuper__init__r   )selfr   argskwargs	__class__ 7/tmp/pip-unpacked-wheel-wj0og6ym/markdown/htmlparser.pyr   S   s    zHTMLExtractor.__init__c                   s4   d| _ d| _g | _g | _g | _dg| _t   dS )z1Reset this instance.  Loses all unprocessed data.Fr   N)inrawintailstack_cachecleandocr   r   resetr   r   r   r   r   `   s    zHTMLExtractor.resetc                   sr   t    t| jr@| jr4| js4| t| j n| | j t| j	rn| j
| jjd| j	 g | _	dS )zHandle any buffered data. N)r   closelenrawdatar	   
cdata_elemhandle_datar   unescaper   r   appendr   	htmlStashstorejoinr   r   r   r   r   k   s    


zHTMLExtractor.closeint)returnc                 C  sj   t t| jd | jd D ]>}| j| }| jd|}|dkrHt| j}| j|d  q| j| jd  S )zHReturns char index in `self.rawdata` for the start of the current line.    
)ranger    r   linenor!   findr%   )r   iiZlast_line_start_posZlf_posr   r   r   line_offsetz   s    

zHTMLExtractor.line_offsetboolc                 C  s<   | j dkrdS | j dkrdS | j| j| j| j    dkS )z
        Returns True if current position is at start of line.

        Allows for up to three blank spaces at start of line.
        r   T   Fr   )offsetr!   r2   stripr   r   r   r   at_line_start   s
    

zHTMLExtractor.at_line_startstr)tagr*   c                 C  s@   | j | j }tj| j|}|r2| j||  S d|S dS )z
        Returns the text of the end tag.

        If it fails to extract the actual text from the raw data, it builds a closing tag with `tag`.
        z</{}>N)r2   r5   r   	endendtagsearchr!   endformat)r   r9   startmr   r   r   get_endtag_text   s
    zHTMLExtractor.get_endtag_textzSequence[tuple[str, str]])r9   attrsc                 C  s   || j kr| || d S | j|rL| js:|  rL| jsLd| _| jd | 	 }| jrt| j
| | j| n| j| || jkr|   d S )NTr,   )r   handle_startendtagr   is_block_levelr   r7   r   r   r%   get_starttag_textr   r   CDATA_CONTENT_ELEMENTSclear_cdata_mode)r   r9   rA   textr   r   r   handle_starttag   s    
 
zHTMLExtractor.handle_starttag)r9   c                 C  s   |  |}| jr| j| || jkr>| jr>| j |kr&q>q&t| jdkrt| j	| j
| j t| d  r~| jd nd| _d| _| j| jjd| j | jd g | _n| j| d S )Nr   r,   TFr   

)r@   r   r   r%   r   popr    blank_line_rematchr!   r2   r5   r   r   r   r&   r'   r(   )r   r9   rG   r   r   r   handle_endtag   s     

$zHTMLExtractor.handle_endtag)datac                 C  s8   | j rd|krd| _ | jr(| j| n| j| d S )Nr,   F)r   r   r   r%   r   r   rN   r   r   r   r#      s
    zHTMLExtractor.handle_data)rN   is_blockc                 C  s   | j s| jr| j| n|  r|rt| j| j| j	 t
| d rT|d7 }nd| _| jrj| jd nd}|ds|dr| jd | j| jj| | jd n| j| dS )z Handle empty tags (`<data>`). Nr,   Tr-   r   rI   )r   r   r   r%   r7   rK   rL   r!   r2   r5   r    r   endswithr   r&   r'   )r   rN   rP   itemr   r   r   handle_empty_tag   s    $
zHTMLExtractor.handle_empty_tagc                 C  s   | j |  | j|d d S )NrP   )rS   rD   r   rC   )r   r9   rA   r   r   r   rB      s    z HTMLExtractor.handle_startendtag)namec                 C  s   | j d|dd d S )Nz&#{};FrT   rS   r=   r   rU   r   r   r   handle_charref   s    zHTMLExtractor.handle_charrefc                 C  s   | j d|dd d S )Nz&{};FrT   rV   rW   r   r   r   handle_entityref   s    zHTMLExtractor.handle_entityrefc                 C  s   | j d|dd d S )Nz	<!--{}-->TrT   rV   rO   r   r   r   handle_comment   s    zHTMLExtractor.handle_commentc                 C  s   | j d|dd d S )Nz<!{}>TrT   rV   rO   r   r   r   handle_decl   s    zHTMLExtractor.handle_declc                 C  s   | j d|dd d S )Nz<?{}?>TrT   rV   rO   r   r   r   	handle_pi  s    zHTMLExtractor.handle_pic                 C  s,   | drdnd}| jd||dd d S )NzCDATA[z]]>z]>z<![{}{}TrT   )
startswithrS   r=   )r   rN   r<   r   r   r   unknown_decl  s    zHTMLExtractor.unknown_decl)ir*   c                   s,   |   s| jrt |S | d |d S )Nz<?   )r7   r   r   parse_pir#   r   r_   r   r   r   ra     s    
zHTMLExtractor.parse_pic                   s,   |   s| jrt |S | d |d S )Nz<!r`   )r7   r   r   parse_html_declarationr#   rb   r   r   r   rc     s    
z$HTMLExtractor.parse_html_declarationr   )r_   reportr*   c                   s6   t  ||}|dkrdS | j| j|| dd |S )Nr-   FrT   )r   parse_bogus_commentrS   r!   )r   r_   rd   posr   r   r   re     s
    z!HTMLExtractor.parse_bogus_commentNz
str | None_HTMLExtractor__starttag_textc                 C  s   | j S )z)Return full source of start tag: `<...>`.)rg   r   r   r   r   rD   '  s    zHTMLExtractor.get_starttag_textc                 C  s  d | _ | |}|dk r|S | j}||| | _ g }tj||d }|sRtd| }|d	  | _
}||k r4tj||}|sq4|ddd\}	}
}|
sd }n\|d d d  kr|dd  ksn |d d d  kr|dd  krn n|dd }|rt|}||		 |f | }qn|||  }|d	kr|  \}}d
| j kr|| j d
 }t| j | j d
 }n|t| j  }| |||  |S |dr| || n"|| jkr| | | || |S )Nr   r+   z#unexpected call to parse_starttag()r`   r4   'r-   ")>/>r,   rk   )rg   check_for_whole_start_tagr!   r   tagfind_tolerantrL   AssertionErrorr<   grouplowerlasttagattrfind_tolerantr$   r%   r6   getposcountr    rfindr#   rQ   rB   rE   set_cdata_moderH   )r   r_   endposr!   rA   rL   kr9   r?   attrnamerest	attrvaluer<   r/   r5   r   r   r   parse_starttag+  s\    

&
 





zHTMLExtractor.parse_starttag)r   )__name__
__module____qualname____doc__r   r   r   propertyr2   r7   r@   rH   rM   r#   rS   rB   rX   rY   rZ   r[   r\   r^   ra   rc   re   rg   __annotations__rD   r|   __classcell__r   r   r   r   r   J   s2   
r   )r   
__future__r   reimportlib.util	importlibsystypingr   r   Zmarkdownr   util	find_specspecmodule_from_specr   loaderexec_modulemodulescompilepiclose	entityref
incompleteVERBOSElocatestarttagend_tolerantrK   
HTMLParserr   r   r   r   r   <module>   s&   

