
    @6iwF                     D   d Z ddlZddlZ	 ddlZddlmZ dgZ ej                  d      Z
 ej                  d      Z ej                  d      Z ej                  d	      Z ej                  d
      Z ej                  d      Z ej                  d      Z ej                  d      Z ej                  d      Z ej                  dej&                        Z ej                  d      Z ej                  d      Z G d dej.                        Zy# e$ r ddlZY w xY w)zA parser for HTML and XHTML.    N   unescape
HTMLParserz[&<]z
&[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z	<[a-zA-Z]>z--\s*>z+([a-zA-Z][^\t\n\r\f />\x00]*)(?:\s|/(?!>))*z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aF  
  <[a-zA-Z][^\t\n\r\f />\x00]*       # tag name
  (?:[\s/]*                          # optional whitespace before attribute name
    (?:(?<=['"\s/])[^\s/>][^\s/=>]*  # attribute name
      (?:\s*=+\s*                    # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |"[^"]*"                   # LIT-enclosed value
          |(?!['"])[^>\s]*           # bare value
         )
         (?:\s*,)*                   # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                # trailing whitespace
z#</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>c                       e Zd ZdZdZddZd Zd Zd ZdZ	d Z
d	 Zd
 Zd Zd ZddZd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zy)r   aE  Find tags and other markup and call handler functions.

    Usage:
        p = HTMLParser()
        p.feed(data)
        ...
        p.close()

    Start tags are handled by calling self.handle_starttag() or
    self.handle_startendtag(); end tags by self.handle_endtag().  The
    data between tags is passed from the parser to the derived class
    by calling self.handle_data() with the data as argument (the data
    may be split up in arbitrary chunks).  If convert_charrefs is
    True the character references are converted automatically to the
    corresponding Unicode character (and self.handle_data() is no
    longer split in chunks), otherwise they are passed by calling
    self.handle_entityref() or self.handle_charref() with the string
    containing respectively the named or numeric reference as the
    argument.
    )scriptstylec                 2    || _         | j                          y)zInitialize and reset this instance.

        If convert_charrefs is True (the default), all character references
        are automatically converted to the corresponding Unicode characters.
        N)convert_charrefsreset)selfr   s     ]/home/azureuser/techstart-app/venv/lib/python3.12/site-packages/htmlmin/python3html/parser.py__init__zHTMLParser.__init__e   s     !1

    c                     d| _         d| _        t        | _        d| _        t
        j                  j                  |        y)z1Reset this instance.  Loses all unprocessed data. z???N)rawdatalasttaginteresting_normalinteresting
cdata_elem
markupbase
ParserBaser   r   s    r   r   zHTMLParser.resetn   s3    -##D)r   c                 N    | j                   |z   | _         | j                  d       y)zFeed data to the parser.

        Call this as often as you want, with as little or as much text
        as you want (may include '\n').
        r   N)r   goaheadr   datas     r   feedzHTMLParser.feedv   s     ||d*Qr   c                 &    | j                  d       y)zHandle any buffered data.r   N)r   r   s    r   closezHTMLParser.close   s    Qr   Nc                     | j                   S )z)Return full source of start tag: '<...>'.)_HTMLParser__starttag_textr   s    r   get_starttag_textzHTMLParser.get_starttag_text   s    ###r   c                     |j                         | _        t        j                  d| j                  z  t        j                        | _        y )Nz</\s*%s\s*>)lowerr   recompileIr   )r   elems     r   set_cdata_modezHTMLParser.set_cdata_mode   s/    **,::nt&FMr   c                 (    t         | _        d | _        y N)r   r   r   r   s    r   clear_cdata_modezHTMLParser.clear_cdata_mode   s    -r   c                 
   | j                   }d}t        |      }||k  rc| j                  rq| j                  se|j	                  d|      }|dk  r|j                  dt        ||dz
              }|dk\  r't        j                  d      j                  ||      sn|}n?| j                  j                  ||      }|r|j                         }n| j                  rn|}||k  rP| j                  r0| j                  s$| j                  | j                  |||              n| j                  |||        | j                  ||      }||k(  rn9|j                  } |d|      rat         j#                  ||      r| j%                  |      }	n |d|      r| j'                  |      }	nr |d|      r| j)                  |      }	nW |d|      r| j+                  |      }	n< |d	|      r| j-                  |      }	n!|d
z   |k  r| j                  d       |d
z   }	nnn|	dk  r|sne|j	                  d|d
z         }	|	dk  r |j	                  d|d
z         }	|	dk  r|d
z   }	n|	d
z  }	| j                  r0| j                  s$| j                  | j                  |||	              n| j                  |||	        | j                  ||	      }n |d|      rt.        j#                  ||      }|rY|j1                         dd }
| j3                  |
       |j5                         }	 |d|	d
z
        s|	d
z
  }	| j                  ||	      }!d||d  v r,| j                  |||dz           | j                  ||dz         }n |d|      rt6        j#                  ||      }|rW|j1                  d
      }
| j9                  |
       |j5                         }	 |d|	d
z
        s|	d
z
  }	| j                  ||	      }t:        j#                  ||      }|rE|rB|j1                         ||d  k(  r,|j5                         }	|	|k  r|}	| j                  ||d
z         }n>|d
z   |k  r'| j                  d       | j                  ||d
z         }nnJ d       ||k  rc|rs||k  rn| j                  sb| j                  r0| j                  s$| j                  | j                  |||              n| j                  |||        | j                  ||      }||d  | _         y )Nr   <&"   z[\s;]</<!--<?<!r   r   z&#   ;zinteresting.search() lied)r   lenr   r   findrfindmaxr(   r)   searchr   starthandle_datar   	updatepos
startswithstarttagopenmatchparse_starttagparse_endtagparse_commentparse_piparse_html_declarationcharrefgrouphandle_charrefend	entityrefhandle_entityref
incomplete)r   rN   r   injampposrE   rC   knames              r   r   zHTMLParser.goahead   s   ,,L!e$$T__LLa(q5 %]]3Aqt=F!JJx077HA((//;AA1u(($$T]]71Q<%@A$$WQq\2q!$AAvu ++J#q!%%gq1++A.Aa())!,A***1-Aa(a(Aa(33A6A!eq[$$S)AAq5S!a%0A1u#LLa!e4q5 !AAQ,,T__((wq|)DE((16NN1a(D!$gq1 ;;=2.D''-		A%c1Q3/Eq!,Agabk)((1Q38 NN1ac2C#!3 ;;q>D))$/		A%c1Q3/Eq!,A"((!4u{{};!IIK6 !A NN1a!e4!eq[ $$S)q!a%0A555qS !eV 1q5$$T__  wq|!<=  1.q!$Aqr{r   c                 p   | j                   }|||dz    dk(  sJ d       |||dz    dk(  r| j                  |      S |||dz    dk(  r| j                  |      S |||dz    j                         d	k(  r7|j	                  d
|dz         }|dk(  ry| j                  ||dz   |        |dz   S | j                  |      S )Nr8   r7   z+unexpected call to parse_html_declaration()   r5      z<![	   z	<!doctyper   r9   r   )r   rH   parse_marked_sectionr'   r<   handle_declparse_bogus_comment)r   rR   r   gtposs       r   rJ   z!HTMLParser.parse_html_declaration  s    ,,q1~% 	D )C 	D%1QqS>V#%%a((Qqs^u$,,Q//Qqs^!!#{2LLac*E{WQqS/07N++A..r   c                     | j                   }|||dz    dv sJ d       |j                  d|dz         }|dk(  ry|r| j                  ||dz   |        |dz   S )Nr8   )r7   r4   z"unexpected call to parse_comment()r   r9   r   )r   r<   handle_comment)r   rR   reportr   poss        r   r^   zHTMLParser.parse_bogus_comment#  su    ,,q1~- 	C 1B 	C-ll3!$"9!C 01Qwr   c                     | j                   }|||dz    dk(  sJ d       t        j                  ||dz         }|sy|j                         }| j	                  ||dz   |        |j                         }|S )Nr8   r6   zunexpected call to parse_pi()r9   )r   picloser?   r@   	handle_pirN   )r   rR   r   rE   rT   s        r   rI   zHTMLParser.parse_pi/  st    ,,q1~%F'FF%w!,KKMwqsA'IIKr   c                    d | _         | j                  |      }|dk  r|S | j                  }||| | _         g }t        j	                  ||dz         }|sJ d       |j                         }|j                  d      j                         x| _        }||k  rt        j	                  ||      }|sn|j                  ddd      \  }	}
}|
sd }n,|d d dcxk(  r|dd  k(  sn |d d dcxk(  r|dd  k(  rn n|dd }|r| j                  |      }|j                  |	j                         |f       |j                         }||k  r||| j                         }|d	vr| j                         \  }}d
| j                   v rP|| j                   j                  d
      z   }t        | j                         | j                   j!                  d
      z
  }n|t        | j                         z   }| j#                  |||        |S |j%                  d      r| j'                  ||       |S | j)                  ||       || j*                  v r| j-                  |       |S )Nr   r   z#unexpected call to parse_starttag()r8   rZ   'r9   ")r   />
rj   )r$   check_for_whole_start_tagr   tagfind_tolerantrE   rN   rL   r'   r   attrfind_tolerantr   appendstripgetposcountr;   r=   rA   endswithhandle_startendtaghandle_starttagCDATA_CONTENT_ELEMENTSr,   )r   rR   endposr   attrsrE   rV   tagmattrnamerest	attrvaluerN   linenooffsets                  r   rF   zHTMLParser.parse_starttag;  sR   #//2A:M,,&q0  &&w!4;;;uIIK"[[^1133s&j!''3A()1a(8%HdI 	2A$8)BC.82A#7237%aO	 MM)4	LL(..*I67A &j a%%'k!![[]NFFt+++$"6"6"<"<T"BBT112//55d;<  #d&:&:";;WQv./M<<##C/
    e,d111##C(r   c                 H   | j                   }t        j                  ||      }|rt|j                         }|||dz    }|dk(  r|dz   S |dk(  r6|j	                  d|      r|dz   S |j	                  d|      ry||kD  r|S |dz   S |dk(  ry|dv ry||kD  r|S |dz   S t        d	      )
Nr   r   /rj   r8   r9   r   z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzwe should not get here!)r   locatestarttagend_tolerantrE   rN   rC   AssertionError)r   rR   r   rz   rT   nexts         r   rl   z$HTMLParser.check_for_whole_start_tagn  s    ,,&,,Wa8A1QqS>Ds{1us{%%dA.q5L%%c1-q5Hq5Lrz 5 6 1u1u677r   c                 .   | j                   }|||dz    dk(  sJ d       t        j                  ||dz         }|sy|j                         }t        j                  ||      }|s| j                  | j                  |||        |S t        j                  ||dz         }|s!|||dz    dk(  r|dz   S | j                  |      S |j                  d      j                         }|j                  d|j                               }| j                  |       |dz   S |j                  d      j                         }| j                  %|| j                  k7  r| j                  |||        |S | j                  |j                                | j                          |S )	Nr8   r4   zunexpected call to parse_endtagr   r9   rZ   z</>r   )r   	endendtagr?   rN   
endtagfindrE   r   rA   rm   r^   rL   r'   r<   handle_endtagr/   )r   rR   r   rE   r_   	namematchtagnamer+   s           r   rG   zHTMLParser.parse_endtag  s   ,,q1~%H'HH%  !A#.		  !,*  5!12(..w!<I1QqS>U*Q3J33A66ooa(..0G
 LLimmo6Ew'7N{{1~##%??&t&  5!124::<(r   c                 J    | j                  ||       | j                  |       y r.   )ru   r   r   ry   rx   s      r   rt   zHTMLParser.handle_startendtag  s     S%(3r   c                      y r.    r   s      r   ru   zHTMLParser.handle_starttag      r   c                      y r.   r   )r   ry   s     r   r   zHTMLParser.handle_endtag  r   r   c                      y r.   r   r   rW   s     r   rM   zHTMLParser.handle_charref  r   r   c                      y r.   r   r   s     r   rP   zHTMLParser.handle_entityref  r   r   c                      y r.   r   r   s     r   rA   zHTMLParser.handle_data  r   r   c                      y r.   r   r   s     r   ra   zHTMLParser.handle_comment  r   r   c                      y r.   r   )r   decls     r   r]   zHTMLParser.handle_decl  r   r   c                      y r.   r   r   s     r   rf   zHTMLParser.handle_pi  r   r   c                      y r.   r   r   s     r   unknown_declzHTMLParser.unknown_decl  r   r   c                     t        |      S r.   r   )r   ss     r   r   zHTMLParser.unescape  s    {r   )T)r   )__name__
__module____qualname____doc__rv   r   r   r    r"   r$   r%   r,   r/   r   rJ   r^   rI   rF   rl   rG   rt   ru   r   rM   rP   rA   ra   r]   rf   r   r   r   r   r   r   r   M   s    * 1* O$Nu#t/*		/f8D%P 
r   )r   r(   warnings_markupbaser   ImportErrorr   r   __all__r)   r   rQ   rO   rK   rD   re   commentcloserm   rn   VERBOSEr   r   r   r   r   r   r   r   <module>r      s2   "* 
 $  .  RZZ' RZZ%
BJJ>?	
"**@
Arzz+&
"**S/rzz)$ 2::LM BJJ=>  (RZZ ) ZZ  BJJsO	 RZZ>?
T&& Tg  s   D 	DD