
    V=^,                        d dl mZmZmZ d dlmZ d dlmZm	Z	 d dl
mZ ddlmZ ddlmZ ddlmZmZ dd	lmZmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ  ee      Zedk\  reZne	Z G d de      Zy)    )absolute_importdivisionunicode_literals)unichr)dequeOrderedDict)version_info   )spaceCharacters)entities)asciiLettersasciiUpper2Lower)digits	hexDigitsEOF)
tokenTypestagTokenTypes)replacementCharacters)HTMLInputStream)Trie)      c                       e Zd ZdZdK fd	Zd Zd ZdLdZd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z d Z!d  Z"d! Z#d" Z$d# Z%d$ Z&d% Z'd& Z(d' Z)d( Z*d) Z+d* Z,d+ Z-d, Z.d- Z/d. Z0d/ Z1d0 Z2d1 Z3d2 Z4d3 Z5d4 Z6d5 Z7d6 Z8d7 Z9d8 Z:d9 Z;d: Z<d; Z=d< Z>d= Z?d> Z@d? ZAd@ ZBdA ZCdB ZDdC ZEdD ZFdE ZGdF ZHdG ZIdH ZJdI ZKdJ ZL xZMS )MHTMLTokenizera	   This class takes care of tokenizing HTML.

    * self.currentToken
      Holds the token that is currently being processed.

    * self.state
      Holds a reference to the method to be invoked... XXX

    * self.stream
      Points to HTMLInputStream object.
    c                     t        |fi || _        || _        d| _        g | _        | j
                  | _        d| _        d | _        t        t        | /          y NF)r   streamparser
escapeFlaglastFourChars	dataStatestateescapecurrentTokensuperr   __init__)selfr   r   kwargs	__class__s       5/usr/lib/python3/dist-packages/html5lib/_tokenizer.pyr&   zHTMLTokenizer.__init__(   sU    %f77  ^^
 !mT+-    c              #     K   t        g       | _        | j                         r| j                  j                  rHt
        d   | j                  j                  j                  d      d | j                  j                  rH| j                  r)| j                  j                          | j                  r)| j                         ryyw)z This is where the magic happens.

        We do our usually processing through the states and when we have a token
        to return we yield the token which pauses processing until the next token
        is requested.
        
ParseErrorr   typedataN)r   
tokenQueuer"   r   errorsr   poppopleftr'   s    r*   __iter__zHTMLTokenizer.__iter__7   s       ) jjl++$$),7ASASAWAWXYAZ[[ ++$$//oo--// // jjls   A>C	4C	6C	C	c                    t         }d}|rt        }d}g }| j                  j                         }||v r@|t        ur8|j                  |       | j                  j                         }||v r	|t        ur8t        dj                  |      |      }|t        v r2t        |   }| j                  j                  t        d   dd|id       nd|cxk  rd	k  sn |d
kD  r+d}| j                  j                  t        d   dd|id       nwd|cxk  rdk  s8n d|cxk  rdk  s+n d|cxk  rdk  sn d|cxk  rdk  sn |t        g d      v r(| j                  j                  t        d   dd|id       	 t        |      }|dk7  r@| j                  j                  t        d   dd       | j                  j                  |       |S # t        $ r+ |dz
  }t        d|dz	  z        t        d|dz  z        z   }Y zw xY w)zThis function returns either U+FFFD or the character based on the
        decimal or hexadecimal representation. It also discards ";" if present.
        If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
        
       r-   z$illegal-codepoint-for-numeric-entity	charAsIntr/   r0   datavarsi   i      �r
                  i  i  )#   i  i  i i i i i i i i i i i i i i i i i	 i	 i
 i
 i i i i i i i i i i i r>   i   i   i  ;z numeric-entity-without-semicolonr.   )r   r   r   charr   appendintjoinr   r1   r   	frozensetchr
ValueErrorunget)	r'   isHexallowedradix	charStackcr;   rG   vs	            r*   consumeNumberEntityz!HTMLTokenizer.consumeNumberEntityG   s    GE	 KK7lq|Q  "A 7lq|
 	*E2	 --(3DOO""J|,D$J1<i0H$J K ,f,8#DOO""J|,D$J1<i0H$J K
 9..9..9..9..Y (E F F &&
<0H(N5@)4L(N OK 9~ 8OO""J|,D$F$H IKKa   K'6Q"W-.Vq5y5I1JJKs   1G 1G76G7c                 h   d}| j                   j                         g}|d   t        v s|d   t        ddfv s
|(||d   k(  r | j                   j	                  |d          n|d   dk(  rd}|j                  | j                   j                                |d   dv r+d}|j                  | j                   j                                |r|d   t        v s|s<|d   t        v r1| j                   j	                  |d          | j                  |      }n=| j                  j                  t        d	   d
d       | j                   j	                  |j                                ddj                  |      z   }n|d   t        urZt        j                  dj                  |            sn5|j                  | j                   j                                |d   t        urZ	 t        j                  dj                  |d d             }t!        |      }||d   dk7  r%| j                  j                  t        d	   dd       |d   dk7  r^|r\|   t$        v s||   t        v s||   dk(  r>| j                   j	                  |j                                ddj                  |      z   }nt&        |   }| j                   j	                  |j                                |dj                  |d        z  }nb| j                  j                  t        d	   dd       | j                   j	                  |j                                ddj                  |      z   }|r| j(                  d   d   dxx   |z  cc<   y |t        v rd}nd}| j                  j                  t        |   |d       y # t"        $ r d }Y w xY w)N&r   <#F)xXTr-   zexpected-numeric-entityr.   r:   rF   znamed-entity-without-semicolon=zexpected-named-entityr0   r
   SpaceCharacters
Characters)r   rG   r   r   rN   rH   r   r   rU   r1   r   r3   rJ   entitiesTriehas_keys_with_prefixlongest_prefixlenKeyErrorr   r   r$   )	r'   allowedCharfromAttributeoutputrR   hex
entityNameentityLength	tokenTypes	            r*   consumeEntityzHTMLTokenizer.consumeEntity   sR   [[%%'(	aLO+y|S#/N([IaL-HKKil+q\S CT[[--/0}
*  !1!1!34 	"2IbMV$;!!)B-011#6 &&
<0H0I(K L!!)--/2rwwy11 R=+#889KL  !1!1!34 R=+")883B9PQ
": %b>S(OO**J|4L,L,N OrNc)m|,<|,6|,3KK%%immo6 2779#55F%j1FKK%%immo6bggi&>??F&&
<0H(?(A B!!)--/2rwwy11f%b)!,6,(-	(	OO""Jy,A6#RS=  "!
"s   2N" "N10N1c                 *    | j                  |d       y)zIThis method replaces the need for "entityInAttributeValueState".
        T)re   rf   N)rl   )r'   re   s     r*   processEntityInAttributez&HTMLTokenizer.processEntityInAttribute   s     	{$Gr+   c                 *   | j                   }|d   t        v r|d   j                  t              |d<   |d   t        d   k(  rC|d   }t        |      }t        |      t        |      kD  r|j                  |ddd          ||d<   |d   t        d   k(  rT|d   r%| j                  j                  t        d   d	d
       |d   r%| j                  j                  t        d   dd
       | j                  j                  |       | j                  | _        y)zThis method is a generic handler for emitting the tags. It also sets
        the state to "data" because that's what's needed after a token has been
        emitted.
        r/   nameStartTagr0   NrZ   EndTagr-   zattributes-in-end-tagr.   selfClosingzself-closing-flag-on-end-tag)r$   r   	translater   r   attributeMaprc   updater1   rH   r!   r"   )r'   tokenrawr0   s       r*   emitCurrentTokenzHTMLTokenizer.emitCurrentToken   s   
 !!&M]*!&M334DEE&MV}
: 66Fm#C(s8c$i'KKDbD	* $fV}
8 44=OO**J|4L4K,M N'OO**J|4L4R,T Uu%^^
r+   c                 j   | j                   j                         }|dk(  r| j                  | _        y
|dk(  r| j                  | _        y
|dk(  rK| j
                  j                  t        d   dd       | j
                  j                  t        d   dd       y
|t        u ry|t        v rG| j
                  j                  t        d	   || j                   j                  t        d
      z   d       y
| j                   j                  d      }| j
                  j                  t        d   ||z   d       y
)NrW   rX    r-   invalid-codepointr.   r_   Fr^   TrW   rX   r{   )r   rG   entityDataStater"   tagOpenStater1   rH   r   r   r   
charsUntilr'   r0   charss      r*   r!   zHTMLTokenizer.dataState   s0   {{!3;--DJ0 / S[**DJ, + XOO""J|,D,?$A BOO""J|,D,4$6 7$ ! S[_$ OO""J7H,I$(4;;+A+A/SW+X$X$Z [  KK**+?@EOO""J|,D$(5L$2 3r+   c                 F    | j                          | j                  | _        yNT)rl   r!   r"   r5   s    r*   r~   zHTMLTokenizer.entityDataState  s    ^^
r+   c                 l   | j                   j                         }|dk(  r| j                  | _        y|dk(  r| j                  | _        y|t
        k(  ry|dk(  rK| j                  j                  t        d   dd       | j                  j                  t        d   d	d       y|t        v rG| j                  j                  t        d
   || j                   j                  t        d      z   d       y| j                   j                  d      }| j                  j                  t        d   ||z   d       y)NrW   rX   Fr{   r-   r|   r.   r_   r?   r^   Tr}   )r   rG   characterReferenceInRcdatar"   rcdataLessThanSignStater   r1   rH   r   r   r   r   s      r*   rcdataStatezHTMLTokenizer.rcdataState"  s0   {{!3;88DJ0 / S[55DJ, + S[XOO""J|,D,?$A BOO""J|,D,4$6 7  _$ OO""J7H,I$(4;;+A+A/SW+X$X$Z [  KK**+?@EOO""J|,D$(5L$2 3r+   c                 F    | j                          | j                  | _        yr   )rl   r   r"   r5   s    r*   r   z(HTMLTokenizer.characterReferenceInRcdata?  s    %%
r+   c                    | j                   j                         }|dk(  r| j                  | _        y
|dk(  rK| j                  j                  t        d   dd       | j                  j                  t        d   dd       y
|t        k(  ry| j                   j                  d	      }| j                  j                  t        d   ||z   d       y
NrX   r{   r-   r|   r.   r_   r?   F)rX   r{   T)	r   rG   rawtextLessThanSignStater"   r1   rH   r   r   r   r   s      r*   rawtextStatezHTMLTokenizer.rawtextStateD  s    {{!3;66DJ  XOO""J|,D,?$A BOO""J|,D,4$6 7  S[KK**?;EOO""J|,D$(5L$2 3r+   c                    | j                   j                         }|dk(  r| j                  | _        y
|dk(  rK| j                  j                  t        d   dd       | j                  j                  t        d   dd       y
|t        k(  ry| j                   j                  d	      }| j                  j                  t        d   ||z   d       y
r   )	r   rG   scriptDataLessThanSignStater"   r1   rH   r   r   r   r   s      r*   scriptDataStatezHTMLTokenizer.scriptDataStateV  s    {{!3;99DJ  XOO""J|,D,?$A BOO""J|,D,4$6 7  S[KK**?;EOO""J|,D$(5L$2 3r+   c                 n   | j                   j                         }|t        k(  ry|dk(  rK| j                  j	                  t
        d   dd       | j                  j	                  t
        d   dd       y| j                  j	                  t
        d   || j                   j                  d      z   d       y)	NFr{   r-   r|   r.   r_   r?   T)r   rG   r   r1   rH   r   r   r'   r0   s     r*   plaintextStatezHTMLTokenizer.plaintextStateh  s    {{!3;XOO""J|,D,?$A BOO""J|,D,4$6 7
  OO""J|,D$(4;;+A+A(+K$K$M Nr+   c                 L   | j                   j                         }|dk(  r| j                  | _        y|dk(  r| j                  | _        y|t
        v r&t        d   |g ddd| _        | j                  | _        y|dk(  r\| j                  j                  t        d   dd	       | j                  j                  t        d
   dd	       | j                  | _        y|dk(  rR| j                  j                  t        d   dd	       | j                   j                  |       | j                  | _        y| j                  j                  t        d   dd	       | j                  j                  t        d
   dd	       | j                   j                  |       | j                  | _        y)N!/rq   F)r/   rp   r0   rs   selfClosingAcknowledged>r-   z'expected-tag-name-but-got-right-bracketr.   r_   z<>?z'expected-tag-name-but-got-question-markzexpected-tag-namerX   T)r   rG   markupDeclarationOpenStater"   closeTagOpenStater   r   r$   tagNameStater1   rH   r!   rN   bogusCommentStater   s     r*   r   zHTMLTokenizer.tagOpenStatew  s   {{!3;88DJ< ; S[//DJ8 7 \!)3J)?)-r05<A!CD **DJ, + S[ OO""J|,D$M$O POO""J|,Dd#STDJ  S[ OO""J|,D$M$O PKKd#//DJ  OO""J|,D$7$9 :OO""J|,Dc#RSKKd#DJr+   c                 z   | j                   j                         }|t        v r%t        d   |g dd| _        | j
                  | _        y|dk(  r7| j                  j                  t        d   dd       | j                  | _        y|t        u r\| j                  j                  t        d   dd       | j                  j                  t        d	   d
d       | j                  | _        y| j                  j                  t        d   dd|id       | j                   j                  |       | j                  | _        y)Nrr   Fr/   rp   r0   rs   r   r-   z*expected-closing-tag-but-got-right-bracketr.   z expected-closing-tag-but-got-eofr_   </z!expected-closing-tag-but-got-charr0   r<   T)r   rG   r   r   r$   r   r"   r1   rH   r!   r   rN   r   r   s     r*   r   zHTMLTokenizer.closeTagOpenState  s,   {{!<)3H)=t)+E!CD**DJ" ! S[OO""J|,D$P$R SDJ  S[OO""J|,D$F$H IOO""J|,Dd#STDJ  OO""J|,D$G17$@ A KKd#//DJr+   c                    | j                   j                         }|t        v r| j                  | _        y
|dk(  r| j                          y
|t        u r7| j                  j                  t        d   dd       | j                  | _        y
|dk(  r| j                  | _        y
|dk(  r=| j                  j                  t        d   dd       | j                  dxx   d	z  cc<   y
| j                  dxx   |z  cc<   y
)Nr   r-   zeof-in-tag-namer.   r   r{   r|   rp   r?   T)r   rG   r   beforeAttributeNameStater"   ry   r   r1   rH   r   r!   selfClosingStartTagStater$   r   s     r*   r   zHTMLTokenizer.tagNameState  s   {{!?"66DJ" ! S[!!#  S[OO""J|,D$5$7 8DJ  S[66DJ  XOO""J|,D,?$A Bf%1%
  f%-% r+   c                    | j                   j                         }|dk(  rd| _        | j                  | _        y| j
                  j                  t        d   dd       | j                   j                  |       | j                  | _        yNr   r:   r_   rX   r.   T)
r   rG   temporaryBufferrcdataEndTagOpenStater"   r1   rH   r   rN   r   r   s     r*   r   z%HTMLTokenizer.rcdataLessThanSignState  su    {{!3;#%D 33DJ
  OO""J|,Dc#RSKKd#))DJr+   c                 8   | j                   j                         }|t        v r'| xj                  |z  c_        | j                  | _        y| j                  j                  t        d   dd       | j                   j                  |       | j                  | _        yNr_   r   r.   T)r   rG   r   r   rcdataEndTagNameStater"   r1   rH   r   rN   r   r   s     r*   r   z#HTMLTokenizer.rcdataEndTagOpenState  s    {{!<  D( 33DJ
  OO""J|,Dd#STKKd#))DJr+   c                 &   | j                   xr8 | j                   d   j                         | j                  j                         k(  }| j                  j	                         }|t
        v r1|r/t        d   | j                  g dd| _         | j                  | _        y
|dk(  r1|r/t        d   | j                  g dd| _         | j                  | _        y
|dk(  rA|r?t        d   | j                  g dd| _         | j                          | j                  | _        y
|t        v r| xj                  |z  c_        y
| j                  j                  t        d   d| j                  z   d	       | j                  j                  |       | j                   | _        y
Nrp   rr   Fr   r   r   r_   r   r.   T)r$   lowerr   r   rG   r   r   r   r"   r   ry   r!   r   r1   rH   rN   r   r'   appropriater0   s      r*   r   z#HTMLTokenizer.rcdataEndTagNameState  s   ''mD,=,=f,E,K,K,MQUQeQeQkQkQm,m{{!?"{)3H)=)-)=)=)+E!CD 66DJ& % S[[)3H)=)-)=)=)+E!CD 66DJ  S[[)3H)=)-)=)=)+E!CD !!#DJ  \!  D(  	 OO""J|,D,043G3G,G$I JKKd#))DJr+   c                    | j                   j                         }|dk(  rd| _        | j                  | _        y| j
                  j                  t        d   dd       | j                   j                  |       | j                  | _        yr   )
r   rG   r   rawtextEndTagOpenStater"   r1   rH   r   rN   r   r   s     r*   r   z&HTMLTokenizer.rawtextLessThanSignState  su    {{!3;#%D 44DJ
  OO""J|,Dc#RSKKd#**DJr+   c                 8   | j                   j                         }|t        v r'| xj                  |z  c_        | j                  | _        y| j                  j                  t        d   dd       | j                   j                  |       | j                  | _        yr   )r   rG   r   r   rawtextEndTagNameStater"   r1   rH   r   rN   r   r   s     r*   r   z$HTMLTokenizer.rawtextEndTagOpenState  s    {{!<  D( 44DJ
  OO""J|,Dd#STKKd#**DJr+   c                 &   | j                   xr8 | j                   d   j                         | j                  j                         k(  }| j                  j	                         }|t
        v r1|r/t        d   | j                  g dd| _         | j                  | _        y
|dk(  r1|r/t        d   | j                  g dd| _         | j                  | _        y
|dk(  rA|r?t        d   | j                  g dd| _         | j                          | j                  | _        y
|t        v r| xj                  |z  c_        y
| j                  j                  t        d   d| j                  z   d	       | j                  j                  |       | j                   | _        y
r   )r$   r   r   r   rG   r   r   r   r"   r   ry   r!   r   r1   rH   rN   r   r   s      r*   r   z$HTMLTokenizer.rawtextEndTagNameState  s   ''mD,=,=f,E,K,K,MQUQeQeQkQkQm,m{{!?"{)3H)=)-)=)=)+E!CD 66DJ& % S[[)3H)=)-)=)=)+E!CD 66DJ  S[[)3H)=)-)=)=)+E!CD !!#DJ  \!  D(  	 OO""J|,D,043G3G,G$I JKKd#**DJr+   c                    | j                   j                         }|dk(  rd| _        | j                  | _        y|dk(  r7| j
                  j                  t        d   dd       | j                  | _        y| j
                  j                  t        d   dd       | j                   j                  |       | j                  | _        y)	Nr   r:   r   r_   z<!r.   rX   T)r   rG   r   scriptDataEndTagOpenStater"   r1   rH   r   scriptDataEscapeStartStaterN   r   r   s     r*   r   z)HTMLTokenizer.scriptDataLessThanSignState,  s    {{!3;#%D 77DJ  S[OO""J|,Dd#ST88DJ
  OO""J|,Dc#RSKKd#--DJr+   c                 8   | j                   j                         }|t        v r'| xj                  |z  c_        | j                  | _        y| j                  j                  t        d   dd       | j                   j                  |       | j                  | _        yr   )r   rG   r   r   scriptDataEndTagNameStater"   r1   rH   r   rN   r   r   s     r*   r   z'HTMLTokenizer.scriptDataEndTagOpenState:  s    {{!<  D( 77DJ
  OO""J|,Dd#STKKd#--DJr+   c                 &   | j                   xr8 | j                   d   j                         | j                  j                         k(  }| j                  j	                         }|t
        v r1|r/t        d   | j                  g dd| _         | j                  | _        y
|dk(  r1|r/t        d   | j                  g dd| _         | j                  | _        y
|dk(  rA|r?t        d   | j                  g dd| _         | j                          | j                  | _        y
|t        v r| xj                  |z  c_        y
| j                  j                  t        d   d| j                  z   d	       | j                  j                  |       | j                   | _        y
r   )r$   r   r   r   rG   r   r   r   r"   r   ry   r!   r   r1   rH   rN   r   r   s      r*   r   z'HTMLTokenizer.scriptDataEndTagNameStateE  s   ''mD,=,=f,E,K,K,MQUQeQeQkQkQm,m{{!?"{)3H)=)-)=)=)+E!CD 66DJ& % S[[)3H)=)-)=)=)+E!CD 66DJ  S[[)3H)=)-)=)=)+E!CD !!#DJ  \!  D(  	 OO""J|,D,043G3G,G$I JKKd#--DJr+   c                    | j                   j                         }|dk(  r7| j                  j                  t        d   dd       | j
                  | _        y| j                   j                  |       | j                  | _        yN-r_   r.   T)	r   rG   r1   rH   r   scriptDataEscapeStartDashStater"   rN   r   r   s     r*   r   z(HTMLTokenizer.scriptDataEscapeStartStatea  m    {{!3;OO""J|,Dc#RS<<DJ  KKd#--DJr+   c                    | j                   j                         }|dk(  r7| j                  j                  t        d   dd       | j
                  | _        y| j                   j                  |       | j                  | _        yr   )	r   rG   r1   rH   r   scriptDataEscapedDashDashStater"   rN   r   r   s     r*   r   z,HTMLTokenizer.scriptDataEscapeStartDashStatek  r   r+   c                 :   | j                   j                         }|dk(  r7| j                  j                  t        d   dd       | j
                  | _        y
|dk(  r| j                  | _        y
|dk(  rK| j                  j                  t        d   dd       | j                  j                  t        d   dd       y
|t        k(  r| j                  | _        y
| j                   j                  d	      }| j                  j                  t        d   ||z   d       y
)Nr   r_   r.   rX   r{   r-   r|   r?   )rX   r   r{   T)r   rG   r1   rH   r   scriptDataEscapedDashStater"   "scriptDataEscapedLessThanSignStater   r!   r   r   s      r*   scriptDataEscapedStatez$HTMLTokenizer.scriptDataEscapedStateu  s   {{!3;OO""J|,Dc#RS88DJ  S[@@DJ  XOO""J|,D,?$A BOO""J|,D,4$6 7  S[DJ
  KK**+?@EOO""J|,D$(5L$2 3r+   c                 B   | j                   j                         }|dk(  r7| j                  j                  t        d   dd       | j
                  | _        y	|dk(  r| j                  | _        y	|dk(  r\| j                  j                  t        d   dd       | j                  j                  t        d   dd       | j                  | _        y	|t        k(  r| j                  | _        y	| j                  j                  t        d   |d       | j                  | _        y	)
Nr   r_   r.   rX   r{   r-   r|   r?   T)r   rG   r1   rH   r   r   r"   r   r   r   r!   r   s     r*   r   z(HTMLTokenizer.scriptDataEscapedDashState  s   {{!3;OO""J|,Dc#RS<<DJ  S[@@DJ  XOO""J|,D,?$A BOO""J|,D,4$6 744DJ  S[DJ  OO""J|,Dd#ST44DJr+   c                    | j                   j                         }|dk(  r&| j                  j                  t        d   dd       y
|dk(  r| j
                  | _        y
|dk(  r7| j                  j                  t        d   dd       | j                  | _        y
|dk(  r\| j                  j                  t        d   dd       | j                  j                  t        d   d	d       | j                  | _        y
|t        k(  r| j                  | _        y
| j                  j                  t        d   |d       | j                  | _        y
)Nr   r_   r.   rX   r   r{   r-   r|   r?   T)r   rG   r1   rH   r   r   r"   r   r   r   r!   r   s     r*   r   z,HTMLTokenizer.scriptDataEscapedDashDashState  s5   {{!3;OO""J|,Dc#RS" ! S[@@DJ  S[OO""J|,Dc#RS--DJ  XOO""J|,D,?$A BOO""J|,D,4$6 744DJ  S[DJ  OO""J|,Dd#ST44DJr+   c                    | j                   j                         }|dk(  rd| _        | j                  | _        y|t
        v rA| j                  j                  t        d   d|z   d       || _        | j                  | _        y| j                  j                  t        d   dd       | j                   j                  |       | j                  | _        yr   )r   rG   r    scriptDataEscapedEndTagOpenStater"   r   r1   rH   r    scriptDataDoubleEscapeStartStaterN   r   r   s     r*   r   z0HTMLTokenizer.scriptDataEscapedLessThanSignState  s    {{!3;#%D >>DJ  \!OO""J|,DcTXj#YZ#'D >>DJ
  OO""J|,Dc#RSKKd#44DJr+   c                    | j                   j                         }|t        v r|| _        | j                  | _        y| j                  j                  t        d   dd       | j                   j                  |       | j                  | _        yr   )r   rG   r   r    scriptDataEscapedEndTagNameStater"   r1   rH   r   rN   r   r   s     r*   r   z.HTMLTokenizer.scriptDataEscapedEndTagOpenState  sv    {{!<#'D >>DJ
  OO""J|,Dd#STKKd#44DJr+   c                 &   | j                   xr8 | j                   d   j                         | j                  j                         k(  }| j                  j	                         }|t
        v r1|r/t        d   | j                  g dd| _         | j                  | _        y
|dk(  r1|r/t        d   | j                  g dd| _         | j                  | _        y
|dk(  rA|r?t        d   | j                  g dd| _         | j                          | j                  | _        y
|t        v r| xj                  |z  c_        y
| j                  j                  t        d   d| j                  z   d	       | j                  j                  |       | j                   | _        y
r   )r$   r   r   r   rG   r   r   r   r"   r   ry   r!   r   r1   rH   rN   r   r   s      r*   r   z.HTMLTokenizer.scriptDataEscapedEndTagNameState  s   ''mD,=,=f,E,K,K,MQUQeQeQkQkQm,m{{!?"{)3H)=)-)=)=)+E!CD 66DJ& % S[[)3H)=)-)=)=)+E!CD 66DJ  S[[)3H)=)-)=)=)+E!CD !!#DJ  \!  D(  	 OO""J|,D,043G3G,G$I JKKd#44DJr+   c                 
   | j                   j                         }|t        t        d      z  v rf| j                  j                  t        d   |d       | j                  j                         dk(  r| j                  | _
        y| j                  | _
        y|t        v r;| j                  j                  t        d   |d       | xj                  |z  c_        y| j                   j                  |       | j                  | _
        yN)r   r   r_   r.   scriptT)r   rG   r   rK   r1   rH   r   r   r   scriptDataDoubleEscapedStater"   r   r   rN   r   s     r*   r   z.HTMLTokenizer.scriptDataDoubleEscapeStartState  s    {{!Oi
&;;<OO""J|,Dd#ST##))+x7!>>
  "88
  \!OO""J|,Dd#ST  D(   KKd#44DJr+   c                    | j                   j                         }|dk(  r7| j                  j                  t        d   dd       | j
                  | _        y
|dk(  r7| j                  j                  t        d   dd       | j                  | _        y
|dk(  rK| j                  j                  t        d   dd       | j                  j                  t        d   dd       y
|t        k(  r7| j                  j                  t        d   d	d       | j                  | _        y
| j                  j                  t        d   |d       y
Nr   r_   r.   rX   r{   r-   r|   r?   eof-in-script-in-scriptT)
r   rG   r1   rH   r    scriptDataDoubleEscapedDashStater"   (scriptDataDoubleEscapedLessThanSignStater   r!   r   s     r*   r   z*HTMLTokenizer.scriptDataDoubleEscapedState  s/   {{!3;OO""J|,Dc#RS>>DJ  S[OO""J|,Dc#RSFFDJ  XOO""J|,D,?$A BOO""J|,D,4$6 7  S[OO""J|,D$=$? @DJ  OO""J|,Dd#STr+   c                    | j                   j                         }|dk(  r7| j                  j                  t        d   dd       | j
                  | _        y
|dk(  r7| j                  j                  t        d   dd       | j                  | _        y
|dk(  r\| j                  j                  t        d   dd       | j                  j                  t        d   dd       | j                  | _        y
|t        k(  r7| j                  j                  t        d   d	d       | j                  | _        y
| j                  j                  t        d   |d       | j                  | _        y
r   )r   rG   r1   rH   r   $scriptDataDoubleEscapedDashDashStater"   r   r   r   r!   r   s     r*   r   z.HTMLTokenizer.scriptDataDoubleEscapedDashState  sI   {{!3;OO""J|,Dc#RSBBDJ" ! S[OO""J|,Dc#RSFFDJ  XOO""J|,D,?$A BOO""J|,D,4$6 7::DJ  S[OO""J|,D$=$? @DJ  OO""J|,Dd#ST::DJr+   c                 ,   | j                   j                         }|dk(  r&| j                  j                  t        d   dd       y|dk(  r7| j                  j                  t        d   dd       | j
                  | _        y|dk(  r7| j                  j                  t        d   dd       | j                  | _        y|dk(  r\| j                  j                  t        d   dd       | j                  j                  t        d   d	d       | j                  | _        y|t        k(  r7| j                  j                  t        d   d
d       | j                  | _        y| j                  j                  t        d   |d       | j                  | _        y)Nr   r_   r.   rX   r   r{   r-   r|   r?   r   T)r   rG   r1   rH   r   r   r"   r   r   r   r!   r   s     r*   r   z2HTMLTokenizer.scriptDataDoubleEscapedDashDashState%  ss   {{!3;OO""J|,Dc#RS( ' S[OO""J|,Dc#RSFFDJ" ! S[OO""J|,Dc#RS--DJ  XOO""J|,D,?$A BOO""J|,D,4$6 7::DJ  S[OO""J|,D$=$? @DJ  OO""J|,Dd#ST::DJr+   c                    | j                   j                         }|dk(  r>| j                  j                  t        d   dd       d| _        | j                  | _        y| j                   j                  |       | j                  | _        y)Nr   r_   r.   r:   T)
r   rG   r1   rH   r   r   scriptDataDoubleEscapeEndStater"   rN   r   r   s     r*   r   z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignState>  su    {{!3;OO""J|,Dc#RS#%D <<DJ  KKd#::DJr+   c                 
   | j                   j                         }|t        t        d      z  v rf| j                  j                  t        d   |d       | j                  j                         dk(  r| j                  | _
        y| j                  | _
        y|t        v r;| j                  j                  t        d   |d       | xj                  |z  c_        y| j                   j                  |       | j                  | _
        yr   )r   rG   r   rK   r1   rH   r   r   r   r   r"   r   r   rN   r   s     r*   r   z,HTMLTokenizer.scriptDataDoubleEscapeEndStateI  s    {{!Oi
&;;<OO""J|,Dd#ST##))+x7!88
  ">>
  \!OO""J|,Dd#ST  D(   KKd#::DJr+   c                    | j                   j                         }|t        v r!| j                   j                  t        d       y|t        v r2| j
                  d   j                  |dg       | j                  | _        y|dk(  r| j                          y|dk(  r| j                  | _        y|dv rW| j                  j                  t        d   dd	       | j
                  d   j                  |dg       | j                  | _        y|d
k(  rW| j                  j                  t        d   dd	       | j
                  d   j                  ddg       | j                  | _        y|t        u r7| j                  j                  t        d   dd	       | j                  | _        y| j
                  d   j                  |dg       | j                  | _        y)NTr0   r:   r   r   )'"r]   rX   r-   #invalid-character-in-attribute-namer.   r{   r|   r?   z#expected-attribute-name-but-got-eof)r   rG   r   r   r   r$   rH   attributeNameStater"   ry   r   r1   r   r   r!   r   s     r*   r   z&HTMLTokenizer.beforeAttributeNameStateY  s   {{!?"KK""?D92 1 \!f%,,dBZ800DJ, + S[!!#( ' S[66DJ$ # ))OO""J|,D$I$K Lf%,,dBZ800DJ  XOO""J|,D,?$A Bf%,,h^<00DJ  S[OO""J|,D$I$K LDJ  f%,,dBZ800DJr+   c                    | j                   j                         }d}d}|dk(  r| j                  | _        nv|t        v rB| j
                  d   d   dxx   || j                   j                  t        d      z   z  cc<   d}n,|dk(  rd}n#|t        v r| j                  | _        n|dk(  r| j                  | _        n|d	k(  rE| j                  j                  t        d
   dd       | j
                  d   d   dxx   dz  cc<   d}n|dv rE| j                  j                  t        d
   dd       | j
                  d   d   dxx   |z  cc<   d}n^|t        u r7| j                  j                  t        d
   dd       | j                  | _        n| j
                  d   d   dxx   |z  cc<   d}|r| j
                  d   d   d   j                  t               | j
                  d   d   d<   | j
                  d   d d D ]D  \  }}| j
                  d   d   d   |k(  s| j                  j                  t        d
   dd        n |r| j#                          y)NTFr]   r0   rZ   r   r   r   r{   r-   r|   r.   r?   r   r   rX   r   zeof-in-attribute-namezduplicate-attribute)r   rG   beforeAttributeValueStater"   r   r$   r   r   afterAttributeNameStater   r1   rH   r   r   r!   rt   r   ry   )r'   r0   leavingThisState	emitTokenrp   _s         r*   r   z HTMLTokenizer.attributeNameStatew  sq   {{!	3;77DJ\!f%b)!,&&|T:1; ;,$S[ I_$55DJS[66DJXOO""J|,D,?$A Bf%b)!,8,$_$OO""J|,D$I$K L f%b)!,4,$S[OO""J|,D,C$E FDJf%b)!,4,$
 !!&)"-a0::;KL f%b)!,,,V4Sb9 a$$V,R03t;OO**J|4L,A,C D	 %%'r+   c                    | j                   j                         }|t        v r!| j                   j                  t        d       y|dk(  r| j                  | _        y|dk(  r| j                          y|t        v r2| j                  d   j                  |dg       | j                  | _        y|dk(  r| j                  | _        y|dk(  rW| j                  j                  t        d   d	d
       | j                  d   j                  ddg       | j                  | _        y|dv rW| j                  j                  t        d   dd
       | j                  d   j                  |dg       | j                  | _        y|t        u r7| j                  j                  t        d   dd
       | j                  | _        y| j                  d   j                  |dg       | j                  | _        y)NTr]   r   r0   r:   r   r{   r-   r|   r.   r?   r   z&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)r   rG   r   r   r   r"   ry   r   r$   rH   r   r   r1   r   r   r!   r   s     r*   r   z%HTMLTokenizer.afterAttributeNameState  s   {{!?"KK""?D96 5 S[77DJ2 1 S[!!#. - \!f%,,dBZ800DJ( ' S[66DJ$ # XOO""J|,D,?$A Bf%,,h^<00DJ  _$OO""J|,D$L$N Of%,,dBZ800DJ  S[OO""J|,D$E$G HDJ  f%,,dBZ800DJr+   c                    | j                   j                         }|t        v r!| j                   j                  t        d       y|dk(  r| j                  | _        y|dk(  r-| j                  | _        | j                   j                  |       y|dk(  r| j                  | _        y|dk(  r6| j                  j                  t        d   dd       | j                          y|d	k(  rT| j                  j                  t        d   d
d       | j                  d   d   dxx   dz  cc<   | j                  | _        y|dv rT| j                  j                  t        d   dd       | j                  d   d   dxx   |z  cc<   | j                  | _        y|t        u r7| j                  j                  t        d   dd       | j                  | _        y| j                  d   d   dxx   |z  cc<   | j                  | _        y)NTr   rW   r   r   r-   z.expected-attribute-value-but-got-right-bracketr.   r{   r|   r0   rZ   r
   r?   )r]   rX   `z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eof)r   rG   r   r   attributeValueDoubleQuotedStater"   attributeValueUnQuotedStaterN   attributeValueSingleQuotedStater1   rH   r   ry   r$   r   r!   r   s     r*   r   z'HTMLTokenizer.beforeAttributeValueState  s   {{!?"KK""?D9: 9 T\==DJ6 5 S[99DJKKd#0 / S[==DJ, + S[OO""J|,D$T$V W!!#$ # XOO""J|,D,?$A Bf%b)!,8,99DJ  _$OO""J|,D$H$J Kf%b)!,4,99DJ  S[OO""J|,D$J$L MDJ  f%b)!,4,99DJr+   c                    | j                   j                         }|dk(  r| j                  | _        y|dk(  r| j	                  d       y|dk(  rC| j
                  j                  t        d   dd       | j                  d   d   d	xx   d
z  cc<   y|t        u r7| j
                  j                  t        d   dd       | j                  | _        y| j                  d   d   d	xx   || j                   j                  d      z   z  cc<   y)Nr   rW   r{   r-   r|   r.   r0   rZ   r
   r?   z#eof-in-attribute-value-double-quote)r   rW   r{   Tr   rG   afterAttributeValueStater"   rn   r1   rH   r   r$   r   r!   r   r   s     r*   r   z-HTMLTokenizer.attributeValueDoubleQuotedState  s   {{!4<66DJ  S[))#.  XOO""J|,D,?$A Bf%b)!,8,  S[OO""J|,D$I$K LDJ  f%b)!,&&'<=1> >,r+   c                    | j                   j                         }|dk(  r| j                  | _        y|dk(  r| j	                  d       y|dk(  rC| j
                  j                  t        d   dd       | j                  d   d   d	xx   d
z  cc<   y|t        u r7| j
                  j                  t        d   dd       | j                  | _        y| j                  d   d   d	xx   || j                   j                  d      z   z  cc<   y)Nr   rW   r{   r-   r|   r.   r0   rZ   r
   r?   z#eof-in-attribute-value-single-quote)r   rW   r{   Tr   r   s     r*   r   z-HTMLTokenizer.attributeValueSingleQuotedState  s   {{!3;66DJ  S[))#.  XOO""J|,D,?$A Bf%b)!,8,  S[OO""J|,D$I$K LDJ  f%b)!,&&';<1= =,r+   c           	         | j                   j                         }|t        v r| j                  | _        y|dk(  r| j                  d       y|dk(  r| j                          y|dv rC| j                  j                  t        d   dd       | j                  d   d   d	xx   |z  cc<   y|d
k(  rC| j                  j                  t        d   dd       | j                  d   d   d	xx   dz  cc<   y|t        u r7| j                  j                  t        d   dd       | j                  | _        y| j                  d   d   d	xx   || j                   j                  t        d      t        z        z   z  cc<   y)NrW   r   )r   r   r]   rX   r   r-   z0unexpected-character-in-unquoted-attribute-valuer.   r0   rZ   r
   r{   r|   r?   z eof-in-attribute-value-no-quotes)rW   r   r   r   r]   rX   r   r{   T)r   rG   r   r   r"   rn   ry   r1   rH   r   r$   r   r!   r   rK   r   s     r*   r   z)HTMLTokenizer.attributeValueUnQuotedState  s   {{!?"66DJ( ' S[))#.$ # S[!!#   ..OO""J|,D$V$X Yf%b)!,4,  XOO""J|,D,?$A Bf%b)!,8,  S[OO""J|,D$F$H IDJ  f%b)!,t{{7M7MGH?Z8\ 1\ \,r+   c                    | j                   j                         }|t        v r| j                  | _        y|dk(  r| j                          y|dk(  r| j                  | _        y|t        u rR| j                  j                  t        d   dd       | j                   j                  |       | j                  | _        y| j                  j                  t        d   dd       | j                   j                  |       | j                  | _        y)Nr   r   r-   z$unexpected-EOF-after-attribute-valuer.   z*unexpected-character-after-attribute-valueT)r   rG   r   r   r"   ry   r   r   r1   rH   r   rN   r!   r   s     r*   r   z&HTMLTokenizer.afterAttributeValueState.  s    {{!?"66DJ  S[!!#  S[66DJ  S[OO""J|,D$J$L MKKd#DJ 	 OO""J|,D$P$R SKKd#66DJr+   c                    | j                   j                         }|dk(  r d| j                  d<   | j                          y|t        u rR| j
                  j                  t        d   dd       | j                   j                  |       | j                  | _
        y| j
                  j                  t        d   dd       | j                   j                  |       | j                  | _
        y)Nr   Trs   r-   z#unexpected-EOF-after-solidus-in-tagr.   z)unexpected-character-after-solidus-in-tag)r   rG   r$   ry   r   r1   rH   r   rN   r!   r"   r   r   s     r*   r   z&HTMLTokenizer.selfClosingStartTagStateB  s    {{!3;/3Dm,!!#  S[OO""J|,D$I$K L KKd#DJ 	 OO""J|,D$O$Q RKKd#66DJr+   c                     | j                   j                  d      }|j                  dd      }| j                  j	                  t
        d   |d       | j                   j                          | j                  | _        y)Nr   r{   r?   Commentr.   T)	r   r   replacer1   rH   r   rG   r!   r"   r   s     r*   r   zHTMLTokenizer.bogusCommentStateT  sg     {{%%c*||Hh/	*D9	;
 	^^
r+   c                 <   | j                   j                         g}|d   dk(  rU|j                  | j                   j                                |d   dk(  rct        d   dd| _        | j
                  | _        y|d   dv red}dD ]6  }|j                  | j                   j                                |d   |vs4d	} n |rt        d
   dd d dd| _        | j                  | _        y|d   dk(  r| j                  | j                  j                  j                  r| j                  j                  j                  d   j                  | j                  j                  j                  k7  rRd}dD ]7  }|j                  | j                   j                                |d   |k7  s5d	} n |r| j                  | _        y| j                  j                  t        d   dd       |r,| j                   j                  |j!                                |r,| j"                  | _        y)NrZ   r   r   r:   r.   T)dD))oOrS   CtTyYpPeEFDoctype)r/   rp   publicIdsystemIdcorrect[)r   r   Ar  r  r  r-   zexpected-dashes-or-doctype)r   rG   rH   r   r$   commentStartStater"   doctypeStater   treeopenElements	namespacedefaultNamespacecdataSectionStater1   rN   r3   r   )r'   rR   matchedexpecteds       r*   r   z(HTMLTokenizer.markupDeclarationOpenStatec  s   [[%%'(	R=CT[[--/0}#-7	-BB$O!!33
r]j(GA   !1!1!34R=0#G -7	-B-/15404%6! "..
ms"kk%kk++kk++B/99T[[=M=M=^=^^G:   !1!1!34R=H,#G	
 !33

<(@ < > 	? KKimmo. ++
r+   c                    | j                   j                         }|dk(  r| j                  | _        y|dk(  r=| j                  j                  t        d   dd       | j                  dxx   dz  cc<   y|dk(  r\| j                  j                  t        d   d	d       | j                  j                  | j                         | j                  | _        y|t        u r\| j                  j                  t        d   d
d       | j                  j                  | j                         | j                  | _        y| j                  dxx   |z  cc<   | j                  | _        y)Nr   r{   r-   r|   r.   r0   r?   r   incorrect-commenteof-in-commentT)r   rG   commentStartDashStater"   r1   rH   r   r$   r!   r   commentStater   s     r*   r  zHTMLTokenizer.commentStartState  sA   {{!3;33DJ$ # XOO""J|,D,?$A Bf%1%  S[OO""J|,D$7$9 :OO""4#4#45DJ  S[OO""J|,D$4$6 7OO""4#4#45DJ  f%-%**DJr+   c                    | j                   j                         }|dk(  r| j                  | _        y|dk(  r=| j                  j                  t        d   dd       | j                  dxx   dz  cc<   y|dk(  r\| j                  j                  t        d   d	d       | j                  j                  | j                         | j                  | _        y|t        u r\| j                  j                  t        d   d
d       | j                  j                  | j                         | j                  | _        y| j                  dxx   d|z   z  cc<   | j                  | _        y)Nr   r{   r-   r|   r.   r0      -�r   r  r  T)r   rG   commentEndStater"   r1   rH   r   r$   r!   r   r  r   s     r*   r  z#HTMLTokenizer.commentStartDashState  sE   {{!3;--DJ$ # XOO""J|,D,?$A Bf%2%  S[OO""J|,D$7$9 :OO""4#4#45DJ  S[OO""J|,D$4$6 7OO""4#4#45DJ  f%t3%**DJr+   c                    | j                   j                         }|dk(  r| j                  | _        y
|dk(  r=| j                  j                  t        d   dd       | j                  dxx   dz  cc<   y
|t        u r\| j                  j                  t        d   dd       | j                  j                  | j                         | j                  | _        y
| j                  dxx   || j                   j                  d	      z   z  cc<   y
)Nr   r{   r-   r|   r.   r0   r?   r  )r   r{   T)r   rG   commentEndDashStater"   r1   rH   r   r$   r   r!   r   r   s     r*   r  zHTMLTokenizer.commentState  s    {{!3;11DJ  XOO""J|,D,?$A Bf%1%  S[OO""J|,D,<$> ?OO""4#4#45DJ  f%&&7*8 8%r+   c                 *   | j                   j                         }|dk(  r| j                  | _        y	|dk(  rN| j                  j                  t        d   dd       | j                  dxx   dz  cc<   | j                  | _        y	|t        u r\| j                  j                  t        d   dd       | j                  j                  | j                         | j                  | _        y	| j                  dxx   d|z   z  cc<   | j                  | _        y	)
Nr   r{   r-   r|   r.   r0   r!  zeof-in-comment-end-dashT)r   rG   r"  r"   r1   rH   r   r$   r  r   r!   r   s     r*   r$  z!HTMLTokenizer.commentEndDashState  s   {{!3;--DJ  XOO""J|,D,?$A Bf%2%**DJ  S[OO""J|,D$=$? @OO""4#4#45DJ  f%t3%**DJr+   c                    | j                   j                         }|dk(  r7| j                  j                  | j                         | j
                  | _        y|dk(  rN| j                  j                  t        d   dd       | j                  dxx   dz  cc<   | j                  | _        y|dk(  r7| j                  j                  t        d   d	d       | j                  | _        y|d
k(  r=| j                  j                  t        d   dd       | j                  dxx   |z  cc<   y|t        u r\| j                  j                  t        d   dd       | j                  j                  | j                         | j
                  | _        y| j                  j                  t        d   dd       | j                  dxx   d|z   z  cc<   | j                  | _        y)Nr   r{   r-   r|   r.   r0   u   --�r   z,unexpected-bang-after-double-dash-in-commentr   z,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T)r   rG   r1   rH   r$   r!   r"   r   r  commentEndBangStater   r   s     r*   r"  zHTMLTokenizer.commentEndState  s   {{!3;OO""4#4#45DJ2 1 XOO""J|,D,?$A Bf%3%**DJ( ' S[OO""J|,D$R$T U11DJ   S[OO""J|,D$R$T Uf%-%  S[OO""J|,D$@$B COO""4#4#45DJ 	 OO""J|,D$@$B Cf%4%**DJr+   c                    | j                   j                         }|dk(  r7| j                  j                  | j                         | j
                  | _        y|dk(  r)| j                  dxx   dz  cc<   | j                  | _        y|dk(  rN| j                  j                  t        d   dd       | j                  dxx   d	z  cc<   | j                  | _        y|t        u r\| j                  j                  t        d   d
d       | j                  j                  | j                         | j
                  | _        y| j                  dxx   d|z   z  cc<   | j                  | _        y)Nr   r   r0   z--!r{   r-   r|   r.   u   --!�zeof-in-comment-end-bang-stateT)r   rG   r1   rH   r$   r!   r"   r$  r   r  r   r   s     r*   r'  z!HTMLTokenizer.commentEndBangState  sI   {{!3;OO""4#4#45DJ" ! S[f%.%11DJ  XOO""J|,D,?$A Bf%4%**DJ  S[OO""J|,D$C$E FOO""4#4#45DJ  f%5%**DJr+   c                    | j                   j                         }|t        v r| j                  | _        y|t
        u rk| j                  j                  t        d   dd       d| j                  d<   | j                  j                  | j                         | j                  | _        y| j                  j                  t        d   dd       | j                   j                  |       | j                  | _        y)Nr-   !expected-doctype-name-but-got-eofr.   Fr  zneed-space-after-doctypeT)r   rG   r   beforeDoctypeNameStater"   r   r1   rH   r   r$   r!   rN   r   s     r*   r  zHTMLTokenizer.doctypeState  s    {{!?"44DJ  S[OO""J|,D$G$I J+0Di(OO""4#4#45DJ 	 OO""J|,D$>$@ AKKd#44DJr+   c                    | j                   j                         }|t        v r	 y|dk(  rk| j                  j	                  t
        d   dd       d| j                  d<   | j                  j	                  | j                         | j                  | _        y|dk(  rF| j                  j	                  t
        d   dd       d	| j                  d
<   | j                  | _        y|t        u rk| j                  j	                  t
        d   dd       d| j                  d<   | j                  j	                  | j                         | j                  | _        y|| j                  d
<   | j                  | _        y)Nr   r-   z+expected-doctype-name-but-got-right-bracketr.   Fr  r{   r|   r?   rp   r*  T)r   rG   r   r1   rH   r   r$   r!   r"   doctypeNameStater   r   s     r*   r+  z$HTMLTokenizer.beforeDoctypeNameState*  s]   {{!?"* ) S[OO""J|,D$Q$S T+0Di(OO""4#4#45DJ  XOO""J|,D,?$A B(0Df%..DJ  S[OO""J|,D$G$I J+0Di(OO""4#4#45DJ  )-Df%..DJr+   c                    | j                   j                         }|t        v rA| j                  d   j	                  t
              | j                  d<   | j                  | _        y|dk(  rf| j                  d   j	                  t
              | j                  d<   | j                  j                  | j                         | j                  | _        y|dk(  rN| j                  j                  t        d   dd       | j                  dxx   dz  cc<   | j                  | _        y|t        u r| j                  j                  t        d   dd       d	| j                  d
<   | j                  d   j	                  t
              | j                  d<   | j                  j                  | j                         | j                  | _        y| j                  dxx   |z  cc<   y)Nrp   r   r{   r-   r|   r.   r?   zeof-in-doctype-nameFr  T)r   rG   r   r$   rt   r   afterDoctypeNameStater"   r1   rH   r!   r   r-  r   r   s     r*   r-  zHTMLTokenizer.doctypeNameStateD  s   {{!?"(,(9(9&(A(K(KL\(]Df%33DJ& % S[(,(9(9&(A(K(KL\(]Df%OO""4#4#45DJ  XOO""J|,D,?$A Bf%1%..DJ  S[OO""J|,D$9$; <+0Di((,(9(9&(A(K(KL\(]Df%OO""4#4#45DJ  f%-%r+   c                    | j                   j                         }|t        v r	 y|dk(  r7| j                  j	                  | j
                         | j                  | _        y|t        u rd| j
                  d<   | j                   j                  |       | j                  j	                  t        d   dd       | j                  j	                  | j
                         | j                  | _        y|dv r?d}d	D ]$  }| j                   j                         }||vs"d} n |rU| j                  | _        y|d
v r?d}dD ]$  }| j                   j                         }||vs"d} n |r| j                  | _        y| j                   j                  |       | j                  j	                  t        d   dd|id       d| j
                  d<   | j                  | _        y)Nr   Fr  r-   eof-in-doctyper.   r  T))uU)bB)lL)iIr   sS)r  r:  r   r	  )mMz*expected-space-or-right-bracket-in-doctyper0   r<   )r   rG   r   r1   rH   r$   r!   r"   r   rN   r   afterDoctypePublicKeywordStateafterDoctypeSystemKeywordStatebogusDoctypeState)r'   r0   r  r  s       r*   r/  z#HTMLTokenizer.afterDoctypeNameState]  s   {{!?"\ [ S[OO""4#4#45DJV U S[+0Di(KKd#OO""J|,D$4$6 7OO""4#4#45DJH E z!!9 H;;++-D8+"' !%!D!DDJ#!9 H;;++-D8+"' !%!D!DDJ KKd#OO""J|,D$P%+TN$4 5 ,1Di(//DJr+   c                 V   | j                   j                         }|t        v r| j                  | _        y|dv rR| j
                  j                  t        d   dd       | j                   j                  |       | j                  | _        y|t        u rk| j
                  j                  t        d   dd       d| j                  d<   | j
                  j                  | j                         | j                  | _        y| j                   j                  |       | j                  | _        y	N)r   r   r-   unexpected-char-in-doctyper.   r1  Fr  T)r   rG   r   "beforeDoctypePublicIdentifierStater"   r1   rH   r   rN   r   r$   r!   r   s     r*   r?  z,HTMLTokenizer.afterDoctypePublicKeywordState     {{!?"@@DJ  ZOO""J|,D$@$B CKKd#@@DJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ  KKd#@@DJr+   c                 4   | j                   j                         }|t        v r	 y|dk(  r!d| j                  d<   | j                  | _        y|dk(  r!d| j                  d<   | j                  | _        y|dk(  rk| j                  j                  t        d   dd       d	| j                  d
<   | j                  j                  | j                         | j                  | _        y|t        u rk| j                  j                  t        d   dd       d	| j                  d
<   | j                  j                  | j                         | j                  | _        y| j                  j                  t        d   dd       d	| j                  d
<   | j                  | _        y)Nr   r:   r  r   r   r-   unexpected-end-of-doctyper.   Fr  r1  rD  T)r   rG   r   r$   (doctypePublicIdentifierDoubleQuotedStater"   (doctypePublicIdentifierSingleQuotedStater1   rH   r   r!   r   rA  r   s     r*   rE  z0HTMLTokenizer.beforeDoctypePublicIdentifierState  s   {{!?"0 / T\,.Dj)FFDJ* ) S[,.Dj)FFDJ$ # S[OO""J|,D$?$A B+0Di(OO""4#4#45DJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ 	 OO""J|,D$@$B C+0Di(//DJr+   c                    | j                   j                         }|dk(  r| j                  | _        y|dk(  r=| j                  j                  t        d   dd       | j                  dxx   dz  cc<   y|dk(  rk| j                  j                  t        d   d	d       d
| j                  d<   | j                  j                  | j                         | j                  | _        y|t        u rk| j                  j                  t        d   dd       d
| j                  d<   | j                  j                  | j                         | j                  | _        y| j                  dxx   |z  cc<   y)Nr   r{   r-   r|   r.   r  r?   r   rH  Fr  r1  T
r   rG   !afterDoctypePublicIdentifierStater"   r1   rH   r   r$   r!   r   r   s     r*   rI  z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedState  V   {{!4<??DJ& % XOO""J|,D,?$A Bj)X5)  S[OO""J|,D$?$A B+0Di(OO""4#4#45DJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ  j)T1)r+   c                    | j                   j                         }|dk(  r| j                  | _        y|dk(  r=| j                  j                  t        d   dd       | j                  dxx   dz  cc<   y|dk(  rk| j                  j                  t        d   d	d       d
| j                  d<   | j                  j                  | j                         | j                  | _        y|t        u rk| j                  j                  t        d   dd       d
| j                  d<   | j                  j                  | j                         | j                  | _        y| j                  dxx   |z  cc<   y)Nr   r{   r-   r|   r.   r  r?   r   rH  Fr  r1  TrL  r   s     r*   rJ  z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedState  V   {{!3;??DJ& % XOO""J|,D,?$A Bj)X5)  S[OO""J|,D$?$A B+0Di(OO""4#4#45DJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ  j)T1)r+   c                    | j                   j                         }|t        v r| j                  | _        y|dk(  r7| j
                  j                  | j                         | j                  | _        y|dk(  rF| j
                  j                  t        d   dd       d| j                  d<   | j                  | _        y|dk(  rF| j
                  j                  t        d   dd       d| j                  d<   | j                  | _        y|t        u rk| j
                  j                  t        d   d	d       d
| j                  d<   | j
                  j                  | j                         | j                  | _        y| j
                  j                  t        d   dd       d
| j                  d<   | j                  | _        y)Nr   r   r-   rD  r.   r:   r  r   r1  Fr  T)r   rG   r   -betweenDoctypePublicAndSystemIdentifiersStater"   r1   rH   r$   r!   r   (doctypeSystemIdentifierDoubleQuotedState(doctypeSystemIdentifierSingleQuotedStater   rA  r   s     r*   rM  z/HTMLTokenizer.afterDoctypePublicIdentifierState  s   {{!?"KKDJ2 1 S[OO""4#4#45DJ, + S[OO""J|,D$@$B C,.Dj)FFDJ" ! S[OO""J|,D$@$B C,.Dj)FFDJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ 	 OO""J|,D$@$B C+0Di(//DJr+   c                    | j                   j                         }|t        v r	 y|dk(  r7| j                  j	                  | j
                         | j                  | _        y|dk(  r!d| j
                  d<   | j                  | _        y|dk(  r!d| j
                  d<   | j                  | _        y|t        k(  rk| j                  j	                  t        d   dd       d	| j
                  d
<   | j                  j	                  | j
                         | j                  | _        y| j                  j	                  t        d   dd       d	| j
                  d
<   | j                  | _        y)Nr   r   r:   r  r   r-   r1  r.   Fr  rD  T)r   rG   r   r1   rH   r$   r!   r"   rS  rT  r   r   rA  r   s     r*   rR  z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersState  sR   {{!?"* ) S[OO""4#4#45DJ$ # S[,.Dj)FFDJ  S[,.Dj)FFDJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ 	 OO""J|,D$@$B C+0Di(//DJr+   c                 V   | j                   j                         }|t        v r| j                  | _        y|dv rR| j
                  j                  t        d   dd       | j                   j                  |       | j                  | _        y|t        u rk| j
                  j                  t        d   dd       d| j                  d<   | j
                  j                  | j                         | j                  | _        y| j                   j                  |       | j                  | _        yrC  )r   rG   r   "beforeDoctypeSystemIdentifierStater"   r1   rH   r   rN   r   r$   r!   r   s     r*   r@  z,HTMLTokenizer.afterDoctypeSystemKeywordState)  rF  r+   c                 4   | j                   j                         }|t        v r	 y|dk(  r!d| j                  d<   | j                  | _        y|dk(  r!d| j                  d<   | j                  | _        y|dk(  rk| j                  j                  t        d   dd       d	| j                  d
<   | j                  j                  | j                         | j                  | _        y|t        u rk| j                  j                  t        d   dd       d	| j                  d
<   | j                  j                  | j                         | j                  | _        y| j                  j                  t        d   dd       d	| j                  d
<   | j                  | _        y)Nr   r:   r  r   r   r-   rD  r.   Fr  r1  T)r   rG   r   r$   rS  r"   rT  r1   rH   r   r!   r   rA  r   s     r*   rW  z0HTMLTokenizer.beforeDoctypeSystemIdentifierState=  s   {{!?"0 / T\,.Dj)FFDJ* ) S[,.Dj)FFDJ$ # S[OO""J|,D$@$B C+0Di(OO""4#4#45DJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ 	 OO""J|,D$@$B C+0Di(//DJr+   c                    | j                   j                         }|dk(  r| j                  | _        y|dk(  r=| j                  j                  t        d   dd       | j                  dxx   dz  cc<   y|dk(  rk| j                  j                  t        d   d	d       d
| j                  d<   | j                  j                  | j                         | j                  | _        y|t        u rk| j                  j                  t        d   dd       d
| j                  d<   | j                  j                  | j                         | j                  | _        y| j                  dxx   |z  cc<   y)Nr   r{   r-   r|   r.   r  r?   r   rH  Fr  r1  T
r   rG   !afterDoctypeSystemIdentifierStater"   r1   rH   r   r$   r!   r   r   s     r*   rS  z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStateZ  rN  r+   c                    | j                   j                         }|dk(  r| j                  | _        y|dk(  r=| j                  j                  t        d   dd       | j                  dxx   dz  cc<   y|dk(  rk| j                  j                  t        d   d	d       d
| j                  d<   | j                  j                  | j                         | j                  | _        y|t        u rk| j                  j                  t        d   dd       d
| j                  d<   | j                  j                  | j                         | j                  | _        y| j                  dxx   |z  cc<   y)Nr   r{   r-   r|   r.   r  r?   r   rH  Fr  r1  TrZ  r   s     r*   rT  z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStater  rP  r+   c                    | j                   j                         }|t        v r	 y|dk(  r7| j                  j	                  | j
                         | j                  | _        y|t        u rk| j                  j	                  t        d   dd       d| j
                  d<   | j                  j	                  | j
                         | j                  | _        y| j                  j	                  t        d   dd       | j                  | _        y)	Nr   r-   r1  r.   Fr  rD  T)r   rG   r   r1   rH   r$   r!   r"   r   r   rA  r   s     r*   r[  z/HTMLTokenizer.afterDoctypeSystemIdentifierState  s    {{!?"  S[OO""4#4#45DJ  S[OO""J|,D$4$6 7+0Di(OO""4#4#45DJ
  OO""J|,D$@$B C//DJr+   c                 f   | j                   j                         }|dk(  r7| j                  j                  | j                         | j
                  | _        y|t        u rR| j                   j                  |       | j                  j                  | j                         | j
                  | _        y	 y)Nr   T)	r   rG   r1   rH   r$   r!   r"   r   rN   r   s     r*   rA  zHTMLTokenizer.bogusDoctypeState  s    {{!3;OO""4#4#45DJ  S[KKd#OO""4#4#45DJ  r+   c                    g }	 |j                  | j                  j                  d             |j                  | j                  j                  d             | j                  j                         }|t        k(  rn0|dk(  sJ |d   dd  dk(  r|d   d d |d<   n|j                  |       dj                  |      }|j                  d      }|d	kD  rGt        |      D ]'  }| j                  j                  t        d
   dd       ) |j                  dd      }|r%| j                  j                  t        d   |d       | j                  | _        y)NT]r   rZ   z]]r:   r{   r   r-   r|   r.   r?   r_   )rH   r   r   rG   r   rJ   countranger1   r   r   r!   r"   )r'   r0   rG   	nullCountr   s        r*   r  zHTMLTokenizer.cdataSectionState  sB   KK..s34KK..s34;;##%Ds{s{"{8BC=D(#Bx}DHKK%  wwt}JJx(	q=9% F&&
<0H0C(E FF <<(3DOO""J|,D,0$2 3^^
r+   )Nr   )N__name__
__module____qualname____doc__r&   r6   rU   rl   rn   ry   r!   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r$  r"  r'  r  r+  r-  r/  r?  rE  rI  rJ  rM  rR  r@  rW  rS  rT  r[  rA  r  __classcell__)r)   s   @r*   r   r      s   
.0 FPNT`H
$8:
:
$$!F0,		8		8	8((,	8 *.2	 <4l@ D&&2($+Z..$&>."421f(:00<4(:00&r+   r   N) 
__future__r   r   r   sixr   rL   collectionsr   r   sysr	   	constantsr   r   r   r   r   r   r   r   r   r   _inputstreamr   _trier   r`   dictru   objectr    r+   r*   <module>rt     sX    B B  *  &  5 - - 0 , ) H~6LLlF lr+   