
    V=^=                     n   d dl mZmZmZ d dlmZ d dlZd dlmZm	Z	 ddl
mZmZmZ ddl
mZmZmZ ddlmZmZ d d	lmZ d
j-                  e      dz   Z ej0                  dez   dz         Z ej0                  dez   dz         Zi Z ed      dk(  Z e ej>                               D ]f  \  Z Z!er ee!      dkD  ses ee!      dkD  r!e!dk7  s' ee!      dk(  r ejD                  e!      Z!n e#e!      Z!e!evse jI                         sbe ee!<   h d Z% ede%       ddZ& G d de'      Z( G d de)      Z*y)    )absolute_importdivisionunicode_literals)	text_typeN)register_errorxmlcharrefreplace_errors   )voidElementsbooleanAttributesspaceCharacters)rcdataElementsentitiesxmlEntities)treewalkers_utils)escape z"'=<>`[]u_    	
 /`  ᠎᠏               　]u   􏿿   &c           
          t        | t        t        f      rmg }g }d}t        | j                  | j
                  | j                         D ]  \  }}|rd}|| j
                  z   }t        j                  | j                  |t        | j                  |dz   g             r(t        j                  | j                  ||dz          }d}nt        |      }|j                  |        |D ]  }t        j                  |      }	|	rF|j                  d       |j                  |	       |	j                  d      rN|j                  d       `|j                  dt!        |      dd  z          dj#                  |      | j                  fS t%        |       S )NFr   Tr   ;z&#x%s;r   )
isinstanceUnicodeEncodeErrorUnicodeTranslateError	enumerateobjectstartendr   isSurrogatePairminsurrogatePairToCodepointordappend_encode_entity_mapgetendswithhexjoinr   )
excres
codepointsskipicindex	codepointcpes
             5/usr/lib/python3/dist-packages/html5lib/serializer.pyhtmlentityreplace_errorsr6   *   s[   #*,ABC
cjj377;< 
	)DAq		ME%%cjjsCGGUQY;O7P&QR";;CJJuUUVY<WX	F	i(
	)  	5B"&&r*A

3

1zz#JJsO

8s2wqr{34	5 cgg&&',,    htmlentityreplacec                 r    t        j                  |      }t        di |}|j                   ||       |      S )a  Serializes the input token stream using the specified treewalker

    :arg input: the token stream to serialize

    :arg tree: the treewalker to use

    :arg encoding: the encoding to use

    :arg serializer_opts: any options to pass to the
        :py:class:`html5lib.serializer.HTMLSerializer` that gets created

    :returns: the tree serialized as a string

    Example:

    >>> from html5lib.html5parser import parse
    >>> from html5lib.serializer import serialize
    >>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
    >>> serialize(token_stream, omit_optional_tags=False)
    '<html><head></head><body><p>Hi!</p></body></html>'

     )r   getTreeWalkerHTMLSerializerrender)inputtreeencodingserializer_optswalkerss         r5   	serializerD   K   s6    0 &&t,F))A88F5M8,,r7   c                   r    e Zd ZdZdZdZdZdZdZdZ	dZ
dZdZdZdZdZdZdZd Zd Zd Zdd
ZddZddZy	)r<   legacy"TF)quote_attr_values
quote_charuse_best_quote_charomit_optional_tagsminimize_boolean_attributesuse_trailing_solidusspace_before_trailing_solidusescape_lt_in_attrsescape_rcdataresolve_entitiesalphabetical_attributesinject_meta_charsetstrip_whitespacesanitizec                 D   t        |      t        | j                        z
  }t        |      dkD  r t        dt	        t        |            z        d|v rd| _        | j                  D ])  }t        | ||j                  |t        | |                   + g | _
        d| _        y)aB
  Initialize HTMLSerializer

        :arg inject_meta_charset: Whether or not to inject the meta charset.

            Defaults to ``True``.

        :arg quote_attr_values: Whether to quote attribute values that don't
            require quoting per legacy browser behavior (``"legacy"``), when
            required by the standard (``"spec"``), or always (``"always"``).

            Defaults to ``"legacy"``.

        :arg quote_char: Use given quote character for attribute quoting.

            Defaults to ``"`` which will use double quotes unless attribute
            value contains a double quote, in which case single quotes are
            used.

        :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
            values.

            Defaults to ``False``.

        :arg escape_rcdata: Whether to escape characters that need to be
            escaped within normal elements within rcdata elements such as
            style.

            Defaults to ``False``.

        :arg resolve_entities: Whether to resolve named character entities that
            appear in the source tree. The XML predefined entities &lt; &gt;
            &amp; &quot; &apos; are unaffected by this setting.

            Defaults to ``True``.

        :arg strip_whitespace: Whether to remove semantically meaningless
            whitespace. (This compresses all whitespace to a single space
            except within ``pre``.)

            Defaults to ``False``.

        :arg minimize_boolean_attributes: Shortens boolean attributes to give
            just the attribute value, for example::

              <input disabled="disabled">

            becomes::

              <input disabled>

            Defaults to ``True``.

        :arg use_trailing_solidus: Includes a close-tag slash at the end of the
            start tag of void elements (empty elements whose end tag is
            forbidden). E.g. ``<hr/>``.

            Defaults to ``False``.

        :arg space_before_trailing_solidus: Places a space immediately before
            the closing slash in a tag using a trailing solidus. E.g.
            ``<hr />``. Requires ``use_trailing_solidus=True``.

            Defaults to ``True``.

        :arg sanitize: Strip all unsafe or unknown constructs from output.
            See :py:class:`html5lib.filters.sanitizer.Filter`.

            Defaults to ``False``.

        :arg omit_optional_tags: Omit start/end tags that are optional.

            Defaults to ``True``.

        :arg alphabetical_attributes: Reorder attributes to be in alphabetical order.

            Defaults to ``False``.

        r   z2__init__() got an unexpected keyword argument '%s'rI   FN)	frozensetoptionslen	TypeErrornextiterrJ   setattrr'   getattrerrorsstrict)selfkwargsunexpected_argsattrs       r5   __init__zHTMLSerializer.__init__   s    ^ $F+i.EE!#PSWX\]lXmSnnoo6!',D$LL 	GDD$

4t1D EF	Gr7   c                 z    t        |t              sJ | j                  r|j                  | j                  d      S |S )Nr8   r   r   r@   encodera   strings     r5   rh   zHTMLSerializer.encode   s4    &),-,====0CDDMr7   c                 z    t        |t              sJ | j                  r|j                  | j                  d      S |S )Nr`   rg   ri   s     r5   encodeStrictzHTMLSerializer.encodeStrict   s3    &),-,====99Mr7   Nc              #   
  K   || _         d}g | _        |r| j                  rddlm}  |||      }| j
                  rddlm}  ||      }| j                  rddlm}  ||      }| j                  rddl
m}  ||      }| j                  rddlm}  ||      }|D ]  }|d   }|dk(  rd|d   z  }|d   r|d	|d   z  z  }n
|d
   r|dz  }|d
   rS|d
   j                  d      dk\  r+|d
   j                  d      dk\  r| j                  d       d}nd}|d||d
   |z  }|dz  }| j                  |       |dv ri|dk(  s|rA|r(|d   j                  d      dk\  r| j                  d       | j!                  |d          | j!                  t#        |d                |dv r|d   }	| j                  d|	z         |	t$        v r| j&                  sd}n|r| j                  d       |d   j)                         D ]  \  \  }
}}|}|}| j                  d       | j                  |       | j*                  rB|t-        j.                  |	t1                     vsa|t-        j.                  dt1                     vs| j                  d       | j2                  dk(  st5        |      dk(  rd}nY| j2                  dk(  rt6        j9                  |      d u}n2| j2                  dk(  rt:        j9                  |      d u}nt=        d       |j?                  d!d"      }| j@                  r|j?                  d#d$      }|r| jB                  }| jD                  rd|v rd|vrd}n
d|v rd|vrd}|dk(  r|j?                  dd%      }n|j?                  dd&      }| j                  |       | j!                  |       | j                  |       | j!                  |        |	tF        v r?| jH                  r3| jJ                  r| j                  d'       n| j                  d(       | j!                  d       |d)k(  r;|d   }	|	t$        v rd}n|r| j                  d       | j                  d*|	z         |d+k(  rE|d   }|j                  d,      dk\  r| j                  d-       | j                  d.|d   z         :|d/k(  r^|d   }	|	d0z   }|tL        vr| j                  d1|	z         | jN                  r|tP        vr
tL        |   }nd2|	z  }| j                  |       | j                  |d           y w)3NFr	   )FiltertypeDoctypez<!DOCTYPE %snamepublicIdz PUBLIC "%s"systemIdz SYSTEMrG   r   'zBSystem identifier contains both single and double quote characters >)
CharactersSpaceCharactersrx   dataz</zUnexpected </ in CDATA)StartTagEmptyTagz<%sTz+Unexpected child element of a CDATA elementr   =alwaysspecrF   z?quote_attr_values must be one of: 'always', 'spec', or 'legacy'r   z&amp;<z&lt;z&#39;z&quot;z //EndTagz</%s>Commentz--zComment contains --z	<!--%s-->Entityr   zEntity %s not recognizedz&%s;))r@   r_   rS   filters.inject_meta_charsetrn   rR   filters.alphabeticalattributesrT   filters.whitespacerU   filters.sanitizerrK   filters.optionaltagsfindserializeErrorrl   rh   r   r   rP   itemsrL   r   r'   tuplerH   rY   _quoteAttributeSpecsearch_quoteAttributeLegacy
ValueErrorreplacerO   rI   rJ   r
   rM   rN   r   rQ   r   )ra   
treewalkerr@   in_cdatarn   tokenro   doctyperI   rq   _	attr_name
attr_valuekv
quote_attrry   keys                     r5   rD   zHTMLSerializer.serialize   sX     00;
H5J ''>
+J   2
+J==1
+J""4
+J l	3E=Dy (5=8$~j0AAAG:&y(G$Z(--c2a7 ,11#6!; //0tu%(
%(
Jj8I:VVG3''00::,,E&M$6$6t$<$A++,DE++eFm44++fU6]&;<<11V}''55>)$2D2D#H''(UV27-2E2E2G &1.NQ	J!A"A++C00++A..;;"3"7"7eg"FF"3"7"7EG"DD"//4411X=Q1)-J!33v=)<)C)CA)Fd)RJ!33x?)>)E)Ea)HPT)TJ", .M #N NIIc7322 !		#v 6A%)-J#77#&!8114J%(AX#Q,14J)S0$%IIc7$;$%IIc8$<"&"3"3J"??"&++a.0"&"3"3J"??"&++a.0M&1N <'D,E,E99"//55"//44kk#&&!V}>)$H''(UV''$77"V}99T?a'''(=>''eFm(CDD!V}Sjh&''(BT(IJ((S-C#C=D!D=D''-- ##E&M2Yl	3s   I$U' UKUc                     |r*dj                  t        | j                  ||                  S dj                  t        | j                  |                  S )an  Serializes the stream from the treewalker into a string

        :arg treewalker: the treewalker to serialize

        :arg encoding: the string encoding to use

        :returns: the serialized tree

        Example:

        >>> from html5lib import parse, getTreeWalker
        >>> from html5lib.serializer import HTMLSerializer
        >>> token_stream = parse('<html><body>Hi!</body></html>')
        >>> walker = getTreeWalker('etree')
        >>> serializer = HTMLSerializer(omit_optional_tags=False)
        >>> serializer.render(walker(token_stream))
        '<html><head></head><body>Hi!</body></html>'

        r7   r   )r*   listrD   )ra   r   r@   s      r5   r=   zHTMLSerializer.renderw  sC    ( 88D
H!EFGG774z :;<<r7   c                 ^    | j                   j                  |       | j                  rt        y N)r_   r%   r`   SerializeError)ra   ry   s     r5   r   zHTMLSerializer.serializeError  s&    4 ;;   r7   r   )zXXX ERROR MESSAGE NEEDED)__name__
__module____qualname__rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rX   re   rh   rl   rD   r=   r   r:   r7   r5   r<   r<   h   s     !J "& $(! M $H/GWrG3R=2!r7   r<   c                       e Zd ZdZy)r   zError in serialized treeN)r   r   r   __doc__r:   r7   r5   r   r     s    "r7   r   )etreeN)+
__future__r   r   r   sixr   recodecsr   r   	constantsr
   r   r   r   r   r   r   r   r   xml.sax.saxutilsr   r*   _quoteAttributeSpecCharscompiler   r   r&   rY   _is_ucs4r   r   r   r   r#   r$   islowerr6   rD   r   r<   	Exceptionr   r:   r7   r5   <module>r      sJ   B B  	 ; G G < < ! #77?3i?  bjj'?!?#!EF "

3)A#A$-$- .   |!!" &DAq	c!fqjc!fqjCxq6Q;///2AAA&&!))+$%q!&-< "$< =-:l!V l!^		Y 	r7   