
    e                     6   d Z dZddlZddlmZ ddlmZ ddlZddlmZm	Z	 ddl
mZ ddlZddlZddlZddlZddlZddlZddlZddlZd Zdd	Z G d
 de      Zd ZdZdZddZddZddZddZddZedk(  r! eej>                  jA                                yy)z=Diagnostic functions, mainly for use when doing tech support.MIT    N)BytesIO)
HTMLParser)BeautifulSoup__version__)builder_registryc           	         t        dt        z         t        dt        j                  z         g d}|D ]F  }t        j
                  D ]  }||j                  v s ' |j                  |       t        d|z         H d|v rM|j                  d       	 ddl	m
} t        d	d
j                  t        t        |j                              z         d|v r	 ddl}t        d|j                  z         t#        | d      r| j%                         } |D ]V  }t        d|z         d}	 t'        | |      }	d}|r't        d|z         t        	j/                                t        d       X y# t        $ r}t        d       Y d}~d}~ww xY w# t        $ r}t        d       Y d}~d}~ww xY w# t(        $ r,}t        d|z         t+        j,                          Y d}~d}~ww xY w)zDiagnostic suite for isolating common problems.

    :param data: A string containing markup that needs to be explained.
    :return: None; diagnostics are printed to standard output.
    z'Diagnostic running on Beautiful Soup %szPython version %s)html.parserhtml5liblxmlz;I noticed that %s is not installed. Installing it may help.r   zlxml-xmlr   etreezFound lxml version %s.z.lxml is not installed or couldn't be imported.Nr   zFound html5lib version %sz2html5lib is not installed or couldn't be imported.readz#Trying to parse your markup with %sF)featuresT%s could not parse the markup.z#Here's what %s did with the markup:zP--------------------------------------------------------------------------------)printr   sysversionr   buildersr   removeappendr   r   joinmapstrLXML_VERSIONImportErrorr   hasattrr   r   	Exception	traceback	print_excprettify)
databasic_parsersnamebuilderr   er   parsersuccesssoups
             ./usr/lib/python3/dist-packages/bs4/diagnose.pydiagnoser,      s    
4{BD	,.7M '00 	Gw'''	   &M Z(	B"*SXXc#e>P>P6Q-RRT ]"	F.1E1EEG
 tVyy{ 4v=?	" 7DG 86AC4==?$x!  	B@B B	B  	FDF F	F  	"3f<>!!	"sH   ;E( F	 "F*(	F1FF		F'F""F'*	G3"GGc                    ddl m} |j                  dd      }t        | t              r| j                  d      } t        |       } |j                  |f||d|D ]-  \  }}t        |d|j                  dd|j                         / y	)
a  Print out the lxml events that occur during parsing.

    This lets you see how lxml parses a document when no Beautiful
    Soup code is running. You can use this to determine whether
    an lxml-specific problem is in Beautiful Soup's lxml tree builders
    or in lxml itself.

    :param data: Some markup.
    :param html: If True, markup will be parsed with lxml's HTML parser.
       if False, lxml's XML parser will be used.
    r   r   recoverTutf8)htmlr.   z, z>4N)r   r   pop
isinstancer   encoder   	iterparser   tagtext)r#   r0   kwargsr   r.   readereventelements           r+   
lxml_tracer;   N   s     jjD)G${{6"T]F)%//7.4 Dw 	w{{GLLACD    c                   L    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zy)AnnouncingParserzSubclass of HTMLParser that announces parse events, without doing
    anything else.

    You can use this to get a picture of how html.parser sees a given
    document. The easiest way to do this is to call `htmlparser_trace`.
    c                     t        |       y )N)r   )selfss     r+   _pzAnnouncingParser._pl   s	    ar<   c                 ,    | j                  d|z         y )Nz%s STARTrB   )r@   r%   attrss      r+   handle_starttagz AnnouncingParser.handle_starttago   s    
T!"r<   c                 ,    | j                  d|z         y )Nz%s ENDrD   r@   r%   s     r+   handle_endtagzAnnouncingParser.handle_endtagr   s    4 r<   c                 ,    | j                  d|z         y )Nz%s DATArD   r@   r#   s     r+   handle_datazAnnouncingParser.handle_datau       	D !r<   c                 ,    | j                  d|z         y )Nz
%s CHARREFrD   rH   s     r+   handle_charrefzAnnouncingParser.handle_charrefx       t#$r<   c                 ,    | j                  d|z         y )Nz%s ENTITYREFrD   rH   s     r+   handle_entityrefz!AnnouncingParser.handle_entityref{   s    %&r<   c                 ,    | j                  d|z         y )Nz
%s COMMENTrD   rK   s     r+   handle_commentzAnnouncingParser.handle_comment~   rP   r<   c                 ,    | j                  d|z         y )Nz%s DECLrD   rK   s     r+   handle_declzAnnouncingParser.handle_decl   rM   r<   c                 ,    | j                  d|z         y )Nz%s UNKNOWN-DECLrD   rK   s     r+   unknown_declzAnnouncingParser.unknown_decl   s    !D()r<   c                 ,    | j                  d|z         y )Nz%s PIrD   rK   s     r+   	handle_pizAnnouncingParser.handle_pi   s    $r<   N)__name__
__module____qualname____doc__rB   rF   rI   rL   rO   rR   rT   rV   rX   rZ    r<   r+   r>   r>   d   s9    #!"%'%"* r<   r>   c                 :    t               }|j                  |        y)zPrint out the HTMLParser events that occur during parsing.

    This lets you see how HTMLParser parses a document when no
    Beautiful Soup code is running.

    :param data: Some markup.
    N)r>   feed)r#   r(   s     r+   htmlparser_tracerb      s     F
KKr<   aeioubcdfghjklmnpqrstvwxyzc                     d}t        |       D ]/  }|dz  dk(  rt        }nt        }|t        j                  |      z  }1 |S )z#Generate a random word-like string.    r   )range_consonants_vowelsrandomchoice)lengthrA   its       r+   rwordrp      sI    
A6] q5A:AA	V]]1 Hr<   c                 D    dj                  d t        |       D              S )z'Generate a random sentence-like string. c              3   Z   K   | ]#  }t        t        j                  d d             % yw)   	   N)rp   rk   randint).0rn   s     r+   	<genexpr>zrsentence.<locals>.<genexpr>   s      F1E&..1-.Fs   )+)r   rh   )rm   s    r+   	rsentencery      s    88FfFFFr<   c           	         g d}g }t        |       D ]  }t        j                  dd      }|dk(  r*t        j                  |      }|j	                  d|z         H|dk(  r/|j	                  t        t        j                  dd                   ||dk(  st        j                  |      }|j	                  d|z          d	d
j                  |      z   dz   S )z+Randomly generate an invalid HTML document.)pdivspanrn   bscripttabler      z<%s>   rt   rg   z</%s>z<html>
z</html>)rh   rk   rv   rl   r   ry   r   )num_elements	tag_nameselementsrn   rl   tag_names         r+   rdocr      s    AIH<  0!$Q;}}Y/HOOFX-.q[OOIfnnQq&9:;q[}}Y/HOOGh./0 dii))I55r<   c                    t        dt        z         t        |       }t        dt        |      z         dddgddfD ]Q  }d}	 t	        j                         }t        ||      }t	        j                         }d}|s?t        d|z
  fz         S ddl	m
} t	        j                         }|j                  |       t	        j                         }t        d||z
  z         dd
l}	|	j                         }t	        j                         }|j                  |       t	        j                         }t        d||z
  z         y
# t        $ r,}t        d	|z         t        j                          Y d
}~d
}~ww xY w)z.Very basic head-to-head performance benchmark.z1Comparative parser benchmark on Beautiful Soup %sz3Generated a large invalid HTML document (%d bytes).r   r0   r   r
   FTr   Nz"BS4+%s parsed the markup in %.2fs.r   r   z$Raw lxml parsed the markup in %.2fs.z(Raw html5lib parsed the markup in %.2fs.)r   r   r   lentimer   r   r    r!   r   r   HTMLr   r   parse)
r   r#   r(   r)   ar*   r~   r'   r   r   s
             r+   benchmark_parsersr      sD   	>LND	@3t9LNFF+ZG J	"		A v.D		AG 761Q3-GIJ 		A	JJt		A	1QqS9;  "F		A
LL		A	51=?#  	"3f<>!!	"s   6E  	E5	"E00E5c                    t        j                         }|j                  }t        |       }t	        t
        ||      }t        j                  d|||       t        j                  |      }|j                  d       |j                  dd       y)z7Use Python's profiler on a randomly generated document.)bs4r#   r(   zbs4.BeautifulSoup(data, parser)
cumulativez_html5lib|bs42   N)tempfileNamedTemporaryFiler%   r   dictr   cProfilerunctxpstatsStats
sort_statsprint_stats)r   r(   
filehandlefilenamer#   varsstatss          r+   profiler      sp    ,,.JHDCd62DOO5dHMLL"E	\"	or*r<   __main__)T)   )rt   )i  )順 )r   r   )!r^   __license__r   ior   html.parserr   r   r   r   bs4.builderr   osr   rk   r   r   r    r   r,   r;   r>   rb   rj   ri   rp   ry   r   r   r   r[   stdinr   r_   r<   r+   <module>r      s    C    " 
 * ( 	      
 6pD,$ z $ L	 %	G6$@@+ zSYY^^ r<   