o
    Gb/                     @   s   d dl Z d dlZd dlZzzd dlZW n ey    d dlZY nw W n ey/   dZdZY nw dd ZddlmZm	Z	m
Z
 dZdZdZd	Zd
ZdZedZedZdd ZdS )    Nc                 C   s   t | d pdS )Nencoding )chardetdetect)data r   ?/usr/local/lib/python3.10/dist-packages/feedparser/encodings.pylazy_chardet_encoding*   s   r	      )CharacterEncodingOverrideCharacterEncodingUnknownNonXMLContentTypes   Los    < ?s   < ? s      <s   <   z  z^<\?xml[^>]*?>s#   ^<\?.*encoding=[\'"](.*?)[\'"].*\?>c              
   C   s  d}d}|dd t jkrd}|dd }n|dd t jkr(d}|dd }n||dd t jkrB|dd tkrBd}|dd }nb|dd t jkr\|dd tkr\d}|dd }nH|dd	 t jkrnd
}|d	d }n6|dd tkryd}n+|dd tkrd}n |dd t	krd}n|dd t
krd}n
|dd tkrd}|}z|r||d
}W n ttfy   d}Y nw t|}|r| d d
 }|r|dv r|}| dpd}t|\}}|dddd}	t|	tr|	d
d}	d}
d}d}||v s|dr|drd}
|	p|pd
}n8||v s0|dr8|dr8d}
|	p6d}n|drD|	pBd}n| rRd| vrR|pPd}n|pVd
}| dkr`d}| dkrid}d}| r|
sd| v r}d| d  }nd}t|}d}g }|||td
ddfD ]M}t|r||}|sq||v rq|| z||}W n ttfy   Y qw d}d }t !|rt "||}n|d! | }|d
} |st#d"d#||f  }d}n||krt$d$||f }|}||d%< |rd&|d'< ||d(< |S ))z|Detect and convert the character encoding to UTF-8.

    http_headers is a dictionary
    data is a raw string (not Unicode)r   N   zutf-32bezutf-32le   zutf-16bezutf-16le   zutf-8cp037r   )u16zutf-16utf16utf_16u32zutf-32utf32utf_32ziso-10646-ucs-2ziso-10646-ucs-4Zcsucs4Z	csunicodezucs-2zucs-4zcontent-typecharset'ignore)zapplication/xmlzapplication/xml-dtdz&application/xml-external-parsed-entity)ztext/xmlztext/xml-external-parsed-entityzapplication/z+xmlr
   ztext/zus-asciiz
iso-8859-1gb2312gb18030z%s is not an XML media typezno Content-type specifiedzwindows-1252z
iso-8859-2z&<?xml version='1.0' encoding='utf-8'?>
z#document encoding unknown, I tried z>%s, %s, utf-8, windows-1252, and iso-8859-2 but nothing workedz)document declared as %s, but parsed as %sr   TZbozoZbozo_exception)%codecsBOM_UTF32_BEBOM_UTF32_LEBOM_UTF16_BE
ZERO_BYTESBOM_UTF16_LEBOM_UTF8EBCDIC_MARKERUTF16BE_MARKERUTF16LE_MARKERUTF32BE_MARKERUTF32LE_MARKERdecodeencodeUnicodeDecodeErrorLookupErrorRE_XML_PI_ENCODINGmatchgroupslowergetcgiparse_headerreplace
isinstancebytes
startswithendswithr   r	   callableappendRE_XML_DECLARATIONsearchsubr   r   )Zhttp_headersr   resultbom_encodingZxml_encodingZtempdataZxml_encoding_matchZhttp_content_typeparamsZhttp_encodingZacceptable_content_typeZapplication_content_typesZtext_content_typesZrfc3023_encodingerrormsgZknown_encodingtried_encodingsZproposed_encodingZnew_declarationr   r   r   convert_to_utf8G   s   /	""








rE   )r3   r   reZcchardetr   ImportErrorr	   
exceptionsr   r   r   r%   r&   r'   r(   r)   r"   compiler<   r.   rE   r   r   r   r   <module>   s2   	

