3
e\$                 @   s,  yd dl ZW n ek
r(   d dlZY nX d dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZmZ d dlmZ dd	lmZmZmZmZmZmZ e
ed
eedddZe
ed
eedddZe
ed
eee dddZ e
ed
eedddZ!eedddZ"e
ed
eedddZ#e
ed
eedddZ$e
ed
eedddZ%e
ed
eedddZ&e
ed
eedddZ'eedd d!Z(e
ed
eedd"d#Z)e
ed
eedd$d%Z*e
ed
eedd&d'Z+e
ed
eedd(d)Z,e
ed
eedd*d+Z-e
e.ed
eed,d-d.Z/dNe0e1ee d0d1d2Z2e
d3d
eed4d5d6Z3e0eee e0f d7d8d9Z4eed:d;d<Z5dOeeed>d?d@Z6eee dAdBdCZ7eee8dDdEdFZ9eeedDdGdHZ:dIej;dJfee1eddKdLdMZ<dS )P    N)IncrementalDecoder)aliases)	lru_cache)findall)ListOptionalSetTupleUnion)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATION)maxsize)	characterreturnc             C   sT   yt j| }W n tk
r"   dS X d|kpRd|kpRd|kpRd|kpRd|kpRd|kS )NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz
WITH TILDE)unicodedataname
ValueError)r   description r   \/var/www/html/StaffProfile/staffvenv/lib/python3.6/site-packages/charset_normalizer/utils.pyis_accentuated   s    r   c             C   s.   t j| }|s| S |jd}tt|d dS )N r      )r   decompositionsplitchrint)r   Z
decomposedcodesr   r   r   remove_accent*   s
    

r$   c             C   s.   t | }x tj D ]\}}||kr|S qW dS )zK
    Retrieve the Unicode range official name from a single character.
    N)ordr   items)r   Zcharacter_ord
range_nameZ	ord_ranger   r   r   unicode_range5   s
    r(   c             C   s,   yt j| }W n tk
r"   dS X d|kS )NFZLATIN)r   r   r   )r   r   r   r   r   is_latinC   s
    r)   c             C   s(   y| j d W n tk
r"   dS X dS )NasciiFT)encodeUnicodeEncodeError)r   r   r   r   is_asciiL   s
    r-   c             C   s2   t j| }d|krdS t| }|d kr*dS d|kS )NPTFZPunctuation)r   categoryr(   )r   character_categorycharacter_ranger   r   r   is_punctuationT   s    
r2   c             C   s:   t j| }d|ksd|krdS t| }|d kr2dS d|kS )NSNTFZForms)r   r/   r(   )r   r0   r1   r   r   r   	is_symbolc   s    
r5   c             C   s   t | }|d krdS d|kS )NFZ	Emoticons)r(   )r   r1   r   r   r   is_emoticonr   s    r6   c             C   s&   | j  s| d	krdS tj| }d|kS )
N   ｜+,;<>TZ>   r:   r8   r9   r<   r7   r;   )isspacer   r/   )r   r0   r   r   r   is_separator|   s    
r?   c             C   s   | j  | j kS )N)islowerisupper)r   r   r   r   is_case_variable   s    rB   c             C   s   t j| }|dkS )NCo)r   r/   )r   r0   r   r   r   is_private_use_only   s    
rD   c             C   s,   yt j| }W n tk
r"   dS X d|kS )NFCJK)r   r   r   )r   character_namer   r   r   is_cjk   s
    rG   c             C   s,   yt j| }W n tk
r"   dS X d|kS )NFZHIRAGANA)r   r   r   )r   rF   r   r   r   is_hiragana   s
    rH   c             C   s,   yt j| }W n tk
r"   dS X d|kS )NFZKATAKANA)r   r   r   )r   rF   r   r   r   is_katakana   s
    rI   c             C   s,   yt j| }W n tk
r"   dS X d|kS )NFZHANGUL)r   r   r   )r   rF   r   r   r   	is_hangul   s
    rJ   c             C   s,   yt j| }W n tk
r"   dS X d|kS )NFZTHAI)r   r   r   )r   rF   r   r   r   is_thai   s
    rK   )r'   r   c                s   t  fddtD S )Nc             3   s   | ]}| kV  qd S )Nr   ).0keyword)r'   r   r   	<genexpr>   s    z-is_unicode_range_secondary.<locals>.<genexpr>)anyr   )r'   r   )r'   r   is_unicode_range_secondary   s    rP      )sequencesearch_zoner   c             C   s   t | tstt| }tt| dt|| jddd}t|dkrHdS xJ|D ]B}|j j	dd}x,t
j D ] \}}||kr|S ||krl|S qlW qNW dS )zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nr*   ignore)errorsr   -_)
isinstancebytes	TypeErrorlenr   r   mindecodelowerreplacer   r&   )rR   rS   Zseq_lenresultsZspecified_encodingencoding_aliasencoding_ianar   r   r   any_specified_encoding   s     

rc      )r   r   c          
   C   s    | dkpt tjd
j| jtS )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    utf_8	utf_8_sigutf_16	utf_16_be	utf_16_leutf_32	utf_32_le	utf_32_beutf_7zencodings.{}>	   rg   rk   rm   rj   re   ri   rh   rf   rl   )
issubclass	importlibimport_moduleformatr   r   )r   r   r   r   is_multi_byte_encoding   s            rr   )rR   r   c             C   sJ   xDt D ]<}t | }t|tr"|g}x|D ]}| j|r(||fS q(W qW dS )z9
    Identify and extract SIG/BOM in given sequence.
    N    )Nrs   )r   rX   rY   
startswith)rR   iana_encodingZmarksmarkr   r   r   identify_sig_or_bom   s    



rw   )ru   r   c             C   s   | dkS )Nrg   rj   >   rg   rj   r   )ru   r   r   r   should_strip_sig_or_bom  s    rx   T)cp_namestrictr   c             C   sL   | j  jdd} x$tj D ]\}}| ||gkr|S qW |rHtdj| | S )NrV   rW   z Unable to retrieve IANA for '{}')r^   r_   r   r&   r   rq   )ry   rz   ra   rb   r   r   r   	iana_name  s    r{   )decoded_sequencer   c             C   s8   t  }x(| D ] }t|}|d kr"q|j| qW t|S )N)setr(   addlist)r|   rangesr   r1   r   r   r   
range_scan  s    
r   )iana_name_aiana_name_br   c       	      C   s   t | st |rdS tjdj| j}tjdj|j}|dd}|dd}d}x6tdD ]*}t|g}|j||j|krZ|d7 }qZW |d S )	Ng        zencodings.{}rT   )rU   r      r      )rr   ro   rp   rq   r   rangerY   r]   )	r   r   Z	decoder_aZ	decoder_bZid_aZid_bZcharacter_match_countiZto_be_decodedr   r   r   cp_similarity+  s    


r   c             C   s   | t ko|t |  kS )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   r   r   r   is_cp_similar@  s    r   Zcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)s)r   levelformat_stringr   c             C   s:   t j| }|j| t j }|jt j| |j| d S )N)logging	getLoggersetLevelStreamHandlersetFormatter	Formatter
addHandler)r   r   r   loggerhandlerr   r   r   set_logging_handlerK  s
    

r   )rQ   )T)=Zunicodedata2r   ImportErrorro   r   codecsr   Zencodings.aliasesr   	functoolsr   rer   typingr   r   r   r	   r
   Z_multibytecodecr   Zconstantr   r   r   r   r   r   strboolr   r$   r(   r)   r-   r2   r5   r6   r?   rB   rD   rG   rH   rI   rJ   rK   r[   rP   rY   r"   rc   rr   rw   rx   r{   r   floatr   r   INFOr   r   r   r   r   <module>   sr    

							
