o
    2g\$                  	   @   sj  zd dl ZW n ey   d dlZY nw d dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZmZmZmZ d dlmZ dd	lmZmZmZmZmZmZ e
ed
dedefddZe
ed
dedefddZe
ed
dedee fddZ e
ed
dedefddZ!dedefddZ"e
ed
dedefddZ#e
ed
dedefddZ$e
ed
dedefddZ%e
ed
dedefddZ&e
ed
dedefdd Z'dedefd!d"Z(e
ed
dedefd#d$Z)e
ed
dedefd%d&Z*e
ed
dedefd'd(Z+e
ed
dedefd)d*Z,e
ed
dedefd+d,Z-e
e.ed
d-edefd.d/Z/dRd1e0d2e1dee fd3d4Z2e
d5d
d6edefd7d8Z3d1e0deee e0f fd9d:Z4d;edefd<d=Z5dSd?ed@edefdAdBZ6dCedee fdDdEZ7dFedGede8fdHdIZ9dFedGedefdJdKZ:dLej;dMfd6edNe1dOeddfdPdQZ<dS )T    N)IncrementalDecoder)aliases)	lru_cache)findall)ListOptionalSetTupleUnion)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATION)maxsize	characterreturnc                 C   sT   zt | }W n
 ty   Y dS w d|v p)d|v p)d|v p)d|v p)d|v p)d|v S )NFz
WITH GRAVEz
WITH ACUTEzWITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz
WITH TILDEunicodedataname
ValueErrorr   description r   g/var/www/mastermindingenieria.com/MONITOR/venv/lib/python3.10/site-packages/charset_normalizer/utils.pyis_accentuated   s    r   c                 C   s.   t | }|s	| S |d}tt|d dS )N r      )r   decompositionsplitchrint)r   
decomposedcodesr   r   r   remove_accent*   s
   

r'   c                 C   s.   t | }t D ]\}}||v r|  S qdS )zK
    Retrieve the Unicode range official name from a single character.
    N)ordr   items)r   character_ord
range_name	ord_ranger   r   r   unicode_range5   s   r-   c                 C   *   z
t | }W d|v S  ty   Y dS w )NFLATINr   r   r   r   r   is_latinC   s   r0   c                 C   s&   z|  d W dS  ty   Y dS w )NasciiFT)encodeUnicodeEncodeErrorr   r   r   r   is_asciiL   s   r5   c                 C   s2   t | }d|v rdS t| }|d u rdS d|v S )NPTFPunctuationr   categoryr-   r   character_categorycharacter_ranger   r   r   is_punctuationT   s   
r=   c                 C   s:   t | }d|v sd|v rdS t| }|d u rdS d|v S )NSNTFFormsr8   r:   r   r   r   	is_symbolc   s   
rA   c                 C   s   t | }|d u r
dS d|v S )NF	Emoticons)r-   )r   r<   r   r   r   is_emoticonr   s   rC   c                 C   s&   |   s| dv r
dS t| }d|v S )N>      ｜+,;<>TZ)isspacer   r9   r   r;   r   r   r   is_separator|   s   
rM   c                 C   s   |   |  kS N)islowerisupperr4   r   r   r   is_case_variable   s   rQ   c                 C   s   t | }|dkS )NCo)r   r9   rL   r   r   r   is_private_use_only   s   
rS   c                 C   r.   )NFCJKr   r   character_namer   r   r   is_cjk      rW   c                 C   r.   )NFHIRAGANAr   rU   r   r   r   is_hiragana   rX   rZ   c                 C   r.   )NFKATAKANAr   rU   r   r   r   is_katakana   rX   r\   c                 C   r.   )NFHANGULr   rU   r   r   r   	is_hangul   rX   r^   c                 C   r.   )NFTHAIr   rU   r   r   r   is_thai   rX   r`   r+   c                    s   t  fddtD S )Nc                 3   s    | ]}| v V  qd S rN   r   ).0keywordr+   r   r   	<genexpr>   s    z-is_unicode_range_secondary.<locals>.<genexpr>)anyr   rc   r   rc   r   is_unicode_range_secondary   s   rf      sequencesearch_zonec                 C   s   t | tstt| }tt| dt|| jddd}t|dkr$dS |D ]'}| 	dd}t
 D ]\}}||krB|    S ||krL|    S q4q&dS )zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nr1   ignoreerrorsr   -_)
isinstancebytes	TypeErrorlenr   r   mindecodelowerreplacer   r)   )rh   ri   seq_lenresultsspecified_encodingencoding_aliasencoding_ianar   r   r   any_specified_encoding   s&   
r|      r   c                 C   s    | dv pt td| jtS )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   utf_7utf_8utf_16utf_32	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sigencodings.{})
issubclass	importlibimport_moduleformatr   r   )r   r   r   r   is_multi_byte_encoding   s   
r   c                 C   sJ   t D ] }t | }t|tr|g}|D ]}| |r!||f    S qqdS )z9
    Identify and extract SIG/BOM in given sequence.
    )N    )r   ro   rp   
startswith)rh   iana_encodingmarksmarkr   r   r   identify_sig_or_bom   s   

r   r   c                 C   s   | dvS )N>   r   r   r   )r   r   r   r   should_strip_sig_or_bom  s   r   Tcp_namestrictc                 C   sL   |   dd} t D ]\}}| ||fv r|  S q|r$td| | S )Nrm   rn   z Unable to retrieve IANA for '{}')ru   rv   r   r)   r   r   )r   r   rz   r{   r   r   r   	iana_name  s   r   decoded_sequencec                 C   s4   t  }| D ]}t|}|d u rq|| qt|S rN   )setr-   addlist)r   rangesr   r<   r   r   r   
range_scan  s   r   iana_name_aiana_name_bc           	      C   s   t | st |r
dS td| j}td|j}|dd}|dd}d}tdD ]}t|g}||||krA|d7 }q,|d S )	Ng        r   rj   rk   r      r      )r   r   r   r   r   rangerp   rt   )	r   r   	decoder_a	decoder_bid_aid_bcharacter_match_countito_be_decodedr   r   r   cp_similarity+  s   


r   c                 C   s   | t v o	|t |  v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   r   r   r   is_cp_similar@  s   
r   charset_normalizerz)%(asctime)s | %(levelname)s | %(message)slevelformat_stringc                 C   s:   t | }|| t  }|t | || d S rN   )logging	getLoggersetLevelStreamHandlersetFormatter	Formatter
addHandler)r   r   r   loggerhandlerr   r   r   set_logging_handlerK  s
   

r   )rg   )T)=unicodedata2r   ImportErrorr   r   codecsr   encodings.aliasesr   	functoolsr   rer   typingr   r   r   r	   r
   _multibytecodecr   constantr   r   r   r   r   r   strboolr   r'   r-   r0   r5   r=   rA   rC   rM   rQ   rS   rW   rZ   r\   r^   r`   rr   rf   rp   r$   r|   r   r   r   r   r   floatr   r   INFOr   r   r   r   r   <module>   s     

							