
    N)fiR                        d dl Z d dlmZ d dlmZmZmZmZmZ ddl	m
Z
mZmZmZ ddlmZmZmZmZ ddlmZ ddlmZmZ dd	lmZmZmZmZmZmZmZ  e j@                  d
      Z! e jD                         Z#e#jI                   e jJ                  d             	 	 	 	 	 	 	 	 	 ddee&e'f   de(de(de)deee*      deee*      de+de+de)de+defdZ,	 	 	 	 	 	 	 	 	 ddede(de(de)deee*      deee*      de+de+de)de+defdZ-	 	 	 	 	 	 	 	 	 ddee*e&ef   de(de(de)deee*      deee*      de+de+de)de+defdZ.	 	 	 	 	 	 	 	 	 ddeee*ee&f   de(de(de)deee*      deee*      de+de+de)de+de+fdZ/y)     N)PathLike)BinaryIOListOptionalSetUnion   )coherence_ratioencoding_languagesmb_encoding_languagesmerge_coherence_ratios)IANA_SUPPORTEDTOO_BIG_SEQUENCETOO_SMALL_SEQUENCETRACE)
mess_ratio)CharsetMatchCharsetMatches)any_specified_encodingcut_sequence_chunks	iana_nameidentify_sig_or_bomis_cp_similaris_multi_byte_encodingshould_strip_sig_or_bomcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)s	sequencessteps
chunk_size	thresholdcp_isolationcp_exclusionpreemptive_behaviourexplainlanguage_thresholdenable_fallbackreturnc
                    t        | t        t        f      s#t        dj	                  t        |                   |rBt        j                  }
t        j                  t               t        j                  t               t        |       }|dk(  rqt        j                  d       |r@t        j                  t               t        j                  
xs t        j                          t#        t%        | dddg d      g      S |Dt        j'                  t        d	d
j)                  |             |D cg c]  }t+        |d       }}ng }|Dt        j'                  t        dd
j)                  |             |D cg c]  }t+        |d       }}ng }|||z  k  r!t        j'                  t        d|||       d}|}|dkD  r||z  |k  rt-        ||z        }t        |       t.        k  }t        |       t0        k\  }|r*t        j'                  t        dj	                  |             n+|r)t        j'                  t        dj	                  |             g }|rt3        |       nd}|,|j5                  |       t        j'                  t        d|       t7               }g }g }d}d}d}t#               }t9        |       \  }}|6|j5                  |       t        j'                  t        dt        |      |       |j5                  d       d|vr|j5                  d       |t:        z   D ]  }|r||vr|r||v r||v r|j=                  |       d}||k(  }|xr t?        |      }|dv r|st        j'                  t        d|       `|dv r|st        j'                  t        d|       	 tA        |      }	 |r9|du r5tG        |du r| dt-        d       n| t        |      t-        d       |       ntG        |du r| n| t        |      d |      }d} |D ]  }!tM        ||!      sd}  n | rt        j'                  t        d|!       "tO        |sdn
t        |      |t-        ||z              }"|xr |duxr t        |      |k  }#|#rt        j'                  t        d|       t-        t        |"      dz        }$tQ        |$d      }$d}%d}&g }'g }(	 tS        | ||"||||||	      D ]g  })|'j5                  |)       |(j5                  tU        |)||du xr dt        |      cxk  xr dk  nc              |(d    |k\  r|%dz  }%|%|$k\  s|sb|du sg n |&s$|r"|s 	 | t-        d"      d jW                  |d#$       |(rtY        |(      t        |(      z  nd}*|*|k\  s|%|$k\  rk|j5                  |       t        j'                  t        d&||%t[        |*d'z  d()             |	r+|dd|fv r$|&s"t%        | ||dg |      }+||k(  r|+}n
|dk(  r|+}n|+}t        j'                  t        d*|t[        |*d'z  d()             |st]        |      },nt_        |      },|,r3t        j'                  t        d+j	                  |tG        |,                   g }-|dk7  r8|'D ]3  })ta        |)||,rd,j)                  |,      nd      }.|-j5                  |.       5 tc        |-      }/|/r*t        j'                  t        d-j	                  |/|             |j5                  t%        | ||*||/|             ||ddfv r\|*d.k  rWt        j                  d/|       |r.t        j                  t               t        j                  
       t#        ||   g      c S ||k(  sSt        j                  d0|       |r.t        j                  t               t        j                  
       t#        ||   g      c S  t        |      dk(  r|s|s|rt        j'                  t        d1       |r2t        j                  d2|jd                         |j5                  |       nr|r||r|r|jf                  |jf                  k7  s|'t        j                  d3       |j5                  |       n(|r&t        j                  d4       |j5                  |       |r<t        j                  d5|ji                         jd                  t        |      dz
         nt        j                  d6       |r.t        j                  t               t        j                  
       |S c c}w c c}w # tB        tD        f$ r t        j'                  t        d|       Y ;w xY w# tH        tJ        f$ rQ}t        |tJ              s%t        j'                  t        d|tG        |             |j5                  |       Y d}~d}~ww xY w# tH        $ r4}t        j'                  t        d!|tG        |             |$}%d}&Y d}~d}~ww xY w# tH        $ rA}t        j'                  t        d%|tG        |             |j5                  |       Y d}~'d}~ww xY w)7af  
    Given a raw bytes sequence, return the best possibles charset usable to render str objects.
    If there is no results, it is a strong indicator that the source is binary/not text.
    By default, the process will extract 5 blocks of 512o each to assess the mess and coherence of a given sequence.
    And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.

    The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page
    but never take it for granted. Can improve the performance.

    You may want to focus your attention to some code page or/and not others, use cp_isolation and cp_exclusion for that
    purpose.

    This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.
    By default the library does not setup any handler other than the NullHandler, if you choose to set the 'explain'
    toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
    Custom logging format and handler can be set manually.
    z4Expected object of type bytes or bytearray, got: {0}r   z<Encoding detection on empty bytes, assuming utf_8 intention.utf_8g        F Nz`cp_isolation is set. use this flag for debugging purpose. limited list of encoding allowed : %s.z, zacp_exclusion is set. use this flag for debugging purpose. limited list of encoding excluded : %s.z^override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.r	   z>Trying to detect encoding from a tiny portion of ({}) byte(s).zIUsing lazy str decoding because the payload is quite large, ({}) byte(s).z@Detected declarative mark in sequence. Priority +1 given for %s.zIDetected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.ascii>   utf_16utf_32z\Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.>   utf_7zREncoding %s won't be tested as-is because detection is unreliable without BOM/SIG.z2Encoding %s does not provide an IncrementalDecoderg    A)encodingz9Code page %s does not fit given bytes sequence at ALL. %sTzW%s is deemed too similar to code page %s and was consider unsuited already. Continuing!zpCode page %s is a multi byte encoding table and it appear that at least one character was encoded using n-bytes.      zaLazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %sg     j@strict)errorsz^LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %szc%s was excluded because of initial chaos probing. Gave up %i time(s). Computed mean chaos is %f %%.d      )ndigitsz=%s passed initial chaos probing. Mean measured chaos is %f %%z&{} should target any language(s) of {},z We detected language {} using {}皙?z.Encoding detection: %s is most likely the one.zoEncoding detection: %s is most likely the one as we detected a BOM or SIG within the beginning of the sequence.zONothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.z7Encoding detection: %s will be used as a fallback matchz:Encoding detection: utf_8 will be used as a fallback matchz:Encoding detection: ascii will be used as a fallback matchz]Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.z=Encoding detection: Unable to determine any suitable charset.)5
isinstance	bytearraybytes	TypeErrorformattypeloggerlevel
addHandlerexplain_handlersetLevelr   lendebugremoveHandlerloggingWARNINGr   r   logjoinr   intr   r   r   appendsetr   r   addr   r   ModuleNotFoundErrorImportErrorstrUnicodeDecodeErrorLookupErrorr   rangemaxr   r   decodesumroundr   r   r
   r   r/   fingerprintbest)0r   r   r   r    r!   r"   r#   r$   r%   r&   previous_logger_levellengthcpis_too_small_sequenceis_too_large_sequenceprioritized_encodingsspecified_encodingtestedtested_but_hard_failuretested_but_soft_failurefallback_asciifallback_u8fallback_specifiedresultssig_encodingsig_payloadencoding_ianadecoded_payloadbom_or_sig_availablestrip_sig_or_bomis_multi_byte_decoderesimilar_soft_failure_testencoding_soft_failedr_multi_byte_bonusmax_chunk_gave_upearly_stop_countlazy_str_hard_failure	md_chunks	md_ratioschunkmean_mess_ratiofallback_entrytarget_languages	cd_ratioschunk_languagescd_ratios_mergeds0                                                   S/var/www/html/flask-app/venv/lib/python3.12/site-packages/charset_normalizer/api.py
from_bytesr   !   s
   < i)U!34BIIY
 	
 %+\\/*i.F{ST  1OO1DW__E|IwUBPRSTUU

5IIl#		
 8DD	"e,DD

6IIl#		
 8DD	"e,DD*u$%

l	
 
qyVe^j0%(
"%i.3E"E"%i.4D"D

LSS	
 


W^^	
 (* .By)t  %$$%78

N	
 uF)+)+-1N*.K15,.G 3I >L+$$\2

W		
   )++$$W-.? V<M=M\9F"

=!)-%1]%B!5 "
:Q;
 009MJJn
 I%.BJJd
 	*@*O!	$)>%)G'50 kD	*"3{#3c$i@*	 #&'50 "3{#3#56*	#" +0!$; 	 ],@A,0)	
 %JJi$	 )As;/?
 " .t+.O$v- 	 JJ-	 "%SWq[!1 115 ! %!		'	),$ %
    '  !4GA\1B,Ga,G R=I-$)$$(99(-=-F7V &%)
#d)+&--mH-M ENY#i.!@SVi'+;?P+P#**=9JJ0 o+Q7  !gw8J%KK-!-}iO" !$66)7&"g-%3N"0K

K/C'3		
 %*<]*K4]CJJ8??!3'7#8 	 G#" 2"1&2BCHH-.#   12 2)<JJ299$m 	$ 		
 0'7CC#%LL@- $$_5 56!7=#9":;;L(LL1
 $$_5 56!7=#9":;;mV<p 7|q.,>JJa
 LLI"++ NN-.^3"++~/I/II'LLUVNN;'LLUVNN>*kLLN##L1	
 	TU_--.Nq E E^ $[1 	JJD
 	. #K0 		a-

O!F	 $**=9		l 
	) JJsA	  1$(!
	)* & 

t!F	 (..}=su   /d$7d)#d./Ae	A4g>ggh.*eef?.Af::f?	g?)g::g?	i6iifpc
                 F    t        | j                         |||||||||	
      S )z
    Same thing than the function from_bytes but using a file pointer that is already ready.
    Will not close the file pointer.
    )r   read)
r   r   r   r    r!   r"   r#   r$   r%   r&   s
             r   from_fpr     s5      
	     pathc
                 n    t        | d      5 }
t        |
|||||||||	
      cddd       S # 1 sw Y   yxY w)z
    Same thing than the function from_bytes but with one extra step. Opening and reading given file path in binary mode.
    Can raise IOError.
    rbN)openr   )r   r   r   r    r!   r"   r#   r$   r%   r&   r   s              r   	from_pathr     sK      
dD	 
R 

 
 
s   +4fp_or_path_or_payloadc
                     t        | t        t        f      rt        | |||||||||	
      }
|
 S t        | t        t
        f      rt        | |||||||||	
      }
|
 S t        | |||||||||	
      }
|
 S )a)  
    Detect if the given input (file, bytes, or path) points to a binary file. aka. not a string.
    Based on the same main heuristic algorithms and default kwargs at the sole exception that fallbacks match
    are disabled to be stricter around ASCII-compatible but unlikely to be a string.
    )	r   r   r    r!   r"   r#   r$   r%   r&   )r:   rR   r   r   r<   r;   r   r   )r   r   r   r    r!   r"   r#   r$   r%   r&   guessess              r   	is_binaryr   3  s    " '#x9!!%%!51+
Z ;C 
	

 !!%%!51+
4 ; !!%%!51+
 ;r   )	      皙?NNTFr9   T)	r   r   r   NNTFr9   F)0rH   osr   typingr   r   r   r   r   cdr
   r   r   r   constantr   r   r   r   mdr   modelsr   r   utilsr   r   r   r   r   r   r   	getLoggerr@   StreamHandlerrC   setFormatter	Formatterr<   r;   rL   floatrR   boolr   r   r   r    r   r   <module>r      s(     7 7  R Q  0   
		/	0'''')   GAB (,(,!% # RUI%&RR R 	R
 49%R 49%R R R R R Rn (,(,!% #   	
 49% 49%     @ (,(,!% # 

UH$
%

 
 	

 49%
 49%
 
 
 
 
 
B (,(,!% #!? 3%!?@?? ? 	?
 49%? 49%? ? ? ? ? 
?r   