
    N)fh-                         d dl mZ d dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZ ddlmZ ddlmZmZmZ  G d d	      Z G d
 d      Zeeef   Ze
e   Z G d d      Zy)    )aliases)sha256)dumps)AnyDictIteratorListOptionalTupleUnion   )TOO_BIG_SEQUENCE)	iana_nameis_multi_byte_encodingunicode_rangec                      e Zd Z	 d$dededededddee   fd	Zd
e	defdZ
d
e	defdZedefd       ZdefdZdefdZd%dZedefd       Zedee   fd       Zedefd       Zedefd       Zedee   fd       Zedefd       Zedefd       Zedefd       Zedefd       Zedefd       Zedefd       Zeded    fd       Zedefd       Zedee   fd       Zedee   fd        Z d&d!edefd"Z!edefd#       Z"y)'CharsetMatchNpayloadguessed_encodingmean_mess_ratiohas_sig_or_bom	languagesCoherenceMatchesdecoded_payloadc                     || _         || _        || _        || _        || _        d | _        g | _        d| _        d | _        d | _	        || _
        y )N        )_payload	_encoding_mean_mess_ratio
_languages_has_sig_or_bom_unicode_ranges_leaves_mean_coherence_ratio_output_payload_output_encoding_string)selfr   r   r   r   r   r   s          V/var/www/html/flask-app/venv/lib/python3.12/site-packages/charset_normalizer/models.py__init__zCharsetMatch.__init__   sW      '.'6,5%348+-,/"04/3&5    otherreturnc                    t        |t              sAt        dj                  t	        |j
                        t	        | j
                                    | j                  |j                  k(  xr | j                  |j                  k(  S )Nz&__eq__ cannot be invoked on {} and {}.)
isinstancer   	TypeErrorformatstr	__class__encodingfingerprintr(   r,   s     r)   __eq__zCharsetMatch.__eq__$   si    %.8??(#dnn*= 
 }}.X43C3CuGXGX3XXr+   c                    t        |t              st        t        | j                  |j                  z
        }t        | j
                  |j
                  z
        }|dk  r|dkD  r| j
                  |j
                  kD  S |dk  rS|dk  rNt        | j                        t        k\  r| j                  |j                  k  S | j                  |j                  kD  S | j                  |j                  k  S )zQ
        Implemented to make sorted available upon CharsetMatches items.
        g{Gz?g{Gz?)
r/   r   
ValueErrorabschaos	coherencelenr   r   multi_byte_usage)r(   r,   chaos_differencecoherence_differences       r)   __lt__zCharsetMatch.__lt__-   s     %."%djj5;;&>"?&)$..5??*J&K d"';d'B>>EOO33$)=)E 4==!%55zzEKK//((5+A+AAAzzEKK''r+   c                 \    dt        t        |             t        | j                        z  z
  S )Ng      ?)r=   r2   rawr(   s    r)   r>   zCharsetMatch.multi_byte_usageC   s"    c#d)ns488}455r+   c                 ~    | j                   &t        | j                  | j                  d      | _         | j                   S )Nstrict)r'   r2   r   r   rD   s    r)   __str__zCharsetMatch.__str__G   s.    <<t}}dnnhGDL||r+   c                 N    dj                  | j                  | j                        S )Nz<CharsetMatch '{}' bytes({})>)r1   r4   r5   rD   s    r)   __repr__zCharsetMatch.__repr__M   s    .55dmmTEUEUVVr+   c                     t        |t              r|| k(  r$t        dj                  |j                              d |_        | j                  j                  |       y )Nz;Unable to add instance <{}> as a submatch of a CharsetMatch)r/   r   r9   r1   r3   r'   r#   appendr6   s     r)   add_submatchzCharsetMatch.add_submatchP   sO    %.%4-MTTOO  E"r+   c                     | j                   S N)r   rD   s    r)   r4   zCharsetMatch.encoding[   s    ~~r+   c                     g }t        j                         D ]G  \  }}| j                  |k(  r|j                  |       '| j                  |k(  s7|j                  |       I |S )z
        Encoding name are known by many name, using this could help when searching for IBM855 when it's listed as CP855.
        )r   itemsr4   rK   )r(   also_known_asups       r)   encoding_aliaseszCharsetMatch.encoding_aliases_   s^    
 $&MMO 	(DAq}}!$$Q'!#$$Q'		(
 r+   c                     | j                   S rN   r!   rD   s    r)   bomzCharsetMatch.boml       ###r+   c                     | j                   S rN   rV   rD   s    r)   byte_order_markzCharsetMatch.byte_order_markp   rX   r+   c                 F    | j                   D cg c]  }|d   	 c}S c c}w )z
        Return the complete list of possible languages found in decoded sequence.
        Usually not really useful. Returned list may be empty even if 'language' property return something != 'Unknown'.
        r   r    )r(   es     r)   r   zCharsetMatch.languagest   s      #oo.!...s   c                    | j                   shd| j                  v ryddlm}m} t        | j                        r || j                        n || j                        }t        |      dk(  sd|v ry|d   S | j                   d   d   S )z
        Most probable language found in decoded sequence. If none were detected or inferred, the property will return
        "Unknown".
        asciiEnglishr   )encoding_languagesmb_encoding_languageszLatin BasedUnknown)r    could_be_from_charsetcharset_normalizer.cdra   rb   r   r4   r=   )r(   ra   rb   r   s       r)   languagezCharsetMatch.language|   s      $444  X *$--8 &dmm4'6  9~"my&@ Q<q!!$$r+   c                     | j                   S rN   )r   rD   s    r)   r;   zCharsetMatch.chaos   s    $$$r+   c                 @    | j                   sy| j                   d   d   S )Nr   r   r   r\   rD   s    r)   r<   zCharsetMatch.coherence   s     q!!$$r+   c                 6    t        | j                  dz  d      S Nd      )ndigits)roundr;   rD   s    r)   percent_chaoszCharsetMatch.percent_chaos   s    TZZ#%q11r+   c                 6    t        | j                  dz  d      S rj   )rn   r<   rD   s    r)   percent_coherencezCharsetMatch.percent_coherence   s    T^^c)155r+   c                     | j                   S )z+
        Original untouched bytes.
        )r   rD   s    r)   rC   zCharsetMatch.raw   s    
 }}r+   c                     | j                   S rN   )r#   rD   s    r)   submatchzCharsetMatch.submatch   s    ||r+   c                 2    t        | j                        dkD  S Nr   )r=   r#   rD   s    r)   has_submatchzCharsetMatch.has_submatch   s    4<< 1$$r+   c                     | j                   | j                   S t        |       D cg c]  }t        |       }}t        t	        |D ch c]  }|s|	 c}            | _         | j                   S c c}w c c}w rN   )r"   r2   r   sortedlist)r(   chardetected_rangesrs       r)   	alphabetszCharsetMatch.alphabets   sw    +''' -0I0
$(M$0
 0
  &d+L!!A+L&MN###0
 ,Ms   A0A5A5c                 p    | j                   g| j                  D cg c]  }|j                   c}z   S c c}w )z
        The complete list of encoding that output the exact SAME str result and therefore could be the originating
        encoding.
        This list does include the encoding available in property 'encoding'.
        )r   r#   r4   )r(   ms     r)   rd   z"CharsetMatch.could_be_from_charset   s,     t||"D!1::"DDD"Ds   3r4   c                     | j                   | j                   |k7  r'|| _         t        |       j                  |d      | _        | j                  S )z
        Method to get re-encoded bytes payload using given target encoding. Default to UTF-8.
        Any errors will be simply ignored by the encoder NOT replaced.
        replace)r&   r2   encoder%   )r(   r4   s     r)   outputzCharsetMatch.output   sJ    
   (D,A,AX,M$,D!#&t9#3#3Hi#HD ###r+   c                 P    t        | j                               j                         S )zw
        Retrieve the unique SHA256 computed using the transformed (re-encoded) payload. Not the original one.
        )r   r   	hexdigestrD   s    r)   r5   zCharsetMatch.fingerprint   s    
 dkkm$..00r+   rN   )r,   r   r-   N)utf_8)#__name__
__module____qualname__bytesr2   floatboolr
   r*   objectr7   rA   propertyr>   rG   rI   rL   r4   r	   rT   rW   rZ   r   rf   r;   r<   ro   rq   rC   rt   rw   r~   rd   r   r5    r+   r)   r   r   
   s    *.66 6 	6
 6 &6 "#62YF Yt Y(F (t (, 6% 6 6 W# W	# #   
$s) 
 
 $T $ $ $ $ $ /49 / / %# % %6 %u % % %5 % %
 2u 2 2 65 6 6 U   $~.   %d % % 	$49 	$ 	$ EtCy E E	$s 	$ 	$ 1S 1 1r+   r   c                       e Zd ZdZddeee      fdZdee   fdZ	de
eef   defdZdefd	Zdefd
ZdeddfdZded   fdZded   fdZy)CharsetMatchesz
    Container with every CharsetMatch items ordered by default from most probable to the less one.
    Act like a list(iterable) but does not implements all related methods.
    Nresultsc                 8    |rt        |      | _        y g | _        y rN   )ry   _results)r(   r   s     r)   r*   zCharsetMatches.__init__   s    ?FF7OBr+   r-   c              #   8   K   | j                   E d {    y 7 wrN   r   rD   s    r)   __iter__zCharsetMatches.__iter__   s     ==  s   itemc                     t        |t              r| j                  |   S t        |t              r/t	        |d      }| j                  D ]  }||j
                  v s|c S  t        )z
        Retrieve a single item either by its position or encoding name (alias may be used here).
        Raise KeyError upon invalid index or encoding not present in results.
        F)r/   intr   r2   r   rd   KeyError)r(   r   results      r)   __getitem__zCharsetMatches.__getitem__   s`    
 dC ==&&dC T5)D-- "6777!M" r+   c                 ,    t        | j                        S rN   r=   r   rD   s    r)   __len__zCharsetMatches.__len__   s    4==!!r+   c                 2    t        | j                        dkD  S rv   r   rD   s    r)   __bool__zCharsetMatches.__bool__   s    4==!A%%r+   c                    t        |t              s-t        dj                  t	        |j
                                    t        |j                        t        k  rW| j                  D ]H  }|j                  |j                  k(  s|j                  |j                  k(  s7|j                  |        y | j                  j                  |       t        | j                        | _	        y)z~
        Insert a single match. Will be inserted accordingly to preserve sort.
        Can be inserted as a submatch.
        z-Cannot append instance '{}' to CharsetMatchesN)r/   r   r9   r1   r2   r3   r=   rC   r   r   r5   r;   rL   rK   ry   )r(   r   matchs      r)   rK   zCharsetMatches.append  s    
 $-?FF'  txx=,, $$(8(88U[[DJJ=V&&t, 	T"t}}-r+   r   c                 :    | j                   sy| j                   d   S )zQ
        Simply return the first match. Strict equivalent to matches[0].
        Nr   r   rD   s    r)   bestzCharsetMatches.best  s     }}}}Qr+   c                 "    | j                         S )zP
        Redundant method, call the method best(). Kept for BC reasons.
        )r   rD   s    r)   firstzCharsetMatches.first  s     yy{r+   rN   )r   r   r   __doc__r
   r	   r   r*   r   r   r   r   r2   r   r   r   r   rK   r   r   r   r+   r)   r   r      s    
Ol); < O!(<0 !c3h L " "&$ &.< .D .( h~.  x/ r+   r   c                       e Zd Zdedee   dee   dee   dedee   deded	ed
ee   defdZe	de
eef   fd       ZdefdZy)CliDetectionResultpathr4   rT   alternative_encodingsrf   r~   r   r;   r<   unicode_pathis_preferredc                     || _         |
| _        || _        || _        || _        || _        || _        || _        || _        |	| _	        || _
        y rN   )r   r   r4   rT   r   rf   r~   r   r;   r<   r   )r(   r   r4   rT   r   rf   r~   r   r;   r<   r   r   s               r)   r*   zCliDetectionResult.__init__)  sV     	+7'/+;0E"%$-$2!
 )".r+   r-   c                     | j                   | j                  | j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  | j                  dS )Nr   r4   rT   r   rf   r~   r   r;   r<   r   r   r   rD   s    r)   __dict__zCliDetectionResult.__dict__C  se     II $ 5 5%)%?%?"11ZZ -- --
 	
r+   c                 2    t        | j                  dd      S )NT   )ensure_asciiindent)r   r   rD   s    r)   to_jsonzCliDetectionResult.to_jsonS  s    T]]a@@r+   N)r   r   r   r2   r
   r	   r   r   r*   r   r   r   r   r   r   r+   r)   r   r   (  s    // 3-/ s)	/
  $Cy/ / 9/ / / / sm/ /4 
$sCx. 
 
A Ar+   r   N)encodings.aliasesr   hashlibr   jsonr   typingr   r   r   r	   r
   r   r   constantr   utilsr   r   r   r   r   r2   r   CoherenceMatchr   r   r   r+   r)   <module>r      sa    %   D D D & C CT1 T1n@ @F sEz"' ,A ,Ar+   