
    )f[<                     ~    d dl Z d dlZd dlZd dlmZ d dlmZmZmZ  G d de      Z	 G d de      Z
 G d d	e      Zy)
    N)BeautifulSoup)EntitySubstitutionEncodingDetectorUnicodeDammitc                       e Zd ZdZd Zej                  j                  dg d      d        Zd Z	d Z
d Zd	 Zd
 Zd Zy)TestUnicodeDammitz"Standalone tests of UnicodeDammit.c                 @    d}t        |      }|j                  |k(  sJ y )Nu   I'm already Unicode! ☃)r   unicode_markup)selfmarkupdammits      R/var/www/html/flask-app/venv/lib/python3.12/site-packages/bs4/tests/test_dammit.pytest_unicode_inputz$TestUnicodeDammit.test_unicode_input   s%    3v&$$...    z smart_quotes_to,expect_converted))Nu   ‘’“”)xmlz &#x2018;&#x2019;&#x201C;&#x201D;)htmlz&lsquo;&rsquo;&ldquo;&rdquo;)asciiz''""c                 f    d}t        |dg|      j                  }|dj                  |      k(  sJ y)zbVerify the functionality of the smart_quotes_to argument
        to the UnicodeDammit constructor.s   <foo></foo>windows-1252)known_definite_encodingssmart_quotes_toz<foo>{}</foo>N)r   r
   format)r   r   expect_convertedr   	converteds        r   test_smart_quotes_toz&TestUnicodeDammit.test_smart_quotes_to   sD     0!n-=+
 . 	 O223CDDDDr   c                 ~    d}t        |      }|j                  j                         dk(  sJ |j                  dk(  sJ y )Ns   Sacré bleu! ☃utf-8u   Sacré bleu! ☃r   original_encodinglowerr
   )r   utf8r   s      r   test_detect_utf8z"TestUnicodeDammit.test_detect_utf8&   s@    1t$''--/7::$$(DDDDr   c                     d}t        |dg      }|j                  j                         dk(  sJ |j                  dk(  sJ y )N   
iso-8859-8u   םולשr   )r   hebrewr   s      r   test_convert_hebrewz%TestUnicodeDammit.test_convert_hebrew,   sD    $v~6''--/<??$$(BBBBr   c                     d}t        |      }|j                  j                         dk(  sJ |j                  j	                  d      |k(  sJ y )Ns   ケータイ Watchr   )r   r   r    r
   encode)r   utf_8r   s      r   /test_dont_see_smart_quotes_where_there_are_nonezATestUnicodeDammit.test_dont_see_smart_quotes_where_there_are_none2   sJ    Iu%''--/7::$$++G4===r   c                 ~    dj                  d      }t        |dg      }|j                  j                         dk(  sJ y )N   Räksmörgåsr   r%   r)   r   r   r    r   	utf8_datar   s      r    test_ignore_inappropriate_codecsz2TestUnicodeDammit.test_ignore_inappropriate_codecs8   s=    #**73	y<.9''--/7:::r   c                     dj                  d      }dD ].  }t        ||g      }|j                  j                         dk(  r.J  y )Nr-   r   )z.utf8z...z
utF---16.!r.   )r   r0   bad_encodingr   s       r   test_ignore_invalid_codecsz,TestUnicodeDammit.test_ignore_invalid_codecs=   sI    #**73	: 	?L"9|n=F++113w>>	?r   c                     dj                  d      }t        |dg      }|j                  j                         dk(  sJ t        |ddg      }|j                  d k(  sJ y )Nr-   r   )exclude_encodingsr   r.   r/   s      r   test_exclude_encodingsz(TestUnicodeDammit.test_exclude_encodingsC   sg    #**73	 yWIF''--/>AA '>)BD''4///r   N)__name__
__module____qualname____doc__r   pytestmarkparametrizer   r"   r'   r+   r1   r4   r7    r   r   r   r      sW    ,/
 [[*	
EEEC>;
?0r   r   c                   B    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zy
)TestEncodingDetectorc                 P    t        d      }t        |j                        }d|v sJ y )Ns'   <?xml version="1.0" encoding="UTF-" ?>u   utf-�)r   list	encodings)r   detectedrD   s      r   Ptest_encoding_detector_replaces_junk_in_encoding_name_with_replacement_characterzeTestEncodingDetector.test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_characterS   s-    #9;++,	.);;;r   c                 N    dD ]   }t        |d      }d|j                  k(  r J  y )N)s&   <html><meta charset="euc-jp" /></html>s&   <html><meta charset='euc-jp' /></html>s$   <html><meta charset=euc-jp /></html>s#   <html><meta charset=euc-jp/></html>Tis_htmlzeuc-jp)r   r   r   datar   s      r    test_detect_html5_style_meta_tagz5TestEncodingDetector.test_detect_html5_style_meta_tagY   s2    4 	8D
 #46Fv7777	8r   c                 ,   d}t         j                  j                  }t        j                  t        j
                         	 d }|t         j                  _        t        |      }d|j                  k(  sJ d|j                  v sJ t        |d      }|j                  sJ 	 t        j                  t        j                         |t         j                  _        y # t        j                  t        j                         |t         j                  _        w xY w)NsT   ﻿<?xml version="1.0" encoding="UTF-8"?>
<html><b>بتر</b>
<i>ѐ</i></html>c                      y Nr?   )strs    r   noopzETestEncodingDetector.test_last_ditch_entity_replacement.<locals>.noopy   s    r   Tu   �zhtml.parser)bs4r   chardet_dammitloggingdisableWARNINGr   contains_replacement_charactersr
   r   NOTSET)r   docchardetrQ   r   soups         r   "test_last_ditch_entity_replacementz7TestEncodingDetector.test_last_ditch_entity_replacementc   s     2 **++(	0(,CJJ%"3'F6AAAAv4444 m4D7777OOGNN+(/CJJ% OOGNN+(/CJJ%s   AC :Dc                 b    d}t        |      }d|j                  k(  sJ d|j                  k(  sJ y )N   < a >   < / a > u   <a>áé</a>utf-16le)r   r
   r   rJ   s      r   test_byte_order_mark_removedz1TestEncodingDetector.test_byte_order_mark_removed   s6    Mt$ 5 555V55555r   c                    d}t        |      }t        |dg      }d|j                  k(  sJ t        |dg      }d|j                  k(  sJ dg|j                  D cg c]  }|d   	 c}k(  sJ d}t        |dgd	g
      }d	|j                  k(  sJ dd	g|j                  D cg c]  }|d   	 c}k(  sJ y c c}w c c}w )Nr^   zutf-16)r   r   )user_encodingsr_   r   r$   r%   )r   rb   r   r   tried_encodings)r   rK   r   beforeafterxr&   s          r   )test_known_definite_versus_user_encodingsz>TestEncodingDetector.test_known_definite_versus_user_encodings   s    
 Nt$ txjI63333
 dG9=U4444|f.D.DE!EEE %v	/;n> v7777&9O9O*PA1Q4*PPPP  F +Qs   B7#B<c                     d}t        |dgdgdg      }d|j                  k(  sJ g d|j                  D cg c]  }|d   	 c}k(  sJ y c c}w )Nr$   	shift-jisr   r%   )r   override_encodingsrb   )rj   r   r%   r   rc   )r   r&   r   rg   s       r   "test_deprecated_override_encodingsz7TestEncodingDetector.test_deprecated_override_encodings   sg     %&1] 'y(>	
 v7777 4!112aQqT2
 	
 
2s   Ac                 ,   dj                  d      }dj                  d      }||z   |z   }t        j                  t              5  |j	                  d       d d d        t        j                  |      }d|j	                  d      k(  sJ y # 1 sw Y   5xY w)Nu	   ☃☃☃r!   u   “Hi, I like Windows!”windows_1252u+   ☃☃☃“Hi, I like Windows!”☃☃☃)r)   r<   raisesUnicodeDecodeErrordecoder   	detwingle)r   r!   rn   rY   fixeds        r   test_detwinglez#TestEncodingDetector.test_detwingle   s    !))&1./5vn/E 	
 \!D( ]]-. 	JJv	 '',<V@TTTT	 	s   B

Bc                     dD ]B  }|j                  d      }|j                  d      sJ t        j                  |      }||k(  rBJ  y )N)u   œu   ₓu   ðr!      )r)   endswithr   rr   )r   tricky_unicode_charinputoutputs       r   +test_detwingle_ignores_multibyte_charactersz@TestEncodingDetector.test_detwingle_ignores_multibyte_characters   sO    
$ 	#
 (..v6E>>'**",,U3FU?"	#r   c                    d}|j                  d      }d}|j                  d      }t        j                  } ||d      J d ||d      k(  sJ d ||d      k(  sJ d ||      k(  sJ d ||      k(  sJ d	d
z  } |||z         J  |||z         J  |||z   dd      dk(  sJ  ||d      dk(  sJ  |d	|z   d      dk(  sJ  |d|z   d      J y )Nz0<html><head><meta charset="utf-8"></head></html>r   z,<?xml version="1.0" encoding="ISO-8859-1" ?>FrH   r   Tz
iso-8859-1    i  )rI   search_entire_document)r~      a)r)   r   find_declared_encoding)r   html_unicode
html_bytesxml_unicode	xml_bytesmspacers          r   test_find_declared_encodingz0TestEncodingDetector.test_find_declared_encoding   s     J!((1
C&&w/	33u-55!L$777!J555q~--q|++ *$%--)#$,,
 fz!4M	
 48LHH	!$?<OO	!$?GGGr   N)r8   r9   r:   rF   rL   r\   r`   rh   rl   rt   r{   r   r?   r   r   rA   rA   Q   s3    <8!0F6Q>
$U.#(Hr   rA   c                       e Zd ZdZd Zej                  j                  dddg      d        Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zy)TestEntitySubstitutionz1Standalone tests of the EntitySubstitution class.c                     t         | _        y rO   )r   subr   s    r   setup_methodz#TestEntitySubstitution.setup_method  s	    %r   zoriginal,substituted)u   foo∀☃õbaru   foo&forall;☃&otilde;bar)u   ‘’foo“”z&lsquo;&rsquo;foo&ldquo;&rdquo;c                 D    | j                   j                  |      |k(  sJ y rO   r   substitute_html)r   originalsubstituteds      r   test_substitute_htmlz+TestEntitySubstitution.test_substitute_html  s!     xx''1[@@@r   c                 p    dD ]1  \  }}d}||z  }||z  }| j                   j                  |      |k(  r1J  y )N)
)z&models;u   ⊧)z&Nfr;u   𝔑)z&ngeqq;u   ≧̸)z&not;   ¬)z&Not;u   ⫬z||)fjr   )z&gt;>)z&lt;<)z&amp;&z3 %s 4r   )r   entityutemplaterawwith_entitiess         r   test_html5_entityz(TestEntitySubstitution.test_html5_entity%  sM    
 	BIFA0  HQ,C$v-M88++C0MAA7	Br   c                     d}d}| j                   j                  |      |k(  sJ d}d}| j                   j                  |      |k(  sJ y )Nu   fjords ⊔ penguinszfjords &sqcup; penguinsu   fjords ⊔︀ penguinszfjords &sqcups; penguinsr   )r   rK   r   s      r   )test_html5_entity_with_variation_selectorz@TestEntitySubstitution.test_html5_entity_with_variation_selectorC  sO     (*xx''-77-+xx''-777r   c                 J    d}| j                   j                  |d      |k(  sJ y )NWelcome to "my bar"Fr   substitute_xmlr   ss     r   Itest_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falsez`TestEntitySubstitution.test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_falseO  s&    !xx&&q%0A555r   c                     | j                   j                  dd      dk(  sJ | j                   j                  dd      dk(  sJ y )NWelcomeTz	"Welcome"z	Bob's Barz"Bob's Bar"r   r   s    r   6test_xml_attribute_quoting_normally_uses_double_quoteszMTestEntitySubstitution.test_xml_attribute_quoting_normally_uses_double_quotesS  s=    xx&&y$7;FFxx&&{D9^KKKr   c                 J    d}| j                   j                  |d      dk(  sJ y )Nr   Tz'Welcome to "my bar"'r   r   s     r   Otest_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quoteszfTestEntitySubstitution.test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotesW  s'    !xx&&q$/3LLLLr   c                 J    d}| j                   j                  |d      dk(  sJ y )NWelcome to "Bob's Bar"Tz""Welcome to &quot;Bob's Bar&quot;"r   r   s     r   btest_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quoteszyTestEntitySubstitution.test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes[  s'    %xx&&q$/3XXXXr   c                 H    d}| j                   j                  |      |k(  sJ y )Nr   r   )r   quoteds     r   <test_xml_quotes_arent_escaped_when_value_is_not_being_quotedzSTestEntitySubstitution.test_xml_quotes_arent_escaped_when_value_is_not_being_quoted_  s$    *xx&&v.&888r   c                 D    | j                   j                  d      dk(  sJ y )Nzfoo<bar>zfoo&lt;bar&gt;r   r   s    r   'test_xml_quoting_handles_angle_bracketsz>TestEntitySubstitution.test_xml_quoting_handles_angle_bracketsc  s     xx&&z26FFFFr   c                 D    | j                   j                  d      dk(  sJ y )NzAT&TzAT&amp;Tr   r   s    r   #test_xml_quoting_handles_ampersandsz:TestEntitySubstitution.test_xml_quoting_handles_ampersandsf  s    xx&&v.*<<<r   c                 D    | j                   j                  d      dk(  sJ y )N&Aacute;T&Tz&amp;Aacute;T&amp;Tr   r   s    r   Etest_xml_quoting_including_ampersands_when_they_are_part_of_an_entityz\TestEntitySubstitution.test_xml_quoting_including_ampersands_when_they_are_part_of_an_entityi  s     xx&&}59NNNNr   c                 D    | j                   j                  d      dk(  sJ y )Nr   z&Aacute;T&amp;T)r   "substitute_xml_containing_entitiesr   s    r   Dtest_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entityz[TestEntitySubstitution.test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entityl  s     xx::=IM^^^^r   c                 H    d}| j                   j                  |      |k(  sJ y)z:There's no need to do this except inside attribute values.zBob's "bar"Nr   )r   texts     r    test_quotes_not_html_substitutedz7TestEntitySubstitution.test_quotes_not_html_substitutedo  s$    xx''-555r   N)r8   r9   r:   r;   r   r<   r=   r>   r   r   r   r   r   r   r   r   r   r   r   r   r   r?   r   r   r   r     s    ;& [[2
 C		
AAB<
86LMY9G=O_6r   r   )r<   rT   rR   r   
bs4.dammitr   r   r   objectr   rA   r   r?   r   r   <module>r      sI      
  C0 C0J{H6 {H|c6V c6r   