
    )f                          d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
 ej                  j                  e d       G d	 d
e
e	             Zy)zDTests to ensure that the html5lib tree builder generates good trees.    N)BeautifulSoup)SoupStrainer   )HTML5LIB_PRESENTHTML5TreeBuilderSmokeTestSoupTestz?html5lib seems not to be present, not testing its tree builder.)reasonc                   z    e Zd ZdZed        Zd Zd Zd Zd Z	d Z
d Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zy)TestHTML5LibBuilderz"See ``HTML5TreeBuilderSmokeTest``.c                     ddl m} |S )Nr   )HTML5TreeBuilder)bs4.builderr   )selfr   s     T/var/www/html/flask-app/venv/lib/python3.12/site-packages/bs4/tests/test_html5lib.pydefault_builderz#TestHTML5LibBuilder.default_builder   s    0    c                 <   t        d      }d}t        j                  d      5 }t        |d|      }d d d        j	                         | j                  |      k(  sJ \  }|j                  t        k(  sJ dt        |j                        v sJ y # 1 sw Y   `xY w)Nbz<p>A <b>bold</b> statement.</p>T)recordhtml5lib)
parse_onlyz4the html5lib tree builder doesn't support parse_only)
r   warningscatch_warningsr   decodedocument_forfilename__file__strmessage)r   strainermarkupwsoupwarnings         r   test_soupstrainerz%TestHTML5LibBuilder.test_soupstrainer   s    $2$$D1 	JQ ID	J{{} 1 1& 999	8++EW__I]]]]	J 	Js   BBc                 N    d}| j                  |d       | j                  d       y)z8html5lib inserts <tbody> tags where other parsers don't.z[<table id="1"><tr><td>Here's another table:<table id="2"><tr><td>foo</td></tr></table></td>z<table id="1"><tbody><tr><td>Here's another table:<table id="2"><tbody><tr><td>foo</td></tr></tbody></table></td></tr></tbody></table>z{<table><thead><tr><td>Foo</td></tr></thead><tbody><tr><td>Bar</td></tr></tbody><tfoot><tr><td>Baz</td></tr></tfoot></table>N)assert_soup)r   r!   s     r   test_correctly_nested_tablesz0TestHTML5LibBuilder.test_correctly_nested_tables&   s6    " 	)	* 	;	<r   c                 h    d}| j                  |      }d|j                  j                         k(  sJ y )Nzy<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html>
<html>
  <head>
  </head>
  <body>
   <p>foo</p>
  </body>
</html>s
   <p>foo</p>)r#   pencoder   r!   r#   s      r   (test_xml_declaration_followed_by_doctypez<TestHTML5LibBuilder.test_xml_declaration_followed_by_doctype:   s1     yy ///r   c                     d}| j                  |      }d|j                  j                         k(  sJ dt        |j	                  d            k(  sJ y )Nz%<p><em>foo</p>
<p>bar<a></a></em></p>zD<body><p><em>foo</em></p><em>
</em><p><em>bar<a></a></em></p></body>   r*   r#   bodyr   lenfind_allr,   s      r   test_reparented_markupz*TestHTML5LibBuilder.test_reparented_markupH   sL    9yy VZ^ZcZcZjZjZlllCc*++++r   c                     d}| j                  |      }d|j                  j                         k(  sJ dt        |j	                  d            k(  sJ y )Nz&<p><em>foo</p>
<p>bar<a></a></em></p>
zE<body><p><em>foo</em></p><em>
</em><p><em>bar<a></a></em></p>
</body>r/   r*   r0   r,   s      r   +test_reparented_markup_ends_with_whitespacez?TestHTML5LibBuilder.test_reparented_markup_ends_with_whitespaceO   sL    ;yy X\`\e\e\l\l\nnnCc*++++r   c                     d}| j                  |      }|j                  d      \  }}|j                  d      \  }}|j                  |u sJ |j                  |u sJ y)zVerify that we keep the two whitespace nodes in this
        document distinct when reparenting the adjacent <tbody> tags.
        z,<table> <tbody><tbody><ims></tbody> </table> stringtbodyN)r#   r3   next_element)r   r!   r#   space1space2tbody1tbody2s          r   <test_reparented_markup_containing_identical_whitespace_nodeszPTestHTML5LibBuilder.test_reparented_markup_containing_identical_whitespace_nodesU   sc     @yy c2w/""f,,""f,,,r   c                     d}| j                  |      }|j                  }d|j                  k(  sJ |j                  d      }|j	                  d      d   }||j                  k(  sJ ||j
                  k(  sJ y )NzF<div><a>aftermath<p><noscript>target</noscript>aftermath</a></p></div>targetr9   	aftermath)r#   noscriptr<   findr3   previous_element)r   r!   r#   rF   rC   final_aftermaths         r   *test_reparented_markup_containing_childrenz>TestHTML5LibBuilder.test_reparented_markup_containing_children`   s}    Yyy ==80000(+ --{-;B?
 &"5"55599999r   c                 b    d}| j                  |      }t        |      j                  d      sJ y)z(Processing instructions become comments.s   <?PITarget PIContent?>z<!--?PITarget PIContent?-->N)r#   r   
startswithr,   s      r   test_processing_instructionz/TestHTML5LibBuilder.test_processing_instructionp   s.    .yy 4y##$ABBBr   c                 l    d}| j                  |      }|j                  d      \  }}||k(  sJ ||usJ y )Ns   <a class="my_class"><p></a>a)r#   r3   )r   r!   r#   a1a2s        r   test_cloned_multivalue_nodez/TestHTML5LibBuilder.test_cloned_multivalue_nodev   s<    3yy s#BRx||r   c                 h    d}| j                  |      }d|j                  j                         k(  sJ y )Ns   <table><td></tbody>Az><body>A<table><tbody><tr><td></td></tr></tbody></table></body>)r#   r1   r   r,   s      r   test_foster_parentingz)TestHTML5LibBuilder.test_foster_parenting}   s3    ,yy OSWS\S\ScScSeeeer   c                    d}| j                  |      } |d      D cg c]  }|j                          c}  |d      D cg c]  }|j                          c} t        |j                  d            dk(  sJ yc c}w c c}w )z
        Test that extraction does not destroy the tree.

        https://bugs.launchpad.net/beautifulsoup/+bug/1782928
        zW
<html><head></head>
<style>
</style><script></script><body><p>hello</p></body></html>
scriptstyler*   r   N)r#   extractr2   r3   )r   r!   r#   ss       r   test_extractionz#TestHTML5LibBuilder.test_extraction   sm    
 yy "8n--"7m,,4==%&!+++ 	.,s   A<Bc                     d}| j                  |      }g }|j                  d      D ]"  }|j                  |j                  d             $ t        |      dk(  sJ y)z
        Test that empty comment does not break structure.

        https://bugs.launchpad.net/beautifulsoup/+bug/1806598
        zI
<html>
<body>
<form>
<!----><input type="text">
</form>
</body>
</html>
forminputr   N)r#   r3   extendr2   )r   r!   r#   inputsr\   s        r   test_empty_commentz&TestHTML5LibBuilder.test_empty_comment   s]     yy MM&) 	2DMM$--01	26{ar   c                    d}| j                  |      }d|j                  j                  k(  sJ d|j                  j                  k(  sJ d|j                  j	                  d      j
                  k(  sJ | j                  |d      }d|j                  j                  j
                  k(  sJ d|j                  j                  j
                  k(  sJ y )Nz=
   <p>

<sourceline>
<b>text</b></sourceline><sourcepos></p>r/      
sourcelineF)store_line_numbers	sourcepos)r#   r*   rc   re   rG   namer,   s      r   test_tracking_line_numbersz.TestHTML5LibBuilder.test_tracking_line_numbers   s     Uyy DFF%%%%DFF$$$$tvv{{<8==== yyEy:tvv005555dff..33333r   c                      y )N )r   s    r   test_special_string_containersz2TestHTML5LibBuilder.test_special_string_containers   s     	r   c                     dD ]o  \  }}}d|z  }| j                  |      j                  }|j                         }d|j                  d      z  }||k(  sJ |j                  d      }d|z  }||k(  roJ  y )N))z&RightArrowLeftArrow;u   ⇄s   &rlarr;)z&models;u   ⊧s   &models;)z&Nfr;u   𝔑s   &Nfr;)z&ngeqq;u   ≧̸s   &ngeqq;)z&not;   ¬s   &not;)z&Not;u   ⫬s   &Not;)z&quot;"   ")z&there4;   ∴   &there4;)z&Therefore;ro   rp   )z&therefore;ro   rp   )z&fjlig;fjs   fj)z&sqcup;u   ⊔s   &sqcup;)z&sqcups;u   ⊔︀s   &sqcups;)z&apos;'   ')z&verbar;|   |z<div>%s</div>s   <div>%s</div>utf8html)	formatter)r#   divr+   )	r   input_elementoutput_unicodeoutput_elementr!   ry   without_elementexpectwith_elements	            r   test_html5_attributesz)TestHTML5LibBuilder.test_html5_attributes   s    >
 	*9M>>" %}4F))F#''C!jjlO%(=(=f(EEF"f,,:::7L%6F6))3	*r   N)__name__
__module____qualname____doc__propertyr   r%   r(   r-   r4   r6   rA   rJ   rM   rR   rT   rZ   r`   rg   rj   r   ri   r   r   r   r      sg    
 -   
^<(0,,	-: Cf
,$ ,4"*r   r   )r   pytestr   bs4r   bs4.elementr    r   r   r   markskipifr   ri   r   r   <module>r      s\    J    $  L  N*($= N*	N*r   