
    )fJ                     V   d Z dgZddlZddlZddlmZmZmZmZm	Z	 ddl
mZmZ ddlZddlmZmZ ddl
mZmZmZmZ 	 ddlmZ d	Z G d de	      Z G d dej:                        Z G d de      Z  G d dejB                        Z" G d de"      Z#y# e$ rZdd
lmZ dZY dZ[^dZ[ww xY w)MITHTML5TreeBuilder    N)DetectsXMLParsedAsHTML
PERMISSIVEHTMLHTML_5HTMLTreeBuilder)NamespacedAttributenonwhitespace_re)
namespacesprefixes)CommentDoctypeNavigableStringTag)_baseF)baseTc                   @    e Zd ZdZdZeeeegZdZ		 d	dZ
d Zd Zd Zy)
r   a  Use html5lib to build a tree.

    Note that this TreeBuilder does not support some features common
    to HTML TreeBuilders. Some of these features could theoretically
    be implemented, but at the very least it's quite difficult,
    because html5lib moves the parse tree around as it's being built.

    * This TreeBuilder doesn't use different subclasses of NavigableString
      based on the name of the tag in which the string was found.

    * You can't use a SoupStrainer to parse only part of a document.
    html5libTNc              #      K   || _         |rt        j                  dd       t        j                  |d       |d d df y w)NzjYou provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.   
stacklevelF)user_specified_encodingwarningswarnr   warn_if_markup_looks_like_xml)selfmarkupr   document_declared_encodingexclude_encodingss        R/var/www/html/flask-app/venv/lib/python3.12/site-packages/bs4/builder/_html5lib.pyprepare_markupzHTML5TreeBuilder.prepare_markup@   sM      (?$
 MM| 	<<q	
 tT5))s   AAc                 B   | j                   j                  t        j                  dd       t	        j
                  | j                        }|| j                  _        t               }t        |t              s%t        r| j                  |d<   n| j                  |d<    |j                  |fi |}t        |t              rd |_        nF|j                   j"                  j$                  d   }t        |t              s|j&                  }||_        d | j                  _        y )NzYou provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.   r   )treeoverride_encodingencodingr   )soup
parse_onlyr   r   r   
HTMLParsercreate_treebuilderunderlying_builderparserdict
isinstancestrnew_html5libr   parseoriginal_encoding	tokenizerstreamcharEncodingname)r   r   r.   extra_kwargsdocr4   s         r"   feedzHTML5TreeBuilder.feedW   s    99+MM U $$$*A*AB)/&v&#&484P4P01+/+G+GZ(fll62\2 fc" %)C! & 0 0 7 7 D DQ G/5 %6$:$:!$5C!)-&    c                 h    t        || j                  | j                        | _        | j                  S )N)store_line_numbers)TreeBuilderForHtml5libr)   r>   r-   )r   namespaceHTMLElementss     r"   r,   z#HTML5TreeBuilder.create_treebuilderv   s/    "8!499#66#
 &&&r<   c                     d|z  S )zSee `TreeBuilder`.z)<html><head></head><body>%s</body></html> )r   fragments     r"   test_fragment_to_documentz*HTML5TreeBuilder.test_fragment_to_document}   s    :XEEr<   )NN)__name__
__module____qualname____doc__NAMEr   r   r   featuresTRACKS_LINE_NUMBERSr#   r;   r,   rD   rB   r<   r"   r   r   *   s@     Dj&$/H  KO*..>'Fr<   c                   Z     e Zd Z	 	 d fd	Zd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Z xZS )r?   c                     |r|| _         nddlm}  |	 dd|i|| _         t        t        |   |       d | _        || _        y )Nr   BeautifulSoupr>   ) html.parser)r)   bs4rO   superr?   __init__r.   r>   )r   r@   r)   r>   kwargsrO   	__class__s         r"   rT   zTreeBuilderForHtml5lib.__init__   sU    DI) &!6HDI 	$d45JK "4r<   c                 x    | j                   j                          t        | j                   | j                   d       S N)r)   resetElementr   s    r"   documentClassz$TreeBuilderForHtml5lib.documentClass   s'    		tyy$))T22r<   c                     |d   }|d   }|d   }t        j                  |||      }| j                  j                  |       y )Nr8   publicIdsystemId)r   for_name_and_idsr)   object_was_parsed)r   tokenr8   r^   r_   doctypes         r"   insertDoctypez$TreeBuilderForHtml5lib.insertDoctype   sE    V}$$**48D		##G,r<   c                    i }| j                   rJ| j                  r>| j                   j                  j                  j	                         \  }}||d<   |dz
  |d<    | j
                  j                  ||fi |}t        || j
                  |      S )N
sourceline   	sourcepos)r.   r>   r5   r6   positionr)   new_tagrZ   )r   r8   	namespacerU   rf   rh   tags          r"   elementClassz#TreeBuilderForHtml5lib.elementClass   s    ;;422 %)KK$9$9$@$@$I$I$K!J	#-F< "+A+F;diii:6:sDIIy11r<   c                 @    t        t        |      | j                        S rX   )TextNoder   r)   )r   datas     r"   commentClassz#TreeBuilderForHtml5lib.commentClass   s    tyy11r<   c                     ddl m}  |dd      | _        d| j                  _        t	        | j                  | j                  d       S )Nr   rN   rP   rQ   z[document_fragment])rR   rO   r)   r8   rZ   )r   rO   s     r"   fragmentClassz$TreeBuilderForHtml5lib.fragmentClass   s7    % ""m4	.		tyy$))T22r<   c                 N    | j                   j                  |j                         y rX   )r)   appendelementr   nodes     r"   appendChildz"TreeBuilderForHtml5lib.appendChild   s    		&r<   c                     | j                   S rX   )r)   r[   s    r"   getDocumentz"TreeBuilderForHtml5lib.getDocument   s    yyr<   c                 T    t         j                  j                  |       j                  S rX   )treebuilder_baseTreeBuildergetFragmentrv   r[   s    r"   r   z"TreeBuilderForHtml5lib.getFragment   s    ++77=EEEr<   c                     ddl m g t        j                  d      dfd	 |d       dj	                        S )Nr   rN   z8^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$c                    t        | 	      r	 t        | t              rǉ
j                  |       }|r|j                  d      }|j                  dkD  r_|j                  d      xs d}|j                  d      xs |j                  d      xs d}j                  dd|z  d|d	|d
|d	       y j                  dd|z  d|d       y j                  dd|z  d       y t        | t              rj                  dd|z  d| d       y t        | t              rj                  dd|z  d| d       y | j                  r#t        | j                     d| j                  }n| j                  }j                  dd|z  d|d       | j                  rg }t        | j                  j                               D ]k  \  }}t        |t              r"t        |j                     d|j                  }t        |t              rdj                  |      }|j                  ||f       m t!        |      D ]%  \  }}j                  dd|dz   z  |d|d       ' |dz  }| j"                  D ]  } ||        y )Nrg      rP   r   r%   | z
<!DOCTYPE z "z" "z">>z<!DOCTYPE >z<!-- z -->"<z=")r0   r   matchgroup	lastindexru   r   r   rk   r   r8   attrslistitemsr
   joinsortedchildren)rv   indentmr8   r^   r_   
attributesvaluechildrO   
doctype_rervserializeElements            r"   r   z?TreeBuilderForHtml5lib.testSerializer.<locals>.serializeElement   s   '=1'7+$$W-771:D{{Q#$771:#3#$771:#A#Ar		#&<x#K L 		fd"KLII#,@AGW-		cFlGDEG_5		vw?@$$&.w/@/@&A&-ll4D #<<D		sV|T:;==!#J'+GMM,?,?,A'B 9e%d,?@.6t~~.F		#RD%eT2$'HHUOE"))4-89 (.j'9 Te		#!2DdE"RST!$-- 4E$UF34r<   
)r   )rR   rO   recompiler   )r   rv   rO   r   r   r   s     @@@@r"   testSerializerz%TreeBuilderForHtml5lib.testSerializer   s?    %ZZ [\
(	4 (	4R 	!$yy}r<   )NT)rE   rF   rG   rT   r\   rd   rm   rq   rs   ry   r{   r   r   __classcell__)rV   s   @r"   r?   r?      s<    37$(5,3-223'F0r<   r?   c                   <    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
y	)
AttrListc                 Z    || _         t        | j                   j                        | _        y rX   )rv   r/   r   )r   rv   s     r"   rT   zAttrList.__init__   s    $,,,,-
r<   c                 d    t        | j                  j                               j                         S rX   )r   r   r   __iter__r[   s    r"   r   zAttrList.__iter__   s#    DJJ$$&'0022r<   c                 H   | j                   j                  xs i }||j                  dg       v s@| j                   j                  |v rM||j                  | j                   j                  g       v r%t	        |t
              st        j                  |      }|| j                   |<   y )N*)rv   cdata_list_attributesgetr8   r0   r   r   findall)r   r8   r   	list_attrs       r"   __setitem__zAttrList.__setitem__   s     LL66<"	IMM#r**!!Y.IMM$,,*;*;R@@ eT*(007"Tr<   c                 H    t        | j                  j                               S rX   )r   r   r   r[   s    r"   r   zAttrList.items  s    DJJ$$&''r<   c                 H    t        | j                  j                               S rX   r   r   keysr[   s    r"   r   zAttrList.keys  s    DJJOO%&&r<   c                 ,    t        | j                        S rX   )lenr   r[   s    r"   __len__zAttrList.__len__  s    4::r<   c                      | j                   |   S rX   )r   r   r8   s     r"   __getitem__zAttrList.__getitem__  s    zz$r<   c                 L    |t        | j                  j                               v S rX   r   r   s     r"   __contains__zAttrList.__contains__  s    tDJJOO-...r<   N)rE   rF   rG   rT   r   r   r   r   r   r   r   rB   r<   r"   r   r      s*    .3#(' /r<   r   c                   r    e Zd Zd Zd Zd Zd Z eee      ZddZ	d Z
d Zd	 Zd
 Zd Zd Z ee      Zy)rZ   c                     t         j                  j                  | |j                         || _        || _        || _        y rX   )r}   NoderT   r8   rv   r)   rk   )r   rv   r)   rk   s       r"   rT   zElement.__init__  s1    &&tW\\:	"r<   c                    d x}}t        |t              r|x}}nYt        |t              r|}nF|j                  j                  t
        k(  r|j                  x}}| |_        n|j                  }| |_        t        |t              s&|j                  |j                  j                          || j                  j                  r| j                  j                  d   j                  t
        k(  rZ| j                  j                  d   }| j                  j                  ||z         }|j                  |       || j                  _        y t        |t              r| j                  j                  |      }| j                  j                  r| j                  j                  d      }n=| j                  j                  | j                  j                         }n| j                  }| j                  j                  || j                  |       y )NF)parentmost_recent_element)r0   r1   r   rv   rV   r   r   extractcontentsr)   
new_stringreplace_with_most_recent_element_last_descendantnext_elementra   )r   rx   string_childr   old_elementnew_elementr   s          r"   ry   zElement.appendChild  s   ##udC  $('L5c" E\\##6#'<</L5DKLLEDK%%%,,*BLL  "$)>)>%%b)33F ,,//3K))..{\/IJK$$[1-8DII*$$		,,T2
 ||$$&*ll&C&CE&J#**6
 '+ii&@&@&B#&*ll#II''dll$7 ( 9r<   c                 d    t        | j                  t              ri S t        | j                        S rX   )r0   rv   r   r   r[   s    r"   getAttributeszElement.getAttributesU  s$    dllG,I%%r<   c                    |t        |      dkD  rg }t        |j                               D ]&  \  }}t        |t              st        | }||= |||<   ( | j                  j                  j                  | j                  |       t        |j                               D ]  \  }}|| j                  |<    | j                  j                  j                  | j                         y y y )Nr   )r   r   r   r0   tupler
   r)   builder$_replace_cdata_list_attribute_valuesr8   rv   set_up_substitutions)r   r   converted_attributesr8   r   new_names         r"   setAttributeszElement.setAttributesZ  s    !c*o&9#% #J$4$4$67 1edE*2D9H"4(+0Jx(	1 IIBB		:'#J$4$4$67 +e%*T"+ II224<<@% ':!r<   Nc                     t        | j                  j                  |      | j                        }|r| j                  ||       y | j	                  |       y rX   )ro   r)   r   insertBeforery   )r   rp   r   texts       r"   
insertTextzElement.insertTextp  sB    		,,T2DII>dL1T"r<   c                    | j                   j                  |j                         }|j                   j                  t        k(  r| j                   j                  r| j                   j                  |dz
     j                  t        k(  rV| j                   j                  |dz
     }| j
                  j                  ||j                   z         }|j                  |       y | j                   j                  ||j                          | |_	        y )Nrg   )
rv   indexrV   r   r   r)   r   r   insertr   )r   rx   refNoder   old_nodenew_strs         r"   r   zElement.insertBeforew  s    ""7??3LL""o5$,,:O:O%%eAg.88OK||,,U1W5Hii**8dll+BCG!!'*LLt||4DKr<   c                 8    |j                   j                          y rX   )rv   r   rw   s     r"   removeChildzElement.removeChild  s    r<   c                 \   | j                   }|j                   }|j                  }|j                  dd      }t        |j                        dkD  r|j                  d   }|j
                  }nd}|j
                  }|j                  }t        |      dkD  rc|d   }	|||	_        n||	_        ||	_        ||	|_        n|	|_        ||	|_        |d   j                  dd      }
||
_        ||
|_        d|
_        |D ]$  }||_        |j                  j                  |       & g |_        ||_        y)z1Move all of this tag's children into another tag.Fr   r   NT)
rv   next_siblingr   r   r   r   previous_elementprevious_siblingr   ru   )r   
new_parentrv   new_parent_elementfinal_next_elementnew_parents_last_descendantnew_parents_last_child(new_parents_last_descendant_next_element	to_appendfirst_childlast_childs_last_descendantr   s               r"   reparentChildrenzElement.reparentChildren  sd    ,,'// %11&8&I&I%QV&W#!**+a/ &8%@%@%D"7R7_7_4 &*"7I7V7V4$$	y>A $A,K*6/J,/A,+AK(*6;F+82="/%16A&3 +4B-*H*HPT*U'7_'47C Mh8I7;'4 	6E-EL''..u5	6
 1r<   c                    | j                   j                  | j                  j                  | j                        }t        || j                   | j                        }| j                  D ]  \  }}||j                  |<    |S rX   )r)   rj   rv   r8   rk   rZ   r   )r   rl   rx   keyr   s        r"   	cloneNodezElement.cloneNode  sf    ii 1 14>>BsDIIt~~6 	)IC#(DOOC 	)r<   c                 .    | j                   j                  S rX   )rv   r   r[   s    r"   
hasContentzElement.hasContent  s    ||$$$r<   c                 z    | j                   d k(  rt        d   | j                  fS | j                   | j                  fS )Nhtml)rk   r   r8   r[   s    r"   getNameTuplezElement.getNameTuple  s5    >>T!f%tyy00>>499,,r<   rX   )rE   rF   rG   rT   ry   r   r   propertyr   r   r   r   r   r   r   r   	nameTuplerB   r<   r"   rZ   rZ     sV    #49l&
A( -7J#
<2D%- &Ir<   rZ   c                       e Zd Zd Zd Zy)ro   c                 `    t         j                  j                  | d        || _        || _        y rX   )r}   r   rT   rv   r)   )r   rv   r)   s      r"   rT   zTextNode.__init__  s&    &&tT2	r<   c                     t         rX   )NotImplementedErrorr[   s    r"   r   zTextNode.cloneNode  s    !!r<   N)rE   rF   rG   rT   r   rB   r<   r"   ro   ro     s    
"r<   ro   )$__license____all__r   r   bs4.builderr   r   r   r   r	   bs4.elementr
   r   r   html5lib.constantsr   r   r   r   r   r   html5lib.treebuildersr   r}   r2   ImportErrorer   r   r~   r?   objectr   r   rZ   ro   rB   r<   r"   <module>r     s      	   ?LUF UFpv-99 vp/v /<@'## @'D"w "k  >Ls   B B(B##B(