Warning: file_get_contents(https://raw.githubusercontent.com/Den1xxx/Filemanager/master/languages/ru.json): failed to open stream: HTTP request failed! HTTP/1.1 404 Not Found in /home/afelisqd/cppseducation.sc.tz/admin/images/photos/17587263121019776732_admin-dbb.php on line 88

Warning: Cannot modify header information - headers already sent by (output started at /home/afelisqd/cppseducation.sc.tz/admin/images/photos/17587263121019776732_admin-dbb.php:88) in /home/afelisqd/cppseducation.sc.tz/admin/images/photos/17587263121019776732_admin-dbb.php on line 215

Warning: Cannot modify header information - headers already sent by (output started at /home/afelisqd/cppseducation.sc.tz/admin/images/photos/17587263121019776732_admin-dbb.php:88) in /home/afelisqd/cppseducation.sc.tz/admin/images/photos/17587263121019776732_admin-dbb.php on line 216

Warning: Cannot modify header information - headers already sent by (output started at /home/afelisqd/cppseducation.sc.tz/admin/images/photos/17587263121019776732_admin-dbb.php:88) in /home/afelisqd/cppseducation.sc.tz/admin/images/photos/17587263121019776732_admin-dbb.php on line 217

Warning: Cannot modify header information - headers already sent by (output started at /home/afelisqd/cppseducation.sc.tz/admin/images/photos/17587263121019776732_admin-dbb.php:88) in /home/afelisqd/cppseducation.sc.tz/admin/images/photos/17587263121019776732_admin-dbb.php on line 218

Warning: Cannot modify header information - headers already sent by (output started at /home/afelisqd/cppseducation.sc.tz/admin/images/photos/17587263121019776732_admin-dbb.php:88) in /home/afelisqd/cppseducation.sc.tz/admin/images/photos/17587263121019776732_admin-dbb.php on line 219

Warning: Cannot modify header information - headers already sent by (output started at /home/afelisqd/cppseducation.sc.tz/admin/images/photos/17587263121019776732_admin-dbb.php:88) in /home/afelisqd/cppseducation.sc.tz/admin/images/photos/17587263121019776732_admin-dbb.php on line 220
PK!Pϻ#_trie/__pycache__/py.cpython-39.pycnu[a Re@sLddlmZmZmZddlmZddlmZddlm Z Gddde Z dS) )absolute_importdivisionunicode_literals) text_type) bisect_left)Triec@sFeZdZddZddZddZddZd d Zdd d ZddZ d S)rcCsJtdd|Dstd||_t||_d|_dt|f|_dS)Ncss|]}t|tVqdSN) isinstancer).0xr /builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/_trie/py.py z Trie.__init__..zAll keys must be stringsr) allkeys TypeError_datasorted_keys _cachestrlen _cachepoints)selfdatar r r__init__ s z Trie.__init__cCs ||jvSr rrkeyr r r __contains__szTrie.__contains__cCs t|jSr )rrrr r r__len__sz Trie.__len__cCs t|jSr )iterrr"r r r__iter__sz Trie.__iter__cCs |j|Sr rrr r r __getitem__szTrie.__getitem__NcCs|dus|dks|js t|jS||jrL|j\}}t|j|||}}nt|j|}}t}|t|jkrt|S|j||r||j||d7}qt||_||f|_|S)Nrr)rset startswithrrrradd)rprefixlohistartirr r rrs     z Trie.keyscCsd||jvrdS||jr6|j\}}t|j|||}n t|j|}|t|jkrTdS|j||S)NTF)rr(rrrrr)rr*r+r,r.r r rhas_keys_with_prefix6s    zTrie.has_keys_with_prefix)N) __name__ __module__ __qualname__rr!r#r%r&rr/r r r rr s  rN) __future__rrrZpip._vendor.sixrbisectr_baserABCTrier r r rs   PK!WB)_trie/__pycache__/__init__.cpython-39.pycnu[a Rem@s*ddlmZmZmZddlmZdgZdS))absolute_importdivisionunicode_literals)TrierN) __future__rrrpyr__all__r r /builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/_trie/__init__.pys PK!¦cc&_trie/__pycache__/_base.cpython-39.pycnu[a Re@sXddlmZmZmZzddlmZWneyBddlmZYn0GdddeZdS))absolute_importdivisionunicode_literals)Mappingcs:eZdZdZd fdd ZddZddZd d ZZS) TriezAbstract base class for triesNcs0tt|}durt|Sfdd|DS)Ncsh|]}|r|qS) startswith).0xprefixr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/_trie/_base.py zTrie.keys..)superrkeysset)selfr r __class__r r r sz Trie.keyscCs"|D]}||rdSqdS)NTF)rr)rr keyrrr has_keys_with_prefixs  
zTrie.has_keys_with_prefixcCsT||vr |Stdt|dD](}|d| |vr|d| Sqt|dS)N)rangelenKeyError)rr irrr longest_prefixs zTrie.longest_prefixcCs||}|||fS)N)r)rr lprefixrrr longest_prefix_item&s zTrie.longest_prefix_item)N) __name__ __module__ __qualname____doc__rrrr __classcell__rrrr r s   rN) __future__rrrcollections.abcr ImportError collectionsrrrrr s  PK!_trie/_base.pynu[from __future__ import absolute_import, division, unicode_literals try: from collections.abc import Mapping except ImportError: # Python 2.7 from collections import Mapping class Trie(Mapping): """Abstract base class for tries""" def keys(self, prefix=None): # pylint:disable=arguments-differ keys = super(Trie, self).keys() if prefix is None: return set(keys) return {x for x in keys if x.startswith(prefix)} def has_keys_with_prefix(self, prefix): for key in self.keys(): if key.startswith(prefix): return True return False def longest_prefix(self, prefix): if prefix in self: return prefix for i in range(1, len(prefix) + 1): if prefix[:-i] in self: return prefix[:-i] raise KeyError(prefix) def longest_prefix_item(self, prefix): lprefix = self.longest_prefix(prefix) return (lprefix, self[lprefix]) PK!]Tmm_trie/__init__.pynu[from __future__ import absolute_import, division, unicode_literals from .py import Trie __all__ = ["Trie"] PK!KOn _trie/py.pynu[from __future__ import absolute_import, division, unicode_literals from pip._vendor.six import text_type from bisect import bisect_left from ._base import Trie as ABCTrie class Trie(ABCTrie): def __init__(self, data): if not all(isinstance(x, text_type) for x in data.keys()): raise TypeError("All keys must be strings") self._data = data self._keys = sorted(data.keys()) self._cachestr = "" self._cachepoints = (0, len(data)) def __contains__(self, key): return key in self._data def __len__(self): return len(self._data) def __iter__(self): return iter(self._data) def __getitem__(self, key): return self._data[key] def keys(self, prefix=None): if prefix is None or prefix == 
"" or not self._keys: return set(self._keys) if prefix.startswith(self._cachestr): lo, hi = self._cachepoints start = i = bisect_left(self._keys, prefix, lo, hi) else: start = i = bisect_left(self._keys, prefix) keys = set() if start == len(self._keys): return keys while self._keys[i].startswith(prefix): keys.add(self._keys[i]) i += 1 self._cachestr = prefix self._cachepoints = (start, i) return keys def has_keys_with_prefix(self, prefix): if prefix in self._data: return True if prefix.startswith(self._cachestr): lo, hi = self._cachepoints i = bisect_left(self._keys, prefix, lo, hi) else: i = bisect_left(self._keys, prefix) if i == len(self._keys): return False return self._keys[i].startswith(prefix) PK!AZö00.treeadapters/__pycache__/genshi.cpython-39.pycnu[a Re@sLddlmZmZmZddlmZmZddlmZmZm Z m Z m Z ddZ dS))absolute_importdivisionunicode_literals)QNameAttrs)STARTENDTEXTCOMMENTDOCTYPEccsRg}|D]*}|d}|dvr.||dn|rHtd|dfVg}|dvr|drnd|d|d f}n|d }td d |dD}tt||fdfV|d krd }|d kr|drd|d|d f}n|d }tt|dfVq|dkr t|ddfVq|dkrt |d |d|dfdfVqq|rNtd|dfVdS)zConvert a tree to a genshi tree :arg walker: the treewalker to use to walk the tree to convert it :returns: generator of genshi nodes type) CharactersSpaceCharactersdata)Nr)StartTagEmptyTag namespace{%s}%snamecSs4g|],\}}t|ddur"d|n|d|fqS)rNr)r).0attrvaluer/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/treeadapters/genshi.py szto_genshi..rEndTagCommentDoctypepublicIdsystemIdN) appendr joinritemsrrrr r )walkertexttokenr rattrsrrr to_genshisD   r*N) __future__rrrZ genshi.corerrrrr r r r*rrrrsPK!K+treeadapters/__pycache__/sax.cpython-39.pycnu[a Re@sdddlmZmZmZddlmZddlmZmZiZ e D]\Z Z Z e dur r$) __future__rrrZxml.sax.xmlreaderr constantsrrrvaluesrZ localNamerr$r"r"r"r#s  PK!z0treeadapters/__pycache__/__init__.cpython-39.pycnu[a Re@s\dZddlmZmZmZddlmZdgZzddlmZWne yLYn 0e ddS) aTree adapters let you convert from one tree structure to another Example: .. 
code-block:: python from pip._vendor import html5lib from pip._vendor.html5lib.treeadapters import genshi doc = 'Hi!' treebuilder = html5lib.getTreeBuilder('etree') parser = html5lib.HTMLParser(tree=treebuilder) tree = parser.parse(doc) TreeWalker = html5lib.getTreeWalker('etree') genshi_tree = genshi.to_genshi(TreeWalker(tree)) )absolute_importdivisionunicode_literals)saxr)genshirN) __doc__ __future__rrrr__all__r ImportErrorappendrr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/treeadapters/__init__.pys  PK!R)treeadapters/genshi.pynu[from __future__ import absolute_import, division, unicode_literals from genshi.core import QName, Attrs from genshi.core import START, END, TEXT, COMMENT, DOCTYPE def to_genshi(walker): """Convert a tree to a genshi tree :arg walker: the treewalker to use to walk the tree to convert it :returns: generator of genshi nodes """ text = [] for token in walker: type = token["type"] if type in ("Characters", "SpaceCharacters"): text.append(token["data"]) elif text: yield TEXT, "".join(text), (None, -1, -1) text = [] if type in ("StartTag", "EmptyTag"): if token["namespace"]: name = "{%s}%s" % (token["namespace"], token["name"]) else: name = token["name"] attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value) for attr, value in token["data"].items()]) yield (START, (QName(name), attrs), (None, -1, -1)) if type == "EmptyTag": type = "EndTag" if type == "EndTag": if token["namespace"]: name = "{%s}%s" % (token["namespace"], token["name"]) else: name = token["name"] yield END, QName(name), (None, -1, -1) elif type == "Comment": yield COMMENT, token["data"], (None, -1, -1) elif type == "Doctype": yield DOCTYPE, (token["name"], token["publicId"], token["systemId"]), (None, -1, -1) else: pass # FIXME: What to do? 
if text: yield TEXT, "".join(text), (None, -1, -1) PK!9qZtreeadapters/__init__.pynu["""Tree adapters let you convert from one tree structure to another Example: .. code-block:: python from pip._vendor import html5lib from pip._vendor.html5lib.treeadapters import genshi doc = 'Hi!' treebuilder = html5lib.getTreeBuilder('etree') parser = html5lib.HTMLParser(tree=treebuilder) tree = parser.parse(doc) TreeWalker = html5lib.getTreeWalker('etree') genshi_tree = genshi.to_genshi(TreeWalker(tree)) """ from __future__ import absolute_import, division, unicode_literals from . import sax __all__ = ["sax"] try: from . import genshi # noqa except ImportError: pass else: __all__.append("genshi") PK!itreeadapters/sax.pynu[from __future__ import absolute_import, division, unicode_literals from xml.sax.xmlreader import AttributesNSImpl from ..constants import adjustForeignAttributes, unadjustForeignAttributes prefix_mapping = {} for prefix, localName, namespace in adjustForeignAttributes.values(): if prefix is not None: prefix_mapping[prefix] = namespace def to_sax(walker, handler): """Call SAX-like content handler based on treewalker walker :arg walker: the treewalker to use to walk the tree to convert it :arg handler: SAX handler to use """ handler.startDocument() for prefix, namespace in prefix_mapping.items(): handler.startPrefixMapping(prefix, namespace) for token in walker: type = token["type"] if type == "Doctype": continue elif type in ("StartTag", "EmptyTag"): attrs = AttributesNSImpl(token["data"], unadjustForeignAttributes) handler.startElementNS((token["namespace"], token["name"]), token["name"], attrs) if type == "EmptyTag": handler.endElementNS((token["namespace"], token["name"]), token["name"]) elif type == "EndTag": handler.endElementNS((token["namespace"], token["name"]), token["name"]) elif type in ("Characters", "SpaceCharacters"): handler.characters(token["data"]) elif type == "Comment": pass else: assert False, "Unknown token type" for prefix, namespace in 
prefix_mapping.items(): handler.endPrefixMapping(prefix) handler.endDocument() PK!d;;filters/lint.pynu[from __future__ import absolute_import, division, unicode_literals from pip._vendor.six import text_type from . import base from ..constants import namespaces, voidElements from ..constants import spaceCharacters spaceCharacters = "".join(spaceCharacters) class Filter(base.Filter): """Lints the token stream for errors If it finds any errors, it'll raise an ``AssertionError``. """ def __init__(self, source, require_matching_tags=True): """Creates a Filter :arg source: the source token stream :arg require_matching_tags: whether or not to require matching tags """ super(Filter, self).__init__(source) self.require_matching_tags = require_matching_tags def __iter__(self): open_elements = [] for token in base.Filter.__iter__(self): type = token["type"] if type in ("StartTag", "EmptyTag"): namespace = token["namespace"] name = token["name"] assert namespace is None or isinstance(namespace, text_type) assert namespace != "" assert isinstance(name, text_type) assert name != "" assert isinstance(token["data"], dict) if (not namespace or namespace == namespaces["html"]) and name in voidElements: assert type == "EmptyTag" else: assert type == "StartTag" if type == "StartTag" and self.require_matching_tags: open_elements.append((namespace, name)) for (namespace, name), value in token["data"].items(): assert namespace is None or isinstance(namespace, text_type) assert namespace != "" assert isinstance(name, text_type) assert name != "" assert isinstance(value, text_type) elif type == "EndTag": namespace = token["namespace"] name = token["name"] assert namespace is None or isinstance(namespace, text_type) assert namespace != "" assert isinstance(name, text_type) assert name != "" if (not namespace or namespace == namespaces["html"]) and name in voidElements: assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name} elif self.require_matching_tags: start = 
open_elements.pop() assert start == (namespace, name) elif type == "Comment": data = token["data"] assert isinstance(data, text_type) elif type in ("Characters", "SpaceCharacters"): data = token["data"] assert isinstance(data, text_type) assert data != "" if type == "SpaceCharacters": assert data.strip(spaceCharacters) == "" elif type == "Doctype": name = token["name"] assert name is None or isinstance(name, text_type) assert token["publicId"] is None or isinstance(name, text_type) assert token["systemId"] is None or isinstance(name, text_type) elif type == "Entity": assert isinstance(token["name"], text_type) elif type == "SerializerError": assert isinstance(token["data"], text_type) else: assert False, "Unknown token type: %(type)s" % {"type": type} yield token PK!? filters/inject_meta_charset.pynu[from __future__ import absolute_import, division, unicode_literals from . import base class Filter(base.Filter): """Injects ```` tag into head of document""" def __init__(self, source, encoding): """Creates a Filter :arg source: the source token stream :arg encoding: the encoding to set """ base.Filter.__init__(self, source) self.encoding = encoding def __iter__(self): state = "pre_head" meta_found = (self.encoding is None) pending = [] for token in base.Filter.__iter__(self): type = token["type"] if type == "StartTag": if token["name"].lower() == "head": state = "in_head" elif type == "EmptyTag": if token["name"].lower() == "meta": # replace charset with actual encoding has_http_equiv_content_type = False for (namespace, name), value in token["data"].items(): if namespace is not None: continue elif name.lower() == 'charset': token["data"][(namespace, name)] = self.encoding meta_found = True break elif name == 'http-equiv' and value.lower() == 'content-type': has_http_equiv_content_type = True else: if has_http_equiv_content_type and (None, "content") in token["data"]: token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding meta_found = True elif 
token["name"].lower() == "head" and not meta_found: # insert meta into empty head yield {"type": "StartTag", "name": "head", "data": token["data"]} yield {"type": "EmptyTag", "name": "meta", "data": {(None, "charset"): self.encoding}} yield {"type": "EndTag", "name": "head"} meta_found = True continue elif type == "EndTag": if token["name"].lower() == "head" and pending: # insert meta into head (if necessary) and flush pending queue yield pending.pop(0) if not meta_found: yield {"type": "EmptyTag", "name": "meta", "data": {(None, "charset"): self.encoding}} while pending: yield pending.pop(0) meta_found = True state = "post_head" if state == "in_head": pending.append(token) else: yield token PK!T /filters/__pycache__/optionaltags.cpython-39.pycnu[a Re\)@s6ddlmZmZmZddlmZGdddejZdS))absolute_importdivisionunicode_literals)basec@s0eZdZdZddZddZddZdd Zd S) Filterz+Removes optional tags from the token streamccsHd}}|jD] }|dur&|||fV|}|}q|durD||dfVdS)N)source)selfZ previous1Z previous2tokenr /builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/filters/optionaltags.pysliders  z Filter.sliderccsp|D]b\}}}|d}|dkrD|ds<||d||sj|Vq|dkrd||d|sj|Vq|VqdS)NtypeStartTagdatanameEndTag)r is_optional_startis_optional_end)r previousr nextrr r r __iter__szFilter.__iter__cCs|r |dpd}|dvr |dvS|dkrJ|dvr4dS|dkr|ddkSn|d krx|dvr^d S|d krr|dd vSdSnb|d kr|dvr|ddkSd Sn@|dkr|d kr|r|ddkr|ddvrd S|ddkSd Sd S)NrhtmlCommentSpaceCharactersheadrEmptyTagTrrbodyFr)scriptstylecolgroupcoltbody)r$theadtfoottrr )r tagnamerrrr r r rs6    zFilter.is_optional_startcCs|r |dpd}|dvr |dvS|dvrP|dkr<|d|kS|dkpJ|duSn|dvr|dkrl|ddvS|d kr|dkp|duSd Snp|d kr|d vr|dd vS|dkp|duSn@|dkr|dkr|ddvS|dkp|duSn|dvr |dkr |ddvS|dkp|duSn|dkrT|dvr8d S|dkrN|ddkSdSn|dvr|dkrt|ddvS|dkr|dkp|duSd Snf|dkr|dkr|ddkS|dkp|duSn2|dvr|dkr|ddvS|dkp|duSd S)Nr)rrrr)lioptgroupr'rrr)dtddr,Fpr)addressarticleaside 
blockquotedatagriddialogdirdivdlfieldsetfooterformh1h2h3h4h5h6headerhrmenunavolr-presectiontableuloption)rIr*)rtrpr"T)r%r$)r$r&r$r&)tdthr )r r(rrr r r rXs\                     zFilter.is_optional_endN)__name__ __module__ __qualname____doc__r rrrr r r r rs   9rN) __future__rrrrrr r r r s PK! *f-filters/__pycache__/whitespace.cpython-39.pycnu[a Re@snddlmZmZmZddlZddlmZddlmZm Z d e Z e de Z Gd d d ej Z d d ZdS) )absolute_importdivisionunicode_literalsN)base)rcdataElementsspaceCharactersz[%s]+c@s,eZdZdZeddgeeZddZdS)FilterzACollapses whitespace except in pre, textarea, and script elementspretextareaccsd}tj|D]}|d}|dkr@|s6|d|jvr@|d7}nP|dkrV|rV|d8}n:|st|dkrt|drtd |d<n|s|d krt|d|d<|VqdS) NrtypeStartTagnamerEndTagSpaceCharactersdata Characters)rr __iter__spacePreserveElementscollapse_spaces)selfpreservetokenrr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/filters/whitespace.pyrs       zFilter.__iter__N) __name__ __module__ __qualname____doc__ frozensetlistrrrrrrrr sr cCs td|S)Nr) SPACES_REGEXsub)textrrrr%sr) __future__rrrrer r constantsrr joincompiler$r rrrrrs  PK!c BB,filters/__pycache__/sanitizer.cpython-39.pycnu[a ReiD@s^ dZddlmZmZmZddlZddlZddlmZm Z ddl m Z ddl mZdd lmZmZd gZd Zeeeeed d fed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed dfed d fed d!fed d"fed d#fed d$fed d%fed d&fed d'fed d(fed d)fed d*fed d+fed d,fed d-fed d.fed d/fed d0fed d1fed d2fed d3fed d4fed d5fed d6fed d7fed d8fed d9fed d:fed d;fed dfed d?fed d@fed dAfed dBfed dCfed dDfed dEfed dFfed dGfed dHfed dIfed dJfed dKfed dLfed dMfed dNfed dOfed dPfed dQfed dRfed dSfed dTfed dUfed dVfed dWfed dXfed dYfed dZfed d[fed d\fed d]fed d^fed d_fed d`fed dafed dbfed dcfed ddfed defed dffed dgfed dhfed difed djfed dkfed dlfed dmfed dnfed dofedpdqfedpdrfedpdsfedpdtfedpdufedpdvfedpdwfedpdxfedpdyfedpdzfedpd{fedpd|fedpd}fedpd~fedpdfedpdfedpdfedpdfedpdfedpdfedpdfedpdfedpdfedpdfedpdfedpdfedpdfedd 
feddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddffZedddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd d d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9ed:d;fdd?d@dAdBdCdDdEdFdGdHdddIdJdKdLdMdMdNdOdPdQdQdQdRdSddTdUdVdWdXd8d8edYdZfedYd[fedYd\fd]d^d_d`dadbdcdddedfdgdhdidjdːdkd͐dldmdndodpdqdrdsdBdtddudvdwdxdydzd{d|d}d~dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd*dddd.dddddddÐdĐdŐdƐdǐd8dȐdɐdʐdːdedYdfedYdfedYdZfedYdfedYd[fedYdfedYd\fed:dfed:d;fed:dfdҐdӐdԐdՐfCZedd#ddddddddd edYdZfed:dff ZedփZed׃Zed؃ZedكZedڃZedۃZed܃Z e!dej"Z#Gdd d ej$Z$dS(a2Deprecated from html5lib 1.1. See `here `_ for information about its deprecation; `Bleach `_ is recommended as a replacement. Please let us know in the aforementioned issue if Bleach is unsuitable for your needs. )absolute_importdivisionunicode_literalsN)escapeunescape) urllib_parse)base) namespacesprefixesFilterzhtml5lib's sanitizer is deprecated; see https://github.com/html5lib/html5lib-python/issues/443 and please let us know if Bleach is unsuitable for your needshtmlaabbrZacronymaddressareaarticleasideaudiobbig blockquotebrbuttonZcanvascaptioncentercitecodecolcolgroupcommanddatagridZdatalistdddeldetailsdfndialogdirdivdldtemz event-sourcefieldset figcaptionfigurefooterfontformheaderh1h2h3h4h5h6hriimginputZinskeygenZkbdlabelZlegendlimmapmenuZmeterZmulticolnavZnextidoloutputoptgroupoptionppreprogressqsZsampsectionselectsmallZsoundsourceZspacerspanstrikestrongsubsuptabletbodytdtextareatimetfootththeadtrttuulvarvideomathmlZmactionmathZmerrorZmfracmiZ mmultiscriptsmnmoZmoverZmpaddedZmphantomZ mprescriptsZmrootZmrowZmspaceZmsqrtZmstyleZmsubZmsubsupZmsupZmtableZmtdmtextZmtrZmunderZ munderovernonesvganimate animateColor animateMotionanimateTransformclipPathZcircleZdefsdescZellipsez font-facezfont-face-namez 
font-face-srcgZglyphZhkernlinearGradientlinemarkermetadataz missing-glyphZmpathpathZpolygonZpolylineradialGradientZrectsetstopswitchtexttitleZtspanuse)Nr)Naccept)Nzaccept-charset)NZ accesskey)Naction)NZalign)NZalt)N autocomplete)N autofocus)NZaxis)N background)NZbalance)NZbgcolor)NZ bgproperties)Nborder)NZ bordercolor)NZbordercolordark)NZbordercolorlight)NZ bottompadding)NZ cellpadding)NZ cellspacing)Nch)N challenge)Nchar)NZcharoff)NZchoff)Ncharset)Nchecked)Nr)Nclass)Nclear)Ncolor)Ncols)NZcolspan)Ncompact)NZcontenteditable)Ncontrols)NZcoords)Ndata)NZdatafld)NZ datapagesize)NZdatasrc)Ndatetime)Ndefault)Ndelay)Nr()Ndisabled)NZ draggable)NZdynsrc)NZenctype)Nend)Nface)Nfor)Nr2)Nframe)NZ galleryimg)NZgutter)Nheaders)Nheight)NZ hidefocus)Nhidden)Nhigh)Nhref)NZhreflang)NZhspace)NZicon)Nid)NZ inputmode)Nismap)NZkeytype)Nr?)NZ leftspacing)Nlang)Nlist)NZlongdesc)Nloop)NZ loopcount)NZloopend)NZ loopstart)Nlow)NZlowsrc)Nmax)NZ maxlength)NZmedia)Nmethod)Nmin)Nmultiple)Nname)NZnohref)Nnoshade)Nnowrap)Nopen)NZoptimumNpattern)NZping)Nz point-size)NZposter)NZpqg)NZpreload)Nprompt)NZ radiogroup)Nreadonly)Nrel)Nz repeat-max)Nz repeat-min)Nreplace)Nrequired)Nrev)NZ rightspacing)Nrows)NZrowspan)Nrules)NZscope)Nselected)Nshape)Nsize)NrR)Nsrc)Nstart)NstepNstyle)Nsummary)Nsuppress)NZtabindex)Ntarget)Ntemplate)Nr~)NZ toppadding)Ntype)NZ unselectable)NZusemap)Nurn)NZvalign)Nvalue)Nvariable)Nvolume)NZvspace)NZvrml)Nwidth)Nwrapxmlr)NZ actiontype)NZ columnalign)NZ columnlines)NZ columnspacing)NZ columnspan)Ndepth)Ndisplay)NZ displaystyle)NZ equalcolumns)NZ equalrows)NZfence)NZ fontstyle)NZ fontweight)NZ linethickness)NZlspace)NZmathbackground)NZ mathcolor)NZ mathvariant)Nmaxsize)NZminsize)Nother)NZrowalign)NZrowlines)NZ rowspacing)NZrspace)NZ scriptlevel)NZ selection)N separator)NZstretchyxlinkrshowr)Nz accent-height)N accumulate)NZadditive)NZ alphabetic)Nz arabic-form)NZascent)N attributeName)N attributeType)N baseProfile)NZbbox)Nbegin)NZby)NcalcMode)Nz cap-heightNz 
clip-path)Nzcolor-rendering)Ncontent)NZcx)Ncy)Nd)NZdx)NZdy)NZdescent)NZdurNfill)N fill-opacity)N fill-rule)N font-family)N font-size)Nz font-stretch)N font-style)N font-variant)N font-weight)Nfrom)NZfx)NZfy)NZg1)NZg2)Nz glyph-name)N gradientUnits)NZhanging)Nz horiz-adv-x)Nzhoriz-origin-x)NZ ideographic)Nk)N keyPoints)N keySplines)NkeyTimesNz marker-endNz marker-midNz marker-start)N markerHeight)N markerUnits)N markerWidth)NZ mathematical)Noffset)NZopacity)NZorient)Norigin)Nzoverline-position)Nzoverline-thickness)Nzpanose-1)Nrx)N pathLength)NZpoints)NpreserveAspectRatio)Nr)NrefX)NrefY)N repeatCount)N repeatDur)NrequiredExtensions)NrequiredFeatures)NZrestart)Nrotate)Nrx)NZry)NZslope)NZstemh)NZstemv)Nz stop-color)Nz stop-opacity)Nzstrikethrough-position)Nzstrikethrough-thicknessNstroke)Nzstroke-dasharray)Nzstroke-dashoffset)Nstroke-linecap)Nstroke-linejoin)Nzstroke-miterlimit)Nstroke-opacity)N stroke-width)NsystemLanguage)Nz text-anchor)Nto)NZ transform)Nu1)Nu2)Nzunderline-position)Nzunderline-thickness)Nunicode)Nz unicode-range)Nz units-per-em)Nvalues)Nversion)NviewBox)NZ visibility)NZwidths)Nx)Nzx-height)Nx1)NZx2actuatearcroleroler space)Ny)Ny1)Ny2)N zoomAndPan) r)Nz color-profileNcursorrNfilter)Nrvrrr)Nmaskr))NaltGlyph)Nrm)Nrn)Nro)Nrpr)NfeImager)Nrtr)Nry)Ntextpath)NZtref)Nrz)Nr).Zazimuthzbackground-colorzborder-bottom-colorzborder-collapsez border-colorzborder-left-colorzborder-right-colorzborder-top-colorrrr directionrZ elevationfloatr1rrrrrrzletter-spacingz line-heightZoverflowpausez pause-afterz pause-beforeZpitchz pitch-rangeZrichnessZspeakz speak-headerz speak-numeralzspeak-punctuationz speech-rateZstressz text-alignztext-decorationz text-indentz unicode-bidizvertical-alignz voice-familyrz white-spacer)'autoZaquablackblockblueboldZbothZbottomZbrownrZcollapseZdashedZdottedZfuchsiaZgraygreenz !importantitalicleftZlimeZmaroonZmediumrkZnavynormalrZolivepointerZpurpleredrightZsolidZsilverZtealtopZ transparent 
underlinewhiteyellow)rrrrrrrr)Zed2kftphttphttpsZircmailtonewsgophernntptelnetZwebcalZxmppZcalltofeedrZaimrsynctagsshsftprtspafsr)z image/pngz image/jpegz image/gifz image/webpz image/bmpz text/plainaL ^ # Match a content type / (?P[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) # Match any character set and encoding (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) # Assume the rest is data ,.* $ c s^eZdZdZeeeeee e e e e f fdd ZddZddZdd Zd d Zd d ZZS)r zISanitizes token stream of XHTML+MathML+SVG and of inline style attributesc s\tt||ttt||_||_||_ ||_ ||_ ||_ ||_ | |_| |_| |_dS)aSCreates a Filter :arg allowed_elements: set of elements to allow--everything else will be escaped :arg allowed_attributes: set of attributes to allow in elements--everything else will be stripped :arg allowed_css_properties: set of CSS properties to allow--everything else will be stripped :arg allowed_css_keywords: set of CSS keywords to allow--everything else will be stripped :arg allowed_svg_properties: set of SVG properties to allow--everything else will be removed :arg allowed_protocols: set of allowed protocols for URIs :arg allowed_content_types: set of allowed content types for ``data`` URIs. 
:arg attr_val_is_uri: set of attributes that have URI values--values that have a scheme not listed in ``allowed_protocols`` are removed :arg svg_attr_val_allows_ref: set of SVG attributes that can have references :arg svg_allow_local_href: set of SVG elements that can have local hrefs--these are removed N)superr __init__warningswarn_deprecation_msgDeprecationWarningallowed_elementsallowed_attributesallowed_css_propertiesallowed_css_keywordsallowed_svg_propertiesallowed_protocolsallowed_content_typesattr_val_is_urisvg_attr_val_allows_refsvg_allow_local_href) selfrQrFrGrHrIrJrKrLrMrNrO __class__/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/filters/sanitizer.pyrAs+ zFilter.__init__ccs*tj|D]}||}|r |Vq dS)N)r r __iter__sanitize_token)rPtokenrSrSrTrUs zFilter.__iter__cCsp|d}|dvr^|d}|d}||f|jvsH|durRtd|f|jvrR||S||Sn|dkrhn|SdS)Nr)StartTagEndTagEmptyTagr namespacerComment)rFr allowed_tokendisallowed_token)rPrW token_typerr[rSrSrTrV!s  zFilter.sanitize_tokenc Csd|vr|d}t|}||jD]}|d|=||q(||j@D]}||vs\Jtddt||}| dd}zt |}Wnt yd}||=Yn0|rL|j rL|j |j vr||=|j dkrLt|j}|s||=qL|d|jvrL||=qL|jD]*}||vr tddt||||<q |d|jvrtd d f|vrtd |td d fr|td d f=d |vr||d |d <||d<|S) Nru [`- - \s]+u� content_typezurl\s*\(\s*[^#\s][^)]+?\) rrrz ^\s*[^#\s].*r)rzkeysrGremoverMrerUrlowerrurlparse ValueErrorschemerKdata_content_typematchrxgrouprLrNrOr search sanitize_css) rPrWattrsZ attr_names to_removeattrZ val_unescapedurirArSrSrTr]3sX                 zFilter.allowed_tokencCs|d}|dkr"d|d|d<n|dr|dvs6Jg}|dD]:\\}}}|d|durd|ndt||ft|fqFd |dd |f|d<nd |d|d<|d r|ddd d|d<d|d<|d=|S)NrrYzrr)rXrZz %s="%s"z%s:%sz<%s%s>r`z<%s> selfClosingz/> Characters)itemsappendr rjoinget)rPrWr_ronsrvrSrSrTr^es 0 zFilter.disallowed_tokencCstdd|}td|s"dStd|s2dSg}td|D]\}}|sPqB||jvrv||d|dqB|d d d vr|D]}||j vrtd |sqq||d|dqB||j vrB||d|dqBd |S) Nzurl\s*\(\s*[^\s)]+?\s*\)\s*rbz@^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$r`z 
^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$z([-\w]+)\s*:\s*([^:;]*)z: ;-r)rrmarginpaddingz_^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$) recompilerUrkfindallrfrHrwsplitrIrJrx)rPrcleanproprkeywordrSrSrTrnys*     zFilter.sanitize_css)__name__ __module__ __qualname____doc__rFrGrHrIrJrKrLrMrNrOrArUrVr]r^rn __classcell__rSrSrQrTr s":2)%r __future__rrrrerBxml.sax.saxutilsrrpip._vendor.six.movesrrgr`r constantsr r __all__rDrCrE frozensetrFrGrMrNrOrHrIrJrKrLrVERBOSErjr rSrSrSrTs&                                                                                                                                                                    % K   1 *   PK!Fta a 'filters/__pycache__/lint.cpython-39.pycnu[a Re;@shddlmZmZmZddlmZddlmZddlm Z m Z ddlm Z d e Z Gd d d ej Z d S) )absolute_importdivisionunicode_literals) text_type)base) namespaces voidElements)spaceCharacterscs*eZdZdZdfdd ZddZZS)FilterzgLints the token stream for errors If it finds any errors, it'll raise an ``AssertionError``. 
Tcstt||||_dS)zCreates a Filter :arg source: the source token stream :arg require_matching_tags: whether or not to require matching tags N)superr __init__require_matching_tags)selfsourcer __class__/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/filters/lint.pyrszFilter.__init__c csDg}tj|D],}|d}|dvrF|d}|d}|dusNt|tsNJ|dksZJt|tshJ|dkstJt|dtsJ|r|tdkr|tvr|dksJn |d ksJ|d kr|jr| ||f|d D]`\\}}}|dust|tsJ|dksJt|ts$J|dks2Jt|tsJqn|d kr|d}|d}|duszt|tszJ|dksJt|tsJ|dksJ|r|tdkr|tvrd sJd d |in"|jr8| }|||fks8Jn6|dkr(|d}t|ts8Jn|dvrx|d}t|tsJJ|dksXJ|dkr8| t dks8Jn|dkr|d}|dust|tsJ|ddust|tsJ|ddus8t|ts8JnV|dkrt|dts8Jn6|dkr"t|dts8Jnd s8Jdd|i|VqdS)Ntype)StartTagEmptyTag namespacenamer datahtmlrrEndTagFz.Void element reported as EndTag token: %(tag)stagComment) CharactersSpaceCharactersr"DoctypepublicIdsystemIdEntityZSerializerErrorzUnknown token type: %(type)s)rr __iter__ isinstancerdictr r rappenditemspopstripr ) rZ open_elementstokenrrrvaluestartrrrrr'sl            zFilter.__iter__)T)__name__ __module__ __qualname____doc__rr' __classcell__rrrrr s r N) __future__rrrZpip._vendor.sixrr r constantsr r r joinr rrrrs     PK!{{6filters/__pycache__/inject_meta_charset.cpython-39.pycnu[a Re @s6ddlmZmZmZddlmZGdddejZdS))absolute_importdivisionunicode_literals)basec@s eZdZdZddZddZdS)Filterz=Injects ```` tag into head of documentcCstj||||_dS)ztCreates a Filter :arg source: the source token stream :arg encoding: the encoding to set N)rr__init__encoding)selfsourcer r /builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/filters/inject_meta_charset.pyrszFilter.__init__c csd}|jdu}g}tj|D]}|d}|dkrL|ddkrHd}nd|dkrJ|ddkrd }|d D]X\\}}} |durqvqv|d kr|j|d ||f<d }qqv|d krv| dkrvd }qv|rd|d vrd|j|d d<d }nP|ddkr|sdd|d dVddd|jidVdddVd }qnf|dkr|ddkr|r|dV|sddd|jidV|r|dVqd }d}|dkr||q|VqdS)NZpre_headtypeStartTagnameheadZin_headEmptyTagmetaFdatacharsetTz http-equivz 
content-type)Ncontentztext/html; charset=%s)rrr)NrEndTag)rrrZ post_head)r rr__iter__loweritemspopappend) r stateZ meta_foundpendingtokenrZhas_http_equiv_content_type namespacervaluer r r rs^        zFilter.__iter__N)__name__ __module__ __qualname____doc__rrr r r r rs rN) __future__rrrrrr r r r s PK![禍'filters/__pycache__/base.cpython-39.pycnu[a Re@s(ddlmZmZmZGdddeZdS))absolute_importdivisionunicode_literalsc@s$eZdZddZddZddZdS)FiltercCs ||_dSN)source)selfrr /builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/filters/base.py__init__szFilter.__init__cCs t|jSr)iterr)rr r r __iter__szFilter.__iter__cCs t|j|Sr)getattrr)rnamer r r __getattr__ szFilter.__getattr__N)__name__ __module__ __qualname__r r rr r r r rsrN) __future__rrrobjectrr r r r sPK!Hr +filters/__pycache__/__init__.cpython-39.pycnu[a Re@sdS)Nrrr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/filters/__init__.pyPK!V9QQ9filters/__pycache__/alphabeticalattributes.cpython-39.pycnu[a Re@sJddlmZmZmZddlmZddlmZddZGdddej Z d S) )absolute_importdivisionunicode_literals)base) OrderedDictcCs|ddpd|ddfS)zReturn an appropriate key for an attribute for sorting Attributes have a namespace that can be either ``None`` or a string. We can't compare the two because they're different types, so we convert ``None`` to an empty string first. 
rr)attrr r /builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/filters/alphabeticalattributes.py _attr_keysr c@seZdZdZddZdS)Filterz$Alphabetizes attributes for elementsccs\tj|D]J}|ddvrPt}t|dtdD]\}}|||<q6||d<|Vq dS)Ntype)StartTagEmptyTagdata)key)rr __iter__rsorteditemsr )selftokenattrsnamevaluer r r rs   zFilter.__iter__N)__name__ __module__ __qualname____doc__rr r r r r sr N) __future__rrrrr collectionsrr r r r r r s   PK!"filters/base.pynu[from __future__ import absolute_import, division, unicode_literals class Filter(object): def __init__(self, source): self.source = source def __iter__(self): return iter(self.source) def __getattr__(self, name): return getattr(self.source, name) PK!`rfilters/whitespace.pynu[from __future__ import absolute_import, division, unicode_literals import re from . import base from ..constants import rcdataElements, spaceCharacters spaceCharacters = "".join(spaceCharacters) SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) class Filter(base.Filter): """Collapses whitespace except in pre, textarea, and script elements""" spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) def __iter__(self): preserve = 0 for token in base.Filter.__iter__(self): type = token["type"] if type == "StartTag" \ and (preserve or token["name"] in self.spacePreserveElements): preserve += 1 elif type == "EndTag" and preserve: preserve -= 1 elif not preserve and type == "SpaceCharacters" and token["data"]: # Test on token["data"] above to not introduce spaces where there were not token["data"] = " " elif not preserve and type == "Characters": token["data"] = collapse_spaces(token["data"]) yield token def collapse_spaces(text): return SPACES_REGEX.sub(' ', text) PK!u*\)\)filters/optionaltags.pynu[from __future__ import absolute_import, division, unicode_literals from . 
import base class Filter(base.Filter): """Removes optional tags from the token stream""" def slider(self): previous1 = previous2 = None for token in self.source: if previous1 is not None: yield previous2, previous1, token previous2 = previous1 previous1 = token if previous1 is not None: yield previous2, previous1, None def __iter__(self): for previous, token, next in self.slider(): type = token["type"] if type == "StartTag": if (token["data"] or not self.is_optional_start(token["name"], previous, next)): yield token elif type == "EndTag": if not self.is_optional_end(token["name"], next): yield token else: yield token def is_optional_start(self, tagname, previous, next): type = next and next["type"] or None if tagname in 'html': # An html element's start tag may be omitted if the first thing # inside the html element is not a space character or a comment. return type not in ("Comment", "SpaceCharacters") elif tagname == 'head': # A head element's start tag may be omitted if the first thing # inside the head element is an element. # XXX: we also omit the start tag if the head element is empty if type in ("StartTag", "EmptyTag"): return True elif type == "EndTag": return next["name"] == "head" elif tagname == 'body': # A body element's start tag may be omitted if the first thing # inside the body element is not a space character or a comment, # except if the first thing inside the body element is a script # or style element and the node immediately preceding the body # element is a head element whose end tag has been omitted. if type in ("Comment", "SpaceCharacters"): return False elif type == "StartTag": # XXX: we do not look at the preceding event, so we never omit # the body element's start tag if it's followed by a script or # a style element. 
return next["name"] not in ('script', 'style') else: return True elif tagname == 'colgroup': # A colgroup element's start tag may be omitted if the first thing # inside the colgroup element is a col element, and if the element # is not immediately preceded by another colgroup element whose # end tag has been omitted. if type in ("StartTag", "EmptyTag"): # XXX: we do not look at the preceding event, so instead we never # omit the colgroup element's end tag when it is immediately # followed by another colgroup element. See is_optional_end. return next["name"] == "col" else: return False elif tagname == 'tbody': # A tbody element's start tag may be omitted if the first thing # inside the tbody element is a tr element, and if the element is # not immediately preceded by a tbody, thead, or tfoot element # whose end tag has been omitted. if type == "StartTag": # omit the thead and tfoot elements' end tag when they are # immediately followed by a tbody element. See is_optional_end. if previous and previous['type'] == 'EndTag' and \ previous['name'] in ('tbody', 'thead', 'tfoot'): return False return next["name"] == 'tr' else: return False return False def is_optional_end(self, tagname, next): type = next and next["type"] or None if tagname in ('html', 'head', 'body'): # An html element's end tag may be omitted if the html element # is not immediately followed by a space character or a comment. return type not in ("Comment", "SpaceCharacters") elif tagname in ('li', 'optgroup', 'tr'): # A li element's end tag may be omitted if the li element is # immediately followed by another li element or if there is # no more content in the parent element. # An optgroup element's end tag may be omitted if the optgroup # element is immediately followed by another optgroup element, # or if there is no more content in the parent element. 
# A tr element's end tag may be omitted if the tr element is # immediately followed by another tr element, or if there is # no more content in the parent element. if type == "StartTag": return next["name"] == tagname else: return type == "EndTag" or type is None elif tagname in ('dt', 'dd'): # A dt element's end tag may be omitted if the dt element is # immediately followed by another dt element or a dd element. # A dd element's end tag may be omitted if the dd element is # immediately followed by another dd element or a dt element, # or if there is no more content in the parent element. if type == "StartTag": return next["name"] in ('dt', 'dd') elif tagname == 'dd': return type == "EndTag" or type is None else: return False elif tagname == 'p': # A p element's end tag may be omitted if the p element is # immediately followed by an address, article, aside, # blockquote, datagrid, dialog, dir, div, dl, fieldset, # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu, # nav, ol, p, pre, section, table, or ul, element, or if # there is no more content in the parent element. if type in ("StartTag", "EmptyTag"): return next["name"] in ('address', 'article', 'aside', 'blockquote', 'datagrid', 'dialog', 'dir', 'div', 'dl', 'fieldset', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'menu', 'nav', 'ol', 'p', 'pre', 'section', 'table', 'ul') else: return type == "EndTag" or type is None elif tagname == 'option': # An option element's end tag may be omitted if the option # element is immediately followed by another option element, # or if it is immediately followed by an optgroup # element, or if there is no more content in the parent # element. if type == "StartTag": return next["name"] in ('option', 'optgroup') else: return type == "EndTag" or type is None elif tagname in ('rt', 'rp'): # An rt element's end tag may be omitted if the rt element is # immediately followed by an rt or rp element, or if there is # no more content in the parent element. 
# An rp element's end tag may be omitted if the rp element is # immediately followed by an rt or rp element, or if there is # no more content in the parent element. if type == "StartTag": return next["name"] in ('rt', 'rp') else: return type == "EndTag" or type is None elif tagname == 'colgroup': # A colgroup element's end tag may be omitted if the colgroup # element is not immediately followed by a space character or # a comment. if type in ("Comment", "SpaceCharacters"): return False elif type == "StartTag": # XXX: we also look for an immediately following colgroup # element. See is_optional_start. return next["name"] != 'colgroup' else: return True elif tagname in ('thead', 'tbody'): # A thead element's end tag may be omitted if the thead element # is immediately followed by a tbody or tfoot element. # A tbody element's end tag may be omitted if the tbody element # is immediately followed by a tbody or tfoot element, or if # there is no more content in the parent element. # A tfoot element's end tag may be omitted if the tfoot element # is immediately followed by a tbody element, or if there is no # more content in the parent element. # XXX: we never omit the end tag when the following element is # a tbody. See is_optional_start. if type == "StartTag": return next["name"] in ['tbody', 'tfoot'] elif tagname == 'tbody': return type == "EndTag" or type is None else: return False elif tagname == 'tfoot': # A tfoot element's end tag may be omitted if the tfoot element # is immediately followed by a tbody element, or if there is no # more content in the parent element. # XXX: we never omit the end tag when the following element is # a tbody. See is_optional_start. if type == "StartTag": return next["name"] == 'tbody' else: return type == "EndTag" or type is None elif tagname in ('td', 'th'): # A td element's end tag may be omitted if the td element is # immediately followed by a td or th element, or if there is # no more content in the parent element. 
# A th element's end tag may be omitted if the th element is # immediately followed by a td or th element, or if there is # no more content in the parent element. if type == "StartTag": return next["name"] in ('td', 'th') else: return type == "EndTag" or type is None return False PK!!filters/alphabeticalattributes.pynu[from __future__ import absolute_import, division, unicode_literals from . import base from collections import OrderedDict def _attr_key(attr): """Return an appropriate key for an attribute for sorting Attributes have a namespace that can be either ``None`` or a string. We can't compare the two because they're different types, so we convert ``None`` to an empty string first. """ return (attr[0][0] or ''), attr[0][1] class Filter(base.Filter): """Alphabetizes attributes for elements""" def __iter__(self): for token in base.Filter.__iter__(self): if token["type"] in ("StartTag", "EmptyTag"): attrs = OrderedDict() for name, value in sorted(token["data"].items(), key=_attr_key): attrs[name] = value token["data"] = attrs yield token PK!QaMiifilters/sanitizer.pynu["""Deprecated from html5lib 1.1. See `here `_ for information about its deprecation; `Bleach `_ is recommended as a replacement. Please let us know in the aforementioned issue if Bleach is unsuitable for your needs. """ from __future__ import absolute_import, division, unicode_literals import re import warnings from xml.sax.saxutils import escape, unescape from pip._vendor.six.moves import urllib_parse as urlparse from . 
import base from ..constants import namespaces, prefixes __all__ = ["Filter"] _deprecation_msg = ( "html5lib's sanitizer is deprecated; see " + "https://github.com/html5lib/html5lib-python/issues/443 and please let " + "us know if Bleach is unsuitable for your needs" ) warnings.warn(_deprecation_msg, DeprecationWarning) allowed_elements = frozenset(( (namespaces['html'], 'a'), (namespaces['html'], 'abbr'), (namespaces['html'], 'acronym'), (namespaces['html'], 'address'), (namespaces['html'], 'area'), (namespaces['html'], 'article'), (namespaces['html'], 'aside'), (namespaces['html'], 'audio'), (namespaces['html'], 'b'), (namespaces['html'], 'big'), (namespaces['html'], 'blockquote'), (namespaces['html'], 'br'), (namespaces['html'], 'button'), (namespaces['html'], 'canvas'), (namespaces['html'], 'caption'), (namespaces['html'], 'center'), (namespaces['html'], 'cite'), (namespaces['html'], 'code'), (namespaces['html'], 'col'), (namespaces['html'], 'colgroup'), (namespaces['html'], 'command'), (namespaces['html'], 'datagrid'), (namespaces['html'], 'datalist'), (namespaces['html'], 'dd'), (namespaces['html'], 'del'), (namespaces['html'], 'details'), (namespaces['html'], 'dfn'), (namespaces['html'], 'dialog'), (namespaces['html'], 'dir'), (namespaces['html'], 'div'), (namespaces['html'], 'dl'), (namespaces['html'], 'dt'), (namespaces['html'], 'em'), (namespaces['html'], 'event-source'), (namespaces['html'], 'fieldset'), (namespaces['html'], 'figcaption'), (namespaces['html'], 'figure'), (namespaces['html'], 'footer'), (namespaces['html'], 'font'), (namespaces['html'], 'form'), (namespaces['html'], 'header'), (namespaces['html'], 'h1'), (namespaces['html'], 'h2'), (namespaces['html'], 'h3'), (namespaces['html'], 'h4'), (namespaces['html'], 'h5'), (namespaces['html'], 'h6'), (namespaces['html'], 'hr'), (namespaces['html'], 'i'), (namespaces['html'], 'img'), (namespaces['html'], 'input'), (namespaces['html'], 'ins'), (namespaces['html'], 'keygen'), (namespaces['html'], 
'kbd'), (namespaces['html'], 'label'), (namespaces['html'], 'legend'), (namespaces['html'], 'li'), (namespaces['html'], 'm'), (namespaces['html'], 'map'), (namespaces['html'], 'menu'), (namespaces['html'], 'meter'), (namespaces['html'], 'multicol'), (namespaces['html'], 'nav'), (namespaces['html'], 'nextid'), (namespaces['html'], 'ol'), (namespaces['html'], 'output'), (namespaces['html'], 'optgroup'), (namespaces['html'], 'option'), (namespaces['html'], 'p'), (namespaces['html'], 'pre'), (namespaces['html'], 'progress'), (namespaces['html'], 'q'), (namespaces['html'], 's'), (namespaces['html'], 'samp'), (namespaces['html'], 'section'), (namespaces['html'], 'select'), (namespaces['html'], 'small'), (namespaces['html'], 'sound'), (namespaces['html'], 'source'), (namespaces['html'], 'spacer'), (namespaces['html'], 'span'), (namespaces['html'], 'strike'), (namespaces['html'], 'strong'), (namespaces['html'], 'sub'), (namespaces['html'], 'sup'), (namespaces['html'], 'table'), (namespaces['html'], 'tbody'), (namespaces['html'], 'td'), (namespaces['html'], 'textarea'), (namespaces['html'], 'time'), (namespaces['html'], 'tfoot'), (namespaces['html'], 'th'), (namespaces['html'], 'thead'), (namespaces['html'], 'tr'), (namespaces['html'], 'tt'), (namespaces['html'], 'u'), (namespaces['html'], 'ul'), (namespaces['html'], 'var'), (namespaces['html'], 'video'), (namespaces['mathml'], 'maction'), (namespaces['mathml'], 'math'), (namespaces['mathml'], 'merror'), (namespaces['mathml'], 'mfrac'), (namespaces['mathml'], 'mi'), (namespaces['mathml'], 'mmultiscripts'), (namespaces['mathml'], 'mn'), (namespaces['mathml'], 'mo'), (namespaces['mathml'], 'mover'), (namespaces['mathml'], 'mpadded'), (namespaces['mathml'], 'mphantom'), (namespaces['mathml'], 'mprescripts'), (namespaces['mathml'], 'mroot'), (namespaces['mathml'], 'mrow'), (namespaces['mathml'], 'mspace'), (namespaces['mathml'], 'msqrt'), (namespaces['mathml'], 'mstyle'), (namespaces['mathml'], 'msub'), (namespaces['mathml'], 
'msubsup'), (namespaces['mathml'], 'msup'), (namespaces['mathml'], 'mtable'), (namespaces['mathml'], 'mtd'), (namespaces['mathml'], 'mtext'), (namespaces['mathml'], 'mtr'), (namespaces['mathml'], 'munder'), (namespaces['mathml'], 'munderover'), (namespaces['mathml'], 'none'), (namespaces['svg'], 'a'), (namespaces['svg'], 'animate'), (namespaces['svg'], 'animateColor'), (namespaces['svg'], 'animateMotion'), (namespaces['svg'], 'animateTransform'), (namespaces['svg'], 'clipPath'), (namespaces['svg'], 'circle'), (namespaces['svg'], 'defs'), (namespaces['svg'], 'desc'), (namespaces['svg'], 'ellipse'), (namespaces['svg'], 'font-face'), (namespaces['svg'], 'font-face-name'), (namespaces['svg'], 'font-face-src'), (namespaces['svg'], 'g'), (namespaces['svg'], 'glyph'), (namespaces['svg'], 'hkern'), (namespaces['svg'], 'linearGradient'), (namespaces['svg'], 'line'), (namespaces['svg'], 'marker'), (namespaces['svg'], 'metadata'), (namespaces['svg'], 'missing-glyph'), (namespaces['svg'], 'mpath'), (namespaces['svg'], 'path'), (namespaces['svg'], 'polygon'), (namespaces['svg'], 'polyline'), (namespaces['svg'], 'radialGradient'), (namespaces['svg'], 'rect'), (namespaces['svg'], 'set'), (namespaces['svg'], 'stop'), (namespaces['svg'], 'svg'), (namespaces['svg'], 'switch'), (namespaces['svg'], 'text'), (namespaces['svg'], 'title'), (namespaces['svg'], 'tspan'), (namespaces['svg'], 'use'), )) allowed_attributes = frozenset(( # HTML attributes (None, 'abbr'), (None, 'accept'), (None, 'accept-charset'), (None, 'accesskey'), (None, 'action'), (None, 'align'), (None, 'alt'), (None, 'autocomplete'), (None, 'autofocus'), (None, 'axis'), (None, 'background'), (None, 'balance'), (None, 'bgcolor'), (None, 'bgproperties'), (None, 'border'), (None, 'bordercolor'), (None, 'bordercolordark'), (None, 'bordercolorlight'), (None, 'bottompadding'), (None, 'cellpadding'), (None, 'cellspacing'), (None, 'ch'), (None, 'challenge'), (None, 'char'), (None, 'charoff'), (None, 'choff'), (None, 'charset'), 
(None, 'checked'), (None, 'cite'), (None, 'class'), (None, 'clear'), (None, 'color'), (None, 'cols'), (None, 'colspan'), (None, 'compact'), (None, 'contenteditable'), (None, 'controls'), (None, 'coords'), (None, 'data'), (None, 'datafld'), (None, 'datapagesize'), (None, 'datasrc'), (None, 'datetime'), (None, 'default'), (None, 'delay'), (None, 'dir'), (None, 'disabled'), (None, 'draggable'), (None, 'dynsrc'), (None, 'enctype'), (None, 'end'), (None, 'face'), (None, 'for'), (None, 'form'), (None, 'frame'), (None, 'galleryimg'), (None, 'gutter'), (None, 'headers'), (None, 'height'), (None, 'hidefocus'), (None, 'hidden'), (None, 'high'), (None, 'href'), (None, 'hreflang'), (None, 'hspace'), (None, 'icon'), (None, 'id'), (None, 'inputmode'), (None, 'ismap'), (None, 'keytype'), (None, 'label'), (None, 'leftspacing'), (None, 'lang'), (None, 'list'), (None, 'longdesc'), (None, 'loop'), (None, 'loopcount'), (None, 'loopend'), (None, 'loopstart'), (None, 'low'), (None, 'lowsrc'), (None, 'max'), (None, 'maxlength'), (None, 'media'), (None, 'method'), (None, 'min'), (None, 'multiple'), (None, 'name'), (None, 'nohref'), (None, 'noshade'), (None, 'nowrap'), (None, 'open'), (None, 'optimum'), (None, 'pattern'), (None, 'ping'), (None, 'point-size'), (None, 'poster'), (None, 'pqg'), (None, 'preload'), (None, 'prompt'), (None, 'radiogroup'), (None, 'readonly'), (None, 'rel'), (None, 'repeat-max'), (None, 'repeat-min'), (None, 'replace'), (None, 'required'), (None, 'rev'), (None, 'rightspacing'), (None, 'rows'), (None, 'rowspan'), (None, 'rules'), (None, 'scope'), (None, 'selected'), (None, 'shape'), (None, 'size'), (None, 'span'), (None, 'src'), (None, 'start'), (None, 'step'), (None, 'style'), (None, 'summary'), (None, 'suppress'), (None, 'tabindex'), (None, 'target'), (None, 'template'), (None, 'title'), (None, 'toppadding'), (None, 'type'), (None, 'unselectable'), (None, 'usemap'), (None, 'urn'), (None, 'valign'), (None, 'value'), (None, 'variable'), (None, 'volume'), (None, 
'vspace'), (None, 'vrml'), (None, 'width'), (None, 'wrap'), (namespaces['xml'], 'lang'), # MathML attributes (None, 'actiontype'), (None, 'align'), (None, 'columnalign'), (None, 'columnalign'), (None, 'columnalign'), (None, 'columnlines'), (None, 'columnspacing'), (None, 'columnspan'), (None, 'depth'), (None, 'display'), (None, 'displaystyle'), (None, 'equalcolumns'), (None, 'equalrows'), (None, 'fence'), (None, 'fontstyle'), (None, 'fontweight'), (None, 'frame'), (None, 'height'), (None, 'linethickness'), (None, 'lspace'), (None, 'mathbackground'), (None, 'mathcolor'), (None, 'mathvariant'), (None, 'mathvariant'), (None, 'maxsize'), (None, 'minsize'), (None, 'other'), (None, 'rowalign'), (None, 'rowalign'), (None, 'rowalign'), (None, 'rowlines'), (None, 'rowspacing'), (None, 'rowspan'), (None, 'rspace'), (None, 'scriptlevel'), (None, 'selection'), (None, 'separator'), (None, 'stretchy'), (None, 'width'), (None, 'width'), (namespaces['xlink'], 'href'), (namespaces['xlink'], 'show'), (namespaces['xlink'], 'type'), # SVG attributes (None, 'accent-height'), (None, 'accumulate'), (None, 'additive'), (None, 'alphabetic'), (None, 'arabic-form'), (None, 'ascent'), (None, 'attributeName'), (None, 'attributeType'), (None, 'baseProfile'), (None, 'bbox'), (None, 'begin'), (None, 'by'), (None, 'calcMode'), (None, 'cap-height'), (None, 'class'), (None, 'clip-path'), (None, 'color'), (None, 'color-rendering'), (None, 'content'), (None, 'cx'), (None, 'cy'), (None, 'd'), (None, 'dx'), (None, 'dy'), (None, 'descent'), (None, 'display'), (None, 'dur'), (None, 'end'), (None, 'fill'), (None, 'fill-opacity'), (None, 'fill-rule'), (None, 'font-family'), (None, 'font-size'), (None, 'font-stretch'), (None, 'font-style'), (None, 'font-variant'), (None, 'font-weight'), (None, 'from'), (None, 'fx'), (None, 'fy'), (None, 'g1'), (None, 'g2'), (None, 'glyph-name'), (None, 'gradientUnits'), (None, 'hanging'), (None, 'height'), (None, 'horiz-adv-x'), (None, 'horiz-origin-x'), (None, 'id'), (None, 
'ideographic'), (None, 'k'), (None, 'keyPoints'), (None, 'keySplines'), (None, 'keyTimes'), (None, 'lang'), (None, 'marker-end'), (None, 'marker-mid'), (None, 'marker-start'), (None, 'markerHeight'), (None, 'markerUnits'), (None, 'markerWidth'), (None, 'mathematical'), (None, 'max'), (None, 'min'), (None, 'name'), (None, 'offset'), (None, 'opacity'), (None, 'orient'), (None, 'origin'), (None, 'overline-position'), (None, 'overline-thickness'), (None, 'panose-1'), (None, 'path'), (None, 'pathLength'), (None, 'points'), (None, 'preserveAspectRatio'), (None, 'r'), (None, 'refX'), (None, 'refY'), (None, 'repeatCount'), (None, 'repeatDur'), (None, 'requiredExtensions'), (None, 'requiredFeatures'), (None, 'restart'), (None, 'rotate'), (None, 'rx'), (None, 'ry'), (None, 'slope'), (None, 'stemh'), (None, 'stemv'), (None, 'stop-color'), (None, 'stop-opacity'), (None, 'strikethrough-position'), (None, 'strikethrough-thickness'), (None, 'stroke'), (None, 'stroke-dasharray'), (None, 'stroke-dashoffset'), (None, 'stroke-linecap'), (None, 'stroke-linejoin'), (None, 'stroke-miterlimit'), (None, 'stroke-opacity'), (None, 'stroke-width'), (None, 'systemLanguage'), (None, 'target'), (None, 'text-anchor'), (None, 'to'), (None, 'transform'), (None, 'type'), (None, 'u1'), (None, 'u2'), (None, 'underline-position'), (None, 'underline-thickness'), (None, 'unicode'), (None, 'unicode-range'), (None, 'units-per-em'), (None, 'values'), (None, 'version'), (None, 'viewBox'), (None, 'visibility'), (None, 'width'), (None, 'widths'), (None, 'x'), (None, 'x-height'), (None, 'x1'), (None, 'x2'), (namespaces['xlink'], 'actuate'), (namespaces['xlink'], 'arcrole'), (namespaces['xlink'], 'href'), (namespaces['xlink'], 'role'), (namespaces['xlink'], 'show'), (namespaces['xlink'], 'title'), (namespaces['xlink'], 'type'), (namespaces['xml'], 'base'), (namespaces['xml'], 'lang'), (namespaces['xml'], 'space'), (None, 'y'), (None, 'y1'), (None, 'y2'), (None, 'zoomAndPan'), )) attr_val_is_uri = frozenset(( 
(None, 'href'), (None, 'src'), (None, 'cite'), (None, 'action'), (None, 'longdesc'), (None, 'poster'), (None, 'background'), (None, 'datasrc'), (None, 'dynsrc'), (None, 'lowsrc'), (None, 'ping'), (namespaces['xlink'], 'href'), (namespaces['xml'], 'base'), )) svg_attr_val_allows_ref = frozenset(( (None, 'clip-path'), (None, 'color-profile'), (None, 'cursor'), (None, 'fill'), (None, 'filter'), (None, 'marker'), (None, 'marker-start'), (None, 'marker-mid'), (None, 'marker-end'), (None, 'mask'), (None, 'stroke'), )) svg_allow_local_href = frozenset(( (None, 'altGlyph'), (None, 'animate'), (None, 'animateColor'), (None, 'animateMotion'), (None, 'animateTransform'), (None, 'cursor'), (None, 'feImage'), (None, 'filter'), (None, 'linearGradient'), (None, 'pattern'), (None, 'radialGradient'), (None, 'textpath'), (None, 'tref'), (None, 'set'), (None, 'use') )) allowed_css_properties = frozenset(( 'azimuth', 'background-color', 'border-bottom-color', 'border-collapse', 'border-color', 'border-left-color', 'border-right-color', 'border-top-color', 'clear', 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font', 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height', 'overflow', 'pause', 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness', 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation', 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent', 'unicode-bidi', 'vertical-align', 'voice-family', 'volume', 'white-space', 'width', )) allowed_css_keywords = frozenset(( 'auto', 'aqua', 'black', 'block', 'blue', 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed', 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left', 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive', 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top', 'transparent', 'underline', 'white', 'yellow', )) allowed_svg_properties = 
frozenset(( 'fill', 'fill-opacity', 'fill-rule', 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin', 'stroke-opacity', )) allowed_protocols = frozenset(( 'ed2k', 'ftp', 'http', 'https', 'irc', 'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal', 'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag', 'ssh', 'sftp', 'rtsp', 'afs', 'data', )) allowed_content_types = frozenset(( 'image/png', 'image/jpeg', 'image/gif', 'image/webp', 'image/bmp', 'text/plain', )) data_content_type = re.compile(r''' ^ # Match a content type / (?P[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) # Match any character set and encoding (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) # Assume the rest is data ,.* $ ''', re.VERBOSE) class Filter(base.Filter): """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes""" def __init__(self, source, allowed_elements=allowed_elements, allowed_attributes=allowed_attributes, allowed_css_properties=allowed_css_properties, allowed_css_keywords=allowed_css_keywords, allowed_svg_properties=allowed_svg_properties, allowed_protocols=allowed_protocols, allowed_content_types=allowed_content_types, attr_val_is_uri=attr_val_is_uri, svg_attr_val_allows_ref=svg_attr_val_allows_ref, svg_allow_local_href=svg_allow_local_href): """Creates a Filter :arg allowed_elements: set of elements to allow--everything else will be escaped :arg allowed_attributes: set of attributes to allow in elements--everything else will be stripped :arg allowed_css_properties: set of CSS properties to allow--everything else will be stripped :arg allowed_css_keywords: set of CSS keywords to allow--everything else will be stripped :arg allowed_svg_properties: set of SVG properties to allow--everything else will be removed :arg allowed_protocols: set of allowed protocols for URIs :arg allowed_content_types: set of allowed content types for ``data`` URIs. 
:arg attr_val_is_uri: set of attributes that have URI values--values that have a scheme not listed in ``allowed_protocols`` are removed :arg svg_attr_val_allows_ref: set of SVG attributes that can have references :arg svg_allow_local_href: set of SVG elements that can have local hrefs--these are removed """ super(Filter, self).__init__(source) warnings.warn(_deprecation_msg, DeprecationWarning) self.allowed_elements = allowed_elements self.allowed_attributes = allowed_attributes self.allowed_css_properties = allowed_css_properties self.allowed_css_keywords = allowed_css_keywords self.allowed_svg_properties = allowed_svg_properties self.allowed_protocols = allowed_protocols self.allowed_content_types = allowed_content_types self.attr_val_is_uri = attr_val_is_uri self.svg_attr_val_allows_ref = svg_attr_val_allows_ref self.svg_allow_local_href = svg_allow_local_href def __iter__(self): for token in base.Filter.__iter__(self): token = self.sanitize_token(token) if token: yield token # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are # allowed. 
# # sanitize_html('') # => <script> do_nasty_stuff() </script> # sanitize_html('Click here for $100') # => Click here for $100 def sanitize_token(self, token): # accommodate filters which use token_type differently token_type = token["type"] if token_type in ("StartTag", "EndTag", "EmptyTag"): name = token["name"] namespace = token["namespace"] if ((namespace, name) in self.allowed_elements or (namespace is None and (namespaces["html"], name) in self.allowed_elements)): return self.allowed_token(token) else: return self.disallowed_token(token) elif token_type == "Comment": pass else: return token def allowed_token(self, token): if "data" in token: attrs = token["data"] attr_names = set(attrs.keys()) # Remove forbidden attributes for to_remove in (attr_names - self.allowed_attributes): del token["data"][to_remove] attr_names.remove(to_remove) # Remove attributes with disallowed URL values for attr in (attr_names & self.attr_val_is_uri): assert attr in attrs # I don't have a clue where this regexp comes from or why it matches those # characters, nor why we call unescape. I just know it's always been here. # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all # this will do is remove *more* than it otherwise would. 
val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '', unescape(attrs[attr])).lower() # remove replacement characters from unescaped characters val_unescaped = val_unescaped.replace("\ufffd", "") try: uri = urlparse.urlparse(val_unescaped) except ValueError: uri = None del attrs[attr] if uri and uri.scheme: if uri.scheme not in self.allowed_protocols: del attrs[attr] if uri.scheme == 'data': m = data_content_type.match(uri.path) if not m: del attrs[attr] elif m.group('content_type') not in self.allowed_content_types: del attrs[attr] for attr in self.svg_attr_val_allows_ref: if attr in attrs: attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', ' ', unescape(attrs[attr])) if (token["name"] in self.svg_allow_local_href and (namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*', attrs[(namespaces['xlink'], 'href')])): del attrs[(namespaces['xlink'], 'href')] if (None, 'style') in attrs: attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')]) token["data"] = attrs return token def disallowed_token(self, token): token_type = token["type"] if token_type == "EndTag": token["data"] = "" % token["name"] elif token["data"]: assert token_type in ("StartTag", "EmptyTag") attrs = [] for (ns, name), v in token["data"].items(): attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) else: token["data"] = "<%s>" % token["name"] if token.get("selfClosing"): token["data"] = token["data"][:-1] + "/>" token["type"] = "Characters" del token["name"] return token def sanitize_css(self, style): # disallow urls style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) # gauntlet if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return '' if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): return '' clean = [] for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style): if not value: continue if 
prop.lower() in self.allowed_css_properties: clean.append(prop + ': ' + value + ';') elif prop.split('-')[0].lower() in ['background', 'border', 'margin', 'padding']: for keyword in value.split(): if keyword not in self.allowed_css_keywords and \ not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa break else: clean.append(prop + ': ' + value + ';') elif prop.lower() in self.allowed_svg_properties: clean.append(prop + ': ' + value + ';') return ' '.join(clean) PK!filters/__init__.pynu[PK!ʧ-XAXA _ihatexml.pynu[from __future__ import absolute_import, division, unicode_literals import re import warnings from .constants import DataLossWarning baseChar = """ [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | 
[#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]""" ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]""" combiningCharacter = """ 
[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309A""" digit = """ [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]""" extender = """ #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | #[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]""" letter = " | ".join([baseChar, ideographic]) # Without the name = " | ".join([letter, digit, ".", "-", "_", 
combiningCharacter, extender]) nameFirst = " | ".join([letter, "_"]) reChar = re.compile(r"#x([\d|A-F]{4,4})") reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]") def charStringToList(chars): charRanges = [item.strip() for item in chars.split(" | ")] rv = [] for item in charRanges: foundMatch = False for regexp in (reChar, reCharRange): match = regexp.match(item) if match is not None: rv.append([hexToInt(item) for item in match.groups()]) if len(rv[-1]) == 1: rv[-1] = rv[-1] * 2 foundMatch = True break if not foundMatch: assert len(item) == 1 rv.append([ord(item)] * 2) rv = normaliseCharList(rv) return rv def normaliseCharList(charList): charList = sorted(charList) for item in charList: assert item[1] >= item[0] rv = [] i = 0 while i < len(charList): j = 1 rv.append(charList[i]) while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1: rv[-1][1] = charList[i + j][1] j += 1 i += j return rv # We don't really support characters above the BMP :( max_unicode = int("FFFF", 16) def missingRanges(charList): rv = [] if charList[0] != 0: rv.append([0, charList[0][0] - 1]) for i, item in enumerate(charList[:-1]): rv.append([item[1] + 1, charList[i + 1][0] - 1]) if charList[-1][1] != max_unicode: rv.append([charList[-1][1] + 1, max_unicode]) return rv def listToRegexpStr(charList): rv = [] for item in charList: if item[0] == item[1]: rv.append(escapeRegexp(chr(item[0]))) else: rv.append(escapeRegexp(chr(item[0])) + "-" + escapeRegexp(chr(item[1]))) return "[%s]" % "".join(rv) def hexToInt(hex_str): return int(hex_str, 16) def escapeRegexp(string): specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}", "[", "]", "|", "(", ")", "-") for char in specialCharacters: string = string.replace(char, "\\" + char) return string # output from the above nonXmlNameBMPRegexp = 
re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48
\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0
d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa # Simpler things nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]") class InfosetFilter(object): replacementRegexp = re.compile(r"U[\dA-F]{5,5}") def __init__(self, dropXmlnsLocalName=False, dropXmlnsAttrNs=False, preventDoubleDashComments=False, preventDashAtCommentEnd=False, replaceFormFeedCharacters=True, preventSingleQuotePubid=False): self.dropXmlnsLocalName = dropXmlnsLocalName self.dropXmlnsAttrNs = dropXmlnsAttrNs self.preventDoubleDashComments = preventDoubleDashComments self.preventDashAtCommentEnd = preventDashAtCommentEnd self.replaceFormFeedCharacters = replaceFormFeedCharacters self.preventSingleQuotePubid = preventSingleQuotePubid self.replaceCache = {} def coerceAttribute(self, name, namespace=None): if self.dropXmlnsLocalName and name.startswith("xmlns:"): warnings.warn("Attributes cannot begin with xmlns", DataLossWarning) return None elif (self.dropXmlnsAttrNs and namespace == "http://www.w3.org/2000/xmlns/"): warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning) return None else: return 
self.toXmlName(name) def coerceElement(self, name): return self.toXmlName(name) def coerceComment(self, data): if self.preventDoubleDashComments: while "--" in data: warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) data = data.replace("--", "- -") if data.endswith("-"): warnings.warn("Comments cannot end in a dash", DataLossWarning) data += " " return data def coerceCharacters(self, data): if self.replaceFormFeedCharacters: for _ in range(data.count("\x0C")): warnings.warn("Text cannot contain U+000C", DataLossWarning) data = data.replace("\x0C", " ") # Other non-xml characters return data def coercePubid(self, data): dataOutput = data for char in nonPubidCharRegexp.findall(data): warnings.warn("Coercing non-XML pubid", DataLossWarning) replacement = self.getReplacementCharacter(char) dataOutput = dataOutput.replace(char, replacement) if self.preventSingleQuotePubid and dataOutput.find("'") >= 0: warnings.warn("Pubid cannot contain single quote", DataLossWarning) dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'")) return dataOutput def toXmlName(self, name): nameFirst = name[0] nameRest = name[1:] m = nonXmlNameFirstBMPRegexp.match(nameFirst) if m: warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning) nameFirstOutput = self.getReplacementCharacter(nameFirst) else: nameFirstOutput = nameFirst nameRestOutput = nameRest replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest)) for char in replaceChars: warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning) replacement = self.getReplacementCharacter(char) nameRestOutput = nameRestOutput.replace(char, replacement) return nameFirstOutput + nameRestOutput def getReplacementCharacter(self, char): if char in self.replaceCache: replacement = self.replaceCache[char] else: replacement = self.escapeChar(char) return replacement def fromXmlName(self, name): for item in set(self.replacementRegexp.findall(name)): name = name.replace(item, 
self.unescapeChar(item)) return name def escapeChar(self, char): replacement = "U%05X" % ord(char) self.replaceCache[char] = replacement return replacement def unescapeChar(self, charcode): return chr(int(charcode[1:], 16)) PK!s== serializer.pynu[from __future__ import absolute_import, division, unicode_literals from pip._vendor.six import text_type import re from codecs import register_error, xmlcharrefreplace_errors from .constants import voidElements, booleanAttributes, spaceCharacters from .constants import rcdataElements, entities, xmlEntities from . import treewalkers, _utils from xml.sax.saxutils import escape _quoteAttributeSpecChars = "".join(spaceCharacters) + "\"'=<>`" _quoteAttributeSpec = re.compile("[" + _quoteAttributeSpecChars + "]") _quoteAttributeLegacy = re.compile("[" + _quoteAttributeSpecChars + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n" "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15" "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000" "\u2001\u2002\u2003\u2004\u2005\u2006\u2007" "\u2008\u2009\u200a\u2028\u2029\u202f\u205f" "\u3000]") _encode_entity_map = {} _is_ucs4 = len("\U0010FFFF") == 1 for k, v in list(entities.items()): # skip multi-character entities if ((_is_ucs4 and len(v) > 1) or (not _is_ucs4 and len(v) > 2)): continue if v != "&": if len(v) == 2: v = _utils.surrogatePairToCodepoint(v) else: v = ord(v) if v not in _encode_entity_map or k.islower(): # prefer < over < and similarly for &, >, etc. 
_encode_entity_map[v] = k def htmlentityreplace_errors(exc): if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)): res = [] codepoints = [] skip = False for i, c in enumerate(exc.object[exc.start:exc.end]): if skip: skip = False continue index = i + exc.start if _utils.isSurrogatePair(exc.object[index:min([exc.end, index + 2])]): codepoint = _utils.surrogatePairToCodepoint(exc.object[index:index + 2]) skip = True else: codepoint = ord(c) codepoints.append(codepoint) for cp in codepoints: e = _encode_entity_map.get(cp) if e: res.append("&") res.append(e) if not e.endswith(";"): res.append(";") else: res.append("&#x%s;" % (hex(cp)[2:])) return ("".join(res), exc.end) else: return xmlcharrefreplace_errors(exc) register_error("htmlentityreplace", htmlentityreplace_errors) def serialize(input, tree="etree", encoding=None, **serializer_opts): """Serializes the input token stream using the specified treewalker :arg input: the token stream to serialize :arg tree: the treewalker to use :arg encoding: the encoding to use :arg serializer_opts: any options to pass to the :py:class:`html5lib.serializer.HTMLSerializer` that gets created :returns: the tree serialized as a string Example: >>> from html5lib.html5parser import parse >>> from html5lib.serializer import serialize >>> token_stream = parse('

Hi!

') >>> serialize(token_stream, omit_optional_tags=False) '

Hi!

' """ # XXX: Should we cache this? walker = treewalkers.getTreeWalker(tree) s = HTMLSerializer(**serializer_opts) return s.render(walker(input), encoding) class HTMLSerializer(object): # attribute quoting options quote_attr_values = "legacy" # be secure by default quote_char = '"' use_best_quote_char = True # tag syntax options omit_optional_tags = True minimize_boolean_attributes = True use_trailing_solidus = False space_before_trailing_solidus = True # escaping options escape_lt_in_attrs = False escape_rcdata = False resolve_entities = True # miscellaneous options alphabetical_attributes = False inject_meta_charset = True strip_whitespace = False sanitize = False options = ("quote_attr_values", "quote_char", "use_best_quote_char", "omit_optional_tags", "minimize_boolean_attributes", "use_trailing_solidus", "space_before_trailing_solidus", "escape_lt_in_attrs", "escape_rcdata", "resolve_entities", "alphabetical_attributes", "inject_meta_charset", "strip_whitespace", "sanitize") def __init__(self, **kwargs): """Initialize HTMLSerializer :arg inject_meta_charset: Whether or not to inject the meta charset. Defaults to ``True``. :arg quote_attr_values: Whether to quote attribute values that don't require quoting per legacy browser behavior (``"legacy"``), when required by the standard (``"spec"``), or always (``"always"``). Defaults to ``"legacy"``. :arg quote_char: Use given quote character for attribute quoting. Defaults to ``"`` which will use double quotes unless attribute value contains a double quote, in which case single quotes are used. :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute values. Defaults to ``False``. :arg escape_rcdata: Whether to escape characters that need to be escaped within normal elements within rcdata elements such as style. Defaults to ``False``. :arg resolve_entities: Whether to resolve named character entities that appear in the source tree. The XML predefined entities < > & " ' are unaffected by this setting. 
Defaults to ``True``. :arg strip_whitespace: Whether to remove semantically meaningless whitespace. (This compresses all whitespace to a single space except within ``pre``.) Defaults to ``False``. :arg minimize_boolean_attributes: Shortens boolean attributes to give just the attribute value, for example:: becomes:: Defaults to ``True``. :arg use_trailing_solidus: Includes a close-tag slash at the end of the start tag of void elements (empty elements whose end tag is forbidden). E.g. ``
``. Defaults to ``False``. :arg space_before_trailing_solidus: Places a space immediately before the closing slash in a tag using a trailing solidus. E.g. ``
``. Requires ``use_trailing_solidus=True``. Defaults to ``True``. :arg sanitize: Strip all unsafe or unknown constructs from output. See :py:class:`html5lib.filters.sanitizer.Filter`. Defaults to ``False``. :arg omit_optional_tags: Omit start/end tags that are optional. Defaults to ``True``. :arg alphabetical_attributes: Reorder attributes to be in alphabetical order. Defaults to ``False``. """ unexpected_args = frozenset(kwargs) - frozenset(self.options) if len(unexpected_args) > 0: raise TypeError("__init__() got an unexpected keyword argument '%s'" % next(iter(unexpected_args))) if 'quote_char' in kwargs: self.use_best_quote_char = False for attr in self.options: setattr(self, attr, kwargs.get(attr, getattr(self, attr))) self.errors = [] self.strict = False def encode(self, string): assert(isinstance(string, text_type)) if self.encoding: return string.encode(self.encoding, "htmlentityreplace") else: return string def encodeStrict(self, string): assert(isinstance(string, text_type)) if self.encoding: return string.encode(self.encoding, "strict") else: return string def serialize(self, treewalker, encoding=None): # pylint:disable=too-many-nested-blocks self.encoding = encoding in_cdata = False self.errors = [] if encoding and self.inject_meta_charset: from .filters.inject_meta_charset import Filter treewalker = Filter(treewalker, encoding) # Alphabetical attributes is here under the assumption that none of # the later filters add or change order of attributes; it needs to be # before the sanitizer so escaped elements come out correctly if self.alphabetical_attributes: from .filters.alphabeticalattributes import Filter treewalker = Filter(treewalker) # WhitespaceFilter should be used before OptionalTagFilter # for maximum efficiently of this latter filter if self.strip_whitespace: from .filters.whitespace import Filter treewalker = Filter(treewalker) if self.sanitize: from .filters.sanitizer import Filter treewalker = Filter(treewalker) if self.omit_optional_tags: 
from .filters.optionaltags import Filter treewalker = Filter(treewalker) for token in treewalker: type = token["type"] if type == "Doctype": doctype = "= 0: if token["systemId"].find("'") >= 0: self.serializeError("System identifier contains both single and double quote characters") quote_char = "'" else: quote_char = '"' doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char) doctype += ">" yield self.encodeStrict(doctype) elif type in ("Characters", "SpaceCharacters"): if type == "SpaceCharacters" or in_cdata: if in_cdata and token["data"].find("= 0: self.serializeError("Unexpected ") elif type == "EndTag": name = token["name"] if name in rcdataElements: in_cdata = False elif in_cdata: self.serializeError("Unexpected child element of a CDATA element") yield self.encodeStrict("" % name) elif type == "Comment": data = token["data"] if data.find("--") >= 0: self.serializeError("Comment contains --") yield self.encodeStrict("" % token["data"]) elif type == "Entity": name = token["name"] key = name + ";" if key not in entities: self.serializeError("Entity %s not recognized" % name) if self.resolve_entities and key not in xmlEntities: data = entities[key] else: data = "&%s;" % name yield self.encodeStrict(data) else: self.serializeError(token["data"]) def render(self, treewalker, encoding=None): """Serializes the stream from the treewalker into a string :arg treewalker: the treewalker to serialize :arg encoding: the string encoding to use :returns: the serialized tree Example: >>> from html5lib import parse, getTreeWalker >>> from html5lib.serializer import HTMLSerializer >>> token_stream = parse('Hi!') >>> walker = getTreeWalker('etree') >>> serializer = HTMLSerializer(omit_optional_tags=False) >>> serializer.render(walker(token_stream)) 'Hi!' 
""" if encoding: return b"".join(list(self.serialize(treewalker, encoding))) else: return "".join(list(self.serialize(treewalker))) def serializeError(self, data="XXX ERROR MESSAGE NEEDED"): # XXX The idea is to make data mandatory. self.errors.append(data) if self.strict: raise SerializeError class SerializeError(Exception): """Error in serialized tree""" pass PK!ӉICC _utils.pynu[from __future__ import absolute_import, division, unicode_literals from types import ModuleType try: from collections.abc import Mapping except ImportError: from collections import Mapping from pip._vendor.six import text_type, PY3 if PY3: import xml.etree.ElementTree as default_etree else: try: import xml.etree.cElementTree as default_etree except ImportError: import xml.etree.ElementTree as default_etree __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", "surrogatePairToCodepoint", "moduleFactoryFactory", "supports_lone_surrogates"] # Platforms not supporting lone surrogates (\uD800-\uDFFF) should be # caught by the below test. In general this would be any platform # using UTF-16 as its encoding of unicode strings, such as # Jython. This is because UTF-16 itself is based on the use of such # surrogates, and there is no mechanism to further escape such # escapes. try: _x = eval('"\\uD800"') # pylint:disable=eval-used if not isinstance(_x, text_type): # We need this with u"" because of http://bugs.jython.org/issue2039 _x = eval('u"\\uD800"') # pylint:disable=eval-used assert isinstance(_x, text_type) except Exception: supports_lone_surrogates = False else: supports_lone_surrogates = True class MethodDispatcher(dict): """Dict with 2 special properties: On initiation, keys that are lists, sets or tuples are converted to multiple keys so accessing any one of the items in the original list-like object returns the matching value md = MethodDispatcher({("foo", "bar"):"baz"}) md["foo"] == "baz" A default value which can be set through the default attribute. 
""" def __init__(self, items=()): _dictEntries = [] for name, value in items: if isinstance(name, (list, tuple, frozenset, set)): for item in name: _dictEntries.append((item, value)) else: _dictEntries.append((name, value)) dict.__init__(self, _dictEntries) assert len(self) == len(_dictEntries) self.default = None def __getitem__(self, key): return dict.get(self, key, self.default) def __get__(self, instance, owner=None): return BoundMethodDispatcher(instance, self) class BoundMethodDispatcher(Mapping): """Wraps a MethodDispatcher, binding its return values to `instance`""" def __init__(self, instance, dispatcher): self.instance = instance self.dispatcher = dispatcher def __getitem__(self, key): # see https://docs.python.org/3/reference/datamodel.html#object.__get__ # on a function, __get__ is used to bind a function to an instance as a bound method return self.dispatcher[key].__get__(self.instance) def get(self, key, default): if key in self.dispatcher: return self[key] else: return default def __iter__(self): return iter(self.dispatcher) def __len__(self): return len(self.dispatcher) def __contains__(self, key): return key in self.dispatcher # Some utility functions to deal with weirdness around UCS2 vs UCS4 # python builds def isSurrogatePair(data): return (len(data) == 2 and ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF) def surrogatePairToCodepoint(data): char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 + (ord(data[1]) - 0xDC00)) return char_val # Module Factory Factory (no, this isn't Java, I know) # Here to stop this being duplicated all over the place. 
def moduleFactoryFactory(factory): moduleCache = {} def moduleFactory(baseModule, *args, **kwargs): if isinstance(ModuleType.__name__, type("")): name = "_%s_factory" % baseModule.__name__ else: name = b"_%s_factory" % baseModule.__name__ kwargs_tuple = tuple(kwargs.items()) try: return moduleCache[name][args][kwargs_tuple] except KeyError: mod = ModuleType(name) objs = factory(baseModule, *args, **kwargs) mod.__dict__.update(objs) if "name" not in moduleCache: moduleCache[name] = {} if "args" not in moduleCache[name]: moduleCache[name][args] = {} if "kwargs" not in moduleCache[name][args]: moduleCache[name][args][kwargs_tuple] = {} moduleCache[name][args][kwargs_tuple] = mod return mod return moduleFactory def memoize(func): cache = {} def wrapped(*args, **kwargs): key = (tuple(args), tuple(kwargs.items())) if key not in cache: cache[key] = func(*args, **kwargs) return cache[key] return wrapped PK!A KK$__pycache__/constants.cpython-39.pycnu[a ReF@s~-ddlmZmZmZddlZdZddddddd d d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d,d,d0d1d2d3d4d5d6d7d8d9d:d;dd?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~dddddZdddddddZeeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfgZ eeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfgZ eeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfeddfgNZ eeddfeddfeddfeddfgZ eeddfeddfeddfeddfeddfgZ dddddddddddddddddddddddddd d d d d ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.>Zd/d0iZd1d2ed1fd1d3ed1fd1d4ed1fd1d5ed1fd1d6ed1fd1ded1fd1d7ed1fd8ded8fd8d9ed8fd8d:ed8fdd;ed;fd;d1ed;fd< Zd=d>eDZegd?Zegd@ZeejZeejZeejZeejZeejZdAd>ejDZdBZegdCZ 
eddgZ!egdDZ"edEdFgedGgedHgedIdJgedIdJgedKdLgedMgedNdOgegdPedQgedRgedOdSgegdTedOdSgedOdUgegdVegdWedOdSgedXgdYZ#dZZ$egd[Z%d\d\d]d]d^d^d_d`d`dadbdcdcdddedfdgdhdidjdjdkdldmdmdndndodpdqdrdsdtdudvdwdxdtdydzd{d{d|d}d~ddddddddddddddddddddddddddddddd~ddddddddddddddddddddddddddddddddddddddddddddÐdĐdŐdƐdǐdȐdɐddʐdːd̐d͐d͐dΐdΐdϐdАdАdѐdҐdӐdԐdԐdՐd֐dאdؐdِdڐdېdܐdݐdސdߐdddddddddddddddddddddddddddddddddddddddddddddd dyd d d d dddddddddddddddddddddddd d!d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdddSdTdUdVdWdXdYdZd[d\d]d^d_dddd`dadbd6dcdddedfdgdhdidjdkdldidmdndodpdqdrdsdsdsdsddedtdudvdwdxdydzd{d|d}d~ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddÐdĐdŐdƐdǐdȐdɐdʐdːd̐dd͐dΐdϐdАdѐdҐdӐdԐddՐd֐dאdؐdؐdِdڐdېdܐdݐdݐdސdߐdddddddddddddddddddddddddddddddddddddddddddd d d d d dddd<ddddddddddddddddddd d!d"d#dd$d%d&d'd%d(d(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d>d?d@dAdBdCdCdDdEdFdGdGdHdIdJdKdLdMdNdOdPddQdRdSdTdUdVdddWdXdYdZd[d\d]d^d^d_d`dadbdcdddededfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d{d|d}d~dddddddddsddddddddddddddddddiddddddddd]d]ddddddddddddddddddddddjdddd9dddddd9dddddd9dddddddddddddddddKdÐddĐdŐdsdsdƐddtdǐdȐdɐdʐddːdMd̐d͐dΐdϐdАdѐdҐdӐdddpdԐdՐdd֐dאdؐdِdڐdېdܐdݐdސdߐddddUdUdddddddddddddddddddddddddddddddddddddddd d d d d dddxddddddddddddydd d ddddddddddd d!d"d"d#d$d%d&ddd'd(d(dd)d*d+d+d,d-d.d/d0d1d2dݐd3d4d5d6d0d7d8d9d:d:d;dldld<d=d>dd>dd8d?dd@dd{d{dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdLdMdYdZd[d[dNdWdYdZdd\d]dd^d_d`ddadGdbddcdddddedfdgdgdhdidjdkdBddddldlddmdndodododpdpdqdrdsdtdudddvdwdxdydqddedBddܐdzd{d|d}d~ddddאddddddfdvdddddddddddddddddddddddddddddddddddddddddddݐddddddddddddݐdddddddddߐddddddddddddddddddddddÐdĐdĐdŐdƐdǐdȐdɐdʐdʐdːd̐d͐dΐdϐdАdѐdҐdӐddԐdՐd֐dmdאdؐdِdڐdېdddddddܐdݐdސdߐddddddddddddddddddddddddddddddddאddddԐddddddjddddEdddddddddddddddddddd d dad d dd d d dddddddddddddddddddddՐdddddddddd d!d"d#d$d 
d%d%d&dՐd'd(d)d*dՐdd+d,d-d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d[dd@dAdTdBdCdDdEd6dFd;dGd;dHdIdId<d=dJdKddLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d?dVd]d^ddŐd_d`dadbd<dOdÐdQdcdEd>dddddSdbdTdWdWdedfdgdhdidjdHdkdSdBdUdXdldDdmdUdndÐdQdodpdqdedcdrdrdsdtdududvdwdwdxdxdvdydzd{d@d]d^d|d_dLd}d~dddddIdddՐddd>dzdddddddcdXdddWdddd2d2dddkddddddddLdؐdddddddddddddɐdHdVddddddddddfdddddddwddddkdddkdddddddddddddddddddddddddddxdwdwddddddddddd~dÐdĐdXdXdŐdzdƐddddǐdddddddddddȐdɐddʐdːdd̐dd͐dΐdϐddΐdȐdd͐dddddddddАdѐdѐd}dҐdӐd}dԐdՐddd֐dאd|d|dؐdِdڐdddddddېdܐdݐdېddddddސdd|ddddd|dddߐddddddddddddddddddddddddddddddddddddddddddWdWdd3ddd4d5dddd6ddddddddddddddd1dddÐdd d d d dd ddddd2ddddddddddddddddddddddddddd d!dǐd"d#d$d%dUd&d'd(d)did*d+dd,dd-dd.d/dd0dxd1d2d̐d̐d3d4d̐d5d6d7d7dϐd8d9dѐdАdϐd9dѐdАd:d;d<dҐd=d͐d;d:d<dԐd>d?d@dՐdՐdҐdAdBdCdDdEddFdGdHddIdJddؐdؐdddKdbdLdMdNddOddPdQddRdRddSddTdUdVdd}dWdXdYdZd[d\dڐdԐd]d^d_d`dadbdcdddd^dedfdgdddhdddddidݐdݐdjddkdƐddldmdnddYdƐdddސdodZdpdqdddސdrdsdsdtdudvddwdxdydzd{d|d}dod~ddd_dddpddddِdddddd dddd"d!ddddddd#dddyddddbdbdddddododddҐddddfddddddd6dd7d7dddddddd<dodddfdddddddddddddddddddddddddddddddoddddАdd)dddddddddddddddddddd dd"d!dddd#d$ddÐdÐdĐdĐdŐdŐd&dƐdǐdȐd'dɐdʐdːd̐d͐dΐdϐdАd&d'dƐdϐdΐdѐdҐdӐdԐddadadՐd֐d֐dאdؐdŐdِdڐdېd;dܐdݐd2d2dސdߐdߐd9d6d5d9d6ddddddddddƐdȐddddddd)ddddLdddddddddddddddɐd7ddddddddddddRddTdddddOdddddddddddddSdOddNddYddddd d d d d ddd֐ddddddddddd7dd)d,dՐdSdnddddddߐdJddddddddgdgddJddddՐdddddddd d!dd"d#d$d%d#didid&ddːdMdѐd'dd^d(dd]d|d)d̐d*d͐dΐdd_d+dϐdӐdҐdddpd,d,d-d.d/d0d0d1d2d3d4d5d6d6d7d8d9d:dd;d<d=d>d?d@dAdBdCZ&dDdEddFdddŐdd_dd/d%d dddGddHdIddddddddd)dd~ddJd8ddK"Z'ddLdMdNdOdPdQdRdSZ(ee(dTe(dUe(dVgZ)dWd>eDZ*dXe*d<GdYdZdZe+Z,Gd[d\d\e-Z.dS(])absolute_importdivisionunicode_literalsNz5Null character in input stream, replaced with U+FFFD.zInvalid codepoint in stream.z&Solidus (/) incorrectly placed in tag.z.Incorrect CR newline entity, replaced with LF.z9Entity used with illegal number (windows-1252 reference).zPNumeric entity couldn't be converted to character 
(codepoint U+%(charAsInt)08x).zBNumeric entity represents an illegal codepoint: U+%(charAsInt)08x.z#Numeric entity didn't end with ';'.z1Numeric entity expected. Got end of file instead.z'Numeric entity expected but none found.z!Named entity didn't end with ';'.z Named entity expected. Got none.z'End tag contains unexpected attributes.z.End tag contains unexpected self-closing flag.z#Expected tag name. Got '>' instead.zSExpected tag name. Got '?' instead. (HTML doesn't support processing instructions.)z-Expected tag name. Got something else insteadz6Expected closing tag. Got '>' instead. Ignoring ''.z-Expected closing tag. Unexpected end of file.z' instead.z"Unexpected = in unquoted attributez*Unexpected character in unquoted attributez*Unexpected character after attribute name.z+Unexpected character after attribute value.z.Unexpected end of file in attribute value (").z.Unexpected end of file in attribute value (').z*Unexpected end of file in attribute value.z)Unexpected end of file in tag. Expected >z/Unexpected character after / in tag. Expected >z&Expected '--' or 'DOCTYPE'. Not found.z Unexpected ! after -- in commentz$Unexpected space after -- in commentzIncorrect comment.z"Unexpected end of file in comment.z%Unexpected end of file in comment (-)z+Unexpected '-' after '--' found in comment.z'Unexpected end of file in comment (--).z&Unexpected character in comment found.z(No space after literal string 'DOCTYPE'.z.Unexpected > character. Expected DOCTYPE name.z.Unexpected end of file. Expected DOCTYPE name.z'Unexpected end of file in DOCTYPE name.z"Unexpected end of file in DOCTYPE.z%Expected space or '>'. Got '%(data)s'zUnexpected end of DOCTYPE.z Unexpected character in DOCTYPE.zXXX innerHTML EOFzUnexpected DOCTYPE. Ignored.z%html needs to be the first start tag.z)Unexpected End of file. Expected DOCTYPE.zErroneous DOCTYPE.z2Unexpected non-space characters. Expected DOCTYPE.z2Unexpected start tag (%(name)s). Expected DOCTYPE.z0Unexpected end tag (%(name)s). 
Expected DOCTYPE.z?Unexpected end tag (%(name)s) after the (implied) root element.z4Unexpected end of file. Expected end tag (%(name)s).z4Unexpected start tag head in existing head. Ignored.z'Unexpected end tag (%(name)s). Ignored.z;Unexpected start tag (%(name)s) that can be in head. Moved.z Unexpected start tag (%(name)s).zMissing end tag (%(name)s).zMissing end tags (%(name)s).zCUnexpected start tag (%(startName)s) implies end tag (%(endName)s).z@Unexpected start tag (%(originalName)s). Treated as %(newName)s.z,Unexpected start tag %(name)s. Don't use it!z'Unexpected start tag %(name)s. Ignored.zEUnexpected end tag (%(gotName)s). Missing end tag (%(expectedName)s).z:End tag (%(name)s) seen too early. Expected other end tag.zFUnexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).z+End tag (%(name)s) seen too early. Ignored.zQEnd tag (%(name)s) violates step 1, paragraph 1 of the adoption agency algorithm.zQEnd tag (%(name)s) violates step 1, paragraph 2 of the adoption agency algorithm.zQEnd tag (%(name)s) violates step 1, paragraph 3 of the adoption agency algorithm.zQEnd tag (%(name)s) violates step 4, paragraph 4 of the adoption agency algorithm.z>Unexpected end tag (%(originalName)s). Treated as %(newName)s.z'This element (%(name)s) has no end tag.z9Unexpected implied end tag (%(name)s) in the table phase.z>Unexpected implied end tag (%(name)s) in the table body phase.zDUnexpected non-space characters in table context caused voodoo mode.z3Unexpected input with type hidden in table context.z!Unexpected form in table context.zDUnexpected start tag (%(name)s) in table context caused voodoo mode.zBUnexpected end tag (%(name)s) in table context caused voodoo mode.zCUnexpected table cell start tag (%(name)s) in the table body phase.zFGot table cell end tag (%(name)s) while required end tags are missing.z?Unexpected end tag (%(name)s) in the table body phase. 
Ignored.z=Unexpected implied end tag (%(name)s) in the table row phase.z>Unexpected end tag (%(name)s) in the table row phase. Ignored.zJUnexpected select start tag in the select phase treated as select end tag.z/Unexpected input start tag in the select phase.zBUnexpected start tag token (%(name)s in the select phase. Ignored.z;Unexpected end tag (%(name)s) in the select phase. Ignored.zKUnexpected table element start tag (%(name)s) in the select in table phase.zIUnexpected table element end tag (%(name)s) in the select in table phase.z8Unexpected non-space characters in the after body phase.z>Unexpected start tag token (%(name)s) in the after body phase.z attributename attributetype basefrequency baseprofilecalcmode clippathunitscontentscripttypecontentstyletypediffuseconstantedgemodeexternalresourcesrequired filterres filterunitsglyphrefgradienttransform gradientunits kernelmatrixkernelunitlength keypoints keysplineskeytimes lengthadjustlimitingconeangle markerheight markerunits markerwidthmaskcontentunits maskunits numoctaves pathlengthpatterncontentunitspatterntransform patternunits pointsatx pointsaty pointsatz preservealphapreserveaspectratioprimitiveunitsrefxrefy repeatcount repeatdurrequiredextensionsrequiredfeaturesspecularconstantspecularexponent spreadmethod startoffset stddeviation stitchtiles surfacescalesystemlanguage tablevaluestargetxtargety textlengthviewbox viewtargetxchannelselectorychannelselector zoomandpan definitionurl definitionURLractuatearcrolehrefroleshowtyper langspacer ) z xlink:actuatez xlink:arcrolez xlink:hrefz xlink:rolez xlink:showz xlink:titlez xlink:typezxml:basezxml:langz xml:spacer z xmlns:xlinkcCs i|]\}\}}}||f|qSr).0qnameprefixlocalnsrr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/constants.py r)     )rrdrfrgrhcCsi|]}t|t|qSr)ordlower)rcrrrr#r)rDrErFrGrHrI)r,r6z event-sourcerSrVrLr1rOr=r]r)r4rPsourcetrack)rcr`rkrMrXrYrZ irrelevant 
itemscopescopedismapautoplaycontrolsdeferasyncopenmultipledisabled)hiddenrcheckeddefaultnoshade autosubmitreadonly)rrselected autofocus)rrrequiredrrr)rrrrseamless)rcrOaudiovideor`r8datagridr6rLrUr>optionoptgroupr2rPrboutputrM) i i ii i& i i! ii0 i`i9 iRr!i}r!r!i i i i i" i i ii"!iai: iSr!i~ix)lt;gt;amp;apos;quot;Æ&ÁuĂÂuАu𝔄ÀuΑuĀu⩓uĄu𝔸u⁡Åu𝒜u≔ÃÄu∖u⫧u⌆uБu∵uℬuΒu𝔅u𝔹u˘u≎uЧ©uĆu⋒uⅅuℭuČÇuĈu∰uĊ¸·uΧu⊙u⊖u⊕u⊗u∲u”u’u∷u⩴u≡u∯u∮uℂu∐u∳u⨯u𝒞u⋓u≍u⤑uЂuЅuЏu‡u↡u⫤uĎuДu∇uΔu𝔇´u˙u˝`u˜u⋄uⅆu𝔻¨u⃜u≐u⇓u⇐u⇔u⟸u⟺u⟹u⇒u⊨u⇑u⇕u∥u↓u⤓u⇵ȗu⥐u⥞u↽u⥖u⥟u⇁u⥗u⊤u↧u𝒟uĐuŊÐÉuĚÊuЭuĖu𝔈Èu∈uĒu◻u▫uĘu𝔼uΕu⩵u≂u⇌uℰu⩳uΗËu∃uⅇuФu𝔉u◼u▪u𝔽u∀uℱuЃ>uΓuϜuĞuĢuĜuГuĠu𝔊u⋙u𝔾u≥u⋛u≧u⪢u≷u⩾u≳u𝒢u≫uЪuˇ^uĤuℌuℋuℍu─uĦu≏uЕuIJuЁÍÎuИuİuℑÌuĪuⅈu∬u∫u⋂u⁣u⁢uĮu𝕀uΙuℐuĨuІÏuĴuЙu𝔍u𝕁u𝒥uЈuЄuХuЌuΚuĶuКu𝔎u𝕂u𝒦uЉ⃒u⧞u⤂u≤⃒u<⃒u⊴⃒u⤃u⊵⃒u∼⃒u⇖u⤣u⤧óôuоuőu⨸u⦼uœu⦿u𝔬u˛òu⧁u⦵u⦾u⦻u⧀uōuωuοu⦶u𝕠u⦷u⦹u∨u⩝uℴªºu⊶u⩖u⩗u⩛øu⊘õu⨶öu⌽¶u⫳u⫽uп%.u‰u‱u𝔭uφuϕu☎uπuϖuℎ+u⨣u⨢u⨥u⩲u⨦u⨧u⨕u𝕡£u⪳u⪷u⪹u⪵u⋨u′u⌮u⌒u⌓u⊰u𝓅uψu u𝔮u𝕢u⁗u𝓆u⨖?u⤜u⥤u∽̱uŕu⦳u⦒u⦥»u⥵u⤠u⤳u⤞u⥅u⥴u↣u↝u⤚u∶u❳}]u⦌u⦎u⦐uřuŗuрu⤷u⥩u↳u▭u⥽u𝔯u⥬uρuϱu⇉u⋌u˚u‏u⎱u⫮u⟭u⇾u⦆u𝕣u⨮u⨵)u⦔u⨒u›u𝓇u⋊u▹u⧎u⥨u℞uśu⪴u⪸ušuşuŝu⪶u⪺u⋩u⨓uсu⋅u⩦u⇘§;u⤩u✶u𝔰u♯uщuш­uσuςu⩪u⪞u⪠u⪝u⪟u≆u⨤u⥲u⨳u⧤u⌣u⪪u⪬u⪬︀uь/u⧄u⌿u𝕤u♠u⊓︀u⊔︀u𝓈u☆u⊂u⫅u⪽u⫃u⫁u⫋u⊊u⪿u⥹u⫇u⫕u⫓u♪¹²³u⫆u⪾u⫘u⫄u⟉u⫗u⥻u⫂u⫌u⊋u⫀u⫈u⫔u⫖u⇙u⤪ßu⌖uτuťuţuтu⌕u𝔱uθuϑþ×u⨱u⨰u⌶u⫱u𝕥u⫚u‴u▵u≜u◬u⨺u⨹u⧍u⨻u⏢u𝓉uцuћuŧu⥣úuўuŭûuуuűu⥾u𝔲ùu▀u⌜u⌏u◸uūuųu𝕦uυu⇈u⌝u⌎uůu◹u𝓊u⋰uũüu⦧u⫨u⫩u⦜u⊊︀u⫋︀u⊋︀u⫌︀uвu⊻u≚u⋮u𝔳u𝕧u𝓋u⦚uŵu⩟u≙u℘u𝔴u𝕨u𝓌u𝔵uξu⋻u𝕩u𝓍ýuяuŷuы¥u𝔶uїu𝕪u𝓎uюÿuźužuзużuζu𝔷uжu⇝u𝕫u𝓏u‍u‌(AEligzAElig;AMPzAMP;AacutezAacute;zAbreve;AcirczAcirc;zAcy;zAfr;AgravezAgrave;zAlpha;zAmacr;zAnd;zAogon;zAopf;zApplyFunction;AringzAring;zAscr;zAssign;AtildezAtilde;AumlzAuml;z Backslash;zBarv;zBarwed;zBcy;zBecause;z Bernoullis;zBeta;zBfr;zBopf;zBreve;zBscr;zBumpeq;zCHcy;COPYzCOPY;zCacute;zCap;zCapitalDifferentialD;zCayleys;zCcaron;CcedilzCcedil;zCcirc;zCconint;zCdot;zCedilla;z CenterDot;zCfr;zChi;z CircleDot;z CircleMinus;z CirclePlus;z CircleTimes;zClockwiseContourIntegral;zCloseCurlyDoubleQuote;zCloseCurlyQuote;zColon;zColone;z Congruent;zConint;zContourIntegral;zCopf;z Coproduct;z 
CounterClockwiseContourIntegral;zCross;zCscr;zCup;zCupCap;zDD;z DDotrahd;zDJcy;zDScy;zDZcy;zDagger;zDarr;zDashv;zDcaron;zDcy;zDel;zDelta;zDfr;zDiacriticalAcute;zDiacriticalDot;zDiacriticalDoubleAcute;zDiacriticalGrave;zDiacriticalTilde;zDiamond;zDifferentialD;zDopf;zDot;zDotDot;z DotEqual;zDoubleContourIntegral;z DoubleDot;zDoubleDownArrow;zDoubleLeftArrow;zDoubleLeftRightArrow;zDoubleLeftTee;zDoubleLongLeftArrow;zDoubleLongLeftRightArrow;zDoubleLongRightArrow;zDoubleRightArrow;zDoubleRightTee;zDoubleUpArrow;zDoubleUpDownArrow;zDoubleVerticalBar;z DownArrow;z DownArrowBar;zDownArrowUpArrow;z DownBreve;zDownLeftRightVector;zDownLeftTeeVector;zDownLeftVector;zDownLeftVectorBar;zDownRightTeeVector;zDownRightVector;zDownRightVectorBar;zDownTee;z DownTeeArrow;z Downarrow;zDscr;zDstrok;zENG;ETHzETH;EacutezEacute;zEcaron;EcirczEcirc;zEcy;zEdot;zEfr;EgravezEgrave;zElement;zEmacr;zEmptySmallSquare;zEmptyVerySmallSquare;zEogon;zEopf;zEpsilon;zEqual;z EqualTilde;z Equilibrium;zEscr;zEsim;zEta;EumlzEuml;zExists;z ExponentialE;zFcy;zFfr;zFilledSmallSquare;zFilledVerySmallSquare;zFopf;zForAll;z Fouriertrf;zFscr;zGJcy;GTzGT;zGamma;zGammad;zGbreve;zGcedil;zGcirc;zGcy;zGdot;zGfr;zGg;zGopf;z GreaterEqual;zGreaterEqualLess;zGreaterFullEqual;zGreaterGreater;z GreaterLess;zGreaterSlantEqual;z GreaterTilde;zGscr;zGt;zHARDcy;zHacek;zHat;zHcirc;zHfr;z HilbertSpace;zHopf;zHorizontalLine;zHscr;zHstrok;z HumpDownHump;z HumpEqual;zIEcy;zIJlig;zIOcy;IacutezIacute;IcirczIcirc;zIcy;zIdot;zIfr;IgravezIgrave;zIm;zImacr;z ImaginaryI;zImplies;zInt;z Integral;z Intersection;zInvisibleComma;zInvisibleTimes;zIogon;zIopf;zIota;zIscr;zItilde;zIukcy;IumlzIuml;zJcirc;zJcy;zJfr;zJopf;zJscr;zJsercy;zJukcy;zKHcy;zKJcy;zKappa;zKcedil;zKcy;zKfr;zKopf;zKscr;zLJcy;LTzLT;zLacute;zLambda;zLang;z Laplacetrf;zLarr;zLcaron;zLcedil;zLcy;zLeftAngleBracket;z LeftArrow;z LeftArrowBar;zLeftArrowRightArrow;z LeftCeiling;zLeftDoubleBracket;zLeftDownTeeVector;zLeftDownVector;zLeftDownVectorBar;z 
LeftFloor;zLeftRightArrow;zLeftRightVector;zLeftTee;z LeftTeeArrow;zLeftTeeVector;z LeftTriangle;zLeftTriangleBar;zLeftTriangleEqual;zLeftUpDownVector;zLeftUpTeeVector;z LeftUpVector;zLeftUpVectorBar;z LeftVector;zLeftVectorBar;z Leftarrow;zLeftrightarrow;zLessEqualGreater;zLessFullEqual;z LessGreater;z LessLess;zLessSlantEqual;z LessTilde;zLfr;zLl;z Lleftarrow;zLmidot;zLongLeftArrow;zLongLeftRightArrow;zLongRightArrow;zLongleftarrow;zLongleftrightarrow;zLongrightarrow;zLopf;zLowerLeftArrow;zLowerRightArrow;zLscr;zLsh;zLstrok;zLt;zMap;zMcy;z MediumSpace;z Mellintrf;zMfr;z MinusPlus;zMopf;zMscr;zMu;zNJcy;zNacute;zNcaron;zNcedil;zNcy;zNegativeMediumSpace;zNegativeThickSpace;zNegativeThinSpace;zNegativeVeryThinSpace;zNestedGreaterGreater;zNestedLessLess;zNewLine;zNfr;zNoBreak;zNonBreakingSpace;zNopf;zNot;z NotCongruent;z NotCupCap;zNotDoubleVerticalBar;z NotElement;z NotEqual;zNotEqualTilde;z NotExists;z NotGreater;zNotGreaterEqual;zNotGreaterFullEqual;zNotGreaterGreater;zNotGreaterLess;zNotGreaterSlantEqual;zNotGreaterTilde;zNotHumpDownHump;z NotHumpEqual;zNotLeftTriangle;zNotLeftTriangleBar;zNotLeftTriangleEqual;zNotLess;z NotLessEqual;zNotLessGreater;z NotLessLess;zNotLessSlantEqual;z NotLessTilde;zNotNestedGreaterGreater;zNotNestedLessLess;z NotPrecedes;zNotPrecedesEqual;zNotPrecedesSlantEqual;zNotReverseElement;zNotRightTriangle;zNotRightTriangleBar;zNotRightTriangleEqual;zNotSquareSubset;zNotSquareSubsetEqual;zNotSquareSuperset;zNotSquareSupersetEqual;z NotSubset;zNotSubsetEqual;z NotSucceeds;zNotSucceedsEqual;zNotSucceedsSlantEqual;zNotSucceedsTilde;z NotSuperset;zNotSupersetEqual;z NotTilde;zNotTildeEqual;zNotTildeFullEqual;zNotTildeTilde;zNotVerticalBar;zNscr;NtildezNtilde;zNu;zOElig;OacutezOacute;OcirczOcirc;zOcy;zOdblac;zOfr;OgravezOgrave;zOmacr;zOmega;zOmicron;zOopf;zOpenCurlyDoubleQuote;zOpenCurlyQuote;zOr;zOscr;OslashzOslash;OtildezOtilde;zOtimes;OumlzOuml;zOverBar;z OverBrace;z OverBracket;zOverParenthesis;z PartialD;zPcy;zPfr;zPhi;zPi;z 
PlusMinus;zPoincareplane;zPopf;zPr;z Precedes;zPrecedesEqual;zPrecedesSlantEqual;zPrecedesTilde;zPrime;zProduct;z Proportion;z Proportional;zPscr;zPsi;QUOTzQUOT;zQfr;zQopf;zQscr;zRBarr;REGzREG;zRacute;zRang;zRarr;zRarrtl;zRcaron;zRcedil;zRcy;zRe;zReverseElement;zReverseEquilibrium;zReverseUpEquilibrium;zRfr;zRho;zRightAngleBracket;z RightArrow;zRightArrowBar;zRightArrowLeftArrow;z RightCeiling;zRightDoubleBracket;zRightDownTeeVector;zRightDownVector;zRightDownVectorBar;z RightFloor;z RightTee;zRightTeeArrow;zRightTeeVector;zRightTriangle;zRightTriangleBar;zRightTriangleEqual;zRightUpDownVector;zRightUpTeeVector;zRightUpVector;zRightUpVectorBar;z RightVector;zRightVectorBar;z Rightarrow;zRopf;z RoundImplies;z Rrightarrow;zRscr;zRsh;z RuleDelayed;zSHCHcy;zSHcy;zSOFTcy;zSacute;zSc;zScaron;zScedil;zScirc;zScy;zSfr;zShortDownArrow;zShortLeftArrow;zShortRightArrow;z ShortUpArrow;zSigma;z SmallCircle;zSopf;zSqrt;zSquare;zSquareIntersection;z SquareSubset;zSquareSubsetEqual;zSquareSuperset;zSquareSupersetEqual;z SquareUnion;zSscr;zStar;zSub;zSubset;z SubsetEqual;z Succeeds;zSucceedsEqual;zSucceedsSlantEqual;zSucceedsTilde;z SuchThat;zSum;zSup;z Superset;zSupersetEqual;zSupset;THORNzTHORN;zTRADE;zTSHcy;zTScy;zTab;zTau;zTcaron;zTcedil;zTcy;zTfr;z Therefore;zTheta;z ThickSpace;z ThinSpace;zTilde;z TildeEqual;zTildeFullEqual;z TildeTilde;zTopf;z TripleDot;zTscr;zTstrok;UacutezUacute;zUarr;z Uarrocir;zUbrcy;zUbreve;UcirczUcirc;zUcy;zUdblac;zUfr;UgravezUgrave;zUmacr;z UnderBar;z UnderBrace;z UnderBracket;zUnderParenthesis;zUnion;z UnionPlus;zUogon;zUopf;zUpArrow;z UpArrowBar;zUpArrowDownArrow;z UpDownArrow;zUpEquilibrium;zUpTee;z UpTeeArrow;zUparrow;z Updownarrow;zUpperLeftArrow;zUpperRightArrow;zUpsi;zUpsilon;zUring;zUscr;zUtilde;UumlzUuml;zVDash;zVbar;zVcy;zVdash;zVdashl;zVee;zVerbar;zVert;z VerticalBar;z 
VerticalLine;zVerticalSeparator;zVerticalTilde;zVeryThinSpace;zVfr;zVopf;zVscr;zVvdash;zWcirc;zWedge;zWfr;zWopf;zWscr;zXfr;zXi;zXopf;zXscr;zYAcy;zYIcy;zYUcy;YacutezYacute;zYcirc;zYcy;zYfr;zYopf;zYscr;zYuml;zZHcy;zZacute;zZcaron;zZcy;zZdot;zZeroWidthSpace;zZeta;zZfr;zZopf;zZscr;aacutezaacute;zabreve;zac;zacE;zacd;acirczacirc;acutezacute;zacy;aeligzaelig;zaf;zafr;agravezagrave;zalefsym;zaleph;zalpha;zamacr;zamalg;ampr$zand;zandand;zandd;z andslope;zandv;zang;zange;zangle;zangmsd;z angmsdaa;z angmsdab;z angmsdac;z angmsdad;z angmsdae;z angmsdaf;z angmsdag;z angmsdah;zangrt;zangrtvb;z angrtvbd;zangsph;zangst;zangzarr;zaogon;zaopf;zap;zapE;zapacir;zape;zapid;r%zapprox;z approxeq;aringzaring;zascr;zast;zasymp;zasympeq;atildezatilde;aumlzauml;z awconint;zawint;zbNot;z backcong;z backepsilon;z backprime;zbacksim;z backsimeq;zbarvee;zbarwed;z barwedge;zbbrk;z bbrktbrk;zbcong;zbcy;zbdquo;zbecaus;zbecause;zbemptyv;zbepsi;zbernou;zbeta;zbeth;zbetween;zbfr;zbigcap;zbigcirc;zbigcup;zbigodot;z bigoplus;z bigotimes;z bigsqcup;zbigstar;zbigtriangledown;zbigtriangleup;z biguplus;zbigvee;z bigwedge;zbkarow;z blacklozenge;z blacksquare;zblacktriangle;zblacktriangledown;zblacktriangleleft;zblacktriangleright;zblank;zblk12;zblk14;zblk34;zblock;zbne;zbnequiv;zbnot;zbopf;zbot;zbottom;zbowtie;zboxDL;zboxDR;zboxDl;zboxDr;zboxH;zboxHD;zboxHU;zboxHd;zboxHu;zboxUL;zboxUR;zboxUl;zboxUr;zboxV;zboxVH;zboxVL;zboxVR;zboxVh;zboxVl;zboxVr;zboxbox;zboxdL;zboxdR;zboxdl;zboxdr;zboxh;zboxhD;zboxhU;zboxhd;zboxhu;z boxminus;zboxplus;z boxtimes;zboxuL;zboxuR;zboxul;zboxur;zboxv;zboxvH;zboxvL;zboxvR;zboxvh;zboxvl;zboxvr;zbprime;zbreve;brvbarzbrvbar;zbscr;zbsemi;zbsim;zbsime;zbsol;zbsolb;z bsolhsub;zbull;zbullet;zbump;zbumpE;zbumpe;zbumpeq;zcacute;zcap;zcapand;z capbrcup;zcapcap;zcapcup;zcapdot;zcaps;zcaret;zcaron;zccaps;zccaron;ccedilzccedil;zccirc;zccups;zccupssm;zcdot;cedilzcedil;zcemptyv;centzcent;z centerdot;zcfr;zchcy;zcheck;z checkmark;zchi;zcir;zcirE;zcirc;zcirceq;zcirclearrowleft;zcirclearrowright;z 
circledR;z circledS;z circledast;z circledcirc;z circleddash;zcire;z cirfnint;zcirmid;zcirscir;zclubs;z clubsuit;zcolon;zcolone;zcoloneq;zcomma;zcommat;zcomp;zcompfn;z complement;z complexes;zcong;zcongdot;zconint;zcopf;zcoprod;copyzcopy;zcopysr;zcrarr;zcross;zcscr;zcsub;zcsube;zcsup;zcsupe;zctdot;zcudarrl;zcudarrr;zcuepr;zcuesc;zcularr;zcularrp;zcup;z cupbrcap;zcupcap;zcupcup;zcupdot;zcupor;zcups;zcurarr;zcurarrm;z curlyeqprec;z curlyeqsucc;z curlyvee;z curlywedge;currenzcurren;zcurvearrowleft;zcurvearrowright;zcuvee;zcuwed;z cwconint;zcwint;zcylcty;zdArr;zdHar;zdagger;zdaleth;zdarr;zdash;zdashv;zdbkarow;zdblac;zdcaron;zdcy;zdd;zddagger;zddarr;zddotseq;degzdeg;zdelta;zdemptyv;zdfisht;zdfr;zdharl;zdharr;zdiam;zdiamond;z diamondsuit;zdiams;zdie;zdigamma;zdisin;zdiv;dividezdivide;zdivideontimes;zdivonx;zdjcy;zdlcorn;zdlcrop;zdollar;zdopf;zdot;zdoteq;z doteqdot;z dotminus;zdotplus;z dotsquare;zdoublebarwedge;z downarrow;zdowndownarrows;zdownharpoonleft;zdownharpoonright;z drbkarow;zdrcorn;zdrcrop;zdscr;zdscy;zdsol;zdstrok;zdtdot;zdtri;zdtrif;zduarr;zduhar;zdwangle;zdzcy;z dzigrarr;zeDDot;zeDot;eacutezeacute;zeaster;zecaron;zecir;ecirczecirc;zecolon;zecy;zedot;zee;zefDot;zefr;zeg;egravezegrave;zegs;zegsdot;zel;z elinters;zell;zels;zelsdot;zemacr;zempty;z emptyset;zemptyv;zemsp13;zemsp14;zemsp;zeng;zensp;zeogon;zeopf;zepar;zeparsl;zeplus;zepsi;zepsilon;zepsiv;zeqcirc;zeqcolon;zeqsim;z eqslantgtr;z eqslantless;zequals;zequest;zequiv;zequivDD;z eqvparsl;zerDot;zerarr;zescr;zesdot;zesim;zeta;ethzeth;eumlzeuml;zeuro;zexcl;zexist;z expectation;z exponentiale;zfallingdotseq;zfcy;zfemale;zffilig;zfflig;zffllig;zffr;zfilig;zfjlig;zflat;zfllig;zfltns;zfnof;zfopf;zforall;zfork;zforkv;z fpartint;frac12zfrac12;zfrac13;frac14zfrac14;zfrac15;zfrac16;zfrac18;zfrac23;zfrac25;frac34zfrac34;zfrac35;zfrac38;zfrac45;zfrac56;zfrac58;zfrac78;zfrasl;zfrown;zfscr;zgE;zgEl;zgacute;zgamma;zgammad;zgap;zgbreve;zgcirc;zgcy;zgdot;zge;zgel;zgeq;zgeqq;z geqslant;zges;zgescc;zgesdot;zgesdoto;z 
gesdotol;zgesl;zgesles;zgfr;zgg;zggg;zgimel;zgjcy;zgl;zglE;zgla;zglj;zgnE;zgnap;z gnapprox;zgne;zgneq;zgneqq;zgnsim;zgopf;zgrave;zgscr;zgsim;zgsime;zgsiml;gtr#zgtcc;zgtcir;zgtdot;zgtlPar;zgtquest;z gtrapprox;zgtrarr;zgtrdot;z gtreqless;z gtreqqless;zgtrless;zgtrsim;z gvertneqq;zgvnE;zhArr;zhairsp;zhalf;zhamilt;zhardcy;zharr;zharrcir;zharrw;zhbar;zhcirc;zhearts;z heartsuit;zhellip;zhercon;zhfr;z hksearow;z hkswarow;zhoarr;zhomtht;zhookleftarrow;zhookrightarrow;zhopf;zhorbar;zhscr;zhslash;zhstrok;zhybull;zhyphen;iacuteziacute;zic;icirczicirc;zicy;ziecy;iexclziexcl;ziff;zifr;igravezigrave;zii;ziiiint;ziiint;ziinfin;ziiota;zijlig;zimacr;zimage;z imagline;z imagpart;zimath;zimof;zimped;zin;zincare;zinfin;z infintie;zinodot;zint;zintcal;z integers;z intercal;z intlarhk;zintprod;ziocy;ziogon;ziopf;ziota;ziprod;iquestziquest;ziscr;zisin;zisinE;zisindot;zisins;zisinsv;zisinv;zit;zitilde;ziukcy;iumlziuml;zjcirc;zjcy;zjfr;zjmath;zjopf;zjscr;zjsercy;zjukcy;zkappa;zkappav;zkcedil;zkcy;zkfr;zkgreen;zkhcy;zkjcy;zkopf;zkscr;zlAarr;zlArr;zlAtail;zlBarr;zlE;zlEg;zlHar;zlacute;z laemptyv;zlagran;zlambda;zlang;zlangd;zlangle;zlap;laquozlaquo;zlarr;zlarrb;zlarrbfs;zlarrfs;zlarrhk;zlarrlp;zlarrpl;zlarrsim;zlarrtl;zlat;zlatail;zlate;zlates;zlbarr;zlbbrk;zlbrace;zlbrack;zlbrke;zlbrksld;zlbrkslu;zlcaron;zlcedil;zlceil;zlcub;zlcy;zldca;zldquo;zldquor;zldrdhar;z ldrushar;zldsh;zle;z leftarrow;zleftarrowtail;zleftharpoondown;zleftharpoonup;zleftleftarrows;zleftrightarrow;zleftrightarrows;zleftrightharpoons;zleftrightsquigarrow;zleftthreetimes;zleg;zleq;zleqq;z leqslant;zles;zlescc;zlesdot;zlesdoto;z lesdotor;zlesg;zlesges;z lessapprox;zlessdot;z lesseqgtr;z lesseqqgtr;zlessgtr;zlesssim;zlfisht;zlfloor;zlfr;zlg;zlgE;zlhard;zlharu;zlharul;zlhblk;zljcy;zll;zllarr;z llcorner;zllhard;zlltri;zlmidot;zlmoust;z lmoustache;zlnE;zlnap;z lnapprox;zlne;zlneq;zlneqq;zlnsim;zloang;zloarr;zlobrk;zlongleftarrow;zlongleftrightarrow;z 
longmapsto;zlongrightarrow;zlooparrowleft;zlooparrowright;zlopar;zlopf;zloplus;zlotimes;zlowast;zlowbar;zloz;zlozenge;zlozf;zlpar;zlparlt;zlrarr;z lrcorner;zlrhar;zlrhard;zlrm;zlrtri;zlsaquo;zlscr;zlsh;zlsim;zlsime;zlsimg;zlsqb;zlsquo;zlsquor;zlstrok;ltr"zltcc;zltcir;zltdot;zlthree;zltimes;zltlarr;zltquest;zltrPar;zltri;zltrie;zltrif;z lurdshar;zluruhar;z lvertneqq;zlvnE;zmDDot;macrzmacr;zmale;zmalt;zmaltese;zmap;zmapsto;z mapstodown;z mapstoleft;z mapstoup;zmarker;zmcomma;zmcy;zmdash;zmeasuredangle;zmfr;zmho;microzmicro;zmid;zmidast;zmidcir;middotzmiddot;zminus;zminusb;zminusd;zminusdu;zmlcp;zmldr;zmnplus;zmodels;zmopf;zmp;zmscr;zmstpos;zmu;z multimap;zmumap;znGg;znGt;znGtv;z nLeftarrow;znLeftrightarrow;znLl;znLt;znLtv;z nRightarrow;znVDash;znVdash;znabla;znacute;znang;znap;znapE;znapid;znapos;znapprox;znatur;znatural;z naturals;nbspznbsp;znbump;znbumpe;zncap;zncaron;zncedil;zncong;z ncongdot;zncup;zncy;zndash;zne;zneArr;znearhk;znearr;znearrow;znedot;znequiv;znesear;znesim;znexist;znexists;znfr;zngE;znge;zngeq;zngeqq;z ngeqslant;znges;zngsim;zngt;zngtr;znhArr;znharr;znhpar;zni;znis;znisd;zniv;znjcy;znlArr;znlE;znlarr;znldr;znle;z nleftarrow;znleftrightarrow;znleq;znleqq;z nleqslant;znles;znless;znlsim;znlt;znltri;znltrie;znmid;znopf;notznot;znotin;znotinE;z notindot;znotinva;znotinvb;znotinvc;znotni;znotniva;znotnivb;znotnivc;znpar;z nparallel;znparsl;znpart;znpolint;znpr;znprcue;znpre;znprec;znpreceq;znrArr;znrarr;znrarrc;znrarrw;z nrightarrow;znrtri;znrtrie;znsc;znsccue;znsce;znscr;z nshortmid;znshortparallel;znsim;znsime;znsimeq;znsmid;znspar;znsqsube;znsqsupe;znsub;znsubE;znsube;znsubset;z nsubseteq;z nsubseteqq;znsucc;znsucceq;znsup;znsupE;znsupe;znsupset;z nsupseteq;z 
nsupseteqq;zntgl;ntildezntilde;zntlg;zntriangleleft;zntrianglelefteq;zntriangleright;zntrianglerighteq;znu;znum;znumero;znumsp;znvDash;znvHarr;znvap;znvdash;znvge;znvgt;znvinfin;znvlArr;znvle;znvlt;znvltrie;znvrArr;znvrtrie;znvsim;znwArr;znwarhk;znwarr;znwarrow;znwnear;zoS;oacutezoacute;zoast;zocir;ocirczocirc;zocy;zodash;zodblac;zodiv;zodot;zodsold;zoelig;zofcir;zofr;zogon;ogravezograve;zogt;zohbar;zohm;zoint;zolarr;zolcir;zolcross;zoline;zolt;zomacr;zomega;zomicron;zomid;zominus;zoopf;zopar;zoperp;zoplus;zor;zorarr;zord;zorder;zorderof;ordfzordf;ordmzordm;zorigof;zoror;zorslope;zorv;zoscr;oslashzoslash;zosol;otildezotilde;zotimes;z otimesas;oumlzouml;zovbar;zpar;parazpara;z parallel;zparsim;zparsl;zpart;zpcy;zpercnt;zperiod;zpermil;zperp;zpertenk;zpfr;zphi;zphiv;zphmmat;zphone;zpi;z pitchfork;zpiv;zplanck;zplanckh;zplankv;zplus;z plusacir;zplusb;zpluscir;zplusdo;zplusdu;zpluse;plusmnzplusmn;zplussim;zplustwo;zpm;z pointint;zpopf;poundzpound;zpr;zprE;zprap;zprcue;zpre;zprec;z precapprox;z preccurlyeq;zpreceq;z precnapprox;z precneqq;z precnsim;zprecsim;zprime;zprimes;zprnE;zprnap;zprnsim;zprod;z profalar;z profline;z profsurf;zprop;zpropto;zprsim;zprurel;zpscr;zpsi;zpuncsp;zqfr;zqint;zqopf;zqprime;zqscr;z quaternions;zquatint;zquest;zquesteq;quotr&zrAarr;zrArr;zrAtail;zrBarr;zrHar;zrace;zracute;zradic;z raemptyv;zrang;zrangd;zrange;zrangle;raquozraquo;zrarr;zrarrap;zrarrb;zrarrbfs;zrarrc;zrarrfs;zrarrhk;zrarrlp;zrarrpl;zrarrsim;zrarrtl;zrarrw;zratail;zratio;z rationals;zrbarr;zrbbrk;zrbrace;zrbrack;zrbrke;zrbrksld;zrbrkslu;zrcaron;zrcedil;zrceil;zrcub;zrcy;zrdca;zrdldhar;zrdquo;zrdquor;zrdsh;zreal;zrealine;z realpart;zreals;zrect;regzreg;zrfisht;zrfloor;zrfr;zrhard;zrharu;zrharul;zrho;zrhov;z rightarrow;zrightarrowtail;zrightharpoondown;zrightharpoonup;zrightleftarrows;zrightleftharpoons;zrightrightarrows;zrightsquigarrow;zrightthreetimes;zring;z risingdotseq;zrlarr;zrlhar;zrlm;zrmoust;z 
rmoustache;zrnmid;zroang;zroarr;zrobrk;zropar;zropf;zroplus;zrotimes;zrpar;zrpargt;z rppolint;zrrarr;zrsaquo;zrscr;zrsh;zrsqb;zrsquo;zrsquor;zrthree;zrtimes;zrtri;zrtrie;zrtrif;z rtriltri;zruluhar;zrx;zsacute;zsbquo;zsc;zscE;zscap;zscaron;zsccue;zsce;zscedil;zscirc;zscnE;zscnap;zscnsim;z scpolint;zscsim;zscy;zsdot;zsdotb;zsdote;zseArr;zsearhk;zsearr;zsearrow;sectzsect;zsemi;zseswar;z setminus;zsetmn;zsext;zsfr;zsfrown;zsharp;zshchcy;zshcy;z shortmid;zshortparallel;shyzshy;zsigma;zsigmaf;zsigmav;zsim;zsimdot;zsime;zsimeq;zsimg;zsimgE;zsiml;zsimlE;zsimne;zsimplus;zsimrarr;zslarr;zsmallsetminus;zsmashp;z smeparsl;zsmid;zsmile;zsmt;zsmte;zsmtes;zsoftcy;zsol;zsolb;zsolbar;zsopf;zspades;z spadesuit;zspar;zsqcap;zsqcaps;zsqcup;zsqcups;zsqsub;zsqsube;z sqsubset;z sqsubseteq;zsqsup;zsqsupe;z sqsupset;z sqsupseteq;zsqu;zsquare;zsquarf;zsquf;zsrarr;zsscr;zssetmn;zssmile;zsstarf;zstar;zstarf;zstraightepsilon;z straightphi;zstrns;zsub;zsubE;zsubdot;zsube;zsubedot;zsubmult;zsubnE;zsubne;zsubplus;zsubrarr;zsubset;z subseteq;z subseteqq;z subsetneq;z subsetneqq;zsubsim;zsubsub;zsubsup;zsucc;z succapprox;z succcurlyeq;zsucceq;z succnapprox;z succneqq;z succnsim;zsuccsim;zsum;zsung;sup1zsup1;sup2zsup2;sup3zsup3;zsup;zsupE;zsupdot;zsupdsub;zsupe;zsupedot;zsuphsol;zsuphsub;zsuplarr;zsupmult;zsupnE;zsupne;zsupplus;zsupset;z supseteq;z supseteqq;z supsetneq;z supsetneqq;zsupsim;zsupsub;zsupsup;zswArr;zswarhk;zswarr;zswarrow;zswnwar;szligzszlig;ztarget;ztau;ztbrk;ztcaron;ztcedil;ztcy;ztdot;ztelrec;ztfr;zthere4;z therefore;ztheta;z thetasym;zthetav;z thickapprox;z thicksim;zthinsp;zthkap;zthksim;thornzthorn;ztilde;timesztimes;ztimesb;z timesbar;ztimesd;ztint;ztoea;ztop;ztopbot;ztopcir;ztopf;ztopfork;ztosa;ztprime;ztrade;z triangle;z triangledown;z triangleleft;ztrianglelefteq;z triangleq;ztriangleright;ztrianglerighteq;ztridot;ztrie;z triminus;ztriplus;ztrisb;ztritime;z 
trpezium;ztscr;ztscy;ztshcy;ztstrok;ztwixt;ztwoheadleftarrow;ztwoheadrightarrow;zuArr;zuHar;uacutezuacute;zuarr;zubrcy;zubreve;ucirczucirc;zucy;zudarr;zudblac;zudhar;zufisht;zufr;ugravezugrave;zuharl;zuharr;zuhblk;zulcorn;z ulcorner;zulcrop;zultri;zumacr;umlzuml;zuogon;zuopf;zuparrow;z updownarrow;zupharpoonleft;zupharpoonright;zuplus;zupsi;zupsih;zupsilon;z upuparrows;zurcorn;z urcorner;zurcrop;zuring;zurtri;zuscr;zutdot;zutilde;zutri;zutrif;zuuarr;uumlzuuml;zuwangle;zvArr;zvBar;zvBarv;zvDash;zvangrt;z varepsilon;z varkappa;z varnothing;zvarphi;zvarpi;z varpropto;zvarr;zvarrho;z varsigma;z varsubsetneq;zvarsubsetneqq;z varsupsetneq;zvarsupsetneqq;z vartheta;zvartriangleleft;zvartriangleright;zvcy;zvdash;zvee;zveebar;zveeeq;zvellip;zverbar;zvert;zvfr;zvltri;zvnsub;zvnsup;zvopf;zvprop;zvrtri;zvscr;zvsubnE;zvsubne;zvsupnE;zvsupne;zvzigzag;zwcirc;zwedbar;zwedge;zwedgeq;zweierp;zwfr;zwopf;zwp;zwr;zwreath;zwscr;zxcap;zxcirc;zxcup;zxdtri;zxfr;zxhArr;zxharr;zxi;zxlArr;zxlarr;zxmap;zxnis;zxodot;zxopf;zxoplus;zxotime;zxrArr;zxrarr;zxscr;zxsqcup;zxuplus;zxutri;zxvee;zxwedge;yacutezyacute;zyacy;zycirc;zycy;yenzyen;zyfr;zyicy;zyopf;zyscr;zyucy;yumlzyuml;zzacute;zzcaron;zzcy;zzdot;zzeetrf;zzeta;zzfr;zzhcy;zzigrarr;zzopf;zzscr;zzwj;zzwnj;u�r)"r )Doctype CharactersSpaceCharactersStartTagEndTagEmptyTagComment ParseErrorr@rArBcCsi|]\}}||qSrr)rkvrrrrx rmathc@seZdZdZdS)DataLossWarningzBRaised when the current tree is unable to represent the input dataN)__name__ __module__ __qualname____doc__rrrrrH| srHc@s eZdZdS)_ReparseExceptionN)rIrJrKrrrrrM srM)/ __future__rrrstringEOFE namespaces frozensetscopingElementsformattingElementsspecialElementshtmlIntegrationPointElements"mathmlTextIntegrationPointElementsadjustSVGAttributesadjustMathMLAttributesadjustForeignAttributesitemsunadjustForeignAttributesspaceCharacterstableInsertModeElementsascii_lowercaseasciiLowercaseascii_uppercaseasciiUppercase ascii_letters asciiLettersdigits hexdigits hexDigitsasciiUpper2LowerheadingElements 
voidElements cdataElementsrcdataElementsbooleanAttributesentitiesWindows1252 xmlEntitiesentitiesreplacementCharacters tokenTypes tagTokenTypesprefixes UserWarningrH ExceptionrMrrrrs$                                                                                                              S          A                    # L&  PK!Wcc&__pycache__/html5parser.cpython-39.pycnu[a Re@sddlmZmZmZddlmZmZddlZddlm Z ddlm Z ddlm Z ddl m Z dd lmZdd lmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z d d dZ!d!ddZ"ddZ#Gddde$Z%ej&ddZ'ddZ(d"ddZ)Gddde*Z+dS)#)absolute_importdivisionunicode_literals)with_metaclassviewkeysN) _inputstream) _tokenizer) treebuilders)Marker)_utils)spaceCharactersasciiUpper2LowerspecialElementsheadingElements cdataElementsrcdataElements tokenTypes tagTokenTypes namespaceshtmlIntegrationPointElements"mathmlTextIntegrationPointElementsadjustForeignAttributesadjustMathMLAttributesadjustSVGAttributesE_ReparseExceptionetreeTcKs(t|}t||d}|j|fi|S)aParse an HTML document as a string or file-like object into a tree :arg doc: the document to parse as a string or file-like object :arg treebuilder: the treebuilder to use when parsing :arg namespaceHTMLElements: whether or not to namespace HTML elements :returns: parsed tree Example: >>> from html5lib.html5parser import parse >>> parse('

This is a doc

') namespaceHTMLElements)r getTreeBuilder HTMLParserparse)doc treebuilderrkwargstbpr(/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/html5parser.pyr"s  r"divcKs,t|}t||d}|j|fd|i|S)a#Parse an HTML fragment as a string or file-like object into a tree :arg doc: the fragment to parse as a string or file-like object :arg container: the container context to parse the fragment in :arg treebuilder: the treebuilder to use when parsing :arg namespaceHTMLElements: whether or not to namespace HTML elements :returns: parsed tree Example: >>> from html5lib.html5libparser import parseFragment >>> parseFragment('this is a fragment') r container)r r r! parseFragment)r#r+r$rr%r&r'r(r(r)r,1s  r,csGfdddt}|S)NcseZdZfddZdS)z-method_decorator_metaclass..Decoratedcs>|D]$\}}t|tjr$|}|||<qt||||SN)items isinstancetypes FunctionTypetype__new__)meta classnamebases classDict attributeName attributefunctionr(r)r3Ls   z5method_decorator_metaclass..Decorated.__new__N)__name__ __module__ __qualname__r3r(r:r(r) DecoratedKsr?)r2)r;r?r(r:r)method_decorator_metaclassJsr@c@seZdZdZd'ddZd(dd Zd d Zed d ZddZ ddZ ddZ ddZ ddZ d)ddZddZddZdd Zd!d"Zd#d$Zd%d&ZdS)*r!z]HTML parser Generates a tree structure from a stream of (possibly malformed) HTML. NFTcsH|_|durtd}||_g_fddt|D_dS)a :arg tree: a treebuilder class controlling the type of tree that will be returned. 
Built in treebuilders can be accessed through html5lib.treebuilders.getTreeBuilder(treeType) :arg strict: raise an exception when a parse error is encountered :arg namespaceHTMLElements: whether or not to namespace HTML elements :arg debug: whether or not to enable debug mode which logs things Example: >>> from html5lib.html5parser import HTMLParser >>> parser = HTMLParser() # generates parser with etree builder >>> parser = HTMLParser('lxml', strict=True) # generates parser with lxml builder which is strict Nrcsi|]\}}||jqSr()tree).0nameclsselfr(r) yz'HTMLParser.__init__..)strictr r rAerrors getPhasesr.phases)rFrArIrdebugr(rEr)__init__]s    zHTMLParser.__init__r*cKsf||_||_||_tj|fd|i||_|z |Wn"ty`||Yn0dS)Nparser) innerHTMLModer+ scriptingr HTMLTokenizer tokenizerresetmainLoopr)rFstream innerHTMLr+rQr%r(r(r)_parse|s  zHTMLParser._parsecCs|jd|_g|_g|_d|_|jr|j|_ |j t vrL|j j |j _ n0|j tvrd|j j|j _ n|j dkr||j j|j _ n|jd|_|j|nd|_ |jd|_d|_d|_d|_dS)NFz no quirks plaintext beforeHtmlinitialT)rArT firstStartTagrJlog compatModerPr+lowerrWrrS rcdataStatestater rawtextStateplaintextStaterLphaseinsertHtmlElementresetInsertionMode lastPhasebeforeRCDataPhase framesetOKrEr(r(r)rTs*         zHTMLParser.resetcCst|dsdS|jjjdjS)zName of the character encoding that was used to decode the input stream, or :obj:`None` if that is not determined yet rSNr)hasattrrSrV charEncodingrCrEr(r(r)documentEncodings zHTMLParser.documentEncodingcCsJ|jdkr6|jtdkr6d|jvo4|jdtdvS|j|jftvSdS)Nannotation-xmlmathmlencoding)z text/htmlzapplication/xhtml+xml)rC namespacer attributes translaterrrFelementr(r(r)isHTMLIntegrationPoints    z!HTMLParser.isHTMLIntegrationPointcCs|j|jftvSr-)rprCrrsr(r(r)isMathMLTextIntegrationPointsz'HTMLParser.isMathMLTextIntegrationPointcCsftd}td}td}td}td}td}td}|jD]}d} |} | dur| } |jjrn|jjdnd} | r|| jnd} | r| jnd} | d }||kr|| d | d id} qLt|jjd ksb| |jj ksb| | r||kr|d t ddgvsb|||fvsb| t dkrF| dkrF||krF|d dksb| | rj||||fvrj|j}n |jd}||kr|| } qL||kr|| } qL||kr|| } qL||kr|| } qL||kr|| } 
qL||krL|| } qL||kr>| dr>| ds>|dd | d iq>d}g}|rb||j|j}|r,|j|vs,Jq,dS)N CharactersSpaceCharactersStartTagEndTagCommentDoctype ParseErrorr2datadatavarsrrCmglyph malignmarkrnrmsvginForeignContent selfClosingselfClosingAcknowledgedz&non-void-element-with-trailing-solidusT)rrSrA openElementsrprC parseErrorgetlendefaultNamespacerv frozensetrrurdrLprocessCharactersprocessSpaceCharactersprocessStartTag processEndTagprocessCommentprocessDoctypeappend processEOF)rFCharactersTokenSpaceCharactersToken StartTagToken EndTagToken CommentToken DoctypeTokenParseErrorTokentoken prev_token new_token currentNodecurrentNodeNamespacecurrentNodeNamer2rd reprocessrLr(r(r)rUs                        zHTMLParser.mainLoopcOs&|j|ddg|Ri||jS)aParse a HTML document into a well-formed tree :arg stream: a file-like object or string containing the HTML to be parsed The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element). :arg scripting: treat noscript elements as if JavaScript was turned on :returns: parsed tree Example: >>> from html5lib.html5parser import HTMLParser >>> parser = HTMLParser() >>> parser.parse('

This is a doc

') FN)rXrA getDocumentrFrVargsr%r(r(r)r"szHTMLParser.parsecOs$|j|dg|Ri||jS)aZParse a HTML fragment into a well-formed tree fragment :arg container: name of the element we're setting the innerHTML property if set to None, default to 'div' :arg stream: a file-like object or string containing the HTML to be parsed The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) :arg scripting: treat noscript elements as if JavaScript was turned on :returns: parsed tree Example: >>> from html5lib.html5libparser import HTMLParser >>> parser = HTMLParser() >>> parser.parseFragment('this is a fragment') T)rXrA getFragmentrr(r(r)r,szHTMLParser.parseFragmentXXX-undefined-errorcCs@|dur i}|j|jj||f|jr__doc__rNrXrTpropertyrlrurvrUr"r,rrrrrrfrr(r(r(r)r!Vs$  "  C ,r!csdd}dd}Gdddt|||Gddd}Gd d d }Gd d d }Gd dd}Gddd}Gddd}GfdddGddd} Gfddd} GfdddGfddd} Gfddd} Gfdd d } Gfd!d"d"}Gfd#d$d$}Gfd%d&d&}Gd'd(d(}Gfd)d*d*}Gd+d,d,}Gfd-d.d.}Gfd/d0d0}Gd1d2d2}Gd3d4d4}||||||| | | | | ||||||||||d5S)6Ncs$ddtDfdd}|S)z4Logger that records which phase processes each tokencSsi|]\}}||qSr(r()rBkeyvaluer(r(r)rGrHz*getPhases..log..csjdrt|dkr|d}d|di}|dtvrH|d|d<|jj|jjjj|jj j j|j jj|f|g|Ri|S|g|Ri|SdS)Nprocessrr2rC) r< startswithrrrOr]rrSrard __class__)rFrr%rinfor; type_namesr(r)wrappeds   z'getPhases..log..wrapped)rr.)r;rr(rr)r]szgetPhases..logcSs|r t|StSdSr-)r@r2) use_metaclassmetaclass_funcr(r(r) getMetaclassszgetPhases..getMetaclassc@s\eZdZdZdZddZddZddZd d Zd d Z d dZ ddZ ddZ ddZ dS)zgetPhases..PhasezNBase class for helper object that implements each phase of processing )rOrA__startTagCache __endTagCachecSs||_||_i|_i|_dSr-)rOrA_Phase__startTagCache_Phase__endTagCache)rFrOrAr(r(r)rNsz!getPhases..Phase.__init__cSstdSr-)NotImplementedErrorrEr(r(r)rsz#getPhases..Phase.processEOFcSs|j||jjddS)Nr~rA 
insertCommentrrr(r(r)rsz'getPhases..Phase.processCommentcSs|jddS)Nzunexpected-doctyperOrrr(r(r)rsz'getPhases..Phase.processDoctypecSs|j|ddSNrrA insertTextrr(r(r)rsz*getPhases..Phase.processCharacterscSs|j|ddSrrrr(r(r)rsz/getPhases..Phase.processSpaceCharacterscSsj|d}||jvr|j|}nD|j|}|j|<t|jt|jdkrb|jtt|jq2||SNrCg?)rstartTagHandlerrpopnextiterrFrrCfuncr(r(r)rs  z(getPhases..Phase.processStartTagcSsf|jjs |ddkr |jd|dD],\}}||jjdjvr,||jjdj|<q,d|j_dS)NrCrz non-html-rootrrF)rOr\rr.rArrqrFrattrrr(r(r) startTagHtmls  z%getPhases..Phase.startTagHtmlcSsj|d}||jvr|j|}nD|j|}|j|<t|jt|jdkrb|jtt|jq2||Sr)r endTagHandlerrrrrrr(r(r)rs  z&getPhases..Phase.processEndTagN)r<r=r>r __slots__rNrrrrrrrrr(r(r(r)Phases rc@sReZdZeZddZddZddZddZd d Z d d Z d dZ ddZ dS)zgetPhases..InitialPhasecSsdSr-r(rr(r(r)rsz6getPhases..InitialPhase.processSpaceCharacterscSs|j||jjdSr-rArdocumentrr(r(r)rsz.getPhases..InitialPhase.processCommentcSs|d}|d}|d}|d}|dks@|dus@|durL|dkrL|jd|durXd}|j||dkrv|t}|r|ddks|d s|d vs|d r|dus|r|d krd |j_n$|ds|d r|durd|j_|jj d|j_ dS)NrCpublicIdsystemIdcorrectrzabout:legacy-compatzunknown-doctype)7z*+//silmaril//dtd html pro v0r11 19970101//z4-//advasoft ltd//dtd html 3.0 aswedit + extensions//z*-//as//dtd html 3.0 aswedit + extensions//z-//ietf//dtd html 2.0 level 1//z-//ietf//dtd html 2.0 level 2//z&-//ietf//dtd html 2.0 strict level 1//z&-//ietf//dtd html 2.0 strict level 2//z-//ietf//dtd html 2.0 strict//z-//ietf//dtd html 2.0//z-//ietf//dtd html 2.1e//z-//ietf//dtd html 3.0//z-//ietf//dtd html 3.2 final//z-//ietf//dtd html 3.2//z-//ietf//dtd html 3//z-//ietf//dtd html level 0//z-//ietf//dtd html level 1//z-//ietf//dtd html level 2//z-//ietf//dtd html level 3//z"-//ietf//dtd html strict level 0//z"-//ietf//dtd html strict level 1//z"-//ietf//dtd html strict level 2//z"-//ietf//dtd html strict level 3//z-//ietf//dtd html strict//z-//ietf//dtd html//z(-//metrius//dtd metrius presentational//z5-//microsoft//dtd internet explorer 2.0 html strict//z.-//microsoft//dtd internet explorer 
2.0 html//z0-//microsoft//dtd internet explorer 2.0 tables//z5-//microsoft//dtd internet explorer 3.0 html strict//z.-//microsoft//dtd internet explorer 3.0 html//z0-//microsoft//dtd internet explorer 3.0 tables//z#-//netscape comm. corp.//dtd html//z*-//netscape comm. corp.//dtd strict html//z*-//o'reilly and associates//dtd html 2.0//z3-//o'reilly and associates//dtd html extended 1.0//z;-//o'reilly and associates//dtd html extended relaxed 1.0//zN-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//zE-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//z$-//spyglass//dtd html 2.0 extended//z+-//sq//dtd html 2.0 hotmetal + extensions//z--//sun microsystems corp.//dtd hotjava html//z4-//sun microsystems corp.//dtd hotjava strict html//z-//w3c//dtd html 3 1995-03-24//z-//w3c//dtd html 3.2 draft//z-//w3c//dtd html 3.2 final//z-//w3c//dtd html 3.2//z-//w3c//dtd html 3.2s draft//z-//w3c//dtd html 4.0 frameset//z#-//w3c//dtd html 4.0 transitional//z(-//w3c//dtd html experimental 19960712//z&-//w3c//dtd html experimental 970421//z-//w3c//dtd w3 html//z-//w3o//dtd w3 html 3.0//z#-//webtechs//dtd mozilla html 2.0//z-//webtechs//dtd mozilla html//)z$-//w3o//dtd w3 html strict 3.0//en//z"-/w3c/dtd html 4.0 transitional/enr)z -//w3c//dtd html 4.01 frameset//z$-//w3c//dtd html 4.01 transitional//z:http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtdquirks)z -//w3c//dtd xhtml 1.0 frameset//z$-//w3c//dtd xhtml 1.0 transitional//zlimited quirksrZ) rOrrA insertDoctyperrrrr_r^rLrd)rFrrCrrrr(r(r)rsX   9<?@@ A z.getPhases..InitialPhase.processDoctypecSsd|j_|jjd|j_dS)NrrZ)rOr^rLrdrEr(r(r) anythingElseZsz,getPhases..InitialPhase.anythingElsecSs|jd||S)Nzexpected-doctype-but-got-charsrOrrrr(r(r)r^s z1getPhases..InitialPhase.processCharacterscSs"|jdd|di||S)Nz"expected-doctype-but-got-start-tagrCrrr(r(r)rcs  z/getPhases..InitialPhase.processStartTagcSs"|jdd|di||S)Nz expected-doctype-but-got-end-tagrCrrr(r(r)ris  
z-getPhases..InitialPhase.processEndTagcSs|jd|dS)Nzexpected-doctype-but-got-eofTrrEr(r(r)ros z*getPhases..InitialPhase.processEOFN) r<r=r>tuplerrrrrrrrrr(r(r(r) InitialPhases_rc@sJeZdZeZddZddZddZddZd d Z d d Z d dZ dS)z"getPhases..BeforeHtmlPhasecSs&|jtdd|jjd|j_dS)Nrryr)rA insertRootimpliedTagTokenrOrLrdrEr(r(r)rexsz4getPhases..BeforeHtmlPhase.insertHtmlElementcSs |dSNTrerEr(r(r)r}sz-getPhases..BeforeHtmlPhase.processEOFcSs|j||jjdSr-rrr(r(r)rsz1getPhases..BeforeHtmlPhase.processCommentcSsdSr-r(rr(r(r)rsz9getPhases..BeforeHtmlPhase.processSpaceCharacterscSs ||Sr-rrr(r(r)rsz4getPhases..BeforeHtmlPhase.processCharacterscSs |ddkrd|j_||S)NrCrT)rOr\rerr(r(r)rs z2getPhases..BeforeHtmlPhase.processStartTagcSs4|ddvr$|jdd|din ||SdS)NrCrrrbrzunexpected-end-tag-before-html)rOrrerr(r(r)rs   z0getPhases..BeforeHtmlPhase.processEndTagN) r<r=r>rrrerrrrrrr(r(r(r)BeforeHtmlPhasetsr c@seZdZeZddZddZddZddZd d Z d d Z d dZ ddZ e defde fgZe e_e de fgZe e_dS)z"getPhases..BeforeHeadPhasecSs|tdddS)NrryT startTagHeadrrEr(r(r)rsz-getPhases..BeforeHeadPhase.processEOFcSsdSr-r(rr(r(r)rsz9getPhases..BeforeHeadPhase.processSpaceCharacterscSs|tdd|SNrryr rr(r(r)rsz4getPhases..BeforeHeadPhase.processCharacterscSs|jjd|SNrrOrLrrr(r(r)rsz/getPhases..BeforeHeadPhase.startTagHtmlcSs0|j||jjd|j_|jjd|j_dS)Nr~inHead)rArr headPointerrOrLrdrr(r(r)r s z/getPhases..BeforeHeadPhase.startTagHeadcSs|tdd|Sr r rr(r(r) startTagOthersz0getPhases..BeforeHeadPhase.startTagOthercSs|tdd|Sr r rr(r(r)endTagImplyHeadsz2getPhases..BeforeHeadPhase.endTagImplyHeadcSs|jdd|didS)Nzend-tag-after-implied-rootrCrrr(r(r) endTagOthers z.getPhases..BeforeHeadPhase.endTagOtherrrrN)r<r=r>rrrrrrr rrrr MethodDispatcherrdefaultrr(r(r(r)BeforeHeadPhases$rc @seZdZeZddZddZddZddZd d Z d d Z d dZ ddZ ddZ ddZddZddZddZddZddZedefd e fd!e fd"e fd#efd$e fd%e fd&efgZee_ed&efd'efgZee_d(S))zgetPhases..InHeadPhasecSs |dSrrrEr(r(r)rsz)getPhases..InHeadPhase.processEOFcSs 
||Sr-rrr(r(r)rsz0getPhases..InHeadPhase.processCharacterscSs|jjd|Srrrr(r(r)rsz+getPhases..InHeadPhase.startTagHtmlcSs|jddS)Nz!two-heads-are-not-better-than-onerrr(r(r)r sz+getPhases..InHeadPhase.startTagHeadcSs$|j||jjd|d<dSNTrrArrrrr(r(r)startTagBaseLinkCommands  z6getPhases..InHeadPhase.startTagBaseLinkCommandcSs|j||jjd|d<|d}|jjjjddkrd|vrZ|jjj|dnVd|vrd|vr|d d krt |d d }t |}|}|jjj|dS) NTrrr tentativecharsetcontentz http-equivz content-typezutf-8)rArrrrOrSrVrkchangeEncodingr_r EncodingBytesencodeContentAttrParserr")rFrrqrrOcodecr(r(r) startTagMetas    z+getPhases..InHeadPhase.startTagMetacSs|j|ddS)NrrOrrr(r(r) startTagTitlesz,getPhases..InHeadPhase.startTagTitlecSs|j|ddS)Nrr%rr(r(r)startTagNoFramesStylesz4getPhases..InHeadPhase.startTagNoFramesStylecSs8|jjr|j|dn|j||jjd|j_dS)NrinHeadNoscript)rOrQrrArrLrdrr(r(r)startTagNoscripts z/getPhases..InHeadPhase.startTagNoscriptcSs<|j||jjj|jj_|jj|j_|jjd|j_dS)Nr) rArrOrSscriptDataStaterardrrLrr(r(r)startTagScripts  z-getPhases..InHeadPhase.startTagScriptcSs ||Sr-rrr(r(r)rsz,getPhases..InHeadPhase.startTagOthercSs:|jjj}|jdks&Jd|j|jjd|j_dS)NrzExpected head got %s afterHeadrOrArrrCrLrdrFrrr(r(r) endTagHead sz)getPhases..InHeadPhase.endTagHeadcSs ||Sr-rrr(r(r)endTagHtmlBodyBrsz/getPhases..InHeadPhase.endTagHtmlBodyBrcSs|jdd|didSNunexpected-end-tagrCrrr(r(r)rsz*getPhases..InHeadPhase.endTagOthercSs|tddS)Nr)r/rrEr(r(r)rsz+getPhases..InHeadPhase.anythingElsertitle)noframesstylenoscriptscript)basebasefontbgsoundcommandlinkr4r)r rrN)r<r=r>rrrrrr rr$r&r'r)r+rr/r0rrr rrrrr(r(r(r) InHeadPhasesD r=c@seZdZeZddZddZddZddZd d Z d d Z d dZ ddZ ddZ ddZddZddZede fde fde fgZe e_ede fdefgZee_dS)z&getPhases..InHeadNoscriptPhasecSs|jd|dS)Nzeof-in-head-noscriptTrrEr(r(r)r/s z1getPhases..InHeadNoscriptPhase.processEOFcSs|jjd|SNr)rOrLrrr(r(r)r4sz5getPhases..InHeadNoscriptPhase.processCommentcSs|jd||S)Nzchar-in-head-noscriptrrr(r(r)r7s 
z8getPhases..InHeadNoscriptPhase.processCharacterscSs|jjd|Sr>rOrLrrr(r(r)r<sz=getPhases..InHeadNoscriptPhase.processSpaceCharacterscSs|jjd|Srrrr(r(r)r?sz3getPhases..InHeadNoscriptPhase.startTagHtmlcSs|jjd|Sr>rrr(r(r)rBsz>getPhases..InHeadNoscriptPhase.startTagBaseLinkCommandcSs|jdd|didSNunexpected-start-tagrCrrr(r(r)startTagHeadNoscriptEsz;getPhases..InHeadNoscriptPhase.startTagHeadNoscriptcSs"|jdd|di||SNzunexpected-inhead-noscript-tagrCrrr(r(r)rHsz4getPhases..InHeadNoscriptPhase.startTagOthercSs:|jjj}|jdks&Jd|j|jjd|j_dS)Nr6zExpected noscript got %srr-r.r(r(r)endTagNoscriptMsz5getPhases..InHeadNoscriptPhase.endTagNoscriptcSs"|jdd|di||SrCrrr(r(r)endTagBrRsz/getPhases..InHeadNoscriptPhase.endTagBrcSs|jdd|didSr1rrr(r(r)rWsz2getPhases..InHeadNoscriptPhase.endTagOthercSs|tddS)Nr6)rDrrEr(r(r)rZsz3getPhases..InHeadNoscriptPhase.anythingElser)r9r:r<r4r4r5)rr6r6r N)r<r=r>rrrrrrrrrBrrDrErrr rrrrr(r(r(r)InHeadNoscriptPhase,s0rFc@seZdZeZddZddZddZddZd d Z d d Z d dZ ddZ ddZ ddZddZedefdefde fde fde fgZe e_ede fgZee_dS)z!getPhases..AfterHeadPhasecSs |dSrrrEr(r(r)rnsz,getPhases..AfterHeadPhase.processEOFcSs ||Sr-rrr(r(r)rrsz3getPhases..AfterHeadPhase.processCharacterscSs|jjd|Srrrr(r(r)rvsz.getPhases..AfterHeadPhase.startTagHtmlcSs(d|j_|j||jjd|j_dS)NFr)rOrirArrLrdrr(r(r) startTagBodyys z.getPhases..AfterHeadPhase.startTagBodycSs |j||jjd|j_dS)Nr)rArrOrLrdrr(r(r)startTagFrameset~s z2getPhases..AfterHeadPhase.startTagFramesetcSsr|jdd|di|jj|jj|jjd||jjdddD] }|jdkrL|jj |qnqLdS)Nz#unexpected-start-tag-out-of-my-headrCrr~r) rOrrArrrrLrrCremover.r(r(r)startTagFromHeads  z2getPhases..AfterHeadPhase.startTagFromHeadcSs|jdd|didSr@rrr(r(r)r sz.getPhases..AfterHeadPhase.startTagHeadcSs ||Sr-rrr(r(r)rsz/getPhases..AfterHeadPhase.startTagOthercSs ||Sr-rrr(r(r)r0sz2getPhases..AfterHeadPhase.endTagHtmlBodyBrcSs|jdd|didSr1rrr(r(r)rsz-getPhases..AfterHeadPhase.endTagOthercSs.|jtdd|jjd|j_d|j_dS)NrryrT)rArrrOrLrdrirEr(r(r)rsz.getPhases..AfterHeadPhase.anythingElserrr) r8r9r:r<r4r4r7r5r3r)rrr 
N)r<r=r>rrrrrrGrHrJr rr0rrr rrrrr(r(r(r)AfterHeadPhaseks4  rKc$seZdZdZfddZddZddZdd Zd d Zd d Z ddZ ddZ ddZ ddZ ddZddZddZddZddZd d!Zd"d#Zd$d%Zd&d'Zd(d)Zd*d+Zd,d-Zd.d/Zd0d1Zd2d3Zd4d5Zd6d7Zd8d9Zd:d;Z dd?Z"d@dAZ#dBdCZ$dDdEZ%dFdGZ&dHdIZ'dJdKZ(dLdMZ)dNdOZ*dPdQZ+dRdSZ,dTdUZ-dVdWZ.dXdYZ/dZd[Z0d\d]Z1d^d_Z2d`daZ3dbdcZ4dddeZ5dfdgZ6e78dhj9fdie fdje fdke fdlefe:efdmefdnefdoefdpefdqefdrefdsefdtefduefdvefdwefdxefdyefdzefd{efd|efd}e fd~e!fde"fde#fde$fde&fde'fde%fde(fde)fde*fg!Z;e+e;_S)zgetPhases..InBodyPhase)rcs"t|j|i||j|_dSr-)superrNprocessSpaceCharactersNonPrerrFrr%) InBodyPhaserr(r)rNsz'getPhases..InBodyPhase.__init__cSs$|j|jko"|j|jko"|j|jkSr-)rCrprq)rFnode1node2r(r(r)isMatchingFormattingElements    z:getPhases..InBodyPhase.isMatchingFormattingElementcSs|j||jjd}g}|jjdddD](}|tur@qXq.|||r.||q.t|dkshJt|dkr|jj|d|jj|dS)Nr~) rArractiveFormattingElementsr rRrrrI)rFrrtmatchingElementsrr(r(r)addFormattingElements     z3getPhases..InBodyPhase.addFormattingElementcSs>td}|jjdddD]}|j|vr|jdq:qdS)N) dddtlir'rrrrrrrrr~z expected-closing-tag-but-got-eof)rrArrCrOr)rFallowed_elementsrr(r(r)rs   z)getPhases..InBodyPhase.processEOFcSsf|d}|j|_|drH|jjdjdvrH|jjdsH|dd}|rb|j|j|dS)Nr r~)prelistingtextarear) rMrrrArrC hasContent#reconstructActiveFormattingElementsr)rFrrr(r(r)!processSpaceCharactersDropNewlines   z@getPhases..InBodyPhase.processSpaceCharactersDropNewlinecSsT|ddkrdS|j|j|d|jjrPtdd|dDrPd|j_dS)NrcSsg|] }|tvqSr(r rBcharr(r(r) szDgetPhases..InBodyPhase.processCharacters..F)rAr`rrOrianyrr(r(r)rs  z0getPhases..InBodyPhase.processCharacterscSs|j|j|ddSr)rAr`rrr(r(r)rMs z;getPhases..InBodyPhase.processSpaceCharactersNonPrecSs|jjd|Sr>rrr(r(r)startTagProcessInHeadsz4getPhases..InBodyPhase.startTagProcessInHeadcSs|jdddit|jjdks4|jjdjdkrB|jjsJnBd|j_|dD],\}}||jjdj vrV||jjdj |<qVdS)NrArCrrFr) rOrrrArrCrWrir.rqrr(r(r)rGsz+getPhases..InBodyPhase.startTagBodycSs|jdddit|jjdks4|jjdjdkrB|jjsJnp|jjsLnf|jjdjrv|jjdj |jjd|jjdjdkr|jj qv|j ||jj d|j_ dS) NrArCrrrr~rr)rOrrrArrCrWriparent removeChildrrrLrdrr(r(r)rHs" 
z/getPhases..InBodyPhase.startTagFramesetcSs.|jjdddr|td|j|dSNr'buttonvariant)rAelementInScopeendTagPrrrr(r(r)startTagClosePsz-getPhases..InBodyPhase.startTagClosePcSs>|jjdddr|td|j|d|j_|j|_dS)Nr'rlrmF) rArorprrrOrirarrr(r(r)startTagPreListings  z1getPhases..InBodyPhase.startTagPreListingcSsZ|jjr|jdddin:|jjdddr:|td|j||jjd|j_dS)NrArCformr'rlrmr~) rA formPointerrOrrorprrrrr(r(r) startTagForms  z+getPhases..InBodyPhase.startTagFormcSsd|j_dgddgddgd}||d}t|jjD]@}|j|vr^|jjt|jdqx|j t vr6|jdvr6qxq6|jj d d d r|jjtd d|j |dS) NFrYrXrW)rYrXrWrCrz)addressr*r'r'rlrm) rOrireversedrArrCrdrr nameTuplerror)rFr stopNamesMap stopNamesrr(r(r)startTagListItem&s*    z/getPhases..InBodyPhase.startTagListItemcSs>|jjdddr|td|j||jjj|jj_dSrk) rArorprrrOrSrcrarr(r(r)startTagPlaintext<s z0getPhases..InBodyPhase.startTagPlaintextcSsb|jjdddr|td|jjdjtvrR|jdd|di|jj |j |dS)Nr'rlrmr~rArC) rArorprrrCrrOrrrrr(r(r)startTagHeadingBs  z.getPhases..InBodyPhase.startTagHeadingcSs~|jd}|rf|jdddd|td||jjvrL|jj|||jjvrf|jj||j | |dS)Na$unexpected-start-tag-implies-end-tag startNameendName) rA!elementInActiveFormattingElementsrOrendTagFormattingrrrIrTr`rV)rFr afeAElementr(r(r) startTagAJs    z(getPhases..InBodyPhase.startTagAcSs|j||dSr-)rAr`rVrr(r(r)startTagFormattingWs z1getPhases..InBodyPhase.startTagFormattingcSsP|j|jdrB|jdddd|td|j||dS)Nnobrrr)rAr`rorOrrrrVrr(r(r) startTagNobr[s   z+getPhases..InBodyPhase.startTagNobrcSsT|jdr2|jdddd|td|S|j|j|d|j_dS)NrlrrF) rArorOrrrr`rrirr(r(r)startTagButtones   z-getPhases..InBodyPhase.startTagButtoncSs0|j|j||jjtd|j_dSNF)rAr`rrTrr rOrirr(r(r)startTagAppletMarqueeObjectps  z:getPhases..InBodyPhase.startTagAppletMarqueeObjectcSsB|jjdddr|td|jd|j_|j|ddS)Nr'rlrmFr)rArorprr`rOrirrr(r(r) startTagXmpvs  z*getPhases..InBodyPhase.startTagXmpcSsR|jjdkr*|jjdddr*|td|j|d|j_|jjd|j_ dS)Nrr'rlrmFr) rOr^rArorrrrirLrdrr(r(r) startTagTable}s   z,getPhases..InBodyPhase.startTagTablecSs6|j|j||jjd|d<d|j_dS)NTrF)rAr`rrrrOrirr(r(r)startTagVoidFormattings    
z5getPhases..InBodyPhase.startTagVoidFormattingcSs@|jj}||d|dvr<|ddtdkr<||j_dS)Nr2rhidden)rOrirrrr)rFrrir(r(r) startTagInputs   z,getPhases..InBodyPhase.startTagInputcSs$|j||jjd|d<dSrrrr(r(r)startTagParamSources  z2getPhases..InBodyPhase.startTagParamSourcecSsJ|jjdddr|td|j||jjd|d<d|j_dS)Nr'rlrmTrF) rArorprrrrrOrirr(r(r) startTagHrs   z)getPhases..InBodyPhase.startTagHrcSs6|jdddd|tdd|d|dddS) Nzunexpected-start-tag-treated-asimageimg originalNamenewNameryrrrqr)rOrrrrr(r(r) startTagImages z,getPhases..InBodyPhase.startTagImagecSs|jdddi|jjrdSi}d|dvr>|dd|d<|tdd|d|td d|td dd |dvr|dd }nd }|td |d|d}d|vr|d=d |vr|d =d|d<|tdd||dd| td |td d| tddS)Nzdeprecated-tagrCisindexactionrrsry)rqhrlabelpromptz3This is a searchable index. Enter search keywords: rwr2rinputrr) rOrrArtrrrrcopyr)rFr form_attrsrrqr(r(r)startTagIsIndexs<      z.getPhases..InBodyPhase.startTagIsIndexcSs0|j||jjj|jj_|j|_d|j_dSr) rArrOrSr`rararrirr(r(r)startTagTextareas z/getPhases..InBodyPhase.startTagTextareacSsd|j_||dSr)rOristartTagRawtextrr(r(r)startTagIFramesz-getPhases..InBodyPhase.startTagIFramecSs"|jjr||n ||dSr-)rOrQrrrr(r(r)r)s z/getPhases..InBodyPhase.startTagNoscriptcSs|j|ddS)z8iframe, noembed noframes, noscript(if scripting enabled)rNr%rr(r(r)rsz.getPhases..InBodyPhase.startTagRawtextcSs@|jjdjdkr$|jjtd|j|jj|dSNr~option) rArrCrOrdrrr`rrr(r(r) startTagOpts z*getPhases..InBodyPhase.startTagOptcSs|j|j|d|j_|jj|jjd|jjd|jjd|jjd|jjd|jjdfvrx|jjd|j_n|jjd |j_dS) NFrrrrrrinSelectInTabler)rAr`rrOrirdrLrr(r(r)startTagSelects       z-getPhases..InBodyPhase.startTagSelectcSsB|jdr2|j|jjdjdkr2|j|j|dS)Nrubyr~)rArogenerateImpliedEndTagsrrCrOrrrr(r(r) startTagRpRts    z+getPhases..InBodyPhase.startTagRpRtcSsZ|j|j||j|td|d<|j||drV|jjd|d<dS)NrnrprTr) rAr`rOrrrrrrrr(r(r) startTagMaths      z+getPhases..InBodyPhase.startTagMathcSsZ|j|j||j|td|d<|j||drV|jjd|d<dS)NrrprTr) rAr`rOrrrrrrrr(r(r) startTagSvgs      z*getPhases..InBodyPhase.startTagSvgcSs|jdd|didS)a5 Elements that should be children of other elements that have a different 
insertion mode; here they are ignored "caption", "col", "colgroup", "frame", "frameset", "head", "option", "optgroup", "tbody", "td", "tfoot", "th", "thead", "tr", "noscript" zunexpected-start-tag-ignoredrCNrrr(r(r)startTagMisplacedsz0getPhases..InBodyPhase.startTagMisplacedcSs|j|j|dSr-)rAr`rrr(r(r)rs z,getPhases..InBodyPhase.startTagOthercSs|jjdddsD|tdd|jdddi|tddnT|jd|jjdj dkrt|jdddi|jj }|j dkr|jj }qdS) Nr'rlrmryr2rCrzr~) rArorqrrOrrprrrCrr.r(r(r)rps   z&getPhases..InBodyPhase.endTagPcSs~|jds|jdS|jjdjdkrj|jjddD],}|jtdvr<|jdd|jdqjq<|jjd|j_dS)Nrr~)rWrXrYoptgrouprr'rprtrrrrrrrr$expected-one-end-tag-but-got-anothergotName expectedName afterBody) rArorOrrrCrrLrdr.r(r(r) endTagBody)s   z)getPhases..InBodyPhase.endTagBodycSs"|jdr|td|SdS)Nr)rArorrrr(r(r) endTagHtml;s z)getPhases..InBodyPhase.endTagHtmlcSs|ddkr|j|_|j|d}|r2|j|jjdj|dkr^|jdd|di|r|jj }|j|dkr|jj }qndS)NrCr\r~end-tag-too-early) rMrrArorrrCrOrr)rFrinScoperr(r(r) endTagBlockAs   z*getPhases..InBodyPhase.endTagBlockcSsv|jj}d|j_|dus$|j|s8|jdddin:|j|jjd|krd|jdddi|jj|dS)Nr2rCrsr~zend-tag-too-early-ignored)rArtrorOrrrrIr.r(r(r) endTagFormOs z)getPhases..InBodyPhase.endTagFormcSs|ddkrd}nd}|jj|d|dsB|jdd|dinf|jj|dd|jjdj|dkr|jdd|di|jj}|j|dkr|jj}qdS) NrCrYlistrmr2excluder~r)rArorOrrrrCr)rFrrnrr(r(r)endTagListItem\s   z-getPhases..InBodyPhase.endTagListItemcSstD]}|j|r|jq$q|jjdj|dkrP|jdd|ditD]8}|j|rT|jj}|jtvr|jj}qpqqTdS)Nr~rCr) rrArorrrCrOrr)rFritemr(r(r) endTagHeadingms     z,getPhases..InBodyPhase.endTagHeadingcSsd}|dkr |d7}|j|d}|rD||jjvrR|j|jsR||dS||jjvr|jdd|di|jj |dS|j|js|jdd|didS||jjdkr|jd d|di|jj |}d}|jj|dD]}|j t vr|}qq|durR|jj }||kr@|jj }q&|jj |dS|jj|d}|jj |}|} } d} |jj | } | d krX| d7} | d8} |jj| } | |jjvr|jj | q| |krސqX| |kr|jj | d}| } | |jj|jj | <| |jj|jj | <| } | jrF| j| | | | } q| jrl| j| |jtd vr|j\}}|| |n || |} || || |jj ||jj|| |jj ||jj|jj |d| qdS) z)The much-feared adoption agency algorithmrrrCNzadoption-agency-1.2zadoption-agency-4.4r~zadoption-agency-1.3rSrrrrr)rArrrorCrrOrrTrIindexrxrr cloneNoderirj 
appendChildrgetTableMisnestedNodePosition insertBeforereparentChildreninsert)rFrouterLoopCounterformattingElementafeIndex furthestBlockrtcommonAncestorbookmarklastNoderinnerLoopCounterrclonerirr(r(r)r|s                      z/getPhases..InBodyPhase.endTagFormattingcSs|j|dr|j|jjdj|dkrF|jdd|di|j|dr|jj}|j|dkr~|jj}qb|jdS)NrCr~r) rArorrrCrOrrclearActiveFormattingElements)rFrrtr(r(r)endTagAppletMarqueeObjects  z8getPhases..InBodyPhase.endTagAppletMarqueeObjectcSs@|jdddd|j|jtdd|jjdS)Nzunexpected-end-tag-treated-asr z br elementrry)rOrrAr`rrrrrr(r(r)rE+s  z'getPhases..InBodyPhase.endTagBrcSs|jjdddD]}|j|dkrz|jj|dd|jjdj|dkrb|jdd|di|jj|krtqbqq|jtvr|jdd|diqqdS)Nr~rCrr2) rArrCrrOrrrxrr.r(r(r)r2s z*getPhases..InBodyPhase.endTagOtherr) r8r9r:r;r<r4r7r5r3rr)rvarticleaside blockquotecenterdetailsdirr*dlfieldset figcaptionfigurefooterheaderhgroupmainmenunavolr'sectionsummaryul)r\r]rs)rYrWrXrYr~) bbigcodeemfontissmallstrikestrongtturrl)appletmarqueeobjectxmpr)arear embedrkeygenwbr)paramsourcetrackrrrrr^iframer6)noembedr4r)rr)rrmathr) rcolrframerrrrrrr)rvrrrrlrrdialogrr*rrrrrrrr]rrrrr\rrrr')rWrXrY)r~rrrrrrrrrrrrrr )?r<r=r>rrNrRrVrrarrMrhrGrHrqrrrur{r|r}rrrrrrrrrrrrrrrr)rrrrrrrrrprrrrrrrrrErr rrrrrr __classcell__r()rOrrr)rOs             $ -rOc@s`eZdZeZddZddZddZddZd d Z e gZ ee _ e d efgZe e_ d S) zgetPhases..TextPhasecSs|j|ddSrrrr(r(r)rsz.getPhases..TextPhase.processCharacterscSs8|jdd|jjdji|jj|jj|j_dS)Nz&expected-named-closing-tag-but-got-eofrCr~T)rOrrArrCrrrdrEr(r(r)rs   z'getPhases..TextPhase.processEOFcSsdsJd|ddS)NFz4Tried to process start tag %s in RCDATA/RAWTEXT moderCr(rr(r(r)rsz*getPhases..TextPhase.startTagOthercSs*|jj}|jdksJ|jj|j_dS)Nr7)rArrrCrOrrdr.r(r(r) endTagScripts z)getPhases..TextPhase.endTagScriptcSs|jj|jj|j_dSr-)rArrrOrrdrr(r(r)rs z(getPhases..TextPhase.endTagOtherr7N)r<r=r>rrrrrrrr rrrrr(r(r(r) TextPhases rc s eZdZeZddZddZddZddZd d Z d d Z d dZ ddZ ddZ ddZddZddZddZddZddZdd Zd!d"Zd#d$Zed%jfd&e fd'e fd(e fd)e fd*efd+efd,efd-efd.efg 
Zee_ed+efd/efgZee_d0S)1zgetPhases..InTablePhasecSs$|jjdjdvr |jjqdS)Nr~)rr)rArrCrrEr(r(r)clearStackToTableContextsz8getPhases..InTablePhase.clearStackToTableContextcSs0|jjdjdkr |jdn |jjs,JdS)Nr~rz eof-in-tablerArrCrOrrWrEr(r(r)rsz*getPhases..InTablePhase.processEOFcSs4|jj}|jjd|j_||jj_|jj|dSN inTableText)rOrdrLrrrFrrr(r(r)rs z6getPhases..InTablePhase.processSpaceCharacterscSs4|jj}|jjd|j_||jj_|jj|dSr)rOrdrLrrrr(r(r)rs z1getPhases..InTablePhase.processCharacterscSs&d|j_|jjd|d|j_dS)NTrF)rAinsertFromTablerOrLrrr(r(r)rsz*getPhases..InTablePhase.insertTextcSs6||jjt|j||jjd|j_dS)Nr) rrArTrr rrOrLrdrr(r(r)startTagCaptions z/getPhases..InTablePhase.startTagCaptioncSs(||j||jjd|j_dS)NrrrArrOrLrdrr(r(r)startTagColgroups z0getPhases..InTablePhase.startTagColgroupcSs|tdd|S)Nrry)rrrr(r(r) startTagColsz+getPhases..InTablePhase.startTagColcSs(||j||jjd|j_dSNrrrr(r(r)startTagRowGroups z0getPhases..InTablePhase.startTagRowGroupcSs|tdd|S)Nrry)rrrr(r(r)startTagImplyTbodysz2getPhases..InTablePhase.startTagImplyTbodycSs6|jdddd|jjtd|jjs2|SdS)Nrrr)rOrrdrrrWrr(r(r)rs z-getPhases..InTablePhase.startTagTablecSs|jjd|Sr>rrr(r(r)startTagStyleScriptsz3getPhases..InTablePhase.startTagStyleScriptcSsVd|dvrH|ddtdkrH|jd|j||jjn ||dS)Nr2rrz unexpected-hidden-input-in-table) rrrrOrrArrrrrr(r(r)rs   z-getPhases..InTablePhase.startTagInputcSsD|jd|jjdur@|j||jjd|j_|jjdS)Nzunexpected-form-in-tabler~)rOrrArtrrrrr(r(r)rus    z,getPhases..InTablePhase.startTagFormcSs<|jdd|did|j_|jjd|d|j_dS)Nz)unexpected-start-tag-implies-table-voodoorCTrF)rOrrArrLrrr(r(r)rsz-getPhases..InTablePhase.startTagOthercSs|jjdddr|j|jjdjdkrJ|jdd|jjdjd|jjdjdkrj|jjqJ|jj|jn|jj sJ|jdS)Nrrmr~zend-tag-too-early-namedr) rArorrrCrOrrrfrWrr(r(r) endTagTables     z+getPhases..InTablePhase.endTagTablecSs|jdd|didSr1rrr(r(r) endTagIgnoresz,getPhases..InTablePhase.endTagIgnorecSs<|jdd|did|j_|jjd|d|j_dS)Nz'unexpected-end-tag-implies-table-voodoorCTrF)rOrrArrLrrr(r(r)rsz+getPhases..InTablePhase.endTagOtherrrrr rrr)rrrr)r5r7rrs) rrr 
rrrrrrrrN)r<r=r>rrrrrrrrrrrrrrrrurrr rr rrrrrr(rr(r) InTablePhasesN   r#csZeZdZdZfddZddZddZdd Zd d Zd d Z ddZ ddZ Z S)z#getPhases..InTableTextPhase)rcharacterTokenscs&t|j|i|d|_g|_dSr-)rLrNrr$rN)InTableTextPhaserr(r)rN6sz,getPhases..InTableTextPhase.__init__cSsdddd|jD}tdd|DrJtd|d}|jjd|n|rZ|j|g|_dS)NrcSsg|] }|dqS)rr(rBrr(r(r)rf<rHzGgetPhases..InTableTextPhase.flushCharacters..cSsg|] }|tvqSr(rcr&r(r(r)rf=rHrwrr)joinr$rgrrOrLrrA)rFrrr(r(r)flushCharacters;s z3getPhases..InTableTextPhase.flushCharacterscSs||j|j_|Sr-r(rrOrdrr(r(r)rDs z2getPhases..InTableTextPhase.processCommentcSs||j|j_dSrr)rEr(r(r)rIs z.getPhases..InTableTextPhase.processEOFcSs |ddkrdS|j|dSNrrbr$rrr(r(r)rNs z5getPhases..InTableTextPhase.processCharacterscSs|j|dSr-r+rr(r(r)rSsz:getPhases..InTableTextPhase.processSpaceCharacterscSs||j|j_|Sr-r)rr(r(r)rXs z3getPhases..InTableTextPhase.processStartTagcSs||j|j_|Sr-r)rr(r(r)r]s z1getPhases..InTableTextPhase.processEndTag) r<r=r>rrNr(rrrrrrr r()r%rr)r%3s r%cseZdZeZddZddZddZddZd d Z d d Z d dZ ddZ ddZ edjfdefgZe e_ede fde fde fgZe e_dS)z!getPhases..InCaptionPhasecSs|jjddd S)NrrrmrArorEr(r(r)ignoreEndTagCaptionfsz5getPhases..InCaptionPhase.ignoreEndTagCaptioncSs|jjddSrrOrLrrEr(r(r)risz,getPhases..InCaptionPhase.processEOFcSs|jjd|SrrOrLrrr(r(r)rlsz3getPhases..InCaptionPhase.processCharacterscSs0|j|}|jjtd|s,|SdSNrrOrr-rdrrrFr ignoreEndTagr(r(r)startTagTableElementos  z6getPhases..InCaptionPhase.startTagTableElementcSs|jjd|Srrrr(r(r)rwsz/getPhases..InCaptionPhase.startTagOthercSs|s|j|jjdjdkrB|jdd|jjdjd|jjdjdkrb|jjqB|jj|j|jj d|j_ n|jj sJ|jdS)Nr~rrrr) r-rArrrCrOrrrrLrdrWrr(r(r) endTagCaptionzs     z/getPhases..InCaptionPhase.endTagCaptioncSs0|j|}|jjtd|s,|SdSr0r1r2r(r(r)rs  z-getPhases..InCaptionPhase.endTagTablecSs|jdd|didSr1rrr(r(r)r sz.getPhases..InCaptionPhase.endTagIgnorecSs|jjd|SrrOrLrrr(r(r)rsz-getPhases..InCaptionPhase.endTagOtherr rr rrrrrrrrr) rr rrrrrrrrN)r<r=r>rrr-rrr4rr5rr rr rrrrrr(r"r(r)InCaptionPhasebs2r8cseZdZeZddZddZddZddZd d Z d d Z d dZ ddZ e 
djfdefgZe e_e de fde fgZe e_dS)z%getPhases..InColumnGroupPhasecSs|jjdjdkS)Nr~r)rArrCrEr(r(r)ignoreEndTagColgroupsz:getPhases..InColumnGroupPhase.ignoreEndTagColgroupcSsD|jjdjdkr"|jjsJdS|}|td|s@dSdS)Nr~rrT)rArrCrOrWr9endTagColgroupr)rFr3r(r(r)rs z0getPhases..InColumnGroupPhase.processEOFcSs"|}|td|s|SdSNrr9r:rr2r(r(r)rsz7getPhases..InColumnGroupPhase.processCharacterscSs$|j||jjd|d<dSrrrr(r(r)rs  z1getPhases..InColumnGroupPhase.startTagColcSs"|}|td|s|SdSr;r<r2r(r(r)rsz3getPhases..InColumnGroupPhase.startTagOthercSs@|r |jjsJ|jn|jj|jjd|j_dSNr) r9rOrWrrArrrLrdrr(r(r)r:s    z4getPhases..InColumnGroupPhase.endTagColgroupcSs|jdddidS)Nz no-end-tagrCr rrr(r(r) endTagColsz/getPhases..InColumnGroupPhase.endTagColcSs"|}|td|s|SdSr;r<r2r(r(r)rsz1getPhases..InColumnGroupPhase.endTagOtherrr rN)r<r=r>rrr9rrrrr:r>rr rrrrrr(r"r(r)InColumnGroupPhases&  r?cseZdZeZddZddZddZddZd d Z d d Z d dZ ddZ ddZ ddZddZddZedjfde fde fde fgZe e_ede fdefdefgZee_d S)!z#getPhases..InTableBodyPhasecSsB|jjdjdvr |jjq|jjdjdkr>|jjs>JdS)Nr~)rrrrr)rArrCrrOrWrEr(r(r)clearStackToTableBodyContextsz@getPhases..InTableBodyPhase.clearStackToTableBodyContextcSs|jjddSr=r.rEr(r(r)rsz.getPhases..InTableBodyPhase.processEOFcSs|jjd|Sr=r?rr(r(r)rsz:getPhases..InTableBodyPhase.processSpaceCharacterscSs|jjd|Sr=r/rr(r(r)rsz5getPhases..InTableBodyPhase.processCharacterscSs(||j||jjd|j_dS)Nr)r@rArrOrLrdrr(r(r) startTagTrs z.getPhases..InTableBodyPhase.startTagTrcSs*|jdd|di|tdd|S)Nzunexpected-cell-in-table-bodyrCrry)rOrrArrr(r(r)startTagTableCells  z5getPhases..InTableBodyPhase.startTagTableCellcSsn|jjddds0|jjddds0|jjdddrT||t|jjdj|S|jjs`J|j dSNrrrmrrr~ rAror@endTagTableRowGrouprrrCrOrWrrr(r(r)startTagTableOther s z6getPhases..InTableBodyPhase.startTagTableOthercSs|jjd|Sr=rrr(r(r)rsz1getPhases..InTableBodyPhase.startTagOthercSsT|jj|dddr:||jj|jjd|j_n|jdd|didS)NrCrrmr unexpected-end-tag-in-table-body) rAror@rrrOrLrdrrr(r(r)rEs  z7getPhases..InTableBodyPhase.endTagTableRowGroupcSsn|jjddds0|jjddds0|jjdddrT||t|jjdj|S|jjs`J|j 
dSrCrDrr(r(r)r%s z/getPhases..InTableBodyPhase.endTagTablecSs|jdd|didS)NrGrCrrr(r(r)r 2s z0getPhases..InTableBodyPhase.endTagIgnorecSs|jjd|Sr=r6rr(r(r)r6sz/getPhases..InTableBodyPhase.endTagOtherrrrr)rr rrrrr!r)rrr rrrrrN)r<r=r>rrr@rrrrArBrFrrErr rr rrrrrr(r"r(r)InTableBodyPhases<   rIcseZdZeZddZddZddZddZd d Z d d Z d dZ ddZ ddZ ddZddZddZddZedjfde fde fgZe e_ede fdefd efd!efgZee_d"S)#zgetPhases..InRowPhasecSs@|jjdjdvr<|jdd|jjdji|jjqdS)Nr~)rrz'unexpected-implied-end-tag-in-table-rowrC)rArrCrOrrrEr(r(r)clearStackToTableRowContextOs z9getPhases..InRowPhase.clearStackToTableRowContextcSs|jjddd S)Nrrrmr,rEr(r(r)ignoreEndTagTrUsz,getPhases..InRowPhase.ignoreEndTagTrcSs|jjddSr=r.rEr(r(r)rYsz(getPhases..InRowPhase.processEOFcSs|jjd|Sr=r?rr(r(r)r\sz4getPhases..InRowPhase.processSpaceCharacterscSs|jjd|Sr=r/rr(r(r)r_sz/getPhases..InRowPhase.processCharacterscSs6||j||jjd|j_|jjtdS)Nr) rJrArrOrLrdrTrr rr(r(r)rBbs z/getPhases..InRowPhase.startTagTableCellcSs"|}|td|s|SdSNrrKendTagTrrr2r(r(r)rFhsz0getPhases..InRowPhase.startTagTableOthercSs|jjd|Sr=rrr(r(r)rosz+getPhases..InRowPhase.startTagOthercSsH|s.||jj|jjd|j_n|jjs:J|j dSr) rKrJrArrrOrLrdrWrrr(r(r)rNrs   z&getPhases..InRowPhase.endTagTrcSs"|}|td|s|SdSrLrMr2r(r(r)r|sz)getPhases..InRowPhase.endTagTablecSs4|jj|dddr&|td|S|jdS)NrCrrmr)rArorNrrOrrr(r(r)rEsz1getPhases..InRowPhase.endTagTableRowGroupcSs|jdd|didS)Nzunexpected-end-tag-in-table-rowrCrrr(r(r)r s z*getPhases..InRowPhase.endTagIgnorecSs|jjd|Sr=r6rr(r(r)rsz)getPhases..InRowPhase.endTagOtherrrH)rr rrrrrrrr!)rrr rrrrN)r<r=r>rrrJrKrrrrBrFrrNrrEr rr rrrrrr(r"r(r) InRowPhaseJs> rOcseZdZeZddZddZddZddZd d Z d d Z d dZ ddZ ddZ edjfdefgZe e_ede fde fde fgZe e_dS)zgetPhases..InCellPhasecSsB|jjdddr |tdn|jjdddr>|tddSNrrrmr)rAroendTagTableCellrrEr(r(r) closeCellsz(getPhases..InCellPhase.closeCellcSs|jjddSrr.rEr(r(r)rsz)getPhases..InCellPhase.processEOFcSs|jjd|Srr/rr(r(r)rsz0getPhases..InCellPhase.processCharacterscSsF|jjddds |jjdddr,||S|jjs8J|jdSrP)rArorRrOrWrrr(r(r)rFs 
z1getPhases..InCellPhase.startTagTableOthercSs|jjd|Srrrr(r(r)rsz,getPhases..InCellPhase.startTagOthercSs|jj|dddr|j|d|jjdj|dkrp|jdd|di|jj}|j|dkrPq|qPn |jj|j|jj d|j_ n|jdd|didS)NrCrrmr~zunexpected-cell-end-tagrr2) rArorrrCrOrrrrLrdr.r(r(r)rQs    z.getPhases..InCellPhase.endTagTableCellcSs|jdd|didSr1rrr(r(r)r sz+getPhases..InCellPhase.endTagIgnorecSs.|jj|dddr ||S|jdS)NrCrrm)rArorRrOrrr(r(r) endTagImplysz*getPhases..InCellPhase.endTagImplycSs|jjd|Srr6rr(r(r)rsz*getPhases..InCellPhase.endTagOtherrr7rH)rrr rrrN)r<r=r>rrrRrrrFrrQr rSrr rrrrrr(r"r(r) InCellPhases. rTc seZdZeZddZddZddZddZd d Z d d Z d dZ ddZ ddZ ddZddZddZedjfdefdefde fde fde fgZe e_ede fdefdefgZee_dS) z getPhases..InSelectPhasecSs0|jjdjdkr |jdn |jjs,JdS)Nr~rz eof-in-selectrrEr(r(r)rsz+getPhases..InSelectPhase.processEOFcSs$|ddkrdS|j|ddSr*rrr(r(r)rs z2getPhases..InSelectPhase.processCharacterscSs.|jjdjdkr|jj|j|dSrrArrCrrrr(r(r)startTagOptions z/getPhases..InSelectPhase.startTagOptioncSsL|jjdjdkr|jj|jjdjdkr<|jj|j|dS)Nr~rrrUrr(r(r)startTagOptgroup s   z1getPhases..InSelectPhase.startTagOptgroupcSs|jd|tddS)Nzunexpected-select-in-selectr)rOr endTagSelectrrr(r(r)r s z/getPhases..InSelectPhase.startTagSelectcSs>|jd|jjdddr.|td|S|jjs:JdS)Nzunexpected-input-in-selectrrm)rOrrArorXrrWrr(r(r)r s  z.getPhases..InSelectPhase.startTagInputcSs|jjd|Sr>rrr(r(r)r+ sz/getPhases..InSelectPhase.startTagScriptcSs|jdd|didS)Nzunexpected-start-tag-in-selectrCrrr(r(r)r s z.getPhases..InSelectPhase.startTagOthercSs6|jjdjdkr |jjn|jdddidS)Nr~runexpected-end-tag-in-selectrCrArrCrrOrrr(r(r) endTagOption s z-getPhases..InSelectPhase.endTagOptioncSsf|jjdjdkr0|jjdjdkr0|jj|jjdjdkrP|jjn|jdddidS)Nr~rrrYrCrZrr(r(r)endTagOptgroup% s z/getPhases..InSelectPhase.endTagOptgroupcSsZ|jjdddr@|jj}|jdkr4|jj}q|jn|jjsLJ|jdS)Nrrm) rArorrrCrOrfrWrr.r(r(r)rX2 s    z-getPhases..InSelectPhase.endTagSelectcSs|jdd|didS)NrYrCrrr(r(r)r= s z,getPhases..InSelectPhase.endTagOtherrrrr)rrr^r7N)r<r=r>rrrrrVrWrrr+rr[r]rXrr rrrrrr(r"r(r) InSelectPhases8  
r^c@sneZdZeZddZddZddZddZd d Z d d Z e d efgZ ee _e d e fgZe e_dS)z'getPhases..InSelectInTablePhasecSs|jjddSNrr.rEr(r(r)rU sz2getPhases..InSelectInTablePhase.processEOFcSs|jjd|Sr_r/rr(r(r)rX sz9getPhases..InSelectInTablePhase.processCharacterscSs(|jdd|di|td|S)Nz5unexpected-table-element-start-tag-in-select-in-tablerCr)rOrrrrr(r(r)r[ sz5getPhases..InSelectInTablePhase.startTagTablecSs|jjd|Sr_rrr(r(r)r` sz5getPhases..InSelectInTablePhase.startTagOthercSs@|jdd|di|jj|dddr<|td|SdS)Nz3unexpected-table-element-end-tag-in-select-in-tablerCrrmr)rOrrArorrrr(r(r)rc sz3getPhases..InSelectInTablePhase.endTagTablecSs|jjd|Sr_r6rr(r(r)ri sz3getPhases..InSelectInTablePhase.endTagOther)rrrrrrrrN)r<r=r>rrrrrrrrr rrrrr(r(r(r)InSelectInTablePhaseR s&r`csBeZdZeZegdZddZfddZddZ dd Z d S) z(getPhases..InForeignContentPhase),rrrrr rrrWr*rrXrrh1h2h3h4h5h6rrrrrYr]rr4rrr'r\rrrspanrrsubsuprrrrvarc%Ssnddddddddd d d d d ddddddddddddddddddd d!d"d#d$d%$}|d&|vrj||d&|d&<dS)'NaltGlyph altGlyphDef altGlyphItem animateColor animateMotionanimateTransformclipPathfeBlend feColorMatrixfeComponentTransfer feCompositefeConvolveMatrixfeDiffuseLightingfeDisplacementMapfeDistantLightfeFloodfeFuncAfeFuncBfeFuncGfeFuncRfeGaussianBlurfeImagefeMerge feMergeNode feMorphologyfeOffset fePointLightfeSpecularLighting feSpotLightfeTile feTurbulence foreignObjectglyphReflinearGradientradialGradienttextPath)$altglyph altglyphdef altglyphitem animatecolor animatemotionanimatetransformclippathfeblend fecolormatrixfecomponenttransfer fecompositefeconvolvematrixfediffuselightingfedisplacementmapfedistantlightfefloodfefuncafefuncbfefuncgfefuncrfegaussianblurfeimagefemerge femergenode femorphologyfeoffset fepointlightfespecularlighting fespotlightfetile feturbulence foreignobjectglyphreflineargradientradialgradienttextpathrCr()rFr replacementsr(r(r)adjustSVGTagNames sN% z:getPhases..InForeignContentPhase.adjustSVGTagNamescsL|ddkrd|d<n&|jjr rHzMgetPhases..InForeignContentPhase.processCharacters..F)rOrirgrrr"r(r)r s  
z:getPhases..InForeignContentPhase.processCharacterscSs(|jjd}|d|jvs>|ddkrt|dhd@r|jdd|di|jjdj|jjkr|j |jjds|j |jjds|jj qT|S|jt dkr|j |n$|jt dkr|||j||j||j|d <|j||d r$|jj d |d <dS) Nr~rCrr>sizefacecolorz*unexpected-html-element-in-foreign-contentrnrrprTr)rArbreakoutElementssetkeysrOrrprrurvrrrrrrr)rFrrr(r(r)r s:           z8getPhases..InForeignContentPhase.processStartTagcSst|jjd}|jjd}|jt|dkrF|jdd|di|jt|dkr|jj|jj dkr|jj |jjj |j_|jj |kr|jjsJqd}q|d8}|jj|}|j |jjkrqFqF|jj|}qqF|S)Nrr~rCr2r)rrArrCrrrrOrrdrLr(rrrprr)rFr nodeIndexrrr(r(r)r s&   z6getPhases..InForeignContentPhase.processEndTagN) r<r=r>rrrrrrrrr(r"r(r)InForeignContentPhasex s  ) rc@sveZdZeZddZddZddZddZd d Z d d Z d dZ e defgZe e_e de fgZe e_dS)z!getPhases..AfterBodyPhasecSsdSr-r(rEr(r(r)r sz,getPhases..AfterBodyPhase.processEOFcSs|j||jjddS)Nrrrr(r(r)r sz0getPhases..AfterBodyPhase.processCommentcSs |jd|jjd|j_|S)Nzunexpected-char-after-bodyrrOrrLrdrr(r(r)r s z3getPhases..AfterBodyPhase.processCharacterscSs|jjd|Srrrr(r(r)r sz.getPhases..AfterBodyPhase.startTagHtmlcSs*|jdd|di|jjd|j_|S)Nzunexpected-start-tag-after-bodyrCrrrr(r(r)r s  z/getPhases..AfterBodyPhase.startTagOthercSs*|jjr|jdn|jjd|j_dS)Nz'unexpected-end-tag-after-body-innerhtmlafterAfterBody)rOrWrrLrd)rFrCr(r(r)r sz,getPhases..AfterBodyPhase.endTagHtmlcSs*|jdd|di|jjd|j_|S)Nzunexpected-end-tag-after-bodyrCrrrr(r(r)r s  z-getPhases..AfterBodyPhase.endTagOtherrN)r<r=r>rrrrrrrrrr rrrrr(r(r(r)AfterBodyPhase srcseZdZeZddZddZddZddZd d Z d d Z d dZ ddZ e djfdefdefde fgZe e_e de fgZe e_dS)z"getPhases..InFramesetPhasecSs0|jjdjdkr |jdn |jjs,JdS)Nr~rzeof-in-framesetrrEr(r(r)r sz-getPhases..InFramesetPhase.processEOFcSs|jddS)Nzunexpected-char-in-framesetrrr(r(r)r" sz4getPhases..InFramesetPhase.processCharacterscSs|j|dSr-)rArrr(r(r)rH% sz3getPhases..InFramesetPhase.startTagFramesetcSs|j||jjdSr-rrr(r(r) startTagFrame( s z0getPhases..InFramesetPhase.startTagFramecSs|jjd|Srrrr(r(r)startTagNoframes, sz3getPhases..InFramesetPhase.startTagNoframescSs|jdd|didS)Nz 
unexpected-start-tag-in-framesetrCrrr(r(r)r/ s z0getPhases..InFramesetPhase.startTagOthercSsZ|jjdjdkr |jdn |jj|jjsV|jjdjdkrV|jjd|j_dS)Nr~rz)unexpected-frameset-in-frameset-innerhtmlr afterFrameset) rArrCrOrrrWrLrdrr(r(r)endTagFrameset3 s z1getPhases..InFramesetPhase.endTagFramesetcSs|jdd|didS)Nzunexpected-end-tag-in-framesetrCrrr(r(r)r? s z.getPhases..InFramesetPhase.endTagOtherrrr r4N)r<r=r>rrrrrHrrrrrr rrrrrr(r"r(r)InFramesetPhase s( rcsveZdZeZddZddZddZddZd d Z d d Z e d j fdefgZee_e d e fgZe e_dS)z%getPhases..AfterFramesetPhasecSsdSr-r(rEr(r(r)rT sz0getPhases..AfterFramesetPhase.processEOFcSs|jddS)Nzunexpected-char-after-framesetrrr(r(r)rX sz7getPhases..AfterFramesetPhase.processCharacterscSs|jjd|Sr>rrr(r(r)r[ sz6getPhases..AfterFramesetPhase.startTagNoframescSs|jdd|didS)Nz#unexpected-start-tag-after-framesetrCrrr(r(r)r^ s z3getPhases..AfterFramesetPhase.startTagOthercSs|jjd|j_dS)NafterAfterFrameset)rOrLrdrr(r(r)rb sz0getPhases..AfterFramesetPhase.endTagHtmlcSs|jdd|didS)Nz!unexpected-end-tag-after-framesetrCrrr(r(r)re s z1getPhases..AfterFramesetPhase.endTagOtherrr4N)r<r=r>rrrrrrrrr rrrrrr(r"r(r)AfterFramesetPhaseP s rc@s`eZdZeZddZddZddZddZd d Z d d Z d dZ e de fgZe e_dS)z&getPhases..AfterAfterBodyPhasecSsdSr-r(rEr(r(r)rw sz1getPhases..AfterAfterBodyPhase.processEOFcSs|j||jjdSr-rrr(r(r)rz sz5getPhases..AfterAfterBodyPhase.processCommentcSs|jjd|Srr?rr(r(r)r} sz=getPhases..AfterAfterBodyPhase.processSpaceCharacterscSs |jd|jjd|j_|S)Nexpected-eof-but-got-charrrrr(r(r)r s z8getPhases..AfterAfterBodyPhase.processCharacterscSs|jjd|Srrrr(r(r)r sz3getPhases..AfterAfterBodyPhase.startTagHtmlcSs*|jdd|di|jjd|j_|S)Nexpected-eof-but-got-start-tagrCrrrr(r(r)r s  z4getPhases..AfterAfterBodyPhase.startTagOthercSs*|jdd|di|jjd|j_|S)Nexpected-eof-but-got-end-tagrCrrrr(r(r)r s  z4getPhases..AfterAfterBodyPhase.processEndTagrN)r<r=r>rrrrrrrrrr rrrr(r(r(r)AfterAfterBodyPhaset src@sneZdZeZddZddZddZddZd d Z d d Z d dZ ddZ e de fde fgZe 
e_dS)z*getPhases..AfterAfterFramesetPhasecSsdSr-r(rEr(r(r)r sz5getPhases..AfterAfterFramesetPhase.processEOFcSs|j||jjdSr-rrr(r(r)r sz9getPhases..AfterAfterFramesetPhase.processCommentcSs|jjd|Srr?rr(r(r)r szAgetPhases..AfterAfterFramesetPhase.processSpaceCharacterscSs|jddS)Nrrrr(r(r)r sz.AfterAfterFramesetPhase.processCharacterscSs|jjd|Srrrr(r(r)r sz7getPhases..AfterAfterFramesetPhase.startTagHtmlcSs|jjd|Sr>rrr(r(r)startTagNoFrames sz;getPhases..AfterAfterFramesetPhase.startTagNoFramescSs|jdd|didS)NrrCrrr(r(r)r s z8getPhases..AfterAfterFramesetPhase.startTagOthercSs|jdd|didS)NrrCrrr(r(r)r s z8getPhases..AfterAfterFramesetPhase.processEndTagrr4N)r<r=r>rrrrrrrrrrr rrrr(r(r(r)AfterAfterFramesetPhase sr)r[rZrrr(r,rrrrrrrrrrrrrrrrr)r)rMr]rrr rr=rFrKrr#r8r?rIrOrTr^r`rrrrrrr()rOr%rr)rKsrH%-f?BY!/F@bYLc&r.8$%&rKcsFt|dt@}|rBt|dfdd|dD|d<dS)Nrc3s"|]\}}|||fVqdSr-)r)rBkvrr(r)r sz$adjust_attributes..)rr2r.)rrneeds_adjustmentr(rr)r s  rrzFcCs|dur i}t||||dS)N)r2rCrr)r)rCr2rqrr(r(r)r s  rc@seZdZdZdS)r}zError in parsed documentN)r<r=r>rr(r(r(r)r} sr})rT)r*rT)rzNF), __future__rrrZpip._vendor.sixrrr0rrr r Ztreebuilders.baser r constantsr rrrrrrrrrrrrrrrrr"r,r@rr!memoizerKrr Exceptionr}r(r(r(r)sN     H   8 _ PK!TT%__pycache__/_tokenizer.cpython-39.pycnu[a Re,@sddlmZmZmZddlmZddlmZm Z ddl m Z ddl m Z ddl mZddl mZmZdd l mZmZmZdd l mZmZdd l mZdd lmZdd lmZeeZe dkreZne ZGdddeZdS))absolute_importdivisionunicode_literals)unichr)deque OrderedDict) version_info)spaceCharacters)entities) asciiLettersasciiUpper2Lower)digits hexDigitsEOF) tokenTypes tagTokenTypes)replacementCharacters)HTMLInputStream)Trie)csdeZdZdZdfdd ZddZddZdd d Zd d ZddZ ddZ ddZ ddZ ddZ ddZddZddZddZd d!Zd"d#Zd$d%Zd&d'Zd(d)Zd*d+Zd,d-Zd.d/Zd0d1Zd2d3Zd4d5Zd6d7Zd8d9Zd:d;Zdd?Z!d@dAZ"dBdCZ#dDdEZ$dFdGZ%dHdIZ&dJdKZ'dLdMZ(dNdOZ)dPdQZ*dRdSZ+dTdUZ,dVdWZ-dXdYZ.dZd[Z/d\d]Z0d^d_Z1d`daZ2dbdcZ3dddeZ4dfdgZ5dhdiZ6djdkZ7dldmZ8dndoZ9dpdqZ:drdsZ;dtduZdzd{Z?d|d}Z@d~dZAddZBddZCddZDddZEddZFddZGddZHddZIddZJddZKddZLZMS) HTMLTokenizera  This class takes 
care of tokenizing HTML. * self.currentToken Holds the token that is currently being processed. * self.state Holds a reference to the method to be invoked... XXX * self.stream Points to HTMLInputStream object. Nc sJt|fi||_||_d|_g|_|j|_d|_d|_t t | dS)NF) rstreamparser escapeFlag lastFourChars dataStatestateescape currentTokensuperr__init__)selfrrkwargs __class__/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/_tokenizer.pyr"(szHTMLTokenizer.__init__ccsPtg|_|rL|jjr6td|jjddVq|jr |jVq6q dS)z This is where the magic happens. We do our usually processing through the states and when we have a token to return we yield the token which pauses processing until the next token is requested. ParseErrorrtypedataN)r tokenQueuerrerrorsrpoppopleftr#r'r'r(__iter__7s  zHTMLTokenizer.__iter__c Cst}d}|rt}d}g}|j}||vrH|turH|||j}q"td||}|tvrt|}|j t ddd|idnd|krd ksn|d krd }|j t ddd|idnd |krd ks>nd|krdks>nd|krdks>nd|kr*dks>n|t gdvrZ|j t ddd|idz t |}Wn<t y|d}t d|d?Bt d|d@B}Yn0|dkr|j t ddd|j||S)zThis function returns either U+FFFD or the character based on the decimal or hexadecimal representation. It also discards ";" if present. If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked. 
r)z$illegal-codepoint-for-numeric-entity charAsIntr+r,datavarsii�r ii)# iiiiiiiiiiiiiiiiiii i i i i i i i i i iiiiir9iii;z numeric-entity-without-semicolonr*)rrrcharrappendintjoinrr-r frozensetchr ValueErrorunget) r#isHexallowedradix charStackcr6rBvr'r'r(consumeNumberEntityGsn              &   z!HTMLTokenizer.consumeNumberEntityFc Csd}|jg}|dtvsB|dtddfvsB|durV||dkrV|j|dn|ddkr d}||j|ddvrd}||j|r|dtvs|s|dtvr|j|d||}n4|j t d d d |j| dd |}nf|dturDt d |s0qD||jq z$t d |dd}t|}Wntyd}Yn0|dur>|dd kr|j t d dd |dd kr|r||tvs||tvs||dkr|j| dd |}n.t|}|j| |d ||d7}n4|j t d dd |j| dd |}|r|jddd|7<n*|tvrd}nd}|j t ||d dS)N&r<#F)xXTr)zexpected-numeric-entityr*r5rAznamed-entity-without-semicolon=zexpected-named-entityr,r SpaceCharacters Characters)rrBr rrIrCrrrPr-rr/rE entitiesTriehas_keys_with_prefixlongest_prefixlenKeyErrorr r r ) r# allowedChar fromAttributeoutputrMhex entityName entityLength tokenTyper'r'r( consumeEntitys~               zHTMLTokenizer.consumeEntitycCs|j|dddS)zIThis method replaces the need for "entityInAttributeValueState". T)r_r`N)rf)r#r_r'r'r(processEntityInAttributesz&HTMLTokenizer.processEntityInAttributecCs|j}|dtvr|dt|d<|dtdkrp|d}t|}t|t|krh||ddd||d<|dtdkr|dr|j tdd d |d r|j tdd d |j ||j |_ dS) zThis method is a generic handler for emitting the tags. It also sets the state to "data" because that's what's needed after a token has been emitted. r+nameStartTagr,NrTEndTagr)zattributes-in-end-tagr* selfClosingzself-closing-flag-on-end-tag) r r translater r attributeMapr]updater-rCrr)r#tokenrawr,r'r'r(emitCurrentTokens(    zHTMLTokenizer.emitCurrentTokencCs|j}|dkr|j|_n|dkr.|j|_n|dkrd|jtddd|jtdddn`|turpdS|t vr|jtd ||j t d dn&|j d }|jtd||dd S) NrQrRr)invalid-codepointr*rYFrXTrQrRrr) rrBentityDataStater tagOpenStater-rCrrr charsUntilr#r,charsr'r'r(rs.          
zHTMLTokenizer.dataStatecCs||j|_dSNT)rfrrr1r'r'r(ruszHTMLTokenizer.entityDataStatecCs|j}|dkr|j|_n|dkr.|j|_n|tkr:dS|dkrp|jtddd|jtdd dnT|t vr|jtd ||j t d dn&|j d }|jtd||dd S) NrQrRFrrr)rsr*rYr:rXTrt) rrBcharacterReferenceInRcdatarrcdataLessThanSignStaterr-rCrr rwrxr'r'r( rcdataState"s.          zHTMLTokenizer.rcdataStatecCs||j|_dSrz)rfr}rr1r'r'r(r{?sz(HTMLTokenizer.characterReferenceInRcdatacCs|j}|dkr|j|_nh|dkrR|jtddd|jtdddn2|tkr^dS|jd }|jtd||dd S NrRrrr)rsr*rYr:F)rRrrT) rrBrawtextLessThanSignStaterr-rCrrrwrxr'r'r( rawtextStateDs"       zHTMLTokenizer.rawtextStatecCs|j}|dkr|j|_nh|dkrR|jtddd|jtdddn2|tkr^dS|jd }|jtd||dd Sr~) rrBscriptDataLessThanSignStaterr-rCrrrwrxr'r'r(scriptDataStateVs"       zHTMLTokenizer.scriptDataStatecCsr|j}|tkrdS|dkrL|jtddd|jtdddn"|jtd||jdddS) NFrrr)rsr*rYr:T)rrBrr-rCrrwr#r,r'r'r(plaintextStatehs     zHTMLTokenizer.plaintextStatecCs |j}|dkr|j|_n|dkr.|j|_n|tvrVtd|gddd|_|j|_n|dkr|j tddd |j td d d |j |_nt|d kr|j tdd d |j ||j |_n@|j tddd |j td dd |j ||j |_dS)N!/riF)r+rhr,rkselfClosingAcknowledged>r)z'expected-tag-name-but-got-right-bracketr*rYz<>?z'expected-tag-name-but-got-question-markzexpected-tag-namerRT)rrBmarkupDeclarationOpenStatercloseTagOpenStater rr tagNameStater-rCrrIbogusCommentStaterr'r'r(rvws>           zHTMLTokenizer.tagOpenStatecCs|j}|tvr0td|gdd|_|j|_n|dkrX|jtddd|j |_nn|t ur|jtddd|jtd d d|j |_n0|jtdd d |id |j ||j |_dS)NrjFr+rhr,rkrr)z*expected-closing-tag-but-got-right-bracketr*z expected-closing-tag-but-got-eofrY|tkr|jtdd d|j |_n|jtd|dd S NrrYr*rRrrr)rsr:eof-in-script-in-scriptT) rrBr-rCr scriptDataDoubleEscapedDashStater(scriptDataDoubleEscapedLessThanSignStaterrrr'r'r(rs*        z*HTMLTokenizer.scriptDataDoubleEscapedStatecCs|j}|dkr2|jtddd|j|_n|dkrZ|jtddd|j|_n|dkr|jtddd|jtddd|j|_nF|t kr|jtdd d|j |_n|jtd|d|j|_d Sr) rrBr-rCr$scriptDataDoubleEscapedDashDashStaterrrrrrr'r'r(rs.        
z.HTMLTokenizer.scriptDataDoubleEscapedDashStatecCs|j}|dkr*|jtdddn|dkrR|jtddd|j|_n|dkrz|jtddd|j|_n|dkr|jtddd|jtdd d|j|_nF|t kr|jtdd d|j |_n|jtd|d|j|_d S) NrrYr*rRrrrr)rsr:rT) rrBr-rCrrrrrrrrr'r'r(r%s2        z2HTMLTokenizer.scriptDataDoubleEscapedDashDashStatecCsP|j}|dkr8|jtdddd|_|j|_n|j||j |_dS)NrrYr*r5T) rrBr-rCrrscriptDataDoubleEscapeEndStaterrIrrr'r'r(r>s   z6HTMLTokenizer.scriptDataDoubleEscapedLessThanSignStatecCs|j}|ttdBvrR|jtd|d|jdkrH|j |_ q|j |_ nB|t vr|jtd|d|j|7_n|j ||j |_ dSr)rrBr rFr-rCrrrrrrr rIrr'r'r(rIs    z,HTMLTokenizer.scriptDataDoubleEscapeEndStatecCs0|j}|tvr$|jtdn|tvrJ|jd|dg|j|_n|dkr\| n|dkrn|j |_n|dvr|j t ddd |jd|dg|j|_n|d kr|j t dd d |jdd dg|j|_nF|t ur|j t dd d |j|_n|jd|dg|j|_dS)NTr,r5rr)'"rWrRr)#invalid-character-in-attribute-namer*rrrsr:z#expected-attribute-name-but-got-eof)rrBr rwr r rCattributeNameStaterrqrr-rrrrr'r'r(rYs<           z&HTMLTokenizer.beforeAttributeNameStatecCs|j}d}d}|dkr&|j|_n.|tvr\|jddd||jtd7<d}n|dkrjd}n|tvr||j|_n|dkr|j |_n|d kr|j t d d d |jdddd 7<d}n|dvr |j t d dd |jddd|7<d}nH|t ur6|j t d dd |j|_n|jddd|7<d}|r|jdddt|jddd<|jdddD]>\}}|jddd|kr|j t d dd qҐq|r|dS)NTFrWr,rTrrrrrr)rsr*r:rrrRrzeof-in-attribute-namezduplicate-attribute)rrBbeforeAttributeValueStaterr r rwr afterAttributeNameStaterr-rCrrrrlr rq)r#r,leavingThisState emitTokenrh_r'r'r(rws^             z HTMLTokenizer.attributeNameStatecCsD|j}|tvr$|jtdn|dkr8|j|_n|dkrJ|n|tvrp|jd |dg|j |_n|dkr|j |_n|dkr|j t dd d |jd d dg|j |_n|d vr|j t dd d |jd |dg|j |_nF|tur$|j t ddd |j|_n|jd |dg|j |_dS)NTrWrr,r5rrrr)rsr*r:rz&invalid-character-after-attribute-namezexpected-end-of-tag-but-got-eof)rrBr rwrrrqr r rCrrr-rrrrr'r'r(rs@            z%HTMLTokenizer.afterAttributeNameStatecCsh|j}|tvr$|jtdn@|dkr8|j|_n,|dkrX|j|_|j|n |dkrj|j|_n|dkr|j t ddd| n|d kr|j t dd d|j d d d d7<|j|_n|dvr|j t ddd|j d d d |7<|j|_nL|turB|j t ddd|j|_n"|j d d d |7<|j|_dS)NTrrQrrr)z.expected-attribute-value-but-got-right-bracketr*rrrsr,rTr 
r:)rWrR`z"equals-in-unquoted-attribute-valuez$expected-attribute-value-but-got-eof)rrBr rwattributeValueDoubleQuotedStaterattributeValueUnQuotedStaterIattributeValueSingleQuotedStater-rCrrqr rrrr'r'r(rsF             z'HTMLTokenizer.beforeAttributeValueStatecCs|j}|dkr|j|_n|dkr0|dn|dkrj|jtddd|jddd d 7<nN|t ur|jtdd d|j |_n&|jddd ||j d 7<d S)NrrQrrr)rsr*r,rTr r:z#eof-in-attribute-value-double-quote)rrQrrT rrBafterAttributeValueStaterrgr-rCrr rrrwrr'r'r(rs&       z-HTMLTokenizer.attributeValueDoubleQuotedStatecCs|j}|dkr|j|_n|dkr0|dn|dkrj|jtddd|jddd d 7<nN|t ur|jtdd d|j |_n&|jddd ||j d 7<d S)NrrQrrr)rsr*r,rTr r:z#eof-in-attribute-value-single-quote)rrQrrTrrr'r'r(rs&       z-HTMLTokenizer.attributeValueSingleQuotedStatecCs|j}|tvr|j|_n|dkr0|dn|dkrB|n|dvr||jt ddd|j ddd |7<n|d kr|jt dd d|j ddd d 7<nV|t ur|jt dd d|j |_n.|j ddd ||j tdtB7<dS)NrQr)rrrWrRrr)z0unexpected-character-in-unquoted-attribute-valuer*r,rTr rrrsr:z eof-in-attribute-value-no-quotes)rQrrrrWrRrrrT)rrBr rrrgrqr-rCrr rrrwrFrr'r'r(rs4         z)HTMLTokenizer.attributeValueUnQuotedStatecCs|j}|tvr|j|_n|dkr.|np|dkr@|j|_n^|turt|j t ddd|j ||j |_n*|j t ddd|j ||j|_dS)Nrrr)z$unexpected-EOF-after-attribute-valuer*z*unexpected-character-after-attribute-valueT) rrBr rrrqrrr-rCrrIrrr'r'r(r.s&         z&HTMLTokenizer.afterAttributeValueStatecCs|j}|dkr&d|jd<|n^|turZ|jtddd|j||j |_ n*|jtddd|j||j |_ dS)NrTrkr)z#unexpected-EOF-after-solidus-in-tagr*z)unexpected-character-after-solidus-in-tag) rrBr rqrr-rCrrIrrrrr'r'r(rBs         z&HTMLTokenizer.selfClosingStartTagStatecCsD|jd}|dd}|jtd|d|j|j|_dS)Nrrrr:Commentr*T) rrwreplacer-rCrrBrrrr'r'r(rTs    zHTMLTokenizer.bogusCommentStatecCs|jg}|ddkrR||j|ddkrPtddd|_|j|_dSn|ddvrd}dD](}||j|d|vrfd }qqf|rtd ddddd |_|j|_dSn|dd krD|jdurD|jj j rD|jj j dj |jj j krDd}d D].}||j|d|krd }q2q|rD|j |_dS|jtddd|rt|j|qZ|j|_dS)NrTrrr5r*T)dD))oOrNCtTyYpPeEFDoctype)r+rhpublicIdsystemIdcorrect[)rrArrrr)zexpected-dashes-or-doctype)rrBrCrr commentStartStater doctypeStatertree openElements 
namespacedefaultNamespacecdataSectionStater-rIr/r)r#rMmatchedexpectedr'r'r(rcsZ       z(HTMLTokenizer.markupDeclarationOpenStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd d|j|j|j|_nP|t ur|jtdd d|j|j|j|_n|jd|7<|j |_d S) Nrrrr)rsr*r,r:rincorrect-commenteof-in-commentT) rrBcommentStartDashStaterr-rCrr rr commentStaterr'r'r(rs.       zHTMLTokenizer.commentStartStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd d|j|j|j|_nT|t ur|jtdd d|j|j|j|_n|jdd|7<|j |_d S) Nrrrr)rsr*r,-�rrrT) rrBcommentEndStaterr-rCrr rrrrr'r'r(rs.       z#HTMLTokenizer.commentStartDashStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<nT|tur|jtddd|j|j|j |_n|jd||j d 7<d S) Nrrrr)rsr*r,r:r)rrrT) rrBcommentEndDashStaterr-rCrr rrrwrr'r'r(rs$       zHTMLTokenizer.commentStatecCs|j}|dkr|j|_n|dkrV|jtddd|jdd7<|j|_nT|t ur|jtddd|j|j|j |_n|jdd|7<|j|_d S) Nrrrr)rsr*r,rzeof-in-comment-end-dashT) rrBrrr-rCrr rrrrr'r'r(rs$      z!HTMLTokenizer.commentEndDashStatecCs,|j}|dkr*|j|j|j|_n|dkrd|jtddd|jdd7<|j|_n|dkr|jtdd d|j |_n|d kr|jtdd d|jd|7<nj|t ur|jtdd d|j|j|j|_n4|jtdd d|jdd|7<|j|_dS)Nrrrr)rsr*r,u--�rz,unexpected-bang-after-double-dash-in-commentrz,unexpected-dash-after-double-dash-in-commentzeof-in-comment-double-dashzunexpected-char-in-commentz--T) rrBr-rCr rrrrcommentEndBangStaterrr'r'r(rs@          zHTMLTokenizer.commentEndStatecCs|j}|dkr*|j|j|j|_n|dkrN|jdd7<|j|_n|dkr|jtddd|jdd 7<|j |_nT|t ur|jtdd d|j|j|j|_n|jdd|7<|j |_d S) Nrrr,z--!rrr)rsr*u--!�zeof-in-comment-end-bang-stateT) rrBr-rCr rrrrrrrr'r'r(rs,       z!HTMLTokenizer.commentEndBangStatecCs|j}|tvr|j|_nj|tur\|jtdddd|j d<|j|j |j |_n*|jtddd|j ||j|_dS)Nr)!expected-doctype-name-but-got-eofr*Frzneed-space-after-doctypeT) rrBr beforeDoctypeNameStaterrr-rCrr rrIrr'r'r(rs        zHTMLTokenizer.doctypeStatecCs|j}|tvrn|dkrT|jtdddd|jd<|j|j|j|_n|dkr|jtdddd |jd <|j |_nR|t ur|jtdd dd|jd<|j|j|j|_n||jd <|j |_d S) Nrr)z+expected-doctype-name-but-got-right-bracketr*Frrrrsr:rhrT) rrBr r-rCrr rrdoctypeNameStaterrr'r'r(r*s4           
z$HTMLTokenizer.beforeDoctypeNameStatecCs|j}|tvr2|jdt|jd<|j|_n|dkrh|jdt|jd<|j |j|j |_n|dkr|j t ddd|jdd7<|j |_nh|t ur|j t dddd |jd <|jdt|jd<|j |j|j |_n|jd|7<d S) Nrhrrrr)rsr*r:zeof-in-doctype-nameFrT)rrBr r rlr afterDoctypeNameStaterr-rCrrrrrr'r'r(rDs0        zHTMLTokenizer.doctypeNameStatecCsH|j}|tvrn.|dkr8|j|j|j|_n |turd|jd<|j ||jt ddd|j|j|j|_n|dvrd}d D]}|j}||vrd}qq|r|j |_dSnD|d vr d}d D]}|j}||vrd}qq|r |j |_dS|j ||jt dd d |idd|jd<|j |_dS)NrFrr)eof-in-doctyper*rT))uU)bB)lL)iIrsS)rrrr)mMz*expected-space-or-right-bracket-in-doctyper,r7)rrBr r-rCr rrrrIrafterDoctypePublicKeywordStateafterDoctypeSystemKeywordStatebogusDoctypeState)r#r,rrr'r'r(r]sT            z#HTMLTokenizer.afterDoctypeNameStatecCs|j}|tvr|j|_n|dvrP|jtddd|j||j|_nT|t ur|jtdddd|j d<|j|j |j |_n|j||j|_dS N)rrr)unexpected-char-in-doctyper*rFrT) rrBr "beforeDoctypePublicIdentifierStaterr-rCrrIrr rrr'r'r(rs&         z,HTMLTokenizer.afterDoctypePublicKeywordStatecCs|j}|tvrn|dkr0d|jd<|j|_n|dkrLd|jd<|j|_n|dkr|jt dddd |jd <|j|j|j |_nh|t ur|jt dd dd |jd <|j|j|j |_n(|jt dd dd |jd <|j |_d S)Nrr5rrrr)unexpected-end-of-doctyper*FrrrT) rrBr r (doctypePublicIdentifierDoubleQuotedStater(doctypePublicIdentifierSingleQuotedStater-rCrrrrrr'r'r(rs:             z0HTMLTokenizer.beforeDoctypePublicIdentifierStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd dd |jd <|j|j|j|_nR|t ur|jtdd dd |jd <|j|j|j|_n|jd|7<d S)Nrrrr)rsr*rr:rrFrrT rrB!afterDoctypePublicIdentifierStaterr-rCrr rrrr'r'r(rs0         z6HTMLTokenizer.doctypePublicIdentifierDoubleQuotedStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd dd |jd <|j|j|j|_nR|t ur|jtdd dd |jd <|j|j|j|_n|jd|7<d S)Nrrrr)rsr*rr:rrFrrTr rr'r'r(rs0         z6HTMLTokenizer.doctypePublicIdentifierSingleQuotedStatecCs |j}|tvr|j|_n|dkr<|j|j|j|_n|dkrn|jt dddd|jd<|j |_n|dkr|jt dddd|jd<|j |_nh|t ur|jt dd dd |jd <|j|j|j|_n(|jt dddd |jd <|j |_d S) Nrrr)rr*r5rrrFrT)rrBr -betweenDoctypePublicAndSystemIdentifiersStaterr-rCr 
rr(doctypeSystemIdentifierDoubleQuotedState(doctypeSystemIdentifierSingleQuotedStaterrrr'r'r(r s>              z/HTMLTokenizer.afterDoctypePublicIdentifierStatecCs|j}|tvrn|dkr4|j|j|j|_n|dkrPd|jd<|j|_n|dkrld|jd<|j |_nh|t kr|jt dddd |jd <|j|j|j|_n(|jt dd dd |jd <|j |_d S) Nrrr5rrr)rr*FrrT) rrBr r-rCr rrr r rrrrr'r'r(r s2           z;HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersStatecCs|j}|tvr|j|_n|dvrP|jtddd|j||j|_nT|t ur|jtdddd|j d<|j|j |j |_n|j||j|_dSr) rrBr "beforeDoctypeSystemIdentifierStaterr-rCrrIrr rrr'r'r(r)s&         z,HTMLTokenizer.afterDoctypeSystemKeywordStatecCs|j}|tvrn|dkr0d|jd<|j|_n|dkrLd|jd<|j|_n|dkr|jt dddd |jd <|j|j|j |_nh|t ur|jt dd dd |jd <|j|j|j |_n(|jt dddd |jd <|j |_d S) Nrr5rrrr)rr*FrrT) rrBr r r rr r-rCrrrrrr'r'r(r=s:             z0HTMLTokenizer.beforeDoctypeSystemIdentifierStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd dd |jd <|j|j|j|_nR|t ur|jtdd dd |jd <|j|j|j|_n|jd|7<d S)Nrrrr)rsr*rr:rrFrrT rrB!afterDoctypeSystemIdentifierStaterr-rCrr rrrr'r'r(r Zs0         z6HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedStatecCs|j}|dkr|j|_n|dkrN|jtddd|jdd7<n|dkr|jtdd dd |jd <|j|j|j|_nR|t ur|jtdd dd |jd <|j|j|j|_n|jd|7<d S)Nrrrr)rsr*rr:rrFrrTrrr'r'r(r rs0         z6HTMLTokenizer.doctypeSystemIdentifierSingleQuotedStatecCs|j}|tvrn~|dkr4|j|j|j|_n^|turt|jt dddd|jd<|j|j|j|_n|jt ddd|j |_dS) Nrr)rr*FrrT) rrBr r-rCr rrrrrrr'r'r(rs$      z/HTMLTokenizer.afterDoctypeSystemIdentifierStatecCsZ|j}|dkr*|j|j|j|_n,|turV|j||j|j|j|_ndS)NrT) rrBr-rCr rrrrIrr'r'r(rs    zHTMLTokenizer.bogusDoctypeStatecCsg}||jd||jd|j}|tkr>qq|dksJJ|ddddkrv|ddd|d<qq||qd|}|d}|dkrt|D]}|jt d d d q| dd }|r|jt d |d |j |_ dS)N]rrTz]]r5rrrr)rsr*r:rYT) rCrrwrBrrEcountranger-rrrr)r#r,rB nullCountrr'r'r(rs2          zHTMLTokenizer.cdataSectionState)N)NF)N__name__ __module__ __qualname____doc__r"r2rPrfrgrqrrur}r{rrrrvrrr|rrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr r rrr r rrr __classcell__r'r'r%r(rs H P#         6 "-3rN) __future__rrrZpip._vendor.sixrrG collectionsrrsysr constantsr r r 
r rrrrrr _inputstreamr_trierrZdictrmobjectrr'r'r'r(s        PK!s55$__pycache__/_ihatexml.cpython-39.pycnu[a ReXA @sddlmZmZmZddlZddlZddlmZdZdZ dZ dZ d Z d ee gZd ee d d d e e gZd ed gZedZedZddZddZeddZddZddZddZddZedZedZed ZGd!d"d"eZ dS)#)absolute_importdivisionunicode_literalsN)DataLossWarninga^ [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | 
[#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]z*[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]a [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | 
#x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309Aa  [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]z} #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | #[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE] | .-_z#x([\d|A-F]{4,4})z'\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]cCsdd|dD}g}|D]}d}ttfD]V}||}|dur,|dd|Dt|ddkrz|dd|d<d }qq,|st|dksJ|t|gdqt|}|S) NcSsg|] }|qS)strip.0itemr r /builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/_ihatexml.py hz$charStringToList..rFcSsg|] }t|qSr )hexToIntr r r rrorrT) splitreChar reCharRangematchappendgroupslenordnormaliseCharList)charsZ charRangesrvrZ foundMatchregexprr r rcharStringToListgs"  r"cCst|}|D]}|d|dks Jq g}d}|t|krd}|||||t|kr|||d|dddkr|||d|dd<|d7}qL||7}q.|S)Nrrr)sortedrr)charListrr ijr r rr|s 0  
rZFFFFcCsg}|ddkr*|d|dddgt|ddD].\}}||dd||dddgq:|ddtkr||dddtg|S)Nrrr)r enumerate max_unicode)r$r r%rr r r missingRangess (r*cCsng}|D]V}|d|dkr4|tt|dq|tt|ddtt|dqdd|S)Nrrr z[%s])r escapeRegexpchrjoin)r$r rr r rlistToRegexpStrsr/cCs t|dS)Nr')int)hex_strr r rrsrcCs"d}|D]}||d|}q|S)N)r^$*+?{}[]|()r \)replace)stringZspecialCharacterscharr r rr,sr,u[-,/:-@\[-\^`\{-¶¸-¿×÷IJ-ijĿ-ŀʼnſDŽ-njDZ-dzǶ-ǹȘ-ɏʩ-ʺ˂-ˏ˒-˿͆-͟͢-΅΋΍΢Ϗϗ-ϙϛϝϟϡϴ-ЀЍѐѝ҂҇-ҏӅ-ӆӉ-ӊӍ-ӏӬ-ӭӶ-ӷӺ-԰՗-՘՚-ՠև-֐ֺ֢־׀׃ׅ-׏׫-ׯ׳-ؠػ-ؿٓ-ٟ٪-ٯڸ-ڹڿۏ۔۩ۮ-ۯۺ-ऀऄऺ-ऻॎ-ॐॕ-ॗ।-॥॰-ঀ঄঍-঎঑-঒঩঱঳-঵঺-঻ঽ৅-৆৉-৊ৎ-৖৘-৛৞৤-৥৲-ਁਃ-਄਋-਎਑-਒਩਱਴਷਺-਻਽੃-੆੉-੊੎-੘੝੟-੥ੵ-઀઄ઌ઎઒઩઱઴઺-઻૆૊૎-૟ૡ-૥૰-଀଄଍-଎଑-଒଩଱଴-ଵ଺-଻ୄ-୆୉-୊୎-୕୘-୛୞ୢ-୥୰-஁஄஋-஍஑஖-஘஛஝஠-஢஥-஧஫-஭ஶ஺-஽௃-௅௉௎-௖௘-௦௰-ఀఄ఍఑఩ఴ఺-ఽ౅౉౎-౔౗-౟ౢ-౥౰-ಁ಄಍಑಩಴಺-ಽ೅೉೎-೔೗-ೝ೟ೢ-೥೰-ഁഄ഍഑ഩഺ-ഽൄ-൅൉ൎ-ൖ൘-ൟൢ-൥൰-฀ฯ฻-฿๏๚-຀຃຅-ຆຉ຋-ຌຎ-ຓຘຠ຤຦ຨ-ຩຬຯ຺຾-຿໅໇໎-໏໚-༗༚-༟༪-༴༶༸༺-༽཈ཪ-཰྅ྌ-ྏྖ྘ྮ-ྰྸྺ-႟჆-჏ჷ-ჿᄁᄄᄈᄊᄍᄓ-ᄻᄽᄿᅁ-ᅋᅍᅏᅑ-ᅓᅖ-ᅘᅚ-ᅞᅢᅤᅦᅨᅪ-ᅬᅯ-ᅱᅴᅶ-ᆝᆟ-ᆧᆩ-ᆪᆬ-ᆭᆰ-ᆶᆹᆻᇃ-ᇪᇬ-ᇯᇱ-ᇸᇺ-᷿ẜ-ẟỺ-ỿ἖-἗἞-἟὆-὇὎-὏὘὚὜὞὾-὿᾵᾽᾿-῁῅῍-῏῔-῕῜-῟῭-῱῵´-⃏⃝-⃠⃢-℥℧-℩ℬ-ℭℯ-ⅿↃ-〄〆〈-〠〰〶-぀ゕ-゘゛-゜ゟ-゠・ヿ-㄄ㄭ-䷿龦-꯿힤-￿]u[-@\[-\^`\{-¿×÷IJ-ijĿ-ŀʼnſDŽ-njDZ-dzǶ-ǹȘ-ɏʩ-ʺ˂-΅·΋΍΢Ϗϗ-ϙϛϝϟϡϴ-ЀЍѐѝ҂-ҏӅ-ӆӉ-ӊӍ-ӏӬ-ӭӶ-ӷӺ-԰՗-՘՚-ՠև-׏׫-ׯ׳-ؠػ-ـً-ٰڸ-ڹڿۏ۔ۖ-ۤۧ-ऄऺ-़ा-ॗॢ-঄঍-঎঑-঒঩঱঳-঵঺-৛৞ৢ-৯৲-਄਋-਎਑-਒਩਱਴਷਺-੘੝੟-ੱੵ-઄ઌ઎઒઩઱઴઺-઼ા-૟ૡ-଄଍-଎଑-଒଩଱଴-ଵ଺-଼ା-୛୞ୢ-஄஋-஍஑஖-஘஛஝஠-஢஥-஧஫-஭ஶ஺-ఄ఍఑఩ఴ఺-౟ౢ-಄಍಑಩಴಺-ೝ೟ೢ-ഄ഍഑ഩഺ-ൟൢ-฀ฯัิ-฿ๆ-຀຃຅-ຆຉ຋-ຌຎ-ຓຘຠ຤຦ຨ-ຩຬຯັິ-ຼ຾-຿໅-༿཈ཪ-႟჆-჏ჷ-ჿᄁᄄᄈᄊᄍᄓ-ᄻᄽᄿᅁ-ᅋᅍᅏᅑ-ᅓᅖ-ᅘᅚ-ᅞᅢᅤᅦᅨᅪ-ᅬᅯ-ᅱᅴᅶ-ᆝᆟ-ᆧᆩ-ᆪᆬ-ᆭᆰ-ᆶᆹᆻᇃ-ᇪᇬ-ᇯᇱ-ᇸᇺ-᷿ẜ-ẟỺ-ỿ἖-἗἞-἟὆-὇὎-὏὘὚὜὞὾-὿᾵᾽᾿-῁῅῍-῏῔-῕῜-῟῭-῱῵´-℥℧-℩ℬ-ℭℯ-ⅿↃ-〆〈-〠〪-぀ゕ-゠・-㄄ㄭ-䷿龦-꯿힤-￿]z#[^ a-zA-Z0-9\-'()+,./:=?;!*#@$_%]c@sreZdZedZdddZdddZd d Zd d Z d dZ ddZ ddZ ddZ ddZddZddZdS) InfosetFilterz U[\dA-F]{5,5}FTcCs.||_||_||_||_||_||_i|_dSN)dropXmlnsLocalNamedropXmlnsAttrNspreventDoubleDashCommentspreventDashAtCommentEndreplaceFormFeedCharacterspreventSingleQuotePubid replaceCache)selfrDrErFrGrHrIr r r__init__szInfosetFilter.__init__NcCsL|jr |dr tdtdS|jr>|dkr>tdtdS||SdS)Nzxmlns:z"Attributes cannot begin with xmlnszhttp://www.w3.org/2000/xmlns/z)Attributes cannot be in the xml namespace)rD startswithwarningswarnrrE toXmlName)rKname namespacer r rcoerceAttributes 
 zInfosetFilter.coerceAttributecCs ||SrC)rP)rKrQr r r coerceElementszInfosetFilter.coerceElementcCsJ|jrFd|vr(tdt|dd}q|drFtdt|d7}|S)Nz--z'Comments cannot contain adjacent dashesz- -r zComments cannot end in a dash )rFrNrOrr?endswith)rKdatar r r coerceComments   zInfosetFilter.coerceCommentcCs6|jr2t|dD]}tdtq|dd}|S)N zText cannot contain U+000CrU)rHrangecountrNrOrr?)rKrWr r r rcoerceCharacterss  zInfosetFilter.coerceCharacterscCsl|}t|D]&}tdt||}|||}q|jrh|ddkrhtdt|d|d}|S)NzCoercing non-XML pubid'rz!Pubid cannot contain single quote) nonPubidCharRegexpfindallrNrOrgetReplacementCharacterr?rIfind)rKrWZ dataOutputrA replacementr r r coercePubids   zInfosetFilter.coercePubidc Cs|d}|dd}t|}|r>td|t||}n|}|}tt|}|D]*}td|t||} | || }qX||S)NrrzCoercing non-XML name: %s) nonXmlNameFirstBMPRegexprrNrOrr`setnonXmlNameBMPRegexpr_r?) rKrQ nameFirstZnameRestmZnameFirstOutputZnameRestOutputZ replaceCharsrArbr r rrPs    zInfosetFilter.toXmlNamecCs$||jvr|j|}n ||}|SrC)rJ escapeCharrKrArbr r rr`s   z%InfosetFilter.getReplacementCharactercCs,t|j|D]}||||}q|SrC)rereplacementRegexpr_r? unescapeChar)rKrQrr r r fromXmlNameszInfosetFilter.fromXmlNamecCsdt|}||j|<|S)NzU%05X)rrJrjr r rris  zInfosetFilter.escapeCharcCstt|dddS)Nrr')r-r0)rKZcharcoder r rrl szInfosetFilter.unescapeChar)FFFFTF)N)__name__ __module__ __qualname__recompilerkrLrSrTrXr\rcrPr`rmrirlr r r rrBs$     rB)! 
__future__rrrrqrN constantsrZbaseCharZ ideographicZcombiningCharacterdigitZextenderr.letterrQrgrrrrr"rr0r)r*r/rr,rfrdr^objectrBr r r rs4 0        PK!ifTT'__pycache__/_inputstream.cpython-39.pycnu[a Rea~@sddlmZmZmZddlmZddlmZmZddl Z ddl Z ddl m Z m Z ddlmZddlmZmZmZmZdd lmZdd lmZed d eDZed d eDZedd eDZeeddgBZdZejreddkreddksJe edde!ddZ"n e eZ"hdZ#e dZ$iZ%Gddde&Z'ddZ(Gddde&Z)Gddde)Z*Gdd d e+Z,Gd!d"d"e&Z-Gd#d$d$e&Z.d%d&Z/dS)')absolute_importdivisionunicode_literals) text_type) http_clienturllibN)BytesIOStringIO) webencodings)EOFspaceCharacters asciiLettersasciiUppercase)_ReparseException)_utilscCsg|]}|dqSasciiencode.0itemr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/_inputstream.py rcCsg|]}|dqSrrrrrrrrcCsg|]}|dqSrrrrrrrr> i iii iiiiiiiiii iii ii i iiiiii i i ii iiz[ - -/:-@\[-`{-~]c@sHeZdZdZddZddZddZdd Zd d Zd d Z ddZ dS)BufferedStreamzBuffering for streams that do not have buffering of their own The buffer is implemented as a list of chunks on the assumption that joining many strings will be slow since it is O(n**2) cCs||_g|_ddg|_dS)Nrr)streambufferposition)selfr"rrr__init__:szBufferedStream.__init__cCs<d}|jd|jdD]}|t|7}q||jd7}|SNrr )r#r$len)r%poschunkrrrtell?s zBufferedStream.tellcCsT||ksJ|}d}t|j||krF|t|j|8}|d7}q||g|_dSr')_bufferedBytesr(r#r$)r%r)offsetirrrseekFs zBufferedStream.seekcCsT|js||S|jdt|jkrF|jdt|jdkrF||S||SdS)Nrr r)r# _readStreamr$r(_readFromBufferr%bytesrrrreadOs  zBufferedStream.readcCstdd|jDS)NcSsg|] }t|qSr)r(rrrrrYrz1BufferedStream._bufferedBytes..)sumr#r%rrrr,XszBufferedStream._bufferedBytescCs<|j|}|j||jdd7<t||jd<|Sr')r"r4r#appendr$r()r%r3datarrrr0[s   zBufferedStream._readStreamcCs|}g}|jd}|jd}|t|jkr|dkr|dks>J|j|}|t||krl|}|||g|_n"t||}|t|g|_|d7}|||||||8}d}q|r|||d|S)Nrr r)r$r(r#r7r0join)r%r3remainingBytesrv bufferIndex bufferOffset bufferedData bytesToReadrrrr1bs&     zBufferedStream._readFromBufferN) __name__ __module__ __qualname____doc__r&r+r/r4r,r0r1rrrrr!3s  
r!cKst|tjs(t|tjjr.t|jtjr.d}n&t|drJt|dt }n t|t }|rdd|D}|rvt d|t |fi|St |fi|SdS)NFr4rcSsg|]}|dr|qS) _encoding)endswith)rxrrrrrz#HTMLInputStream..z3Cannot set an encoding with a unicode input, set %r) isinstancer HTTPResponserresponseaddbasefphasattrr4r TypeErrorHTMLUnicodeInputStreamHTMLBinaryInputStream)sourcekwargs isUnicode encodingsrrrHTMLInputStream}s      rTc@speZdZdZdZddZddZddZd d Zd d Z d dZ dddZ ddZ ddZ dddZddZdS)rNProvides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing incorrect byte-sequences and also provides column and line tracking. i(cCsZtjsd|_ntddkr$|j|_n|j|_dg|_tddf|_| ||_ | dS)Initialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) Nu􏿿r rutf-8certain) rsupports_lone_surrogatesreportCharacterErrorsr(characterErrorsUCS4characterErrorsUCS2newLineslookupEncoding charEncoding openStream dataStreamreset)r%rPrrrr&s   zHTMLUnicodeInputStream.__init__cCs.d|_d|_d|_g|_d|_d|_d|_dS)Nr)r* chunkSize chunkOffseterrors prevNumLines prevNumCols_bufferedCharacterr6rrrrbszHTMLUnicodeInputStream.resetcCst|dr|}nt|}|SzvProduces a file object from source. source can be either a file object, local filename or a string. r4)rLr r%rPr"rrrr`s z!HTMLUnicodeInputStream.openStreamcCsT|j}|dd|}|j|}|dd|}|dkr@|j|}n ||d}||fS)N rrr )r*countrgrfindrh)r%r-r*nLines positionLine lastLinePospositionColumnrrr _positions   z HTMLUnicodeInputStream._positioncCs||j\}}|d|fS)z:Returns (line, col) of the current position in the stream.r )rsre)r%linecolrrrr$szHTMLUnicodeInputStream.positioncCs6|j|jkr|stS|j}|j|}|d|_|S)zo Read one character from the stream or queue if available. Return EOF when EOF is reached. 
r )rerd readChunkr r*)r%recharrrrrws   zHTMLUnicodeInputStream.charNcCs|dur|j}||j\|_|_d|_d|_d|_|j|}|j rX|j |}d|_ n|s`dSt |dkrt |d}|dksd|krdkrnn|d|_ |dd}|j r| || d d }| d d }||_t ||_d S) NrcrFr r iz rl T)_defaultChunkSizersrdrgrhr*rerar4rir(ordrZreplace)r%rdr8lastvrrrrvs0           z HTMLUnicodeInputStream.readChunkcCs(ttt|D]}|jdqdS)Ninvalid-codepoint)ranger(invalid_unicode_refindallrfr7)r%r8_rrrr[sz*HTMLUnicodeInputStream.characterErrorsUCS4cCsd}t|D]}|rqt|}|}t|||drrt|||d}|tvrl|j dd}q|dkr|dkr|t |dkr|j dqd}|j dqdS)NFrTryir ) rfinditerr|groupstartrisSurrogatePairsurrogatePairToCodepointnon_bmp_invalid_codepointsrfr7r()r%r8skipmatch codepointr)char_valrrrr\#s"  z*HTMLUnicodeInputStream.characterErrorsUCS2Fc Cszt||f}Wnftyv|D]}t|dks"Jq"ddd|D}|sXd|}td|}t||f<Yn0g}||j|j}|dur|j|j krqn0| }||j kr| |j|j|||_q| |j|jd| s|qq|d|} | S)z Returns a string of characters from the stream up to but not including any character in 'characters' or EOF. 'characters' must be a container that supports the 'in' method and iteration over its characters. 
rccSsg|]}dt|qS)z\x%02x)r|)rcrrrrHrz5HTMLUnicodeInputStream.charsUntil..z^%sz[%s]+N) charsUntilRegExKeyErrorr|r9recompilerr*rerdendr7rv) r% charactersoppositecharsrregexr;mrrrrr charsUntil:s0     z!HTMLUnicodeInputStream.charsUntilcCsT|turP|jdkr.||j|_|jd7_n"|jd8_|j|j|ksPJdSr')r rer*rd)r%rwrrrungetis   zHTMLUnicodeInputStream.unget)N)F)r@rArBrCr{r&rbr`rsr$rwrvr[r\rrrrrrrNs   & /rNc@sLeZdZdZdddZddZd d Zdd d Zd dZddZ ddZ dS)rOrUN windows-1252TcCsn|||_t||jd|_d|_||_||_||_||_ ||_ | ||_ |j ddusbJ| dS)rVidrN)r` rawStreamrNr& numBytesMetanumBytesChardetoverride_encodingtransport_encodingsame_origin_parent_encodinglikely_encodingdefault_encodingdetermineEncodingr_rb)r%rPrrrrr useChardetrrrr&s  zHTMLBinaryInputStream.__init__cCs&|jdj|jd|_t|dS)Nrr})r_ codec_info streamreaderrrarNrbr6rrrrbszHTMLBinaryInputStream.resetcCsJt|dr|}nt|}z||WntyDt|}Yn0|Srj)rLrr/r+ Exceptionr!rkrrrr`s  z HTMLBinaryInputStream.openStreamcCs|df}|ddur|St|jdf}|ddur:|St|jdf}|ddurX|S|df}|ddurt|St|jdf}|ddur|djds|St|jdf}|ddur|S|rnzddl m }Wnt yYn0g}|}|j s:|j |j}t|tsJ|s$q:||||q|t|jd}|j d|durn|dfSt|jdf}|ddur|StddfS)NrXr tentativezutf-16)UniversalDetectorencodingr) detectBOMr^rrdetectEncodingMetarname startswithr%pip._vendor.chardet.universaldetectorr ImportErrordonerr4rrGr3r7feedcloseresultr/r)r%chardetr_rbuffersdetectorr#rrrrrsR            z'HTMLBinaryInputStream.determineEncodingcCs|jddksJt|}|dur&dS|jdvrFtd}|dusJnT||jdkrf|jddf|_n4|jd|df|_|td|jd|fdS)Nr rXutf-16beutf-16lerWrzEncoding changed from %s to %s)r_r^rrr/rbr)r% newEncodingrrrchangeEncodings   z$HTMLBinaryInputStream.changeEncodingc Cstjdtjdtjdtjdtjdi}|jd}t|t s"rOc@seZdZdZddZddZddZdd Zd d Zd d Z ddZ ddZ e e e Z ddZe eZefddZddZddZddZdS) EncodingByteszString-like object with an associated position and various extra methods If the position is ever greater than the string length then an exception is raisedcCst|tsJt||SN)rGr3__new__lowerr%valuerrrrFszEncodingBytes.__new__cCs 
d|_dS)Nr)rsrrrrr&JszEncodingBytes.__init__cCs|Srrr6rrr__iter__NszEncodingBytes.__iter__cCs>|jd}|_|t|kr"tn |dkr.t|||dS)Nr rrsr( StopIterationrMr%prrr__next__Qs  zEncodingBytes.__next__cCs|Sr)rr6rrrnextYszEncodingBytes.nextcCsB|j}|t|krtn |dkr$t|d|_}|||dSr'rrrrrprevious]s zEncodingBytes.previouscCs|jt|krt||_dSrrsr(r)r%r$rrr setPositionfszEncodingBytes.setPositioncCs*|jt|krt|jdkr"|jSdSdS)Nrrr6rrr getPositionks  zEncodingBytes.getPositioncCs||j|jdSNr )r$r6rrrgetCurrentByteuszEncodingBytes.getCurrentBytecCsH|j}|t|kr>|||d}||vr4||_|S|d7}q||_dS)zSkip past a list of charactersr Nr$r(rsr%rrrrrrrzs  zEncodingBytes.skipcCsH|j}|t|kr>|||d}||vr4||_|S|d7}q||_dSrrrrrr skipUntils  zEncodingBytes.skipUntilcCs(|||j}|r$|jt|7_|S)zLook for a sequence of bytes at the start of a string. If the bytes are found return True and advance the position to the byte after the match. Otherwise return False and leave the position alone)rr$r()r%r3r;rrr matchBytesszEncodingBytes.matchBytescCs<z |||jt|d|_Wnty6tYn0dS)zLook for the next sequence of bytes matching a given sequence. 
If a match is found advance the position to the last byte of the matchr T)indexr$r(rs ValueErrorrr2rrrjumpTos    zEncodingBytes.jumpToN)r@rArBrCrr&rrrrrrpropertyr$r currentBytespaceCharactersBytesrrrrrrrrrBs      rc@sXeZdZdZddZddZddZdd Zd d Zd d Z ddZ ddZ ddZ dS)rz?Mini parser for detecting character encoding from meta elementscCst||_d|_dS)z3string - the data to work on for encoding detectionN)rr8rr%r8rrrr&s zEncodingParser.__init__c Csd|jvrdSd|jfd|jfd|jfd|jfd|jfd|jff}|jD]}d}z|jdWntyxYqYn0|D]B\}}|j|r~z|}WqWq~tyd}YqYq~0q~|sHqqH|j S) Nsr8rr6rrrrszEncodingParser.handleCommentcCs|jjtvrdSd}d}|}|dur,dS|ddkr\|ddk}|r|dur||_dSq|ddkr|d}t|}|dur||_dSq|ddkrtt|d}|}|durt|}|dur|r||_dS|}qdS) NTFrs http-equivr s content-typecharsetscontent) r8rr getAttributerr^ContentAttrParserrparse)r% hasPragmapendingEncodingattrtentativeEncodingcodec contentParserrrrrs8      zEncodingParser.handleMetacCs |dS)NF)handlePossibleTagr6rrrrsz%EncodingParser.handlePossibleStartTagcCst|j|dS)NT)rr8rr6rrrrs z#EncodingParser.handlePossibleEndTagcCsb|j}|jtvr(|r$||dS|t}|dkrD|n|}|dur^|}qLdS)NTr)r8rasciiLettersBytesrrrspacesAngleBracketsr)r%endTagr8rrrrrrs    z EncodingParser.handlePossibleTagcCs |jdS)Nrrr6rrrrszEncodingParser.handleOthercCs|j}|ttdgB}|dus2t|dks2J|dvr>dSg}g}|dkrV|rVqnX|tvrj|}qnD|dvrd|dfS|tvr||n|durdS||t |}qF|dkr| d|dfSt ||}|dvrJ|}t |}||kr"t |d|d|fS|tvr<||q||qnJ|d krbd|dfS|tvr|||n|durdS||t |}|t vrd|d|fS|tvr||n|durdS||qdS) z_Return a name,value pair for the next attribute in the stream, if one is found, or None/Nr )rN=)rrr)'"r) r8rr frozensetr(r9asciiUppercaseBytesr7rrrr)r%r8rattrName attrValue quoteCharrrrrsb             zEncodingParser.getAttributeN) r@rArBrCr&rrrrrrrrrrrrrs$rc@seZdZddZddZdS)rcCst|tsJ||_dSr)rGr3r8rrrrr&aszContentAttrParser.__init__cCsz|jd|jjd7_|j|jjdkssH      JgIb='PK!z'J:==#__pycache__/__init__.cpython-39.pycnu[a Re@s`dZddlmZmZmZddlmZmZmZddl m Z ddl m Z ddl mZgdZd Zd S) a HTML parsing library based on the `WHATWG HTML specification `_. 
The parser is designed to be compatible with existing HTML found in the wild and implements well-defined error recovery that is largely compatible with modern desktop web browsers. Example usage:: from pip._vendor import html5lib with open("my_document.html", "rb") as f: tree = html5lib.parse(f) For convenience, this module re-exports the following names: * :func:`~.html5parser.parse` * :func:`~.html5parser.parseFragment` * :class:`~.html5parser.HTMLParser` * :func:`~.treebuilders.getTreeBuilder` * :func:`~.treewalkers.getTreeWalker` * :func:`~.serializer.serialize` )absolute_importdivisionunicode_literals) HTMLParserparse parseFragment)getTreeBuilder) getTreeWalker) serialize)rrrr r r z1.1N)__doc__ __future__rrr html5parserrrr treebuildersr treewalkersr serializerr __all__ __version__rr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/__init__.pys   PK!Fd*d*%__pycache__/serializer.cpython-39.pycnu[a Re=@szddlmZmZmZddlmZddlZddlmZm Z ddl m Z m Z m Z ddl mZmZmZddlmZmZdd lmZd e d Zed ed Zed edZiZeddkZeeD]n\Z Z!eree!dksesee!dkrqe!dkree!dkre"e!Z!ne#e!Z!e!evs0e $re ee!<qddZ%ede%dddZ&Gddde'Z(Gddde)Z*dS))absolute_importdivisionunicode_literals) text_typeN)register_errorxmlcharrefreplace_errors) voidElementsbooleanAttributesspaceCharacters)rcdataElementsentities xmlEntities) treewalkers_utils)escapez"'=<>`[]u_  /`  ᠎᠏           

   ]u􏿿&c Cst|ttfrg}g}d}t|j|j|jD]n\}}|rDd}q2||j}t|j|t |j|dgrt |j||d}d}nt |}| |q2|D]V}t |} | r| d| | | ds| dq| dt|ddqd||jfSt|SdS)NFrTr;z&#x%s;r) isinstanceUnicodeEncodeErrorUnicodeTranslateError enumerateobjectstartendrisSurrogatePairminsurrogatePairToCodepointordappend_encode_entity_mapgetendswithhexjoinr) excres codepointsskipicindex codepointcper3/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/serializer.pyhtmlentityreplace_errors*s0 "      r5htmlentityreplaceetreecKs(t|}tfi|}||||S)aSerializes the input token stream using the specified treewalker :arg input: the token stream to serialize :arg tree: the treewalker to use :arg encoding: the encoding to use :arg serializer_opts: any options to pass to the :py:class:`html5lib.serializer.HTMLSerializer` that gets created :returns: the tree serialized as a string Example: >>> from html5lib.html5parser import parse >>> from html5lib.serializer import serialize >>> token_stream = parse('

Hi!

') >>> serialize(token_stream, omit_optional_tags=False) '

Hi!

' )r getTreeWalkerHTMLSerializerrender)inputtreeencodingserializer_optswalkersr3r3r4 serializeKs rAc@s~eZdZdZdZdZdZdZdZdZ dZ dZ dZ dZ dZdZdZdZddZdd Zd d Zdd dZdddZdddZd S)r9legacy"TF)quote_attr_values quote_charuse_best_quote_charomit_optional_tagsminimize_boolean_attributesuse_trailing_solidusspace_before_trailing_solidusescape_lt_in_attrs escape_rcdataresolve_entitiesalphabetical_attributesinject_meta_charsetstrip_whitespacesanitizec Ksvt|t|j}t|dkr2tdtt|d|vr@d|_|jD]}t||||t ||qFg|_ d|_ dS)aB Initialize HTMLSerializer :arg inject_meta_charset: Whether or not to inject the meta charset. Defaults to ``True``. :arg quote_attr_values: Whether to quote attribute values that don't require quoting per legacy browser behavior (``"legacy"``), when required by the standard (``"spec"``), or always (``"always"``). Defaults to ``"legacy"``. :arg quote_char: Use given quote character for attribute quoting. Defaults to ``"`` which will use double quotes unless attribute value contains a double quote, in which case single quotes are used. :arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute values. Defaults to ``False``. :arg escape_rcdata: Whether to escape characters that need to be escaped within normal elements within rcdata elements such as style. Defaults to ``False``. :arg resolve_entities: Whether to resolve named character entities that appear in the source tree. The XML predefined entities < > & " ' are unaffected by this setting. Defaults to ``True``. :arg strip_whitespace: Whether to remove semantically meaningless whitespace. (This compresses all whitespace to a single space except within ``pre``.) Defaults to ``False``. :arg minimize_boolean_attributes: Shortens boolean attributes to give just the attribute value, for example:: becomes:: Defaults to ``True``. :arg use_trailing_solidus: Includes a close-tag slash at the end of the start tag of void elements (empty elements whose end tag is forbidden). E.g. ``
``. Defaults to ``False``. :arg space_before_trailing_solidus: Places a space immediately before the closing slash in a tag using a trailing solidus. E.g. ``
``. Requires ``use_trailing_solidus=True``. Defaults to ``True``. :arg sanitize: Strip all unsafe or unknown constructs from output. See :py:class:`html5lib.filters.sanitizer.Filter`. Defaults to ``False``. :arg omit_optional_tags: Omit start/end tags that are optional. Defaults to ``True``. :arg alphabetical_attributes: Reorder attributes to be in alphabetical order. Defaults to ``False``. rz2__init__() got an unexpected keyword argument '%s'rEFN) frozensetoptionslen TypeErrornextiterrFsetattrr%getattrerrorsstrict)selfkwargsunexpected_argsattrr3r3r4__init__sO  zHTMLSerializer.__init__cCs*t|tsJ|jr"||jdS|SdS)Nr6rrr=encoder\stringr3r3r4rbszHTMLSerializer.encodecCs*t|tsJ|jr"||jdS|SdS)Nr[rarcr3r3r4 encodeStrictszHTMLSerializer.encodeStrictNccs||_d}g|_|r0|jr0ddlm}|||}|jrJddlm}||}|jrdddlm}||}|j r~ddl m}||}|j rddl m}||}|D]>}|d}|dkr\d|d}|dr|d |d7}n|d r|d 7}|d rF|d  d d kr,|d  dd kr&|dd}nd }|d||d |f7}|d7}||Vq|dvr|dksv|r|r|d dd kr|d||dVn|t|dVq|dvr|d} |d| V| tvr|jsd}n|r |d|dD]\\} } } | } | }|dV|| V|jrv| t| tvr| tdtvr|dV|jdkst|d krd}n@|jdkrt|du}n$|jd krt|du}ntd!|d"d#}|j r|d$d%}|r|j!}|j"rHd|vr0d |vr0d }nd |vrHd|vrHd}|dkr`|dd&}n |d d'}||V||V||Vn ||Vq| t#vr|j$r|j%r|d(Vn |d)V|dVq|d*kr(|d} | tvrd}n|r|d|d+| Vq|d,krj|d}| d-d krT|d.|d/|dVq|d0kr|d} | d1}|t&vr|d2| |j'r|t(vrt&|}nd3| }||Vq||dqdS)4NFr)FiltertypeDoctypez ) CharactersSpaceCharactersrodatazCommentz--zComment contains --z EntityrzEntity %s not recognizedz&%s;))r=rZrOfilters.inject_meta_charsetrfrNfilters.alphabeticalattributesrPfilters.whitespacerQfilters.sanitizerrGfilters.optionaltagsfindserializeErrorrerbrr rLitemsrHr r%tuplerDrT_quoteAttributeSpecsearch_quoteAttributeLegacy ValueErrorreplacerKrErFr rIrJr rMr)r\ treewalkerr=in_cdatarftokenrgdoctyperEri_ attr_name attr_valuekv quote_attrrpkeyr3r3r4rAs                                    zHTMLSerializer.serializecCs2|rdt|||Sdt||SdS)anSerializes the stream from the treewalker into a string :arg treewalker: the treewalker to serialize :arg encoding: the string encoding to use 
:returns: the serialized tree Example: >>> from html5lib import parse, getTreeWalker >>> from html5lib.serializer import HTMLSerializer >>> token_stream = parse('Hi!') >>> walker = getTreeWalker('etree') >>> serializer = HTMLSerializer(omit_optional_tags=False) >>> serializer.render(walker(token_stream)) 'Hi!' rN)r(listrA)r\rr=r3r3r4r:wszHTMLSerializer.renderXXX ERROR MESSAGE NEEDEDcCs|j||jrtdS)N)rZr#r[SerializeError)r\rpr3r3r4rs zHTMLSerializer.serializeError)N)N)r)__name__ __module__ __qualname__rDrErFrGrHrIrJrKrLrMrNrOrPrQrSr`rbrerAr:rr3r3r3r4r9hs,Y  r9c@seZdZdZdS)rzError in serialized treeN)rrr__doc__r3r3r3r4rsr)r7N)+ __future__rrrZpip._vendor.sixrrecodecsrr constantsr r r r r rrrrxml.sax.saxutilsrr(_quoteAttributeSpecCharscompilerrr$rT_is_ucs4rrrrr!r"islowerr5rArr9 Exceptionrr3r3r3r4sD          1PK!o!__pycache__/_utils.cpython-39.pycnu[a ReC@sHddlmZmZmZddlmZzddlmZWneyNddl mZYn0ddl m Z m Z e rxddl mmZnr kwargs_tuplemodobjsfactory moduleCacherr! moduleFactory|s$       z+moduleFactoryFactory..moduleFactoryr)rHrJrrGr!r ysr csifdd}|S)Ncs6t|t|f}|vr.|i||<|Sr)rr)r=r>r$cachefuncrr!wrappedszmemoize..wrappedr)rMrNrrKr!memoizesrO) __future__rrrtypesrcollections.abcr ImportError collectionsZpip._vendor.sixrrxml.etree.ElementTreeetree ElementTreer Zxml.etree.cElementTreeZ cElementTree__all__eval_xr Exceptionrrr r&r r r rOrrrr!s6         PK!P""treebuilders/dom.pynu[from __future__ import absolute_import, division, unicode_literals try: from collections.abc import MutableMapping except ImportError: # Python 2.7 from collections import MutableMapping from xml.dom import minidom, Node import weakref from . import base from .. 
import constants from ..constants import namespaces from .._utils import moduleFactoryFactory def getDomBuilder(DomImplementation): Dom = DomImplementation class AttrList(MutableMapping): def __init__(self, element): self.element = element def __iter__(self): return iter(self.element.attributes.keys()) def __setitem__(self, name, value): if isinstance(name, tuple): raise NotImplementedError else: attr = self.element.ownerDocument.createAttribute(name) attr.value = value self.element.attributes[name] = attr def __len__(self): return len(self.element.attributes) def items(self): return list(self.element.attributes.items()) def values(self): return list(self.element.attributes.values()) def __getitem__(self, name): if isinstance(name, tuple): raise NotImplementedError else: return self.element.attributes[name].value def __delitem__(self, name): if isinstance(name, tuple): raise NotImplementedError else: del self.element.attributes[name] class NodeBuilder(base.Node): def __init__(self, element): base.Node.__init__(self, element.nodeName) self.element = element namespace = property(lambda self: hasattr(self.element, "namespaceURI") and self.element.namespaceURI or None) def appendChild(self, node): node.parent = self self.element.appendChild(node.element) def insertText(self, data, insertBefore=None): text = self.element.ownerDocument.createTextNode(data) if insertBefore: self.element.insertBefore(text, insertBefore.element) else: self.element.appendChild(text) def insertBefore(self, node, refNode): self.element.insertBefore(node.element, refNode.element) node.parent = self def removeChild(self, node): if node.element.parentNode == self.element: self.element.removeChild(node.element) node.parent = None def reparentChildren(self, newParent): while self.element.hasChildNodes(): child = self.element.firstChild self.element.removeChild(child) newParent.element.appendChild(child) self.childNodes = [] def getAttributes(self): return AttrList(self.element) def 
setAttributes(self, attributes): if attributes: for name, value in list(attributes.items()): if isinstance(name, tuple): if name[0] is not None: qualifiedName = (name[0] + ":" + name[1]) else: qualifiedName = name[1] self.element.setAttributeNS(name[2], qualifiedName, value) else: self.element.setAttribute( name, value) attributes = property(getAttributes, setAttributes) def cloneNode(self): return NodeBuilder(self.element.cloneNode(False)) def hasContent(self): return self.element.hasChildNodes() def getNameTuple(self): if self.namespace is None: return namespaces["html"], self.name else: return self.namespace, self.name nameTuple = property(getNameTuple) class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable def documentClass(self): self.dom = Dom.getDOMImplementation().createDocument(None, None, None) return weakref.proxy(self) def insertDoctype(self, token): name = token["name"] publicId = token["publicId"] systemId = token["systemId"] domimpl = Dom.getDOMImplementation() doctype = domimpl.createDocumentType(name, publicId, systemId) self.document.appendChild(NodeBuilder(doctype)) if Dom == minidom: doctype.ownerDocument = self.dom def elementClass(self, name, namespace=None): if namespace is None and self.defaultNamespace is None: node = self.dom.createElement(name) else: node = self.dom.createElementNS(namespace, name) return NodeBuilder(node) def commentClass(self, data): return NodeBuilder(self.dom.createComment(data)) def fragmentClass(self): return NodeBuilder(self.dom.createDocumentFragment()) def appendChild(self, node): self.dom.appendChild(node.element) def testSerializer(self, element): return testSerializer(element) def getDocument(self): return self.dom def getFragment(self): return base.TreeBuilder.getFragment(self).element def insertText(self, data, parent=None): data = data if parent != self: base.TreeBuilder.insertText(self, data, parent) else: # HACK: allow text nodes as children of the document node if hasattr(self.dom, 
'_child_node_types'): # pylint:disable=protected-access if Node.TEXT_NODE not in self.dom._child_node_types: self.dom._child_node_types = list(self.dom._child_node_types) self.dom._child_node_types.append(Node.TEXT_NODE) self.dom.appendChild(self.dom.createTextNode(data)) implementation = DomImplementation name = None def testSerializer(element): element.normalize() rv = [] def serializeElement(element, indent=0): if element.nodeType == Node.DOCUMENT_TYPE_NODE: if element.name: if element.publicId or element.systemId: publicId = element.publicId or "" systemId = element.systemId or "" rv.append("""|%s""" % (' ' * indent, element.name, publicId, systemId)) else: rv.append("|%s" % (' ' * indent, element.name)) else: rv.append("|%s" % (' ' * indent,)) elif element.nodeType == Node.DOCUMENT_NODE: rv.append("#document") elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE: rv.append("#document-fragment") elif element.nodeType == Node.COMMENT_NODE: rv.append("|%s" % (' ' * indent, element.nodeValue)) elif element.nodeType == Node.TEXT_NODE: rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue)) else: if (hasattr(element, "namespaceURI") and element.namespaceURI is not None): name = "%s %s" % (constants.prefixes[element.namespaceURI], element.nodeName) else: name = element.nodeName rv.append("|%s<%s>" % (' ' * indent, name)) if element.hasAttributes(): attributes = [] for i in range(len(element.attributes)): attr = element.attributes.item(i) name = attr.nodeName value = attr.value ns = attr.namespaceURI if ns: name = "%s %s" % (constants.prefixes[ns], attr.localName) else: name = attr.nodeName attributes.append((name, value)) for name, value in sorted(attributes): rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) indent += 2 for child in element.childNodes: serializeElement(child, indent) serializeElement(element, 0) return "\n".join(rv) return locals() # The actual means to get a module! 
getDomModule = moduleFactoryFactory(getDomBuilder) PK!l:222treebuilders/__pycache__/etree_lxml.cpython-39.pycnu[a Re9@sdZddlmZmZmZddlZddlZddlZzddlm Z Wne y^ddl m Z Yn0ddl m Z ddlmZdd l mZdd l mZdd l mZddlmZdd lmZmZd ZedZedjZGdddeZGdddeZddZ ddZ!Gddde j"Z"dS)aModule for supporting the lxml.etree library. The idea here is to use as much of the native library as possible, without using fragile hacks like custom element names that break between releases. The downside of this is that we cannot represent all possible trees; specifically the following are known to cause problems: Text or comments as siblings of the root element Docypes with no name When any of these things occur, we emit a DataLossWarning )absolute_importdivisionunicode_literalsN)MutableMapping)base)DataLossWarning) constants)etree) _ihatexml)PY3 binary_typeTz {([^}]*)}(.*)Zasdc@seZdZddZdS) DocumentTypecCs||_||_||_dSN)namepublicIdsystemId)selfrrrr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/treebuilders/etree_lxml.py__init__)szDocumentType.__init__N)__name__ __module__ __qualname__rrrrrr(src@s,eZdZddZddZddZeeZdS)DocumentcCsd|_g|_dSr) _elementTree _childNodesrrrrr0szDocument.__init__cCs.|j}|jD]}q||jdSr)rgetrootZ itersiblingsZaddnext_element)relementlastrrr appendChild4s zDocument.appendChildcCs|jSr)rrrrr_getChildNodes;szDocument._getChildNodesN)rrrrr#r$property childNodesrrrrr/srcs6gtjdddfdd |ddS)NTZpreventDoubleDashCommentsrc s,t|ds,t|drd|jjrz|jjsF|jjsFd|jj}nd|jj|jj|jjf}dd|d|f|}|dur|}q|dur||d| }qnnt |t st |t rt |t st jd dksJd d||fn$d |D]}||dqn|jtkrd d||jft|d r(|jr(d d||jfnt |tjsJtj|j}|dur|d}|d}tj|}dd|||fndd||jft|drg}|jD]d\} } t| }|durj|\}} | } tj|}d|| f} n | } || | fq t|D]&\} } dd|d| | fq|jrڈd d|d|jf|d7}|D]} | |qt|d r(|jr(d d|d|jfdS)Ntagrz #document zz|%s%s rrz|%s"%s"z#document-fragmentz|%stailrz |%s<%s %s>z|%s<%s>attribz%s %sz |%s%s="%s") hasattrappenddocinfo internalDTD public_idZ system_url root_namerZ 
getpreviousZgetnext isinstancestrbytessys version_infor( comment_typetextr+r Z_Elementetree_builders tag_regexpmatchgroupr prefixes fromXmlNamer,itemsgroupssorted) r!indentdtd_strZ next_elementZnsmatchnsr(prefix attributesrvalueZ attr_stringchild infosetFilterrvserializeElementrrrMEs|                       z(testSerializer..serializeElement )r)r InfosetFilterjoinr!rrJrtestSerializerAs  F rRcs$gfdd|dS)z4Serialize an element and its child nodes to a stringcst|dsH|jjr:|jjr$|jj}n d|jj}||n|jtkrfd|j fn~|j sd|jfn.d dd|j D}d|j|f|j r|j |D] }|qĈd |jft|d r|j r|j dS) Nr(r)z z<%s>r*cSsg|]\}}d||fqS)z%s="%s"r).0rrHrrr sz6tostring..serializeElement..z<%s %s>zr+)r-r/r0doctyper2r.rr(r8r9r,rPr@r+)r!rDattrrIrLrMrrrMs,        z"tostring..serializeElement)rPrQrrWrtostrings rYcszeZdZeZeZdZdZeZ e Z dddZ ddZ ddZd d Zd d Zd dZdddZdfdd ZddZZS) TreeBuilderNFcstjt|dtjdd|_||_GfdddtGfdddj}Gfdd d j }||_ ||_ t j ||dS) N)fullTreeTr'csPeZdZddZfddZddZddZd d Zd d Zd dZ ddZ dS)z(TreeBuilder.__init__..AttributescSs ||_dSr)r rr!rrrrsz1TreeBuilder.__init__..Attributes.__init__cs4t|tr&d|d|df}n |}|S)N{%s}%srr)r3tupleZcoerceAttribute)rkeyrrKrr _coerceKeys  z3TreeBuilder.__init__..Attributes._coerceKeycSs0|jjj||}ts,t|tr,|d}|S)Nascii)r r,rar r3rdecoderr_rHrrr __getitem__s z4TreeBuilder.__init__..Attributes.__getitem__cSs||jjj||<dSrr r,rardrrr __setitem__sz4TreeBuilder.__init__..Attributes.__setitem__cSs|jjj||=dSrrf)rr_rrr __delitem__sz4TreeBuilder.__init__..Attributes.__delitem__cSst|jjjSr)iterr r,rrrr__iter__sz1TreeBuilder.__init__..Attributes.__iter__cSst|jjjSr)lenr r,rrrr__len__sz0TreeBuilder.__init__..Attributes.__len__cSs|jjjSr)r r,clearrrrrrmsz.TreeBuilder.__init__..Attributes.clearN) rrrrrarergrhrjrlrmrr`rr Attributess rncspeZdZfddZfddZfddZeeeZddZd d Z eee Z dfd d Z ddZ d S)z%TreeBuilder.__init__..Elementcs*|}jj|||d||_dS)N) namespace) coerceElementElementr _attributes)rrrornbuilderrKrrrs z.TreeBuilder.__init__..Element.__init__cs$||_||j|j|j_dSr)rp_nameZ _getETreeTagZ _namespacer r()rrr`rr_setNames 
z.TreeBuilder.__init__..Element._setNamecs |jSr)r?rurr`rr_getNamesz.TreeBuilder.__init__..Element._getNamecSs|jSr)rrrrrr_getAttributessz4TreeBuilder.__init__..Element._getAttributescSs|j}|||dSr)rGrmupdate)rrHrGrrr_setAttributessz4TreeBuilder.__init__..Element._setAttributesNcs|}j|||dSr)ZcoerceCharactersrq insertText)rdata insertBeforertrKrrr{s z0TreeBuilder.__init__..Element.insertTextcSs0t||j|j}|jjr,|jj|jj|Sr)typerror r,ryr\rrr cloneNodesz/TreeBuilder.__init__..Element.cloneNode)N) rrrrrvrwr%rrxrzrGr{rrrsrrrqs    rqcs8eZdZfddZfddZddZeeeZdS)z%TreeBuilder.__init__..Commentcs|}j||dSr) coerceCommentCommentrrr|r~rrr s z.TreeBuilder.__init__..Comment.__init__cs|}||j_dSr)rr r9rr`rr_setDatas z.TreeBuilder.__init__..Comment._setDatacSs|jjSr)r r9rrrr_getDatasz.TreeBuilder.__init__..Comment._getDataN)rrrrrrr%r|rr~rrr s r)r:getETreeModuler r rOrKnamespaceHTMLElementsrrqr elementClass commentClassrrZr)rrr[rqrrrsrrs $zTreeBuilder.__init__cCs$tj||j|_g|_d|_dSr)rrZresetinsertCommentInitial insertCommentinitial_commentsrUrrrrrs zTreeBuilder.resetcCst|Sr)rRr\rrrrR#szTreeBuilder.testSerializercCstr |jjS|jjSdSr)r[documentrrrrrr getDocument&szTreeBuilder.getDocumentcCsFg}|jdj}|jr"||j|t||jrB||j|S)Nr) openElementsr r9r.extendlistr+)rfragmentr!rrr getFragment,s   zTreeBuilder.getFragmentcCsh|d}|d}|d}|s0tdtd|_n4|j|}||krPtdt||||}||_dS)Nrrrz#lxml cannot represent empty doctypez%lxml cannot represent non-xml doctype)warningswarnr rUrKrp doctypeClass)rtokenrrrZ coercedNamerUrrr insertDoctype6s   zTreeBuilder.insertDoctypecCs6|dus||jusJ|jjdus&J|j|dSr)rrrr.rr|parentrrrrFsz TreeBuilder.insertCommentInitialcsB||jkr,|jjdjtkr,tdttt | ||dS)Nz@lxml cannot represent adjacent comments beyond the root elements) rrrr(r8rrr superrZrr __class__rrinsertCommentMainKs   zTreeBuilder.insertCommentMainc Csd}|jr|jjsJ|d|jj7}|jjdus>|jjdur|d|j|jjpRd7}|jjr|jj}|ddkr|ddkrtdt | dd}|ddkr|d |7}q|d |7}n|d 7}|d 7}|jj|d krtdt |d7}t |}|j D] }||d}||jq ||_||j_|d }|d|j}|durj|} n d||f} | |_|||} || _|jj| |j| |j|_dS)NrXz rzGlxml 
cannot represent doctype with a different name to the root elementz$r|ror])rUrrrrKZ coercePubidfindrrr replacer fromstringrrZ addpreviousr documentClassrZ getroottreergetdefaultNamespacer(rrr.rrr) rrZdocStrsysidroot comment_tokencommentrroZ etree_tagZ root_elementrrr insertRootQsP              zTreeBuilder.insertRoot)F)N)N)rrrrrrrrr fragmentClassr implementationrrrRrrrrrr __classcell__rrrrrZs \  rZ)#__doc__ __future__rrrrrer6collections.abcr ImportError collectionsrXrr r r r:r Z lxml.etreeZpip._vendor.sixr rr[compiler;rr(r8objectrrrRrYrZrrrrs.          O)PK!U'ϚT.T.-treebuilders/__pycache__/etree.cpython-39.pycnu[a Re$2@sddlmZmZmZddlmZddlZddlmZddlm Z ddlm Z dd lm Z dd l m Z dd l mZed ZdddZeeZdS))absolute_importdivisionunicode_literals) text_typeN)copy)base) _ihatexml) constants) namespaces)moduleFactoryFactoryz {([^}]*)}(.*)Fc sdjGfdddtjGfdddGfdddGfdd d Gfd d d fd d  fdd}G fdddtj}tS)NZasdcseZdZd$fdd ZddZddZdd ZeeeZd d Z d d Z ee e Z ddZ ddZ ee e ZddZddZeeeZddZddZddZddZd%ddZd d!Zd"d#ZdS)&z getETreeBuilder..ElementNcs^||_||_||||_|dur:td|jf|_n|j|jf|_d|_g|_g|_ dS)Nhtml) _name _namespaceElement _getETreeTag_elementr nameTupleparent _childNodes_flags)selfname namespace ElementTree/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/treebuilders/etree.py__init__s z)getETreeBuilder..Element.__init__cSs|dur|}n d||f}|S)N{%s}%sr)rrrZ etree_tagrrrr%s z-getETreeBuilder..Element._getETreeTagcSs||_||j|j|j_dSN)rrrrtag)rrrrr_setName,sz)getETreeBuilder..Element._setNamecSs|jSr!)rrrrr_getName0sz)getETreeBuilder..Element._getNamecSs||_||j|j|j_dSr!)rrrrr")rrrrr _setNamespace5sz.getETreeBuilder..Element._setNamespacecSs|jSr!)rr$rrr _getNamespace9sz.getETreeBuilder..Element._getNamespacecSs|jjSr!)rattribr$rrr_getAttributes>sz/getETreeBuilder..Element._getAttributescSsV|jj}||rR|D]4\}}t|trDd|d|df}n|}|||<qdS)Nr r r)rr(clearitems isinstancetuple)r attributesZ el_attribkeyvaluerrrr_setAttributesAs 
z/getETreeBuilder..Element._setAttributescSs|jSr!)rr$rrr_getChildNodesPsz/getETreeBuilder..Element._getChildNodescSs*|jdd=g|_|D]}||qdSr!)rrZ insertChild)rr0elementrrr_setChildNodesSs z/getETreeBuilder..Element._setChildNodescSst|jjpt|jS)z,Return true if the node has children or text)boolrtextlenr$rrr hasContent[sz+getETreeBuilder..Element.hasContentcSs$|j||j|j||_dSr!)rappendrrrnoderrr appendChild_s z,getETreeBuilder..Element.appendChildcSs,t|j|j}|j||j||_dSr!)listrindexinsertr)rr;refNoder>rrr insertBeforedsz-getETreeBuilder..Element.insertBeforecSs$|j||j|jd|_dSr!)rremoverrr:rrr removeChildis z,getETreeBuilder..Element.removeChildcSst|js,|jjsd|j_|jj|7_n|durb|jdjsLd|jd_|jdj|7_nxt|j}||j}|dkr|j|djsd|j|d_|j|dj|7_n |jjsd|j_|jj|7_dS)Nrr)r7rr6tailr=r>)rdatarAchildrenr>rrr insertTextns"     z+getETreeBuilder..Element.insertTextcSs.t||j|j}|jjr*t|jj|j_|Sr!)typerrrr(rrr3rrr cloneNodesz*getETreeBuilder..Element.cloneNodecSsl|jr"|jdjj|jj7_n0|jjs2d|j_|jjdurR|jj|jj7_d|j_tj||dS)NrErD) childNodesrrFr6rNodereparentChildren)r newParentrrrrOs z1getETreeBuilder..Element.reparentChildren)N)N)__name__ __module__ __qualname__rrr#r%propertyrr&r'rr)r1r.r2r4rMr8r<rArCrIrLrOrrrrrs*      rcs2eZdZfddZddZddZeeeZdS)z getETreeBuilder..Commentcs"||_d|_g|_g|_dSr!)Commentrrrr)rrGrrrrs z)getETreeBuilder..Comment.__init__cSs|jjSr!rr6r$rrr_getDatasz)getETreeBuilder..Comment._getDatacSs ||j_dSr!rVrr0rrr_setDatasz)getETreeBuilder..Comment._setDataN)rQrRrSrrWrYrTrGrrrrrUs rUcsLeZdZfddZddZddZeeeZddZd d Z eee Z d S) z%getETreeBuilder..DocumentTypecs$|d||j_||_||_dS)N )rrr6publicIdsystemId)rrr[r\rrrrs z.getETreeBuilder..DocumentType.__init__cSs|jddS)Nr[rDrgetr$rrr _getPublicIdsz2getETreeBuilder..DocumentType._getPublicIdcSs|dur|jd|dS)Nr[rsetrXrrr _setPublicIdsz2getETreeBuilder..DocumentType._setPublicIdcSs|jddS)Nr\rDr^r$rrr _getSystemIdsz2getETreeBuilder..DocumentType._getSystemIdcSs|dur|jd|dS)Nr\rarXrrr _setSystemIdsz2getETreeBuilder..DocumentType._setSystemIdN) rQrRrSrr`rcrTr[rdrer\rr]rr DocumentTypes  
rfcseZdZfddZdS)z!getETreeBuilder..Documentcs|ddS)N DOCUMENT_ROOTrr$r]rrrsz*getETreeBuilder..Document.__init__NrQrRrSrrr]rrDocumentsrjcseZdZfddZdS)z)getETreeBuilder..DocumentFragmentcs|ddS)NZDOCUMENT_FRAGMENTrhr$r]rrrsz2getETreeBuilder..DocumentFragment.__init__Nrirr]rrDocumentFragmentsrkcs*gdfdd |ddS)Nrc st|ds|}|jdkrz|ds0|drd|dp rgz #documentz|%s"%s" r Document node cannot have tailr($Document node cannot have attributesz|%szExpected unicode, got %s, %sz%s %sz|%s<%s>z |%s%s="%s")hasattrgetrootr"r_r9r6rF TypeErrorr7r(r,rrJ tag_regexpmatchgroupsr prefixesr+sorted) r3indentr[r\Znsmatchrnsprefixr.r0Z attr_stringchild)ElementTreeCommentTypervserializeElementrrr~sb                    zAgetETreeBuilder..testSerializer..serializeElement )r)joinr3)r|)r}r~rtestSerializers7 z'getETreeBuilder..testSerializercs2gtfdd|dS)z4Serialize an element and its child nodes to a stringcst|jr|}|jdkr||ds2|drf|dp>d}|dpLd}d|j||fnd|jfn|jdkr|jdur|j|jdurtdt |d rt |j rtd |D] }|qn|jkrd |jfn|j sd  |jfn2d fdd|j D}d|j|f|jrd|j|D]}|qhd|jf|jr|jdS)NrZr[r\rDzrlrgrnr(roz z<%s>rmcs"g|]\}}d||fqS)z%s="%s") fromXmlName).0rr0)filterrr )s zOgetETreeBuilder..tostring..serializeElement..z<%s %s>z)r,rrqr"r_r9r6rFrrrpr7r(rrr+)r3r[r\r{attr)rr|rr}r~rrr~ sD           z;getETreeBuilder..tostring..serializeElementrD)r Z InfosetFilterrr)rr|)rr}r~rtostrings -z!getETreeBuilder..tostringcsDeZdZZZZZZZfddZ fddZ ddZ dS)z$getETreeBuilder..TreeBuildercs|Sr!rrK)rrrrDsz3getETreeBuilder..TreeBuilder.testSerializercs<r |jjS|jdur*|jjd|jS|jjdSdS)Nz{%s}htmlr)documentrdefaultNamespacefindr$)fullTreerr getDocumentGs z0getETreeBuilder..TreeBuilder.getDocumentcSstj|jSr!)r TreeBuilder getFragmentrr$rrrrQsz0getETreeBuilder..TreeBuilder.getFragmentN) rQrRrS documentClass doctypeClass elementClass commentClass fragmentClassimplementationrrrr)rUrjrkrfrElementTreeImplementationrrrrr<s  r)rUr"rrNrlocals)rrrrr) rUrjrkrfrrr|rrrrgetETreeBuilders  >6$r)F) __future__rrrZpip._vendor.sixrrerrDrr r r _utilsr compilersrgetETreeModulerrrrs         
FPK!SS%%+treebuilders/__pycache__/dom.cpython-39.pycnu[a Re"@sddlmZmZmZzddlmZWneyBddlmZYn0ddlm Z m Z ddl Z ddl m Z ddl mZdd lmZdd lmZd d ZeeZdS) )absolute_importdivisionunicode_literals)MutableMapping)minidomNodeN)base) constants) namespaces)moduleFactoryFactorycsVGdddtGfdddtjGfdddtj}ddtS) Nc@sLeZdZddZddZddZddZd d Zd d Zd dZ ddZ dS)zgetDomBuilder..AttrListcSs ||_dSNelementselfrr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/treebuilders/dom.py__init__sz(getDomBuilder..AttrList.__init__cSst|jjSr)iterr attributeskeysrrrr__iter__sz(getDomBuilder..AttrList.__iter__cSs4t|trtn |jj|}||_||jj|<dSr) isinstancetupleNotImplementedErrorr ownerDocumentZcreateAttributevaluer)rnamerattrrrr __setitem__s  z+getDomBuilder..AttrList.__setitem__cSs t|jjSr)lenrrrrrr__len__#sz'getDomBuilder..AttrList.__len__cSst|jjSr)listrritemsrrrrr&&sz%getDomBuilder..AttrList.itemscSst|jjSr)r%rrvaluesrrrrr')sz&getDomBuilder..AttrList.valuescSs"t|trtn|jj|jSdSr)rrrrrrrr rrr __getitem__,s z+getDomBuilder..AttrList.__getitem__cSst|trtn |jj|=dSr)rrrrrr(rrr __delitem__2s z+getDomBuilder..AttrList.__delitem__N) __name__ __module__ __qualname__rrr"r$r&r'r)r*rrrrAttrListsr.cseZdZddZeddZddZddd Zd d Zd d Z ddZ fddZ ddZ ee e Z fddZddZddZeeZdS)z"getDomBuilder..NodeBuildercSstj||j||_dSr)r rrnodeNamerrrrrr9sz+getDomBuilder..NodeBuilder.__init__cSst|jdr|jjpdS)N namespaceURI)hasattrrr0rrrr=s z+getDomBuilder..NodeBuilder.cSs||_|j|jdSr)parentr appendChildrnoderrrr4@sz.getDomBuilder..NodeBuilder.appendChildNcSs4|jj|}|r$|j||jn |j|dSr)rrcreateTextNode insertBeforer4)rdatar8textrrr insertTextDsz-getDomBuilder..NodeBuilder.insertTextcSs|j|j|j||_dSr)rr8r3)rr6refNoderrrr8Ksz/getDomBuilder..NodeBuilder.insertBeforecSs&|jj|jkr|j|jd|_dSr)rZ parentNode removeChildr3r5rrrr=Osz.getDomBuilder..NodeBuilder.removeChildcSs6|jr,|jj}|j||j|qg|_dSr)r hasChildNodesZ firstChildr=r4 childNodes)r newParentchildrrrreparentChildrenTs   
z3getDomBuilder..NodeBuilder.reparentChildrencs |jSrrr)r.rr getAttributes[sz0getDomBuilder..NodeBuilder.getAttributescSsv|rrt|D]`\}}t|trb|ddurD|dd|d}n|d}|j|d||q|j||qdS)Nr:rr )r%r&rrrZsetAttributeNSZ setAttribute)rrr rZ qualifiedNamerrr setAttributes^s  z0getDomBuilder..NodeBuilder.setAttributescs|jdS)NF)r cloneNoder NodeBuilderrrrFmsz,getDomBuilder..NodeBuilder.cloneNodecSs |jSr)rr>rrrr hasContentpsz-getDomBuilder..NodeBuilder.hasContentcSs(|jdurtd|jfS|j|jfSdS)Nhtml) namespacer r rrrr getNameTupless z/getDomBuilder..NodeBuilder.getNameTuple)N)r+r,r-rpropertyrKr4r;r8r=rBrCrErrFrIrL nameTupler)r.rHrrrH8s     rHcseZdZfddZfddZdfdd Zfdd Zfd d Zd d ZfddZ ddZ ddZ dddZ Z dZdS)z"getDomBuilder..TreeBuildercsddd|_t|Sr)getDOMImplementationZcreateDocumentdomweakrefproxyr)Domrr documentClass|sz0getDomBuilder..TreeBuilder.documentClasscsR|d}|d}|d}}||||}|j|tkrN|j|_dS)Nr publicIdsystemId)rOZcreateDocumentTypedocumentr4rrPr)rtokenr rUrVZdomimpldoctype)rSrHrr insertDoctypesz0getDomBuilder..TreeBuilder.insertDoctypeNcs6|dur |jdur |j|}n|j||}|Sr)defaultNamespacerP createElementZcreateElementNS)rr rKr6rGrr elementClasssz/getDomBuilder..TreeBuilder.elementClasscs|j|Sr)rPZ createComment)rr9rGrr commentClasssz/getDomBuilder..TreeBuilder.commentClasscs|jSr)rPZcreateDocumentFragmentrrGrr fragmentClasssz0getDomBuilder..TreeBuilder.fragmentClasscSs|j|jdSr)rPr4rr5rrrr4sz.getDomBuilder..TreeBuilder.appendChildcs|Srrr)testSerializerrrr`sz1getDomBuilder..TreeBuilder.testSerializercSs|jSr)rPrrrr getDocumentsz.getDomBuilder..TreeBuilder.getDocumentcSstj|jSr)r TreeBuilder getFragmentrrrrrrcsz.getDomBuilder..TreeBuilder.getFragmentcSsp|}||krtj|||nNt|jdrXtj|jjvrXt|jj|j_|jj tj|j |j |dS)N_child_node_types) r rbr;r1rPr TEXT_NODErdr%appendr4r7)rr9r3rrrr;s z-getDomBuilder..TreeBuilder.insertText)N)N)r+r,r-rTrZr]r^r_r4r`rarcr;implementationr r)rSDomImplementationrHr`rrrb{s      rbcs0|gdfdd |ddS)Nrc s|jtjkr|jrj|js|jrP|jp&d}|jp0d}dd||j||fq~dd||jfndd|fnr|jtjkrdnX|jtjkrdn>|jtj krވdd||j fn|jtj krd d||j fnt |d r6|j 
dur6d tj|j |jf}n|j}d d||f|rg}tt|jD]T}|j|}|j}|j}|j } | rd tj| |jf}n|j}|||fqnt|D]&\}}d d|d||fq|d7}|jD]} | |qdS)Nz|%s z|%sz|%sz #documentz#document-fragmentz|%sz|%s"%s"r0z%s %sz|%s<%s>z |%s%s="%s"r )ZnodeTyperZDOCUMENT_TYPE_NODEr rUrVrfZ DOCUMENT_NODEZDOCUMENT_FRAGMENT_NODEZ COMMENT_NODEZ nodeValuerer1r0r prefixesr/Z hasAttributesranger#ritemrZ localNamesortedr?) rindentrUrVr rir!rnsrArvserializeElementrrrtsT             z?getDomBuilder..testSerializer..serializeElement )r) normalizejoinrrrrrr`s . z%getDomBuilder..testSerializer)rr rrblocals)rhrbr)r.rSrhrHr`r getDomBuilders $C:6ry) __future__rrrcollections.abcr ImportError collectionsxml.domrrrQrir r r _utilsr ry getDomModulerrrrs     _PK!Dn[,[,,treebuilders/__pycache__/base.cpython-39.pycnu[a Re8@sddlmZmZmZddlmZddlmZmZm Z dZ e edfe ee ddfhBdfe ee dd fe dd fhBdfe e ddfe dd fgdfe e dd fe dd fgdfdZ Gddde ZGdddeZGddde ZdS))absolute_importdivisionunicode_literals) text_type)scopingElementstableInsertModeElements namespacesNFhtmlbuttonolultableoptgroupoptionT)Nr listrselectc@sbeZdZdZddZddZddZdd Zdd d Zd dZ ddZ ddZ ddZ ddZ d S)NodezRepresents an item in the treecCs(||_d|_d|_i|_g|_g|_dS)zRCreates a Node :arg name: The tag name associated with the node N)nameparentvalue attributes childNodes_flags)selfrr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/treebuilders/base.py__init__s z Node.__init__cCs:ddd|jD}|r,d|j|fSd|jSdS)N cSsg|]\}}d||fqS)z%s="%s"r).0rrrrr .sz Node.__str__..z<%s %s><%s>)joinritemsr)r attributesStrrrr__str__-s  z Node.__str__cCs d|jS)Nr!)rrrrr__repr__6sz Node.__repr__cCstdS)z[Insert node as a child of the current node :arg node: the node to insert NNotImplementedErrorrnoderrr appendChild9szNode.appendChildNcCstdS)aBInsert data as text in the current node, positioned before the start of node insertBefore or to the end of the node's text. 
:arg data: the data to insert :arg insertBefore: True if you want to insert the text before the node and False if you want to insert it after the node Nr()rdata insertBeforerrr insertTextAs zNode.insertTextcCstdS)aInsert node as a child of the current node, before refNode in the list of child nodes. Raises ValueError if refNode is not a child of the current node :arg node: the node to insert :arg refNode: the child node to insert the node before Nr()rr+refNoderrrr.Ms zNode.insertBeforecCstdS)zhRemove node from the children of the current node :arg node: the child node to remove Nr(r*rrr removeChildYszNode.removeChildcCs |jD]}||qg|_dS)zMove all the children of the current node to newParent. This is needed so that trees that don't store text as nodes move the text in the correct way :arg newParent: the node to move all this node's children to N)rr,)r newParentchildrrrreparentChildrenas  zNode.reparentChildrencCstdS)zReturn a shallow copy of the current node i.e. a node with the same name and attributes but with no parent or child nodes Nr(r&rrr cloneNodenszNode.cloneNodecCstdS)zFReturn true if the node has children or text, false otherwise Nr(r&rrr hasContenttszNode.hasContent)N)__name__ __module__ __qualname____doc__rr%r'r,r/r.r1r4r5r6rrrrrs    rc@seZdZddZddZdS)ActiveFormattingElementscCsfd}|tkrV|dddD]:}|tkr*qV|||r>|d7}|dkr||qVqt||dS)Nr)Marker nodesEqualremoverappend)rr+ equalCountelementrrrrB{s  zActiveFormattingElements.appendcCs$|j|jksdS|j|jks dSdS)NFT) nameTupler)rnode1node2rrrr@s   z#ActiveFormattingElements.nodesEqualN)r7r8r9rBr@rrrrr;zs r;c@seZdZdZdZdZdZdZdZddZ ddZ d+ddZ d d Z d d Z d dZddZddZd,ddZddZddZddZeeeZddZddZd-dd Zd!d"Zd.d#d$Zd%d&Zd'd(Zd)d*ZdS)/ TreeBuilderaBase treebuilder implementation * documentClass - the class to use for the bottommost node of a document * elementClass - the class to use for HTML Elements * commentClass - the class to use for comments * doctypeClass - the class to use for doctypes NcCs|r d|_nd|_|dS)zmCreate a TreeBuilder :arg 
namespaceHTMLElements: whether or not to namespace HTML elements zhttp://www.w3.org/1999/xhtmlN)defaultNamespacereset)rnamespaceHTMLElementsrrrrszTreeBuilder.__init__cCs.g|_t|_d|_d|_d|_||_dS)NF) openElementsr;activeFormattingElements headPointer formPointerinsertFromTable documentClassdocumentr&rrrrJs zTreeBuilder.resetcCst|d}|s2t|tr$td|f}t|ts2Jt|\}}t|jD]>}|r^||kr^dS|sr|j|krrdS||j|vArHdSqHdsJdS)NrEr TF) hasattr isinstancerr tuplelistElementsMapreversedrLrE)rtargetvariant exactNode listElementsinvertr+rrrelementInScopes     zTreeBuilder.elementInScopecCs|js dSt|jd}|j|}|tks4||jvr8dS|tkrl||jvrl|dkrXd}ql|d8}|j|}q8|d7}|j|}|}|d|j|j|jd}||j|<||jdkrlqqldS)Nr=rr<StartTag)typer namespacer-) rMlenr?rLr5 insertElementrr`r)rientryclonerDrrr#reconstructActiveFormattingElementss.    z/TreeBuilder.reconstructActiveFormattingElementscCs(|j}|jr$|tkr$|j}q dSN)rMpopr?)rrdrrrclearActiveFormattingElementss z)TreeBuilder.clearActiveFormattingElementscCs:|jdddD]$}|tkr"q6q|j|kr|SqdS)zCheck if an element exists between the end of the active formatting elements and the last marker. 
If it does, return it, else return falseNr<F)rMr?r)rritemrrr!elementInActiveFormattingElements s   z-TreeBuilder.elementInActiveFormattingElementscCs&||}|j||j|dSrg) createElementrLrBrRr,)rtokenrDrrr insertRoots  zTreeBuilder.insertRootcCs6|d}|d}|d}||||}|j|dS)NrpublicIdsystemId) doctypeClassrRr,)rrmrrorpdoctyperrr insertDoctype s zTreeBuilder.insertDoctypecCs*|dur|jd}|||ddS)Nr<r-)rLr, commentClass)rrmrrrr insertComment(s zTreeBuilder.insertCommentcCs0|d}|d|j}|||}|d|_|S)z.Create an element but don't insert it anywhererr`r-)getrI elementClassrrrmrr`rDrrrrl-s   zTreeBuilder.createElementcCs|jSrg)_insertFromTabler&rrr_getInsertFromTable5szTreeBuilder._getInsertFromTablecCs ||_|r|j|_n|j|_dS)zsSwitch the function used to insert an element from the normal one to the misnested table one and back againN)ryinsertElementTablerbinsertElementNormal)rrrrr_setInsertFromTable8s zTreeBuilder._setInsertFromTablecCsb|d}t|tsJd||d|j}|||}|d|_|jd||j||S)NrzElement %s not unicoder`r-r<) rTrrvrIrwrrLr,rBrxrrrr|Cs   zTreeBuilder.insertElementNormalcCs`||}|jdjtvr$||S|\}}|durD||n ||||j||S)z-Create an element and insert it into the treer<N) rlrLrrr|getTableMisnestedNodePositionr,r.rB)rrmrDrr.rrrr{Ms      zTreeBuilder.insertElementTablecCsV|dur|jd}|jr.|jr:|jdjtvr:||n|\}}|||dS)zInsert text data.Nr<)rLrPrrr/r~)rr-rr.rrrr/]s     zTreeBuilder.insertTextcCstd}d}d}|jdddD]}|jdkr|}q4q|rb|jrJ|j}|}ql|j|j|d}n |jd}||fS)zsGet the foster parent element, and sibling to insert before (or None) when inserting a misnested table nodeNr<rr=r)rLrrindex)r lastTable fosterParentr.elmrrrr~ls   z)TreeBuilder.getTableMisnestedNodePositioncCs8|jdj}|tdvr4||kr4|j||dS)Nr<)dddtlirrprprt)rLr frozensetrhgenerateImpliedEndTags)rexcluderrrrrs    z"TreeBuilder.generateImpliedEndTagscCs|jS)zReturn the final tree)rRr&rrr getDocumentszTreeBuilder.getDocumentcCs|}|jd||S)zReturn the final fragmentr) fragmentClassrLr4)rfragmentrrr getFragmentszTreeBuilder.getFragmentcCstdS)zSerialize the subtree of node in the format required by unit 
tests :arg node: the node from which to start serializing Nr(r*rrrtestSerializerszTreeBuilder.testSerializer)N)N)N)N)r7r8r9r:rQrwrtrqrrrJr]rfrirkrnrsrurlrzr}propertyrPr|r{r/r~rrrrrrrrrHs6   .     rH) __future__rrrZpip._vendor.sixr constantsrrr r?rrVobjectrrr;rHrrrrs0        cPK!:E+ + 0treebuilders/__pycache__/__init__.cpython-39.pycnu[a Re@s6dZddlmZmZmZddlmZiZdddZdS) a)A collection of modules for building different kinds of trees from HTML documents. To create a treebuilder for a new type of tree, you need to do implement several things: 1. A set of classes for various types of elements: Document, Doctype, Comment, Element. These must implement the interface of ``base.treebuilders.Node`` (although comment nodes have a different signature for their constructor, see ``treebuilders.etree.Comment``) Textual content may also be implemented as another node type, or not, as your tree implementation requires. 2. A treebuilder object (called ``TreeBuilder`` by convention) that inherits from ``treebuilders.base.TreeBuilder``. This has 4 required attributes: * ``documentClass`` - the class to use for the bottommost node of a document * ``elementClass`` - the class to use for HTML Elements * ``commentClass`` - the class to use for comments * ``doctypeClass`` - the class to use for doctypes It also has one required method: * ``getDocument`` - Returns the root node of the complete document tree 3. 
If you wish to run the unit tests, you must also create a ``testSerializer`` method on your treebuilder which accepts a node and returns a string containing Node and its children serialized according to the format used in the unittests )absolute_importdivisionunicode_literals) default_etreeNcKs|}|tvr|dkrPddlm}|dur>> from html5lib.treebuilders import getTreeBuilder >>> builder = getTreeBuilder('etree') dom)rNr)minidomlxml) etree_lxmletree)r zUnrecognised treebuilder "%s" )lowertreeBuilderCacherxml.domr getDomModule TreeBuilderr r rgetETreeModule ValueErrorget)treeTypeimplementationkwargsrr r r r/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/treebuilders/__init__.pygetTreeBuilder's$      r)N) __doc__ __future__rrr_utilsrrrrrrrs PK!>>h88treebuilders/base.pynu[from __future__ import absolute_import, division, unicode_literals from pip._vendor.six import text_type from ..constants import scopingElements, tableInsertModeElements, namespaces # The scope markers are inserted when entering object elements, # marquees, table cells, and table captions, and are used to prevent formatting # from "leaking" into tables, object elements, and marquees. 
Marker = None listElementsMap = { None: (frozenset(scopingElements), False), "button": (frozenset(scopingElements | {(namespaces["html"], "button")}), False), "list": (frozenset(scopingElements | {(namespaces["html"], "ol"), (namespaces["html"], "ul")}), False), "table": (frozenset([(namespaces["html"], "html"), (namespaces["html"], "table")]), False), "select": (frozenset([(namespaces["html"], "optgroup"), (namespaces["html"], "option")]), True) } class Node(object): """Represents an item in the tree""" def __init__(self, name): """Creates a Node :arg name: The tag name associated with the node """ # The tag name associated with the node self.name = name # The parent of the current node (or None for the document node) self.parent = None # The value of the current node (applies to text nodes and comments) self.value = None # A dict holding name -> value pairs for attributes of the node self.attributes = {} # A list of child nodes of the current node. This must include all # elements but not necessarily other node types. self.childNodes = [] # A list of miscellaneous flags that can be set on the node. self._flags = [] def __str__(self): attributesStr = " ".join(["%s=\"%s\"" % (name, value) for name, value in self.attributes.items()]) if attributesStr: return "<%s %s>" % (self.name, attributesStr) else: return "<%s>" % (self.name) def __repr__(self): return "<%s>" % (self.name) def appendChild(self, node): """Insert node as a child of the current node :arg node: the node to insert """ raise NotImplementedError def insertText(self, data, insertBefore=None): """Insert data as text in the current node, positioned before the start of node insertBefore or to the end of the node's text. 
:arg data: the data to insert :arg insertBefore: True if you want to insert the text before the node and False if you want to insert it after the node """ raise NotImplementedError def insertBefore(self, node, refNode): """Insert node as a child of the current node, before refNode in the list of child nodes. Raises ValueError if refNode is not a child of the current node :arg node: the node to insert :arg refNode: the child node to insert the node before """ raise NotImplementedError def removeChild(self, node): """Remove node from the children of the current node :arg node: the child node to remove """ raise NotImplementedError def reparentChildren(self, newParent): """Move all the children of the current node to newParent. This is needed so that trees that don't store text as nodes move the text in the correct way :arg newParent: the node to move all this node's children to """ # XXX - should this method be made more general? for child in self.childNodes: newParent.appendChild(child) self.childNodes = [] def cloneNode(self): """Return a shallow copy of the current node i.e. 
a node with the same name and attributes but with no parent or child nodes """ raise NotImplementedError def hasContent(self): """Return true if the node has children or text, false otherwise """ raise NotImplementedError class ActiveFormattingElements(list): def append(self, node): equalCount = 0 if node != Marker: for element in self[::-1]: if element == Marker: break if self.nodesEqual(element, node): equalCount += 1 if equalCount == 3: self.remove(element) break list.append(self, node) def nodesEqual(self, node1, node2): if not node1.nameTuple == node2.nameTuple: return False if not node1.attributes == node2.attributes: return False return True class TreeBuilder(object): """Base treebuilder implementation * documentClass - the class to use for the bottommost node of a document * elementClass - the class to use for HTML Elements * commentClass - the class to use for comments * doctypeClass - the class to use for doctypes """ # pylint:disable=not-callable # Document class documentClass = None # The class to use for creating a node elementClass = None # The class to use for creating comments commentClass = None # The class to use for creating doctypes doctypeClass = None # Fragment class fragmentClass = None def __init__(self, namespaceHTMLElements): """Create a TreeBuilder :arg namespaceHTMLElements: whether or not to namespace HTML elements """ if namespaceHTMLElements: self.defaultNamespace = "http://www.w3.org/1999/xhtml" else: self.defaultNamespace = None self.reset() def reset(self): self.openElements = [] self.activeFormattingElements = ActiveFormattingElements() # XXX - rename these to headElement, formElement self.headPointer = None self.formPointer = None self.insertFromTable = False self.document = self.documentClass() def elementInScope(self, target, variant=None): # If we pass a node in we match that. 
if we pass a string # match any node with that name exactNode = hasattr(target, "nameTuple") if not exactNode: if isinstance(target, text_type): target = (namespaces["html"], target) assert isinstance(target, tuple) listElements, invert = listElementsMap[variant] for node in reversed(self.openElements): if exactNode and node == target: return True elif not exactNode and node.nameTuple == target: return True elif (invert ^ (node.nameTuple in listElements)): return False assert False # We should never reach this point def reconstructActiveFormattingElements(self): # Within this algorithm the order of steps described in the # specification is not quite the same as the order of steps in the # code. It should still do the same though. # Step 1: stop the algorithm when there's nothing to do. if not self.activeFormattingElements: return # Step 2 and step 3: we start with the last element. So i is -1. i = len(self.activeFormattingElements) - 1 entry = self.activeFormattingElements[i] if entry == Marker or entry in self.openElements: return # Step 6 while entry != Marker and entry not in self.openElements: if i == 0: # This will be reset to 0 below i = -1 break i -= 1 # Step 5: let entry be one earlier in the list. 
entry = self.activeFormattingElements[i] while True: # Step 7 i += 1 # Step 8 entry = self.activeFormattingElements[i] clone = entry.cloneNode() # Mainly to get a new copy of the attributes # Step 9 element = self.insertElement({"type": "StartTag", "name": clone.name, "namespace": clone.namespace, "data": clone.attributes}) # Step 10 self.activeFormattingElements[i] = element # Step 11 if element == self.activeFormattingElements[-1]: break def clearActiveFormattingElements(self): entry = self.activeFormattingElements.pop() while self.activeFormattingElements and entry != Marker: entry = self.activeFormattingElements.pop() def elementInActiveFormattingElements(self, name): """Check if an element exists between the end of the active formatting elements and the last marker. If it does, return it, else return false""" for item in self.activeFormattingElements[::-1]: # Check for Marker first because if it's a Marker it doesn't have a # name attribute. if item == Marker: break elif item.name == name: return item return False def insertRoot(self, token): element = self.createElement(token) self.openElements.append(element) self.document.appendChild(element) def insertDoctype(self, token): name = token["name"] publicId = token["publicId"] systemId = token["systemId"] doctype = self.doctypeClass(name, publicId, systemId) self.document.appendChild(doctype) def insertComment(self, token, parent=None): if parent is None: parent = self.openElements[-1] parent.appendChild(self.commentClass(token["data"])) def createElement(self, token): """Create an element but don't insert it anywhere""" name = token["name"] namespace = token.get("namespace", self.defaultNamespace) element = self.elementClass(name, namespace) element.attributes = token["data"] return element def _getInsertFromTable(self): return self._insertFromTable def _setInsertFromTable(self, value): """Switch the function used to insert an element from the normal one to the misnested table one and back again""" 
self._insertFromTable = value if value: self.insertElement = self.insertElementTable else: self.insertElement = self.insertElementNormal insertFromTable = property(_getInsertFromTable, _setInsertFromTable) def insertElementNormal(self, token): name = token["name"] assert isinstance(name, text_type), "Element %s not unicode" % name namespace = token.get("namespace", self.defaultNamespace) element = self.elementClass(name, namespace) element.attributes = token["data"] self.openElements[-1].appendChild(element) self.openElements.append(element) return element def insertElementTable(self, token): """Create an element and insert it into the tree""" element = self.createElement(token) if self.openElements[-1].name not in tableInsertModeElements: return self.insertElementNormal(token) else: # We should be in the InTable mode. This means we want to do # special magic element rearranging parent, insertBefore = self.getTableMisnestedNodePosition() if insertBefore is None: parent.appendChild(element) else: parent.insertBefore(element, insertBefore) self.openElements.append(element) return element def insertText(self, data, parent=None): """Insert text data.""" if parent is None: parent = self.openElements[-1] if (not self.insertFromTable or (self.insertFromTable and self.openElements[-1].name not in tableInsertModeElements)): parent.insertText(data) else: # We should be in the InTable mode. 
This means we want to do # special magic element rearranging parent, insertBefore = self.getTableMisnestedNodePosition() parent.insertText(data, insertBefore) def getTableMisnestedNodePosition(self): """Get the foster parent element, and sibling to insert before (or None) when inserting a misnested table node""" # The foster parent element is the one which comes before the most # recently opened table element # XXX - this is really inelegant lastTable = None fosterParent = None insertBefore = None for elm in self.openElements[::-1]: if elm.name == "table": lastTable = elm break if lastTable: # XXX - we should really check that this parent is actually a # node here if lastTable.parent: fosterParent = lastTable.parent insertBefore = lastTable else: fosterParent = self.openElements[ self.openElements.index(lastTable) - 1] else: fosterParent = self.openElements[0] return fosterParent, insertBefore def generateImpliedEndTags(self, exclude=None): name = self.openElements[-1].name # XXX td, th and tr are not actually needed if (name in frozenset(("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt")) and name != exclude): self.openElements.pop() # XXX This is not entirely what the specification says. We should # investigate it more closely. self.generateImpliedEndTags(exclude) def getDocument(self): """Return the final tree""" return self.document def getFragment(self): """Return the final fragment""" # assert self.innerHTML fragment = self.fragmentClass() self.openElements[0].reparentChildren(fragment) return fragment def testSerializer(self, node): """Serialize the subtree of node in the format required by unit tests :arg node: the node from which to start serializing """ raise NotImplementedError PK!99treebuilders/etree_lxml.pynu["""Module for supporting the lxml.etree library. The idea here is to use as much of the native library as possible, without using fragile hacks like custom element names that break between releases. 
The downside of this is that we cannot represent all possible trees; specifically the following are known to cause problems: Text or comments as siblings of the root element Docypes with no name When any of these things occur, we emit a DataLossWarning """ from __future__ import absolute_import, division, unicode_literals # pylint:disable=protected-access import warnings import re import sys try: from collections.abc import MutableMapping except ImportError: from collections import MutableMapping from . import base from ..constants import DataLossWarning from .. import constants from . import etree as etree_builders from .. import _ihatexml import lxml.etree as etree from pip._vendor.six import PY3, binary_type fullTree = True tag_regexp = re.compile("{([^}]*)}(.*)") comment_type = etree.Comment("asd").tag class DocumentType(object): def __init__(self, name, publicId, systemId): self.name = name self.publicId = publicId self.systemId = systemId class Document(object): def __init__(self): self._elementTree = None self._childNodes = [] def appendChild(self, element): last = self._elementTree.getroot() for last in self._elementTree.getroot().itersiblings(): pass last.addnext(element._element) def _getChildNodes(self): return self._childNodes childNodes = property(_getChildNodes) def testSerializer(element): rv = [] infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True) def serializeElement(element, indent=0): if not hasattr(element, "tag"): if hasattr(element, "getroot"): # Full tree case rv.append("#document") if element.docinfo.internalDTD: if not (element.docinfo.public_id or element.docinfo.system_url): dtd_str = "" % element.docinfo.root_name else: dtd_str = """""" % ( element.docinfo.root_name, element.docinfo.public_id, element.docinfo.system_url) rv.append("|%s%s" % (' ' * (indent + 2), dtd_str)) next_element = element.getroot() while next_element.getprevious() is not None: next_element = next_element.getprevious() while next_element is not 
None: serializeElement(next_element, indent + 2) next_element = next_element.getnext() elif isinstance(element, str) or isinstance(element, bytes): # Text in a fragment assert isinstance(element, str) or sys.version_info[0] == 2 rv.append("|%s\"%s\"" % (' ' * indent, element)) else: # Fragment case rv.append("#document-fragment") for next_element in element: serializeElement(next_element, indent + 2) elif element.tag == comment_type: rv.append("|%s" % (' ' * indent, element.text)) if hasattr(element, "tail") and element.tail: rv.append("|%s\"%s\"" % (' ' * indent, element.tail)) else: assert isinstance(element, etree._Element) nsmatch = etree_builders.tag_regexp.match(element.tag) if nsmatch is not None: ns = nsmatch.group(1) tag = nsmatch.group(2) prefix = constants.prefixes[ns] rv.append("|%s<%s %s>" % (' ' * indent, prefix, infosetFilter.fromXmlName(tag))) else: rv.append("|%s<%s>" % (' ' * indent, infosetFilter.fromXmlName(element.tag))) if hasattr(element, "attrib"): attributes = [] for name, value in element.attrib.items(): nsmatch = tag_regexp.match(name) if nsmatch is not None: ns, name = nsmatch.groups() name = infosetFilter.fromXmlName(name) prefix = constants.prefixes[ns] attr_string = "%s %s" % (prefix, name) else: attr_string = infosetFilter.fromXmlName(name) attributes.append((attr_string, value)) for name, value in sorted(attributes): rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) if element.text: rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) indent += 2 for child in element: serializeElement(child, indent) if hasattr(element, "tail") and element.tail: rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) serializeElement(element, 0) return "\n".join(rv) def tostring(element): """Serialize an element and its child nodes to a string""" rv = [] def serializeElement(element): if not hasattr(element, "tag"): if element.docinfo.internalDTD: if element.docinfo.doctype: dtd_str = element.docinfo.doctype else: dtd_str = "" % 
element.docinfo.root_name rv.append(dtd_str) serializeElement(element.getroot()) elif element.tag == comment_type: rv.append("" % (element.text,)) else: # This is assumed to be an ordinary element if not element.attrib: rv.append("<%s>" % (element.tag,)) else: attr = " ".join(["%s=\"%s\"" % (name, value) for name, value in element.attrib.items()]) rv.append("<%s %s>" % (element.tag, attr)) if element.text: rv.append(element.text) for child in element: serializeElement(child) rv.append("" % (element.tag,)) if hasattr(element, "tail") and element.tail: rv.append(element.tail) serializeElement(element) return "".join(rv) class TreeBuilder(base.TreeBuilder): documentClass = Document doctypeClass = DocumentType elementClass = None commentClass = None fragmentClass = Document implementation = etree def __init__(self, namespaceHTMLElements, fullTree=False): builder = etree_builders.getETreeModule(etree, fullTree=fullTree) infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True) self.namespaceHTMLElements = namespaceHTMLElements class Attributes(MutableMapping): def __init__(self, element): self._element = element def _coerceKey(self, key): if isinstance(key, tuple): name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) else: name = infosetFilter.coerceAttribute(key) return name def __getitem__(self, key): value = self._element._element.attrib[self._coerceKey(key)] if not PY3 and isinstance(value, binary_type): value = value.decode("ascii") return value def __setitem__(self, key, value): self._element._element.attrib[self._coerceKey(key)] = value def __delitem__(self, key): del self._element._element.attrib[self._coerceKey(key)] def __iter__(self): return iter(self._element._element.attrib) def __len__(self): return len(self._element._element.attrib) def clear(self): return self._element._element.attrib.clear() class Element(builder.Element): def __init__(self, name, namespace): name = infosetFilter.coerceElement(name) 
builder.Element.__init__(self, name, namespace=namespace) self._attributes = Attributes(self) def _setName(self, name): self._name = infosetFilter.coerceElement(name) self._element.tag = self._getETreeTag( self._name, self._namespace) def _getName(self): return infosetFilter.fromXmlName(self._name) name = property(_getName, _setName) def _getAttributes(self): return self._attributes def _setAttributes(self, value): attributes = self.attributes attributes.clear() attributes.update(value) attributes = property(_getAttributes, _setAttributes) def insertText(self, data, insertBefore=None): data = infosetFilter.coerceCharacters(data) builder.Element.insertText(self, data, insertBefore) def cloneNode(self): element = type(self)(self.name, self.namespace) if self._element.attrib: element._element.attrib.update(self._element.attrib) return element class Comment(builder.Comment): def __init__(self, data): data = infosetFilter.coerceComment(data) builder.Comment.__init__(self, data) def _setData(self, data): data = infosetFilter.coerceComment(data) self._element.text = data def _getData(self): return self._element.text data = property(_getData, _setData) self.elementClass = Element self.commentClass = Comment # self.fragmentClass = builder.DocumentFragment base.TreeBuilder.__init__(self, namespaceHTMLElements) def reset(self): base.TreeBuilder.reset(self) self.insertComment = self.insertCommentInitial self.initial_comments = [] self.doctype = None def testSerializer(self, element): return testSerializer(element) def getDocument(self): if fullTree: return self.document._elementTree else: return self.document._elementTree.getroot() def getFragment(self): fragment = [] element = self.openElements[0]._element if element.text: fragment.append(element.text) fragment.extend(list(element)) if element.tail: fragment.append(element.tail) return fragment def insertDoctype(self, token): name = token["name"] publicId = token["publicId"] systemId = token["systemId"] if not name: 
warnings.warn("lxml cannot represent empty doctype", DataLossWarning) self.doctype = None else: coercedName = self.infosetFilter.coerceElement(name) if coercedName != name: warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning) doctype = self.doctypeClass(coercedName, publicId, systemId) self.doctype = doctype def insertCommentInitial(self, data, parent=None): assert parent is None or parent is self.document assert self.document._elementTree is None self.initial_comments.append(data) def insertCommentMain(self, data, parent=None): if (parent == self.document and self.document._elementTree.getroot()[-1].tag == comment_type): warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning) super(TreeBuilder, self).insertComment(data, parent) def insertRoot(self, token): # Because of the way libxml2 works, it doesn't seem to be possible to # alter information like the doctype after the tree has been parsed. # Therefore we need to use the built-in parser to create our initial # tree, after which we can add elements like normal docStr = "" if self.doctype: assert self.doctype.name docStr += "= 0 and sysid.find('"') >= 0: warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning) sysid = sysid.replace("'", 'U00027') if sysid.find("'") >= 0: docStr += '"%s"' % sysid else: docStr += "'%s'" % sysid else: docStr += "''" docStr += ">" if self.doctype.name != token["name"]: warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning) docStr += "" root = etree.fromstring(docStr) # Append the initial comments: for comment_token in self.initial_comments: comment = self.commentClass(comment_token["data"]) root.addprevious(comment._element) # Create the root document and add the ElementTree to it self.document = self.documentClass() self.document._elementTree = root.getroottree() # Give the root element the right name name = token["name"] namespace = 
token.get("namespace", self.defaultNamespace) if namespace is None: etree_tag = name else: etree_tag = "{%s}%s" % (namespace, name) root.tag = etree_tag # Add the root element to the internal child/open data structures root_element = self.elementClass(name, namespace) root_element._element = root self.document._childNodes.append(root_element) self.openElements.append(root_element) # Reset to the default insert comment function self.insertComment = self.insertCommentMain PK!ekLtreebuilders/__init__.pynu["""A collection of modules for building different kinds of trees from HTML documents. To create a treebuilder for a new type of tree, you need to do implement several things: 1. A set of classes for various types of elements: Document, Doctype, Comment, Element. These must implement the interface of ``base.treebuilders.Node`` (although comment nodes have a different signature for their constructor, see ``treebuilders.etree.Comment``) Textual content may also be implemented as another node type, or not, as your tree implementation requires. 2. A treebuilder object (called ``TreeBuilder`` by convention) that inherits from ``treebuilders.base.TreeBuilder``. This has 4 required attributes: * ``documentClass`` - the class to use for the bottommost node of a document * ``elementClass`` - the class to use for HTML Elements * ``commentClass`` - the class to use for comments * ``doctypeClass`` - the class to use for doctypes It also has one required method: * ``getDocument`` - Returns the root node of the complete document tree 3. 
If you wish to run the unit tests, you must also create a ``testSerializer`` method on your treebuilder which accepts a node and returns a string containing Node and its children serialized according to the format used in the unittests """ from __future__ import absolute_import, division, unicode_literals from .._utils import default_etree treeBuilderCache = {} def getTreeBuilder(treeType, implementation=None, **kwargs): """Get a TreeBuilder class for various types of trees with built-in support :arg treeType: the name of the tree type required (case-insensitive). Supported values are: * "dom" - A generic builder for DOM implementations, defaulting to a xml.dom.minidom based implementation. * "etree" - A generic builder for tree implementations exposing an ElementTree-like interface, defaulting to xml.etree.cElementTree if available and xml.etree.ElementTree if not. * "lxml" - A etree-based builder for lxml.etree, handling limitations of lxml's implementation. :arg implementation: (Currently applies to the "etree" and "dom" tree types). A module implementing the tree type e.g. xml.etree.ElementTree or xml.etree.cElementTree. :arg kwargs: Any additional options to pass to the TreeBuilder when creating it. Example: >>> from html5lib.treebuilders import getTreeBuilder >>> builder = getTreeBuilder('etree') """ treeType = treeType.lower() if treeType not in treeBuilderCache: if treeType == "dom": from . import dom # Come up with a sane default (pref. from the stdlib) if implementation is None: from xml.dom import minidom implementation = minidom # NEVER cache here, caching is done in the dom submodule return dom.getDomModule(implementation, **kwargs).TreeBuilder elif treeType == "lxml": from . import etree_lxml treeBuilderCache[treeType] = etree_lxml.TreeBuilder elif treeType == "etree": from . 
import etree if implementation is None: implementation = default_etree # NEVER cache here, caching is done in the etree submodule return etree.getETreeModule(implementation, **kwargs).TreeBuilder else: raise ValueError("""Unrecognised treebuilder "%s" """ % treeType) return treeBuilderCache.get(treeType) PK!MЇ$2$2treebuilders/etree.pynu[from __future__ import absolute_import, division, unicode_literals # pylint:disable=protected-access from pip._vendor.six import text_type import re from copy import copy from . import base from .. import _ihatexml from .. import constants from ..constants import namespaces from .._utils import moduleFactoryFactory tag_regexp = re.compile("{([^}]*)}(.*)") def getETreeBuilder(ElementTreeImplementation, fullTree=False): ElementTree = ElementTreeImplementation ElementTreeCommentType = ElementTree.Comment("asd").tag class Element(base.Node): def __init__(self, name, namespace=None): self._name = name self._namespace = namespace self._element = ElementTree.Element(self._getETreeTag(name, namespace)) if namespace is None: self.nameTuple = namespaces["html"], self._name else: self.nameTuple = self._namespace, self._name self.parent = None self._childNodes = [] self._flags = [] def _getETreeTag(self, name, namespace): if namespace is None: etree_tag = name else: etree_tag = "{%s}%s" % (namespace, name) return etree_tag def _setName(self, name): self._name = name self._element.tag = self._getETreeTag(self._name, self._namespace) def _getName(self): return self._name name = property(_getName, _setName) def _setNamespace(self, namespace): self._namespace = namespace self._element.tag = self._getETreeTag(self._name, self._namespace) def _getNamespace(self): return self._namespace namespace = property(_getNamespace, _setNamespace) def _getAttributes(self): return self._element.attrib def _setAttributes(self, attributes): el_attrib = self._element.attrib el_attrib.clear() if attributes: # calling .items _always_ allocates, and the above truthy 
check is cheaper than the # allocation on average for key, value in attributes.items(): if isinstance(key, tuple): name = "{%s}%s" % (key[2], key[1]) else: name = key el_attrib[name] = value attributes = property(_getAttributes, _setAttributes) def _getChildNodes(self): return self._childNodes def _setChildNodes(self, value): del self._element[:] self._childNodes = [] for element in value: self.insertChild(element) childNodes = property(_getChildNodes, _setChildNodes) def hasContent(self): """Return true if the node has children or text""" return bool(self._element.text or len(self._element)) def appendChild(self, node): self._childNodes.append(node) self._element.append(node._element) node.parent = self def insertBefore(self, node, refNode): index = list(self._element).index(refNode._element) self._element.insert(index, node._element) node.parent = self def removeChild(self, node): self._childNodes.remove(node) self._element.remove(node._element) node.parent = None def insertText(self, data, insertBefore=None): if not(len(self._element)): if not self._element.text: self._element.text = "" self._element.text += data elif insertBefore is None: # Insert the text as the tail of the last child element if not self._element[-1].tail: self._element[-1].tail = "" self._element[-1].tail += data else: # Insert the text before the specified node children = list(self._element) index = children.index(insertBefore._element) if index > 0: if not self._element[index - 1].tail: self._element[index - 1].tail = "" self._element[index - 1].tail += data else: if not self._element.text: self._element.text = "" self._element.text += data def cloneNode(self): element = type(self)(self.name, self.namespace) if self._element.attrib: element._element.attrib = copy(self._element.attrib) return element def reparentChildren(self, newParent): if newParent.childNodes: newParent.childNodes[-1]._element.tail += self._element.text else: if not newParent._element.text: newParent._element.text = "" if 
self._element.text is not None: newParent._element.text += self._element.text self._element.text = "" base.Node.reparentChildren(self, newParent) class Comment(Element): def __init__(self, data): # Use the superclass constructor to set all properties on the # wrapper element self._element = ElementTree.Comment(data) self.parent = None self._childNodes = [] self._flags = [] def _getData(self): return self._element.text def _setData(self, value): self._element.text = value data = property(_getData, _setData) class DocumentType(Element): def __init__(self, name, publicId, systemId): Element.__init__(self, "") self._element.text = name self.publicId = publicId self.systemId = systemId def _getPublicId(self): return self._element.get("publicId", "") def _setPublicId(self, value): if value is not None: self._element.set("publicId", value) publicId = property(_getPublicId, _setPublicId) def _getSystemId(self): return self._element.get("systemId", "") def _setSystemId(self, value): if value is not None: self._element.set("systemId", value) systemId = property(_getSystemId, _setSystemId) class Document(Element): def __init__(self): Element.__init__(self, "DOCUMENT_ROOT") class DocumentFragment(Element): def __init__(self): Element.__init__(self, "DOCUMENT_FRAGMENT") def testSerializer(element): rv = [] def serializeElement(element, indent=0): if not(hasattr(element, "tag")): element = element.getroot() if element.tag == "": if element.get("publicId") or element.get("systemId"): publicId = element.get("publicId") or "" systemId = element.get("systemId") or "" rv.append("""""" % (element.text, publicId, systemId)) else: rv.append("" % (element.text,)) elif element.tag == "DOCUMENT_ROOT": rv.append("#document") if element.text is not None: rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) if element.tail is not None: raise TypeError("Document node cannot have tail") if hasattr(element, "attrib") and len(element.attrib): raise TypeError("Document node cannot have 
attributes") elif element.tag == ElementTreeCommentType: rv.append("|%s" % (' ' * indent, element.text)) else: assert isinstance(element.tag, text_type), \ "Expected unicode, got %s, %s" % (type(element.tag), element.tag) nsmatch = tag_regexp.match(element.tag) if nsmatch is None: name = element.tag else: ns, name = nsmatch.groups() prefix = constants.prefixes[ns] name = "%s %s" % (prefix, name) rv.append("|%s<%s>" % (' ' * indent, name)) if hasattr(element, "attrib"): attributes = [] for name, value in element.attrib.items(): nsmatch = tag_regexp.match(name) if nsmatch is not None: ns, name = nsmatch.groups() prefix = constants.prefixes[ns] attr_string = "%s %s" % (prefix, name) else: attr_string = name attributes.append((attr_string, value)) for name, value in sorted(attributes): rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) if element.text: rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) indent += 2 for child in element: serializeElement(child, indent) if element.tail: rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) serializeElement(element, 0) return "\n".join(rv) def tostring(element): # pylint:disable=unused-variable """Serialize an element and its child nodes to a string""" rv = [] filter = _ihatexml.InfosetFilter() def serializeElement(element): if isinstance(element, ElementTree.ElementTree): element = element.getroot() if element.tag == "": if element.get("publicId") or element.get("systemId"): publicId = element.get("publicId") or "" systemId = element.get("systemId") or "" rv.append("""""" % (element.text, publicId, systemId)) else: rv.append("" % (element.text,)) elif element.tag == "DOCUMENT_ROOT": if element.text is not None: rv.append(element.text) if element.tail is not None: raise TypeError("Document node cannot have tail") if hasattr(element, "attrib") and len(element.attrib): raise TypeError("Document node cannot have attributes") for child in element: serializeElement(child) elif element.tag == 
ElementTreeCommentType: rv.append("" % (element.text,)) else: # This is assumed to be an ordinary element if not element.attrib: rv.append("<%s>" % (filter.fromXmlName(element.tag),)) else: attr = " ".join(["%s=\"%s\"" % ( filter.fromXmlName(name), value) for name, value in element.attrib.items()]) rv.append("<%s %s>" % (element.tag, attr)) if element.text: rv.append(element.text) for child in element: serializeElement(child) rv.append("" % (element.tag,)) if element.tail: rv.append(element.tail) serializeElement(element) return "".join(rv) class TreeBuilder(base.TreeBuilder): # pylint:disable=unused-variable documentClass = Document doctypeClass = DocumentType elementClass = Element commentClass = Comment fragmentClass = DocumentFragment implementation = ElementTreeImplementation def testSerializer(self, element): return testSerializer(element) def getDocument(self): if fullTree: return self.document._element else: if self.defaultNamespace is not None: return self.document._element.find( "{%s}html" % self.defaultNamespace) else: return self.document._element.find("html") def getFragment(self): return base.TreeBuilder.getFragment(self)._element return locals() getETreeModule = moduleFactoryFactory(getETreeBuilder) PK!eotreewalkers/dom.pynu[from __future__ import absolute_import, division, unicode_literals from xml.dom import Node from . 
import base class TreeWalker(base.NonRecursiveTreeWalker): def getNodeDetails(self, node): if node.nodeType == Node.DOCUMENT_TYPE_NODE: return base.DOCTYPE, node.name, node.publicId, node.systemId elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): return base.TEXT, node.nodeValue elif node.nodeType == Node.ELEMENT_NODE: attrs = {} for attr in list(node.attributes.keys()): attr = node.getAttributeNode(attr) if attr.namespaceURI: attrs[(attr.namespaceURI, attr.localName)] = attr.value else: attrs[(None, attr.name)] = attr.value return (base.ELEMENT, node.namespaceURI, node.nodeName, attrs, node.hasChildNodes()) elif node.nodeType == Node.COMMENT_NODE: return base.COMMENT, node.nodeValue elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE): return (base.DOCUMENT,) else: return base.UNKNOWN, node.nodeType def getFirstChild(self, node): return node.firstChild def getNextSibling(self, node): return node.nextSibling def getParentNode(self, node): return node.parentNode PK!|}-treewalkers/__pycache__/genshi.cpython-39.pycnu[a Re @sddlmZmZmZddlmZddlmZmZmZm Z m Z ddlm Z m Z m Z mZmZmZddlmZddlmZmZGd d d ejZd S) )absolute_importdivisionunicode_literals)QName)STARTEND XML_NAMESPACEDOCTYPETEXT)START_NSEND_NS START_CDATA END_CDATAPICOMMENT)base) voidElements namespacesc@seZdZddZddZdS) TreeWalkerccsXd}|jD](}|dur.|||D] }|Vq"|}q |durT||dD] }|VqHdS)N)treetokens)selfpreviouseventtokenr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/treewalkers/genshi.py__iter__ s zTreeWalker.__iter__ccst|\}}}|tkr|\}}|j}|j} i} |D]0\} } t| trR| | | j| jf<q.| | d| f<q.| tdkr|tvr|| || | p|dtkp|d|kD] } | Vqn| | || Vn|tkr|j}|j} | tdks|tvr| | |Vnz|t kr| |Vnb|t kr2||D] } | Vq"n>|tkrJ|j|Vn&|tttttttfvrdn ||VdS)Nhtmlrr)r localname namespace isinstancerrrZemptyTagrZstartTagendTagrcommentr textr doctyperr r r rrunknown)rrnextkinddata_tagZattribsnamer"Zconverted_attribskvrrrrrsD           
zTreeWalker.tokensN)__name__ __module__ __qualname__rrrrrrr srN) __future__rrrZ genshi.corerrrrr r r r r rrrr constantsrrrrrrrs    PK!L1treewalkers/__pycache__/etree_lxml.cpython-39.pycnu[a Re@sddlmZmZmZddlmZddlmZddlm Z ddl m Z ddl m Z dd l mZd d ZGd d d eZGdddeZGdddeZGdddeZGddde jZdS))absolute_importdivisionunicode_literals) text_type) OrderedDict)etree) tag_regexp)base) _ihatexmlcCs*|dur dSt|tr|S|ddSdS)Nasciistrict) isinstancerdecode)sr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/treewalkers/etree_lxml.py ensure_strs  rc@s,eZdZddZddZddZddZd S) RootcCs||_g|_z:|jjrD|jt|t|jjt|jjt|jj Wnt yXYn0z | }Wnt y||}Yn0| dur| }q~|dur|j|| }qd|_d|_dSN)Z elementtreechildrenZdocinfoZ internalDTDappendDoctyperZ root_name public_idZ system_urlAttributeErrorgetrootZ getpreviousgetnexttexttail)selfetnoderrr__init__s,            z Root.__init__cCs |j|Sr)rr keyrrr __getitem__3szRoot.__getitem__cCsdSrrr rrrr6sz Root.getnextcCsdSNr rr'rrr__len__9sz Root.__len__N)__name__ __module__ __qualname__r#r&rr)rrrrrsrc@seZdZddZddZdS)rcCs(||_||_||_||_d|_d|_dSr) root_nodenamer system_idrr)r r-r.rr/rrrr#>s zDoctype.__init__cCs |jjdSr()r-rr'rrrrGszDoctype.getnextNr*r+r,r#rrrrrr=s rc@seZdZddZddZdS) FragmentRootcs$fdd|D_d__dS)Ncsg|]}t|qSr)FragmentWrapper).0childr'rr Mz)FragmentRoot.__init__..)rrr)r rrr'rr#LszFragmentRoot.__init__cCsdSrrr'rrrrPszFragmentRoot.getnextNr0rrrrr1Ksr1c@sTeZdZddZddZddZddZd d Zd d Zd dZ ddZ ddZ dS)r2cCsT||_||_t|jdr(t|jj|_nd|_t|jdrJt|jj|_nd|_dS)Nrr)r-objhasattrrrr)r Z fragment_rootr7rrrr#Us  zFragmentWrapper.__init__cCs t|j|Sr)getattrr7)r r.rrr __getattr__aszFragmentWrapper.__getattr__cCs6|jj}||}|t|dkr.||dSdSdSr()r-rindexlen)r Zsiblingsidxrrrrds   zFragmentWrapper.getnextcCs |j|Sr)r7r$rrrr&lszFragmentWrapper.__getitem__cCs t|jSr)boolr7r'rrr__bool__oszFragmentWrapper.__bool__cCsdSrrr'rrr getparentrszFragmentWrapper.getparentcCs t|jSrstrr7r'rrr__str__uszFragmentWrapper.__str__cCs t|jSrrAr'rrr 
__unicode__xszFragmentWrapper.__unicode__cCs t|jSr)r<r7r'rrrr){szFragmentWrapper.__len__N) r*r+r,r#r:rr&r?r@rCrDr)rrrrr2Ts r2c@s4eZdZddZddZddZddZd d Zd S) TreeWalkercCsJt|trt||_t|}nt|_t|}tj||t |_ dSr) rlistsetfragmentChildrenr1rr NonRecursiveTreeWalkerr#r Z InfosetFilterfilter)r treerrrr#s   zTreeWalker.__init__c Cst|tr:|\}}|dvs&Jd|tjtt||fSt|trLtjfSt|trjtj |j |j |j fSt|t rt|dstjt|jfS|jtjkrtjt|jfS|jtjkrtjt|jddfStt|j}|r|\}}nd}t|j}t}t|jD]P\}}t|}t|}t|}|rV|||d|df<n ||d|f<qtj||j !||t"|dkp|jfSdS)Nrr%Text nodes are text or tail, found %stagr rr)#rtupler ZTEXTrr9rZDOCUMENTrZDOCTYPEr.rr/r2r8r7rNrCommentCOMMENTrEntityZENTITYr matchgroupsrrFattribitemsgroupZELEMENTrJZ fromXmlNamer<) r r"r%rT namespacerNattrsr.valuerrrgetNodeDetailss<       zTreeWalker.getNodeDetailscCsBt|trJdt|s(|js(Jd|jr6|dfS|dSdS)NzText nodes have no childrenzNode has no childrenrr)rrPr<r)r r"rrr getFirstChilds zTreeWalker.getFirstChildcCsbt|trL|\}}|dvs&Jd||dkrDt|r>|dSdSn|S|jrZ|dfS|S)NrLrMrrr)rrPr<rrr r"r%rrrgetNextSiblings zTreeWalker.getNextSiblingcCsJt|tr4|\}}|dvs&Jd||dkrB|Sn||jvrBdS|S)NrLrMr)rrPrHr@r^rrr getParentNodes  zTreeWalker.getParentNodeN)r*r+r,r#r\r]r_r`rrrrrEs  ) rEN) __future__rrrZpip._vendor.sixr collectionsrlxmlrZtreebuilders.etreer r r robjectrrr1r2rIrErrrrs       & +PK!(t* ,treewalkers/__pycache__/etree.cpython-39.pycnu[a Re@sjddlmZmZmZddlmZddlZddlmZddl m Z ddl m Z e d Zd d Ze eZdS) )absolute_importdivisionunicode_literals) OrderedDictN) string_types)base)moduleFactoryFactoryz {([^}]*)}(.*)cs,|}|djGfdddtj}tS)NZasdcs4eZdZdZfddZddZddZdd Zd S) z#getETreeBuilder..TreeWalkeraGiven the particular ElementTree representation, this implementation, to avoid using recursion, returns "nodes" as tuples with the following content: 1. The current element 2. The index of the element relative to its parent 3. A stack of ancestor elements 4. 
A flag "text", "tail" or None to indicate if the current node is a text node; either the text or tail of the current element (1) c sHt|tr2|\}}}}|dvr.tjt||fS|}t|dsD|}|jdvrVtjfS|jdkr|tj |j | d| dfS|jkrtj |j fSt|jt sJt|jt|j}|r|\}}n d}|j}t}t|jD]>\} } t| }|r| ||d|df<q| |d| f<qtj|||t|p@|j fSdS) Ntexttailtag)Z DOCUMENT_ROOTZDOCUMENT_FRAGMENTz publicIdsystemIdrr ) isinstancetuplerZTEXTgetattrhasattrgetrootrZDOCUMENTZDOCTYPEr getCOMMENTrtype tag_regexpmatchgroupsrlistattribitemsgroupZELEMENTlen) selfnodeelt_flagr namespacerattrsnamevalueZElementTreeCommentType/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/treewalkers/etree.pygetNodeDetails s<         z2getETreeBuilder..TreeWalker.getNodeDetailscSstt|tr|\}}}}n|dgdf\}}}}|dvr8dS|jrJ|||dfSt|rl|||dd|dfSdSdS)Nr r r)rrr r appendr!r"elementkeyparentsr%r+r+r, getFirstChildHs   z1getETreeBuilder..TreeWalker.getFirstChildcSst|tr|\}}}}ndS|dkrLt|rF|||dd|dfSdSnN|jrf|dkrf|||dfS|t|ddkr|d|d|d|dfSdSdS)Nr rr r)rrr r.r r/r+r+r,getNextSiblingYs   z2getETreeBuilder..TreeWalker.getNextSiblingcSst|tr|\}}}}ndS|dkr:|s,|S|||dfSnD|}|sJ|St|d|dksdJ|t|d||dfSdS)Nr r4r)rrpoprcountindex)r!r"r0r1r2r%parentr+r+r, getParentNodems z1getETreeBuilder..TreeWalker.getParentNodeN)__name__ __module__ __qualname____doc__r-r3r5r:r+r*r+r, TreeWalkers  (r?)CommentrrZNonRecursiveTreeWalkerlocals)ZElementTreeImplementation ElementTreer?r+r*r,getETreeBuilders nrC) __future__rrr collectionsrreZpip._vendor.sixrr_utilsr compilerrCgetETreeModuler+r+r+r,s     uPK!蕹k*treewalkers/__pycache__/dom.cpython-39.pycnu[a Re@sBddlmZmZmZddlmZddlmZGdddejZ dS))absolute_importdivisionunicode_literals)Node)basec@s,eZdZddZddZddZddZd S) TreeWalkercCs|jtjkr tj|j|j|jfS|jtjtj fvr>tj |j fS|jtj kri}t |jD]8}||}|jr|j||j|jf<q\|j|d|jf<q\tj|j|j||fS|jtjkrtj|j fS|jtjtjfvrtjfStj|jfSdSN)ZnodeTyperZDOCUMENT_TYPE_NODErZDOCTYPEnamepublicIdsystemIdZ TEXT_NODEZCDATA_SECTION_NODEZTEXTZ nodeValueZ ELEMENT_NODElist 
attributeskeysZgetAttributeNodeZ namespaceURIvalueZ localNameZELEMENTnodeNameZ hasChildNodesZ COMMENT_NODECOMMENTZ DOCUMENT_NODEZDOCUMENT_FRAGMENT_NODEZDOCUMENTUNKNOWN)selfnodeattrsattrr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/treewalkers/dom.pygetNodeDetails s&       zTreeWalker.getNodeDetailscCs|jSr )Z firstChildrrrrr getFirstChild$szTreeWalker.getFirstChildcCs|jSr )Z nextSiblingrrrrgetNextSibling'szTreeWalker.getNextSiblingcCs|jSr )Z parentNoderrrr getParentNode*szTreeWalker.getParentNodeN)__name__ __module__ __qualname__rrrrrrrrrsrN) __future__rrrxml.domrrZNonRecursiveTreeWalkerrrrrrs  PK!-+Є||+treewalkers/__pycache__/base.cpython-39.pycnu[a Re4@sddlmZmZmZddlmZddlmZmZm Z gdZ ej Z ej ZejZejZejZejZdZde Z Gdd d eZGd d d eZd S) )absolute_importdivisionunicode_literals)Node) namespaces voidElementsspaceCharacters) DOCUMENTDOCTYPETEXTELEMENTCOMMENTENTITYUNKNOWN TreeWalkerNonRecursiveTreeWalkerz <#UNKNOWN#>c@sleZdZdZddZddZddZdd d Zd d Zd dZ ddZ ddZ dddZ ddZ ddZdS)rz}Walks a tree yielding tokens Tokens are dicts that all have a ``type`` field specifying the type of the token. 
cCs ||_dS)zCCreates a TreeWalker :arg tree: the tree to walk N)tree)selfrr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/treewalkers/base.py__init__szTreeWalker.__init__cCstdSNNotImplementedError)rrrr__iter__#szTreeWalker.__iter__cCs d|dS)zGenerates an error token with the given message :arg msg: the error message :returns: SerializeError token SerializeErrortypedatar)rmsgrrrerror&szTreeWalker.errorFccs$d|||dV|r |dVdS)arGenerates an EmptyTag token :arg namespace: the namespace of the token--can be ``None`` :arg name: the name of the element :arg attrs: the attributes of the element as a dict :arg hasChildren: whether or not to yield a SerializationError because this tag shouldn't have children :returns: EmptyTag token EmptyTagrname namespacer zVoid element has childrenNr")rr&r%attrs hasChildrenrrremptyTag0s zTreeWalker.emptyTagcCsd|||dS)zGenerates a StartTag token :arg namespace: the namespace of the token--can be ``None`` :arg name: the name of the element :arg attrs: the attributes of the element as a dict :returns: StartTag token StartTagr$r)rr&r%r(rrrstartTagEs zTreeWalker.startTagcCs d||dS)zGenerates an EndTag token :arg namespace: the namespace of the token--can be ``None`` :arg name: the name of the element :returns: EndTag token EndTag)rr%r&r)rr&r%rrrendTagVs zTreeWalker.endTagccsx|}|t}|dt|t|}|r6d|dV|}|t}|t|d}|rdd|dV|rtd|dVdS)atGenerates SpaceCharacters and Characters tokens Depending on what's in the data, this generates one or more ``SpaceCharacters`` and ``Characters`` tokens. 
For example: >>> from html5lib.treewalkers.base import TreeWalker >>> # Give it an empty tree just so it instantiates >>> walker = TreeWalker([]) >>> list(walker.text('')) [] >>> list(walker.text(' ')) [{u'data': ' ', u'type': u'SpaceCharacters'}] >>> list(walker.text(' abc ')) # doctest: +NORMALIZE_WHITESPACE [{u'data': ' ', u'type': u'SpaceCharacters'}, {u'data': u'abc', u'type': u'Characters'}, {u'data': u' ', u'type': u'SpaceCharacters'}] :arg data: the text data :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens NSpaceCharactersr Characters)lstripr lenrstrip)rr Zmiddleleftrightrrrtextds    zTreeWalker.textcCs d|dS)zdGenerates a Comment token :arg data: the comment :returns: Comment token Commentrr)rr rrrcommentszTreeWalker.commentNcCsd|||dS)zGenerates a Doctype token :arg name: :arg publicId: :arg systemId: :returns: the Doctype token Doctype)rr%publicIdsystemIdr)rr%r:r;rrrdoctypes zTreeWalker.doctypecCs d|dS)zjGenerates an Entity token :arg name: the entity name :returns: an Entity token Entity)rr%r)rr%rrrentityszTreeWalker.entitycCs|d|S)zHandles unknown node typeszUnknown node type: r')rZnodeTyperrrunknownszTreeWalker.unknown)F)NN)__name__ __module__ __qualname____doc__rrr"r*r,r.r6r8r<r>r?rrrrrs &  rc@s4eZdZddZddZddZddZd d Zd S) rcCstdSrrrnoderrrgetNodeDetailssz%NonRecursiveTreeWalker.getNodeDetailscCstdSrrrDrrr getFirstChildsz$NonRecursiveTreeWalker.getFirstChildcCstdSrrrDrrrgetNextSiblingsz%NonRecursiveTreeWalker.getNextSiblingcCstdSrrrDrrr getParentNodesz$NonRecursiveTreeWalker.getParentNodec cs|j}|dur||}|d|dd}}d}|tkrJ|j|Vn|tkrj|j|D] }|Vq\n|tkr|\}}}}|r|tdkr|tvr| ||||D] }|Vqd}n| |||VnV|t kr| |dVn<|t kr||dVn |tkrd}n||dV|r2||} nd} | durF| }q|dur||}|d|dd}}|tkr|\}}}}|r|tdks|tvr|||V|j|urd}q||} | dur| }qn ||}qFqdS)NrFhtmlT)rrFr r<r r6r rrr*r,rr8rr>r r?rGr.rHrI) r currentNodedetailsrr)tokenr&r% attributesZ firstChildZ nextSiblingrrrrs\               zNonRecursiveTreeWalker.__iter__N)r@rArBrFrGrHrIrrrrrrs rN) __future__rrrxml.domr constantsrrr 
__all__Z DOCUMENT_NODEr ZDOCUMENT_TYPE_NODEr Z TEXT_NODEr Z ELEMENT_NODEr Z COMMENT_NODErZ ENTITY_NODErrjoinobjectrrrrrrs  !PK!s!/treewalkers/__pycache__/__init__.cpython-39.pycnu[a ReW@sZdZddlmZmZmZddlmZddlmZddgZ iZ d d dZ d d Z d dZ dS)aA collection of modules for iterating through different kinds of tree, generating tokens identical to those produced by the tokenizer module. To create a tree walker for a new type of tree, you need to implement a tree walker object (called TreeWalker by convention) that implements a 'serialize' method which takes a tree as sole argument and returns an iterator which generates tokens. )absolute_importdivisionunicode_literals) constants) default_etree getTreeWalkerpprintNcKs|}|tvr|dkr0ddlm}|jt|<nt|dkrPddlm}|jt|<nT|dkrpddlm}|jt|<n4|dkrdd lm}|d urt}|j |fi|jSt |S) a;Get a TreeWalker class for various types of tree with built-in support :arg str treeType: the name of the tree type required (case-insensitive). Supported values are: * "dom": The xml.dom.minidom DOM implementation * "etree": A generic walker for tree implementations exposing an elementtree-like interface (known to work with ElementTree, cElementTree and lxml.etree). * "lxml": Optimized walker for lxml.etree * "genshi": a Genshi stream :arg implementation: A module implementing the tree type e.g. xml.etree.ElementTree or cElementTree (Currently applies to the "etree" tree type only). 
:arg kwargs: keyword arguments passed to the etree walker--for other walkers, this has no effect :returns: a TreeWalker class dom)r genshi)r lxml) etree_lxmletree)rN) lowertreeWalkerCacher TreeWalkerr rrrgetETreeModuleget)treeTypeimplementationkwargsr r rrr/builddir/build/BUILDROOT/alt-python39-pip-21.3.1-2.el8.x86_64/opt/alt/python39/lib/python3.9/site-packages/pip/_vendor/html5lib/treewalkers/__init__.pyrs"       ccshg}|D]D}|d}|dvr,||dq|rFdd|dVg}|Vq|rddd|dVdS)Ntype) CharactersSpaceCharactersdatarr)rr)appendjoin)tokenspendingCharacterstokenrrrrconcatenateCharacterTokensAsr$c Csbg}d}t|D]D}|d}|dvr|drz|dtjdkrz|dtjvr`tj|d}n|d}d||df}n|d}|dd ||f|d 7}|d }t|D]T\\}} } |r|tjvrtj|}n|}d|| f}n| }|d d ||| fq|d krV|d 8}q|dkr2|d 8}q|dkrX|dd ||d fq|dkr |dr|dr|dd ||d|d|dr|dndfnF|dr|dd ||d|dfn|dd ||dfn|dd |fq|dkr0|dd ||d fq|dkrJdsVJdqtd|qd|S) zPretty printer for tree walkers Takes a TreeWalker instance and pretty prints the output of walking the tree. :arg walker: a TreeWalker instance rr)StartTagEmptyTag namespacehtmlz%s %snamez%s<%s> rrz %s%s="%s"r&EndTagCommentz %sDoctypepublicIdz%ssystemIdrz%sz%sz %srz%s"%s"rFzBconcatenateCharacterTokens should have got rid of all Space tokenszUnknown token type, %s ) r$r namespacesprefixesrsorteditems ValueErrorr ) walkeroutputindentr#rnsr)attrsr' localnamevaluerrrr Psn                )N)__doc__ __future__rrrrr_utilsr__all__rrr$r rrrrs    ,PK!!-B44treewalkers/base.pynu[from __future__ import absolute_import, division, unicode_literals from xml.dom import Node from ..constants import namespaces, voidElements, spaceCharacters __all__ = ["DOCUMENT", "DOCTYPE", "TEXT", "ELEMENT", "COMMENT", "ENTITY", "UNKNOWN", "TreeWalker", "NonRecursiveTreeWalker"] DOCUMENT = Node.DOCUMENT_NODE DOCTYPE = Node.DOCUMENT_TYPE_NODE TEXT = Node.TEXT_NODE ELEMENT = Node.ELEMENT_NODE COMMENT = Node.COMMENT_NODE ENTITY = Node.ENTITY_NODE UNKNOWN = "<#UNKNOWN#>" spaceCharacters = "".join(spaceCharacters) class TreeWalker(object): """Walks a tree yielding tokens Tokens are 
dicts that all have a ``type`` field specifying the type of the token. """ def __init__(self, tree): """Creates a TreeWalker :arg tree: the tree to walk """ self.tree = tree def __iter__(self): raise NotImplementedError def error(self, msg): """Generates an error token with the given message :arg msg: the error message :returns: SerializeError token """ return {"type": "SerializeError", "data": msg} def emptyTag(self, namespace, name, attrs, hasChildren=False): """Generates an EmptyTag token :arg namespace: the namespace of the token--can be ``None`` :arg name: the name of the element :arg attrs: the attributes of the element as a dict :arg hasChildren: whether or not to yield a SerializationError because this tag shouldn't have children :returns: EmptyTag token """ yield {"type": "EmptyTag", "name": name, "namespace": namespace, "data": attrs} if hasChildren: yield self.error("Void element has children") def startTag(self, namespace, name, attrs): """Generates a StartTag token :arg namespace: the namespace of the token--can be ``None`` :arg name: the name of the element :arg attrs: the attributes of the element as a dict :returns: StartTag token """ return {"type": "StartTag", "name": name, "namespace": namespace, "data": attrs} def endTag(self, namespace, name): """Generates an EndTag token :arg namespace: the namespace of the token--can be ``None`` :arg name: the name of the element :returns: EndTag token """ return {"type": "EndTag", "name": name, "namespace": namespace} def text(self, data): """Generates SpaceCharacters and Characters tokens Depending on what's in the data, this generates one or more ``SpaceCharacters`` and ``Characters`` tokens. 
For example: >>> from html5lib.treewalkers.base import TreeWalker >>> # Give it an empty tree just so it instantiates >>> walker = TreeWalker([]) >>> list(walker.text('')) [] >>> list(walker.text(' ')) [{u'data': ' ', u'type': u'SpaceCharacters'}] >>> list(walker.text(' abc ')) # doctest: +NORMALIZE_WHITESPACE [{u'data': ' ', u'type': u'SpaceCharacters'}, {u'data': u'abc', u'type': u'Characters'}, {u'data': u' ', u'type': u'SpaceCharacters'}] :arg data: the text data :returns: one or more ``SpaceCharacters`` and ``Characters`` tokens """ data = data middle = data.lstrip(spaceCharacters) left = data[:len(data) - len(middle)] if left: yield {"type": "SpaceCharacters", "data": left} data = middle middle = data.rstrip(spaceCharacters) right = data[len(middle):] if middle: yield {"type": "Characters", "data": middle} if right: yield {"type": "SpaceCharacters", "data": right} def comment(self, data): """Generates a Comment token :arg data: the comment :returns: Comment token """ return {"type": "Comment", "data": data} def doctype(self, name, publicId=None, systemId=None): """Generates a Doctype token :arg name: :arg publicId: :arg systemId: :returns: the Doctype token """ return {"type": "Doctype", "name": name, "publicId": publicId, "systemId": systemId} def entity(self, name): """Generates an Entity token :arg name: the entity name :returns: an Entity token """ return {"type": "Entity", "name": name} def unknown(self, nodeType): """Handles unknown node types""" return self.error("Unknown node type: " + nodeType) class NonRecursiveTreeWalker(TreeWalker): def getNodeDetails(self, node): raise NotImplementedError def getFirstChild(self, node): raise NotImplementedError def getNextSibling(self, node): raise NotImplementedError def getParentNode(self, node): raise NotImplementedError def __iter__(self): currentNode = self.tree while currentNode is not None: details = self.getNodeDetails(currentNode) type, details = details[0], details[1:] hasChildren = False if type == 
DOCTYPE: yield self.doctype(*details) elif type == TEXT: for token in self.text(*details): yield token elif type == ELEMENT: namespace, name, attributes, hasChildren = details if (not namespace or namespace == namespaces["html"]) and name in voidElements: for token in self.emptyTag(namespace, name, attributes, hasChildren): yield token hasChildren = False else: yield self.startTag(namespace, name, attributes) elif type == COMMENT: yield self.comment(details[0]) elif type == ENTITY: yield self.entity(details[0]) elif type == DOCUMENT: hasChildren = True else: yield self.unknown(details[0]) if hasChildren: firstChild = self.getFirstChild(currentNode) else: firstChild = None if firstChild is not None: currentNode = firstChild else: while currentNode is not None: details = self.getNodeDetails(currentNode) type, details = details[0], details[1:] if type == ELEMENT: namespace, name, attributes, hasChildren = details if (namespace and namespace != namespaces["html"]) or name not in voidElements: yield self.endTag(namespace, name) if self.tree is currentNode: currentNode = None break nextSibling = self.getNextSibling(currentNode) if nextSibling is not None: currentNode = nextSibling break else: currentNode = self.getParentNode(currentNode) PK!_m  treewalkers/genshi.pynu[from __future__ import absolute_import, division, unicode_literals from genshi.core import QName from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT from . import base from ..constants import voidElements, namespaces class TreeWalker(base.TreeWalker): def __iter__(self): # Buffer the events so we can pass in the following one previous = None for event in self.tree: if previous is not None: for token in self.tokens(previous, event): yield token previous = event # Don't forget the final event! 
if previous is not None: for token in self.tokens(previous, None): yield token def tokens(self, event, next): kind, data, _ = event if kind == START: tag, attribs = data name = tag.localname namespace = tag.namespace converted_attribs = {} for k, v in attribs: if isinstance(k, QName): converted_attribs[(k.namespace, k.localname)] = v else: converted_attribs[(None, k)] = v if namespace == namespaces["html"] and name in voidElements: for token in self.emptyTag(namespace, name, converted_attribs, not next or next[0] != END or next[1] != tag): yield token else: yield self.startTag(namespace, name, converted_attribs) elif kind == END: name = data.localname namespace = data.namespace if namespace != namespaces["html"] or name not in voidElements: yield self.endTag(namespace, name) elif kind == COMMENT: yield self.comment(data) elif kind == TEXT: for token in self.text(data): yield token elif kind == DOCTYPE: yield self.doctype(*data) elif kind in (XML_NAMESPACE, DOCTYPE, START_NS, END_NS, START_CDATA, END_CDATA, PI): pass else: yield self.unknown(kind) PK!0Dttreewalkers/etree_lxml.pynu[from __future__ import absolute_import, division, unicode_literals from pip._vendor.six import text_type from collections import OrderedDict from lxml import etree from ..treebuilders.etree import tag_regexp from . import base from .. 
import _ihatexml def ensure_str(s): if s is None: return None elif isinstance(s, text_type): return s else: return s.decode("ascii", "strict") class Root(object): def __init__(self, et): self.elementtree = et self.children = [] try: if et.docinfo.internalDTD: self.children.append(Doctype(self, ensure_str(et.docinfo.root_name), ensure_str(et.docinfo.public_id), ensure_str(et.docinfo.system_url))) except AttributeError: pass try: node = et.getroot() except AttributeError: node = et while node.getprevious() is not None: node = node.getprevious() while node is not None: self.children.append(node) node = node.getnext() self.text = None self.tail = None def __getitem__(self, key): return self.children[key] def getnext(self): return None def __len__(self): return 1 class Doctype(object): def __init__(self, root_node, name, public_id, system_id): self.root_node = root_node self.name = name self.public_id = public_id self.system_id = system_id self.text = None self.tail = None def getnext(self): return self.root_node.children[1] class FragmentRoot(Root): def __init__(self, children): self.children = [FragmentWrapper(self, child) for child in children] self.text = self.tail = None def getnext(self): return None class FragmentWrapper(object): def __init__(self, fragment_root, obj): self.root_node = fragment_root self.obj = obj if hasattr(self.obj, 'text'): self.text = ensure_str(self.obj.text) else: self.text = None if hasattr(self.obj, 'tail'): self.tail = ensure_str(self.obj.tail) else: self.tail = None def __getattr__(self, name): return getattr(self.obj, name) def getnext(self): siblings = self.root_node.children idx = siblings.index(self) if idx < len(siblings) - 1: return siblings[idx + 1] else: return None def __getitem__(self, key): return self.obj[key] def __bool__(self): return bool(self.obj) def getparent(self): return None def __str__(self): return str(self.obj) def __unicode__(self): return str(self.obj) def __len__(self): return len(self.obj) class 
TreeWalker(base.NonRecursiveTreeWalker): def __init__(self, tree): # pylint:disable=redefined-variable-type if isinstance(tree, list): self.fragmentChildren = set(tree) tree = FragmentRoot(tree) else: self.fragmentChildren = set() tree = Root(tree) base.NonRecursiveTreeWalker.__init__(self, tree) self.filter = _ihatexml.InfosetFilter() def getNodeDetails(self, node): if isinstance(node, tuple): # Text node node, key = node assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key return base.TEXT, ensure_str(getattr(node, key)) elif isinstance(node, Root): return (base.DOCUMENT,) elif isinstance(node, Doctype): return base.DOCTYPE, node.name, node.public_id, node.system_id elif isinstance(node, FragmentWrapper) and not hasattr(node, "tag"): return base.TEXT, ensure_str(node.obj) elif node.tag == etree.Comment: return base.COMMENT, ensure_str(node.text) elif node.tag == etree.Entity: return base.ENTITY, ensure_str(node.text)[1:-1] # strip &; else: # This is assumed to be an ordinary element match = tag_regexp.match(ensure_str(node.tag)) if match: namespace, tag = match.groups() else: namespace = None tag = ensure_str(node.tag) attrs = OrderedDict() for name, value in list(node.attrib.items()): name = ensure_str(name) value = ensure_str(value) match = tag_regexp.match(name) if match: attrs[(match.group(1), match.group(2))] = value else: attrs[(None, name)] = value return (base.ELEMENT, namespace, self.filter.fromXmlName(tag), attrs, len(node) > 0 or node.text) def getFirstChild(self, node): assert not isinstance(node, tuple), "Text nodes have no children" assert len(node) or node.text, "Node has no children" if node.text: return (node, "text") else: return node[0] def getNextSibling(self, node): if isinstance(node, tuple): # Text node node, key = node assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key if key == "text": # XXX: we cannot use a "bool(node) and node[0] or None" construct here # because node[0] might evaluate 
to False if it has no child element if len(node): return node[0] else: return None else: # tail return node.getnext() return (node, "tail") if node.tail else node.getnext() def getParentNode(self, node): if isinstance(node, tuple): # Text node node, key = node assert key in ("text", "tail"), "Text nodes are text or tail, found %s" % key if key == "text": return node # else: fallback to "normal" processing elif node in self.fragmentChildren: return None return node.getparent() PK!vjWWtreewalkers/__init__.pynu["""A collection of modules for iterating through different kinds of tree, generating tokens identical to those produced by the tokenizer module. To create a tree walker for a new type of tree, you need to implement a tree walker object (called TreeWalker by convention) that implements a 'serialize' method which takes a tree as sole argument and returns an iterator which generates tokens. """ from __future__ import absolute_import, division, unicode_literals from .. import constants from .._utils import default_etree __all__ = ["getTreeWalker", "pprint"] treeWalkerCache = {} def getTreeWalker(treeType, implementation=None, **kwargs): """Get a TreeWalker class for various types of tree with built-in support :arg str treeType: the name of the tree type required (case-insensitive). Supported values are: * "dom": The xml.dom.minidom DOM implementation * "etree": A generic walker for tree implementations exposing an elementtree-like interface (known to work with ElementTree, cElementTree and lxml.etree). * "lxml": Optimized walker for lxml.etree * "genshi": a Genshi stream :arg implementation: A module implementing the tree type e.g. xml.etree.ElementTree or cElementTree (Currently applies to the "etree" tree type only). :arg kwargs: keyword arguments passed to the etree walker--for other walkers, this has no effect :returns: a TreeWalker class """ treeType = treeType.lower() if treeType not in treeWalkerCache: if treeType == "dom": from . 
import dom treeWalkerCache[treeType] = dom.TreeWalker elif treeType == "genshi": from . import genshi treeWalkerCache[treeType] = genshi.TreeWalker elif treeType == "lxml": from . import etree_lxml treeWalkerCache[treeType] = etree_lxml.TreeWalker elif treeType == "etree": from . import etree if implementation is None: implementation = default_etree # XXX: NEVER cache here, caching is done in the etree submodule return etree.getETreeModule(implementation, **kwargs).TreeWalker return treeWalkerCache.get(treeType) def concatenateCharacterTokens(tokens): pendingCharacters = [] for token in tokens: type = token["type"] if type in ("Characters", "SpaceCharacters"): pendingCharacters.append(token["data"]) else: if pendingCharacters: yield {"type": "Characters", "data": "".join(pendingCharacters)} pendingCharacters = [] yield token if pendingCharacters: yield {"type": "Characters", "data": "".join(pendingCharacters)} def pprint(walker): """Pretty printer for tree walkers Takes a TreeWalker instance and pretty prints the output of walking the tree. 
:arg walker: a TreeWalker instance """ output = [] indent = 0 for token in concatenateCharacterTokens(walker): type = token["type"] if type in ("StartTag", "EmptyTag"): # tag name if token["namespace"] and token["namespace"] != constants.namespaces["html"]: if token["namespace"] in constants.prefixes: ns = constants.prefixes[token["namespace"]] else: ns = token["namespace"] name = "%s %s" % (ns, token["name"]) else: name = token["name"] output.append("%s<%s>" % (" " * indent, name)) indent += 2 # attributes (sorted for consistent ordering) attrs = token["data"] for (namespace, localname), value in sorted(attrs.items()): if namespace: if namespace in constants.prefixes: ns = constants.prefixes[namespace] else: ns = namespace name = "%s %s" % (ns, localname) else: name = localname output.append("%s%s=\"%s\"" % (" " * indent, name, value)) # self-closing if type == "EmptyTag": indent -= 2 elif type == "EndTag": indent -= 2 elif type == "Comment": output.append("%s" % (" " * indent, token["data"])) elif type == "Doctype": if token["name"]: if token["publicId"]: output.append("""%s""" % (" " * indent, token["name"], token["publicId"], token["systemId"] if token["systemId"] else "")) elif token["systemId"]: output.append("""%s""" % (" " * indent, token["name"], token["systemId"])) else: output.append("%s" % (" " * indent, token["name"])) else: output.append("%s" % (" " * indent,)) elif type == "Characters": output.append("%s\"%s\"" % (" " * indent, token["data"])) elif type == "SpaceCharacters": assert False, "concatenateCharacterTokens should have got rid of all Space tokens" else: raise ValueError("Unknown token type, %s" % type) return "\n".join(output) PK!Ctreewalkers/etree.pynu[from __future__ import absolute_import, division, unicode_literals from collections import OrderedDict import re from pip._vendor.six import string_types from . 
import base from .._utils import moduleFactoryFactory tag_regexp = re.compile("{([^}]*)}(.*)") def getETreeBuilder(ElementTreeImplementation): ElementTree = ElementTreeImplementation ElementTreeCommentType = ElementTree.Comment("asd").tag class TreeWalker(base.NonRecursiveTreeWalker): # pylint:disable=unused-variable """Given the particular ElementTree representation, this implementation, to avoid using recursion, returns "nodes" as tuples with the following content: 1. The current element 2. The index of the element relative to its parent 3. A stack of ancestor elements 4. A flag "text", "tail" or None to indicate if the current node is a text node; either the text or tail of the current element (1) """ def getNodeDetails(self, node): if isinstance(node, tuple): # It might be the root Element elt, _, _, flag = node if flag in ("text", "tail"): return base.TEXT, getattr(elt, flag) else: node = elt if not(hasattr(node, "tag")): node = node.getroot() if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"): return (base.DOCUMENT,) elif node.tag == "": return (base.DOCTYPE, node.text, node.get("publicId"), node.get("systemId")) elif node.tag == ElementTreeCommentType: return base.COMMENT, node.text else: assert isinstance(node.tag, string_types), type(node.tag) # This is assumed to be an ordinary element match = tag_regexp.match(node.tag) if match: namespace, tag = match.groups() else: namespace = None tag = node.tag attrs = OrderedDict() for name, value in list(node.attrib.items()): match = tag_regexp.match(name) if match: attrs[(match.group(1), match.group(2))] = value else: attrs[(None, name)] = value return (base.ELEMENT, namespace, tag, attrs, len(node) or node.text) def getFirstChild(self, node): if isinstance(node, tuple): element, key, parents, flag = node else: element, key, parents, flag = node, None, [], None if flag in ("text", "tail"): return None else: if element.text: return element, key, parents, "text" elif len(element): parents.append(element) return 
element[0], 0, parents, None else: return None def getNextSibling(self, node): if isinstance(node, tuple): element, key, parents, flag = node else: return None if flag == "text": if len(element): parents.append(element) return element[0], 0, parents, None else: return None else: if element.tail and flag != "tail": return element, key, parents, "tail" elif key < len(parents[-1]) - 1: return parents[-1][key + 1], key + 1, parents, None else: return None def getParentNode(self, node): if isinstance(node, tuple): element, key, parents, flag = node else: return None if flag == "text": if not parents: return element else: return element, key, parents, None else: parent = parents.pop() if not parents: return parent else: assert list(parents[-1]).count(parent) == 1 return parent, list(parents[-1]).index(parent), parents, None return locals() getETreeModule = moduleFactoryFactory(getETreeBuilder) PK!6=a~a~_inputstream.pynu[from __future__ import absolute_import, division, unicode_literals from pip._vendor.six import text_type from pip._vendor.six.moves import http_client, urllib import codecs import re from io import BytesIO, StringIO from pip._vendor import webencodings from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase from .constants import _ReparseException from . 
import _utils # Non-unicode versions of constants for use in the pre-parser spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa if _utils.supports_lone_surrogates: # Use one extra step of indirection and create surrogates with # eval. Not using this indirection would introduce an illegal # unicode literal on platforms not supporting such lone # surrogates. 
assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1 invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] + eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used "]") else: invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF} ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]") # Cache for charsUntil() charsUntilRegEx = {} class BufferedStream(object): """Buffering for streams that do not have buffering of their own The buffer is implemented as a list of chunks on the assumption that joining many strings will be slow since it is O(n**2) """ def __init__(self, stream): self.stream = stream self.buffer = [] self.position = [-1, 0] # chunk number, offset def tell(self): pos = 0 for chunk in self.buffer[:self.position[0]]: pos += len(chunk) pos += self.position[1] return pos def seek(self, pos): assert pos <= self._bufferedBytes() offset = pos i = 0 while len(self.buffer[i]) < offset: offset -= len(self.buffer[i]) i += 1 self.position = [i, offset] def read(self, bytes): if not self.buffer: return self._readStream(bytes) elif (self.position[0] == len(self.buffer) and self.position[1] == len(self.buffer[-1])): return self._readStream(bytes) else: return self._readFromBuffer(bytes) def _bufferedBytes(self): return sum([len(item) for item in self.buffer]) def _readStream(self, bytes): data = self.stream.read(bytes) self.buffer.append(data) self.position[0] += 1 self.position[1] = len(data) return data def _readFromBuffer(self, bytes): remainingBytes = bytes rv = [] bufferIndex = self.position[0] bufferOffset = self.position[1] while 
bufferIndex < len(self.buffer) and remainingBytes != 0: assert remainingBytes > 0 bufferedData = self.buffer[bufferIndex] if remainingBytes <= len(bufferedData) - bufferOffset: bytesToRead = remainingBytes self.position = [bufferIndex, bufferOffset + bytesToRead] else: bytesToRead = len(bufferedData) - bufferOffset self.position = [bufferIndex, len(bufferedData)] bufferIndex += 1 rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead]) remainingBytes -= bytesToRead bufferOffset = 0 if remainingBytes: rv.append(self._readStream(remainingBytes)) return b"".join(rv) def HTMLInputStream(source, **kwargs): # Work around Python bug #20007: read(0) closes the connection. # http://bugs.python.org/issue20007 if (isinstance(source, http_client.HTTPResponse) or # Also check for addinfourl wrapping HTTPResponse (isinstance(source, urllib.response.addbase) and isinstance(source.fp, http_client.HTTPResponse))): isUnicode = False elif hasattr(source, "read"): isUnicode = isinstance(source.read(0), text_type) else: isUnicode = isinstance(source, text_type) if isUnicode: encodings = [x for x in kwargs if x.endswith("_encoding")] if encodings: raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings) return HTMLUnicodeInputStream(source, **kwargs) else: return HTMLBinaryInputStream(source, **kwargs) class HTMLUnicodeInputStream(object): """Provides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing incorrect byte-sequences and also provides column and line tracking. """ _defaultChunkSize = 10240 def __init__(self, source): """Initialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. The optional encoding parameter must be a string that indicates the encoding. 
If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) """ if not _utils.supports_lone_surrogates: # Such platforms will have already checked for such # surrogate errors, so no need to do this checking. self.reportCharacterErrors = None elif len("\U0010FFFF") == 1: self.reportCharacterErrors = self.characterErrorsUCS4 else: self.reportCharacterErrors = self.characterErrorsUCS2 # List of where new lines occur self.newLines = [0] self.charEncoding = (lookupEncoding("utf-8"), "certain") self.dataStream = self.openStream(source) self.reset() def reset(self): self.chunk = "" self.chunkSize = 0 self.chunkOffset = 0 self.errors = [] # number of (complete) lines in previous chunks self.prevNumLines = 0 # number of columns in the last line of the previous chunk self.prevNumCols = 0 # Deal with CR LF and surrogates split over chunk boundaries self._bufferedCharacter = None def openStream(self, source): """Produces a file object from source. source can be either a file object, local filename or a string. """ # Already a file object if hasattr(source, 'read'): stream = source else: stream = StringIO(source) return stream def _position(self, offset): chunk = self.chunk nLines = chunk.count('\n', 0, offset) positionLine = self.prevNumLines + nLines lastLinePos = chunk.rfind('\n', 0, offset) if lastLinePos == -1: positionColumn = self.prevNumCols + offset else: positionColumn = offset - (lastLinePos + 1) return (positionLine, positionColumn) def position(self): """Returns (line, col) of the current position in the stream.""" line, col = self._position(self.chunkOffset) return (line + 1, col) def char(self): """ Read one character from the stream or queue if available. Return EOF when EOF is reached. 
""" # Read a new chunk from the input stream if necessary if self.chunkOffset >= self.chunkSize: if not self.readChunk(): return EOF chunkOffset = self.chunkOffset char = self.chunk[chunkOffset] self.chunkOffset = chunkOffset + 1 return char def readChunk(self, chunkSize=None): if chunkSize is None: chunkSize = self._defaultChunkSize self.prevNumLines, self.prevNumCols = self._position(self.chunkSize) self.chunk = "" self.chunkSize = 0 self.chunkOffset = 0 data = self.dataStream.read(chunkSize) # Deal with CR LF and surrogates broken across chunks if self._bufferedCharacter: data = self._bufferedCharacter + data self._bufferedCharacter = None elif not data: # We have no more data, bye-bye stream return False if len(data) > 1: lastv = ord(data[-1]) if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF: self._bufferedCharacter = data[-1] data = data[:-1] if self.reportCharacterErrors: self.reportCharacterErrors(data) # Replace invalid characters data = data.replace("\r\n", "\n") data = data.replace("\r", "\n") self.chunk = data self.chunkSize = len(data) return True def characterErrorsUCS4(self, data): for _ in range(len(invalid_unicode_re.findall(data))): self.errors.append("invalid-codepoint") def characterErrorsUCS2(self, data): # Someone picked the wrong compile option # You lose skip = False for match in invalid_unicode_re.finditer(data): if skip: continue codepoint = ord(match.group()) pos = match.start() # Pretty sure there should be endianness issues here if _utils.isSurrogatePair(data[pos:pos + 2]): # We have a surrogate pair! 
char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2]) if char_val in non_bmp_invalid_codepoints: self.errors.append("invalid-codepoint") skip = True elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and pos == len(data) - 1): self.errors.append("invalid-codepoint") else: skip = False self.errors.append("invalid-codepoint") def charsUntil(self, characters, opposite=False): """ Returns a string of characters from the stream up to but not including any character in 'characters' or EOF. 'characters' must be a container that supports the 'in' method and iteration over its characters. """ # Use a cache of regexps to find the required characters try: chars = charsUntilRegEx[(characters, opposite)] except KeyError: if __debug__: for c in characters: assert(ord(c) < 128) regex = "".join(["\\x%02x" % ord(c) for c in characters]) if not opposite: regex = "^%s" % regex chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex) rv = [] while True: # Find the longest matching prefix m = chars.match(self.chunk, self.chunkOffset) if m is None: # If nothing matched, and it wasn't because we ran out of chunk, # then stop if self.chunkOffset != self.chunkSize: break else: end = m.end() # If not the whole chunk matched, return everything # up to the part that didn't match if end != self.chunkSize: rv.append(self.chunk[self.chunkOffset:end]) self.chunkOffset = end break # If the whole remainder of the chunk matched, # use it all and read the next chunk rv.append(self.chunk[self.chunkOffset:]) if not self.readChunk(): # Reached EOF break r = "".join(rv) return r def unget(self, char): # Only one character is allowed to be ungotten at once - it must # be consumed again before any further call to unget if char is not EOF: if self.chunkOffset == 0: # unget is called quite rarely, so it's a good idea to do # more work here if it saves a bit of work in the frequently # called char and charsUntil. 
# So, just prepend the ungotten character onto the current # chunk: self.chunk = char + self.chunk self.chunkSize += 1 else: self.chunkOffset -= 1 assert self.chunk[self.chunkOffset] == char class HTMLBinaryInputStream(HTMLUnicodeInputStream): """Provides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing incorrect byte-sequences and also provides column and line tracking. """ def __init__(self, source, override_encoding=None, transport_encoding=None, same_origin_parent_encoding=None, likely_encoding=None, default_encoding="windows-1252", useChardet=True): """Initialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. source can be either a file-object, local filename or a string. The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) """ # Raw Stream - for unicode objects this will encode to utf-8 and set # self.charEncoding as appropriate self.rawStream = self.openStream(source) HTMLUnicodeInputStream.__init__(self, self.rawStream) # Encoding Information # Number of bytes to use when looking for a meta element with # encoding information self.numBytesMeta = 1024 # Number of bytes to use when using detecting encoding using chardet self.numBytesChardet = 100 # Things from args self.override_encoding = override_encoding self.transport_encoding = transport_encoding self.same_origin_parent_encoding = same_origin_parent_encoding self.likely_encoding = likely_encoding self.default_encoding = default_encoding # Determine encoding self.charEncoding = self.determineEncoding(useChardet) assert self.charEncoding[0] is not None # Call superclass self.reset() def reset(self): self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace') HTMLUnicodeInputStream.reset(self) def 
openStream(self, source): """Produces a file object from source. source can be either a file object, local filename or a string. """ # Already a file object if hasattr(source, 'read'): stream = source else: stream = BytesIO(source) try: stream.seek(stream.tell()) except Exception: stream = BufferedStream(stream) return stream def determineEncoding(self, chardet=True): # BOMs take precedence over everything # This will also read past the BOM if present charEncoding = self.detectBOM(), "certain" if charEncoding[0] is not None: return charEncoding # If we've been overridden, we've been overridden charEncoding = lookupEncoding(self.override_encoding), "certain" if charEncoding[0] is not None: return charEncoding # Now check the transport layer charEncoding = lookupEncoding(self.transport_encoding), "certain" if charEncoding[0] is not None: return charEncoding # Look for meta elements with encoding information charEncoding = self.detectEncodingMeta(), "tentative" if charEncoding[0] is not None: return charEncoding # Parent document encoding charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative" if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"): return charEncoding # "likely" encoding charEncoding = lookupEncoding(self.likely_encoding), "tentative" if charEncoding[0] is not None: return charEncoding # Guess with chardet, if available if chardet: try: from pip._vendor.chardet.universaldetector import UniversalDetector except ImportError: pass else: buffers = [] detector = UniversalDetector() while not detector.done: buffer = self.rawStream.read(self.numBytesChardet) assert isinstance(buffer, bytes) if not buffer: break buffers.append(buffer) detector.feed(buffer) detector.close() encoding = lookupEncoding(detector.result['encoding']) self.rawStream.seek(0) if encoding is not None: return encoding, "tentative" # Try the default encoding charEncoding = lookupEncoding(self.default_encoding), "tentative" if charEncoding[0] 
is not None: return charEncoding # Fallback to html5lib's default if even that hasn't worked return lookupEncoding("windows-1252"), "tentative" def changeEncoding(self, newEncoding): assert self.charEncoding[1] != "certain" newEncoding = lookupEncoding(newEncoding) if newEncoding is None: return if newEncoding.name in ("utf-16be", "utf-16le"): newEncoding = lookupEncoding("utf-8") assert newEncoding is not None elif newEncoding == self.charEncoding[0]: self.charEncoding = (self.charEncoding[0], "certain") else: self.rawStream.seek(0) self.charEncoding = (newEncoding, "certain") self.reset() raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) def detectBOM(self): """Attempts to detect at BOM at the start of the stream. If an encoding can be determined from the BOM return the name of the encoding otherwise return None""" bomDict = { codecs.BOM_UTF8: 'utf-8', codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be', codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be' } # Go to beginning of file and read in 4 bytes string = self.rawStream.read(4) assert isinstance(string, bytes) # Try detecting the BOM using bytes from the string encoding = bomDict.get(string[:3]) # UTF-8 seek = 3 if not encoding: # Need to detect UTF-32 before UTF-16 encoding = bomDict.get(string) # UTF-32 seek = 4 if not encoding: encoding = bomDict.get(string[:2]) # UTF-16 seek = 2 # Set the read position past the BOM if one was found, otherwise # set it to the start of the stream if encoding: self.rawStream.seek(seek) return lookupEncoding(encoding) else: self.rawStream.seek(0) return None def detectEncodingMeta(self): """Report the encoding declared by the meta element """ buffer = self.rawStream.read(self.numBytesMeta) assert isinstance(buffer, bytes) parser = EncodingParser(buffer) self.rawStream.seek(0) encoding = parser.getEncoding() if encoding is not None and encoding.name in ("utf-16be", "utf-16le"): encoding = 
lookupEncoding("utf-8") return encoding class EncodingBytes(bytes): """String-like object with an associated position and various extra methods If the position is ever greater than the string length then an exception is raised""" def __new__(self, value): assert isinstance(value, bytes) return bytes.__new__(self, value.lower()) def __init__(self, value): # pylint:disable=unused-argument self._position = -1 def __iter__(self): return self def __next__(self): p = self._position = self._position + 1 if p >= len(self): raise StopIteration elif p < 0: raise TypeError return self[p:p + 1] def next(self): # Py2 compat return self.__next__() def previous(self): p = self._position if p >= len(self): raise StopIteration elif p < 0: raise TypeError self._position = p = p - 1 return self[p:p + 1] def setPosition(self, position): if self._position >= len(self): raise StopIteration self._position = position def getPosition(self): if self._position >= len(self): raise StopIteration if self._position >= 0: return self._position else: return None position = property(getPosition, setPosition) def getCurrentByte(self): return self[self.position:self.position + 1] currentByte = property(getCurrentByte) def skip(self, chars=spaceCharactersBytes): """Skip past a list of characters""" p = self.position # use property for the error-checking while p < len(self): c = self[p:p + 1] if c not in chars: self._position = p return c p += 1 self._position = p return None def skipUntil(self, chars): p = self.position while p < len(self): c = self[p:p + 1] if c in chars: self._position = p return c p += 1 self._position = p return None def matchBytes(self, bytes): """Look for a sequence of bytes at the start of a string. If the bytes are found return True and advance the position to the byte after the match. 
Otherwise return False and leave the position alone""" rv = self.startswith(bytes, self.position) if rv: self.position += len(bytes) return rv def jumpTo(self, bytes): """Look for the next sequence of bytes matching a given sequence. If a match is found advance the position to the last byte of the match""" try: self._position = self.index(bytes, self.position) + len(bytes) - 1 except ValueError: raise StopIteration return True class EncodingParser(object): """Mini parser for detecting character encoding from meta elements""" def __init__(self, data): """string - the data to work on for encoding detection""" self.data = EncodingBytes(data) self.encoding = None def getEncoding(self): if b"") def handleMeta(self): if self.data.currentByte not in spaceCharactersBytes: # if we have ") def getAttribute(self): """Return a name,value pair for the next attribute in the stream, if one is found, or None""" data = self.data # Step 1 (skip chars) c = data.skip(spaceCharactersBytes | frozenset([b"/"])) assert c is None or len(c) == 1 # Step 2 if c in (b">", None): return None # Step 3 attrName = [] attrValue = [] # Step 4 attribute name while True: if c == b"=" and attrName: break elif c in spaceCharactersBytes: # Step 6! 
c = data.skip() break elif c in (b"/", b">"): return b"".join(attrName), b"" elif c in asciiUppercaseBytes: attrName.append(c.lower()) elif c is None: return None else: attrName.append(c) # Step 5 c = next(data) # Step 7 if c != b"=": data.previous() return b"".join(attrName), b"" # Step 8 next(data) # Step 9 c = data.skip() # Step 10 if c in (b"'", b'"'): # 10.1 quoteChar = c while True: # 10.2 c = next(data) # 10.3 if c == quoteChar: next(data) return b"".join(attrName), b"".join(attrValue) # 10.4 elif c in asciiUppercaseBytes: attrValue.append(c.lower()) # 10.5 else: attrValue.append(c) elif c == b">": return b"".join(attrName), b"" elif c in asciiUppercaseBytes: attrValue.append(c.lower()) elif c is None: return None else: attrValue.append(c) # Step 11 while True: c = next(data) if c in spacesAngleBrackets: return b"".join(attrName), b"".join(attrValue) elif c in asciiUppercaseBytes: attrValue.append(c.lower()) elif c is None: return None else: attrValue.append(c) class ContentAttrParser(object): def __init__(self, data): assert isinstance(data, bytes) self.data = data def parse(self): try: # Check if the attr name is charset # otherwise return self.data.jumpTo(b"charset") self.data.position += 1 self.data.skip() if not self.data.currentByte == b"=": # If there is no = sign keep looking for attrs return None self.data.position += 1 self.data.skip() # Look for an encoding between matching quote marks if self.data.currentByte in (b'"', b"'"): quoteMark = self.data.currentByte self.data.position += 1 oldPosition = self.data.position if self.data.jumpTo(quoteMark): return self.data[oldPosition:self.data.position] else: return None else: # Unquoted value oldPosition = self.data.position try: self.data.skipUntil(spaceCharactersBytes) return self.data[oldPosition:self.data.position] except StopIteration: # Return the whole remaining value return self.data[oldPosition:] except StopIteration: return None def lookupEncoding(encoding): """Return the python codec name 
corresponding to an encoding or None if the string doesn't correspond to a valid encoding.""" if isinstance(encoding, bytes): try: encoding = encoding.decode("ascii") except UnicodeDecodeError: return None if encoding is not None: try: return webencodings.lookup(encoding) except AttributeError: return None else: return None PK!html5parser.pynu[from __future__ import absolute_import, division, unicode_literals from pip._vendor.six import with_metaclass, viewkeys import types from . import _inputstream from . import _tokenizer from . import treebuilders from .treebuilders.base import Marker from . import _utils from .constants import ( spaceCharacters, asciiUpper2Lower, specialElements, headingElements, cdataElements, rcdataElements, tokenTypes, tagTokenTypes, namespaces, htmlIntegrationPointElements, mathmlTextIntegrationPointElements, adjustForeignAttributes as adjustForeignAttributesMap, adjustMathMLAttributes, adjustSVGAttributes, E, _ReparseException ) def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs): """Parse an HTML document as a string or file-like object into a tree :arg doc: the document to parse as a string or file-like object :arg treebuilder: the treebuilder to use when parsing :arg namespaceHTMLElements: whether or not to namespace HTML elements :returns: parsed tree Example: >>> from html5lib.html5parser import parse >>> parse('

This is a doc

') """ tb = treebuilders.getTreeBuilder(treebuilder) p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) return p.parse(doc, **kwargs) def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs): """Parse an HTML fragment as a string or file-like object into a tree :arg doc: the fragment to parse as a string or file-like object :arg container: the container context to parse the fragment in :arg treebuilder: the treebuilder to use when parsing :arg namespaceHTMLElements: whether or not to namespace HTML elements :returns: parsed tree Example: >>> from html5lib.html5libparser import parseFragment >>> parseFragment('this is a fragment') """ tb = treebuilders.getTreeBuilder(treebuilder) p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) return p.parseFragment(doc, container=container, **kwargs) def method_decorator_metaclass(function): class Decorated(type): def __new__(meta, classname, bases, classDict): for attributeName, attribute in classDict.items(): if isinstance(attribute, types.FunctionType): attribute = function(attribute) classDict[attributeName] = attribute return type.__new__(meta, classname, bases, classDict) return Decorated class HTMLParser(object): """HTML parser Generates a tree structure from a stream of (possibly malformed) HTML. """ def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False): """ :arg tree: a treebuilder class controlling the type of tree that will be returned. 
Built in treebuilders can be accessed through html5lib.treebuilders.getTreeBuilder(treeType) :arg strict: raise an exception when a parse error is encountered :arg namespaceHTMLElements: whether or not to namespace HTML elements :arg debug: whether or not to enable debug mode which logs things Example: >>> from html5lib.html5parser import HTMLParser >>> parser = HTMLParser() # generates parser with etree builder >>> parser = HTMLParser('lxml', strict=True) # generates parser with lxml builder which is strict """ # Raise an exception on the first error encountered self.strict = strict if tree is None: tree = treebuilders.getTreeBuilder("etree") self.tree = tree(namespaceHTMLElements) self.errors = [] self.phases = {name: cls(self, self.tree) for name, cls in getPhases(debug).items()} def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs): self.innerHTMLMode = innerHTML self.container = container self.scripting = scripting self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs) self.reset() try: self.mainLoop() except _ReparseException: self.reset() self.mainLoop() def reset(self): self.tree.reset() self.firstStartTag = False self.errors = [] self.log = [] # only used with debug mode # "quirks" / "limited quirks" / "no quirks" self.compatMode = "no quirks" if self.innerHTMLMode: self.innerHTML = self.container.lower() if self.innerHTML in cdataElements: self.tokenizer.state = self.tokenizer.rcdataState elif self.innerHTML in rcdataElements: self.tokenizer.state = self.tokenizer.rawtextState elif self.innerHTML == 'plaintext': self.tokenizer.state = self.tokenizer.plaintextState else: # state already is data state # self.tokenizer.state = self.tokenizer.dataState pass self.phase = self.phases["beforeHtml"] self.phase.insertHtmlElement() self.resetInsertionMode() else: self.innerHTML = False # pylint:disable=redefined-variable-type self.phase = self.phases["initial"] self.lastPhase = None self.beforeRCDataPhase = None 
self.framesetOK = True @property def documentEncoding(self): """Name of the character encoding that was used to decode the input stream, or :obj:`None` if that is not determined yet """ if not hasattr(self, 'tokenizer'): return None return self.tokenizer.stream.charEncoding[0].name def isHTMLIntegrationPoint(self, element): if (element.name == "annotation-xml" and element.namespace == namespaces["mathml"]): return ("encoding" in element.attributes and element.attributes["encoding"].translate( asciiUpper2Lower) in ("text/html", "application/xhtml+xml")) else: return (element.namespace, element.name) in htmlIntegrationPointElements def isMathMLTextIntegrationPoint(self, element): return (element.namespace, element.name) in mathmlTextIntegrationPointElements def mainLoop(self): CharactersToken = tokenTypes["Characters"] SpaceCharactersToken = tokenTypes["SpaceCharacters"] StartTagToken = tokenTypes["StartTag"] EndTagToken = tokenTypes["EndTag"] CommentToken = tokenTypes["Comment"] DoctypeToken = tokenTypes["Doctype"] ParseErrorToken = tokenTypes["ParseError"] for token in self.tokenizer: prev_token = None new_token = token while new_token is not None: prev_token = new_token currentNode = self.tree.openElements[-1] if self.tree.openElements else None currentNodeNamespace = currentNode.namespace if currentNode else None currentNodeName = currentNode.name if currentNode else None type = new_token["type"] if type == ParseErrorToken: self.parseError(new_token["data"], new_token.get("datavars", {})) new_token = None else: if (len(self.tree.openElements) == 0 or currentNodeNamespace == self.tree.defaultNamespace or (self.isMathMLTextIntegrationPoint(currentNode) and ((type == StartTagToken and token["name"] not in frozenset(["mglyph", "malignmark"])) or type in (CharactersToken, SpaceCharactersToken))) or (currentNodeNamespace == namespaces["mathml"] and currentNodeName == "annotation-xml" and type == StartTagToken and token["name"] == "svg") or 
(self.isHTMLIntegrationPoint(currentNode) and type in (StartTagToken, CharactersToken, SpaceCharactersToken))): phase = self.phase else: phase = self.phases["inForeignContent"] if type == CharactersToken: new_token = phase.processCharacters(new_token) elif type == SpaceCharactersToken: new_token = phase.processSpaceCharacters(new_token) elif type == StartTagToken: new_token = phase.processStartTag(new_token) elif type == EndTagToken: new_token = phase.processEndTag(new_token) elif type == CommentToken: new_token = phase.processComment(new_token) elif type == DoctypeToken: new_token = phase.processDoctype(new_token) if (type == StartTagToken and prev_token["selfClosing"] and not prev_token["selfClosingAcknowledged"]): self.parseError("non-void-element-with-trailing-solidus", {"name": prev_token["name"]}) # When the loop finishes it's EOF reprocess = True phases = [] while reprocess: phases.append(self.phase) reprocess = self.phase.processEOF() if reprocess: assert self.phase not in phases def parse(self, stream, *args, **kwargs): """Parse a HTML document into a well-formed tree :arg stream: a file-like object or string containing the HTML to be parsed The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element). :arg scripting: treat noscript elements as if JavaScript was turned on :returns: parsed tree Example: >>> from html5lib.html5parser import HTMLParser >>> parser = HTMLParser() >>> parser.parse('

This is a doc

') """ self._parse(stream, False, None, *args, **kwargs) return self.tree.getDocument() def parseFragment(self, stream, *args, **kwargs): """Parse a HTML fragment into a well-formed tree fragment :arg container: name of the element we're setting the innerHTML property if set to None, default to 'div' :arg stream: a file-like object or string containing the HTML to be parsed The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element) :arg scripting: treat noscript elements as if JavaScript was turned on :returns: parsed tree Example: >>> from html5lib.html5libparser import HTMLParser >>> parser = HTMLParser() >>> parser.parseFragment('this is a fragment') """ self._parse(stream, True, *args, **kwargs) return self.tree.getFragment() def parseError(self, errorcode="XXX-undefined-error", datavars=None): # XXX The idea is to make errorcode mandatory. if datavars is None: datavars = {} self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) if self.strict: raise ParseError(E[errorcode] % datavars) def adjustMathMLAttributes(self, token): adjust_attributes(token, adjustMathMLAttributes) def adjustSVGAttributes(self, token): adjust_attributes(token, adjustSVGAttributes) def adjustForeignAttributes(self, token): adjust_attributes(token, adjustForeignAttributesMap) def reparseTokenNormal(self, token): # pylint:disable=unused-argument self.parser.phase() def resetInsertionMode(self): # The name of this method is mostly historical. (It's also used in the # specification.) 
last = False newModes = { "select": "inSelect", "td": "inCell", "th": "inCell", "tr": "inRow", "tbody": "inTableBody", "thead": "inTableBody", "tfoot": "inTableBody", "caption": "inCaption", "colgroup": "inColumnGroup", "table": "inTable", "head": "inBody", "body": "inBody", "frameset": "inFrameset", "html": "beforeHead" } for node in self.tree.openElements[::-1]: nodeName = node.name new_phase = None if node == self.tree.openElements[0]: assert self.innerHTML last = True nodeName = self.innerHTML # Check for conditions that should only happen in the innerHTML # case if nodeName in ("select", "colgroup", "head", "html"): assert self.innerHTML if not last and node.namespace != self.tree.defaultNamespace: continue if nodeName in newModes: new_phase = self.phases[newModes[nodeName]] break elif last: new_phase = self.phases["inBody"] break self.phase = new_phase def parseRCDataRawtext(self, token, contentType): # Generic RCDATA/RAWTEXT Parsing algorithm assert contentType in ("RAWTEXT", "RCDATA") self.tree.insertElement(token) if contentType == "RAWTEXT": self.tokenizer.state = self.tokenizer.rawtextState else: self.tokenizer.state = self.tokenizer.rcdataState self.originalPhase = self.phase self.phase = self.phases["text"] @_utils.memoize def getPhases(debug): def log(function): """Logger that records which phase processes each token""" type_names = {value: key for key, value in tokenTypes.items()} def wrapped(self, *args, **kwargs): if function.__name__.startswith("process") and len(args) > 0: token = args[0] info = {"type": type_names[token['type']]} if token['type'] in tagTokenTypes: info["name"] = token['name'] self.parser.log.append((self.parser.tokenizer.state.__name__, self.parser.phase.__class__.__name__, self.__class__.__name__, function.__name__, info)) return function(self, *args, **kwargs) else: return function(self, *args, **kwargs) return wrapped def getMetaclass(use_metaclass, metaclass_func): if use_metaclass: return 
method_decorator_metaclass(metaclass_func) else: return type # pylint:disable=unused-argument class Phase(with_metaclass(getMetaclass(debug, log))): """Base class for helper object that implements each phase of processing """ __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache") def __init__(self, parser, tree): self.parser = parser self.tree = tree self.__startTagCache = {} self.__endTagCache = {} def processEOF(self): raise NotImplementedError def processComment(self, token): # For most phases the following is correct. Where it's not it will be # overridden. self.tree.insertComment(token, self.tree.openElements[-1]) def processDoctype(self, token): self.parser.parseError("unexpected-doctype") def processCharacters(self, token): self.tree.insertText(token["data"]) def processSpaceCharacters(self, token): self.tree.insertText(token["data"]) def processStartTag(self, token): # Note the caching is done here rather than BoundMethodDispatcher as doing it there # requires a circular reference to the Phase, and this ends up with a significant # (CPython 2.7, 3.8) GC cost when parsing many short inputs name = token["name"] # In Py2, using `in` is quicker in general than try/except KeyError # In Py3, `in` is quicker when there are few cache hits (typically short inputs) if name in self.__startTagCache: func = self.__startTagCache[name] else: func = self.__startTagCache[name] = self.startTagHandler[name] # bound the cache size in case we get loads of unknown tags while len(self.__startTagCache) > len(self.startTagHandler) * 1.1: # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7 self.__startTagCache.pop(next(iter(self.__startTagCache))) return func(token) def startTagHtml(self, token): if not self.parser.firstStartTag and token["name"] == "html": self.parser.parseError("non-html-root") # XXX Need a check here to see if the first start tag token emitted is # this token... If it's not, invoke self.parser.parseError(). 
for attr, value in token["data"].items(): if attr not in self.tree.openElements[0].attributes: self.tree.openElements[0].attributes[attr] = value self.parser.firstStartTag = False def processEndTag(self, token): # Note the caching is done here rather than BoundMethodDispatcher as doing it there # requires a circular reference to the Phase, and this ends up with a significant # (CPython 2.7, 3.8) GC cost when parsing many short inputs name = token["name"] # In Py2, using `in` is quicker in general than try/except KeyError # In Py3, `in` is quicker when there are few cache hits (typically short inputs) if name in self.__endTagCache: func = self.__endTagCache[name] else: func = self.__endTagCache[name] = self.endTagHandler[name] # bound the cache size in case we get loads of unknown tags while len(self.__endTagCache) > len(self.endTagHandler) * 1.1: # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7 self.__endTagCache.pop(next(iter(self.__endTagCache))) return func(token) class InitialPhase(Phase): __slots__ = tuple() def processSpaceCharacters(self, token): pass def processComment(self, token): self.tree.insertComment(token, self.tree.document) def processDoctype(self, token): name = token["name"] publicId = token["publicId"] systemId = token["systemId"] correct = token["correct"] if (name != "html" or publicId is not None or systemId is not None and systemId != "about:legacy-compat"): self.parser.parseError("unknown-doctype") if publicId is None: publicId = "" self.tree.insertDoctype(token) if publicId != "": publicId = publicId.translate(asciiUpper2Lower) if (not correct or token["name"] != "html" or publicId.startswith( ("+//silmaril//dtd html pro v0r11 19970101//", "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", "-//as//dtd html 3.0 aswedit + extensions//", "-//ietf//dtd html 2.0 level 1//", "-//ietf//dtd html 2.0 level 2//", "-//ietf//dtd html 2.0 strict level 1//", "-//ietf//dtd html 2.0 strict level 2//", "-//ietf//dtd html 2.0 
strict//", "-//ietf//dtd html 2.0//", "-//ietf//dtd html 2.1e//", "-//ietf//dtd html 3.0//", "-//ietf//dtd html 3.2 final//", "-//ietf//dtd html 3.2//", "-//ietf//dtd html 3//", "-//ietf//dtd html level 0//", "-//ietf//dtd html level 1//", "-//ietf//dtd html level 2//", "-//ietf//dtd html level 3//", "-//ietf//dtd html strict level 0//", "-//ietf//dtd html strict level 1//", "-//ietf//dtd html strict level 2//", "-//ietf//dtd html strict level 3//", "-//ietf//dtd html strict//", "-//ietf//dtd html//", "-//metrius//dtd metrius presentational//", "-//microsoft//dtd internet explorer 2.0 html strict//", "-//microsoft//dtd internet explorer 2.0 html//", "-//microsoft//dtd internet explorer 2.0 tables//", "-//microsoft//dtd internet explorer 3.0 html strict//", "-//microsoft//dtd internet explorer 3.0 html//", "-//microsoft//dtd internet explorer 3.0 tables//", "-//netscape comm. corp.//dtd html//", "-//netscape comm. corp.//dtd strict html//", "-//o'reilly and associates//dtd html 2.0//", "-//o'reilly and associates//dtd html extended 1.0//", "-//o'reilly and associates//dtd html extended relaxed 1.0//", "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", "-//spyglass//dtd html 2.0 extended//", "-//sq//dtd html 2.0 hotmetal + extensions//", "-//sun microsystems corp.//dtd hotjava html//", "-//sun microsystems corp.//dtd hotjava strict html//", "-//w3c//dtd html 3 1995-03-24//", "-//w3c//dtd html 3.2 draft//", "-//w3c//dtd html 3.2 final//", "-//w3c//dtd html 3.2//", "-//w3c//dtd html 3.2s draft//", "-//w3c//dtd html 4.0 frameset//", "-//w3c//dtd html 4.0 transitional//", "-//w3c//dtd html experimental 19960712//", "-//w3c//dtd html experimental 970421//", "-//w3c//dtd w3 html//", "-//w3o//dtd w3 html 3.0//", "-//webtechs//dtd mozilla html 2.0//", "-//webtechs//dtd mozilla html//")) or publicId in ("-//w3o//dtd w3 html strict 3.0//en//", "-/w3c/dtd html 4.0 
class BeforeHtmlPhase(Phase):
    """Phase used until a root ``html`` element has been inserted.

    Comments are attached to the document and whitespace is dropped; any
    other token (apart from stray end tags) makes the phase insert an
    implied ``html`` root and hand over to the "beforeHead" phase.
    """

    __slots__ = ()

    # helper methods
    def insertHtmlElement(self):
        """Insert an implied ``html`` root and switch to "beforeHead"."""
        root_token = impliedTagToken("html", "StartTag")
        self.tree.insertRoot(root_token)
        self.parser.phase = self.parser.phases["beforeHead"]

    # other
    def processEOF(self):
        """Create the implied root at end of input and return ``True``.

        NOTE(review): ``True`` presumably asks the caller to process EOF
        again in the new phase -- the dispatch loop is not visible here.
        """
        self.insertHtmlElement()
        return True

    def processComment(self, token):
        """Attach the comment directly to the document node."""
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        """Drop whitespace seen before the root element (no action)."""

    def processCharacters(self, token):
        """Create the implied root, then return the token unchanged."""
        self.insertHtmlElement()
        return token

    def processStartTag(self, token):
        """Create the implied root and return the token unchanged.

        An explicit ``html`` start tag is additionally recorded on the
        parser through ``firstStartTag``.
        """
        if token["name"] == "html":
            self.parser.firstStartTag = True
        self.insertHtmlElement()
        return token

    def processEndTag(self, token):
        """Handle an end tag seen before the root element.

        ``head``, ``body``, ``html`` and ``br`` end tags create the implied
        root and the token is returned; every other end tag only reports a
        parse error and yields ``None``.
        """
        tag_name = token["name"]
        if tag_name in ("head", "body", "html", "br"):
            self.insertHtmlElement()
            return token
        self.parser.parseError("unexpected-end-tag-before-html",
                               {"name": tag_name})


class BeforeHeadPhase(Phase):
    """Phase used after the root exists but before ``head`` is open.

    Anything other than whitespace, an ``html`` start tag, a ``head``
    start tag or an unrelated end tag opens an implied ``head`` element.
    """

    __slots__ = ()

    def processEOF(self):
        """Open an implied ``head`` at end of input and return ``True``."""
        implied_head = impliedTagToken("head", "StartTag")
        self.startTagHead(implied_head)
        return True

    def processSpaceCharacters(self, token):
        """Drop whitespace seen before ``head`` (no action)."""

    def processCharacters(self, token):
        """Open an implied ``head``, then return the token unchanged."""
        implied_head = impliedTagToken("head", "StartTag")
        self.startTagHead(implied_head)
        return token

    def startTagHtml(self, token):
        """Delegate an ``html`` start tag to the "inBody" phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagHead(self, token):
        """Insert ``head``, remember it as the head pointer, go "inHead"."""
        self.tree.insertElement(token)
        # The element just inserted is the last entry of the open stack.
        self.tree.headPointer = self.tree.openElements[-1]
        self.parser.phase = self.parser.phases["inHead"]

    def startTagOther(self, token):
        """Open an implied ``head``, then return the token unchanged."""
        implied_head = impliedTagToken("head", "StartTag")
        self.startTagHead(implied_head)
        return token

    def endTagImplyHead(self, token):
        """Open an implied ``head`` for head/body/html/br end tags."""
        implied_head = impliedTagToken("head", "StartTag")
        self.startTagHead(implied_head)
        return token

    def endTagOther(self, token):
        """Report any other end tag as a parse error and ignore it."""
        self.parser.parseError("end-tag-after-implied-root",
                               {"name": token["name"]})

    # Tag-name -> handler tables; ``default`` is the fallback handler.
    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("head", startTagHead),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("head", "body", "html", "br"), endTagImplyHead),
    ])
    endTagHandler.default = endTagOther


class InHeadPhase(Phase):
    """Phase used while the ``head`` element is open.

    Tokens that do not belong in ``head`` close it implicitly through
    :meth:`anythingElse`, which moves the parser to "afterHead".
    """

    __slots__ = ()

    # the real thing
    def processEOF(self):
        """Close ``head`` implicitly at end of input and return ``True``."""
        self.anythingElse()
        return True

    def processCharacters(self, token):
        """Close ``head`` implicitly, then return the token unchanged."""
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        """Delegate an ``html`` start tag to the "inBody" phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagHead(self, token):
        """Report a second ``head`` start tag as a parse error."""
        self.parser.parseError("two-heads-are-not-better-than-one")

    def startTagBaseLinkCommand(self, token):
        """Insert a void element, pop it at once, acknowledge self-closing."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagMeta(self, token):
        """Insert a void ``meta`` element and apply any encoding it declares.

        The encoding is only changed while the stream's current encoding
        is marked "tentative". A ``charset`` attribute takes precedence;
        otherwise ``content`` is parsed when ``http-equiv`` equals
        "content-type" (case-insensitively).
        """
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

        attrs = token["data"]
        stream = self.parser.tokenizer.stream
        if stream.charEncoding[1] != "tentative":
            return

        if "charset" in attrs:
            stream.changeEncoding(attrs["charset"])
            return

        declares_content_type = (
            "content" in attrs and
            "http-equiv" in attrs and
            attrs["http-equiv"].lower() == "content-type"
        )
        if declares_content_type:
            # Encoding the value as UTF-8 is a hack: ideally the abstract
            # Unicode string would be handed to ContentAttrParser, but
            # UTF-8 can encode every character and is an ASCII superset,
            # so it works.
            raw = _inputstream.EncodingBytes(attrs["content"].encode("utf-8"))
            codec = _inputstream.ContentAttrParser(raw).parse()
            stream.changeEncoding(codec)

    def startTagTitle(self, token):
        """Parse the ``title`` contents as RCDATA."""
        self.parser.parseRCDataRawtext(token, "RCDATA")

    def startTagNoFramesStyle(self, token):
        """Parse ``noframes``/``style`` contents as RAWTEXT."""
        # Need to decide whether to implement the scripting-disabled case
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagNoscript(self, token):
        """Handle ``noscript`` depending on the parser's scripting flag.

        With scripting on the contents are parsed as RAWTEXT; otherwise
        the element is inserted and the parser enters "inHeadNoscript".
        """
        if self.parser.scripting:
            self.parser.parseRCDataRawtext(token, "RAWTEXT")
            return
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inHeadNoscript"]

    def startTagScript(self, token):
        """Insert ``script`` and switch tokenizer and parser to script text.

        The current phase is stored in ``parser.originalPhase`` before the
        parser moves to the "text" phase.
        """
        self.tree.insertElement(token)
        tokenizer = self.parser.tokenizer
        tokenizer.state = tokenizer.scriptDataState
        self.parser.originalPhase = self.parser.phase
        self.parser.phase = self.parser.phases["text"]

    def startTagOther(self, token):
        """Close ``head`` implicitly, then return the token unchanged."""
        self.anythingElse()
        return token

    def endTagHead(self, token):
        """Pop ``head`` off the open-element stack and go to "afterHead"."""
        popped = self.parser.tree.openElements.pop()
        assert popped.name == "head", "Expected head got %s" % popped.name
        self.parser.phase = self.parser.phases["afterHead"]

    def endTagHtmlBodyBr(self, token):
        """Close ``head`` implicitly for br/html/body end tags."""
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report any other end tag as a parse error and ignore it."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        """Act as though a ``head`` end tag had been seen."""
        self.endTagHead(impliedTagToken("head"))

    # Tag-name -> handler tables; ``default`` is the fallback handler.
    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("title", startTagTitle),
        (("noframes", "style"), startTagNoFramesStyle),
        ("noscript", startTagNoscript),
        ("script", startTagScript),
        (("base", "basefont", "bgsound", "command", "link"),
         startTagBaseLinkCommand),
        ("meta", startTagMeta),
        ("head", startTagHead),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("head", endTagHead),
        (("br", "html", "body"), endTagHtmlBodyBr),
    ])
    endTagHandler.default = endTagOther


class InHeadNoscriptPhase(Phase):
    """Phase used inside a ``noscript`` element that lives in ``head``.

    Comments, whitespace and a fixed set of head-level start tags are
    forwarded to the "inHead" phase; unexpected content reports a parse
    error and closes ``noscript`` implicitly.
    """

    __slots__ = ()

    def processEOF(self):
        """Report the error, close ``noscript`` and return ``True``."""
        self.parser.parseError("eof-in-head-noscript")
        self.anythingElse()
        return True

    def processComment(self, token):
        """Forward the comment to the "inHead" phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processComment(token)

    def processCharacters(self, token):
        """Report the error, close ``noscript`` and return the token."""
        self.parser.parseError("char-in-head-noscript")
        self.anythingElse()
        return token

    def processSpaceCharacters(self, token):
        """Forward whitespace to the "inHead" phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processSpaceCharacters(token)

    def startTagHtml(self, token):
        """Delegate an ``html`` start tag to the "inBody" phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagBaseLinkCommand(self, token):
        """Forward head-level start tags to the "inHead" phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processStartTag(token)

    def startTagHeadNoscript(self, token):
        """Report a nested ``head``/``noscript`` start tag and ignore it."""
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

    def startTagOther(self, token):
        """Report the error, close ``noscript`` and return the token."""
        self.parser.parseError("unexpected-inhead-noscript-tag",
                               {"name": token["name"]})
        self.anythingElse()
        return token

    def endTagNoscript(self, token):
        """Pop ``noscript`` off the open-element stack and go "inHead"."""
        popped = self.parser.tree.openElements.pop()
        assert popped.name == "noscript", "Expected noscript got %s" % popped.name
        self.parser.phase = self.parser.phases["inHead"]

    def endTagBr(self, token):
        """Report the error, close ``noscript`` and return the token."""
        self.parser.parseError("unexpected-inhead-noscript-tag",
                               {"name": token["name"]})
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report any other end tag as a parse error and ignore it."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        """Act as though a ``noscript`` end tag had been seen."""
        # Caller must raise parse error first!
        self.endTagNoscript(impliedTagToken("noscript"))

    # Tag-name -> handler tables; ``default`` is the fallback handler.
    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        (("basefont", "bgsound", "link", "meta", "noframes", "style"),
         startTagBaseLinkCommand),
        (("head", "noscript"), startTagHeadNoscript),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        ("noscript", endTagNoscript),
        ("br", endTagBr),
    ])
    endTagHandler.default = endTagOther


class AfterHeadPhase(Phase):
    """Phase used after ``head`` is closed and before ``body``/``frameset``.

    Tokens without a dedicated handler insert an implied ``body`` through
    :meth:`anythingElse` and move the parser to "inBody".
    """

    __slots__ = ()

    def processEOF(self):
        """Insert an implied ``body`` at end of input and return ``True``."""
        self.anythingElse()
        return True

    def processCharacters(self, token):
        """Insert an implied ``body``, then return the token unchanged."""
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        """Delegate an ``html`` start tag to the "inBody" phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagBody(self, token):
        """Insert an explicit ``body``; clears ``framesetOK``; go "inBody"."""
        self.parser.framesetOK = False
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inBody"]

    def startTagFrameset(self, token):
        """Insert ``frameset`` and switch to the "inFrameset" phase."""
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inFrameset"]

    def startTagFromHead(self, token):
        """Process a head-only start tag that arrived after ``head`` closed.

        After reporting a parse error, the remembered head element is
        pushed back on the open-element stack, the token is processed by
        the "inHead" phase, and the last ``head`` entry found on the stack
        is removed again.
        """
        self.parser.parseError("unexpected-start-tag-out-of-my-head",
                               {"name": token["name"]})
        self.tree.openElements.append(self.tree.headPointer)
        self.parser.phases["inHead"].processStartTag(token)

        open_elements = self.tree.openElements
        # Search a reversed copy so the head nearest the top is found first.
        last_head = next(
            (el for el in open_elements[::-1] if el.name == "head"), None)
        if last_head is not None:
            open_elements.remove(last_head)

    def startTagHead(self, token):
        """Report a late ``head`` start tag as a parse error and ignore it."""
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})

    def startTagOther(self, token):
        """Insert an implied ``body``, then return the token unchanged."""
        self.anythingElse()
        return token

    def endTagHtmlBodyBr(self, token):
        """Insert an implied ``body`` for body/html/br end tags."""
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report any other end tag as a parse error and ignore it."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        """Insert an implied ``body``, go "inBody" and set ``framesetOK``."""
        body_token = impliedTagToken("body", "StartTag")
        self.tree.insertElement(body_token)
        self.parser.phase = self.parser.phases["inBody"]
        self.parser.framesetOK = True

    # Tag-name -> handler tables; ``default`` is the fallback handler.
    startTagHandler = _utils.MethodDispatcher([
        ("html", startTagHtml),
        ("body", startTagBody),
        ("frameset", startTagFrameset),
        (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
          "style", "title"),
         startTagFromHead),
        ("head", startTagHead),
    ])
    startTagHandler.default = startTagOther

    endTagHandler = _utils.MethodDispatcher([
        (("body", "html", "br"), endTagHtmlBodyBr),
    ])
    endTagHandler.default = endTagOther
, , and