f any BOM or later declaration (such as in a meta
            element)

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parseFragment('<b>this is a fragment</b>')
        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

        """
        self._parse(stream, True, *args, **kwargs)
        return self.tree.getFragment()

    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        """Record a parse error and, when running strictly, raise it.

        A ``(position, errorcode, datavars)`` tuple is appended to
        ``self.errors``, where the position is whatever
        ``self.tokenizer.stream.position()`` returns at the time of the call.

        :arg errorcode: key identifying the error; also used to look up the
            message template in ``E`` when ``self.strict`` is true

        :arg datavars: values interpolated into the message template with
            ``%``; ``None`` is replaced by a new empty dict

        :raises ParseError: if ``self.strict`` is true

        """
        # XXX The idea is to make errorcode mandatory.
        variables = {} if datavars is None else datavars
        location = self.tokenizer.stream.position()
        self.errors.append((location, errorcode, variables))
        if not self.strict:
            return
        raise ParseError(E[errorcode] % variables)

    def adjustMathMLAttributes(self, token):
        """Run ``adjust_attributes`` on ``token`` for MathML.

        :arg token: the token handed to ``adjust_attributes``

        """
        # Inside the method body this name resolves in the enclosing module
        # scope, not to this method (presumably a table of MathML attribute
        # name fixes -- confirm against its definition).
        adjust_attributes(token, adjustMathMLAttributes)

    def adjustSVGAttributes(self, token):
        """Run ``adjust_attributes`` on ``token`` for SVG.

        :arg token: the token handed to ``adjust_attributes``

        """
        # Inside the method body this name resolves in the enclosing module
        # scope, not to this method (presumably a table of SVG attribute name
        # fixes -- confirm against its definition).
        adjust_attributes(token, adjustSVGAttributes)

    def adjustForeignAttributes(self, token):
        """Run ``adjust_attributes`` on ``token`` with ``adjustForeignAttributesMap``.

        :arg token: the token handed to ``adjust_attributes``

        """
        adjust_attributes(token, adjustForeignAttributesMap)

    def reparseTokenNormal(self, token):
        """Call ``self.parser.phase()``; ``token`` is accepted but not used.

        NOTE(review): no assignment to a ``parser`` attribute of this object
        is visible in this part of the file -- confirm that the attribute
        exists and that this method is still reachable.
        """
        # pylint:disable=unused-argument
        self.parser.phase()

    def resetInsertionMode(self):
        """Pick ``self.phase`` from the current stack of open elements.

        ``self.tree.openElements`` is walked from its last entry back to its
        first.  The first entry that is in the tree's default namespace and
        whose name has a row in the table below selects the phase.  The first
        entry of the stack is looked up under ``self.innerHTML`` instead of
        its own name, is never skipped for its namespace, and falls back to
        the "inBody" phase when no row matches.
        """
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        phaseNameFor = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset",
            "html": "beforeHead"
        }
        reachedRoot = False
        for element in reversed(self.tree.openElements):
            elementName = element.name
            chosen = None
            if element == self.tree.openElements[0]:
                assert self.innerHTML
                reachedRoot = True
                elementName = self.innerHTML
            # Check for conditions that should only happen in the innerHTML
            # case
            if elementName in ("select", "colgroup", "head", "html"):
                assert self.innerHTML

            # Elements outside the default namespace never select a phase;
            # only the bottom of the stack is exempt from this filter.
            if not reachedRoot and element.namespace != self.tree.defaultNamespace:
                continue

            phaseName = phaseNameFor.get(elementName)
            if phaseName is not None:
                chosen = self.phases[phaseName]
                break
            if reachedRoot:
                chosen = self.phases["inBody"]
                break

        self.phase = chosen

    def parseRCDataRawtext(self, token, contentType):
        """Generic RCDATA/RAWTEXT parsing algorithm.

        Inserts an element for ``token``, switches the tokenizer to the state
        matching ``contentType``, remembers the current phase in
        ``self.originalPhase`` and moves to the "text" phase.

        :arg token: the token passed to ``self.tree.insertElement``

        :arg contentType: either ``"RAWTEXT"`` or ``"RCDATA"``

        """
        assert contentType in ("RAWTEXT", "RCDATA")

        self.tree.insertElement(token)

        tokenizer = self.tokenizer
        tokenizer.state = (tokenizer.rawtextState if contentType == "RAWTEXT"
                           else tokenizer.rcdataState)

        # Keep the phase we came from before switching to "text".
        self.originalPhase = self.phase
        self.phase = self.phases["text"]


@_utils.memoize
def getPhases(debug):
    def log(function):
        """Logger that records which phase processes each token

        Returns a wrapper around ``function``.  When the wrapped function's
        name starts with "process" and the call has at least one positional
        argument (used as the token), an entry is appended to
        ``self.parser.log`` before the call; every call is then forwarded to
        ``function`` with its arguments unchanged.
        """
        tokenTypeNames = dict((number, label)
                              for label, number in tokenTypes.items())

        def wrapped(self, *args, **kwargs):
            if args and function.__name__.startswith("process"):
                token = args[0]
                tokenType = token['type']
                details = {"type": tokenTypeNames[tokenType]}
                # Only tag tokens get their name recorded
                if tokenType in tagTokenTypes:
                    details["name"] = token['name']

                parser = self.parser
                entry = (parser.tokenizer.state.__name__,
                         parser.phase.__class__.__name__,
                         self.__class__.__name__,
                         function.__name__,
                         details)
                parser.log.append(entry)
            return function(self, *args, **kwargs)
        return wrapped

    def getMetaclass(use_metaclass, metaclass_func):
        """Return the metaclass to build phase classes with.

        :arg use_metaclass: when false, plain ``type`` is returned

        :arg metaclass_func: passed to ``method_decorator_metaclass`` when
            ``use_metaclass`` is true

        :returns: ``type`` or the result of ``method_decorator_metaclass``

        """
        if not use_metaclass:
            return type
        return method_decorator_metaclass(metaclass_func)

    # pylint:disable=unused-argument
    class Phase(with_metaclass(getMetaclass(debug, log))):
        """Base class for helper object that implements each phase of processing

        Tag tokens are dispatched through ``self.startTagHandler`` and
        ``self.endTagHandler``, which this class reads but does not define.
        The handler found for each tag name is remembered in a per-instance
        cache.
        """
        __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")

        def __init__(self, parser, tree):
            self.parser, self.tree = parser, tree
            self.__startTagCache, self.__endTagCache = {}, {}

        def processEOF(self):
            """Not implemented here; always raises NotImplementedError."""
            raise NotImplementedError

        def processComment(self, token):
            """Insert the comment under the last entry of the open elements."""
            # For most phases the following is correct. Where it's not it will be
            # overridden.
            parent = self.tree.openElements[-1]
            self.tree.insertComment(token, parent)

        def processDoctype(self, token):
            """Report an "unexpected-doctype" parse error."""
            self.parser.parseError("unexpected-doctype")

        def processCharacters(self, token):
            """Insert the token's ``data`` as text."""
            text = token["data"]
            self.tree.insertText(text)

        def processSpaceCharacters(self, token):
            """Insert the token's ``data`` as text."""
            text = token["data"]
            self.tree.insertText(text)

        def processStartTag(self, token):
            """Look up the start tag handler for ``token["name"]`` and call it."""
            # Note the caching is done here rather than BoundMethodDispatcher as doing it there
            # requires a circular reference to the Phase, and this ends up with a significant
            # (CPython 2.7, 3.8) GC cost when parsing many short inputs
            tagName = token["name"]
            cache = self.__startTagCache
            # In Py2, using `in` is quicker in general than try/except KeyError
            # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
            if tagName not in cache:
                handlers = self.startTagHandler
                handler = cache[tagName] = handlers[tagName]
                # bound the cache size in case we get loads of unknown tags
                sizeLimit = len(handlers) * 1.1
                while len(cache) > sizeLimit:
                    # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
                    cache.pop(next(iter(cache)))
            else:
                handler = cache[tagName]
            return handler(token)

        def startTagHtml(self, token):
            """Copy attributes of an ``html`` start tag onto the first open element.

            Attributes already present on that element are left untouched.
            """
            if not self.parser.firstStartTag and token["name"] == "html":
                self.parser.parseError("non-html-root")
            # XXX Need a check here to see if the first start tag token emitted is
            # this token... If it's not, invoke self.parser.parseError().
            for attrName, attrValue in token["data"].items():
                rootAttributes = self.tree.openElements[0].attributes
                if attrName in rootAttributes:
                    continue
                rootAttributes[attrName] = attrValue
            self.parser.firstStartTag = False

        def processEndTag(self, token):
            """Look up the end tag handler for ``token["name"]`` and call it."""
            # Note the caching is done here rather than BoundMethodDispatcher as doing it there
            # requires a circular reference to the Phase, and this ends up with a significant
            # (CPython 2.7, 3.8) GC cost when parsing many short inputs
            tagName = token["name"]
            cache = self.__endTagCache
            # In Py2, using `in` is quicker in general than try/except KeyError
            # In Py3, `in` is quicker when there are few cache hits (typically short inputs)
            if tagName not in cache:
                handlers = self.endTagHandler
                handler = cache[tagName] = handlers[tagName]
                # bound the cache size in case we get loads of unknown tags
                sizeLimit = len(handlers) * 1.1
                while len(cache) > sizeLimit:
                    # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
                    cache.pop(next(iter(cache)))
            else:
                handler = cache[tagName]
            return handler(token)

    class InitialPhase(Phase):
        """Phase that handles tokens arriving before any doctype.

        Space characters are ignored and comments are attached to the
        document.  A doctype token may set ``parser.compatMode``; character,
        start tag, end tag and EOF tokens report a parse error and force
        "quirks".  Both of those paths switch the parser to the "beforeHtml"
        phase.
        """
        __slots__ = ()

        def processSpaceCharacters(self, token):
            """Ignore the token."""
            pass

        def processComment(self, token):
            """Insert the comment as a child of the document."""
            document = self.tree.document
            self.tree.insertComment(token, document)

        def processDoctype(self, token):
            """Insert the doctype and derive ``parser.compatMode`` from it.

            ``compatMode`` is set to "quirks" or "limited quirks" when the
            doctype matches the corresponding conditions below, and is left
            untouched otherwise.
            """
            publicId = token["publicId"]
            systemId = token["systemId"]
            correct = token["correct"]

            unexpectedSystemId = (systemId is not None and
                                  systemId != "about:legacy-compat")
            if (token["name"] != "html" or publicId is not None or
                    unexpectedSystemId):
                self.parser.parseError("unknown-doctype")

            self.tree.insertDoctype(token)

            # All comparisons below use the translated public identifier,
            # with a missing identifier treated as the empty string.
            publicId = "" if publicId is None else publicId
            if publicId:
                publicId = publicId.translate(asciiUpper2Lower)

            quirksPrefixes = (
                "+//silmaril//dtd html pro v0r11 19970101//",
                "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
                "-//as//dtd html 3.0 aswedit + extensions//",
                "-//ietf//dtd html 2.0 level 1//",
                "-//ietf//dtd html 2.0 level 2//",
                "-//ietf//dtd html 2.0 strict level 1//",
                "-//ietf//dtd html 2.0 strict level 2//",
                "-//ietf//dtd html 2.0 strict//",
                "-//ietf//dtd html 2.0//",
                "-//ietf//dtd html 2.1e//",
                "-//ietf//dtd html 3.0//",
                "-//ietf//dtd html 3.2 final//",
                "-//ietf//dtd html 3.2//",
                "-//ietf//dtd html 3//",
                "-//ietf//dtd html level 0//",
                "-//ietf//dtd html level 1//",
                "-//ietf//dtd html level 2//",
                "-//ietf//dtd html level 3//",
                "-//ietf//dtd html strict level 0//",
                "-//ietf//dtd html strict level 1//",
                "-//ietf//dtd html strict level 2//",
                "-//ietf//dtd html strict level 3//",
                "-//ietf//dtd html strict//",
                "-//ietf//dtd html//",
                "-//metrius//dtd metrius presentational//",
                "-//microsoft//dtd internet explorer 2.0 html strict//",
                "-//microsoft//dtd internet explorer 2.0 html//",
                "-//microsoft//dtd internet explorer 2.0 tables//",
                "-//microsoft//dtd internet explorer 3.0 html strict//",
                "-//microsoft//dtd internet explorer 3.0 html//",
                "-//microsoft//dtd internet explorer 3.0 tables//",
                "-//netscape comm. corp.//dtd html//",
                "-//netscape comm. corp.//dtd strict html//",
                "-//o'reilly and associates//dtd html 2.0//",
                "-//o'reilly and associates//dtd html extended 1.0//",
                "-//o'reilly and associates//dtd html extended relaxed 1.0//",
                "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
                "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
                "-//spyglass//dtd html 2.0 extended//",
                "-//sq//dtd html 2.0 hotmetal + extensions//",
                "-//sun microsystems corp.//dtd hotjava html//",
                "-//sun microsystems corp.//dtd hotjava strict html//",
                "-//w3c//dtd html 3 1995-03-24//",
                "-//w3c//dtd html 3.2 draft//",
                "-//w3c//dtd html 3.2 final//",
                "-//w3c//dtd html 3.2//",
                "-//w3c//dtd html 3.2s draft//",
                "-//w3c//dtd html 4.0 frameset//",
                "-//w3c//dtd html 4.0 transitional//",
                "-//w3c//dtd html experimental 19960712//",
                "-//w3c//dtd html experimental 970421//",
                "-//w3c//dtd w3 html//",
                "-//w3o//dtd w3 html 3.0//",
                "-//webtechs//dtd mozilla html 2.0//",
                "-//webtechs//dtd mozilla html//",
            )
            quirksExactIds = (
                "-//w3o//dtd w3 html strict 3.0//en//",
                "-/w3c/dtd html 4.0 transitional/en",
                "html",
            )
            html401Prefixes = (
                "-//w3c//dtd html 4.01 frameset//",
                "-//w3c//dtd html 4.01 transitional//",
            )
            xhtml10Prefixes = (
                "-//w3c//dtd xhtml 1.0 frameset//",
                "-//w3c//dtd xhtml 1.0 transitional//",
            )
            ibmSystemId = "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"

            # The HTML 4.01 frameset/transitional identifiers count as quirks
            # without a system identifier and as limited quirks with one.
            isHtml401 = publicId.startswith(html401Prefixes)

            if (not correct or
                    token["name"] != "html" or
                    publicId.startswith(quirksPrefixes) or
                    publicId in quirksExactIds or
                    (isHtml401 and systemId is None) or
                    (systemId and systemId.lower() == ibmSystemId)):
                self.parser.compatMode = "quirks"
            elif (publicId.startswith(xhtml10Prefixes) or
                  (isHtml401 and systemId is not None)):
                self.parser.compatMode = "limited quirks"

            self.parser.phase = self.parser.phases["beforeHtml"]

        def anythingElse(self):
            """Force "quirks" mode and switch to the "beforeHtml" phase."""
            parser = self.parser
            parser.compatMode = "quirks"
            parser.phase = parser.phases["beforeHtml"]

        def processCharacters(self, token):
            """Report the missing doctype, run ``anythingElse``, return the token."""
            self.parser.parseError("expected-doctype-but-got-chars")
            self.anythingElse()
            return token

        def processStartTag(self, token):
            """Report the missing doctype, run ``anythingElse``, return the token."""
            details = {"name": token["name"]}
            self.parser.parseError("expected-doctype-but-got-start-tag", details)
            self.anythingElse()
            return token

        def processEndTag(self, token):
            """Report the missing doctype, run ``anythingElse``, return the token."""
            details = {"name": token["name"]}
            self.parser.parseError("expected-doctype-but-got-end-tag", details)
            self.anythingElse()
            return token

        def processEOF(self):
            """Report the missing doctype, run ``anythingElse``, return True."""
            self.parser.parseError("expected-doctype-but-got-eof")
            self.anythingElse()
            return True

    class BeforeHtmlPhase(Phase):
        """Phase that creates the root ``html`` element.

        Space characters are ignored and comments are attached to the
        document.  End tags other than head/body/html/br are reported and
        dropped.  Every other token inserts an implied ``html`` root and
        switches the parser to the "beforeHead" phase.
        """
        __slots__ = ()

        # helper methods
        def insertHtmlElement(self):
            """Insert an implied ``html`` root and move to "beforeHead"."""
            rootToken = impliedTagToken("html", "StartTag")
            self.tree.insertRoot(rootToken)
            parser = self.parser
            parser.phase = parser.phases["beforeHead"]

        # other
        def processEOF(self):
            """Insert the root element and return True."""
            self.insertHtmlElement()
            return True

        def processComment(self, token):
            """Insert the comment as a child of the document."""
            document = self.tree.document
            self.tree.insertComment(token, document)

        def processSpaceCharacters(self, token):
            """Ignore the token."""
            pass

        def processCharacters(self, token):
            """Insert the root element and return the token."""
            self.insertHtmlElement()
            return token

        def processStartTag(self, token):
            """Insert the root element and return the token.

            ``parser.firstStartTag`` is set when the tag itself is ``html``.
            """
            sawHtmlTag = token["name"] == "html"
            if sawHtmlTag:
                self.parser.firstStartTag = True
            self.insertHtmlElement()
            return token

        def processEndTag(self, token):
            """Handle an end tag seen before the root element exists.

            head/body/html/br insert the root element and return the token;
            any other end tag is reported and nothing is returned.
            """
            tagName = token["name"]
            if tagName in ("head", "body", "html", "br"):
                self.insertHtmlElement()
                return token
            self.parser.parseError("unexpected-end-tag-before-html",
                                   {"name": tagName})

    class BeforeHeadPhase(Phase):
        """Phase that creates the ``head`` element.

        Space characters are ignored.  A ``head`` start tag inserts the
        element; an ``html`` start tag is handed to the "inBody" phase.
        Characters, EOF, other start tags and head/body/html/br end tags
        insert an implied ``head``.  Any other end tag is reported and
        dropped.
        """
        __slots__ = ()

        def processEOF(self):
            """Insert an implied ``head`` and return True."""
            impliedHead = impliedTagToken("head", "StartTag")
            self.startTagHead(impliedHead)
            return True

        def processSpaceCharacters(self, token):
            """Ignore the token."""
            pass

        def processCharacters(self, token):
            """Insert an implied ``head`` and return the token."""
            impliedHead = impliedTagToken("head", "StartTag")
            self.startTagHead(impliedHead)
            return token

        def startTagHtml(self, token):
            """Delegate to the "inBody" phase's start tag processing."""
            inBody = self.parser.phases["inBody"]
            return inBody.processStartTag(token)

        def startTagHead(self, token):
            """Insert the element, record it as the head pointer, go to "inHead"."""
            tree = self.tree
            tree.insertElement(token)
            tree.headPointer = tree.openElements[-1]
            parser = self.parser
            parser.phase = parser.phases["inHead"]

        def startTagOther(self, token):
            """Insert an implied ``head`` and return the token."""
            impliedHead = impliedTagToken("head", "StartTag")
            self.startTagHead(impliedHead)
            return token

        def endTagImplyHead(self, token):
            """Insert an implied ``head`` and return the token."""
            impliedHead = impliedTagToken("head", "StartTag")
            self.startTagHead(impliedHead)
            return token

        def endTagOther(self, token):
            """Report an "end-tag-after-implied-root" parse error."""
            details = {"name": token["name"]}
            self.parser.parseError("end-tag-after-implied-root", details)

        startTagHandler = _utils.MethodDispatcher([
            ("html", startTagHtml),
            ("head", startTagHead)
        ])
        startTagHandler.default = startTagOther

        endTagHandler = _utils.MethodDispatcher([
            (("head", "body", "html", "br"), endTagImplyHead)
        ])
        endTagHandler.default = endTagOther

    class InHeadPhase(Phase):
        __slots__ = tuple()

        # the real thing
        def processEOF(self):
            """Run ``self.anythingElse()`` and return True."""
            self.anythingElse()
            return True

        def processCharacters(self, token):
            """Run ``self.anythingElse()`` and return the token unchanged."""
            self.anythingElse()
            return token

        def startTagHtml(self, token):
            """Delegate to the "inBody" phase and return its result."""
            return self.parser.phases["inBody"].processStartTag(token)

        def startTagHead(self, token):
            """Report a parse error for a ``head`` start tag; the token is not inserted."""
            self.parser.parseError("two-heads-are-not-better-than-one")

        def startTagBaseLinkCommand(self, token):
            """Insert the element and immediately remove it from the open elements.

            The token is also flagged with ``selfClosingAcknowledged``.
            """
            self.tree.insertElement(token)
            # The element stays in the tree; only the stack entry is dropped.
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

        def startTagMeta(self, token):
            """Insert a ``meta`` element and act on any encoding it declares.

            The element is inserted, popped from the open elements and the
            token flagged with ``selfClosingAcknowledged``.  Only while the
            stream's encoding is marked "tentative" is ``changeEncoding``
            called: with the ``charset`` attribute if there is one, otherwise
            with whatever ``ContentAttrParser`` extracts from ``content``
            when ``http-equiv`` is "content-type" (compared case-insensitively).
            """
            self.tree.insertElement(token)
            self.tree.openElements.pop()
            token["selfClosingAcknowledged"] = True

            stream = self.parser.tokenizer.stream
            if stream.charEncoding[1] != "tentative":
                return

            attributes = token["data"]
            if "charset" in attributes:
                stream.changeEncoding(attributes["charset"])
                return

            declaresContentType = ("content" in attributes and
                                   "http-equiv" in attributes and
                                   attributes["http-equiv"].lower() == "content-type")
            if not declaresContentType:
                return

            # Encoding it as UTF-8 here is a hack, as really we should pass
            # the abstract Unicode string, and just use the
            # ContentAttrParser on that, but using UTF-8 allows all chars
            # to be encoded and as a ASCII-superset works.
            contentBytes = _inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
            codec = _inputstream.ContentAttrParser(contentBytes).parse()
            stream.changeEncoding(codec)

        def startTagTitle(self, token):
            """Hand the token to the parser's RCDATA/RAWTEXT routine as "RCDATA"."""
            self.parser.parseRCDataRawtext(token, "RCDATA")

        def startTagNoFramesStyle(self, token):
            """Hand the token to the parser's RCDATA/RAWTEXT routine as "RAWTEXT"."""
            # Need to decide whether to implement the scripting-disabled case
            self.parser.parseRCDataRawtext(token, "RAWTEXT")

        def startTagNoscript(self, token):
            if self.parser.scripting:
                self.parser.parseRCDataRawtext(token, "RAWTEXT")
            else:
                sel