// ParserImpl.java example
//
// Explorer
// JSRefactor-master
/*
 *  Java HTML Tidy - JTidy
 *  HTML parser and pretty printer
 *
 *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
 *  Institute of Technology, Institut National de Recherche en
 *  Informatique et en Automatique, Keio University). All Rights
 *  Reserved.
 *
 *  Contributing Author(s):
 *
 *     Dave Raggett <dsr@w3.org>
 *     Andy Quick <ac.quick@sympatico.ca> (translation to Java)
 *     Gary L Peskin <garyp@firstech.com> (Java development)
 *     Sami Lempinen <sami@lempinen.net> (release management)
 *     Fabrizio Giustina <fgiust at users.sourceforge.net>
 *
 *  The contributing author(s) would like to thank all those who
 *  helped with testing, bug fixes, and patience.  This wouldn't
 *  have been possible without all of you.
 *
 *  COPYRIGHT NOTICE:
 *
 *  This software and documentation is provided "as is," and
 *  the copyright holders and contributing author(s) make no
 *  representations or warranties, express or implied, including
 *  but not limited to, warranties of merchantability or fitness
 *  for any particular purpose or that the use of the software or
 *  documentation will not infringe any third party patents,
 *  copyrights, trademarks or other rights.
 *
 *  The copyright holders and contributing author(s) will not be
 *  liable for any direct, indirect, special or consequential damages
 *  arising out of any use of the software or documentation, even if
 *  advised of the possibility of such damage.
 *
 *  Permission is hereby granted to use, copy, modify, and distribute
 *  this source code, or portions hereof, documentation and executables,
 *  for any purpose, without fee, subject to the following restrictions:
 *
 *  1. The origin of this source code must not be misrepresented.
 *  2. Altered versions must be plainly marked as such and must
 *     not be misrepresented as being the original source.
 *  3. This Copyright notice may not be removed or altered from any
 *     source or altered source distribution.
 *
 *  The copyright holders and contributing author(s) specifically
 *  permit, without fee, and encourage the use of this source code
 *  as a component for supporting the Hypertext Markup Language in
 *  commercial products. If you use this source code in a product,
 *  acknowledgment is not required but would be appreciated.
 *
 */
package org.w3c.tidy;

/**
 * HTML Parser implementation.
 * @author Dave Raggett <a href="mailto:dsr@w3.org">dsr@w3.org </a>
 * @author Andy Quick <a href="mailto:ac.quick@sympatico.ca">ac.quick@sympatico.ca </a> (translation to Java)
 * @author Fabrizio Giustina
 * @version $Revision: 1100 $ ($Author: aditsu $)
 */
public final class ParserImpl
{

    /**
     * Parser for html; a single {@link ParseHTML} instance held for the lifetime of the class.
     */
    public static final Parser HTML = new ParseHTML();

    /**
     * Parser for head; a single {@link ParseHead} instance.
     */
    public static final Parser HEAD = new ParseHead();

    /**
     * Parser for title; a single {@link ParseTitle} instance.
     */
    public static final Parser TITLE = new ParseTitle();

    /**
     * Parser for script; a single {@link ParseScript} instance.
     */
    public static final Parser SCRIPT = new ParseScript();

    /**
     * Parser for body; a single {@link ParseBody} instance.
     */
    public static final Parser BODY = new ParseBody();

    /**
     * Parser for frameset; a single {@link ParseFrameSet} instance.
     */
    public static final Parser FRAMESET = new ParseFrameSet();

    /**
     * Parser for inline; a single <code>ParseInline</code> instance.
     */
    public static final Parser INLINE = new ParseInline();

    /**
     * Parser for list; a single <code>ParseList</code> instance.
     */
    public static final Parser LIST = new ParseList();

    /**
     * Parser for definition lists; a single <code>ParseDefList</code> instance.
     */
    public static final Parser DEFLIST = new ParseDefList();

    /**
     * Parser for pre; a single <code>ParsePre</code> instance.
     */
    public static final Parser PRE = new ParsePre();

    /**
     * Parser for block elements; a single <code>ParseBlock</code> instance.
     */
    public static final Parser BLOCK = new ParseBlock();

    /**
     * Parser for table; a single <code>ParseTableTag</code> instance.
     */
    public static final Parser TABLETAG = new ParseTableTag();

    /**
     * Parser for colgroup; a single <code>ParseColGroup</code> instance.
     */
    public static final Parser COLGROUP = new ParseColGroup();

    /**
     * Parser for rowgroup; a single <code>ParseRowGroup</code> instance.
     */
    public static final Parser ROWGROUP = new ParseRowGroup();

    /**
     * Parser for row; a single <code>ParseRow</code> instance.
     */
    public static final Parser ROW = new ParseRow();

    /**
     * Parser for noframes; a single <code>ParseNoFrames</code> instance.
     */
    public static final Parser NOFRAMES = new ParseNoFrames();

    /**
     * Parser for select; a single <code>ParseSelect</code> instance.
     */
    public static final Parser SELECT = new ParseSelect();

    /**
     * Parser for text; a single <code>ParseText</code> instance.
     */
    public static final Parser TEXT = new ParseText();

    /**
     * Parser for empty elements; a single <code>ParseEmpty</code> instance.
     */
    public static final Parser EMPTY = new ParseEmpty();

    /**
     * Parser for optgroup; a single <code>ParseOptGroup</code> instance.
     */
    public static final Parser OPTGROUP = new ParseOptGroup();

    /**
     * Private constructor: ParserImpl only exposes static members and should not be instantiated.
     */
    private ParserImpl()
    {
        // intentionally empty - never called
    }

    /**
     * Hands an element over to the parser registered for its tag, after resetting the lexer's whitespace flags.
     * Nothing is parsed when the tag has no parser; a start-end tag (<code>&lt;x/&gt;</code>) is only passed to
     * {@link Node#trimEmptyElement(Lexer, Node)} instead of being parsed.
     * @param lexer Lexer supplying the tokens and holding the whitespace state
     * @param node element to parse; its <code>tag</code> must not be <code>null</code>
     * @param mode whitespace handling mode forwarded to the tag's parser
     */
    protected static void parseTag(Lexer lexer, Node node, short mode)
    {
        boolean emptyElement = (node.tag.model & Dict.CM_EMPTY) != 0;
        boolean inlineElement = (node.tag.model & Dict.CM_INLINE) != 0;

        // Fix by GLP 2000-12-21. Need to reset insertspace if this
        // is both a non-inline and empty tag (base, link, meta, isindex, hr, area).
        if (emptyElement)
        {
            lexer.waswhite = false;
        }
        else if (!inlineElement)
        {
            lexer.insertspace = false;
        }

        Parser tagParser = node.tag.getParser();

        if (tagParser != null)
        {
            if (node.type == Node.START_END_TAG)
            {
                // self-closed element: there is no content to parse
                Node.trimEmptyElement(lexer, node);
            }
            else
            {
                tagParser.parse(lexer, node, mode);
            }
        }
    }

    /**
     * Relocates a misplaced node into the document head. The head is searched for among the children of the nearest
     * html ancestor of <code>element</code> (or <code>element</code> itself). Normally called during parsing.
     * Tokens that are not start or start-end tags are only reported as discarded.
     * @param lexer Lexer used for reporting and for parsing the moved element
     * @param element element in which the node was found; starting point of the hunt for head
     * @param node node to move; it is detached from its current position first
     */
    protected static void moveToHead(Lexer lexer, Node element, Node node)
    {
        node.removeNode(); // make sure that node is isolated

        TagTable tt = lexer.configuration.tt;

        boolean opensElement = node.type == Node.START_TAG || node.type == Node.START_END_TAG;

        if (!opensElement)
        {
            lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
            return;
        }

        lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);

        // climb up to the html element
        Node ancestor = element;
        while (ancestor.tag != tt.tagHtml)
        {
            ancestor = ancestor.parent;
        }

        // append the node to the first head child, if there is one
        Node candidate = ancestor.content;
        while (candidate != null)
        {
            if (candidate.tag == tt.tagHead)
            {
                candidate.insertNodeAtEnd(node);
                break;
            }
            candidate = candidate.next;
        }

        if (node.tag.getParser() != null)
        {
            parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
        }
    }

    /**
     * Detaches the given node from its current position and appends it as the last child of the body element
     * found from the lexer's root node.
     * @param lexer Lexer giving access to the document root and the tag table
     * @param node Node to insert
     */
    static void moveNodeToBody(Lexer lexer, Node node)
    {
        node.removeNode();

        TagTable tt = lexer.configuration.tt;
        Node target = lexer.root.findBody(tt);
        target.insertNodeAtEnd(node);
    }

    /**
     * Parser for HTML. Works in two phases: first the head element is located (or inferred) and parsed, then the
     * remaining tokens are scanned for a body, frameset or noframes element. A body is inferred when none is found
     * and the document is not a frameset document.
     */
    public static class ParseHTML implements Parser
    {

        /**
         * Parses the content of the html element.
         * @param lexer Lexer supplying the tokens
         * @param html the html element whose children are being built
         * @param mode whitespace mode, forwarded to the head, body, frameset and noframes parsers
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node html, short mode)
        {
            Node node, head;
            // first frameset element seen, if any; non-null means "frameset document"
            Node frameset = null;
            // noframes element that collects non-frameset content of a frameset document
            Node noframes = null;

            // reset per-document flags before parsing starts
            lexer.configuration.xmlTags = false;
            lexer.seenEndBody = false;
            TagTable tt = lexer.configuration.tt;

            // phase 1: find the head element, or infer one
            while (true)
            {
                node = lexer.getToken(Lexer.IGNORE_WHITESPACE);

                // end of input: the head has to be inferred
                if (node == null)
                {
                    node = lexer.inferredTag("head");
                    break;
                }

                if (node.tag == tt.tagHead)
                {
                    break;
                }

                // a </html> before any head is dropped
                if (node.tag == html.tag && node.type == Node.END_TAG)
                {
                    lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // deal with comments etc.
                if (Node.insertMisc(html, node))
                {
                    continue;
                }

                // anything else is pushed back so it is seen again after the inferred head
                lexer.ungetToken();
                node = lexer.inferredTag("head");
                break;
            }

            head = node;
            html.insertNodeAtEnd(head);
            HEAD.parse(lexer, head, mode);

            // phase 2: look for body / frameset / noframes
            while (true)
            {
                node = lexer.getToken(Lexer.IGNORE_WHITESPACE);

                if (node == null)
                {
                    // end of input: only a non-frameset document gets an inferred body
                    if (frameset == null)
                    {
                        // implied body
                        node = lexer.inferredTag("body");
                        html.insertNodeAtEnd(node);
                        BODY.parse(lexer, node, mode);
                    }

                    return;
                }

                // robustly handle html tags
                if (node.tag == html.tag)
                {
                    // warn unless this is a start tag or we are in a frameset document;
                    // in a frameset document an end tag is recorded as the end of html
                    if (node.type != Node.START_TAG && frameset == null)
                    {
                        lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                    }
                    else if (node.type == Node.END_TAG)
                    {
                        lexer.seenEndHtml = true;
                    }

                    continue;
                }

                // deal with comments etc.
                if (Node.insertMisc(html, node))
                {
                    continue;
                }

                // if frameset document coerce <body> to <noframes>
                if (node.tag == tt.tagBody)
                {
                    if (node.type != Node.START_TAG)
                    {
                        lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (frameset != null)
                    {
                        // push <body> back so the noframes parser sees it
                        lexer.ungetToken();

                        if (noframes == null)
                        {
                            noframes = lexer.inferredTag("noframes");
                            frameset.insertNodeAtEnd(noframes);
                            lexer.report.warning(lexer, html, noframes, Report.INSERTING_TAG);
                        }

                        parseTag(lexer, noframes, mode);
                        continue;
                    }

                    // a real body rules out the frameset doctype
                    lexer.constrainVersion(~Dict.VERS_FRAMESET);
                    break; // to parse body
                }

                // flag an error if we see more than one frameset
                if (node.tag == tt.tagFrameset)
                {
                    if (node.type != Node.START_TAG)
                    {
                        lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (frameset != null)
                    {
                        lexer.report.error(lexer, html, node, Report.DUPLICATE_FRAMESET);
                    }
                    else
                    {
                        frameset = node;
                    }

                    // a duplicate frameset is still inserted and parsed
                    html.insertNodeAtEnd(node);
                    parseTag(lexer, node, mode);

                    // see if it includes a noframes element so that we can merge subsequent noframes elements
                    // (note: the scan is always over the first frameset; the last noframes child found wins)

                    for (node = frameset.content; node != null; node = node.next)
                    {
                        if (node.tag == tt.tagNoframes)
                        {
                            noframes = node;
                        }
                    }
                    continue;
                }

                // if not a frameset document coerce <noframes> to <body>
                if (node.tag == tt.tagNoframes)
                {
                    if (node.type != Node.START_TAG)
                    {
                        lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (frameset == null)
                    {
                        // drop the <noframes> token and continue with an inferred body instead
                        lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        node = lexer.inferredTag("body");
                        break;
                    }

                    // the first noframes is attached to the frameset; later ones are merged into it
                    if (noframes == null)
                    {
                        noframes = node;
                        frameset.insertNodeAtEnd(noframes);
                    }

                    parseTag(lexer, noframes, mode);
                    continue;
                }

                if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
                {
                    // head-only elements found after the head are moved back into it
                    if (node.tag != null && (node.tag.model & Dict.CM_HEAD) != 0)
                    {
                        moveToHead(lexer, html, node);
                        continue;
                    }

                    // #427675 - discard illegal frame element following a frameset - fix by Randy Waki 11 Oct 00
                    if (frameset != null && node.tag == tt.tagFrame)
                    {
                        lexer.report.warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }
                }

                // everything else is content: push it back for the noframes or body parser
                lexer.ungetToken();

                // insert other content into noframes element
                if (frameset != null)
                {
                    if (noframes == null)
                    {
                        noframes = lexer.inferredTag("noframes");
                        frameset.insertNodeAtEnd(noframes);
                    }
                    else
                    {
                        lexer.report.warning(lexer, html, node, Report.NOFRAMES_CONTENT);
                    }

                    lexer.constrainVersion(Dict.VERS_FRAMESET);
                    parseTag(lexer, noframes, mode);
                    continue;
                }

                node = lexer.inferredTag("body");
                lexer.constrainVersion(~Dict.VERS_FRAMESET);
                break;
            }

            // node must be body
            html.insertNodeAtEnd(node);
            parseTag(lexer, node, mode);
            // once the body has been parsed the html element is treated as finished
            lexer.seenEndHtml = true;
        }

    }

    /**
     * Parser for HEAD. Collects head-level elements until the head is closed explicitly or a token shows up that
     * cannot live in the head, in which case that token is pushed back for the caller.
     */
    public static class ParseHead implements Parser
    {

        /**
         * Parses the content of the head element. Duplicate title and base elements and noscript elements are
         * reported but still inserted.
         * @param lexer Lexer supplying the tokens
         * @param head the head element being filled
         * @param mode not used; tokens are always read ignoring whitespace
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node head, short mode)
        {
            TagTable tt = lexer.configuration.tt;
            int titleCount = 0;
            int baseCount = 0;

            for (Node token = lexer.getToken(Lexer.IGNORE_WHITESPACE);
                token != null;
                token = lexer.getToken(Lexer.IGNORE_WHITESPACE))
            {
                boolean isEndTag = token.type == Node.END_TAG;

                // explicit </head>
                if (isEndTag && token.tag == head.tag)
                {
                    head.closed = true;
                    break;
                }

                // text ends the head implicitly; hand it back to the caller
                if (token.type == Node.TEXT_NODE)
                {
                    lexer.report.warning(lexer, head, token, Report.TAG_NOT_ALLOWED_IN);
                    lexer.ungetToken();
                    break;
                }

                // comments, processing instructions and the like
                if (Node.insertMisc(head, token))
                {
                    continue;
                }

                if (token.type == Node.DOCTYPE_TAG)
                {
                    Node.insertDocType(lexer, head, token);
                    continue;
                }

                // tags we know nothing about are dropped
                if (token.tag == null)
                {
                    lexer.report.warning(lexer, head, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // an element that does not belong to the head closes it implicitly
                if (!TidyUtils.toBoolean(token.tag.model & Dict.CM_HEAD))
                {
                    // #545067 Implicit closing of head broken - warn only for XHTML input
                    if (lexer.isvoyager)
                    {
                        lexer.report.warning(lexer, head, token, Report.TAG_NOT_ALLOWED_IN);
                    }
                    lexer.ungetToken();
                    break;
                }

                boolean opensElement = token.type == Node.START_TAG || token.type == Node.START_END_TAG;

                // what is left over here (e.g. stray end tags) is discarded
                if (!opensElement)
                {
                    lexer.report.warning(lexer, head, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                if (token.tag == tt.tagTitle)
                {
                    titleCount++;

                    if (titleCount > 1)
                    {
                        lexer.report.warning(lexer, head, token, Report.TOO_MANY_ELEMENTS);
                    }
                }
                else if (token.tag == tt.tagBase)
                {
                    baseCount++;

                    if (baseCount > 1)
                    {
                        lexer.report.warning(lexer, head, token, Report.TOO_MANY_ELEMENTS);
                    }
                }
                else if (token.tag == tt.tagNoscript)
                {
                    lexer.report.warning(lexer, head, token, Report.TAG_NOT_ALLOWED_IN);
                }

                head.insertNodeAtEnd(token);
                parseTag(lexer, token, Lexer.IGNORE_WHITESPACE);
            }
        }
    }

    /**
     * Parser for TITLE. Collects the text content of the title element until its end tag is found, a second title
     * start tag is met (which is coerced to the end tag) or an unexpected token forces an implicit close.
     */
    public static class ParseTitle implements Parser
    {

        /**
         * Parses the content of the title element.
         * @param lexer Lexer supplying the tokens
         * @param title the title element being filled
         * @param mode not used; tokens are always read in mixed content mode
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node title, short mode)
        {
            Node node;

            while ((node = lexer.getToken(Lexer.MIXED_CONTENT)) != null)
            {
                // [438658] : Missing / in title endtag makes 2 titles
                if (node.tag == title.tag && node.type == Node.START_TAG)
                {
                    lexer.report.warning(lexer, title, node, Report.COERCE_TO_ENDTAG);
                    node.type = Node.END_TAG;
                    // no "continue" here: the coerced token has to be processed as the end tag right below,
                    // otherwise it would be dropped and the title would stay open
                }

                if (node.tag == title.tag && node.type == Node.END_TAG)
                {
                    title.closed = true;
                    Node.trimSpaces(lexer, title);
                    return;
                }

                if (node.type == Node.TEXT_NODE)
                {
                    // only called for 1st child
                    if (title.content == null)
                    {
                        Node.trimInitialSpace(lexer, title, node);
                    }

                    // skip text nodes that are empty (possibly after trimming)
                    if (node.start >= node.end)
                    {
                        continue;
                    }

                    title.insertNodeAtEnd(node);
                    continue;
                }

                // deal with comments etc.
                if (Node.insertMisc(title, node))
                {
                    continue;
                }

                // discard unknown tags
                if (node.tag == null)
                {
                    lexer.report.warning(lexer, title, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // pushback unexpected tokens
                lexer.report.warning(lexer, title, node, Report.MISSING_ENDTAG_BEFORE);
                lexer.ungetToken();
                Node.trimSpaces(lexer, title);
                return;
            }

            // input ended inside the title; node is null at this point
            lexer.report.warning(lexer, title, node, Report.MISSING_ENDTAG_FOR);
        }

    }

    /**
     * Parser for SCRIPT. The element content is read as one CDATA node, which must be followed by the matching
     * end tag.
     */
    public static class ParseScript implements Parser
    {

        /**
         * Parses the content of a script element.
         * @param lexer Lexer supplying the CDATA content and the following token
         * @param script the script element being filled
         * @param mode not used
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node script, short mode)
        {
            Node content = lexer.getCDATA(script);

            if (content == null)
            {
                /* handle e.g. a document like "<script>" */
                lexer.report.warning(lexer, script, null, Report.MISSING_ENDTAG_FOR);
                return;
            }

            script.insertNodeAtEnd(content);

            Node closing = lexer.getToken(Lexer.IGNORE_WHITESPACE);
            boolean closesScript = closing != null
                && closing.type == Node.END_TAG
                && closing.tag != null
                && closing.tag.name.equalsIgnoreCase(script.tag.name);

            if (closesScript)
            {
                return;
            }

            // anything but the matching end tag: report it and leave the token for the caller
            lexer.report.warning(lexer, script, closing, Report.MISSING_ENDTAG_FOR);

            if (closing != null)
            {
                lexer.ungetToken();
            }
        }
    }

    /**
     * Parser for BODY. Consumes tokens until the input ends (or, for a body nested in noframes, until the body or
     * noframes is closed), inserting text, inline and block content into the body and inferring wrapper elements
     * (ul, dl, table, form, p) for misplaced content.
     */
    public static class ParseBody implements Parser
    {

        /**
         * Parses the content of the body element.
         * @param lexer Lexer supplying the tokens
         * @param body the body element being filled
         * @param mode ignored on entry: it is immediately reset to <code>Lexer.IGNORE_WHITESPACE</code> and then
         * used as the current whitespace mode of the loop
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node body, short mode)
        {
            Node node;
            // checkstack: whether the next text/inline token should trigger lexer.inlineDup()
            // iswhitenode: current token is a text node holding at most a single space
            boolean checkstack, iswhitenode;

            mode = Lexer.IGNORE_WHITESPACE;
            checkstack = true;
            TagTable tt = lexer.configuration.tt;

            // NOTE(review): presumably relocates misplaced object elements before body parsing starts -
            // confirm against Clean.bumpObject
            Clean.bumpObject(lexer, body.parent);

            while ((node = lexer.getToken(mode)) != null)
            {

                // #538536 Extra endtags not detected
                if (node.tag == tt.tagHtml)
                {
                    // html start tags, and any html tag after </html> was already seen, are discarded;
                    // the first </html> is just recorded
                    if (node.type == Node.START_TAG || node.type == Node.START_END_TAG || lexer.seenEndHtml)
                    {
                        lexer.report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
                    }
                    else
                    {
                        lexer.seenEndHtml = true;
                    }

                    continue;
                }

                // tags after </body> are reported but still processed below
                if (lexer.seenEndBody
                    && (node.type == Node.START_TAG || node.type == Node.END_TAG || node.type == Node.START_END_TAG))
                {
                    lexer.report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
                }

                if (node.tag == body.tag && node.type == Node.END_TAG)
                {
                    body.closed = true;
                    Node.trimSpaces(lexer, body);
                    lexer.seenEndBody = true;
                    mode = Lexer.IGNORE_WHITESPACE;

                    // a body inside noframes really ends here; otherwise keep reading trailing content
                    if (body.parent.tag == tt.tagNoframes)
                    {
                        break;
                    }

                    continue;
                }

                if (node.tag == tt.tagNoframes)
                {
                    // <noframes> inside body is parsed as a block element
                    if (node.type == Node.START_TAG)
                    {
                        body.insertNodeAtEnd(node);
                        BLOCK.parse(lexer, node, mode);
                        continue;
                    }

                    // </noframes> closes a body nested in noframes; the token is left for the caller
                    if (node.type == Node.END_TAG && body.parent.tag == tt.tagNoframes)
                    {
                        Node.trimSpaces(lexer, body);
                        lexer.ungetToken();
                        break;
                    }
                }

                // frame/frameset tokens also end a body nested in noframes; the token is left for the caller
                if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset) && body.parent.tag == tt.tagNoframes)
                {
                    Node.trimSpaces(lexer, body);
                    lexer.ungetToken();
                    break;
                }

                iswhitenode = false;

                // text of at most one character whose first byte is a space
                if (node.type == Node.TEXT_NODE
                    && node.end <= node.start + 1
                    && node.textarray[node.start] == (byte) ' ')
                {
                    iswhitenode = true;
                }

                // deal with comments etc.
                if (Node.insertMisc(body, node))
                {
                    continue;
                }

                // #538536 Extra endtags not detected
                // if (lexer.seenEndBody && !iswhitenode)
                // {
                // lexer.seenEndBody = true;
                // lexer.report.warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
                // }

                // mixed content model permits text
                if (node.type == Node.TEXT_NODE)
                {
                    // a lone space is dropped while whitespace is being ignored
                    if (iswhitenode && mode == Lexer.IGNORE_WHITESPACE)
                    {
                        continue;
                    }

                    // enclose-body-text option: wrap the text in an inferred <p> and let its parser read it
                    if (lexer.configuration.encloseBodyText && !iswhitenode)
                    {
                        Node para;

                        lexer.ungetToken();
                        para = lexer.inferredTag("p");
                        body.insertNodeAtEnd(para);
                        parseTag(lexer, para, mode);
                        mode = Lexer.MIXED_CONTENT;
                        continue;
                    }

                    // HTML2 and HTML4 strict doesn't allow text here
                    lexer.constrainVersion(~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20));

                    if (checkstack)
                    {
                        checkstack = false;

                        // when inlineDup() reports a positive count the token is not inserted in this pass
                        if (lexer.inlineDup(node) > 0)
                        {
                            continue;
                        }
                    }

                    body.insertNodeAtEnd(node);
                    mode = Lexer.MIXED_CONTENT;
                    continue;
                }

                if (node.type == Node.DOCTYPE_TAG)
                {
                    Node.insertDocType(lexer, body, node);
                    continue;
                }
                // discard unknown and PARAM tags
                if (node.tag == null || node.tag == tt.tagParam)
                {
                    lexer.report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // Netscape allows LI and DD directly in BODY We infer UL or DL respectively and use this boolean to
                // exclude block-level elements so as to match Netscape's observed behaviour.

                lexer.excludeBlocks = false;

                // element is neither block nor inline (or is <input>): it cannot be a direct child of body
                if ((!((node.tag.model & Dict.CM_BLOCK) != 0) && !((node.tag.model & Dict.CM_INLINE) != 0))
                    || node.tag == tt.tagInput)
                {
                    // avoid this error message being issued twice
                    if (!((node.tag.model & Dict.CM_HEAD) != 0))
                    {
                        lexer.report.warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
                    }

                    // document-level tags are dropped here
                    if ((node.tag.model & Dict.CM_HTML) != 0)
                    {
                        // copy body attributes if current body was inferred
                        if (node.tag == tt.tagBody && body.implicit && body.attributes == null)
                        {
                            body.attributes = node.attributes;
                            node.attributes = null;
                        }

                        continue;
                    }

                    if ((node.tag.model & Dict.CM_HEAD) != 0)
                    {
                        moveToHead(lexer, body, node);
                        continue;
                    }

                    // list item without a list: push it back and infer <ul class="noindent">
                    if ((node.tag.model & Dict.CM_LIST) != 0)
                    {
                        lexer.ungetToken();
                        node = lexer.inferredTag("ul");
                        node.addClass("noindent");
                        lexer.excludeBlocks = true;
                    }
                    // definition list item without a list: push it back and infer <dl>
                    else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
                    {
                        lexer.ungetToken();
                        node = lexer.inferredTag("dl");
                        lexer.excludeBlocks = true;
                    }
                    // table part without a table: infer <table>, except for end tags which fall through
                    // and are discarded at the bottom of the loop
                    else if ((node.tag.model & (Dict.CM_TABLE | Dict.CM_ROWGRP | Dict.CM_ROW)) != 0)
                    {
                        // Issue 2855511
                        if (node.type != Node.END_TAG) {
                        	lexer.ungetToken();
                        	node = lexer.inferredTag("table");
                        }
                        lexer.excludeBlocks = true;
                    }
                    // <input> outside a form: push it back and infer <form>
                    else if (node.tag == tt.tagInput)
                    {
                        lexer.ungetToken();
                        node = lexer.inferredTag("form");
                        lexer.excludeBlocks = true;
                    }
                    else
                    {
                        // tokens that are neither row nor field content end body parsing; the token is left
                        // for the caller
                        if (!((node.tag.model & (Dict.CM_ROW | Dict.CM_FIELD)) != 0))
                        {
                            lexer.ungetToken();
                            return;
                        }

                        // ignore </td></th> <option> etc.
                        continue;
                    }
                }

                if (node.type == Node.END_TAG)
                {
                    // </br> is treated as <br>
                    if (node.tag == tt.tagBr)
                    {
                        node.type = Node.START_TAG;
                    }
                    // stray </p>: the token is coerced to br (via Node.coerceNode) and inserted, then a second,
                    // inferred br is handled by the start tag branch below
                    else if (node.tag == tt.tagP)
                    {
                        Node.coerceNode(lexer, node, tt.tagBr);
                        body.insertNodeAtEnd(node);
                        node = lexer.inferredTag("br");
                    }
                    else if ((node.tag.model & Dict.CM_INLINE) != 0)
                    {
                        lexer.popInline(node);
                    }
                }

                if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
                {
                    // inline (but not mixed) element directly in body
                    if (((node.tag.model & Dict.CM_INLINE) != 0) && !((node.tag.model & Dict.CM_MIXED) != 0))
                    {
                        // HTML4 strict doesn't allow inline content here
                        // but HTML2 does allow img elements as children of body
                        if (node.tag == tt.tagImg)
                        {
                            lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);
                        }
                        else
                        {
                            lexer.constrainVersion(~(Dict.VERS_HTML40_STRICT | Dict.VERS_HTML20));
                        }

                        if (checkstack && !node.implicit)
                        {
                            checkstack = false;

                            // when inlineDup() reports a positive count the token is not inserted in this pass
                            if (lexer.inlineDup(node) > 0)
                            {
                                continue;
                            }
                        }

                        mode = Lexer.MIXED_CONTENT;
                    }
                    else
                    {
                        // after any other element: re-arm the inline stack check and ignore whitespace again
                        checkstack = true;
                        mode = Lexer.IGNORE_WHITESPACE;
                    }

                    if (node.implicit)
                    {
                        lexer.report.warning(lexer, body, node, Report.INSERTING_TAG);
                    }

                    body.insertNodeAtEnd(node);
                    parseTag(lexer, node, mode);
                    continue;
                }

                // discard unexpected tags
                lexer.report.warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
            }
        }

    }

    /**
     * Parser for FRAMESET. Gathers the children of a <code>frameset</code> element until its end tag is read or the
     * lexer has no more tokens.
     */
    public static class ParseFrameSet implements Parser
    {

        /**
         * Reads tokens with whitespace ignored and attaches frame content to <code>frameset</code>.
         * @param lexer source of tokens; also receives the warnings
         * @param frameset element whose content is being parsed
         * @param mode not consulted by this parser
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node frameset, short mode)
        {
            final TagTable tags = lexer.configuration.tt;

            // remember that the document uses frames
            lexer.badAccess |= Report.USING_FRAMES;

            Node token = lexer.getToken(Lexer.IGNORE_WHITESPACE);
            for (; token != null; token = lexer.getToken(Lexer.IGNORE_WHITESPACE))
            {
                // the matching end tag completes this frameset
                if (token.type == Node.END_TAG && token.tag == frameset.tag)
                {
                    frameset.closed = true;
                    Node.trimSpaces(lexer, frameset);
                    return;
                }

                // comments and similar nodes are attached directly
                if (Node.insertMisc(frameset, token))
                {
                    continue;
                }

                // anything without a known tag is dropped
                if (token.tag == null)
                {
                    lexer.report.warning(lexer, frameset, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // start tags of head-level elements are relocated to the head
                boolean opensElement = token.type == Node.START_TAG || token.type == Node.START_END_TAG;
                if (opensElement && (token.tag.model & Dict.CM_HEAD) != 0)
                {
                    moveToHead(lexer, frameset, token);
                    continue;
                }

                // a body inside a frameset is pushed back and wrapped in an inferred noframes
                if (token.tag == tags.tagBody)
                {
                    lexer.ungetToken();
                    token = lexer.inferredTag("noframes");
                    lexer.report.warning(lexer, frameset, token, Report.INSERTING_TAG);
                }

                // frame content: always appended, but only a plain start tag has content of its own to parse
                if ((token.type == Node.START_TAG || token.type == Node.START_END_TAG)
                    && (token.tag.model & Dict.CM_FRAMES) != 0)
                {
                    frameset.insertNodeAtEnd(token);

                    if (token.type == Node.START_TAG)
                    {
                        lexer.excludeBlocks = false;
                        parseTag(lexer, token, Lexer.MIXED_CONTENT);
                    }
                    continue;
                }

                // discard unexpected tags
                lexer.report.warning(lexer, frameset, token, Report.DISCARDING_UNEXPECTED);
            }

            // ran out of tokens before the end tag was seen (token is null at this point)
            lexer.report.warning(lexer, frameset, token, Report.MISSING_ENDTAG_FOR);
        }

    }

    /**
     * Parser for INLINE content. Besides genuine inline elements this parser is also used for some block level
     * elements such as H1 to H6, and for DT.
     */
    public static class ParseInline implements Parser
    {

        /**
         * Parses the content of <code>element</code> until its end tag is found, until a token is met that implies
         * the end of the element (such tokens are pushed back for the caller), or until the input is exhausted.
         * @param lexer source of tokens; also receives the warnings and holds the inline stack
         * @param element element whose content is being parsed; nothing is done if its tag is CM_EMPTY
         * @param mode lexer mode; anything other than {@link Lexer#PREFORMATTED} is replaced by
         * {@link Lexer#MIXED_CONTENT}
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node element, short mode)
        {
            Node node, parent;
            TagTable tt = lexer.configuration.tt;

            // empty elements have no content to parse
            if (TidyUtils.toBoolean(element.tag.model & Dict.CM_EMPTY))
            {
                return;
            }

            // ParseInline is used for some block level elements like H1 to H6 For such elements we need to insert
            // inline emphasis tags currently on the inline stack. For Inline elements, we normally push them onto the
            // inline stack provided they aren't implicit or OBJECT/APPLET. This test is carried out in PushInline and
            // PopInline, see istack.c We don't push SPAN to replicate current browser behavior

            if (TidyUtils.toBoolean(element.tag.model & Dict.CM_BLOCK) || (element.tag == tt.tagDt))
            {
                lexer.inlineDup(null);
            }
            else if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE)
                    // A and SPAN are not pushed. The SPAN exclusion had been dropped for #540571 (inconsistent
                    // behaviour with span inline element) and was added back because pushing SPAN caused an
                    // infinite loop when the span has no end tag.
                    && element.tag != tt.tagA && element.tag != tt.tagSpan)
            {
                lexer.pushInline(element);
            }

            // record use of presentational markup
            if (element.tag == tt.tagNobr)
            {
                lexer.badLayout |= Report.USING_NOBR;
            }
            else if (element.tag == tt.tagFont)
            {
                lexer.badLayout |= Report.USING_FONT;
            }

            // Inline elements may or may not be within a preformatted element
            if (mode != Lexer.PREFORMATTED)
            {
                mode = Lexer.MIXED_CONTENT;
            }

            while ((node = lexer.getToken(mode)) != null)
            {
                // end tag for current element
                if (node.tag == element.tag && node.type == Node.END_TAG)
                {
                    if (TidyUtils.toBoolean(element.tag.model & Dict.CM_INLINE))
                    {
                        lexer.popInline(node);
                    }

                    if (!TidyUtils.toBoolean(mode & Lexer.PREFORMATTED))
                    {
                        Node.trimSpaces(lexer, element);
                    }

                    // if a font element wraps an anchor and nothing else then move the font element inside the anchor
                    // since otherwise it won't alter the anchor text color

                    if (element.tag == tt.tagFont && element.content != null && element.content == element.last)
                    {
                        Node child = element.content;

                        if (child.tag == tt.tagA)
                        {
                            // put the anchor where the font element was ...
                            child.parent = element.parent;
                            child.next = element.next;
                            child.prev = element.prev;

                            if (child.prev != null)
                            {
                                child.prev.next = child;
                            }
                            else
                            {
                                child.parent.content = child;
                            }

                            if (child.next != null)
                            {
                                child.next.prev = child;
                            }
                            else
                            {
                                child.parent.last = child;
                            }

                            // ... and make the font element the anchor's only child, owning the anchor's old content
                            element.next = null;
                            element.prev = null;
                            element.parent = child;
                            element.content = child.content;
                            element.last = child.last;
                            child.content = element;
                            child.last = element;
                            for (child = element.content; child != null; child = child.next)
                            {
                                child.parent = element;
                            }
                        }
                    }
                    element.closed = true;
                    // NOTE(review): unlike the call above, this trim is not guarded by the PREFORMATTED check
                    Node.trimSpaces(lexer, element);
                    Node.trimEmptyElement(lexer, element);
                    return;
                }

                // <u> ... <u> map 2nd <u> to </u> if 1st is explicit
                // otherwise emphasis nesting is probably unintentional
                // big and small have cumulative effect to leave them alone
                if (node.type == Node.START_TAG
                    && node.tag == element.tag
                    && lexer.isPushed(node)
                    && !node.implicit
                    && !element.implicit
                    && node.tag != null
                    && ((node.tag.model & Dict.CM_INLINE) != 0)
                    && node.tag != tt.tagA
                    && node.tag != tt.tagFont
                    && node.tag != tt.tagBig
                    && node.tag != tt.tagSmall
                    && node.tag != tt.tagQ)
                {
                    // only a bare repeated start tag after some content is treated as a mistyped end tag
                    if (element.content != null && node.attributes == null)
                    {
                        lexer.report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
                        node.type = Node.END_TAG;
                        lexer.ungetToken();
                        continue;
                    }

                    lexer.report.warning(lexer, element, node, Report.NESTED_EMPHASIS);
                }
                else if (lexer.isPushed(node) && node.type == Node.START_TAG && node.tag == tt.tagQ)
                {
                    lexer.report.warning(lexer, element, node, Report.NESTED_QUOTATION);
                }

                if (node.type == Node.TEXT_NODE)
                {
                    // only called for 1st child
                    if (element.content == null && !TidyUtils.toBoolean(mode & Lexer.PREFORMATTED))
                    {
                        Node.trimSpaces(lexer, element);
                    }

                    // drop text nodes that have no characters
                    if (node.start >= node.end)
                    {
                        continue;
                    }

                    element.insertNodeAtEnd(node);
                    continue;
                }

                // mixed content model so allow text
                if (Node.insertMisc(element, node))
                {
                    continue;
                }

                // deal with HTML tags
                if (node.tag == tt.tagHtml)
                {
                    if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
                    {
                        lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    // otherwise infer end of inline element
                    lexer.ungetToken();
                    if (!((mode & Lexer.PREFORMATTED) != 0))
                    {
                        Node.trimSpaces(lexer, element);
                    }
                    Node.trimEmptyElement(lexer, element);
                    return;
                }

                // within <dt> or <pre> map <p> to <br>
                if (node.tag == tt.tagP
                    && node.type == Node.START_TAG
                    && ((mode & Lexer.PREFORMATTED) != 0 || element.tag == tt.tagDt || element.isDescendantOf(tt.tagDt)))
                {
                    node.tag = tt.tagBr;
                    node.element = "br";
                    Node.trimSpaces(lexer, element);
                    element.insertNodeAtEnd(node);
                    continue;
                }

                // ignore unknown and PARAM tags
                if (node.tag == null || node.tag == tt.tagParam)
                {
                    lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // coerce </br> to <br>; it is then handled below like any other inline start tag
                if (node.tag == tt.tagBr && node.type == Node.END_TAG)
                {
                    node.type = Node.START_TAG;
                }

                if (node.type == Node.END_TAG)
                {
                    // </br> never gets here: it has already been turned into a start tag above
                    if (node.tag == tt.tagP)
                    {
                        // coerce unmatched </p> to <br><br>
                        if (!element.isDescendantOf(tt.tagP))
                        {
                            Node.coerceNode(lexer, node, tt.tagBr);
                            Node.trimSpaces(lexer, element);
                            element.insertNodeAtEnd(node);

                            // the second <br> must be inserted too; an inferred node that is only assigned
                            // would be lost when the loop fetches the next token
                            node = lexer.inferredTag("br");
                            element.insertNodeAtEnd(node);
                            continue;
                        }
                    }
                    else if ((node.tag.model & Dict.CM_INLINE) != 0
                        && node.tag != tt.tagA
                        && !((node.tag.model & Dict.CM_OBJECT) != 0)
                        && (element.tag.model & Dict.CM_INLINE) != 0)
                    {
                        // allow any inline end tag to end current element
                        lexer.popInline(element);

                        if (element.tag != tt.tagA)
                        {
                            // NOTE(review): node.tag == tt.tagA cannot hold here because the enclosing condition
                            // requires node.tag != tt.tagA, so only the NON_MATCHING_ENDTAG branch is ever taken
                            if (node.tag == tt.tagA && node.tag != element.tag)
                            {
                                lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                                lexer.ungetToken();
                            }
                            else
                            {
                                lexer.report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
                            }

                            if (!((mode & Lexer.PREFORMATTED) != 0))
                            {
                                Node.trimSpaces(lexer, element);
                            }
                            Node.trimEmptyElement(lexer, element);
                            return;
                        }

                        // if parent is <a> then discard unexpected inline end tag
                        lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    } // special case </tr> etc. for stuff moved in front of table
                    else if (lexer.exiled && node.tag.model != 0 && (node.tag.model & Dict.CM_TABLE) != 0)
                    {
                        lexer.ungetToken();
                        Node.trimSpaces(lexer, element);
                        Node.trimEmptyElement(lexer, element);
                        return;
                    }
                }

                // allow any header tag to end current header
                if ((node.tag.model & Dict.CM_HEADING) != 0 && (element.tag.model & Dict.CM_HEADING) != 0)
                {
                    if (node.tag == element.tag)
                    {
                        lexer.report.warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
                    }
                    else
                    {
                        // a different heading: leave the token for the caller
                        lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                        lexer.ungetToken();
                    }
                    if (!((mode & Lexer.PREFORMATTED) != 0))
                    {
                        Node.trimSpaces(lexer, element);
                    }
                    Node.trimEmptyElement(lexer, element);
                    return;
                }

                // an <A> tag to ends any open <A> element but <A href=...> is mapped to </A><A href=...>

                // #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00
                if (node.tag == tt.tagA
                    && !node.implicit
                    && (element.tag == tt.tagA || element.isDescendantOf(tt.tagA)))
                {
                    // coerce <a> to </a> unless it has some attributes
                    // #427827 - fix by Randy Waki and Bjoern Hoehrmann 23 Aug 00
                    // other fixes by Dave Raggett
                    if (node.type != Node.END_TAG && node.attributes == null)
                    {
                        node.type = Node.END_TAG;
                        lexer.report.warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
                        lexer.ungetToken();
                        continue;
                    }

                    // otherwise end this element and let the caller see the <a> token again
                    lexer.ungetToken();
                    lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                    if (!((mode & Lexer.PREFORMATTED) != 0))
                    {
                        Node.trimSpaces(lexer, element);
                    }
                    Node.trimEmptyElement(lexer, element);
                    return;
                }

                if ((element.tag.model & Dict.CM_HEADING) != 0)
                {
                    if (node.tag == tt.tagCenter || node.tag == tt.tagDiv)
                    {
                        if (node.type != Node.START_TAG && node.type != Node.START_END_TAG)
                        {
                            lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);

                        // insert center as parent if heading is empty
                        if (element.content == null)
                        {
                            Node.insertNodeAsParent(element, node);
                            continue;
                        }

                        // split heading and make center parent of 2nd part
                        element.insertNodeAfterElement(node);

                        if (!((mode & Lexer.PREFORMATTED) != 0))
                        {
                            Node.trimSpaces(lexer, element);
                        }

                        // parsing continues in a clone of the heading placed inside the center/div
                        element = lexer.cloneNode(element);
                        element.start = lexer.lexsize;
                        element.end = lexer.lexsize;
                        node.insertNodeAtEnd(element);
                        continue;
                    }

                    if (node.tag == tt.tagHr)
                    {
                        if (node.type != Node.START_TAG && node.type != Node.START_END_TAG)
                        {
                            lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);

                        // insert hr before heading if heading is empty
                        if (element.content == null)
                        {
                            Node.insertNodeBeforeElement(element, node);
                            continue;
                        }

                        // split heading and insert hr before 2nd part
                        element.insertNodeAfterElement(node);

                        if (!((mode & Lexer.PREFORMATTED) != 0))
                        {
                            Node.trimSpaces(lexer, element);
                        }

                        // parsing continues in a clone of the heading placed after the hr
                        element = lexer.cloneNode(element);
                        element.start = lexer.lexsize;
                        element.end = lexer.lexsize;
                        node.insertNodeAfterElement(element);
                        continue;
                    }
                }

                if (element.tag == tt.tagDt)
                {
                    if (node.tag == tt.tagHr)
                    {
                        Node dd;

                        if (node.type != Node.START_TAG && node.type != Node.START_END_TAG)
                        {
                            lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        lexer.report.warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
                        dd = lexer.inferredTag("dd");

                        // insert hr within dd before dt if dt is empty
                        if (element.content == null)
                        {
                            Node.insertNodeBeforeElement(element, dd);
                            dd.insertNodeAtEnd(node);
                            continue;
                        }

                        // split dt and insert hr within dd before 2nd part
                        element.insertNodeAfterElement(dd);
                        dd.insertNodeAtEnd(node);

                        if (!((mode & Lexer.PREFORMATTED) != 0))
                        {
                            Node.trimSpaces(lexer, element);
                        }

                        // parsing continues in a clone of the dt placed after the inferred dd
                        element = lexer.cloneNode(element);
                        element.start = lexer.lexsize;
                        element.end = lexer.lexsize;
                        dd.insertNodeAfterElement(element);
                        continue;
                    }
                }

                // if this is the end tag for an ancestor element then infer end tag for this element

                if (node.type == Node.END_TAG)
                {
                    for (parent = element.parent; parent != null; parent = parent.parent)
                    {
                        if (node.tag == parent.tag)
                        {
                            if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
                            {
                                lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                            }

                            if (element.tag == tt.tagA)
                            {
                                lexer.popInline(element);
                            }

                            // the ancestor's parser must see its own end tag
                            lexer.ungetToken();

                            if (!((mode & Lexer.PREFORMATTED) != 0))
                            {
                                Node.trimSpaces(lexer, element);
                            }

                            Node.trimEmptyElement(lexer, element);
                            return;
                        }
                    }
                }

                // block level tags end this element
                if (!((node.tag.model & Dict.CM_INLINE) != 0))
                {
                    if (node.type != Node.START_TAG)
                    {
                        lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (!((element.tag.model & Dict.CM_OPT) != 0))
                    {
                        lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                    }

                    // head-only elements are relocated instead of ending this element
                    if ((node.tag.model & Dict.CM_HEAD) != 0 && !((node.tag.model & Dict.CM_BLOCK) != 0))
                    {
                        moveToHead(lexer, element, node);
                        continue;
                    }

                    // prevent anchors from propagating into block tags except for headings h1 to h6

                    if (element.tag == tt.tagA)
                    {
                        if (node.tag != null && !((node.tag.model & Dict.CM_HEADING) != 0))
                        {
                            lexer.popInline(element);
                        }
                        else if (!(element.content != null))
                        {
                            // an empty anchor directly before a heading is removed altogether
                            Node.discardElement(element);
                            lexer.ungetToken();
                            return;
                        }
                    }

                    lexer.ungetToken();

                    if (!((mode & Lexer.PREFORMATTED) != 0))
                    {
                        Node.trimSpaces(lexer, element);
                    }

                    Node.trimEmptyElement(lexer, element);
                    return;
                }

                // parse inline element
                if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
                {
                    if (node.implicit)
                    {
                        lexer.report.warning(lexer, element, node, Report.INSERTING_TAG);
                    }

                    // trim white space before <br>
                    if (node.tag == tt.tagBr)
                    {
                        Node.trimSpaces(lexer, element);
                    }

                    element.insertNodeAtEnd(node);
                    parseTag(lexer, node, mode);
                    continue;
                }

                // discard unexpected tags
                lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                continue;
            }

            // input exhausted without an end tag (node is null here); only reported for non-optional end tags
            if (!((element.tag.model & Dict.CM_OPT) != 0))
            {
                lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
            }

            Node.trimEmptyElement(lexer, element);
        }
    }

    /**
     * Parser for LIST. Fills a list element with <code>li</code> children.
     */
    public static class ParseList implements Parser
    {

        /**
         * Reads tokens with whitespace ignored until the list ends. Content that is not an <code>li</code> is wrapped
         * in an inferred <code>li</code> carrying <code>style="list-style: none"</code>.
         * @param lexer source of tokens; also receives the warnings
         * @param list list element whose content is being parsed; nothing is done if its tag is CM_EMPTY
         * @param mode not consulted by this parser
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node list, short mode)
        {
            final TagTable tags = lexer.configuration.tt;

            if ((list.tag.model & Dict.CM_EMPTY) != 0)
            {
                return;
            }

            lexer.insert = -1; // defer implicit inline start tags

            Node item = lexer.getToken(Lexer.IGNORE_WHITESPACE);
            for (; item != null; item = lexer.getToken(Lexer.IGNORE_WHITESPACE))
            {
                // the list's own end tag
                if (item.type == Node.END_TAG && item.tag == list.tag)
                {
                    replaceObsolete(lexer, list, tags);
                    list.closed = true;
                    Node.trimEmptyElement(lexer, list);
                    return;
                }

                // deal with comments etc.
                if (Node.insertMisc(list, item))
                {
                    continue;
                }

                // unknown elements are dropped; text falls through and gets wrapped below
                if (item.tag == null && item.type != Node.TEXT_NODE)
                {
                    lexer.report.warning(lexer, list, item, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                if (item.type == Node.END_TAG)
                {
                    if (item.tag == tags.tagForm)
                    {
                        badForm(lexer);
                        lexer.report.warning(lexer, list, item, Report.DISCARDING_UNEXPECTED);
                    }
                    else if (item.tag != null && (item.tag.model & Dict.CM_INLINE) != 0)
                    {
                        lexer.report.warning(lexer, list, item, Report.DISCARDING_UNEXPECTED);
                        lexer.popInline(item);
                    }
                    else if (closesAncestor(list, item))
                    {
                        // end tag of an enclosing element: infer the end of this list and hand the token back
                        lexer.report.warning(lexer, list, item, Report.MISSING_ENDTAG_BEFORE);
                        lexer.ungetToken();
                        replaceObsolete(lexer, list, tags);
                        Node.trimEmptyElement(lexer, list);
                        return;
                    }
                    else
                    {
                        lexer.report.warning(lexer, list, item, Report.DISCARDING_UNEXPECTED);
                    }
                    continue;
                }

                if (item.tag != tags.tagLi)
                {
                    // the token will be read again, either by the caller or as content of the inferred li
                    lexer.ungetToken();

                    boolean excludedBlock = item.tag != null
                        && (item.tag.model & Dict.CM_BLOCK) != 0
                        && lexer.excludeBlocks;

                    if (excludedBlock)
                    {
                        lexer.report.warning(lexer, list, item, Report.MISSING_ENDTAG_BEFORE);
                        Node.trimEmptyElement(lexer, list);
                        return;
                    }

                    item = lexer.inferredTag("li");
                    item.addAttribute("style", "list-style: none");
                    lexer.report.warning(lexer, list, item, Report.MISSING_STARTTAG);
                }

                // item is an <li> at this point
                list.insertNodeAtEnd(item);
                parseTag(lexer, item, Lexer.IGNORE_WHITESPACE);
            }

            // no more tokens: the end tag is missing (item is null at this point)
            replaceObsolete(lexer, list, tags);
            lexer.report.warning(lexer, list, item, Report.MISSING_ENDTAG_FOR);
            Node.trimEmptyElement(lexer, list);
        }

        /**
         * Coerces a list whose tag is flagged CM_OBSOLETE into a <code>ul</code>.
         */
        private static void replaceObsolete(Lexer lexer, Node list, TagTable tags)
        {
            if ((list.tag.model & Dict.CM_OBSOLETE) != 0)
            {
                Node.coerceNode(lexer, list, tags.tagUl);
            }
        }

        /**
         * Tells whether some ancestor of <code>list</code> has the same tag as <code>endTag</code>.
         */
        private static boolean closesAncestor(Node list, Node endTag)
        {
            Node ancestor = list.parent;
            while (ancestor != null)
            {
                if (endTag.tag == ancestor.tag)
                {
                    return true;
                }
                ancestor = ancestor.parent;
            }
            return false;
        }

    }

    /**
     * Parser for empty elements.
     */
    public static class ParseEmpty implements Parser
    {

        /**
         * When <code>lexer.isvoyager</code> is set, peeks at the next token: anything other than the element's own
         * end tag is reported as ELEMENT_NOT_EMPTY and pushed back. The matching end tag is consumed silently.
         * @param lexer source of tokens; also receives the warning
         * @param element the empty element
         * @param mode lexer mode used to fetch the next token
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node element, short mode)
        {
            if (!lexer.isvoyager)
            {
                return;
            }

            Node following = lexer.getToken(mode);
            if (following == null)
            {
                return;
            }

            boolean ownEndTag = following.type == Node.END_TAG && following.tag == element.tag;
            if (!ownEndTag)
            {
                lexer.report.warning(lexer, element, following, Report.ELEMENT_NOT_EMPTY);
                lexer.ungetToken();
            }
        }
    }

    /**
     * Parser for DEFLIST.
     */
    public static class ParseDefList implements Parser
    {

        /**
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node list, short mode)
        {
            Node node, parent;
            TagTable tt = lexer.configuration.tt;

            if ((list.tag.model & Dict.CM_EMPTY) != 0)
            {
                return;
            }

            lexer.insert = -1; // defer implicit inline start tags

            while ((node = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
            {
                if (node.tag == list.tag && node.type == Node.END_TAG)
                {
                    list.closed = true;
                    Node.trimEmptyElement(lexer, list);
                    return;
                }

                // deal with comments etc.
                if (Node.insertMisc(list, node))
                {
                    continue;
                }

                if (node.type == Node.TEXT_NODE)
                {
                    lexer.ungetToken();
                    node = lexer.inferredTag("dt");
                    lexer.report.warning(lexer, list, node, Report.MISSING_STARTTAG);
                }

                if (node.tag == null)
                {
                    lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // if this is the end tag for an ancestor element then infer end tag for this element

                if (node.type == Node.END_TAG)
                {
                    if (node.tag == tt.tagForm)
                    {
                        badForm(lexer);
                        lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    for (parent = list.parent; parent != null; parent = parent.parent)
                    {
                        if (node.tag == parent.tag)
                        {
                            lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);

                            lexer.ungetToken();
                            Node.trimEmptyElement(lexer, list);
                            return;
                        }
                    }
                }

                // center in a dt or a dl breaks the dl list in two
                if (node.tag == tt.tagCenter)
                {
                    if (list.content != null)
                    {
                        list.insertNodeAfterElement(node);
                    }
                    else
                    {
                        // trim empty dl list
                        Node.insertNodeBeforeElement(list, node);

                        // #540296 tidy dumps with empty definition list
                        Node.discardElement(list);
                    }

                    // and parse contents of center
                    parseTag(lexer, node, mode);

                    // now create a new dl element
                    list = lexer.inferredTag("dl");
                    node.insertNodeAfterElement(list);
                    continue;
                }

                if (!(node.tag == tt.tagDt || node.tag == tt.tagDd))
                {
                    lexer.ungetToken();

                    if (!((node.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0))
                    {
                        lexer.report.warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN);
                        Node.trimEmptyElement(lexer, list);
                        return;
                    }

                    // if DD appeared directly in BODY then exclude blocks
                    if (!((node.tag.model & Dict.CM_INLINE) != 0) && lexer.excludeBlocks)
                    {
                        Node.trimEmptyElement(lexer, list);
                        return;
                    }

                    node = lexer.inferredTag("dd");
                    lexer.report.warning(lexer, list, node, Report.MISSING_STARTTAG);
                }

                if (node.type == Node.END_TAG)
                {
                    lexer.report.warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // node should be <DT> or <DD>
                list.insertNodeAtEnd(node);
                parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
            }

            lexer.report.warning(lexer, list, node, Report.MISSING_ENDTAG_FOR);
            Node.trimEmptyElement(lexer, list);
        }

    }

    /**
     * Parser for PRE.
     */
    public static class ParsePre implements Parser
    {

        /**
         * Parses the content of a preformatted element. Tokens are read in <code>Lexer.PREFORMATTED</code> mode
         * until the end tag matching <code>pre</code> is found or the lexer runs out of tokens; in the latter case a
         * <code>MISSING_ENDTAG_FOR</code> warning is reported.
         * @param lexer source of tokens and owner of the report used for warnings
         * @param pre element whose content is parsed; coerced to <code>tt.tagPre</code> when its tag is marked
         * <code>CM_OBSOLETE</code>
         * @param mode not used by this parser
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node pre, short mode)
        {
            Node node;
            TagTable tt = lexer.configuration.tt;

            if ((pre.tag.model & Dict.CM_EMPTY) != 0)
            {
                return;
            }

            if ((pre.tag.model & Dict.CM_OBSOLETE) != 0)
            {
                Node.coerceNode(lexer, pre, tt.tagPre);
            }

            lexer.inlineDup(null); // tell lexer to insert inlines if needed

            while ((node = lexer.getToken(Lexer.PREFORMATTED)) != null)
            {
                // end tag for this element
                if (node.tag == pre.tag && node.type == Node.END_TAG)
                {
                    Node.trimSpaces(lexer, pre);
                    pre.closed = true;
                    Node.trimEmptyElement(lexer, pre);
                    return;
                }

                // html tags are dropped; only start tags are reported
                if (node.tag == tt.tagHtml)
                {
                    if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
                    {
                        lexer.report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                    }

                    continue;
                }

                if (node.type == Node.TEXT_NODE)
                {
                    // if first check for initial newline
                    if (pre.content == null)
                    {
                        // only look at the first byte when the node really has one, so that an empty text node
                        // never reads outside its own content
                        if (node.start < node.end && node.textarray[node.start] == (byte) '\n')
                        {
                            ++node.start;
                        }

                        // nothing left (or nothing to begin with): skip the node
                        if (node.start >= node.end)
                        {
                            continue;
                        }
                    }

                    pre.insertNodeAtEnd(node);
                    continue;
                }

                // deal with comments etc.
                if (Node.insertMisc(pre, node))
                {
                    continue;
                }

                // strip unexpected tags: whatever lexer.preContent() rejects is replaced by the node returned from
                // Node.escapeTag()
                if (!lexer.preContent(node))
                {
                    Node newnode;

                    lexer.report.warning(lexer, pre, node, Report.UNESCAPED_ELEMENT);
                    newnode = Node.escapeTag(lexer, node);
                    pre.insertNodeAtEnd(newnode);
                    continue;
                }

                if (node.tag == tt.tagP)
                {
                    if (node.type == Node.START_TAG)
                    {
                        lexer.report.warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);

                        // trim white space before <p> in <pre>
                        Node.trimSpaces(lexer, pre);

                        // the <p> start tag is coerced to <br>
                        Node.coerceNode(lexer, node, tt.tagBr);
                        pre.insertNodeAtEnd(node);
                    }
                    else
                    {
                        // any other <p> token (e.g. </p>) is dropped
                        lexer.report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                    }
                    continue;
                }

                if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
                {
                    // trim white space before <br>
                    if (node.tag == tt.tagBr)
                    {
                        Node.trimSpaces(lexer, pre);
                    }

                    pre.insertNodeAtEnd(node);
                    parseTag(lexer, node, Lexer.PREFORMATTED);
                    continue;
                }

                // discard unexpected tags
                lexer.report.warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
            }

            // ran out of tokens without seeing the end tag; node is null here
            lexer.report.warning(lexer, pre, node, Report.MISSING_ENDTAG_FOR);
            Node.trimEmptyElement(lexer, pre);
        }

    }

    /**
     * Parser for block elements.
     */
    public static class ParseBlock implements Parser
    {

        /**
         * Parses the content of a block element. The method returns when the element's own end tag is read, when a
         * token is read that makes the parser infer the end of the element (the token is then pushed back with
         * <code>lexer.ungetToken()</code>), or when the lexer has no more tokens.
         * @param lexer source of tokens and owner of the report used for warnings
         * @param element node created by the lexer upon seeing the start tag, or by the parser when the start tag is
         * inferred
         * @param mode incoming value is not used: it is overwritten with <code>Lexer.IGNORE_WHITESPACE</code> before
         * the first token is read
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node element, short mode)
        {
            Node node, parent;
            boolean checkstack;
            int istackbase = 0;
            TagTable tt = lexer.configuration.tt;

            checkstack = true;

            if ((element.tag.model & Dict.CM_EMPTY) != 0)
            {
                return;
            }

            if (element.tag == tt.tagForm && element.isDescendantOf(tt.tagForm))
            {
                lexer.report.warning(lexer, element, null, Report.ILLEGAL_NESTING);
            }

            // InlineDup() asks the lexer to insert inline emphasis tags currently pushed on the istack, but take care
            // to avoid propagating inline emphasis inside OBJECT or APPLET. For these elements a fresh inline stack
            // context is created and disposed of upon reaching the end of the element. They thus behave like table
            // cells in this respect.

            if ((element.tag.model & Dict.CM_OBJECT) != 0)
            {
                istackbase = lexer.istackbase;
                lexer.istackbase = lexer.istack.size();
            }

            if (!((element.tag.model & Dict.CM_MIXED) != 0))
            {
                lexer.inlineDup(null);
            }

            mode = Lexer.IGNORE_WHITESPACE;

            while ((node = lexer.getToken(mode)) != null)
            {
                // end tag for this element
                if (node.type == Node.END_TAG
                    && node.tag != null
                    && (node.tag == element.tag || element.was == node.tag))
                {
                    closeObjectContext(lexer, element, istackbase);

                    element.closed = true;
                    Node.trimSpaces(lexer, element);
                    Node.trimEmptyElement(lexer, element);
                    return;
                }

                // html, head and body tokens are dropped; only start tags are reported
                if (node.tag == tt.tagHtml || node.tag == tt.tagHead || node.tag == tt.tagBody)
                {
                    if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
                    {
                        lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                    }

                    continue;
                }

                if (node.type == Node.END_TAG)
                {
                    if (node.tag == null)
                    {
                        lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);

                        continue;
                    }
                    else if (node.tag == tt.tagBr)
                    {
                        // </br> is handled as if it were <br>
                        node.type = Node.START_TAG;
                    }
                    else if (node.tag == tt.tagP)
                    {
                        // </p> is coerced to <br> and followed by a second, inferred <br>
                        Node.coerceNode(lexer, node, tt.tagBr);
                        element.insertNodeAtEnd(node);
                        node = lexer.inferredTag("br");
                    }
                    else
                    {
                        // if this is the end tag for an ancestor element then infer end tag for this element

                        for (parent = element.parent; parent != null; parent = parent.parent)
                        {
                            if (node.tag == parent.tag)
                            {
                                if (!((element.tag.model & Dict.CM_OPT) != 0))
                                {
                                    lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                                }

                                lexer.ungetToken();

                                closeObjectContext(lexer, element, istackbase);

                                Node.trimSpaces(lexer, element);
                                Node.trimEmptyElement(lexer, element);
                                return;
                            }
                        }
                        // special case </tr> etc. for stuff moved in front of table
                        if (lexer.exiled && (node.tag.model & Dict.CM_TABLE) != 0)
                        {
                            lexer.ungetToken();
                            Node.trimSpaces(lexer, element);
                            Node.trimEmptyElement(lexer, element);
                            return;
                        }
                    }
                }

                // mixed content model permits text
                if (node.type == Node.TEXT_NODE)
                {
                    // a text node of at most one byte whose first byte is a space
                    boolean iswhitenode = node.end <= node.start + 1 && lexer.lexbuf[node.start] == (byte) ' ';

                    if (lexer.configuration.encloseBlockText && !iswhitenode)
                    {
                        // wrap the text in an inferred <p> and let that element re-read the token
                        lexer.ungetToken();
                        node = lexer.inferredTag("p");
                        element.insertNodeAtEnd(node);
                        parseTag(lexer, node, Lexer.MIXED_CONTENT);
                        continue;
                    }

                    if (checkstack)
                    {
                        checkstack = false;

                        if (!((element.tag.model & Dict.CM_MIXED) != 0))
                        {
                            if (lexer.inlineDup(node) > 0)
                            {
                                continue;
                            }
                        }
                    }

                    element.insertNodeAtEnd(node);
                    mode = Lexer.MIXED_CONTENT;

                    // HTML4 strict doesn't allow mixed content for elements with %block; as their content model
                    // But only body, map, blockquote, form and noscript have content model %block;
                    if (element.tag == tt.tagBody
                        || element.tag == tt.tagMap
                        || element.tag == tt.tagBlockquote
                        || element.tag == tt.tagForm
                        || element.tag == tt.tagNoscript)
                    {
                        lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);
                    }
                    continue;
                }

                // deal with comments etc.
                if (Node.insertMisc(element, node))
                {
                    continue;
                }

                // allow PARAM elements?
                if (node.tag == tt.tagParam)
                {
                    if (((element.tag.model & Dict.CM_PARAM) != 0)
                        && (node.type == Node.START_TAG || node.type == Node.START_END_TAG))
                    {
                        element.insertNodeAtEnd(node);
                        continue;
                    }

                    // otherwise discard it
                    lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // allow AREA elements?
                if (node.tag == tt.tagArea)
                {
                    if ((element.tag == tt.tagMap) && (node.type == Node.START_TAG || node.type == Node.START_END_TAG))
                    {
                        element.insertNodeAtEnd(node);
                        continue;
                    }

                    // otherwise discard it
                    lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // ignore unknown start/end tags
                if (node.tag == null)
                {
                    lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // Allow Dict.CM_INLINE elements here. Allow Dict.CM_BLOCK elements here unless lexer.excludeBlocks is
                // yes. LI and DD are special cased. Otherwise infer end tag for this element.

                if (!((node.tag.model & Dict.CM_INLINE) != 0))
                {
                    if (node.type != Node.START_TAG && node.type != Node.START_END_TAG)
                    {
                        if (node.tag == tt.tagForm)
                        {
                            badForm(lexer);
                        }
                        lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    // #427671 - Fix by Randy Waki - 10 Aug 00
                    // If an LI contains an illegal FRAME, FRAMESET, OPTGROUP, or OPTION start tag, discard the start
                    // tag and let the subsequent content get parsed as content of the enclosing LI. This seems to
                    // mimic IE and Netscape, and avoids an infinite loop: without this check, ParseBlock (which is
                    // parsing the LI's content) and ParseList (which is parsing the LI's parent's content) repeatedly
                    // defer to each other to parse the illegal start tag, each time inferring a missing </li> or <li>
                    // respectively. NOTE: This check is a bit fragile. It specifically checks for the four tags that
                    // happen to weave their way through the current series of tests performed by ParseBlock and
                    // ParseList to trigger the infinite loop.

                    if (element.tag == tt.tagLi)
                    {
                        if (node.tag == tt.tagFrame
                            || node.tag == tt.tagFrameset
                            || node.tag == tt.tagOptgroup
                            || node.tag == tt.tagOption)
                        {
                            lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }
                    }

                    if (element.tag == tt.tagTd || element.tag == tt.tagTh)
                    {
                        // if parent is a table cell, avoid inferring the end of the cell

                        if ((node.tag.model & Dict.CM_HEAD) != 0)
                        {
                            moveToHead(lexer, element, node);
                            continue;
                        }

                        if ((node.tag.model & Dict.CM_LIST) != 0)
                        {
                            lexer.ungetToken();
                            node = lexer.inferredTag("ul");
                            node.addClass("noindent");
                            lexer.excludeBlocks = true;
                        }
                        else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
                        {
                            lexer.ungetToken();
                            node = lexer.inferredTag("dl");
                            lexer.excludeBlocks = true;
                        }

                        // infer end of current table cell
                        if (!((node.tag.model & Dict.CM_BLOCK) != 0))
                        {
                            lexer.ungetToken();
                            Node.trimSpaces(lexer, element);
                            Node.trimEmptyElement(lexer, element);
                            return;
                        }
                    }
                    else if ((node.tag.model & Dict.CM_BLOCK) != 0)
                    {
                        if (lexer.excludeBlocks)
                        {
                            if (!((element.tag.model & Dict.CM_OPT) != 0))
                            {
                                lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                            }

                            lexer.ungetToken();

                            // NOTE(review): unlike the other exits for CM_OBJECT elements, this one restores the
                            // stack base without popping the entries pushed inside the element - confirm whether
                            // that is intended before changing it
                            if ((element.tag.model & Dict.CM_OBJECT) != 0)
                            {
                                lexer.istackbase = istackbase;
                            }

                            Node.trimSpaces(lexer, element);
                            Node.trimEmptyElement(lexer, element);
                            return;
                        }
                    }
                    else
                    {
                        // things like list items

                        if ((node.tag.model & Dict.CM_HEAD) != 0)
                        {
                            moveToHead(lexer, element, node);
                            continue;
                        }

                        // special case where a form start tag occurs in a tr and is followed by td or th
                        if (element.tag == tt.tagForm
                            && element.parent != null
                            && element.parent.tag == tt.tagTd
                            && element.parent.implicit)
                        {
                            if (node.tag == tt.tagTd)
                            {
                                lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                                continue;
                            }

                            if (node.tag == tt.tagTh)
                            {
                                // discard the token and turn the implicit td that holds the form into a th
                                lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                                node = element.parent;
                                node.element = "th";
                                node.tag = tt.tagTh;
                                continue;
                            }
                        }

                        if (!((element.tag.model & Dict.CM_OPT) != 0) && !element.implicit)
                        {
                            lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                        }

                        lexer.ungetToken();

                        if ((node.tag.model & Dict.CM_LIST) != 0)
                        {
                            if (element.parent != null
                                && element.parent.tag != null
                                && element.parent.tag.getParser() == LIST)
                            {
                                Node.trimSpaces(lexer, element);
                                Node.trimEmptyElement(lexer, element);
                                return;
                            }

                            node = lexer.inferredTag("ul");
                            node.addClass("noindent");
                        }
                        else if ((node.tag.model & Dict.CM_DEFLIST) != 0)
                        {
                            // same null guard as the list case above
                            if (element.parent != null && element.parent.tag == tt.tagDl)
                            {
                                Node.trimSpaces(lexer, element);
                                Node.trimEmptyElement(lexer, element);
                                return;
                            }

                            node = lexer.inferredTag("dl");
                        }
                        else if ((node.tag.model & Dict.CM_TABLE) != 0 || (node.tag.model & Dict.CM_ROW) != 0)
                        {
                            node = lexer.inferredTag("table");
                        }
                        else
                        {
                            // infer the end of this element; a CM_OBJECT element also drops its inline stack context
                            closeObjectContext(lexer, element, istackbase);
                            Node.trimSpaces(lexer, element);
                            Node.trimEmptyElement(lexer, element);
                            return;
                        }
                    }
                }

                // parse known element
                if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
                {
                    if (TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE))
                    {
                        // DSR - 27Apr02 ensure we wrap anchors and other inline content
                        // fgiust: commented out due to [1403105]: java.lang.StackOverflowError in Tidy.parseDOM()
                        // if (lexer.configuration.encloseBlockText)
                        // {
                        // lexer.ungetToken();
                        // node = lexer.inferredTag("p");
                        // element.insertNodeAtEnd(node);
                        // parseTag(lexer, node, Lexer.MIXED_CONTENT);
                        // continue;
                        // }

                        if (checkstack && !node.implicit)
                        {
                            checkstack = false;

                            // #431731 - fix by Randy Waki 25 Dec 00
                            if (!TidyUtils.toBoolean(element.tag.model & Dict.CM_MIXED))
                            {
                                if (lexer.inlineDup(node) > 0)
                                {
                                    continue;
                                }
                            }
                        }

                        mode = Lexer.MIXED_CONTENT;
                    }
                    else
                    {
                        checkstack = true;
                        mode = Lexer.IGNORE_WHITESPACE;
                    }

                    // trim white space before <br>
                    if (node.tag == tt.tagBr)
                    {
                        Node.trimSpaces(lexer, element);
                    }

                    element.insertNodeAtEnd(node);

                    if (node.implicit)
                    {
                        lexer.report.warning(lexer, element, node, Report.INSERTING_TAG);
                    }

                    parseTag(lexer, node, Lexer.IGNORE_WHITESPACE // Lexer.MixedContent
                    );
                    continue;
                }

                // discard unexpected tags
                if (node.type == Node.END_TAG)
                {
                    lexer.popInline(node); // if inline end tag
                }

                lexer.report.warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                continue;
            }

            // ran out of tokens without seeing the end tag; node is null here
            if (!((element.tag.model & Dict.CM_OPT) != 0))
            {
                lexer.report.warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
            }

            closeObjectContext(lexer, element, istackbase);

            Node.trimSpaces(lexer, element);
            Node.trimEmptyElement(lexer, element);
        }

        /**
         * Disposes of the inline stack context opened for an element whose tag has <code>CM_OBJECT</code> set: pops
         * the inline stack down to the current base and then restores the base that was saved on entry. Does nothing
         * for any other element.
         * @param lexer lexer owning the inline stack
         * @param element element being left
         * @param istackbase value of <code>lexer.istackbase</code> saved when the element was entered
         */
        private static void closeObjectContext(Lexer lexer, Node element, int istackbase)
        {
            if ((element.tag.model & Dict.CM_OBJECT) != 0)
            {
                // pop inline stack
                while (lexer.istack.size() > lexer.istackbase)
                {
                    lexer.popInline(null);
                }
                lexer.istackbase = istackbase;
            }
        }

    }

    /**
     * Parser for TABLE.
     */
    public static class ParseTableTag implements Parser
    {

        /**
         * Parses the content of a table element. The lexer's inline stack base is moved to the top of the stack for
         * the duration of the call and put back on every way out.
         * @param lexer source of tokens and owner of the report used for warnings
         * @param table element whose content is parsed
         * @param mode not used by this parser; tokens are always read with <code>Lexer.IGNORE_WHITESPACE</code>
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node table, short mode)
        {
            final TagTable tags = lexer.configuration.tt;

            lexer.deferDup();

            // remember the current base and start a fresh inline stack context
            final int savedBase = lexer.istackbase;
            lexer.istackbase = lexer.istack.size();

            Node token;

            while ((token = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
            {
                // the table's own end tag closes it
                if (token.type == Node.END_TAG && token.tag == table.tag)
                {
                    lexer.istackbase = savedBase;
                    table.closed = true;
                    Node.trimEmptyElement(lexer, table);
                    return;
                }

                // comments and the like are taken as they come
                if (Node.insertMisc(table, token))
                {
                    continue;
                }

                // anything without a known tag, text excepted, is thrown away
                if (token.type != Node.TEXT_NODE && token.tag == null)
                {
                    lexer.report.warning(lexer, table, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                if (token.type != Node.END_TAG)
                {
                    final boolean wantsRow = token.tag == tags.tagTd
                        || token.tag == tags.tagTh
                        || token.tag == tags.tagTable;

                    if (wantsRow)
                    {
                        // a cell or a nested table without a row: push it back and infer the <tr>
                        lexer.ungetToken();
                        token = lexer.inferredTag("tr");
                        lexer.report.warning(lexer, table, token, Report.MISSING_STARTTAG);
                    }
                    else if (token.type == Node.TEXT_NODE
                        || (token.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
                    {
                        // text, block and inline content is moved in front of the table
                        Node.insertNodeBeforeElement(table, token);
                        lexer.report.warning(lexer, table, token, Report.TAG_NOT_ALLOWED_IN);
                        lexer.exiled = true;

                        // #427662 - only elements have content to parse
                        if (token.type != Node.TEXT_NODE)
                        {
                            parseTag(lexer, token, Lexer.IGNORE_WHITESPACE);
                        }

                        lexer.exiled = false;
                        continue;
                    }
                    else if ((token.tag.model & Dict.CM_HEAD) != 0)
                    {
                        moveToHead(lexer, table, token);
                        continue;
                    }
                }

                if (token.type == Node.END_TAG)
                {
                    if (token.tag == tags.tagForm)
                    {
                        badForm(lexer);
                        lexer.report.warning(lexer, table, token, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    // stray end tags of table, row, block or inline elements are dropped
                    final int droppedModels = Dict.CM_TABLE | Dict.CM_ROW | Dict.CM_BLOCK | Dict.CM_INLINE;

                    if (token.tag != null && (token.tag.model & droppedModels) != 0)
                    {
                        lexer.report.warning(lexer, table, token, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    // an end tag that belongs to an ancestor implies the end of this table
                    Node ancestor = table.parent;

                    while (ancestor != null)
                    {
                        if (token.tag == ancestor.tag)
                        {
                            lexer.report.warning(lexer, table, token, Report.MISSING_ENDTAG_BEFORE);
                            lexer.ungetToken();
                            lexer.istackbase = savedBase;
                            Node.trimEmptyElement(lexer, table);
                            return;
                        }

                        ancestor = ancestor.parent;
                    }
                }

                // a tag that is not table content ends the table; the token goes back to the lexer
                if ((token.tag.model & Dict.CM_TABLE) == 0)
                {
                    lexer.ungetToken();
                    lexer.report.warning(lexer, table, token, Report.TAG_NOT_ALLOWED_IN);
                    lexer.istackbase = savedBase;
                    Node.trimEmptyElement(lexer, table);
                    return;
                }

                // whatever is left and is not a start tag is discarded
                if (token.type != Node.START_TAG && token.type != Node.START_END_TAG)
                {
                    lexer.report.warning(lexer, table, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                table.insertNodeAtEnd(token);
                parseTag(lexer, token, Lexer.IGNORE_WHITESPACE);
            }

            // no more tokens and no end tag seen; token is null here
            lexer.report.warning(lexer, table, token, Report.MISSING_ENDTAG_FOR);
            Node.trimEmptyElement(lexer, table);
            lexer.istackbase = savedBase;
        }

    }

    /**
     * Parser for COLGROUP.
     * <p>
     * Appends <code>COL</code> children (and whatever {@link Node#insertMisc(Node, Node)} accepts) to the
     * column group until its end tag is read. A token that cannot belong to the column group is either
     * discarded with a warning or pushed back to the lexer, in which case parsing of the group stops.
     */
    public static class ParseColGroup implements Parser
    {

        /**
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node colgroup, short mode)
        {
            TagTable tags = lexer.configuration.tt;

            // elements declared empty have no content to read
            if ((colgroup.tag.model & Dict.CM_EMPTY) != 0)
            {
                return;
            }

            Node token;
            while ((token = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
            {
                if (token.type == Node.END_TAG)
                {
                    // our own end tag: the element is complete
                    if (token.tag == colgroup.tag)
                    {
                        colgroup.closed = true;
                        return;
                    }

                    // a misplaced </form> is flagged and dropped
                    if (token.tag == tags.tagForm)
                    {
                        badForm(lexer);
                        lexer.report.warning(lexer, colgroup, token, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    // end tag of an ancestor: push it back, which infers the end tag for this element
                    Node ancestor = colgroup.parent;
                    while (ancestor != null)
                    {
                        if (token.tag == ancestor.tag)
                        {
                            lexer.ungetToken();
                            return;
                        }
                        ancestor = ancestor.parent;
                    }
                }

                // text is not handled here: give it back and stop
                if (token.type == Node.TEXT_NODE)
                {
                    lexer.ungetToken();
                    return;
                }

                // comments etc.
                if (Node.insertMisc(colgroup, token))
                {
                    continue;
                }

                // unknown tags are discarded
                if (token.tag == null)
                {
                    lexer.report.warning(lexer, colgroup, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // any known tag other than COL ends the column group
                if (token.tag != tags.tagCol)
                {
                    lexer.ungetToken();
                    return;
                }

                // a COL end tag is discarded
                if (token.type == Node.END_TAG)
                {
                    lexer.report.warning(lexer, colgroup, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // what remains is a <COL> start tag
                colgroup.insertNodeAtEnd(token);
                parseTag(lexer, token, Lexer.IGNORE_WHITESPACE);
            }
        }

    }

    /**
     * Parser for ROWGROUP.
     * <p>
     * Appends rows to the row group, inferring a <code>TR</code> start tag where one is missing. Text, block
     * and inline content is moved before the table, head content is moved to the head, and a token that ends
     * the row group implicitly is pushed back to the lexer.
     */
    public static class ParseRowGroup implements Parser
    {

        /**
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node rowgroup, short mode)
        {
            TagTable tags = lexer.configuration.tt;

            // elements declared empty have no content to read
            if ((rowgroup.tag.model & Dict.CM_EMPTY) != 0)
            {
                return;
            }

            Node token;
            while ((token = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
            {
                if (token.tag == rowgroup.tag)
                {
                    // same tag but not the end tag: push it back and stop without trimming
                    if (token.type != Node.END_TAG)
                    {
                        lexer.ungetToken();
                        return;
                    }

                    rowgroup.closed = true;
                    Node.trimEmptyElement(lexer, rowgroup);
                    return;
                }

                // </table> infers the end tag
                if (token.type == Node.END_TAG && token.tag == tags.tagTable)
                {
                    lexer.ungetToken();
                    Node.trimEmptyElement(lexer, rowgroup);
                    return;
                }

                // comments etc.
                if (Node.insertMisc(rowgroup, token))
                {
                    continue;
                }

                // unknown tags are discarded
                if (token.type != Node.TEXT_NODE && token.tag == null)
                {
                    lexer.report.warning(lexer, rowgroup, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // TD or TH: infer <TR>; text, inline or block: move before the table; head content: move to head
                if (token.type != Node.END_TAG)
                {
                    if (token.tag == tags.tagTd || token.tag == tags.tagTh)
                    {
                        // the cell is pushed back and parsing carries on with the inferred row
                        lexer.ungetToken();
                        token = lexer.inferredTag("tr");
                        lexer.report.warning(lexer, rowgroup, token, Report.MISSING_STARTTAG);
                    }
                    else if (token.type == Node.TEXT_NODE
                        || (token.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
                    {
                        Node.moveBeforeTable(rowgroup, token, tags);
                        lexer.report.warning(lexer, rowgroup, token, Report.TAG_NOT_ALLOWED_IN);
                        lexer.exiled = true;

                        // #427662 was (!node.type == TextNode) fix by Young 04 Aug 00
                        if (token.type != Node.TEXT_NODE)
                        {
                            parseTag(lexer, token, Lexer.IGNORE_WHITESPACE);
                        }

                        lexer.exiled = false;
                        continue;
                    }
                    else if ((token.tag.model & Dict.CM_HEAD) != 0)
                    {
                        lexer.report.warning(lexer, rowgroup, token, Report.TAG_NOT_ALLOWED_IN);
                        moveToHead(lexer, rowgroup, token);
                        continue;
                    }
                }

                // end tag of an ancestor element infers the end tag for this element
                if (token.type == Node.END_TAG)
                {
                    boolean strayForm = token.tag == tags.tagForm;
                    boolean blockOrInline = token.tag != null
                        && (token.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0;

                    if (strayForm || blockOrInline)
                    {
                        if (strayForm)
                        {
                            badForm(lexer);
                        }
                        lexer.report.warning(lexer, rowgroup, token, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (token.tag == tags.tagTr || token.tag == tags.tagTd || token.tag == tags.tagTh)
                    {
                        lexer.report.warning(lexer, rowgroup, token, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    Node ancestor = rowgroup.parent;
                    while (ancestor != null)
                    {
                        if (token.tag == ancestor.tag)
                        {
                            lexer.ungetToken();
                            Node.trimEmptyElement(lexer, rowgroup);
                            return;
                        }
                        ancestor = ancestor.parent;
                    }
                }

                // THEAD, TFOOT or TBODY: implied end tag
                if ((token.tag.model & Dict.CM_ROWGRP) != 0)
                {
                    if (token.type != Node.END_TAG)
                    {
                        lexer.ungetToken();
                    }

                    Node.trimEmptyElement(lexer, rowgroup);
                    return;
                }

                // any other end tag is discarded
                if (token.type == Node.END_TAG)
                {
                    lexer.report.warning(lexer, rowgroup, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // anything that is not a row gets an inferred <TR> in front of it
                if (token.tag != tags.tagTr)
                {
                    token = lexer.inferredTag("tr");
                    lexer.report.warning(lexer, rowgroup, token, Report.MISSING_STARTTAG);
                    lexer.ungetToken();
                }

                // token is a <TR> at this point
                rowgroup.insertNodeAtEnd(token);
                parseTag(lexer, token, Lexer.IGNORE_WHITESPACE);
            }

            // input exhausted without an end tag
            Node.trimEmptyElement(lexer, rowgroup);
        }
    }

    /**
     * Parser for ROW.
     * <p>
     * Appends <code>TD</code> and <code>TH</code> cells to the row. A form start tag gets an inferred
     * <code>TD</code>; text, block and inline content is moved before the table; head content is moved to the
     * head; and a token that ends the row implicitly is pushed back to the lexer.
     */
    public static class ParseRow implements Parser
    {

        /**
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node row, short mode)
        {
            TagTable tags = lexer.configuration.tt;

            // elements declared empty have no content to read
            if ((row.tag.model & Dict.CM_EMPTY) != 0)
            {
                return;
            }

            Node token;
            while ((token = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
            {
                if (token.tag == row.tag)
                {
                    // the end tag closes the row; any other token with the row's tag is pushed back
                    if (token.type == Node.END_TAG)
                    {
                        row.closed = true;
                    }
                    else
                    {
                        lexer.ungetToken();
                    }

                    Node.fixEmptyRow(lexer, row);
                    return;
                }

                // end tag of an ancestor element infers the end tag for this element
                if (token.type == Node.END_TAG)
                {
                    boolean tableLevel = token.tag == tags.tagTable
                        || (token.tag != null && (token.tag.model & (Dict.CM_HTML | Dict.CM_TABLE)) != 0);

                    if (tableLevel && row.isDescendantOf(token.tag))
                    {
                        lexer.ungetToken();
                        return;
                    }

                    boolean strayForm = token.tag == tags.tagForm;
                    boolean blockOrInline = token.tag != null
                        && (token.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0;

                    if (strayForm || blockOrInline)
                    {
                        if (strayForm)
                        {
                            badForm(lexer);
                        }
                        lexer.report.warning(lexer, row, token, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (token.tag == tags.tagTd || token.tag == tags.tagTh)
                    {
                        lexer.report.warning(lexer, row, token, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    Node ancestor = row.parent;
                    while (ancestor != null)
                    {
                        if (token.tag == ancestor.tag)
                        {
                            lexer.ungetToken();
                            Node.trimEmptyElement(lexer, row);
                            return;
                        }
                        ancestor = ancestor.parent;
                    }
                }

                // comments etc.
                if (Node.insertMisc(row, token))
                {
                    continue;
                }

                // unknown tags are discarded
                if (token.type != Node.TEXT_NODE && token.tag == null)
                {
                    lexer.report.warning(lexer, row, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // an unexpected <table> element is discarded
                if (token.tag == tags.tagTable)
                {
                    lexer.report.warning(lexer, row, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // THEAD, TFOOT or TBODY ends the row
                if (token.tag != null && (token.tag.model & Dict.CM_ROWGRP) != 0)
                {
                    lexer.ungetToken();
                    Node.trimEmptyElement(lexer, row);
                    return;
                }

                // any remaining end tag is discarded, so everything below sees non-end tokens only
                if (token.type == Node.END_TAG)
                {
                    lexer.report.warning(lexer, row, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // text, inline or block: move before the table; head content: move to head
                if (token.tag == tags.tagForm)
                {
                    // the form is pushed back and parsing carries on with an inferred cell
                    lexer.ungetToken();
                    token = lexer.inferredTag("td");
                    lexer.report.warning(lexer, row, token, Report.MISSING_STARTTAG);
                }
                else if (token.type == Node.TEXT_NODE
                    || (token.tag.model & (Dict.CM_BLOCK | Dict.CM_INLINE)) != 0)
                {
                    Node.moveBeforeTable(row, token, tags);
                    lexer.report.warning(lexer, row, token, Report.TAG_NOT_ALLOWED_IN);
                    lexer.exiled = true;

                    if (token.type != Node.TEXT_NODE)
                    {
                        parseTag(lexer, token, Lexer.IGNORE_WHITESPACE);
                    }

                    lexer.exiled = false;
                    continue;
                }
                else if ((token.tag.model & Dict.CM_HEAD) != 0)
                {
                    lexer.report.warning(lexer, row, token, Report.TAG_NOT_ALLOWED_IN);
                    moveToHead(lexer, row, token);
                    continue;
                }

                // only cells may be appended to a row
                if (token.tag != tags.tagTd && token.tag != tags.tagTh)
                {
                    lexer.report.warning(lexer, row, token, Report.TAG_NOT_ALLOWED_IN);
                    continue;
                }

                // token is a <TD> or <TH>: parse it with excludeBlocks switched off, then restore the flag
                row.insertNodeAtEnd(token);
                boolean savedExcludeBlocks = lexer.excludeBlocks;
                lexer.excludeBlocks = false;
                parseTag(lexer, token, Lexer.IGNORE_WHITESPACE);
                lexer.excludeBlocks = savedExcludeBlocks;

                // pop the inline stack back down to its base
                while (lexer.istack.size() > lexer.istackbase)
                {
                    lexer.popInline(null);
                }
            }

            // input exhausted without an end tag
            Node.trimEmptyElement(lexer, row);
        }

    }

    /**
     * Parser for NOFRAMES.
     * <p>
     * Flags the use of NOFRAMES in <code>lexer.badAccess</code>, then reads the element's content. A BODY start
     * tag is appended directly; other content gets an inferred BODY, or is appended to the document body when
     * the end of the body has already been seen.
     */
    public static class ParseNoFrames implements Parser
    {

        /**
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node noframes, short mode)
        {
            Node node;
            TagTable tt = lexer.configuration.tt;

            lexer.badAccess |= Report.USING_NOFRAMES;
            // the mode passed in by the caller is ignored
            mode = Lexer.IGNORE_WHITESPACE;

            while ((node = lexer.getToken(mode)) != null)
            {
                if (node.tag == noframes.tag && node.type == Node.END_TAG)
                {
                    noframes.closed = true;
                    Node.trimSpaces(lexer, noframes);
                    return;
                }

                // FRAME or FRAMESET ends the NOFRAMES element
                if ((node.tag == tt.tagFrame || node.tag == tt.tagFrameset))
                {

                    Node.trimSpaces(lexer, noframes);

                    // fix for [539369]
                    if (node.type == Node.END_TAG)
                    {
                        lexer.report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
                        // Throw it away
                    }
                    else
                    {
                        lexer.report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_BEFORE);

                        lexer.ungetToken();
                    }
                    return;
                }

                // HTML tags are skipped; only start tags are reported
                if (node.tag == tt.tagHtml)
                {
                    if (node.type == Node.START_TAG || node.type == Node.START_END_TAG)
                    {
                        lexer.report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
                    }

                    continue;
                }

                // deal with comments etc.
                if (Node.insertMisc(noframes, node))
                {
                    continue;
                }

                if (node.tag == tt.tagBody && node.type == Node.START_TAG)
                {
                    // sampled before parsing, since parsing the body may change the flag
                    boolean seenbody = lexer.seenEndBody;
                    noframes.insertNodeAtEnd(node);
                    parseTag(lexer, node, Lexer.IGNORE_WHITESPACE); // MixedContent

                    // a second body is turned into a DIV and moved into the real body
                    if (seenbody)
                    {
                        Node.coerceNode(lexer, node, tt.tagDiv);
                        moveNodeToBody(lexer, node);
                    }
                    continue;
                }

                // implicit body element inferred
                if (node.type == Node.TEXT_NODE || (node.tag != null && node.type != Node.END_TAG))
                {
                    if (lexer.seenEndBody)
                    {
                        Node body = lexer.root.findBody(tt);

                        // without a body there is nowhere to put the content: report and drop the token
                        // instead of dereferencing null below
                        if (body == null)
                        {
                            lexer.report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        // text after the body is wrapped in an inferred <p>
                        if (node.type == Node.TEXT_NODE)
                        {
                            lexer.ungetToken();
                            node = lexer.inferredTag("p");
                            lexer.report.warning(lexer, noframes, node, Report.CONTENT_AFTER_BODY);
                        }

                        body.insertNodeAtEnd(node);
                    }
                    else
                    {
                        // push the token back so it is read again as content of the inferred body
                        lexer.ungetToken();
                        node = lexer.inferredTag("body");
                        if (lexer.configuration.xmlOut)
                        {
                            lexer.report.warning(lexer, noframes, node, Report.INSERTING_TAG);
                        }
                        noframes.insertNodeAtEnd(node);
                    }
                    parseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
                    // MixedContent
                    continue;
                }
                // discard unexpected end tags
                lexer.report.warning(lexer, noframes, node, Report.DISCARDING_UNEXPECTED);
            }

            // input exhausted without an end tag; node is null here
            lexer.report.warning(lexer, noframes, node, Report.MISSING_ENDTAG_FOR);
        }

    }

    /**
     * Parser for SELECT.
     * <p>
     * Accepts OPTION, OPTGROUP and SCRIPT start tags as children; every other token is discarded with a
     * warning.
     */
    public static class ParseSelect implements Parser
    {

        /**
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node field, short mode)
        {
            TagTable tags = lexer.configuration.tt;

            // defer implicit inline start tags
            lexer.insert = -1;

            Node token;
            while ((token = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
            {
                // matching end tag: done
                if (token.type == Node.END_TAG && token.tag == field.tag)
                {
                    field.closed = true;
                    Node.trimSpaces(lexer, field);
                    return;
                }

                // comments etc.
                if (Node.insertMisc(field, token))
                {
                    continue;
                }

                boolean allowedChild = token.tag == tags.tagOption
                    || token.tag == tags.tagOptgroup
                    || token.tag == tags.tagScript;

                if (allowedChild && token.type == Node.START_TAG)
                {
                    field.insertNodeAtEnd(token);
                    parseTag(lexer, token, Lexer.IGNORE_WHITESPACE);
                    continue;
                }

                // everything else is unexpected here
                lexer.report.warning(lexer, field, token, Report.DISCARDING_UNEXPECTED);
            }

            // input exhausted without an end tag; token is null here
            lexer.report.warning(lexer, field, token, Report.MISSING_ENDTAG_FOR);
        }

    }

    /**
     * Parser for text nodes.
     * <p>
     * Collects text content for <code>field</code>. TEXTAREA is read in preformatted mode, every other element
     * in mixed-content mode. Inline tags outside the field content model are discarded; any other tag
     * terminates the element.
     */
    public static class ParseText implements Parser
    {

        /**
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node field, short mode)
        {
            TagTable tags = lexer.configuration.tt;

            // defer implicit inline start tags
            lexer.insert = -1;

            // the caller's mode is replaced; mixed content is a kludge for font tags
            mode = Lexer.MIXED_CONTENT;
            if (field.tag == tags.tagTextarea)
            {
                mode = Lexer.PREFORMATTED;
            }

            Node token;
            while ((token = lexer.getToken(mode)) != null)
            {
                // matching end tag: done
                if (token.type == Node.END_TAG && token.tag == field.tag)
                {
                    field.closed = true;
                    Node.trimSpaces(lexer, field);
                    return;
                }

                // comments etc.
                if (Node.insertMisc(field, token))
                {
                    continue;
                }

                if (token.type == Node.TEXT_NODE)
                {
                    // only called for 1st child
                    if ((mode & Lexer.PREFORMATTED) == 0 && field.content == null)
                    {
                        Node.trimSpaces(lexer, field);
                    }

                    // text nodes with no characters are skipped
                    if (token.start < token.end)
                    {
                        field.insertNodeAtEnd(token);
                    }
                    continue;
                }

                // for textarea should all cases of < and & be escaped?
                // discard inline tags e.g. font
                // #487283 - fix by Lee Passey 25 Jan 02: tags in the field content model are not discarded
                if (token.tag != null
                    && (token.tag.model & Dict.CM_INLINE) != 0
                    && (token.tag.model & Dict.CM_FIELD) == 0)
                {
                    lexer.report.warning(lexer, field, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                // any other tag terminates the element; warn unless CM_OPT is set for it
                if ((field.tag.model & Dict.CM_OPT) == 0)
                {
                    lexer.report.warning(lexer, field, token, Report.MISSING_ENDTAG_BEFORE);
                }

                lexer.ungetToken();
                Node.trimSpaces(lexer, field);
                return;
            }

            // input exhausted without an end tag; token is null here
            if ((field.tag.model & Dict.CM_OPT) == 0)
            {
                lexer.report.warning(lexer, field, token, Report.MISSING_ENDTAG_FOR);
            }
        }

    }

    /**
     * Parser for OPTGROUP.
     * <p>
     * Accepts OPTION and OPTGROUP start tags as children, warning about nested OPTGROUPs; every other token is
     * discarded with a warning.
     */
    public static class ParseOptGroup implements Parser
    {

        /**
         * @see org.w3c.tidy.Parser#parse(org.w3c.tidy.Lexer, org.w3c.tidy.Node, short)
         */
        public void parse(Lexer lexer, Node field, short mode)
        {
            TagTable tags = lexer.configuration.tt;

            // defer implicit inline start tags
            lexer.insert = -1;

            Node token;
            while ((token = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
            {
                // matching end tag: done
                if (token.type == Node.END_TAG && token.tag == field.tag)
                {
                    field.closed = true;
                    Node.trimSpaces(lexer, field);
                    return;
                }

                // comments etc.
                if (Node.insertMisc(field, token))
                {
                    continue;
                }

                boolean nestedGroup = token.tag == tags.tagOptgroup;

                if (token.type == Node.START_TAG && (nestedGroup || token.tag == tags.tagOption))
                {
                    // a nested OPTGROUP is reported but still appended
                    if (nestedGroup)
                    {
                        lexer.report.warning(lexer, field, token, Report.CANT_BE_NESTED);
                    }

                    field.insertNodeAtEnd(token);
                    parseTag(lexer, token, Lexer.MIXED_CONTENT);
                    continue;
                }

                // everything else is unexpected here
                lexer.report.warning(lexer, field, token, Report.DISCARDING_UNEXPECTED);
            }
        }

    }

    /**
     * Parses an HTML document; HTML is the top level element.
     * <p>
     * Creates the root node, stores it in <code>lexer.root</code>, and parses the <code>html</code> element,
     * inferring one when the input does not start with it. An <code>html</code> element and a
     * <code>title</code> element are added afterwards if they are still missing.
     * @param lexer the lexer supplying the tokens
     * @return the root node of the parsed document
     */
    public static Node parseDocument(Lexer lexer)
    {
        TagTable tt = lexer.configuration.tt;
        boolean doctypeSeen = false;

        Node document = lexer.newNode();
        document.type = Node.ROOT_NODE;

        lexer.root = document;

        Node token;
        while ((token = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
        {
            // comments etc.
            if (Node.insertMisc(document, token))
            {
                continue;
            }

            // only the first doctype is kept
            if (token.type == Node.DOCTYPE_TAG)
            {
                if (doctypeSeen)
                {
                    lexer.report.warning(lexer, document, token, Report.DISCARDING_UNEXPECTED);
                }
                else
                {
                    document.insertNodeAtEnd(token);
                    doctypeSeen = true;
                }
                continue;
            }

            if (token.type == Node.END_TAG)
            {
                lexer.report.warning(lexer, document, token, Report.DISCARDING_UNEXPECTED); // TODO?
                continue;
            }

            // anything but an <html> start tag is pushed back behind an inferred <html>
            Node html;
            if (token.type == Node.START_TAG && token.tag == tt.tagHtml)
            {
                html = token;
            }
            else
            {
                lexer.ungetToken();
                html = lexer.inferredTag("html");
            }

            if (document.findDocType() == null && !lexer.configuration.bodyOnly)
            {
                lexer.report.warning(lexer, null, null, Report.MISSING_DOCTYPE);
            }

            document.insertNodeAtEnd(html);
            HTML.parse(lexer, html, (short) 0); // TODO?

            // the html element is parsed once; the loop ends here
            break;
        }

        if (lexer.root.findHTML(lexer.configuration.tt) == null)
        {
            /* a later check should complain if <body> is empty */
            Node inferredHtml = lexer.inferredTag("html");
            lexer.root.insertNodeAtEnd(inferredHtml);
            HTML.parse(lexer, inferredHtml, Lexer.IGNORE_WHITESPACE);
        }

        if (lexer.root.findTITLE(lexer.configuration.tt) == null)
        {
            Node head = lexer.root.findHEAD(lexer.configuration.tt);
            lexer.report.warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT);
            head.insertNodeAtEnd(lexer.inferredTag("title"));
        }

        return document;
    }

    /**
     * Indicates whether or not whitespace should be preserved for this element.
     * <p>
     * If an <code>xml:space</code> attribute is found, the result is <code>true</code> when its value is
     * <code>preserve</code> and <code>false</code> for any other value, including a missing value. If no
     * <code>xml:space</code> attribute is found, the result is <code>true</code> when:
     * <ul>
     * <li>the element name is <code>pre</code>, <code>script</code> or <code>style</code> (ignoring case);</li>
     * <li>a <code>TagTable</code> was passed in and the parser it finds for the element is the
     * <code>PRE</code> parser;</li>
     * <li>the element name is <code>xsl:text</code> (ignoring case).</li>
     * </ul>
     * Otherwise, or if the element has no name, <code>false</code> is returned.
     * @param element The <code>Node</code> to test to see if whitespace should be preserved.
     * @param tt The <code>TagTable</code> used to look up the parser for the element. This may be
     * <code>null</code>, in which case this test is bypassed.
     * @return <code>true</code> or <code>false</code>, as explained above.
     */
    public static boolean XMLPreserveWhiteSpace(Node element, TagTable tt)
    {
        // search attributes for xml:space
        for (AttVal attribute = element.attributes; attribute != null; attribute = attribute.next)
        {
            // constant-first equals: a null attribute name or value must not raise a NullPointerException
            if ("xml:space".equals(attribute.attribute))
            {
                // the first xml:space attribute decides; only the exact value "preserve" counts
                return "preserve".equals(attribute.value);
            }
        }

        if (element.element == null) // Debian Bug #137124. Fix based on suggestion by Cesar Eduardo Barros 06 Mar 02
        {
            return false;
        }

        // kludge for html docs without explicit xml:space attribute
        if ("pre".equalsIgnoreCase(element.element)
            || "script".equalsIgnoreCase(element.element)
            || "style".equalsIgnoreCase(element.element))
        {
            return true;
        }

        if ((tt != null) && (tt.findParser(element) == PRE))
        {
            return true;
        }

        // kludge for XSL docs
        if ("xsl:text".equalsIgnoreCase(element.element))
        {
            return true;
        }

        return false;
    }

    /**
     * Parses the content of an element of an XML document.
     * <p>
     * Children are appended until the end tag with the element's name is read; start tags are parsed
     * recursively. Unless whitespace is preserved, one leading space is trimmed from a first text child and
     * one trailing space from a last text child, and a text node left empty by that is discarded.
     * @param lexer the lexer supplying the tokens
     * @param element the element whose content is parsed
     * @param mode the lexer mode; replaced by <code>Lexer.PREFORMATTED</code> when
     * {@link #XMLPreserveWhiteSpace(Node, TagTable)} returns <code>true</code> for the element
     */
    public static void parseXMLElement(Lexer lexer, Node element, short mode)
    {
        // if node is pre or has xml:space="preserve" then do so
        if (XMLPreserveWhiteSpace(element, lexer.configuration.tt))
        {
            mode = Lexer.PREFORMATTED;
        }

        Node child;
        while ((child = lexer.getToken(mode)) != null)
        {
            if (child.type == Node.END_TAG)
            {
                // our own end tag closes the element
                if (child.element.equals(element.element))
                {
                    element.closed = true;
                    break;
                }

                // any other end tag is unexpected and discarded
                lexer.report.error(lexer, element, child, Report.UNEXPECTED_ENDTAG);
                continue;
            }

            // a start tag has its content parsed before it is appended
            if (child.type == Node.START_TAG)
            {
                parseXMLElement(lexer, child, mode);
            }

            element.insertNodeAtEnd(child);
        }

        if (mode != Lexer.PREFORMATTED)
        {
            // first child is text: trim one initial space and delete the node if that empties it
            Node first = element.content;

            if (first != null && first.type == Node.TEXT_NODE && first.textarray[first.start] == (byte) ' ')
            {
                first.start++;

                if (first.start >= first.end)
                {
                    Node.discardElement(first);
                }
            }

            // last child is text: trim one final space and delete the node if that empties it;
            // element.last is read only after the first child has been dealt with
            Node last = element.last;

            if (last != null && last.type == Node.TEXT_NODE && last.textarray[last.end - 1] == (byte) ' ')
            {
                last.end--;

                if (last.start >= last.end)
                {
                    Node.discardElement(last);
                }
            }
        }
    }

    /**
     * Parses an XML document.
     * <p>
     * Switches the configuration to XML tags, reads the top-level nodes into a new root node, checks the
     * doctype keywords, and fixes the XML declaration when <code>xmlPi</code> is configured.
     * @param lexer the lexer supplying the tokens
     * @return the root node of the parsed document
     */
    public static Node parseXMLDocument(Lexer lexer)
    {
        Node document = lexer.newNode();
        document.type = Node.ROOT_NODE;
        Node doctype = null;
        lexer.configuration.xmlTags = true;

        Node token;
        while ((token = lexer.getToken(Lexer.IGNORE_WHITESPACE)) != null)
        {
            // end tags are unexpected at this level and discarded
            if (token.type == Node.END_TAG)
            {
                lexer.report.warning(lexer, null, token, Report.UNEXPECTED_ENDTAG);
                continue;
            }

            // comments etc.
            if (Node.insertMisc(document, token))
            {
                continue;
            }

            if (token.type == Node.DOCTYPE_TAG)
            {
                // only the first doctype is kept
                if (doctype != null)
                {
                    lexer.report.warning(lexer, document, token, Report.DISCARDING_UNEXPECTED); // TODO
                }
                else
                {
                    document.insertNodeAtEnd(token);
                    doctype = token;
                }
            }
            else if (token.type == Node.START_END_TAG)
            {
                document.insertNodeAtEnd(token);
            }
            else if (token.type == Node.START_TAG)
            {
                // a start tag has its content parsed
                document.insertNodeAtEnd(token);
                parseXMLElement(lexer, token, Lexer.IGNORE_WHITESPACE);
            }
        }

        if (doctype != null && !lexer.checkDocTypeKeyWords(doctype))
        {
            lexer.report.warning(lexer, doctype, null, Report.DTYPE_NOT_UPPER_CASE);
        }

        // ensure presence of initial <?XML version="1.0"?>
        if (lexer.configuration.xmlPi)
        {
            lexer.fixXmlDecl(document);
        }

        return document;
    }

    /**
     * Records a misplaced form start or end tag: sets the lexer's <code>badForm</code> flag and counts one
     * more error. Such positioning errors generally require human intervention to fix.
     * @param lexer the lexer whose state is updated
     */
    static void badForm(Lexer lexer)
    {
        lexer.errors++;
        lexer.badForm = 1;
    }

}