XmlParser.java example

Explorer
AnalyseSI-master
- src
  - com
    - jgoodies
      - uif
        lite
        panel
        SimpleInternalFrame.java
    - microstar
      - xml
        HandlerBase.java
        XmlException.java
        XmlHandler.java
        XmlParser.java
  - org
    - analyse
      - core
        gui
        AboutWindow.java
        AnalyseBar.java
        AnalyseFrame.java
        HtmlWindow.java
        ParametrageWindow.java
        SplashScreen.java
        action
        BasicAction.java
        GlobalActionCollection.java
        MainActionListener.java
        NavigationActionFactory.java
        menu
        AnalyseMenu.java
        ClipboardPopupMenu.java
        panel
        HelpPanel.java
        Navigator.java
        statusbar
        AnalyseStatusbar.java
        toolbar
        AnalyseToolbar.java
        zgraph
        ZElement.java
        ZGraphique.java
        ZLien.java
        modules
        AnalyseModule.java
        AnalysePanel.java
        ClipboardInterface.java
        FilterModule.java
        OpenModule.java
        SaveModule.java
        UndoInterface.java
        save
        FiltreASI.java
        asi
        ASIHandler.java
        ASIModuleHandler.java
        FilterASIModule.java
        util
        Constantes.java
        GUIUtilities.java
        Memento.java
        MyBorderFactory.java
        MyPanelFactory.java
        SwingWorker.java
        UnicodeUtils.java
        Utilities.java
        save
        AnalyseFilter.java
        AnalyseSave.java
        FileChooserFilter.java
        Open.java
        Save.java
        xml
        DefaultXmlDocument.java
      - main
        Main.java
        PlasticSettings.java
      - merise
        gui
        dialog
        ConnectionDialog.java
        EntiteDialog.java
        LienDialog.java
        list
        MeriseListModel.java
        panel
        DictionnairePanel.java
        MCDPanel.java
        MLDPanel.java
        MPDPanel.java
        RapportPanel.java
        SQLPanel.java
        table
        ArgObserverTable.java
        DictionnaireTable.java
        main
        MeriseModule.java
        mcd
        composant
        MCDAssociation.java
        MCDComponent.java
        MCDEntite.java
        MCDLien.java
        MCDObjet.java
        MLDCommand.java
        MLDComponent.java
        MPDComponent.java
        MPDEntite.java
        MPDLien.java
        rapport
        MeriseRapport.java
        save
        FiltreMeriseASI.java
        asi
        ASIMeriseHandler.java
        sql
        SQLCommand.java
// XmlParser.java: the main parser class.
// NO WARRANTY! See README, and copyright below.
// $Id: XmlParser.java,v 1.3 2009-12-16 21:59:29 bruno Exp $

package com.microstar.xml;

import java.io.BufferedInputStream;
import java.io.EOFException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Stack;

/**
 * Parse XML documents and return parse events through call-backs.
 * <p>
 * You need to define a class implementing the <code>XmlHandler</code>
 * interface: an object belonging to this class will receive the callbacks for
 * the events. (As an alternative to implementing the full XmlHandler interface,
 * you can simply extend the <code>HandlerBase</code> convenience class.)
 * <p>
 * Usage (assuming that <code>MyHandler</code> is your implementation of the
 * <code>XmlHandler</code> interface):
 * 
 * <pre>
 *    XmlHandler handler = new MyHandler();
 *    XmlParser parser = new XmlParser();
 *    parser.setHandler(handler);
 *    try {
 *      parser.parse("http://www.host.com/doc.xml", null, null);
 *    } catch (Exception e) {
 *      [do something interesting]
 *    }
 * </pre>
 * 
 * <p>
 * Alternatively, you can use the standard SAX interfaces with the
 * <code>SAXDriver</code> class as your entry point.
 * 
 * @author Copyright (c) 1997, 1998 by Microstar Software Ltd.
 * @author Written by David Megginson &lt;dmeggins@microstar.com&gt;
 * @version 1.1
 * @see XmlHandler
 * @see HandlerBase
 * @see SAXDriver
 */
public class XmlParser
{

    //
    // Compile-time switch for the hand-optimised code paths. They make
    // the parser roughly 50% faster, at the price of about 500 extra
    // bytes of class file and code that is harder to maintain.
    //
    private static final boolean USE_CHEATS = true;

    //////////////////////////////////////////////////////////////////////
    // Constructors.
    ////////////////////////////////////////////////////////////////////////

    /**
     * Construct a new parser with no associated handler.
     * <p>
     * The constructor performs no work of its own. A handler is attached
     * afterwards with {@link #setHandler}; the parsing code visible in this
     * class checks for a null handler before every callback, so a parser
     * without a handler still parses but reports nothing.
     * 
     * @see #setHandler
     * @see #parse
     */
    public XmlParser()
    {
    }

    /**
     * Set the handler that will receive parsing events.
     * <p>
     * The reference is simply stored; it replaces any handler set earlier.
     * 
     * @param handler
     *            The handler to receive callback events. The parsing code
     *            guards each callback with a null check, so null disables
     *            event delivery.
     * @see #parse
     * @see XmlHandler
     */
    public void setHandler(XmlHandler handler)
    {
        this.handler = handler;
    }

    /**
     * Parse the XML document identified by a URI.
     * <p>
     * No stream is supplied by the caller, so the parser is left to obtain
     * the document from <code>systemId</code> itself. The same parser object
     * may be reused for several documents, but only one thread may be inside
     * this method for a given object at any time.
     * 
     * @param systemId
     *            The URI of the document.
     * @param publicId
     *            The public identifier of the document, or null.
     * @param encoding
     *            The suggested encoding, or null if unknown.
     * @exception java.lang.Exception
     *                Any exception thrown by your own handlers, or any
     *                derivation of java.io.IOException thrown by the parser
     *                itself.
     */
    public void parse(String systemId, String publicId, String encoding)
            throws java.lang.Exception
    {
        // Neither a character stream nor a byte stream comes from the caller.
        final Reader noReader = null;
        final InputStream noStream = null;

        doParse(systemId, publicId, noReader, noStream, encoding);
    }

    /**
     * Parse an XML document that the caller supplies as a byte stream.
     * <p>
     * <code>systemId</code> only serves as the base URI for resolving
     * relative links; &AElig;lfred reads the document itself from
     * <code>stream</code>.
     * <p>
     * The same parser object may be reused for several documents, but only
     * one thread may be inside this method for a given object at any time.
     * 
     * @param systemId
     *            The base URI of the document, or null if not known.
     * @param publicId
     *            The public identifier of the document, or null if not known.
     * @param stream
     *            A byte input stream.
     * @param encoding
     *            The suggested encoding, or null if unknown.
     * @exception java.lang.Exception
     *                Any exception thrown by your own handlers, or any
     *                derivation of java.io.IOException thrown by the parser
     *                itself.
     */
    public void parse(String systemId, String publicId, InputStream stream,
            String encoding) throws java.lang.Exception
    {
        // The document arrives as bytes, so there is no character stream.
        final Reader noReader = null;

        doParse(systemId, publicId, noReader, stream, encoding);
    }

    /**
     * Parse an XML document that the caller supplies as a character stream.
     * <p>
     * <code>systemId</code> only serves as the base URI for resolving
     * relative links; &AElig;lfred reads the document itself from
     * <code>reader</code>. No encoding is passed on, because the input is
     * already made of characters.
     * <p>
     * The same parser object may be reused for several documents, but only
     * one thread may be inside this method for a given object at any time.
     * 
     * @param systemId
     *            The base URI of the document, or null if not known.
     * @param publicId
     *            The public identifier of the document, or null if not known.
     * @param reader
     *            A character stream.
     * @exception java.lang.Exception
     *                Any exception thrown by your own handlers, or any
     *                derivation of java.io.IOException thrown by the parser
     *                itself.
     */
    public void parse(String systemId, String publicId, Reader reader)
            throws java.lang.Exception
    {
        // Characters are supplied directly: no byte stream, no encoding hint.
        final InputStream noStream = null;
        final String noEncoding = null;

        doParse(systemId, publicId, reader, noStream, noEncoding);
    }

    private synchronized void doParse(String systemId, String publicId,
            Reader reader, InputStream stream, String encoding)
            throws java.lang.Exception
    {
        basePublicId = publicId;
        baseURI = systemId;
        baseReader = reader;
        baseInputStream = stream;

        initializeVariables();

        // Set the default entities here.
        setInternalEntity(intern("amp"), "&");
        setInternalEntity(intern("lt"), "<");
        setInternalEntity(intern("gt"), ">");
        setInternalEntity(intern("apos"), "'");
        setInternalEntity(intern("quot"), """);

        if (handler != null) {
            handler.startDocument();
        }

        pushURL("[document]", basePublicId, baseURI, baseReader,
                baseInputStream, encoding);

        parseDocument();

        if (handler != null) {
            handler.endDocument();
        }
        cleanupVariables();
    }

    ////////////////////////////////////////////////////////////////////////
    // Constants.
    ////////////////////////////////////////////////////////////////////////

    //
    // Constants for element content type.
    //

    /**
     * Content type: no declaration has been seen for the element.
     * 
     * @see #getElementContentType
     */
    public static final int CONTENT_UNDECLARED = 0;

    /**
     * Content type: the element was declared with a content model of ANY.
     * 
     * @see #getElementContentType
     */
    public static final int CONTENT_ANY = 1;

    /**
     * Content type: the element was declared EMPTY.
     * 
     * @see #getElementContentType
     */
    public static final int CONTENT_EMPTY = 2;

    /**
     * Content type: the element was declared with mixed content.
     * 
     * @see #getElementContentType
     */
    public static final int CONTENT_MIXED = 3;

    /**
     * Content type: the element was declared with element content.
     * 
     * @see #getElementContentType
     */
    public static final int CONTENT_ELEMENTS = 4;

    //
    // Constants for the entity type.
    //

    /**
     * Entity type: no declaration has been seen for the entity.
     * 
     * @see #getEntityType
     */
    public static final int ENTITY_UNDECLARED = 0;

    /**
     * Entity type: an internal entity.
     * 
     * @see #getEntityType
     */
    public static final int ENTITY_INTERNAL = 1;

    /**
     * Entity type: an external entity holding non-XML data.
     * 
     * @see #getEntityType
     */
    public static final int ENTITY_NDATA = 2;

    /**
     * Entity type: an external entity holding XML data.
     * 
     * @see #getEntityType
     */
    public static final int ENTITY_TEXT = 3;

    //
    // Constants for attribute type.
    //

    /**
     * Attribute type: the attribute is not declared for this element type.
     * 
     * @see #getAttributeType
     */
    public static final int ATTRIBUTE_UNDECLARED = 0;

    /**
     * Attribute type: the value is a plain string (CDATA).
     * 
     * @see #getAttributeType
     */
    public static final int ATTRIBUTE_CDATA = 1;

    /**
     * Attribute type: the value is a unique identifier (ID).
     * 
     * @see #getAttributeType
     */
    public static final int ATTRIBUTE_ID = 2;

    /**
     * Attribute type: the value refers to a unique identifier (IDREF).
     * 
     * @see #getAttributeType
     */
    public static final int ATTRIBUTE_IDREF = 3;

    /**
     * Attribute type: the value is a list of ID references (IDREFS).
     * 
     * @see #getAttributeType
     */
    public static final int ATTRIBUTE_IDREFS = 4;

    /**
     * Attribute type: the value is the name of an entity (ENTITY).
     * 
     * @see #getAttributeType
     */
    public static final int ATTRIBUTE_ENTITY = 5;

    /**
     * Attribute type: the value is a list of entity names (ENTITIES).
     * 
     * @see #getAttributeType
     */
    public static final int ATTRIBUTE_ENTITIES = 6;

    /**
     * Attribute type: the value is a name token (NMTOKEN).
     * 
     * @see #getAttributeType
     */
    public static final int ATTRIBUTE_NMTOKEN = 7;

    /**
     * Attribute type: the value is a list of name tokens (NMTOKENS).
     * 
     * @see #getAttributeType
     */
    public static final int ATTRIBUTE_NMTOKENS = 8;

    /**
     * Attribute type: the value is one token out of an enumeration.
     * 
     * @see #getAttributeType
     */
    public static final int ATTRIBUTE_ENUMERATED = 9;

    /**
     * Attribute type: the value is the name of a notation (NOTATION).
     * 
     * @see #getAttributeType
     */
    public static final int ATTRIBUTE_NOTATION = 10;

    //
    // When the class is loaded, populate the hash table of
    // attribute types.
    //

    /**
     * Lookup table from an attribute type keyword to the matching
     * <code>ATTRIBUTE_*</code> code, boxed as an <code>Integer</code>.
     * Filled once, when the class is loaded. ATTRIBUTE_ENUMERATED has no
     * keyword entry here.
     */
    private static Hashtable attributeTypeHash;
    static {
        // Parallel arrays: typeKeywords[i] maps to typeCodes[i].
        final String[] typeKeywords = { "CDATA", "ID", "IDREF", "IDREFS",
                "ENTITY", "ENTITIES", "NMTOKEN", "NMTOKENS", "NOTATION" };
        final int[] typeCodes = { ATTRIBUTE_CDATA, ATTRIBUTE_ID,
                ATTRIBUTE_IDREF, ATTRIBUTE_IDREFS, ATTRIBUTE_ENTITY,
                ATTRIBUTE_ENTITIES, ATTRIBUTE_NMTOKEN, ATTRIBUTE_NMTOKENS,
                ATTRIBUTE_NOTATION };

        attributeTypeHash = new Hashtable();
        for (int i = 0; i < typeKeywords.length; i++) {
            attributeTypeHash.put(typeKeywords[i], new Integer(typeCodes[i]));
        }
    }

    //
    // Constants for supported encodings.
    //
    // 8-bit encodings.
    private static final int ENCODING_UTF_8 = 1;
    private static final int ENCODING_ISO_8859_1 = 2;

    // 16-bit UCS-2 variants (the numeric suffix presumably names the byte
    // order -- confirm against the encoding detection code).
    private static final int ENCODING_UCS_2_12 = 3;
    private static final int ENCODING_UCS_2_21 = 4;

    // 32-bit UCS-4 variants (suffix presumably names the byte order).
    private static final int ENCODING_UCS_4_1234 = 5;
    private static final int ENCODING_UCS_4_4321 = 6;
    private static final int ENCODING_UCS_4_2143 = 7;
    private static final int ENCODING_UCS_4_3412 = 8;

    private static final int ENCODING_UTF_16 = 9;
    
    //
    // Constants for attribute default value.
    //

    /**
     * Default-value type: the attribute is not declared.
     * 
     * @see #getAttributeDefaultValueType
     */
    public static final int ATTRIBUTE_DEFAULT_UNDECLARED = 0;

    /**
     * Default-value type: a literal default value was specified.
     * 
     * @see #getAttributeDefaultValueType
     * @see #getAttributeDefaultValue
     */
    public static final int ATTRIBUTE_DEFAULT_SPECIFIED = 1;

    /**
     * Default-value type: the attribute was declared #IMPLIED.
     * 
     * @see #getAttributeDefaultValueType
     */
    public static final int ATTRIBUTE_DEFAULT_IMPLIED = 2;

    /**
     * Default-value type: the attribute was declared #REQUIRED.
     * 
     * @see #getAttributeDefaultValueType
     */
    public static final int ATTRIBUTE_DEFAULT_REQUIRED = 3;

    /**
     * Default-value type: the attribute was declared #FIXED.
     * 
     * @see #getAttributeDefaultValueType
     * @see #getAttributeDefaultValue
     */
    public static final int ATTRIBUTE_DEFAULT_FIXED = 4;

    //
    // Constants for input.
    //
    // Kinds of input source.
    private static final int INPUT_NONE = 0;
    private static final int INPUT_INTERNAL = 1;
    private static final int INPUT_EXTERNAL = 2;
    private static final int INPUT_STREAM = 3;
    private static final int INPUT_BUFFER = 4;
    private static final int INPUT_READER = 5;

    //
    // Flags for reading literals. Each one is a distinct bit, so they can
    // be combined with '|'.
    //
    private static final int LIT_CHAR_REF = 1;
    private static final int LIT_ENTITY_REF = 2;
    private static final int LIT_PE_REF = 4;
    private static final int LIT_NORMALIZE = 8;

    //
    // Values for the current parsing context.
    //
    private static final int CONTEXT_NONE = 0;
    private static final int CONTEXT_DTD = 1;
    private static final int CONTEXT_ENTITYVALUE = 2;
    private static final int CONTEXT_ATTRIBUTEVALUE = 3;

    //////////////////////////////////////////////////////////////////////
    // Error reporting.
    //////////////////////////////////////////////////////////////////////

    /**
     * Report an error to the handler.
     * <p>
     * The error counter is incremented on every call. The message is
     * extended with the offending and the expected text when they are
     * given, and then passed to the handler (if one is set) together with
     * the URL of the current external entity and the current line and
     * column.
     * 
     * @param message
     *            The error message.
     * @param textFound
     *            The text that caused the error (or null).
     * @param textExpected
     *            The text that would have been acceptable (or null).
     * @see XmlHandler#error
     * @see #line
     */
    void error(String message, String textFound, String textExpected)
            throws java.lang.Exception
    {
        errorCount++;

        if (textFound != null) {
            message += " (found \"" + textFound + "\")";
        }
        if (textExpected != null) {
            message += " (expected \"" + textExpected + "\")";
        }

        // Nobody to tell: counting the error is all that is left to do.
        if (handler == null) {
            return;
        }

        String uri = (externalEntity == null) ? null : externalEntity.getURL()
                .toString();
        handler.error(message, uri, line, column);
    }

    /**
     * Report an error caused by a single character.
     * <p>
     * Convenience overload: the character is turned into a one-character
     * string and the call is forwarded to
     * {@link #error(String, String, String)}.
     * 
     * @param message
     *            The error message.
     * @param textFound
     *            The character that caused the error.
     * @param textExpected
     *            The text that would have been acceptable (or null).
     */
    void error(String message, char textFound, String textExpected)
            throws java.lang.Exception
    {
        String found = String.valueOf(textFound);
        error(message, found, textExpected);
    }

    //////////////////////////////////////////////////////////////////////
    // Major syntactic productions.
    //////////////////////////////////////////////////////////////////////

    /**
     * Parse a complete XML document.
     * 
     * <pre>
     *    [1] document ::= prolog element Misc*
     * </pre>
     * 
     * <p>
     * Top-level parsing routine for one document: the prolog, then the
     * document element, then any trailing comments, processing instructions
     * and whitespace. Reaching the end of input after that is the normal way
     * out; any further character is reported as an error.
     */
    void parseDocument() throws java.lang.Exception
    {
        parseProlog();
        require('<');
        parseElement();

        try {
            // Skip all whitespace, PIs and comments after the root element.
            parseMisc();
            // A successful read here means there is stray content.
            char trailing = readCh();
            error("unexpected characters after document end", trailing, null);
        } catch (EOFException endOfInput) {
            // Expected: the input ended cleanly after the document.
        }
    }

    /**
     * Skip a comment.
     * 
     * <pre>
     *    [18] Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* "--&gt;"
     * </pre>
     * 
     * <p>
     * (The <code>&lt;!--</code> has already been read.)
     * <p>
     * The comment text is discarded: input is skipped up to the closing
     * delimiter and no handler callback is made from this method.
     */
    void parseComment() throws java.lang.Exception
    {
        skipUntil("-->");
    }

    /**
     * Parse a processing instruction and report it to the handler.
     * 
     * <pre>
     *    [19] PI ::= '&lt;?' Name (S (Char* - (Char* '?&gt;' Char*)))? '?&gt;'
     * </pre>
     * 
     * <p>
     * (The <code>&lt;?</code> has already been read.)
     * <p>
     * A processing instruction <em>must</em> begin with a Name, its target.
     * Unless the instruction ends right after the target, whitespace is
     * required and everything up to <code>?&gt;</code> is collected as the
     * instruction's data.
     */
    void parsePI() throws java.lang.Exception
    {
        String target = readNmtoken(true);

        boolean endsAfterTarget = tryRead("?>");
        if (!endsAfterTarget) {
            requireWhitespace();
            parseUntil("?>");
        }

        if (handler == null) {
            return;
        }
        handler.processingInstruction(target, dataBufferToString());
    }

    /**
     * Parse a CDATA marked section.
     * 
     * <pre>
     *    [20] CDSect ::= CDStart CData CDEnd
     *    [21] CDStart ::= '&lt;![CDATA['
     *    [22] CData ::= (Char* - (Char* ']]&gt;' Char*))
     *    [23] CDEnd ::= ']]&gt;'
     * </pre>
     * 
     * <p>
     * (The '&lt;![CDATA[' has already been read.)
     * <p>
     * Note that this just appends characters to the dataBuffer, without
     * actually generating an event.
     */
    void parseCDSect() throws java.lang.Exception
    {
        parseUntil("]]>");
    }

    /**
     * Parse the prolog of an XML document.
     * 
     * <pre>
     *    [24] prolog ::= XMPDecl? Misc* (Doctypedecl Misc*)?
     * </pre>
     * 
     * <p>
     * Two things to keep in mind: the XML default attributes have to be
     * declared after the DTD (if present) has been read, and general
     * references in attribute value literals cannot be expanded until the
     * whole DTD (if present) has been parsed.
     * <p>
     * The XML declaration is not looked for here, because it is handled by
     * pushURL().
     * 
     * @see #pushURL
     */
    void parseProlog() throws java.lang.Exception
    {
        parseMisc();

        // Without a DOCTYPE declaration the prolog ends here.
        if (!tryRead("<!DOCTYPE")) {
            return;
        }

        parseDoctypedecl();
        parseMisc();
    }

    /**
     * Parse the XML declaration.
     * 
     * <pre>
     *    [25] XMPDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?&gt;'
     *    [26] VersionInfo ::= S 'version' Eq ('"1.0"' | "'1.0'")
     *    [33] SDDecl ::= S 'standalone' Eq "'" ('yes' | 'no') "'"
     *                  | S 'standalone' Eq '"' ("yes" | "no") '"'
     *    [78] EncodingDecl ::= S 'encoding' Eq QEncoding
     * </pre>
     * 
     * <p>
     * ([80] to [82] are also significant.)
     * <p>
     * (The <code>&lt;?xml</code> and whitespace have already been read.)
     * 
     * @param ignoreEncoding
     *            Passed on to {@link #checkEncoding}; when true a declared
     *            encoding is read but not checked.
     * @see #parseTextDecl
     * @see #checkEncoding
     */
    void parseXMPDecl(boolean ignoreEncoding) throws java.lang.Exception
    {
        // Mandatory version information.
        require("version");
        parseEq();
        String declaredVersion = readLiteral(0);
        if (!declaredVersion.equals("1.0")) {
            error("unsupported XML version", declaredVersion, "1.0");
        }

        // Optional encoding declaration.
        skipWhitespace();
        if (tryRead("encoding")) {
            parseEq();
            checkEncoding(readLiteral(0), ignoreEncoding);
        }

        // Optional standalone declaration: the literal is consumed, but
        // its value is not used.
        skipWhitespace();
        if (tryRead("standalone")) {
            parseEq();
            readLiteral(0);
        }

        skipWhitespace();
        require("?>");
    }

    /**
     * Parse the text declaration (encoding PI) of an external entity.
     * 
     * <pre>
     *    [78] EncodingDecl ::= S 'encoding' Eq QEncoding
     *    [79] EncodingPI ::= '&lt;?xml' S 'encoding' Eq QEncoding S? '?&gt;'
     *    [80] QEncoding ::= '"' Encoding '"' | "'" Encoding "'"
     *    [81] Encoding ::= LatinName
     *    [82] LatinName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
     * </pre>
     * 
     * <p>
     * (The <code>&lt;?xml</code> and whitespace have already been read.)
     * <p>
     * Unlike the XML declaration, the version is optional here and the
     * encoding is mandatory.
     * 
     * @param ignoreEncoding
     *            Passed on to {@link #checkEncoding}; when true the declared
     *            encoding is read but not checked.
     * @see #parseXMPDecl
     * @see #checkEncoding
     */
    void parseTextDecl(boolean ignoreEncoding) throws java.lang.Exception
    {
        // Optional version information, followed by mandatory whitespace.
        if (tryRead("version")) {
            parseEq();
            String declaredVersion = readLiteral(0);
            if (!declaredVersion.equals("1.0")) {
                error("unsupported XML version", declaredVersion, "1.0");
            }
            requireWhitespace();
        }

        // Mandatory encoding declaration.
        require("encoding");
        parseEq();
        checkEncoding(readLiteral(0), ignoreEncoding);

        skipWhitespace();
        require("?>");
    }

    /**
     * Check that the declared encoding is compatible with the detected one.
     * <p>
     * Compares what the author has specified in the XML declaration or
     * encoding PI with what the parser has detected. This is also how the
     * 8-bit encodings are told apart: when UTF-8 was detected and the
     * document declares ISO-8859-1, the parser switches to ISO-8859-1.
     * <p>
     * The name is compared case-insensitively. It is upper-cased with a
     * fixed locale, because the default-locale conversion is not stable:
     * under a Turkish locale "iso-8859-1" becomes "&#304;SO-8859-1" and a
     * valid declaration would be rejected.
     * <p>
     * Only the detected encodings listed in the switch are checked; for any
     * other detected encoding the declaration is accepted as is.
     * 
     * @param encodingName
     *            The name of the encoding specified by the user.
     * @param ignoreEncoding
     *            When true, no check is made and the detected encoding is
     *            left unchanged.
     * @see #parseXMPDecl
     * @see #parseTextDecl
     */
    void checkEncoding(String encodingName, boolean ignoreEncoding)
            throws java.lang.Exception
    {
        encodingName = encodingName.toUpperCase(java.util.Locale.ENGLISH);

        if (ignoreEncoding) {
            return;
        }

        switch (encoding) {
            // 8-bit encodings
            case ENCODING_UTF_8:
                if (encodingName.equals("ISO-8859-1")) {
                    encoding = ENCODING_ISO_8859_1;
                } else if (!encodingName.equals("UTF-8")) {
                    error("unsupported 8-bit encoding", encodingName,
                            "UTF-8 or ISO-8859-1");
                }
                break;
            // 16-bit encodings
            case ENCODING_UCS_2_12:
            case ENCODING_UCS_2_21:
                if (!encodingName.equals("ISO-10646-UCS-2")
                        && !encodingName.equals("UTF-16")) {
                    error("unsupported 16-bit encoding", encodingName,
                            "ISO-10646-UCS-2");
                }
                break;
            // 32-bit encodings
            case ENCODING_UCS_4_1234:
            case ENCODING_UCS_4_4321:
            case ENCODING_UCS_4_2143:
            case ENCODING_UCS_4_3412:
                if (!encodingName.equals("ISO-10646-UCS-4")) {
                    error("unsupported 32-bit encoding", encodingName,
                            "ISO-10646-UCS-4");
                }
                break;
            default:
                // Any other detected encoding: nothing to verify.
                break;
        }
    }

    /**
     * Parse miscellaneous markup outside the document element and DOCTYPE
     * declaration.
     * 
     * <pre>
     *    [27] Misc ::= Comment | PI | S
     * </pre>
     * 
     * <p>
     * Consumes whitespace, processing instructions and comments for as long
     * as they keep coming, and stops in front of anything else.
     */
    void parseMisc() throws java.lang.Exception
    {
        boolean consumedMarkup;
        do {
            skipWhitespace();
            if (tryRead("<?")) {
                parsePI();
                consumedMarkup = true;
            } else if (tryRead("<!--")) {
                parseComment();
                consumedMarkup = true;
            } else {
                consumedMarkup = false;
            }
        } while (consumedMarkup);
    }

    /**
     * Parse a document type declaration.
     * 
     * <pre>
     *    [28] doctypedecl ::= '&lt;!DOCTYPE' S Name (S ExternalID)? S?
     *                         ('[' %markupdecl* ']' S?)? '&gt;'
     * </pre>
     * 
     * <p>
     * (The <code>&lt;!DOCTYPE</code> has already been read.)
     * <p>
     * The internal subset, if any, is parsed first. When a system
     * identifier was given, the external subset is then pushed as a new
     * input source and parsed until the closing <code>&gt;</code> of the
     * declaration is reached. The handler is told about the declaration
     * only after both subsets have been read.
     */
    void parseDoctypedecl() throws java.lang.Exception
    {
        // Document type name, after mandatory whitespace.
        requireWhitespace();
        final String rootName = readNmtoken(true);

        // External identifiers: ids[0] is used as the public identifier
        // and ids[1] as the system identifier below.
        skipWhitespace();
        final String[] ids = readExternalIds(false);

        // Internal declaration subset, if present.
        skipWhitespace();
        if (tryRead('[')) {
            // The DTD context is active only while skipping whitespace and
            // while parsing a declaration, not while testing for ']'.
            for (;;) {
                context = CONTEXT_DTD;
                skipWhitespace();
                context = CONTEXT_NONE;
                if (tryRead(']')) {
                    break; // end of subset
                }
                context = CONTEXT_DTD;
                parseMarkupdecl();
                context = CONTEXT_NONE;
            }
        }

        if (ids[1] == null) {
            // No external subset: only the closing delimiter is left.
            skipWhitespace();
            require('>');
        } else {
            // Read the external subset as a new input source.
            pushURL("[external subset]", ids[0], ids[1], null, null, null);

            // Keep going until we are back at the '>' of the declaration.
            for (;;) {
                context = CONTEXT_DTD;
                skipWhitespace();
                context = CONTEXT_NONE;
                if (tryRead('>')) {
                    break;
                }
                context = CONTEXT_DTD;
                parseMarkupdecl();
                context = CONTEXT_NONE;
            }
        }

        if (handler != null) {
            handler.doctypeDecl(rootName, ids[0], ids[1]);
        }

        // Expanding general entities in the default values of attributes
        // would belong here, after the doctypeDecl event; that step is
        // currently disabled:
        // expandAttributeDefaultValues();
    }

    /**
     * Parse one markup declaration in the internal or external DTD subset.
     * 
     * <pre>
     *    [29] markupdecl ::= ( %elementdecl | %AttlistDecl | %EntityDecl |
     *                          %NotationDecl | %PI | %S | %Comment |
     *                          InternalPERef )
     *    [30] InternalPERef ::= PEReference
     *    [31] extSubset ::= (%markupdecl | %conditionalSect)*
     * </pre>
     * 
     * <p>
     * The opening delimiter decides which declaration parser takes over. An
     * error is reported when none of the known delimiters matches.
     */
    void parseMarkupdecl() throws java.lang.Exception
    {
        if (tryRead("<!ELEMENT")) {
            parseElementdecl();
            return;
        }
        if (tryRead("<!ATTLIST")) {
            parseAttlistDecl();
            return;
        }
        if (tryRead("<!ENTITY")) {
            parseEntityDecl();
            return;
        }
        if (tryRead("<!NOTATION")) {
            parseNotationDecl();
            return;
        }
        if (tryRead("<?")) {
            parsePI();
            return;
        }
        if (tryRead("<!--")) {
            parseComment();
            return;
        }
        if (tryRead("<![")) {
            parseConditionalSect();
            return;
        }
        error("expected markup declaration", null, null);
    }

    /**
     * Parse an element, with its tags.
     * 
     * <pre>
     *    [33] STag ::= '&lt;' Name (S Attribute)* S? '&gt;' [WFC: unique Att spec]
     *    [38] element ::= EmptyElement | STag content ETag
     *    [39] EmptyElement ::= '&lt;' Name (S Attribute)* S? '/&gt;'
     *                          [WFC: unique Att spec]
     * </pre>
     * 
     * <p>
     * (The '&lt;' has already been read.)
     * <p>
     * Reads the element name and its attributes, reports every declared
     * attribute that was not specified in the tag, and then fires
     * <code>startElement</code> (followed at once by <code>endElement</code>
     * for an empty element). The current element name and content type are
     * saved on entry and restored on exit.
     * <p>
     * NOTE: this method actually chains onto parseContent(), if necessary, and
     * parseContent() will take care of calling parseETag().
     */
    void parseElement() throws java.lang.Exception
    {
        String gi;
        char c;
        // Remember the enclosing element's state so it can be restored below.
        int oldElementContent = currentElementContent;
        String oldElement = currentElement;

        // This is the (global) counter for the
        // array of specified attributes.
        tagAttributePos = 0;

        // Read the element type name.
        gi = readNmtoken(true);

        // Determine the current content type.
        // An undeclared element is treated as if it were declared ANY.
        currentElement = gi;
        currentElementContent = getElementContentType(gi);
        if (currentElementContent == CONTENT_UNDECLARED) {
            currentElementContent = CONTENT_ANY;
        }

        // Read the attributes, if any.
        // After this loop, we should be just
        // in front of the closing delimiter.
        skipWhitespace();
        c = readCh();
        while (c != '/' && c != '>') {
            // Not a delimiter: push it back so parseAttribute sees the
            // first character of the attribute name.
            unread(c);
            parseAttribute(gi);
            skipWhitespace();
            c = readCh();
        }
        // Push the delimiter back; it is read again after the defaults.
        unread(c);

        // Supply any defaulted attributes.
        Enumeration atts = declaredAttributes(gi);
        if (atts != null) {
            String aname;
            loop: while (atts.hasMoreElements()) {
                aname = (String) atts.nextElement();
                // See if it was specified.
                // Identity comparison, not equals(): parseAttribute interns
                // the names it reads; this presumably relies on the declared
                // names being interned as well -- confirm before changing.
                for (int i = 0; i < tagAttributePos; i++) {
                    if (tagAttributes[i] == aname) {
                        continue loop;
                    }
                }
                // I guess not...
                // Report it with the 'specified' flag set to false.
                if (handler != null) {
                    handler.attribute(aname, getAttributeExpandedValue(gi,
                            aname), false);
                }
            }
        }

        // Figure out if this is a start tag
        // or an empty element, and dispatch an
        // event accordingly.
        // (The attribute loop only ends on '>' or '/', and that character
        // was pushed back, so no other case is expected here.)
        c = readCh();
        switch (c) {
            case '>':
                if (handler != null) {
                    handler.startElement(gi);
                }
                parseContent();
                break;
            case '/':
                require('>');
                if (handler != null) {
                    handler.startElement(gi);
                    handler.endElement(gi);
                }
                break;
        }

        // Restore the previous state.
        currentElement = oldElement;
        currentElementContent = oldElementContent;
    }

    /**
     * Parse one attribute assignment inside a tag and report it to the
     * handler.
     * 
     * <pre>
     *    [34] Attribute ::= Name Eq AttValue
     * </pre>
     * 
     * <p>
     * After the value has been reported, the attribute name is recorded in
     * <code>tagAttributes</code> so that parseElement() can tell which
     * declared attributes were explicitly specified.
     * 
     * @param name
     *            The name of the attribute's element.
     * @see XmlHandler#attribute
     */
    void parseAttribute(String name) throws java.lang.Exception
    {
        // Attribute name, followed by the kind declared for it on this
        // element.
        // NOTE(review): the kind comes from getAttributeDefaultValueType()
        // but is compared against ATTRIBUTE_CDATA / ATTRIBUTE_UNDECLARED
        // below -- confirm this is the intended accessor.
        final String attName = readNmtoken(true).intern();
        final int attKind = getAttributeDefaultValueType(name, attName);

        // Eq ::= S? '=' S?
        parseEq();

        // CDATA and undeclared attributes keep their whitespace as
        // written; everything else is read with LIT_NORMALIZE.
        final boolean keepWhitespace = (attKind == ATTRIBUTE_CDATA)
                || (attKind == ATTRIBUTE_UNDECLARED);
        int literalFlags = LIT_CHAR_REF | LIT_ENTITY_REF;
        if (!keepWhitespace) {
            literalFlags |= LIT_NORMALIZE;
        }
        final String attValue = readLiteral(literalFlags);

        // Report the specified attribute, then reset the data buffer.
        if (handler != null) {
            handler.attribute(attName, attValue, true);
        }
        dataBufferPos = 0;

        // Record the name, doubling the bookkeeping array when it is full.
        if (tagAttributePos == tagAttributes.length) {
            final String[] grown = new String[tagAttributes.length * 2];
            System.arraycopy(tagAttributes, 0, grown, 0, tagAttributePos);
            tagAttributes = grown;
        }
        tagAttributes[tagAttributePos] = attName;
        tagAttributePos++;
    }

    /**
     * Parse an equals sign surrounded by optional whitespace.
     * 
     * <pre>
     *    [35] Eq ::= S? '=' S?
     * </pre>
     * 
     * <p>
     * Whitespace on either side is skipped; the '=' itself is mandatory and
     * is checked with require().
     */
    void parseEq() throws java.lang.Exception
    {
        skipWhitespace();
        require('=');
        skipWhitespace();
    }

    /**
     * Parse an end tag and report the end of the element to the handler.
     * 
     * <pre>
     *    [36] ETag ::= '&lt;/' Name S? '&gt;'
     * </pre>
     * 
     * <p>
     * NOTE: parseContent() chains to here after it has consumed the
     * '&lt;/'. The name that is read must match the element currently being
     * parsed, otherwise a "mismatched end tag" error is raised.
     */
    void parseETag() throws java.lang.Exception
    {
        String name;
        name = readNmtoken(true);
        // Compare by value: relying on both strings being the same interned
        // object would turn any interning mismatch into a bogus
        // "mismatched end tag" error.
        if (!name.equals(currentElement)) {
            error("mismatched end tag", name, currentElement);
        }
        skipWhitespace();
        require('>');
        if (handler != null) {
            handler.endElement(name);
        }
    }

    /**
     * Parse the content of an element, up to and including its end tag.
     * 
     * <pre>
     *    [37] content ::= (element | PCData | Reference | CDSect | PI | Comment)*
     *    [68] Reference ::= EntityRef | CharRef
     * </pre>
     * 
     * <p>
     * Each pass first consumes leading text according to the current
     * content type (character data for ANY/MIXED, whitespace for ELEMENTS)
     * and then dispatches on the delimiter that follows. The method returns
     * only after parseETag() has handled the closing tag.
     */
    void parseContent() throws java.lang.Exception
    {
        for (;;) {

            // Leading text, depending on the declared content type.
            if (currentElementContent == CONTENT_ANY
                    || currentElementContent == CONTENT_MIXED) {
                parsePCData();
            } else if (currentElementContent == CONTENT_ELEMENTS) {
                parseWhitespace();
            }

            // Handle delimiters
            char c = readCh();

            if (c == '&') {
                // "&#" introduces a character reference, anything else
                // after '&' is an entity reference.
                final char afterAmp = readCh();
                if (afterAmp == '#') {
                    parseCharRef();
                } else {
                    unread(afterAmp);
                    parseEntityRef(true);
                }
                continue;
            }

            if (c != '<') {
                // Any other character is simply consumed.
                continue;
            }

            // Found "<"
            c = readCh();
            if (c == '!') {
                // Found "<!"
                c = readCh();
                if (c == '-') {
                    // Found "<!-"
                    require('-');
                    parseComment();
                } else if (c == '[') {
                    // Found "<!["
                    require("CDATA[");
                    parseCDSect();
                } else {
                    error("expected comment or CDATA section", c, null);
                }
            } else if (c == '?') {
                // Found "<?"
                dataBufferFlush();
                parsePI();
            } else if (c == '/') {
                // Found "</": the end tag finishes this element.
                dataBufferFlush();
                parseETag();
                return;
            } else {
                // Found "<" followed by something else: a child element.
                dataBufferFlush();
                unread(c);
                parseElement();
            }
        }
    }

    /**
     * Parse an element type declaration.
     * 
     * <pre>
     *    [40] elementdecl ::= '&lt;!ELEMENT' S %Name S (%S S)? %contentspec S? '&gt;'
     *                         [VC: Unique Element Declaration]
     * </pre>
     * 
     * <p>
     * NOTE: the '&lt;!ELEMENT' has already been read.
     */
    void parseElementdecl() throws java.lang.Exception
    {
        // S Name
        requireWhitespace();
        final String elementName = readNmtoken(true);

        // S contentspec
        requireWhitespace();
        parseContentspec(elementName);

        // S? '>'
        skipWhitespace();
        require('>');
    }

    /**
     * Parse a content specification and register it for the element.
     * 
     * <pre>
     *    [41] contentspec ::= 'EMPTY' | 'ANY' | Mixed | elements
     * </pre>
     * 
     * <p>
     * For mixed and element content the textual model is accumulated in the
     * data buffer and handed to setElement() as a string.
     * 
     * @param name
     *            The name of the element being declared.
     */
    void parseContentspec(String name) throws java.lang.Exception
    {
        // The two keyword forms carry no content model.
        if (tryRead("EMPTY")) {
            setElement(name, CONTENT_EMPTY, null, null);
            return;
        }
        if (tryRead("ANY")) {
            setElement(name, CONTENT_ANY, null, null);
            return;
        }

        // Everything else starts with a parenthesized group.
        require('(');
        dataBufferAppend('(');
        skipWhitespace();

        final int contentType;
        if (tryRead("#PCDATA")) {
            dataBufferAppend("#PCDATA");
            parseMixed();
            contentType = CONTENT_MIXED;
        } else {
            parseElements();
            contentType = CONTENT_ELEMENTS;
        }
        setElement(name, contentType, dataBufferToString(), null);
    }

    /**
     * Parse an element-content model, appending its text to the data
     * buffer.
     * 
     * <pre>
     *    [42] elements ::= (choice | seq) ('?' | '*' | '+')?
     *    [44] cps      ::= S? %cp S?
     *    [45] choice   ::= '(' S? %ctokplus (S? '|' S? %ctoks)* S? ')'
     *    [46] ctokplus ::= cps ('|' cps)+
     *    [47] ctoks    ::= cps ('|' cps)*
     *    [48] seq      ::= '(' S? %stoks (S? ',' S? %stoks)* S? ')'
     *    [49] stoks    ::= cps (',' cps)*
     * </pre>
     * 
     * <p>
     * NOTE: the opening '(' and S have already been read. The first
     * separator seen (',' or '|') must be used for the whole group.
     */
    void parseElements() throws java.lang.Exception
    {
        // Parse the first content particle
        skipWhitespace();
        parseCp();

        // Either the group ends right away or a separator follows.
        skipWhitespace();
        char c = readCh();

        if (c == ')') {
            // Single-particle group, optionally followed by an
            // occurrence indicator.
            dataBufferAppend(')');
            c = readCh();
            if (c == '*' || c == '+' || c == '?') {
                dataBufferAppend(c);
            } else {
                unread(c);
            }
            return;
        }

        if (c != ',' && c != '|') {
            error("bad separator in content model", c, null);
            return;
        }

        // Register the separator.
        final char sep = c;
        dataBufferAppend(c);

        // Parse the rest of the content model.
        for (;;) {
            skipWhitespace();
            parseCp();
            skipWhitespace();
            c = readCh();
            if (c == ')') {
                dataBufferAppend(')');
                break;
            }
            if (c != sep) {
                error("bad separator in content model", c, null);
                return;
            }
            dataBufferAppend(c);
        }

        // Check for the occurrence indicator.
        c = readCh();
        if (c == '?' || c == '*' || c == '+') {
            dataBufferAppend(c);
        } else {
            unread(c);
        }
    }

    /**
     * Parse a content particle, appending its text to the data buffer.
     * 
     * <pre>
     *    [43] cp ::= (Name | choice | seq) ('?' | '*' | '+')
     * </pre>
     * 
     * <p>
     * NOTE: a slightly different production is actually used here:
     * 
     * <pre>
     *    cp ::= (elements | (Name ('?' | '*' | '+')?))
     * </pre>
     */
    void parseCp() throws java.lang.Exception
    {
        if (!tryRead('(')) {
            // A plain name with an optional occurrence indicator.
            dataBufferAppend(readNmtoken(true));
            final char indicator = readCh();
            if (indicator == '?' || indicator == '*' || indicator == '+') {
                dataBufferAppend(indicator);
            } else {
                unread(indicator);
            }
            return;
        }

        // A nested group; parseElements() handles its own indicator.
        dataBufferAppend('(');
        parseElements();
    }

    /**
     * Parse the remainder of a mixed-content model, appending its text to
     * the data buffer.
     * 
     * <pre>
     *    [50] Mixed ::= '(' S? %( %'#PCDATA' (S? '|' S? %Mtoks)* ) S? ')*'
     *                 | '(' S? %('#PCDATA') S? ')'
     *    [51] Mtoks ::= %Name (S? '|' S? %Name)*
     * </pre>
     * 
     * <p>
     * NOTE: the S and '#PCDATA' have already been read. In both forms the
     * text appended to the data buffer ends in ")*".
     */
    void parseMixed() throws java.lang.Exception
    {
        // Check for PCDATA alone; a trailing '*' is optional here and is
        // consumed if present.
        skipWhitespace();
        if (tryRead(')')) {
            dataBufferAppend(")*");
            tryRead('*');
            return;
        }

        // Parse mixed content: '|' Name pairs until the closing ")*".
        skipWhitespace();
        for (;;) {
            if (tryRead(")*")) {
                dataBufferAppend(")*");
                return;
            }
            require('|');
            dataBufferAppend('|');
            skipWhitespace();
            dataBufferAppend(readNmtoken(true));
            skipWhitespace();
        }
    }

    /**
     * Parse an attribute list declaration.
     * 
     * <pre>
     *    [52] AttlistDecl ::= '&lt;!ATTLIST' S %Name S? %AttDef+ S? '&gt;'
     * </pre>
     * 
     * <p>
     * NOTE: the '&lt;!ATTLIST' has already been read. Attribute definitions
     * are parsed one after another until the closing '&gt;' is found.
     */
    void parseAttlistDecl() throws java.lang.Exception
    {
        requireWhitespace();
        final String owner = readNmtoken(true);
        requireWhitespace();

        boolean closed = tryRead('>');
        while (!closed) {
            parseAttDef(owner);
            skipWhitespace();
            closed = tryRead('>');
        }
    }

    /**
     * Parse a single attribute definition.
     * 
     * <pre>
     *    [53] AttDef ::= S %Name S %AttType S %Default
     * </pre>
     * 
     * @param elementName
     *            The name of the element the attribute is declared for.
     */
    void parseAttDef(String elementName) throws java.lang.Exception
    {
        // Name
        final String attName = readNmtoken(true);

        // S AttType
        requireWhitespace();
        final int attType = readAttType();

        // For enumerated and notation types the list of allowed values
        // has been collected in the data buffer by readAttType().
        final boolean hasValueList = (attType == ATTRIBUTE_ENUMERATED)
                || (attType == ATTRIBUTE_NOTATION);
        final String valueList = hasValueList ? dataBufferToString() : null;

        // S Default
        requireWhitespace();
        parseDefault(elementName, attName, attType, valueList);
    }

    /**
     * Parse the attribute type.
     * 
     * <pre>
     *    [54] AttType        ::= StringType | TokenizedType | EnumeratedType
     *    [55] StringType     ::= 'CDATA'
     *    [56] TokenizedType  ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY'
     *                          | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
     *    [57] EnumeratedType ::= NotationType | Enumeration
     * </pre>
     * 
     * @return ATTRIBUTE_ENUMERATED for a parenthesized enumeration,
     *         otherwise the code registered in attributeTypeHash for the
     *         keyword that was read, or ATTRIBUTE_UNDECLARED after an
     *         "illegal attribute type" error.
     */
    int readAttType() throws java.lang.Exception
    {
        // A bare '(' starts an enumeration.
        if (tryRead('(')) {
            parseEnumeration();
            return ATTRIBUTE_ENUMERATED;
        }

        // Otherwise a type keyword follows; NOTATION additionally
        // carries a list of notation names.
        final String keyword = readNmtoken(true);
        if (keyword.equals("NOTATION")) {
            parseNotationType();
        }

        final Integer code = (Integer) attributeTypeHash.get(keyword);
        if (code != null) {
            return code.intValue();
        }
        error("illegal attribute type", keyword, null);
        return ATTRIBUTE_UNDECLARED;
    }

    /**
     * Parse an enumeration, appending its text to the data buffer.
     * 
     * <pre>
     *    [60] Enumeration ::= '(' S? %Etoks (S? '|' S? %Etoks)* S? ')'
     *    [61] Etoks       ::= %Nmtoken (S? '|' S? %Nmtoken)*
     * </pre>
     * 
     * <p>
     * NOTE: the '(' has already been read.
     */
    void parseEnumeration() throws java.lang.Exception
    {
        dataBufferAppend('(');

        // Read the first token.
        skipWhitespace();
        dataBufferAppend(readNmtoken(true));
        skipWhitespace();

        // Read the remaining '|' token pairs until the closing ')'.
        for (;;) {
            if (tryRead(')')) {
                dataBufferAppend(')');
                return;
            }
            require('|');
            dataBufferAppend('|');
            skipWhitespace();
            dataBufferAppend(readNmtoken(true));
            skipWhitespace();
        }
    }

    /**
     * Parse a notation type for an attribute.
     * 
     * <pre>
     *    [58] NotationType ::= %'NOTATION' S '(' S? %Ntoks (S? '|' S? %Ntoks)* S? ')'
     *    [59] Ntoks        ::= %Name (S? '|' S? %Name)
     * </pre>
     * 
     * <p>
     * NOTE: the 'NOTATION' has already been read. After the mandatory
     * whitespace and '(' the list of names is parsed by parseEnumeration().
     */
    void parseNotationType() throws java.lang.Exception
    {
        requireWhitespace();
        require('(');

        // The '(' is consumed here because parseEnumeration() expects it
        // to have been read already.
        parseEnumeration();
    }

    /**
     * Parse the default value for an attribute and register the attribute.
     * 
     * <pre>
     *    [62] Default ::= '#REQUIRED' | '#IMPLIED' | ((%'#FIXED' S)? %AttValue)
     * </pre>
     * 
     * @param elementName
     *            The name of the element the attribute belongs to.
     * @param name
     *            The attribute name.
     * @param type
     *            The attribute type code, as returned by readAttType().
     * @param enum_
     *            The enumeration string, or null if there is none.
     */
    void parseDefault(String elementName, String name, int type, String enum_)
            throws java.lang.Exception
    {
        int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
        String value = null;

        // A literal follows unless the keyword is #REQUIRED or #IMPLIED
        // (or is not recognized at all).
        boolean literalFollows = true;

        if (tryRead('#')) {
            if (tryRead("FIXED")) {
                valueType = ATTRIBUTE_DEFAULT_FIXED;
                requireWhitespace();
            } else if (tryRead("REQUIRED")) {
                valueType = ATTRIBUTE_DEFAULT_REQUIRED;
                literalFollows = false;
            } else if (tryRead("IMPLIED")) {
                valueType = ATTRIBUTE_DEFAULT_IMPLIED;
                literalFollows = false;
            } else {
                error("illegal keyword for attribute default value", null, null);
                literalFollows = false;
            }
        }

        if (literalFollows) {
            // The literal is read in attribute-value context; the DTD
            // context is restored right after.
            context = CONTEXT_ATTRIBUTEVALUE;
            value = readLiteral(LIT_CHAR_REF);
            context = CONTEXT_DTD;
        }

        setAttribute(elementName, name, type, enum_, value, valueType);
    }

    /**
     * Parse a conditional section.
     * 
     * <pre>
     *    [63] conditionalSect    ::= includeSect | ignoreSect
     *    [64] includeSect        ::= '&lt;![' %'INCLUDE' '[' (%markupdecl*)* ']]&gt;'
     *    [65] ignoreSect         ::= '&lt;![' %'IGNORE' '[' ignoreSectContents* ']]&gt;'
     *    [66] ignoreSectContents ::= ((SkipLit | Comment | PI) - (Char* ']]&gt;'))
     *                              | ('&lt;![' ignoreSectContents* ']]&gt;')
     *                              | (Char - (']' | [&lt;'"]))
     *                              | ('&lt;!' (Char - ('-' | '[')))
     * </pre>
     * 
     * <p>
     * NOTE: the '&lt;![' has already been read.
     * 
     * <p>
     * An INCLUDE section is parsed as a sequence of markup declarations. An
     * IGNORE section is skipped character by character while tracking how
     * deeply nested "&lt;![" ... "]]&gt;" pairs are.
     */
    void parseConditionalSect() throws java.lang.Exception
    {
        skipWhitespace();
        if (tryRead("INCLUDE")) {
            skipWhitespace();
            require('[');
            skipWhitespace();
            while (!tryRead("]]>")) {
                parseMarkupdecl();
                skipWhitespace();
            }
        } else if (tryRead("IGNORE")) {
            skipWhitespace();
            require('[');
            char c;
            // nest counts the currently open sections, starting with the
            // IGNORE section itself; the loop ends when it is closed.
            for (int nest = 1; nest > 0;) {
                c = readCh();
                switch (c) {
                    case '<':
                        // "<![" opens a nested section.
                        if (tryRead("![")) {
                            nest++;
                        }
                        // Must not fall through: a '<' followed by "]>" is
                        // not a section terminator.
                        break;
                    case ']':
                        // "]]>" closes the innermost open section.
                        if (tryRead("]>")) {
                            nest--;
                        }
                        break;
                    default:
                        // Any other character is ignored.
                        break;
                }
            }
        } else {
            error("conditional section must begin with INCLUDE or IGNORE",
                    null, null);
        }
    }

    /**
     * Read a character reference.
     * 
     * <pre>
     *    [67] CharRef ::= '&amp;#' [0-9]+ ';' | '&amp;#x' [0-9a-fA-F]+ ';'
     * </pre>
     * 
     * <p>
     * NOTE: the '&amp;#' has already been read.
     * 
     * <p>
     * NOTE(review): this method is currently a no-op. It returns
     * immediately and the implementation below is commented out, so nothing
     * after the '&amp;#' is consumed here and no character is appended to
     * the data buffer; the digits and the ';' are left in the input for the
     * caller to deal with. This looks like a deliberate workaround --
     * confirm the reason before re-enabling.
     */
    void parseCharRef() throws java.lang.Exception
    {
    	 return ;
    	 
    	/*
        Disabled implementation, kept for reference.
        NOTE(review): before re-enabling, check the surrogate handling at
        the end: it ORs with 0xd8 / 0xdc instead of 0xD800 / 0xDC00, does
        not subtract 0x10000 first, and rejects values above 0xfffff
        although code points run up to 0x10ffff.

        int value = 0;
        char c;

        if (tryRead('x')) {
            loop1: while (true) {
                c = readCh();
                switch (c) {
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    case '8':
                    case '9':
                    case 'a':
                    case 'A':
                    case 'b':
                    case 'B':
                    case 'c':
                    case 'C':
                    case 'd':
                    case 'D':
                    case 'e':
                    case 'E':
                    case 'f':
                    case 'F':
                        value *= 16;
                        value += Integer.parseInt(new Character(c).toString(),
                                16);
                        break;
                    case ';':
                        break loop1;
                    default:
                        error("illegal character in character reference", c,
                                null);
                        break loop1;
                }
            }
        } else {
            loop2: while (true) {
                c = readCh();
                switch (c) {
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    case '8':
                    case '9':
                        value *= 10;
                        value += Integer.parseInt(new Character(c).toString(),
                                10);
                        break;
                    case ';':
                        break loop2;
                    default:
                        error("illegal character in character reference", c,
                                null);
                        break loop2;
                }
            }
        }

        // Check for surrogates: 00000000 0000xxxx yyyyyyyy zzzzzzzz
        //  (1101|10xx|xxyy|yyyy + 1101|11yy|zzzz|zzzz:
        if (value <= 0x0000ffff) {
            // no surrogates needed
            dataBufferAppend((char) value);
        } else if (value <= 0x000fffff) {
            // > 16 bits, surrogate needed
            dataBufferAppend((char) (0xd8 | ((value & 0x000ffc00) >> 10)));
            dataBufferAppend((char) (0xdc | (value & 0x0003ff)));
        } else {
            // too big for surrogate
            error("character reference " + value + " is too large for UTF-16",
                    new Integer(value).toString(), null);
        }
        
        */
    }

    /**
     * Parse a general entity reference and act on the kind of entity it
     * names.
     * 
     * <pre>
     *    [69] EntityRef ::= '&amp;' Name ';'
     * </pre>
     * 
     * <p>
     * NOTE: the '&amp;' has already been read. Internal entities are pushed
     * onto the input as a string, external text entities as a URL; every
     * other combination is reported as an error.
     * 
     * @param externalAllowed
     *            External entities are allowed here.
     */
    void parseEntityRef(boolean externalAllowed) throws java.lang.Exception
    {
        final String name = readNmtoken(true);
        require(';');

        final int entityType = getEntityType(name);

        if (entityType == ENTITY_UNDECLARED) {
            error("reference to undeclared entity", name, null);
        } else if (entityType == ENTITY_INTERNAL) {
            pushString(name, getEntityValue(name));
        } else if (entityType == ENTITY_TEXT) {
            if (!externalAllowed) {
                error("reference to external entity in attribute value.",
                        name, null);
            } else {
                pushURL(name, getEntityPublicId(name),
                        getEntitySystemId(name), null, null, null);
            }
        } else if (entityType == ENTITY_NDATA) {
            // Unparsed entities may never be referenced; only the message
            // depends on where the reference occurred.
            final String message = externalAllowed
                    ? "data entity reference in content"
                    : "reference to external entity in attribute value.";
            error(message, name, null);
        }
    }

    /**
     * Parse a parameter entity reference and push the entity's replacement
     * text onto the input.
     * 
     * <pre>
     *    [70] PEReference ::= '%' Name ';'
     * </pre>
     * 
     * <p>
     * NOTE: the '%' has already been read.
     * 
     * <p>
     * When the reference occurs in the DTD proper (not inside an entity
     * value) the replacement text is surrounded by one leading and one
     * trailing space, as XML 1.0 section 4.4.8 ("Included as PE") requires.
     * Inside an entity value the text is included unchanged. The same rule
     * is applied to internal and external parameter entities.
     * 
     * @param isEntityValue
     *            true if the reference occurs inside an entity value
     *            literal.
     */
    void parsePEReference(boolean isEntityValue) throws java.lang.Exception
    {
        String name;

        // Parameter entities are stored under their name prefixed by '%'.
        name = "%" + readNmtoken(true);
        require(';');
        switch (getEntityType(name)) {
            case ENTITY_UNDECLARED:
                error("reference to undeclared parameter entity", name, null);
                break;
            case ENTITY_INTERNAL:
                if (isEntityValue) {
                    pushString(name, getEntityValue(name));
                } else {
                    pushString(name, " " + getEntityValue(name) + ' ');
                }
                break;
            case ENTITY_TEXT:
                // Pad with spaces outside entity values only. The original
                // test was inverted, which disagreed with the
                // ENTITY_INTERNAL branch above.
                if (!isEntityValue) {
                    pushString(null, " ");
                }
                pushURL(name, getEntityPublicId(name), getEntitySystemId(name),
                        null, null, null);
                if (!isEntityValue) {
                    pushString(null, " ");
                }
                break;
        }
    }

    /**
     * Parse an entity declaration and register the entity.
     * 
     * <pre>
     *    [71] EntityDecl  ::= '&lt;!ENTITY' S %Name S %EntityDef S? '&gt;'
     *                       | '&lt;!ENTITY' S '%' S %Name S %EntityDef S? '&gt;'
     *    [72] EntityDef   ::= EntityValue | ExternalDef
     *    [73] ExternalDef ::= ExternalID %NDataDecl?
     *    [74] ExternalID  ::= 'SYSTEM' S SystemLiteral
     *                       | 'PUBLIC' S PubidLiteral S SystemLiteral
     *    [75] NDataDecl   ::= S %'NDATA' S %Name
     * </pre>
     * 
     * <p>
     * NOTE: the '&lt;!ENTITY' has already been read. Parameter entities are
     * registered under their name prefixed with '%'.
     */
    void parseEntityDecl() throws java.lang.Exception
    {
        // Check for a parameter entity.
        requireWhitespace();
        final boolean isParameterEntity = tryRead('%');
        if (isParameterEntity) {
            requireWhitespace();
        }

        // Read the entity name, and prepend
        // '%' if necessary.
        String name = readNmtoken(true);
        if (isParameterEntity) {
            name = "%" + name;
        }

        // Peek at the next character to tell a quoted entity value
        // from external identifiers.
        requireWhitespace();
        final char next = readCh();
        unread(next);

        if (next == '"' || next == '\'') {
            // Internal entity: the literal is read in entity-value
            // context, then the DTD context is restored.
            context = CONTEXT_ENTITYVALUE;
            final String value = readLiteral(LIT_CHAR_REF | LIT_PE_REF);
            context = CONTEXT_DTD;
            setInternalEntity(name, value);
        } else {
            // External entity: ids[0] and ids[1] are passed on as the
            // public and system identifier respectively.
            final String[] ids = readExternalIds(false);
            if (ids[1] == null) {
                error("system identifer missing", name, null);
            }

            // An NDATA declaration makes it an unparsed data entity.
            skipWhitespace();
            if (tryRead("NDATA")) {
                requireWhitespace();
                final String notationName = readNmtoken(true);
                setExternalDataEntity(name, ids[0], ids[1], notationName);
            } else {
                setExternalTextEntity(name, ids[0], ids[1]);
            }
        }

        // Finish the declaration.
        skipWhitespace();
        require('>');
    }

    /**
     * Parse a notation declaration and register the notation.
     * 
     * <pre>
     *    [81] NotationDecl ::= '&lt;!NOTATION' S %Name S %ExternalID S? '&gt;'
     * </pre>
     * 
     * <p>
     * NOTE: the '&lt;!NOTATION' has already been read. At least one of the
     * two external identifiers must be present.
     */
    void parseNotationDecl() throws java.lang.Exception
    {
        // S Name S
        requireWhitespace();
        final String notationName = readNmtoken(true);
        requireWhitespace();

        // Read the external identifiers.
        final String[] ids = readExternalIds(true);
        final boolean noIdentifier = (ids[0] == null) && (ids[1] == null);
        if (noIdentifier) {
            error("external identifer missing", notationName, null);
        }

        // Register the notation.
        setNotation(notationName, ids[0], ids[1]);

        // S? '>'
        skipWhitespace();
        require('>');
    }

    /**
     * Parse PCDATA.
     * 
     * <pre>
     *    [16] PCData ::= [^&lt;&amp;]*
     * </pre>
     * 
     * <p>
     * Characters are collected up to, but not including, the next '&lt;' or
     * '&amp;'. The trick here is that the data stays in the dataBuffer
     * without necessarily being converted to a string right away.
     */
    void parsePCData() throws java.lang.Exception
    {
        // Fast path: in most cases the whole run of character data is
        // already in the readBuffer, so it can be copied in one go. If no
        // delimiter is found there, nothing has been consumed and the
        // slow path below starts over.
        if (USE_CHEATS) {
            int newlines = 0;
            int columns = 0;

            for (int i = readBufferPos; i < readBufferLength; i++) {
                final char ch = readBuffer[i];
                if (ch == '\n') {
                    newlines++;
                    columns = 0;
                } else if (ch == '&' || ch == '<') {
                    final int start = readBufferPos;
                    columns++;
                    readBufferPos = i;
                    // Bring the line/column bookkeeping up to date.
                    if (newlines > 0) {
                        line += newlines;
                        column = columns;
                    } else {
                        column += columns;
                    }
                    dataBufferAppend(readBuffer, start, i - start);
                    return;
                } else {
                    columns++;
                }
            }
        }

        // Slow path: read character by character until a delimiter
        // shows up, then push the delimiter back.
        char c;
        for (c = readCh(); c != '<' && c != '&'; c = readCh()) {
            dataBufferAppend(c);
        }
        unread(c);
    }

    //////////////////////////////////////////////////////////////////////
    // High-level reading and scanning methods.
    //////////////////////////////////////////////////////////////////////

    /**
     * Require at least one whitespace character and skip any that follow.
     * 
     * <pre>
     *    [1] S ::= (#x20 | #x9 | #xd | #xa)+
     * </pre>
     * 
     * <p>
     * Reports "whitespace expected" if the next character is not
     * whitespace.
     */
    void requireWhitespace() throws java.lang.Exception
    {
        final char first = readCh();
        if (!isWhitespace(first)) {
            error("whitespace expected", first, null);
            return;
        }
        skipWhitespace();
    }

    /**
     * Parse whitespace characters, and leave them in the data buffer.
     * 
     * <p>
     * The first non-whitespace character is pushed back onto the input.
     */
    void parseWhitespace() throws java.lang.Exception
    {
        char c;
        for (c = readCh(); isWhitespace(c); c = readCh()) {
            dataBufferAppend(c);
        }
        unread(c);
    }

    /**
     * Skip whitespace characters.
     * 
     * <pre>
     *    [1] S ::= (#x20 | #x9 | #xd | #xa)+
     * </pre>
     * 
     * <p>
     * Zero whitespace characters is fine; on return the input is positioned
     * at the first non-whitespace character.
     */
    void skipWhitespace() throws java.lang.Exception
    {
        // Fast path: most of the time the whitespace lies entirely within
        // the current read buffer. If the buffer runs out first, or a '%'
        // is met in a context where it may start a parameter entity
        // reference, nothing is consumed and the slow path takes over.
        if (USE_CHEATS) {
            int newlines = 0;
            int columns = 0;

            for (int i = readBufferPos; i < readBufferLength; i++) {
                final char ch = readBuffer[i];

                if (ch == ' ' || ch == '\t' || ch == '\r') {
                    columns++;
                    continue;
                }
                if (ch == '\n') {
                    newlines++;
                    columns = 0;
                    continue;
                }
                if (ch == '%'
                        && (context == CONTEXT_DTD
                                || context == CONTEXT_ENTITYVALUE)) {
                    break;
                }

                // First non-whitespace character: commit the new
                // position and the line/column bookkeeping.
                readBufferPos = i;
                if (newlines > 0) {
                    line += newlines;
                    column = columns;
                } else {
                    column += columns;
                }
                return;
            }
        }

        // OK, do it by the book.
        char c;
        do {
            c = readCh();
        } while (isWhitespace(c));
        unread(c);
    }

    /**
     * Read a name or name token and return it as an interned string.
     * 
     * <pre>
     *    [5] Name    ::= (Letter | '_' | ':') (NameChar)*
     *    [7] Nmtoken ::= (NameChar)+
     * </pre>
     * 
     * <p>
     * NOTE: [6] is implemented implicitly where required.
     * 
     * <p>
     * The token ends at the first delimiter character, which is left in the
     * input. An empty token is reported as "name expected".
     * 
     * @param isName
     *            Not consulted by this method's body; names and name tokens
     *            are read identically here.
     * @return The token, obtained through intern().
     */
    String readNmtoken(boolean isName) throws java.lang.Exception
    {
        char c;

        // Fast path: look for the delimiter directly in the read buffer.
        // readBufferPos is only advanced when a delimiter is found, so if
        // the buffer runs out first the slow path below starts from the
        // same position.
        if (USE_CHEATS) {
            loop: for (int i = readBufferPos; i < readBufferLength; i++) {
                switch (readBuffer[i]) {
                    case '%':
                        // In the DTD or an entity value a '%' is left to
                        // the slow path (readCh()); elsewhere it is an
                        // ordinary delimiter.
                        if (context == CONTEXT_DTD
                                || context == CONTEXT_ENTITYVALUE) {
                            break loop;
                        } // else fall through...
                    case '<':
                    case '>':
                    case '&':
                    case ',':
                    case '|':
                    case '*':
                    case '+':
                    case '?':
                    case ')':
                    case '=':
                    case '\'':
                    case '"':
                    case '[':
                    case ' ':
                    case '\t':
                    case '\r':
                    case '\n':
                    case ';':
                    case '/':
                    // NOTE(review): '#' is a delimiter here but not in the
                    // slow path below -- confirm which is intended.
                    case '#':
                        int start = readBufferPos;
                        if (i == start) {
                            error("name expected", readBuffer[i], null);
                        }
                        readBufferPos = i;
                        return intern(readBuffer, start, i - start);
                }
            }
        }

        // Slow path: accumulate the token in nameBuffer one character
        // at a time.
        nameBufferPos = 0;

        // Read the first character.
        loop: while (true) {
            c = readCh();
            switch (c) {
                case '%':
                case '<':
                case '>':
                case '&':
                case ',':
                case '|':
                case '*':
                case '+':
                case '?':
                case ')':
                case '=':
                case '\'':
                case '"':
                case '[':
                case ' ':
                case '\t':
                case '\n':
                case '\r':
                case ';':
                case '/':
                    // Delimiter: push it back and return what has been
                    // collected so far.
                    unread(c);
                    if (nameBufferPos == 0) {
                        error("name expected", null, null);
                    }
                    String s = intern(nameBuffer, 0, nameBufferPos);
                    nameBufferPos = 0;
                    return s;
                default:
                    // Name character: extendArray() is called before
                    // every store so that nameBuffer has room for it.
                    nameBuffer = (char[]) extendArray(nameBuffer,
                            nameBuffer.length, nameBufferPos);
                    nameBuffer[nameBufferPos++] = c;
            }
        }
    }

    /**
     * Read a delimited literal from the current input and return its text.
     * <p>
     * Productions covered:
     *
     * <pre>
     * [10] AttValue      ::= '"' ([^&lt;&amp;"] | Reference)* '"'
     *                      | "'" ([^&lt;&amp;'] | Reference)* "'"
     * [11] SystemLiteral ::= '"' URLchar* '"' | "'" (URLchar - "'")* "'"
     * [13] PubidLiteral  ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
     * [9]  EntityValue   ::= '"' ([^%&amp;"] | PEReference | Reference)* '"'
     *                      | "'" ([^%&amp;'] | PEReference | Reference)* "'"
     * </pre>
     *
     * The first character read is taken as the delimiter; it must be a double
     * quote, a single quote or the NUL character (char 0). The literal runs up
     * to the next occurrence of that same character. Line ends inside the
     * literal are replaced by a single space.
     *
     * @param flags
     *            A combination of LIT_CHAR_REF (handle <code>&amp;#...</code>
     *            character references), LIT_ENTITY_REF (handle other
     *            <code>&amp;</code> references; only consulted when
     *            LIT_CHAR_REF is also set) and LIT_NORMALIZE (normalise
     *            whitespace in the result).
     * @return The contents of the data buffer as a string, or null if the
     *         first character was not an accepted delimiter and error()
     *         returned normally.
     */
    String readLiteral(int flags) throws java.lang.Exception
    {
        char delim, c;
        // Remembered only so the end-of-input error can say where the
        // literal began.
        int startLine = line;

        // Find the delimiter.
        delim = readCh();
        if (delim != '"' && delim != '\'' && delim != (char) 0) {
            error("expected '\"' or \"'\"", delim, null);
            return null;
        }

        // Read the literal.
        try {
            c = readCh();

            loop: while (c != delim) {
                switch (c) {
                    // Literals never have line ends
                    case '\n':
                    case '\r':
                        c = ' ';
                        break;
                    // References may be allowed
                    case '&':
                        if ((flags & LIT_CHAR_REF) > 0) {
                            // Look at the character after '&' to tell a
                            // character reference from an entity reference.
                            c = readCh();
                            if (c == '#') {
                                parseCharRef();
                                c = readCh();
                                continue loop; // check the next character
                            } else if ((flags & LIT_ENTITY_REF) > 0) {
                                // Not a character reference: put the
                                // look-ahead back and parse an entity
                                // reference instead.
                                unread(c);
                                parseEntityRef(false);
                                c = readCh();
                                continue loop;
                            } else {
                                // Entity references are not expanded here:
                                // keep the '&' literally; the look-ahead
                                // character is appended below.
                                dataBufferAppend('&');
                            }
                        }
                        break;

                    default:
                        break;
                }
                // Append the (possibly rewritten) character and move on.
                dataBufferAppend(c);
                c = readCh();
            }
        } catch (EOFException e) {
            // Input ran out before the closing delimiter was seen.
            error("end of input while looking for delimiter (started on line "
                    + startLine + ')', null, new Character(delim).toString());
        }

        // Normalise whitespace if necessary.
        if ((flags & LIT_NORMALIZE) > 0) {
            dataBufferNormalize();
        }

        // Return the value.
        return dataBufferToString();
    }

    /**
     * Try reading external identifiers.
     * <p>
     * Two forms are recognised: <code>PUBLIC</code> followed by a public
     * identifier and a system identifier, and <code>SYSTEM</code> followed by
     * a system identifier only. Inside a notation declaration the system
     * identifier after a public identifier is optional.
     * 
     * @param inNotation
     *            Are we in a notation?
     * @return A two-member String array: element 0 holds the public
     *         identifier and element 1 the system identifier. Either element
     *         is null when the corresponding identifier was not present.
     */
    String[] readExternalIds(boolean inNotation) throws java.lang.Exception
    {
        String ids[] = new String[2];

        if (tryRead("PUBLIC")) {
            requireWhitespace();
            ids[0] = readLiteral(LIT_NORMALIZE); // public id
            if (inNotation) {
                skipWhitespace();
                // Peek at the next character without consuming it:
                // readLiteral() reads the opening delimiter itself, so the
                // quote must still be in the input when it is called.
                char c = readCh();
                unread(c);
                if (c == '"' || c == '\'') {
                    ids[1] = readLiteral(0); // optional system id
                }
            } else {
                requireWhitespace();
                ids[1] = readLiteral(0); // system id
            }
        } else if (tryRead("SYSTEM")) {
            requireWhitespace();
            ids[1] = readLiteral(0); // system id
        }

        return ids;
    }

    /**
     * Report whether a character is XML whitespace.
     * <p>
     * Implements, for a single character, the production
     * <code>[1] S ::= (#x20 | #x9 | #xd | #xa)+</code>.
     * 
     * @param c
     *            The character to test.
     * @return true if the character is a space, tab, carriage return or line
     *         feed; false for anything else.
     */
    final boolean isWhitespace(char c)
    {
        return c == ' ' || c == '\t' || c == '\r' || c == '\n';
    }

    //////////////////////////////////////////////////////////////////////
    // Utility routines.
    //////////////////////////////////////////////////////////////////////

    /**
     * Append one character to the data buffer, enlarging the buffer first
     * when the write position has reached its end.
     */
    void dataBufferAppend(char c)
    {
        // extendArray() hands back the same array when there is still room.
        char[] buffer = (char[]) extendArray(dataBuffer, dataBuffer.length,
                dataBufferPos);
        dataBuffer = buffer;
        buffer[dataBufferPos++] = c;
    }

    /**
     * Append every character of a string to the data buffer.
     */
    void dataBufferAppend(String s)
    {
        char[] chars = s.toCharArray();
        dataBufferAppend(chars, 0, chars.length);
    }

    /**
     * Append a slice of a character array to the data buffer.
     * <p>
     * Copies <code>length</code> characters of <code>ch</code>, beginning at
     * index <code>start</code>, to the current end of the data buffer.
     */
    void dataBufferAppend(char ch[], int start, int length)
    {
        int newEnd = dataBufferPos + length;

        // Make sure the buffer can hold everything up to the new end.
        dataBuffer = (char[]) extendArray(dataBuffer, dataBuffer.length,
                newEnd);
        System.arraycopy(ch, start, dataBuffer, dataBufferPos, length);
        dataBufferPos = newEnd;
    }

    /**
     * Normalise whitespace in the data buffer, in place.
     * <p>
     * Leading and trailing whitespace is dropped, and every remaining run of
     * whitespace characters is replaced by one space. The buffer position is
     * then set to the (never larger) normalised length.
     */
    void dataBufferNormalize()
    {
        int limit = dataBufferPos;
        int read = 0;

        // Trim leading whitespace.
        while (read < limit && isWhitespace(dataBuffer[read])) {
            read++;
        }

        // Trim trailing whitespace.
        while (limit > read && isWhitespace(dataBuffer[limit - 1])) {
            limit--;
        }

        // Compact towards the start of the buffer. The write index never
        // overtakes the read index, so unread characters are not clobbered.
        int write = 0;
        while (read < limit) {
            char c = dataBuffer[read++];

            if (!isWhitespace(c)) {
                dataBuffer[write++] = c;
                continue;
            }

            // Swallow the rest of this whitespace run and emit one space.
            // After trimming, a run is always followed by a non-whitespace
            // character, which the next iteration copies.
            while (read < limit && isWhitespace(dataBuffer[read])) {
                read++;
            }
            dataBuffer[write++] = ' ';
        }

        dataBufferPos = write;
    }

    /**
     * Convert the current contents of the data buffer to a string and reset
     * the buffer to empty.
     * 
     * @return A new (non-interned) string holding the buffered characters.
     */
    String dataBufferToString()
    {
        String text = String.valueOf(dataBuffer, 0, dataBufferPos);
        dataBufferPos = 0;
        return text;
    }

    /**
     * Flush the contents of the data buffer to the handler, if appropriate,
     * and reset the buffer for new input.
     * <p>
     * Which callback is used depends on the content type of the current
     * element: mixed or ANY content is reported as character data,
     * element-only content as ignorable whitespace, and undeclared or empty
     * content is discarded. Nothing happens when the buffer is empty.
     */
    void dataBufferFlush() throws java.lang.Exception
    {
        if (dataBufferPos <= 0) {
            return;
        }

        if (handler != null) {
            switch (currentElementContent) {
                case CONTENT_MIXED:
                case CONTENT_ANY:
                    handler.charData(dataBuffer, 0, dataBufferPos);
                    break;
                case CONTENT_ELEMENTS:
                    handler.ignorableWhitespace(dataBuffer, 0, dataBufferPos);
                    break;
                default:
                    // CONTENT_UNDECLARED, CONTENT_EMPTY: nothing to report.
                    break;
            }
        }

        dataBufferPos = 0;
    }

    /**
     * Require a string to appear next in the input, character by character,
     * or report an error through {@link #require(char)}.
     */
    void require(String delim) throws java.lang.Exception
    {
        for (int i = 0, n = delim.length(); i < n; i++) {
            require(delim.charAt(i));
        }
    }

    /**
     * Require a specific character to appear next in the input; any other
     * character is reported through error().
     */
    void require(char delim) throws java.lang.Exception
    {
        char actual = readCh();

        if (actual == delim) {
            return;
        }
        error("expected character", actual, String.valueOf(delim));
    }

    /**
     * Return an internalised version of a string.
     * <p>
     * Strings returned by this method for equal character sequences are the
     * same object, so they can be compared with <code>==</code> instead of
     * <code>String.equals()</code>. To compare your own strings against
     * internalised ones in the same way, internalise them first:
     * 
     * <pre>
     * String PARA = parser.intern("PARA");
     * </pre>
     * 
     * <p>
     * Note that this will not return the same results as String.intern().
     * 
     * @param s
     *            The string to internalise.
     * @return An internalised version of the string.
     * @see #intern(char[],int,int)
     * @see java.lang.String#intern
     */
    public String intern(String s)
    {
        return intern(s.toCharArray(), 0, s.length());
    }

    /**
     * Create an internalised string from a character array.
     * <p>
     * The characters are hashed into the parser's symbol table. Each bucket of
     * the table is a flat array of pairs: a <code>char[]</code> at an even
     * index followed by the corresponding String at the next odd index. If an
     * equal character sequence is already present its String is returned;
     * otherwise a new String is created, stored and returned.
     * <p>
     * Note that this will not return the same results as String.intern().
     * 
     * @param ch
     *            an array of characters for building the string.
     * @param start
     *            the starting position in the array.
     * @param length
     *            the number of characters to place in the string.
     * @return an internalised string.
     * @see #intern(String)
     * @see java.lang.String#intern
     */
    public String intern(char ch[], int start, int length)
    {
        int index;
        int hash = 0;

        // Generate a hash code. Masking keeps the value non-negative, so the
        // modulo below always yields a valid table index.
        for (int i = start; i < start + length; i++) {
            hash = ((hash << 1) & 0xffffff) + (int) ch[i];
        }

        hash = hash % SYMBOL_TABLE_LENGTH;

        // Get the bucket.
        Object bucket[] = (Object[]) symbolTable[hash];
        if (bucket == null) {
            symbolTable[hash] = bucket = new Object[8];
        }

        // Search for a matching tuple, and return the string if we find one.
        // When the loop finishes, 'index' is the first free bucket position.
        for (index = 0; index < bucket.length; index += 2) {
            char chFound[] = (char[]) bucket[index];

            // Stop when we hit a null index: no more tuples.
            if (chFound == null) {
                break;
            }

            if (chFound.length == length) {
                int i = 0;
                while (i < length && ch[start + i] == chFound[i]) {
                    i++;
                }
                // Every character matched. This also holds for length == 0,
                // so the empty string is found instead of being added again.
                if (i == length) {
                    return (String) bucket[index + 1];
                }
            }
        }

        // Not found -- we'll have to add it.

        // Grow the bucket if 'index' is past its end.
        bucket = (Object[]) extendArray(bucket, bucket.length, index);

        // Add the new tuple at the end of the bucket.
        String s = new String(ch, start, length);
        bucket[index] = s.toCharArray();
        bucket[index + 1] = s;
        symbolTable[hash] = bucket;
        return s;
    }

    /**
     * Ensure the capacity of an array, allocating a new one if necessary.
     * <p>
     * If <code>requiredSize</code> is already a valid index the array is
     * returned unchanged. Otherwise a larger array is allocated (at least
     * double the current size, and always large enough for
     * <code>requiredSize</code> to be a valid index) and the first
     * <code>currentSize</code> elements are copied into it.
     * 
     * @param array
     *            The array to extend; must be a <code>char[]</code> or an
     *            <code>Object[]</code>.
     * @param currentSize
     *            The current length of the array.
     * @param requiredSize
     *            The index that must be usable in the returned array.
     * @return The original array, or a larger copy of it.
     * @throws IllegalArgumentException
     *             if a new array is needed and <code>array</code> is neither
     *             a <code>char[]</code> nor an <code>Object[]</code>.
     */
    Object extendArray(Object array, int currentSize, int requiredSize)
    {
        if (requiredSize < currentSize) {
            return array;
        }

        // Prefer doubling, but never end up with fewer than
        // requiredSize + 1 slots (this also covers currentSize == 0).
        int newSize = currentSize * 2;
        if (newSize <= requiredSize) {
            newSize = requiredSize + 1;
        }

        Object newArray;
        if (array instanceof char[]) {
            newArray = new char[newSize];
        } else if (array instanceof Object[]) {
            newArray = new Object[newSize];
        } else {
            throw new IllegalArgumentException("unsupported array type: "
                    + (array == null ? "null" : array.getClass().getName()));
        }

        System.arraycopy(array, 0, newArray, 0, currentSize);
        return newArray;
    }

    //////////////////////////////////////////////////////////////////////
    // XML query routines.
    //////////////////////////////////////////////////////////////////////

    //
    // Elements
    //

    /**
     * List the element types declared for this XML document.
     * <p>
     * The results will be valid only after the DTD (if any) has been parsed.
     * 
     * @return An enumeration of the keys of the element table, i.e. the
     *         names of all registered element types (as Strings).
     * @see #getElementContentType
     * @see #getElementContentModel
     */
    public Enumeration declaredElements()
    {
        return this.elementInfo.keys();
    }

    /**
     * Look up the content type of an element.
     * 
     * @param name
     *            The element type name.
     * @return An integer constant representing the content type;
     *         CONTENT_UNDECLARED when nothing is registered under that name.
     * @see #getElementContentModel
     * @see #CONTENT_UNDECLARED
     * @see #CONTENT_ANY
     * @see #CONTENT_EMPTY
     * @see #CONTENT_MIXED
     * @see #CONTENT_ELEMENTS
     */
    public int getElementContentType(String name)
    {
        Object info[] = (Object[]) elementInfo.get(name);
        return (info == null)
                ? CONTENT_UNDECLARED
                : ((Integer) info[0]).intValue();
    }

    /**
     * Look up the content model of an element.
     * <p>
     * The result will always be null unless the content type is
     * CONTENT_ELEMENTS or CONTENT_MIXED.
     * 
     * @param name
     *            The element type name.
     * @return The normalised content model, as a string, or null when no
     *         element is registered under that name.
     * @see #getElementContentType
     */
    public String getElementContentModel(String name)
    {
        Object info[] = (Object[]) elementInfo.get(name);
        return (info == null) ? null : (String) info[1];
    }

    /**
     * Register an element, or add information to an existing registration.
     * <p>
     * Each element is stored as a three-slot array: [0] content type
     * (Integer), [1] content model (String), [2] attribute hash table. Only
     * the pieces supplied as non-default values (a content type other than
     * CONTENT_UNDECLARED, a non-null model, a non-null attribute table) are
     * written; the rest keep their previous values. Supplying a content type
     * for an element that already has one is reported as an error.
     */
    void setElement(String name, int contentType, String contentModel,
            Hashtable attributes) throws java.lang.Exception
    {
        boolean declaresType = (contentType != CONTENT_UNDECLARED);
        Object info[] = (Object[]) elementInfo.get(name);

        if (info == null) {
            // First time we hear about this element: start with a blank
            // record.
            info = new Object[3];
            info[0] = new Integer(CONTENT_UNDECLARED);
            info[1] = null;
            info[2] = null;
        } else if (declaresType
                && ((Integer) info[0]).intValue() != CONTENT_UNDECLARED) {
            error("multiple declarations for element type", name, null);
            return;
        }

        if (declaresType) {
            info[0] = new Integer(contentType);
        }
        if (contentModel != null) {
            info[1] = contentModel;
        }
        if (attributes != null) {
            info[2] = attributes;
        }

        elementInfo.put(name, info);
    }

    /**
     * Look up the attribute hash table for an element. The hash table is
     * stored at index 2 of the element's info array.
     * 
     * @return The attribute table, or null if the element is unknown or has
     *         no attribute table yet.
     */
    Hashtable getElementAttributes(String name)
    {
        Object info[] = (Object[]) elementInfo.get(name);
        return (info == null) ? null : (Hashtable) info[2];
    }

    //
    // Attributes
    //

    /**
     * Get the declared attributes for an element type.
     * 
     * @param elname
     *            The name of the element type.
     * @return An Enumeration of the names of all attributes registered for
     *         the element type, or null if the element has no attribute
     *         table. The results will be valid only after the DTD (if any)
     *         has been parsed.
     * @see #getAttributeType
     * @see #getAttributeEnumeration
     * @see #getAttributeDefaultValueType
     * @see #getAttributeDefaultValue
     * @see #getAttributeExpandedValue
     */
    public Enumeration declaredAttributes(String elname)
    {
        Hashtable attlist = getElementAttributes(elname);
        return (attlist == null) ? null : attlist.keys();
    }

    /**
     * Retrieve the declared type of an attribute.
     * 
     * @param name
     *            The name of the associated element.
     * @param aname
     *            The name of the attribute.
     * @return An integer constant representing the attribute type;
     *         ATTRIBUTE_UNDECLARED when no such attribute is registered.
     * @see #ATTRIBUTE_UNDECLARED
     * @see #ATTRIBUTE_CDATA
     * @see #ATTRIBUTE_ID
     * @see #ATTRIBUTE_IDREF
     * @see #ATTRIBUTE_IDREFS
     * @see #ATTRIBUTE_ENTITY
     * @see #ATTRIBUTE_ENTITIES
     * @see #ATTRIBUTE_NMTOKEN
     * @see #ATTRIBUTE_NMTOKENS
     * @see #ATTRIBUTE_ENUMERATED
     * @see #ATTRIBUTE_NOTATION
     */
    public int getAttributeType(String name, String aname)
    {
        Object decl[] = getAttribute(name, aname);
        if (decl == null) {
            return ATTRIBUTE_UNDECLARED;
        }
        return ((Integer) decl[0]).intValue();
    }

    /**
     * Retrieve the allowed values for an enumerated attribute type.
     * 
     * @param name
     *            The name of the associated element.
     * @param aname
     *            The name of the attribute.
     * @return A string containing the token list, or null when no such
     *         attribute is registered.
     * @see #ATTRIBUTE_ENUMERATED
     * @see #ATTRIBUTE_NOTATION
     */
    public String getAttributeEnumeration(String name, String aname)
    {
        Object decl[] = getAttribute(name, aname);
        if (decl == null) {
            return null;
        }
        return (String) decl[3];
    }

    /**
     * Retrieve the default value of a declared attribute, exactly as it was
     * registered (no reference expansion).
     * 
     * @param name
     *            The name of the associated element.
     * @param aname
     *            The name of the attribute.
     * @return The default value, or null if the attribute was #IMPLIED or
     *         simply undeclared and unspecified.
     * @see #getAttributeExpandedValue
     */
    public String getAttributeDefaultValue(String name, String aname)
    {
        Object decl[] = getAttribute(name, aname);
        if (decl == null) {
            return null;
        }
        return (String) decl[1];
    }

    /**
     * Retrieve the expanded value of a declared attribute.
     * <p>
     * The expansion is computed on first request by pushing the raw default
     * value, wrapped in NUL delimiters, onto the input and reading it back
     * as a literal with whitespace normalisation and character and entity
     * reference handling enabled. The result is cached in slot 4 of the
     * attribute record.
     * 
     * @param name
     *            The name of the associated element.
     * @param aname
     *            The name of the attribute.
     * @return The expanded default value, or null if the attribute was
     *         #IMPLIED or simply undeclared, or if the expansion failed.
     * @see #getAttributeDefaultValue
     */
    public String getAttributeExpandedValue(String name, String aname)
    {
        Object decl[] = getAttribute(name, aname);
        if (decl == null) {
            return null;
        }

        boolean needsExpansion = (decl[4] == null && decl[1] != null);
        if (needsExpansion) {
            try {
                pushString(null, (char) 0 + (String) decl[1] + (char) 0);
                decl[4] = readLiteral(LIT_NORMALIZE | LIT_CHAR_REF
                        | LIT_ENTITY_REF);
            } catch (Exception e) {
                // Best effort: on failure the cache slot stays null and the
                // caller receives null.
            }
        }
        return (String) decl[4];
    }

    /**
     * Retrieve the default value type of a declared attribute.
     * 
     * @param name
     *            The name of the associated element.
     * @param aname
     *            The name of the attribute.
     * @return An integer constant representing the default value type;
     *         ATTRIBUTE_DEFAULT_UNDECLARED when no such attribute is
     *         registered.
     * @see #ATTRIBUTE_DEFAULT_SPECIFIED
     * @see #ATTRIBUTE_DEFAULT_IMPLIED
     * @see #ATTRIBUTE_DEFAULT_REQUIRED
     * @see #ATTRIBUTE_DEFAULT_FIXED
     */
    public int getAttributeDefaultValueType(String name, String aname)
    {
        Object decl[] = getAttribute(name, aname);
        if (decl == null) {
            return ATTRIBUTE_DEFAULT_UNDECLARED;
        }
        return ((Integer) decl[2]).intValue();
    }

    /**
     * Register an attribute declaration for later retrieval.
     * <p>
     * Each attribute is stored as a five-slot array: [0] type (Integer),
     * [1] default value (String), [2] default value type (Integer),
     * [3] enumeration (String), [4] expanded default value (initially null).
     * If the element already has an attribute of this name, the new
     * declaration is ignored.
     */
    void setAttribute(String elName, String name, int type, String enumeration,
            String value, int valueType) throws java.lang.Exception
    {
        // Create a new hashtable if the element has none yet.
        Hashtable attlist = getElementAttributes(elName);
        if (attlist == null) {
            attlist = new Hashtable();
        }

        // An existing declaration is kept; the new one is dropped.
        if (attlist.get(name) != null) {
            return;
        }

        Object attribute[] = {
            new Integer(type),
            value,
            new Integer(valueType),
            enumeration,
            null
        };
        attlist.put(name.intern(), attribute);

        // Use CONTENT_UNDECLARED to avoid overwriting
        // existing element declaration.
        setElement(elName, CONTENT_UNDECLARED, null, attlist);
    }

    /**
     * Retrieve the five-member array representing an attribute declaration.
     * 
     * @return The attribute record, or null if the element has no attribute
     *         table or the table has no entry for the attribute name.
     */
    Object[] getAttribute(String elName, String name)
    {
        Hashtable attlist = getElementAttributes(elName);
        return (attlist == null) ? null : (Object[]) attlist.get(name);
    }

    //
    // Entities
    //

    /**
     * Get declared entities.
     * 
     * @return An Enumeration of the names of all entities registered for
     *         this XML document. The results will be valid only after the
     *         DTD (if any) has been parsed.
     * @see #getEntityType
     * @see #getEntityPublicId
     * @see #getEntitySystemId
     * @see #getEntityValue
     * @see #getEntityNotationName
     */
    public Enumeration declaredEntities()
    {
        return this.entityInfo.keys();
    }

    /**
     * Find the type of an entity.
     * 
     * @param ename
     *            The name of the entity.
     * @return An integer constant representing the entity type;
     *         ENTITY_UNDECLARED when no such entity is registered.
     * @see #ENTITY_UNDECLARED
     * @see #ENTITY_INTERNAL
     * @see #ENTITY_NDATA
     * @see #ENTITY_TEXT
     */
    public int getEntityType(String ename)
    {
        Object record[] = (Object[]) entityInfo.get(ename);
        return (record == null)
                ? ENTITY_UNDECLARED
                : ((Integer) record[0]).intValue();
    }

    /**
     * Return an external entity's public identifier, if any.
     * 
     * @param ename
     *            The name of the external entity.
     * @return The entity's public identifier, or null if the entity was not
     *         declared, if it is not an external entity, or if no public
     *         identifier was provided.
     * @see #getEntityType
     */
    public String getEntityPublicId(String ename)
    {
        Object record[] = (Object[]) entityInfo.get(ename);
        return (record == null) ? null : (String) record[1];
    }

    /**
     * Return an external entity's system identifier.
     * 
     * @param ename
     *            The name of the external entity.
     * @return The entity's system identifier, or null if the entity was not
     *         declared, or if it is not an external entity.
     * @see #getEntityType
     */
    public String getEntitySystemId(String ename)
    {
        Object record[] = (Object[]) entityInfo.get(ename);
        return (record == null) ? null : (String) record[2];
    }

    /**
     * Return the value of an internal entity.
     * 
     * @param ename
     *            The name of the internal entity.
     * @return The entity's value, or null if the entity was not declared, or
     *         if it is not an internal entity.
     * @see #getEntityType
     */
    public String getEntityValue(String ename)
    {
        Object record[] = (Object[]) entityInfo.get(ename);
        return (record == null) ? null : (String) record[3];
    }

    /**
     * Get the notation name associated with an NDATA entity.
     * 
     * @param eName
     *            The NDATA entity name.
     * @return The associated notation name, or null if the entity was not
     *         declared, or if it is not an NDATA entity.
     * @see #getEntityType
     */
    public String getEntityNotationName(String eName)
    {
        Object record[] = (Object[]) entityInfo.get(eName);
        return (record == null) ? null : (String) record[4];
    }

    /**
     * Register an internal entity: class ENTITY_INTERNAL with a replacement
     * value and no public id, system id or notation name.
     */
    void setInternalEntity(String eName, String value)
    {
        final String noPublicId = null;
        final String noSystemId = null;
        final String noNotation = null;
        setEntity(eName, ENTITY_INTERNAL, noPublicId, noSystemId, value,
                noNotation);
    }

    /**
     * Register an external data entity: class ENTITY_NDATA with public and
     * system identifiers, a notation name and no replacement value.
     */
    void setExternalDataEntity(String eName, String pubid, String sysid,
            String nName)
    {
        final String noValue = null;
        setEntity(eName, ENTITY_NDATA, pubid, sysid, noValue, nName);
    }

    /**
     * Register an external text entity: class ENTITY_TEXT with public and
     * system identifiers and neither a replacement value nor a notation
     * name.
     */
    void setExternalTextEntity(String eName, String pubid, String sysid)
    {
        final String noValue = null;
        final String noNotation = null;
        setEntity(eName, ENTITY_TEXT, pubid, sysid, noValue, noNotation);
    }

    /**
     * Register an entity declaration for later retrieval.
     * <p>
     * Each entity is stored as a five-slot array: [0] entity class
     * (Integer), [1] public id, [2] system id, [3] value, [4] notation name.
     * If an entity of this name is already registered, the new declaration
     * is silently ignored.
     */
    void setEntity(String eName, int eClass, String pubid, String sysid,
            String value, String nName)
    {
        if (entityInfo.get(eName) != null) {
            // The first declaration stays in force.
            return;
        }

        Object entity[] = {
            new Integer(eClass),
            pubid,
            sysid,
            value,
            nName
        };
        entityInfo.put(eName, entity);
    }

    //
    // Notations.
    //

    /**
     * Get declared notations.
     * 
     * @return An Enumeration of the names of all notations registered for
     *         this XML document. The results will be valid only after the
     *         DTD (if any) has been parsed.
     * @see #getNotationPublicId
     * @see #getNotationSystemId
     */
    public Enumeration declaredNotations()
    {
        return this.notationInfo.keys();
    }

    /**
     * Look up the public identifier for a notation. You will normally use
     * this method to look up a notation that was provided as an attribute
     * value or for an NDATA entity.
     * 
     * @param nname
     *            The name of the notation.
     * @return A string containing the public identifier, or null if none was
     *         provided or if no such notation was declared.
     * @see #getNotationSystemId
     */
    public String getNotationPublicId(String nname)
    {
        Object record[] = (Object[]) notationInfo.get(nname);
        return (record == null) ? null : (String) record[0];
    }

    /**
     * Look up the system identifier for a notation. You will normally use
     * this method to look up a notation that was provided as an attribute
     * value or for an NDATA entity.
     * 
     * @param nname
     *            The name of the notation.
     * @return A string containing the system identifier, or null if no such
     *         notation was declared.
     * @see #getNotationPublicId
     */
    public String getNotationSystemId(String nname)
    {
        Object record[] = (Object[]) notationInfo.get(nname);
        return (record == null) ? null : (String) record[1];
    }

    /**
     * Register a notation declaration for later retrieval.
     * <p>
     * Each notation is stored as a two-slot array: [0] public id, [1] system
     * id. Declaring the same notation name twice is reported as an error.
     */
    void setNotation(String nname, String pubid, String sysid)
            throws java.lang.Exception
    {
        if (notationInfo.get(nname) != null) {
            error("multiple declarations of notation", nname, null);
            return;
        }

        Object notation[] = { pubid, sysid };
        notationInfo.put(nname, notation);
    }

    //
    // Location.
    //

    /**
     * Return the current line number, as tracked by the parser while
     * reading characters.
     * 
     * @return The value of the parser's line counter.
     */
    public int getLineNumber()
    {
        return this.line;
    }

    /**
     * Return the current column number, as tracked by the parser while
     * reading characters.
     * 
     * @return The value of the parser's column counter.
     */
    public int getColumnNumber()
    {
        return this.column;
    }

    //////////////////////////////////////////////////////////////////////
    // High-level I/O.
    //////////////////////////////////////////////////////////////////////

    /**
     * Read a single character from the readBuffer.
     * <p>
     * The readDataChunk() method maintains the buffer.
     * <p>
     * If we hit the end of an entity, try to pop the stack and keep going.
     * <p>
     * (This approach doesn't really enforce XML's rules about entity
     * boundaries, but this is not currently a validating parser).
     * <p>
     * While the context is CONTEXT_DTD or CONTEXT_ENTITYVALUE, a '%' that is
     * not followed by whitespace is treated as the start of a parameter
     * entity reference: the reference is parsed and the character that
     * follows is returned instead.
     * <p>
     * This routine also attempts to keep track of the current position in
     * external entities, but it's not entirely accurate.
     * 
     * @return The next character from the current input source.
     * @see #unread(char)
     * @see #readDataChunk
     * @see #readBuffer
     * @see #line
     */
    char readCh() throws java.lang.Exception
    {
        char c;

        // As long as there's nothing in the
        // read buffer, try reading more data
        // (for an external entity) or popping
        // the entity stack (for either).
        while (readBufferPos >= readBufferLength) {
            switch (sourceType) {
                case INPUT_READER:
                case INPUT_EXTERNAL:
                case INPUT_STREAM:
                    // Refill from the underlying source; if nothing comes
                    // back, pop to the enclosing input and try again until
                    // some input has data.
                    readDataChunk();
                    while (readBufferLength < 1) {
                        popInput();
                        if (readBufferLength < 1) {
                            readDataChunk();
                        }
                    }
                    break;

                default:
                    // Any other source type cannot be refilled: go back to
                    // the enclosing input.
                    popInput();
                    break;
            }
        }

        c = readBuffer[readBufferPos++];

        // This is a particularly nasty bit
        // of code, that checks for a parameter
        // entity reference but peeks ahead to
        // catch the '%' in parameter entity
        // declarations.
        if (c == '%'
                && (context == CONTEXT_DTD || context == CONTEXT_ENTITYVALUE)) {
            // Peek one character ahead (read it, then push it back).
            char c2 = readCh();
            unread(c2);
            if (!isWhitespace(c2)) {
                // '%' directly followed by a non-space character: handle it
                // as a reference and return whatever comes next. The '%'
                // itself is not counted in the column in this case.
                parsePEReference(context == CONTEXT_ENTITYVALUE);
                return readCh();
            }
        }

        // Position tracking: a line feed starts a new line, anything else
        // advances the column.
        if (c == '\n') {
            line++;
            column = 0;
        } else {
            column++;
        }

        return c;
    }

    /**
     * Push a single character back onto the current input stream.
     * <p>
     * This method usually pushes the character back onto the readBuffer, while
     * the unread(String) method treats the string as a new internal entity.
     * <p>
     * I don't think that this would ever be called with readBufferPos = 0,
     * because the methods always reads a character before unreading it, but
     * just in case, I've added a boundary condition.
     * 
     * @param c
     *            The character to push back.
     * @see #readCh
     * @see #unread(String)
     * @see #unread(char[])
     * @see #readBuffer
     */
    void unread(char c) throws java.lang.Exception
    {
        // Pushes one character back so that the next readCh() returns it.

        // readCh() bumps the line counter for every '\n' it returns, so
        // taking a newline back has to undo that; the column is parked
        // at -1.
        if (c == '\n') {
            line--;
            column = -1;
        }

        if (readBufferPos > 0) {
            // Normal condition: step back inside the current buffer.
            readBuffer[--readBufferPos] = c;
        } else {
            // Boundary condition: no room in front of the buffer, so the
            // character becomes a one-character input of its own.
            // String.valueOf avoids the deprecated Character constructor.
            pushString(null, String.valueOf(c));
        }
    }

    /**
     * Push a char array back onto the current input stream.
     * <p>
     * NOTE: you must <em>never</em> push back characters that you haven't
     * actually read: use pushString() instead.
     * 
     * @see #readCh
     * @see #unread(char)
     * @see #unread(String)
     * @see #readBuffer
     * @see #pushString
     */
    void unread(char ch[], int length) throws java.lang.Exception
    {
        // Pushes the first `length` characters of `ch` back onto the
        // input. The characters must be the ones that were just read.

        // Undo the line accounting once per newline being returned.
        int idx = 0;
        while (idx < length) {
            if (ch[idx++] == '\n') {
                line--;
                column = -1;
            }
        }

        if (length >= readBufferPos) {
            // Not enough consumed characters in the current buffer to
            // step back over: stack the array as a separate input.
            pushCharArray(null, ch, 0, length);
            sourceType = INPUT_BUFFER;
            return;
        }

        // The characters are still in the buffer: just rewind.
        readBufferPos -= length;
    }

    /**
     * Push a new external input source.
     * <p>
     * The source will be either an external text entity, or the DTD external
     * subset.
     * <p>
     * TO DO: Right now, this method always attempts to autodetect the encoding;
     * in the future, it should allow the caller to request an encoding
     * explicitly, and it should also look at the headers with an HTTP
     * connection.
     * 
     * @param url
     *            The java.net.URL object for the entity.
     * @see XmlHandler#resolveEntity
     * @see #pushString
     * @see #sourceType
     * @see #pushInput
     * @see #detectEncoding
     * @see #sourceType
     * @see #readBuffer
     */
    void pushURL(String ename, String publicId, String systemId, Reader reader,
            InputStream stream, String encoding) throws java.lang.Exception
    {
        // Makes an external source the current input. The caller may
        // supply a character stream (reader), a byte stream (stream), or
        // neither, in which case a connection to systemId is opened.
        // The previous input is saved first via pushInput(ename).

        // Push the existing status.
        pushInput(ename);

        // Create a new read buffer.
        // (Note the four-character margin)
        readBuffer = new char[READ_BUFFER_MAX + 4];
        readBufferPos = 0;
        readBufferLength = 0;
        readBufferOverflow = -1;
        is = null;
        line = 1;

        currentByteCount = 0;

        // Flush any remaining data.
        dataBufferFlush();

        // Make the URL absolute: relative to the entity currently being
        // read if there is one, otherwise relative to the base URI.
        if (systemId != null && externalEntity != null) {
            systemId = new URL(externalEntity.getURL(), systemId).toString();
        } else if (systemId != null && baseURI != null) {
            try {
                systemId = new URL(new URL(baseURI), systemId).toString();
            } catch (Exception ignored) {
                // Deliberate best effort: if the base URI or the system
                // ID cannot be parsed, keep the system ID as given and
                // let the resolver / URL opening below deal with it.
            }
        }

        // See if the application wants to
        // redirect the system ID and/or
        // supply its own character stream.
        if (systemId != null && handler != null) {
            Object input = handler.resolveEntity(publicId, systemId);
            if (input != null) {
                if (input instanceof String) {
                    systemId = (String) input;
                } else if (input instanceof InputStream) {
                    stream = (InputStream) input;
                } else if (input instanceof Reader) {
                    reader = (Reader) input;
                }
            }
        }

        // Start the entity.
        if (handler != null) {
            if (systemId != null) {
                handler.startExternalEntity(systemId);
            } else {
                handler.startExternalEntity("[external stream]");
            }
        }

        // Figure out what we're reading from.
        if (reader != null) {
            // There's an explicit character stream: no byte decoding is
            // needed, so any encoding declaration is only parsed, not
            // acted upon.
            sourceType = INPUT_READER;
            this.reader = reader;
            tryEncodingDecl(true);
            return;
        } else if (stream != null) {
            sourceType = INPUT_STREAM;
            is = stream;
        } else {
            // We have to open our own stream
            // to the URL.
            sourceType = INPUT_EXTERNAL;
            externalEntity = new URL(systemId).openConnection();
            externalEntity.connect();
            is = externalEntity.getInputStream();
        }

        // If we get to here, there must be
        // an InputStream available. detectEncoding() needs mark/reset.
        if (!is.markSupported()) {
            is = new BufferedInputStream(is);
        }

        // Attempt to detect the encoding.
        // NOTE(review): URLConnection.getContentEncoding() reports the
        // Content-Encoding header (e.g. a compression scheme), not the
        // charset of the Content-Type header - confirm this is intended.
        if (encoding == null && externalEntity != null) {
            encoding = externalEntity.getContentEncoding();
        }

        // An externally supplied encoding wins over whatever the entity
        // declares; otherwise sniff the first bytes.
        final boolean ignoreEncoding;
        if (encoding != null) {
            checkEncoding(encoding, false);
            ignoreEncoding = true;
        } else {
            detectEncoding();
            ignoreEncoding = false;
        }

        // Read an XML or text declaration.
        tryEncodingDecl(ignoreEncoding);
    }

    /**
     * Check for an encoding declaration.
     */
    void tryEncodingDecl(boolean ignoreEncoding) throws java.lang.Exception
    {
        // Consumes a leading "<?xml" declaration if the input starts
        // with one; otherwise leaves the input untouched.
        if (!tryRead("<?xml")) {
            return;
        }

        if (!tryWhitespace()) {
            // "<?xml" was not followed by whitespace, so it is only the
            // beginning of a longer target: give "xml" back and let
            // parsePI() take over.
            unread("xml".toCharArray(), 3);
            parsePI();
            return;
        }

        // With saved inputs on the stack we are inside a nested input
        // and expect a text declaration; with none, the XML declaration.
        if (inputStack.size() > 0) {
            parseTextDecl(ignoreEncoding);
        } else {
            parseXMPDecl(ignoreEncoding);
        }
    }

    /**
     * Attempt to detect the encoding of an entity.
     * <p>
     * The trick here (as suggested in the XML standard) is that any entity not
     * in UTF-8, or in UCS-2 with a byte-order mark, <b>must </b> begin with an
     * XML declaration or an encoding declaration; we simply have to look for
     * "<?XML" in various encodings.
     * <p>
     * This method has no way to distinguish among 8-bit encodings. Instead, it
     * assumes UTF-8, then (possibly) revises its assumption later in
     * checkEncoding(). Any ASCII-derived 8-bit encoding should work, but most
     * will be rejected later by checkEncoding().
     * <p>
     * I don't currently detect EBCDIC, since I'm concerned that it could also
     * be a valid UTF-8 sequence; I'll have to do more checking later.
     * 
     * @see #tryEncoding(byte[], byte, byte, byte, byte)
     * @see #tryEncoding(byte[], byte, byte)
     * @see #checkEncoding
     * @see #read8bitEncodingDeclaration
     */
    void detectEncoding() throws java.lang.Exception
    {
        // Peeks at the first four bytes of the byte stream `is` and sets
        // the `encoding` field from the signature found. The stream is
        // rewound afterwards, except that a UCS-2 byte-order mark is
        // consumed.
        byte signature[] = new byte[4];

        // Read the first four bytes for autodetection. A single read()
        // may legally return fewer bytes than requested, which would
        // leave zero bytes in the signature and could be mistaken for a
        // UCS-2/UCS-4 pattern, so keep reading until the signature is
        // full or the stream ends.
        is.mark(4);
        int filled = 0;
        while (filled < signature.length) {
            int got = is.read(signature, filled, signature.length - filled);
            if (got <= 0) {
                break;
            }
            filled += got;
        }
        is.reset();

        // Look for a known signature.
        if (tryEncoding(signature, (byte) 0x00, (byte) 0x00, (byte) 0x00,
                (byte) 0x3c)) {
            // UCS-4 must begin with "<?xml"
            // 0x00 0x00 0x00 0x3c: UCS-4, big-endian (1234)
            encoding = ENCODING_UCS_4_1234;
        } else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x00,
                (byte) 0x00, (byte) 0x00)) {
            // UCS-4 must begin with "<?xml"
            // 0x3c 0x00 0x00 0x00: UCS-4, little-endian (4321)
            encoding = ENCODING_UCS_4_4321;
        } else if (tryEncoding(signature, (byte) 0x00, (byte) 0x00,
                (byte) 0x3c, (byte) 0x00)) {
            // UCS-4 must begin with "<?xml"
            // 0x00 0x00 0x3c 0x00: UCS-4, unusual (2143)
            encoding = ENCODING_UCS_4_2143;
        } else if (tryEncoding(signature, (byte) 0x00, (byte) 0x3c,
                (byte) 0x00, (byte) 0x00)) {
            // UCS-4 must begin with "<?xml"
            // 0x00 0x3c 0x00 0x00: UCS-4, unusual (3412)
            encoding = ENCODING_UCS_4_3412;
        } else if (tryEncoding(signature, (byte) 0xfe, (byte) 0xff)) {
            // UCS-2 with a byte-order marker.
            // 0xfe 0xff: UCS-2, big-endian (12)
            encoding = ENCODING_UCS_2_12;
            // Skip the two bytes of the byte-order mark.
            is.read();
            is.read();
        } else if (tryEncoding(signature, (byte) 0xff, (byte) 0xfe)) {
            // UCS-2 with a byte-order marker.
            // 0xff 0xfe: UCS-2, little-endian (21)
            encoding = ENCODING_UCS_2_21;
            // Skip the two bytes of the byte-order mark.
            is.read();
            is.read();
        } else if (tryEncoding(signature, (byte) 0x00, (byte) 0x3c,
                (byte) 0x00, (byte) 0x3f)) {
            // UCS-2 without a BOM must begin with "<?xml"
            // 0x00 0x3c 0x00 0x3f: UCS-2, big-endian, no byte-order mark
            encoding = ENCODING_UCS_2_12;
            error("no byte-order mark for UCS-2 entity", null, null);
        } else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x00,
                (byte) 0x3f, (byte) 0x00)) {
            // UCS-2 without a BOM must begin with "<?xml"
            // 0x3c 0x00 0x3f 0x00: UCS-2, little-endian, no byte-order mark
            encoding = ENCODING_UCS_2_21;
            error("no byte-order mark for UCS-2 entity", null, null);
        } else if (tryEncoding(signature, (byte) 0x3c, (byte) 0x3f,
                (byte) 0x78, (byte) 0x6d)) {
            // Some kind of 8-bit encoding with "<?xm" at the start
            // 0x3c 0x3f 0x78 0x6d: UTF-8 or other 8-bit markup; start
            // from UTF-8 and read the declaration to find out.
            encoding = ENCODING_UTF_8;
            read8bitEncodingDeclaration();
        } else {
            // Some kind of 8-bit encoding without "<?xml"
            // (otherwise) UTF-8 without encoding/XML declaration
            encoding = ENCODING_UTF_8;
        }
    }

    /**
     * Check for a four-byte signature.
     * <p>
     * Utility routine for detectEncoding().
     * <p>
     * Always looks for some part of " <?XML" in a specific encoding.
     * 
     * @param sig
     *            The first four bytes read.
     * @param b1
     *            The first byte of the signature
     * @param b2
     *            The second byte of the signature
     * @param b3
     *            The third byte of the signature
     * @param b4
     *            The fourth byte of the signature
     * @see #detectEncoding
     */
    boolean tryEncoding(byte sig[], byte b1, byte b2, byte b3, byte b4)
    {
        // True only when the first four bytes of sig are exactly
        // b1, b2, b3, b4, compared in that order.
        if (sig[0] != b1 || sig[1] != b2) {
            return false;
        }
        return sig[2] == b3 && sig[3] == b4;
    }

    /**
     * Check for a two-byte signature.
     * <p>
     * Looks for a UCS-2 byte-order mark.
     * <p>
     * Utility routine for detectEncoding().
     * 
     * @param sig
     *            The first four bytes read.
     * @param b1
     *            The first byte of the signature
     * @param b2
     *            The second byte of the signature
     * @see #detectEncoding
     */
    boolean tryEncoding(byte sig[], byte b1, byte b2)
    {
        // True only when sig starts with the two bytes b1, b2; any
        // further bytes in sig are not looked at.
        if (sig[0] != b1) {
            return false;
        }
        return sig[1] == b2;
    }

    /**
     * This method pushes a string back onto input.
     * <p>
     * It is useful either as the expansion of an internal entity, or for
     * backtracking during the parse.
     * <p>
     * Call pushCharArray() to do the actual work.
     * 
     * @param s
     *            The string to push back onto input.
     * @see #pushCharArray
     */
    void pushString(String ename, String s) throws java.lang.Exception
    {
        // Thin wrapper: hand the whole string, as a character array, to
        // pushCharArray() under the given entity name.
        pushCharArray(ename, s.toCharArray(), 0, s.length());
    }

    /**
     * Push a new internal input source.
     * <p>
     * This method is useful for expanding an internal entity, or for unreading
     * a string of characters. It creates a new readBuffer containing the
     * characters in the array, instead of characters converted from an input
     * byte stream.
     * <p>
     * Note that this method does not special-case zero-length or
     * 1-character arrays: it always saves the current input state and
     * installs the array as the new read buffer.
     * 
     * @param ch
     *            The char array to push.
     * @see #pushString
     * @see #pushURL
     * @see #readBuffer
     * @see #sourceType
     * @see #pushInput
     */
    void pushCharArray(String ename, char ch[], int start, int length)
            throws java.lang.Exception
    {
        // Saves the current input, then makes `ch` itself (no copy) the
        // read buffer of a new internal input.
        pushInput(ename);

        readBuffer = ch;
        readBufferPos = start;
        // NOTE(review): readCh() treats readBufferLength as the end
        // index of the buffer, so storing `length` unchanged is only
        // right for start == 0, which is what pushString() and
        // unread(char[], int) pass. Confirm no caller uses another start.
        readBufferLength = length;
        readBufferOverflow = -1;
        sourceType = INPUT_INTERNAL;
    }

    /**
     * Save the current input source onto the stack.
     * <p>
     * This method saves all of the global variables associated with the current
     * input source, so that they can be restored when a new input source has
     * finished. It also tests for entity recursion.
     * <p>
     * The method saves the following global variables onto a stack using a
     * fixed-length array:
     * <ol>
     * <li>sourceType
     * <li>externalEntity
     * <li>readBuffer
     * <li>readBufferPos
     * <li>readBufferLength
     * <li>line
     * <li>encoding
     * </ol>
     * 
     * @param ename
     *            The name of the entity (if any) causing the new input.
     * @see #popInput
     * @see #sourceType
     * @see #externalEntity
     * @see #readBuffer
     * @see #readBufferPos
     * @see #readBufferLength
     * @see #line
     * @see #encoding
     */
    void pushInput(String ename) throws java.lang.Exception
    {
        // Records `ename` on the entity stack (after checking that it is
        // not already there) and, if an input is active, pushes a
        // snapshot of its state onto the input stack for popInput().

        // Check for entity recursion. Names are compared by value, so a
        // repeated name is detected even when the two String objects
        // are not the same instance. Stack entries may be null (inputs
        // pushed without a name); equals() handles that.
        if (ename != null) {
            Enumeration entities = entityStack.elements();
            while (entities.hasMoreElements()) {
                if (ename.equals(entities.nextElement())) {
                    error("recursive reference to entity", ename, null);
                }
            }
        }
        entityStack.push(ename);

        // Don't bother if there is no input.
        if (sourceType == INPUT_NONE) {
            return;
        }

        // Set up a snapshot of the current input source. The slot order
        // is the contract with popInput(), which reads the same indexes.
        Object input[] = new Object[12];
        input[0] = Integer.valueOf(sourceType);
        input[1] = externalEntity;
        input[2] = readBuffer;
        input[3] = Integer.valueOf(readBufferPos);
        input[4] = Integer.valueOf(readBufferLength);
        input[5] = Integer.valueOf(line);
        input[6] = Integer.valueOf(encoding);
        input[7] = Integer.valueOf(readBufferOverflow);
        input[8] = is;
        input[9] = Integer.valueOf(currentByteCount);
        input[10] = Integer.valueOf(column);
        input[11] = reader;

        // Push it onto the stack.
        inputStack.push(input);
    }

    /**
     * Restore a previous input source.
     * <p>
     * This method restores all of the global variables associated with the
     * current input source.
     * 
     * @exception java.io.EOFException
     *                If there are no more entries on the input stack.
     * @see #pushInput
     * @see #sourceType
     * @see #externalEntity
     * @see #readBuffer
     * @see #readBufferPos
     * @see #readBufferLength
     * @see #line
     * @see #encoding
     */
    void popInput() throws java.lang.Exception
    {
        // Finishes the current input and restores the one saved by the
        // matching pushInput(). Throws EOFException when nothing is
        // left to restore.

        // Flush pending data and tell the handler that the external
        // entity has ended.
        switch (sourceType) {

            case INPUT_EXTERNAL:
                dataBufferFlush();
                if (handler != null && externalEntity != null) {
                    handler.endExternalEntity(externalEntity.getURL()
                            .toString());
                }
                break;

            case INPUT_STREAM:
            case INPUT_READER:
                // Caller-supplied streams and readers are reported
                // under the base URI, and only if one is set.
                dataBufferFlush();
                if (baseURI != null && handler != null) {
                    handler.endExternalEntity(baseURI);
                }
                break;
        }

        // Throw an EOFException if there
        // is nothing else to pop.
        if (inputStack.isEmpty()) {
            throw new EOFException();
        }

        // Both stacks are popped together; the entity name itself is
        // not needed here.
        Object input[] = (Object[]) inputStack.pop();
        entityStack.pop();

        // Restore the snapshot, slot by slot, in pushInput() order.
        sourceType = ((Integer) input[0]).intValue();
        externalEntity = (URLConnection) input[1];
        readBuffer = (char[]) input[2];
        readBufferPos = ((Integer) input[3]).intValue();
        readBufferLength = ((Integer) input[4]).intValue();
        line = ((Integer) input[5]).intValue();
        encoding = ((Integer) input[6]).intValue();
        readBufferOverflow = ((Integer) input[7]).intValue();
        is = (InputStream) input[8];
        currentByteCount = ((Integer) input[9]).intValue();
        column = ((Integer) input[10]).intValue();
        reader = (Reader) input[11];
    }

    /**
     * Return true if we can read the expected character.
     * <p>
     * Note that the character will be removed from the input stream on success,
     * but will be put back on failure. Do not attempt to read the character
     * again if the method succeeds.
     * 
     * @param delim
     *            The character that should appear next. For a insensitive
     *            match, you must supply this in upper-case.
     * @return true if the character was successfully read, or false if it was
     *         not.
     * @see #tryRead(String)
     */
    boolean tryRead(char delim) throws java.lang.Exception
    {
        // Reads one character; if it is not the expected one it is
        // pushed back and false is returned.
        final char actual = readCh();
        if (actual != delim) {
            unread(actual);
            return false;
        }
        return true;
    }

    /**
     * Return true if we can read the expected string.
     * <p>
     * This is simply a convenience method.
     * <p>
     * Note that the string will be removed from the input stream on success,
     * but will be put back on failure. Do not attempt to read the string again
     * if the method succeeds.
     * <p>
     * This method will push back a character rather than an array whenever
     * possible (probably the majority of cases).
     * <p>
     * <b>NOTE: </b> The delimiter may be of any length; the whole string is
     * converted to a character array before matching.
     * 
     * @param delim
     *            The string that should appear next.
     * @return true if the string was successfully read, or false if it was not.
     * @see #tryRead(char)
     */
    boolean tryRead(String delim) throws java.lang.Exception
    {
        // Matches the input against delim one character at a time. On
        // the first mismatch everything consumed so far is pushed back
        // and false is returned.
        final char expected[] = delim.toCharArray();
        int matched = 0;

        while (matched < expected.length) {
            char actual = readCh();
            if (actual == expected[matched]) {
                matched++;
                continue;
            }

            // Give back the offending character first, then the prefix
            // that did match (if any).
            unread(actual);
            if (matched > 0) {
                unread(expected, matched);
            }
            return false;
        }
        return true;
    }

    /**
     * Return true if we can read some whitespace.
     * <p>
     * This is simply a convenience method.
     * <p>
     * This method will push back a character rather than an array whenever
     * possible (probably the majority of cases).
     * 
     * @return true if whitespace was found.
     */
    boolean tryWhitespace() throws java.lang.Exception
    {
        // Looks at the next character: if it is not whitespace it is
        // pushed back; otherwise skipWhitespace() is called to consume
        // what follows.
        final char first = readCh();
        if (!isWhitespace(first)) {
            unread(first);
            return false;
        }
        skipWhitespace();
        return true;
    }

    /**
     * Read all data until we find the specified string.
     * <p>
     * This is especially useful for scanning marked sections.
     * <p>
     * This is a little inefficient right now, since it calls tryRead() for
     * every character.
     * 
     * @param delim
     *            The string delimiter
     * @see #tryRead(String, boolean)
     * @see #readCh
     */
    void parseUntil(String delim) throws java.lang.Exception
    {
        // Appends every character up to (not including) delim to the
        // data buffer; delim itself is consumed.
        final int startLine = line; // remembered for the error message

        try {
            for (;;) {
                if (tryRead(delim)) {
                    break;
                }
                dataBufferAppend(readCh());
            }
        } catch (EOFException e) {
            // Input ran out before the delimiter showed up.
            error("end of input while looking for delimiter (started on line "
                    + startLine + ')', null, delim);
        }
    }

    /**
     * Skip all data until we find the specified string.
     * <p>
     * This is especially useful for scanning comments.
     * <p>
     * This is a little inefficient right now, since it calls tryRead() for
     * every character.
     * 
     * @param delim
     *            The string delimiter
     * @see #tryRead(String, boolean)
     * @see #readCh
     */
    void skipUntil(String delim) throws java.lang.Exception
    {
        // Discards characters one by one until delim has been read;
        // delim itself is consumed as well.
        for (;;) {
            if (tryRead(delim)) {
                return;
            }
            readCh();
        }
    }

    /**
     * Read just the encoding declaration (or XML declaration) at the start of
     * an external entity. When this method is called, we know that the
     * declaration is present (or appears to be). We also know that the entity
     * is in some sort of ASCII-derived 8-bit encoding. The idea of this is to
     * let us read what the 8-bit encoding is before we've committed to
     * converting any more of the file; the XML or encoding declaration must be
     * in 7-bit ASCII, so we're safe as long as we don't go past it.
     */
    void read8bitEncodingDeclaration() throws java.lang.Exception
    {
        // Copies bytes straight from the byte stream into the read
        // buffer, one byte per char, up to and including the first '>'.
        // The read buffer is restarted from position 0.
        readBufferPos = readBufferLength = 0;

        while (true) {
            int ch = is.read();

            // Check for end of stream before storing, so that the EOF
            // marker never lands in the buffer as a character.
            if (ch == -1) {
                error(
                        "end of file before end of XML or encoding declaration.",
                        null, "?>");
                return;
            }

            readBuffer[readBufferLength++] = (char) ch;
            if (ch == '>') {
                return;
            }

            // Buffer full and still no '>': report it and stop, rather
            // than run past the end of the array should error() return.
            if (readBuffer.length == readBufferLength) {
                error("unfinished XML or encoding declaration", null, null);
                return;
            }
        }
    }

    //////////////////////////////////////////////////////////////////////
    // Low-level I/O.
    //////////////////////////////////////////////////////////////////////

    /**
     * Read a chunk of data from an external input source.
     * <p>
     * This is simply a front-end that fills the rawReadBuffer with bytes, then
     * calls the appropriate encoding handler.
     * 
     * @see #encoding
     * @see #rawReadBuffer
     * @see #readBuffer
     * @see #filterCR
     * @see #copyUtf8ReadBuffer
     * @see #copyIso8859_1ReadBuffer
     * @see #copyUcs_2ReadBuffer
     * @see #copyUcs_4ReadBuffer
     */
    void readDataChunk() throws java.lang.Exception
    {
        // Refills readBuffer from the current external source: directly
        // from `reader` for character input, or from `is` through
        // rawReadBuffer and the decoder selected by `encoding`.
        int count;

        // See if we have any overflow: a carriage return that filterCR()
        // held back at the end of the previous chunk goes to the front
        // of the buffer and new data is placed after it.
        if (readBufferOverflow > -1) {
            readBuffer[0] = (char) readBufferOverflow;
            readBufferOverflow = -1;
            readBufferPos = 1;
            sawCR = true;
        } else {
            readBufferPos = 0;
            sawCR = false;
        }

        // Special situation -- we're taking
        // input from a character stream.
        if (sourceType == INPUT_READER) {
            count = reader.read(readBuffer, readBufferPos, READ_BUFFER_MAX - 1);
            if (count < 0) {
                readBufferLength = -1;
            } else {
                readBufferLength = readBufferPos + count;
                filterCR();
                sawCR = false;
                // Reading must resume at index 0, as in the byte-stream
                // path below; otherwise the line end produced from a
                // carried-over carriage return would be skipped.
                readBufferPos = 0;
            }
            return;
        }

        // Read as many bytes as possible
        // into the read buffer.
        count = is.read(rawReadBuffer, 0, READ_BUFFER_MAX);

        // Dispatch to an encoding-specific
        // reader method to populate the
        // readBuffer. The shift arguments give the bit position of each
        // successive input byte within the decoded value.
        switch (encoding) {
            case ENCODING_UTF_8:
                copyUtf8ReadBuffer(count);
                break;

            case ENCODING_ISO_8859_1:
                copyIso8859_1ReadBuffer(count);
                break;

            case ENCODING_UCS_2_12:
                copyUcs2ReadBuffer(count, 8, 0);
                break;

            case ENCODING_UCS_2_21:
                copyUcs2ReadBuffer(count, 0, 8);
                break;

            case ENCODING_UCS_4_1234:
                copyUcs4ReadBuffer(count, 24, 16, 8, 0);
                break;

            case ENCODING_UCS_4_4321:
                copyUcs4ReadBuffer(count, 0, 8, 16, 24);
                break;

            case ENCODING_UCS_4_2143:
                copyUcs4ReadBuffer(count, 16, 24, 0, 8);
                break;

            case ENCODING_UCS_4_3412:
                copyUcs4ReadBuffer(count, 8, 0, 24, 16);
                break;
        }

        // Filter out all carriage returns
        // if we've seen any.
        if (sawCR) {
            filterCR();
            sawCR = false;
        }

        // Reset the position.
        readBufferPos = 0;

        // Only count bytes that were actually read; at end of stream
        // read() returns -1, which must not be subtracted.
        if (count > 0) {
            currentByteCount += count;
        }
    }

    /**
     * Filter carriage returns in the read buffer.
     * <p>
     * CRLF becomes LF; CR becomes LF.
     * 
     * @see #readDataChunk
     * @see #readBuffer
     * @see #readBufferOverflow
     */
    void filterCR()
    {
        // Rewrites readBuffer in place so that every CR LF pair and
        // every lone CR becomes a single LF, then shrinks
        // readBufferLength to match. A CR in the very last position is
        // not converted: it is stored in readBufferOverflow and dropped
        // from the buffer, because the character after it is not
        // available yet.
        int out = 0; // next position to write
        int in = 0;  // next position to read

        readBufferOverflow = -1;

        while (in < readBufferLength) {
            final char ch = readBuffer[in];
            if (ch == '\r') {
                if (in == readBufferLength - 1) {
                    // Trailing CR: hold it back and stop.
                    readBufferOverflow = '\r';
                    break;
                }
                if (readBuffer[in + 1] == '\n') {
                    // CR LF: swallow the LF as part of this line end.
                    in++;
                }
                readBuffer[out] = '\n';
            } else {
                readBuffer[out] = ch;
            }
            out++;
            in++;
        }
        readBufferLength = out;
    }

    /**
     * Convert a buffer of UTF-8-encoded bytes into UTF-16 characters.
     * <p>
     * When readDataChunk() calls this method, the raw bytes are in
     * rawReadBuffer, and the final characters will appear in readBuffer.
     * <p>
     * The tricky part of this is dealing with UTF-8 multi-byte sequences, but
     * it doesn't seem to slow things down too much.
     * 
     * @param count
     *            The number of bytes to convert.
     * @see #readDataChunk
     * @see #rawReadBuffer
     * @see #readBuffer
     * @see #getNextUtf8Byte
     */
    void copyUtf8ReadBuffer(int count) throws java.lang.Exception
    {
        // Decodes the first `count` bytes of rawReadBuffer as UTF-8
        // into readBuffer, starting at readBufferPos, and sets
        // readBufferLength to the end of the decoded text. Sets sawCR
        // when a carriage return is produced.
        int i = 0;
        int j = readBufferPos;

        while (i < count) {
            // Bytes are signed; the masks below only look at the bits
            // they select, so sign extension does not matter.
            final int b1 = rawReadBuffer[i++];

            // Determine whether we are dealing
            // with a one-, two-, three-, or four-
            // byte sequence.
            if ((b1 & 0x80) == 0) {
                // 1-byte sequence: 000000000xxxxxxx = 0xxxxxxx
                readBuffer[j++] = (char) b1;
            } else if ((b1 & 0xe0) == 0xc0) {
                // 2-byte sequence: 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
                readBuffer[j++] = (char) (((b1 & 0x1f) << 6) | getNextUtf8Byte(
                        i++, count));
            } else if ((b1 & 0xf0) == 0xe0) {
                // 3-byte sequence: zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy
                // 10xxxxxx
                readBuffer[j++] = (char) (((b1 & 0x0f) << 12)
                        | (getNextUtf8Byte(i++, count) << 6) | getNextUtf8Byte(
                        i++, count));
            } else if ((b1 & 0xf8) == 0xf0) {
                // 4-byte sequence: 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
                // encodes the 21-bit value uuuuuzzzzyyyyyyxxxxxx.
                // getNextUtf8Byte() already returns just the six payload
                // bits of each continuation byte.
                int b2 = getNextUtf8Byte(i++, count);
                int b3 = getNextUtf8Byte(i++, count);
                int b4 = getNextUtf8Byte(i++, count);
                int codePoint = ((b1 & 0x07) << 18) | (b2 << 12) | (b3 << 6)
                        | b4;
                if (codePoint < 0x10000 || codePoint > 0x10ffff) {
                    // Overlong form or beyond the Unicode range: cannot
                    // be expressed as a surrogate pair.
                    encodingError("illegal 4-byte UTF-8 sequence", b1, i);
                } else {
                    // Standard UTF-16 split: subtract 0x10000, then the
                    // top ten bits go into the high surrogate (0xd800)
                    // and the bottom ten into the low one (0xdc00).
                    int offset = codePoint - 0x10000;
                    readBuffer[j++] = (char) (0xd800 | (offset >> 10));
                    readBuffer[j++] = (char) (0xdc00 | (offset & 0x3ff));
                }
            } else {
                // Otherwise, the 8th bit may not be set in UTF-8
                encodingError("bad start for UTF-8 multi-byte sequence", b1, i);
            }

            // The j > 0 guard covers the case where nothing has been
            // written yet (an error was reported for the first byte).
            if (j > 0 && readBuffer[j - 1] == '\r') {
                sawCR = true;
            }
        }
        // How many characters have we read?
        readBufferLength = j;
    }

    /**
     * Return the next byte value in a UTF-8 sequence. If it is not possible to
     * get a byte from the current entity, throw an exception.
     * 
     * @param pos
     *            The current position in the rawReadBuffer.
     * @param count
     *            The number of bytes in the rawReadBuffer
     * @return The significant six bits of a non-initial byte in a UTF-8
     *         sequence.
     * @exception EOFException
     *                If the sequence is incomplete.
     */
    int getNextUtf8Byte(int pos, int count) throws java.lang.Exception
    {
        // Fetches one continuation byte of a multi-byte UTF-8 sequence
        // and returns its low six bits.
        final int val;

        if (pos >= count) {
            // The sequence straddles the end of rawReadBuffer: take the
            // byte directly from the underlying stream.
            val = is.read();
            if (val == -1) {
                encodingError("unfinished multi-byte UTF-8 sequence at EOF",
                        -1, pos);
            }
        } else {
            val = rawReadBuffer[pos];
        }

        // A continuation byte must have the bit pattern 10xxxxxx.
        boolean continuation = (val & 0xc0) == 0x80;
        if (!continuation) {
            encodingError("bad continuation of multi-byte UTF-8 sequence", val,
                    pos + 1);
        }

        // Return the significant bits.
        return val & 0x3f;
    }

    /**
     * Convert a buffer of ISO-8859-1-encoded bytes into UTF-16 characters.
     * <p>
     * When readDataChunk() calls this method, the raw bytes are in
     * rawReadBuffer, and the final characters will appear in readBuffer.
     * <p>
     * This is a direct conversion, with no tricks.
     * 
     * @param count
     *            The number of bytes to convert.
     * @see #readDataChunk
     * @see #rawReadBuffer
     * @see #readBuffer
     */
    void copyIso8859_1ReadBuffer(int count)
    {
        // Copies `count` bytes from rawReadBuffer into readBuffer,
        // starting at readBufferPos, taking each byte as an unsigned
        // value. Sets sawCR when a carriage return is copied and
        // readBufferLength to the end of the copied text.
        int out = readBufferPos;
        for (int in = 0; in < count; in++) {
            final char ch = (char) (rawReadBuffer[in] & 0xff);
            readBuffer[out++] = ch;
            if (ch == '\r') {
                sawCR = true;
            }
        }
        readBufferLength = out;
    }

    /**
     * Convert a buffer of UCS-2-encoded bytes into UTF-16 characters.
     * <p>
     * When readDataChunk() calls this method, the raw bytes are in
     * rawReadBuffer, and the final characters will appear in readBuffer.
     * 
     * @param count
     *            The number of bytes to convert.
     * @param shift1
     *            The number of bits to shift byte 1.
     * @param shift2
     *            The number of bits to shift byte 2
     * @see #readDataChunk
     * @see #rawReadBuffer
     * @see #readBuffer
     */
    void copyUcs2ReadBuffer(int count, int shift1, int shift2)
            throws java.lang.Exception
    {
        // Combines each pair of bytes from rawReadBuffer into one char
        // of readBuffer, starting at readBufferPos. shift1 and shift2
        // give the bit position of the first and second byte of a pair,
        // which is how the byte order is selected.

        // A positive byte count must be even.
        if (count > 0 && (count & 1) != 0) {
            encodingError("odd number of bytes in UCS-2 encoding", -1, count);
        }

        int out = readBufferPos;
        int in = 0;
        while (in < count) {
            int first = (rawReadBuffer[in] & 0xff) << shift1;
            int second = (rawReadBuffer[in + 1] & 0xff) << shift2;
            final char ch = (char) (first | second);
            readBuffer[out++] = ch;
            if (ch == '\r') {
                sawCR = true;
            }
            in += 2;
        }
        readBufferLength = out;
    }

    /**
     * Convert a buffer of UCS-4-encoded bytes into UTF-16 characters.
     * <p>
     * When readDataChunk() calls this method, the raw bytes are in
     * rawReadBuffer, and the final characters will appear in readBuffer,
     * beginning at readBufferPos. On return readBufferLength holds the index
     * just past the last character written.
     * <p>
     * Java has 16-bit chars, so values are stored as follows:
     * <ul>
     * <li>0x0000 to 0xffff: a single char (sawCR is set for a carriage
     * return);</li>
     * <li>0x10000 to 0x10ffff: a UTF-16 surrogate pair (high surrogate
     * first);</li>
     * <li>anything else, including values whose top bit is set: reported
     * through encodingError() and no character is stored.</li>
     * </ul>
     * A positive <code>count</code> that is not a multiple of 4 is also
     * reported through encodingError(); if that call returns normally the
     * conversion is still attempted.
     * 
     * @param count
     *            The number of bytes to convert.
     * @param shift1
     *            The number of bits to shift byte 1.
     * @param shift2
     *            The number of bits to shift byte 2.
     * @param shift3
     *            The number of bits to shift byte 3.
     * @param shift4
     *            The number of bits to shift byte 4.
     * @throws java.lang.Exception
     *             Whatever encodingError() propagates.
     * @see #readDataChunk
     * @see #rawReadBuffer
     * @see #readBuffer
     */
    void copyUcs4ReadBuffer(int count, int shift1, int shift2, int shift3,
            int shift4) throws java.lang.Exception
    {
        int j = readBufferPos;

        if (count > 0 && (count % 4) != 0) {
            encodingError(
                    "number of bytes in UCS-4 encoding not divisible by 4", -1,
                    count);
        }
        for (int i = 0; i < count; i += 4) {
            final int value = ((rawReadBuffer[i] & 0xff) << shift1)
                    | ((rawReadBuffer[i + 1] & 0xff) << shift2)
                    | ((rawReadBuffer[i + 2] & 0xff) << shift3)
                    | ((rawReadBuffer[i + 3] & 0xff) << shift4);

            if (value >= 0 && value <= 0xffff) {
                // Fits in one UTF-16 code unit. The lower bound matters:
                // a set top bit makes the int negative, and a plain
                // "less than" test would let it through and truncate it.
                readBuffer[j++] = (char) value;
                if (value == '\r') {
                    sawCR = true;
                }
            } else if (value >= 0x10000 && value <= 0x10ffff) {
                // Supplementary code point: subtract 0x10000 to get a
                // 20-bit offset, then put the top ten bits in the high
                // surrogate (0xd800 base) and the bottom ten bits in the
                // low surrogate (0xdc00 base).
                final int offset = value - 0x10000;
                readBuffer[j++] = (char) (0xd800 | (offset >> 10));
                readBuffer[j++] = (char) (0xdc00 | (offset & 0x3ff));
            } else {
                encodingError("value cannot be represented in UTF-16", value, i);
            }
        }
        readBufferLength = j;
    }

    /**
     * Report a character encoding error to the current handler.
     * <p>
     * When <code>value</code> is non-negative it is appended to the message
     * in hexadecimal. The URI passed to the handler is the URL of the
     * current external entity if there is one, otherwise baseURI. The
     * handler's error method is called with -1 as its third argument and
     * <code>offset</code> plus currentByteCount as its fourth.
     * 
     * @param message
     *            The description of the problem.
     * @param value
     *            The offending value, or a negative number to omit it from
     *            the message.
     * @param offset
     *            A position that is added to currentByteCount before being
     *            passed to the handler.
     * @throws java.lang.Exception
     *             Whatever the handler's error method throws.
     */
    void encodingError(String message, int value, int offset)
            throws java.lang.Exception
    {
        final String text = (value < 0)
                ? message
                : message + " (byte value: 0x" + Integer.toHexString(value) + ')';

        final String uri = (externalEntity == null)
                ? baseURI
                : externalEntity.getURL().toString();

        handler.error(text, uri, -1, offset + currentByteCount);
    }

    //////////////////////////////////////////////////////////////////////
    // Local Variables.
    //////////////////////////////////////////////////////////////////////

    /**
     * Put the per-parse state back to its starting values.
     * <p>
     * Resets the error count and source position, allocates fresh data,
     * name, attribute, raw-read and symbol-table arrays, and clears the
     * current element, input source and context.
     * <p>
     * NOTE(review): cleanupVariables() sets inputStack and entityStack to
     * null and this method does not recreate them; confirm that a parser
     * instance is never reused for a second parse, or that they are
     * restored elsewhere.
     */
    void initializeVariables()
    {
        // Nothing reported yet; positioned at the start of line 1.
        column = 0;
        line = 1;
        errorCount = 0;

        // Fresh, empty buffers for character data and for names.
        dataBufferPos = 0;
        dataBuffer = new char[DATA_BUFFER_INITIAL];
        nameBufferPos = 0;
        nameBuffer = new char[NAME_BUFFER_INITIAL];

        // No element is open and no input source is active.
        currentElementContent = CONTENT_UNDECLARED;
        currentElement = null;
        externalEntity = null;
        sourceType = INPUT_NONE;

        // Empty attribute table and raw byte buffer.
        tagAttributePos = 0;
        tagAttributes = new String[100];
        rawReadBuffer = new byte[READ_BUFFER_MAX];

        // No overflow character pending, no special context.
        context = CONTEXT_NONE;
        readBufferOverflow = -1;

        // Empty table for internalised names.
        symbolTable = new Object[SYMBOL_TABLE_LENGTH];
    }

    /**
     * Release per-parse state once parsing is over so that it can be
     * garbage collected.
     * <p>
     * The data and name buffers, both stacks and the external entity
     * reference are dropped, and the error count and source position are
     * set to -1. Fields not assigned here are left as they are, so that
     * they remain available for queries.
     */
    void cleanupVariables()
    {
        // Counters: -1 marks them as no longer meaningful.
        column = -1;
        line = -1;
        errorCount = -1;

        // Element and input-source state back to "nothing".
        currentElementContent = CONTENT_UNDECLARED;
        currentElement = null;
        sourceType = INPUT_NONE;

        // Drop references so the objects can be collected.
        nameBuffer = null;
        dataBuffer = null;
        externalEntity = null;
        entityStack = null;
        inputStack = null;
    }

    //
    // The current XML handler interface.
    //
    XmlHandler handler;

    //
    // I/O information.
    //
    private Reader reader; // current reader

    private InputStream is; // current input stream

    private int line; // current line number

    private int column; // current column number

    private int sourceType; // type of input source

    private Stack inputStack = new Stack () ; // stack of input sources

    private URLConnection externalEntity; // current external entity

    private int encoding; // current character encoding.

    private int currentByteCount; // how many bytes read from current source.

    //
    // Maintain a count of errors.
    //
    private int errorCount;

    //
    // Buffers for decoded but unparsed character input.
    //
    private final static int READ_BUFFER_MAX = 16384;

    private char readBuffer[];

    private int readBufferPos;

    private int readBufferLength;

    private int readBufferOverflow; // overflow character from last data chunk.

    //
    // Buffer for undecoded raw byte input.
    //
    private byte rawReadBuffer[];

    //
    // Buffer for parsed character data.
    //
    private static int DATA_BUFFER_INITIAL = 4096;

    private char dataBuffer[];

    private int dataBufferPos;

    //
    // Buffer for parsed names.
    //
    private static int NAME_BUFFER_INITIAL = 1024;

    private char nameBuffer[];

    private int nameBufferPos;

    //
    // Hashtables for DTD information on elements, entities, and notations.
    //
    private Hashtable elementInfo = new Hashtable();

    private Hashtable entityInfo = new Hashtable ();

    private Hashtable notationInfo;

    //
    // Element type currently in force.
    //
    private String currentElement;

    private int currentElementContent;

    //
    // Base external identifiers for resolution.
    //
    private String basePublicId;

    private String baseURI;

    private Reader baseReader;

    private InputStream baseInputStream;

    //
    // Stack of entity names, to help detect recursion.
    //
    private Stack entityStack = new Stack () ;

    //
    // Are we in a context where PEs are allowed?
    //
    private int context;

    //
    // Symbol table, for internalising names.
    //
    private Object symbolTable[];

    private final static int SYMBOL_TABLE_LENGTH = 1087;

    //
    // Hash table of attributes found in current start tag.
    //
    private String tagAttributes[];

    private int tagAttributePos;

    //
    // Utility flag: have we noticed a CR while reading the last
    // data chunk? If so, we will have to go back and normalise
    // CR/LF.
    //
    private boolean sawCR;
}