DTDParser.java example

Explorer
rngzip-master
/*
 * @(#)Parser.java	1.14 00/08/27
 * 
 * Copyright (c) 1998-2000 Sun Microsystems, Inc. All Rights Reserved.
 * 
 * This software is the confidential and proprietary information of Sun
 * Microsystems, Inc. ("Confidential Information").  You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Sun.
 * 
 * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
 * SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR ANY DAMAGES
 * SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING
 * THIS SOFTWARE OR ITS DERIVATIVES.
 */


package com.sun.msv.scanner.dtd;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Locale;
import java.util.Set;
import java.util.Vector;

import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * This implements parsing of XML 1.0 DTDs.
 *
 * This conforms to the portion of the XML 1.0 specification related 
 * to the external DTD subset.
 *
 * For multi-language applications (such as web servers using XML 
 * processing to create dynamic content), a method supports choosing 
 * a locale for parser diagnostics which is both understood by the 
 * message recipient and supported by the parser.
 *
 * This parser produces a stream of parse events.  It supports some
 * features (exposing comments, CDATA sections, and entity references) 
 * which are not required to be reported by conformant XML processors.  
 *
 * @author David Brownell
 * @author Janet Koenig
 * @author Kohsuke KAWAGUCHI
 * @version $Id: DTDParser.java,v 1.6 2003/01/09 21:00:16 kk122374 Exp $
 */
public class DTDParser {
    // Names of the XML 1.0 attribute types, exposed as shared constants.
    // NOTE(review): presumably these are the type strings handed to the
    // DTDEventListener for attribute declarations -- the code that uses
    // them is not visible in this part of the file; confirm.
    public final static String TYPE_CDATA = "CDATA";
    public final static String TYPE_ID = "ID";
    public final static String TYPE_IDREF = "IDREF";
    public final static String TYPE_IDREFS = "IDREFS";
    public final static String TYPE_ENTITY = "ENTITY";
    public final static String TYPE_ENTITIES = "ENTITIES";
    public final static String TYPE_NMTOKEN = "NMTOKEN";
    public final static String TYPE_NMTOKENS = "NMTOKENS";
    public final static String TYPE_NOTATION = "NOTATION";
    public final static String TYPE_ENUMERATION = "ENUMERATION";

	
    // stack of input entities being merged
    // (this field is the entity currently being read; parseInternal()
    // creates it and closes/nulls it again when the parse ends)
    private InputEntity		in;

    // temporaries reused during parsing
    // (allocated by init(), released in parseInternal()'s finally block)
    private StringBuffer	strTmp;
    private char		nameTmp [];
    private NameCache		nameCache;
    private char		charTmp [] = new char [2];

    // parsing modes
    // (true only while parseLiteral() is reading an attribute value)
    private boolean		isInAttribute = false;

    // temporary DTD parsing state
    // (when true, maybeWhitespace() reads through getc() so that
    // parameter entities can be spliced in; see maybeWhitespace())
    private boolean		doLexicalPE;

    // DTD state, used during parsing
//    private SimpleHashtable	elements = new SimpleHashtable (47);
	// names of the element types declared so far (duplicate detection
	// in maybeElementDecl())
	protected final Set declaredElements = new java.util.HashSet();
    // parameter entities, keyed by name
    private SimpleHashtable	params = new SimpleHashtable (7);

    // exposed to package-private subclass
    Hashtable			notations = new Hashtable (7);
    // general entities, keyed by name; init() preloads the five builtins
    SimpleHashtable		entities = new SimpleHashtable (17);

    // ID name -> Boolean; afterRoot() reports every entry still FALSE
    private SimpleHashtable     ids = new SimpleHashtable ();

    // listeners for DTD parsing events
    private DTDEventListener    dtdHandler;

    // init() installs defaults for these when they are still null
    private EntityResolver      resolver;
    private Locale              locale;

    // string constants -- use these copies so "==" works
    // package private
    static final String		strANY = "ANY";
    static final String		strEMPTY = "EMPTY";
	
    /**
     * Used by applications to request locale for diagnostics.
     *
     * @param l The locale to use, or null to use system defaults
     *	        (which may include only message IDs).
     *
     * @exception SAXException If no diagnostic messages are 
     *                         available in that locale.
     */
    public void setLocale (Locale l) throws SAXException {
        // A null locale is always accepted; only a concrete locale has to
        // be backed by a message catalog.
        if (l != null) {
            boolean supported = messages.isLocaleSupported (l.toString ());
            if (!supported) {
                // The complaint itself is rendered in the locale that is
                // currently in effect, not in the rejected one.
                Object [] args = new Object [] { l };
                throw new SAXException (
                        messages.getMessage (locale, "P-078", args));
            }
        }
        this.locale = l;
    }

    /** 
     * Returns the diagnostic locale. 
     */
    public Locale getLocale () {
        // May still be null if no locale was set and no parse has run yet;
        // init() fills in the platform default at the start of a parse.
        return this.locale;
    }
    
    /**
     * Chooses a client locale to use for diagnostics, using the first
     * language specified in the list that is supported by this parser.
     * That locale is then set using <a href="#setLocale(java.util.Locale)">
     * setLocale()</a>.  Such a list could be provided by a variety of user
     * preference mechanisms, including the HTTP <em>Accept-Language</em>
     * header field.
     *
     * @see MessageCatalog
     *
     * @param languages Array of language specifiers, ordered with the most
     *	preferable one at the front.  For example, "en-ca" then "fr-ca",
     *  followed by "zh_CN".  Both RFC 1766 and Java styles are supported.
     * @return The chosen locale, or null.
     */
    public Locale chooseLocale (String languages [])
                  throws SAXException {

        Locale chosen = messages.chooseLocale (languages);
        if (chosen == null) {
            // Nothing in the list is usable; the current locale is kept.
            return null;
        }
        setLocale (chosen);
        return chosen;
    }

    /** 
     * Lets applications control entity resolution. 
     */
    public void setEntityResolver (EntityResolver r) {
        // null is allowed: init() installs a default Resolver in that case
        this.resolver = r;
    }
 
    /** 
     * Returns the object used to resolve entities 
     */
    public EntityResolver getEntityResolver () {
        return this.resolver;
    }

    /** 
     * Used by applications to set handling of DTD parsing events. 
     */
    public void setDtdHandler (DTDEventListener handler) {
        this.dtdHandler = handler;
        if (handler == null) {
            return;
        }

        // Hand the listener a locator that simply forwards to this parser,
        // so it always reflects the position being parsed at the moment
        // it is queried.
        Locator position = new Locator () {
            public String getPublicId () {
                return DTDParser.this.getPublicId ();
            }
            public String getSystemId () {
                return DTDParser.this.getSystemId ();
            }
            public int getLineNumber () {
                return DTDParser.this.getLineNumber ();
            }
            public int getColumnNumber () {
                return DTDParser.this.getColumnNumber ();
            }
        };
        handler.setDocumentLocator (position);
    }

    /** 
     * Returns the handler used for DTD parsing events. 
     */
    public DTDEventListener getDtdHandler () {
        return this.dtdHandler;
    }

    /** 
     * Parse a DTD. 
     */
    public void parse (InputSource in)
                throws IOException, SAXException {
        // reset all per-parse state first, then run the actual parse
        this.init ();
        this.parseInternal (in);
    }

    /** 
     * Parse a DTD. 
     */
    public void parse (String uri)
        throws IOException, SAXException
    {
        // init() guarantees that a resolver is installed
        init ();

        InputSource source = resolver.resolveEntity (null, uri);

        if (source == null) {
            // The resolver declined to handle the URI, so open it ourselves.
            source = Resolver.createInputSource (new java.net.URL (uri), false);
        } else if (source.getSystemId () == null) {
            // The resolver returned a source without a system ID: warn,
            // then patch it up enough that relative URIs can be resolved.
            warning ("P-065", null);
            source.setSystemId (uri);
        }

        parseInternal (source);
    }

    // makes sure the parser is reset to "before a document"
    /**
     * Puts the parser back into its "before a document" state: drops the
     * current input, allocates fresh scratch buffers, empties the
     * declaration tables, re-registers the five predefined entities and
     * fills in default locale, resolver and handler where none was set.
     */
    private void init ()
    {
        this.in = null;

        // scratch data used while parsing
        this.strTmp = new StringBuffer ();
        this.nameTmp = new char [20];
        this.nameCache = new NameCache ();

        // parsing modes
        this.isInAttribute = false;
        this.doLexicalPE = false;

        // declarations collected by a previous parse
        this.entities.clear ();
        this.notations.clear ();
        this.params.clear ();
        this.declaredElements.clear ();

        // predefined references ... re-interpreted later
        builtin ("amp", "&");
        builtin ("lt", "<");
        builtin ("gt", ">");
        builtin ("quot", "\"");
        builtin ("apos", "'");

        // defaults for anything the application did not configure
        if (this.locale == null) {
            this.locale = Locale.getDefault ();
        }
        if (this.resolver == null) {
            this.resolver = new Resolver ();
        }
        if (this.dtdHandler == null) {
            this.dtdHandler = new DTDHandlerBase ();
        }
    }

    /**
     * Registers a predefined general entity in the {@code entities} table.
     *
     * @param entityName name of the entity, e.g. "amp"
     * @param entityValue its replacement text
     */
    private void builtin (String entityName, String entityValue) {
        char [] replacement = entityValue.toCharArray ();
        entities.put (entityName, new InternalEntity (entityName, replacement));
    }


    ////////////////////////////////////////////////////////////////
    //
    // parsing is by recursive descent, code roughly
    // following the BNF rules except tweaked for simple
    // lookahead.  rules are more or less in numeric order,
    // except where code sharing suggests other structures.
    //
    // a classic benefit of recursive descent parsers:  it's
    // relatively easy to get diagnostics that make sense.
    //
    ////////////////////////////////////////////////////////////////


    /**
     * Parses one external DTD subset from the given input and reports it
     * to the DTD handler, bracketed by startDTD()/endDTD().
     *
     * <p>Whatever the outcome, the temporaries allocated by init(), the
     * input entity and all collected DTD tables are released again in the
     * finally block, so no state survives into the next parse.
     *
     * @param input the DTD to read; null is reported through fatal("P-000")
     * @exception IOException on I/O errors while reading the input
     * @exception SAXException on parse errors; an unexpected
     *      RuntimeException is rethrown as a SAXParseException that carries
     *      the current location and the original exception as its cause
     */
    private void parseInternal (InputSource input)
                 throws IOException, SAXException {

        if (input == null)
            fatal ("P-000");

        try {
            in = InputEntity.getInputEntity (dtdHandler, locale);
            in.init (input, null, null, false);

            dtdHandler.startDTD (in);

            // [30] extSubset ::= TextDecl? extSubsetDecl
            // [31] extSubsetDecl ::= ( markupdecl | conditionalSect
            //          | PEReference | S )*
            //  ... same as [79] extPE, which is where the code is

            ExternalEntity externalSubset = new ExternalEntity (in);
            externalParameterEntity (externalSubset);

            // anything left over is not part of the grammar above
            if (!in.isEOF ()) {
                fatal ("P-001", new Object []
                        { Integer.toHexString (((int) getc ())) });
            }
            afterRoot ();
            dtdHandler.endDTD ();

        } catch (EndOfInputException e) {
            if (!in.isDocument ()) {
                String name = in.getName ();
                do {    // force a relevant URI and line number
                    in = in.pop ();
                } while (in.isInternal ());
                fatal ("P-002", new Object [] { name });
            } else {
                fatal ("P-003", null);
            }
        } catch (RuntimeException e) {
            // Don't discard the location that triggered the exception, and
            // keep the original exception (with its stack trace) attached
            // as the embedded cause instead of dumping it to stderr.
            String message = e.getMessage () != null
                    ? e.getMessage ()
                    : e.getClass ().getName ();
            throw new SAXParseException (message,
                    getPublicId (), getSystemId (),
                    getLineNumber (), getColumnNumber (),
                    e);

        } finally {
            // recycle temporary data used during parsing
            strTmp = null;
            nameTmp = null;
            nameCache = null;

            // ditto input sources etc
            if (in != null) {
                in.close ();
                in = null;
            }

            // get rid of all DTD info ... some of it would be
            // useful for editors etc, investigate later.

            params.clear ();
            entities.clear ();
            notations.clear ();
            declaredElements.clear ();
            ids.clear ();
        }
    }

    /**
     * Reports, through error("V-024"), every name in the {@code ids} table
     * whose recorded value is still FALSE.
     *
     * @exception SAXException if the error handling raises one
     */
    void afterRoot () throws SAXException
    {
        // Make sure all IDREFs match declared ID attributes.  We scan
        // after the document element is parsed, since XML allows forward
        // references, and only now can we know if they're all resolved.

        for (Enumeration e = ids.keys (); e.hasMoreElements (); ) {
            String id = (String) e.nextElement ();
            Boolean value = (Boolean) ids.get (id);

            // compare by value: an identity test would miss a FALSE that
            // is not the canonical Boolean.FALSE instance
            if (Boolean.FALSE.equals (value))
                error ("V-024", new Object [] { id });
        }
    }


    // role is for diagnostics
    /**
     * Consumes mandatory whitespace; reports "P-004" through fatal() when
     * there is none.
     *
     * @param roleId message ID describing what was being parsed; it is
     *      only used to build the diagnostic
     */
    private void whitespace (String roleId)
                 throws IOException, SAXException {

        // [3] S ::= (#x20 | #x9 | #xd | #xa)+
        if (maybeWhitespace ()) {
            return;
        }
        Object [] args = new Object [] { messages.getMessage (locale, roleId) };
        fatal ("P-004", args);
    }

    // S?
    /**
     * Skips optional whitespace (S?).
     *
     * @return true if at least one whitespace character was consumed
     */
    private boolean maybeWhitespace ()
                    throws IOException, SAXException {

        // Without lexical PE processing the input entity can do the work.
        if (!doLexicalPE) {
            return in.maybeWhitespace ();
        }

        // see getc() for the PE logic -- this lets us splice
        // expansions of PEs in "anywhere".  getc() has smarts,
        // so for external PEs we don't bypass it.

        // XXX we can marginally speed PE handling, and certainly
        // be cleaner (hence potentially more correct), by using
        // the observations that expanded PEs only start and stop
        // where whitespace is allowed.  getc wouldn't need any
        // "lexical" PE expansion logic, and no other method needs
        // to handle termination of PEs.  (parsing of literals would
        // still need to pop entities, but not parsing of references
        // in content.)

        boolean sawSpace = false;

        for (char ch = getc ();
                ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
                ch = getc ()) {
            sawSpace = true;

            // this gracefully ends things when we stop playing
            // with internal parameters.  caller should have a
            // grammar rule allowing whitespace at end of entity.
            if (in.isEOF () && !in.isInternal ()) {
                return true;
            }
        }

        // the character that ended the run is not ours to consume
        ungetc ();
        return sawSpace;
    }

    /**
     * Reads a Name if one starts at the current position.
     *
     * @return the name, or null if the next character cannot start a Name
     */
    private String maybeGetName ()
                   throws IOException, SAXException {

        NameCacheEntry cached = maybeGetNameCacheEntry ();
        if (cached == null) {
            return null;
        }
        return cached.name;
    }

    /**
     * Reads a Name if one starts at the current position and returns its
     * name cache entry.
     *
     * @return the entry, or null (with the character pushed back) if the
     *      next character cannot start a Name
     */
    private NameCacheEntry maybeGetNameCacheEntry ()
                           throws IOException, SAXException {

        // [5] Name ::= (Letter|'_'|':') (Namechar)*
        char first = getc ();
        boolean startsName =
                XmlChars.isLetter (first) || first == ':' || first == '_';

        if (startsName) {
            return nameCharString (first);
        }
        ungetc ();
        return null;
    }

    // Used when parsing enumerations
    /**
     * Reads an Nmtoken; reports "P-006" through fatal() when the next
     * character is not a name character.
     *
     * @return the token text
     */
    private String getNmtoken ()
                   throws IOException, SAXException {

        // [7] Nmtoken ::= (Namechar)+
        char first = getc ();
        if (!XmlChars.isNameChar (first)) {
            fatal ("P-006", new Object [] { new Character (first) });
        }
        NameCacheEntry entry = nameCharString (first);
        return entry.name;
    }

    // n.b. this gets used when parsing attribute values (for
    // internal references) so we can't use strTmp; it's also
    // a hotspot for CPU and memory in the parser (called at least
    // once for each element) so this has been optimized a bit.

    /**
     * Collects the remaining name characters after an already-read first
     * character into {@code nameTmp} and looks the result up in the name
     * cache.
     *
     * @param c the first character of the name, already consumed
     * @return the name cache entry for the collected characters
     */
    private NameCacheEntry nameCharString (char c)
                           throws IOException, SAXException {

        int length = 1;
        char next;

        nameTmp [0] = c;

        // getNameChar() yields 0 once the next character is not part of
        // the name
        while ((next = in.getNameChar ()) != 0) {
            if (length >= nameTmp.length) {
                // buffer is full: grow it by ten characters
                char [] grown = new char [nameTmp.length + 10];
                System.arraycopy (nameTmp, 0, grown, 0, nameTmp.length);
                nameTmp = grown;
            }
            nameTmp [length++] = next;
        }
        return nameCache.lookupEntry (nameTmp, length);
    }

    //
    // much similarity between parsing entity values in DTD
    // and attribute values (in DTD or content) ... both follow
    // literal parsing rules, newline canonicalization, etc
    //
    // leaves value in 'strTmp' ... either a "replacement text" (4.5),
    // or else partially normalized attribute value (the first bit
    // of 3.3.3's spec, without the "if not CDATA" bits).
    //
    private void parseLiteral (boolean isEntityValue)
                 throws IOException, SAXException {

	// Reads one quoted literal and leaves its text in 'strTmp'.
	// isEntityValue selects the EntityValue rules (general entity
	// references are kept as text, parameter entities are expanded);
	// otherwise the AttValue rules apply (general entities are expanded,
	// whitespace becomes a space, '<' is rejected).

	// [9] EntityValue ::=
	//	'"' ([^"&%] | Reference | PEReference)* '"'
	//    |	"'" ([^'&%] | Reference | PEReference)* "'"
	// [10] AttValue ::=
	//	'"' ([^"&]  | Reference		     )* '"'
	//    |	"'" ([^'&]  | Reference		     )* "'"
	char		quote = getc ();
	char		c;
	// the entity the literal started in; only a quote read from this
	// same entity may close the literal
	InputEntity	source = in;

	if (quote != '\'' && quote != '"') {
	    fatal ("P-007");
        }

	// don't report entity expansions within attributes,
	// they're reported "fully expanded" via SAX
	isInAttribute = !isEntityValue;

	// get value into strTmp
	strTmp = new StringBuffer ();

	// scan, allowing entity push/pop wherever ...
	// expanded entities can't terminate the literal!
	for (;;) {
	    if (in != source && in.isEOF ()) {
		// we don't report end of parsed entities
		// within attributes (no SAX hooks)
		in = in.pop ();
		continue;
	    }
	    if ((c = getc ()) == quote && in == source) {
		break;
            }

	    //
	    // Basically the "reference in attribute value"
	    // row of the chart in section 4.4 of the spec
	    //
	    if (c == '&') {
		String	entityName = maybeGetName ();

		if (entityName != null) {
		    nextChar (';', "F-020", entityName);

		    // 4.4 says:  bypass these here ... we'll catch
		    // forbidden refs to unparsed entities on use
		    if (isEntityValue) {
			strTmp.append ('&');
			strTmp.append (entityName);
			strTmp.append (';');
			continue;
		    }
		    // only reached for attribute values (isEntityValue is
		    // false here): push the entity's text as further input
		    expandEntityInLiteral (entityName, entities, isEntityValue);


		// character references are always included immediately
		} else if ((c = getc ()) == '#') {
		    int tmp = parseCharNumber ();

		    // values above 0xffff do not fit in one char; they go
		    // through charTmp, and the return value of
		    // surrogatesToCharTmp tells whether charTmp[1] is used
		    if (tmp > 0xffff) {
			tmp = surrogatesToCharTmp (tmp);
			strTmp.append (charTmp [0]);
			if (tmp == 2)
			    strTmp.append (charTmp [1]);
		    } else
			strTmp.append ((char) tmp);
		} else
		    fatal ("P-009");
		continue;

	    }

	    // expand parameter entities only within entity value literals
	    if (c == '%' && isEntityValue) {
		String	entityName = maybeGetName ();

		if (entityName != null) {
		    nextChar (';', "F-021", entityName);
                    expandEntityInLiteral (entityName, params, isEntityValue);
		    continue;
		} else
		    fatal ("P-011");
	    }

	    // For attribute values ...
	    if (!isEntityValue) {
		// 3.3.3 says whitespace normalizes to space...
		if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
		    strTmp.append (' ');
		    continue;
		}

		// "<" not legal in parsed literals ...
		if (c == '<')
		    fatal ("P-012");
	    }

	    strTmp.append (c);
	}
	// NOTE(review): the flag is only cleared on normal completion; an
	// exception thrown above leaves it set until init() runs again.
	isInAttribute = false;
    }

    // does a SINGLE expansion of the entity (often reparsed later)
    /**
     * Does a single expansion of the named entity by pushing its text (or
     * its external source) as further input; the result is often reparsed
     * later.
     *
     * @param name name of the referenced entity
     * @param table the table to look the name up in ({@code entities} or
     *      {@code params})
     * @param isEntityValue true while an entity value literal is being
     *      parsed; an external entity is only acceptable in that case
     */
    private void expandEntityInLiteral( String name, SimpleHashtable table,
	                                boolean isEntityValue) 
                 throws IOException, SAXException {

        Object found = table.get (name);

        if (found == null) {
            //
            // Note:  much confusion about whether spec requires such
            // errors to be fatal in many cases, but none about whether
            // it allows "normal" errors to be unrecoverable!
            //
            String messageId = (table == params) ? "V-022" : "P-014";
            fatal (messageId, new Object [] { name });
            return;
        }

        if (found instanceof InternalEntity) {
            InternalEntity internal = (InternalEntity) found;
            pushReader (internal.buf, name, !internal.isPE);
            return;
        }

        if (found instanceof ExternalEntity) {
            if (!isEntityValue) {   // must be a PE ...
                fatal ("P-013", new Object [] { name });
            }
            // XXX if this returns false ...
            pushReader ((ExternalEntity) found);
        }
    }

    // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
    // for PUBLIC and SYSTEM literals, also "<?xml ...type='literal'?>'
    
    // NOTE:  XML spec should explicitly say that PE ref syntax is
    // ignored in PIs, comments, SystemLiterals, and Pubid Literal
    // values ... can't process the XML spec's own DTD without doing
    // that for comments.

    /**
     * Reads a quoted string verbatim, without any reference processing.
     * The text is accumulated in {@code strTmp}.
     *
     * @param type message ID naming the kind of literal, for diagnostics
     * @param extra argument for that message, may be null
     * @return the text between the quotes
     */
    private String getQuotedString (String type, String extra)
                   throws IOException, SAXException {

        // use in.getc to bypass PE processing
        char delimiter = in.getc ();

        if (delimiter != '\'' && delimiter != '"') {
            Object [] roleArgs = new Object [] { extra };
            fatal ("P-015", new Object [] {
                    messages.getMessage (locale, type, roleArgs)
                    });
        }

        strTmp = new StringBuffer ();
        for (char ch = in.getc (); ch != delimiter; ch = in.getc ()) {
            strTmp.append (ch);
        }
        return strTmp.toString ();
    }


    /**
     * Reads a public identifier literal, reports any character outside
     * the PubidChar set as "P-016", and returns the result of
     * normalize(false) applied to the literal.
     */
    private String parsePublicId () throws IOException, SAXException {

        // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
        // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
        String literal = getQuotedString ("F-033", null);

        for (int i = 0; i < literal.length (); i++) {
            char ch = literal.charAt (i);
            boolean listed =
                    " \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(ch) != -1;
            boolean upper = ch >= 'A' && ch <= 'Z';
            boolean lower = ch >= 'a' && ch <= 'z';

            if (!listed && !upper && !lower) {
                fatal ("P-016", new Object [] { new Character (ch) });
            }
        }

        // normalize() works on strTmp, so load the literal into it
        strTmp = new StringBuffer ();
        strTmp.append (literal);
        return normalize (false);
    }

    // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
    // handled by:  InputEntity.parsedContent()

    private boolean maybeComment (boolean skipStart)
                    throws IOException, SAXException {

	// Consumes a comment if one starts here.  Returns false (nothing
	// consumed) when the input does not start with the comment opener,
	// true once the closing "-->" has been read.  With skipStart set,
	// only "!--" is looked for instead of "<!--".

	// [15] Comment ::= '<!--'
	//		( (Char - '-') | ('-' (Char - '-'))*
	//		'-->'
	if (!in.peek (skipStart ? "!--" : "<!--", null))
	    return false;

	boolean		savedLexicalPE = doLexicalPE;
	boolean		saveCommentText;

	doLexicalPE = false;
	// NOTE(review): hard-coded to false, so every "if (saveCommentText)"
	// branch below is dead and dtdHandler.comment() is never called
	// from here.
	saveCommentText = false;
	if (saveCommentText)
	    strTmp = new StringBuffer ();

    oneComment:
	for (;;) {
	    try {
		// bypass PE expansion, but permit PEs
		// to complete ... valid docs won't care.
		for (;;) {
		    int c = getc ();
		    if (c == '-') {
			c = getc ();
			if (c != '-') {
			    // a single '-' is ordinary comment text; push the
			    // following character back and keep scanning
			    if (saveCommentText)
				strTmp.append ('-');
			    ungetc ();
			    continue;
			}
			// "--" must be followed directly by '>'
			nextChar ('>', "F-022", null);
			break oneComment;
		    }
		    if (saveCommentText)
			strTmp.append ((char)c);
		}
	    } catch (EndOfInputException e) {
		//
		// This is fatal EXCEPT when we're processing a PE...
		// in which case a validating processor reports an error.
		// External PEs are easy to detect; internal ones we
		// infer by being an internal entity outside an element.
		//
		// NOTE(review): as written, fatal() is called in both
		// cases; the "EXCEPT" path described above only adds the
		// error() report before it.  Confirm whether a pop-and-
		// continue was intended for the PE case.
		if (in.isInternal ()) {
		    error ("V-021", null);
		}
		fatal ("P-017");
	    }
	}
	// NOTE(review): not restored when an exception leaves the loop.
	doLexicalPE = savedLexicalPE;
	if (saveCommentText)
	    dtdHandler.comment (strTmp.toString ());
	return true;
    }

    private boolean maybePI (boolean skipStart)
                    throws IOException, SAXException {

	// Consumes a processing instruction if one starts here and reports
	// it through dtdHandler.processingInstruction().  Returns false
	// (nothing consumed) when the input does not start with the PI
	// opener.  With skipStart set, only "?" is looked for instead of
	// "<?".

	// [16] PI ::= '<?' PITarget
	//		(S (Char* - (Char* '?>' Char*)))?
	//		'?>'
	// [17] PITarget ::= Name - (('X'|'x')('M'|'m')('L'|'l')
	boolean		savedLexicalPE = doLexicalPE;

	if (!in.peek (skipStart ? "?" : "<?", null))
	    return false;
	doLexicalPE = false;

	String		target = maybeGetName ();

	if (target == null) {
	    fatal ("P-018");
        }
	// the exact target "xml" and any other capitalization of it are
	// rejected with different message IDs
	if ("xml".equals (target)) {
	    fatal ("P-019");
        }
	if ("xml".equalsIgnoreCase (target)) {
	    fatal ("P-020", new Object [] { target });
        }

	if (maybeWhitespace ()) {
	    // whitespace after the target: everything up to "?>" is the data
	    strTmp = new StringBuffer ();
	    try {
		for (;;) {
		    // use in.getc to bypass PE processing
		    char c = in.getc ();
		    //Reached the end of PI.
		    if (c == '?' && in.peekc ('>'))
			break;
		    strTmp.append (c);
		}
	    } catch (EndOfInputException e) {
		fatal ("P-021");
	    }
	    dtdHandler.processingInstruction (target, strTmp.toString ());
	} else {
	    // no whitespace: the PI must end right here and has empty data
	    if (!in.peek ("?>", null)) {
		fatal ("P-022");
            }
	    dtdHandler.processingInstruction (target, "");
	}

	// NOTE(review): not restored when an exception is thrown above.
	doLexicalPE = savedLexicalPE;
	return true;
    }

    // [18] CDSect ::= CDStart CData CDEnd
    // [19] CDStart ::= '<![CDATA['
    // [20] CData ::= (Char* - (Char* ']]>' Char*))
    // [21] CDEnd ::= ']]>'
    //
    //	... handled by InputEntity.unparsedContent()

    // collapsing several rules together ... 
    // simpler than attribute literals -- no reference parsing!
    /**
     * Reads a pseudo-attribute of the form {@code S name Eq "value"}.
     *
     * @param name the expected attribute name
     * @param must whether the attribute is required; a missing required
     *      attribute is reported as "P-024"
     * @return the quoted value, or null if an optional attribute is absent
     */
    private String maybeReadAttribute (String name, boolean must)
                   throws IOException, SAXException {

        // [24] VersionInfo ::= S 'version' Eq \'|\" versionNum \'|\"
        // [80] EncodingDecl ::= S 'encoding' Eq \'|\" EncName \'|\"
        // [32] SDDecl ::=  S 'standalone' Eq \'|\" ... \'|\"
        boolean sawSpace = maybeWhitespace ();
        if (!sawSpace) {
            if (must) {
                fatal ("P-024", new Object [] { name });
                // NOTREACHED
            } else {
                return null;
            }
        }

        if (!peek (name)) {
            if (!must) {
                // To ensure that the whitespace is there so that when we
                // check for the next attribute we assure that the
                // whitespace still exists.
                ungetc ();
                return null;
            }
            fatal ("P-024", new Object [] { name });
        }

        // [25] Eq ::= S? '=' S?
        maybeWhitespace ();
        nextChar ('=', "F-023", null);
        maybeWhitespace ();

        return getQuotedString ("F-035", name);
    }

    /**
     * Reads and checks the "version" pseudo-attribute.
     *
     * @param must whether the attribute is required ("P-025" if missing)
     * @param versionNum the expected version; a well-formed but different
     *      value is reported through error("P-027")
     */
    private void readVersion (boolean must, String versionNum)
                 throws IOException, SAXException {

        String declared = maybeReadAttribute ("version", must);

        if (declared == null) {
            if (must) {
                fatal ("P-025", new Object [] { versionNum });
            }
            return;
        }

        // [26] versionNum ::= ([a-zA-Z0-9_.:]| '-')+
        for (int i = 0, n = declared.length (); i < n; i++) {
            char ch = declared.charAt (i);
            boolean legal = (ch >= '0' && ch <= '9')
                    || ch == '_' || ch == '.'
                    || (ch >= 'a' && ch <= 'z')
                    || (ch >= 'A' && ch <= 'Z')
                    || ch == ':' || ch == '-';
            if (!legal) {
                fatal ("P-026", new Object [] { declared });
            }
        }

        if (!declared.equals (versionNum)) {
            error ("P-027", new Object [] { versionNum, declared });
        }
    }

    // common code used by most markup declarations
    // ... S (Q)Name ...
    /**
     * Reads the mandatory whitespace and the name that follow a markup
     * declaration keyword; a missing name is reported as "P-005".
     *
     * @param roleId message ID describing the declaration, for diagnostics
     * @param qname not used by this implementation
     * @return the declared name
     */
    private String getMarkupDeclname (String roleId, boolean qname)
                   throws IOException, SAXException {

        whitespace (roleId);

        String declName = maybeGetName ();
        if (declName == null) {
            Object [] args = new Object [] { messages.getMessage (locale, roleId) };
            fatal ("P-005", args);
        }
        return declName;
    }

    /**
     * Tries each kind of markup declaration in turn and stops at the
     * first one that matches.
     *
     * @return true if a declaration, PI or comment was consumed
     */
    private boolean maybeMarkupDecl ()
                    throws IOException, SAXException {

        // [29] markupdecl ::= elementdecl | Attlistdecl
        //         | EntityDecl | NotationDecl | PI | Comment
        if (maybeElementDecl ()) {
            return true;
        }
        if (maybeAttlistDecl ()) {
            return true;
        }
        if (maybeEntityDecl ()) {
            return true;
        }
        if (maybeNotationDecl ()) {
            return true;
        }
        if (maybePI (false)) {
            return true;
        }
        return maybeComment (false);
    }

    private static final String		XmlLang = "xml:lang";

    /**
     * Checks whether a string matches the LanguageID grammar below:
     * a two-letter code, or "i-"/"x-" followed by a subcode, followed by
     * any number of "-subcode" suffixes.
     *
     * <p>Every subcode must contain at least one ASCII letter, so values
     * with an empty subcode such as "en--US" or "en-" are rejected.
     *
     * @param value the candidate language identifier; must not be null
     * @return true if the value matches the grammar
     */
    private boolean isXmlLang (String value) {

        // [33] LanguageId ::= Langcode ('-' Subcode)*
        // [34] Langcode ::= ISO639Code | IanaCode | UserCode
        // [35] ISO639Code ::= [a-zA-Z] [a-zA-Z]
        // [36] IanaCode ::= [iI] '-' SubCode
        // [37] UserCode ::= [xX] '-' SubCode
        // [38] SubCode ::= [a-zA-Z]+

        // the ISO and IANA codes (and subcodes) are registered,
        // but that's neither a WF nor a validity constraint.

        int     length = value.length ();
        int     nextSuffix;
        char    c;

        if (length < 2)
            return false;
        c = value.charAt (1);
        if (c == '-') {         // IANA, or user, code
            c = value.charAt (0);
            if (!(c == 'i' || c == 'I' || c == 'x' || c == 'X'))
                return false;
            // the '-' at index 1 starts the first (mandatory) subcode
            nextSuffix = 1;
        } else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
                                // 2 letter ISO code, or error
            c = value.charAt (0);
            if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
                return false;
            nextSuffix = 2;
        } else
            return false;

        // here "suffix" ::= '-' [a-zA-Z]+ suffix*
        while (nextSuffix < length) {
            if (value.charAt (nextSuffix) != '-')
                return false;

            int subcodeStart = ++nextSuffix;
            while (nextSuffix < length) {
                c = value.charAt (nextSuffix);
                if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')))
                    break;
                nextSuffix++;
            }

            // [38] requires at least one letter after each '-'
            if (nextSuffix == subcodeStart)
                return false;
        }
        return true;
    }


    //
    // CHAPTER 3:  Logical Structures
    //

    /**
     * To validate, subclassers should at this time make sure that
     * values are of the declared types:<UL>
     *  <LI> ID and IDREF(S) values are Names
     *  <LI> NMTOKEN(S) are Nmtokens
     *  <LI> ENUMERATION values match one of the tokens
     *  <LI> NOTATION values match a notation name
     *  <LI> ENTITY(IES) values match an unparsed external entity
     *  </UL>
     *
     * <P> Separately, make sure IDREF values match some ID
     * provided in the document (in the afterRoot method).
     */
/*    void validateAttributeSyntax (Attribute attr, String value)
         throws DTDParseException {
        // ID, IDREF(S) ... values are Names
        if (Attribute.ID == attr.type()) {
            if (!XmlNames.isName (value))
                error ("V-025", new Object [] { value });

            Boolean             b = (Boolean) ids.getNonInterned (value);
            if (b == null || b.equals (Boolean.FALSE))
                ids.put (value.intern (), Boolean.TRUE);
            else
                error ("V-026", new Object [] { value });

        } else if (Attribute.IDREF == attr.type()) {
            if (!XmlNames.isName (value))
                error ("V-027", new Object [] { value });

            Boolean             b = (Boolean) ids.getNonInterned (value);
            if (b == null)
                ids.put (value.intern (), Boolean.FALSE);

        } else if (Attribute.IDREFS == attr.type()) {
            StringTokenizer     tokenizer = new StringTokenizer (value);
            Boolean             b;
            boolean             sawValue = false;

            while (tokenizer.hasMoreTokens ()) {
                value = tokenizer.nextToken ();
                if (!XmlNames.isName (value))
                    error ("V-027", new Object [] { value });
                b = (Boolean) ids.getNonInterned (value);
                if (b == null)
                    ids.put (value.intern (), Boolean.FALSE);
                sawValue = true;
            }
            if (!sawValue)
                error ("V-039", null);


        // NMTOKEN(S) ... values are Nmtoken(s)
        } else if (Attribute.NMTOKEN == attr.type()) {
            if (!XmlNames.isNmtoken (value))
                error ("V-028", new Object [] { value });

        } else if (Attribute.NMTOKENS == attr.type()) {
            StringTokenizer     tokenizer = new StringTokenizer (value);
            boolean             sawValue = false;

            while (tokenizer.hasMoreTokens ()) {
                value = tokenizer.nextToken ();
                if (!XmlNames.isNmtoken (value))
                    error ("V-028", new Object [] { value });
                sawValue = true;
            }
            if (!sawValue)
                error ("V-032", null);

        // ENUMERATION ... values match one of the tokens
        } else if (Attribute.ENUMERATION == attr.type()) {
            for (int i = 0; i < attr.values().length; i++)
                if (value.equals (attr.values()[i]))
                    return;
            error ("V-029", new Object [] { value });

        // NOTATION values match a notation name
        } else if (Attribute.NOTATION == attr.type()) {
            //
            // XXX XML 1.0 spec should probably list references to
            // externally defined notations in standalone docs as
            // validity errors.  Ditto externally defined unparsed
            // entities; neither should show up in attributes, else
            // one needs to read the external declarations in order
            // to make sense of the document (exactly what tagging
            // a doc as "standalone" intends you won't need to do).
            //
            for (int i = 0; i < attr.values().length; i++)
                if (value.equals (attr.values()[i]))
                    return;
            error ("V-030", new Object [] { value });

        // ENTITY(IES) values match an unparsed entity(ies)
        } else if (Attribute.ENTITY == attr.type()) {
            // see note above re standalone
            if (!isUnparsedEntity (value))
                error ("V-031", new Object [] { value });

        } else if (Attribute.ENTITIES == attr.type()) {
            StringTokenizer     tokenizer = new StringTokenizer (value);
            boolean             sawValue = false;

            while (tokenizer.hasMoreTokens ()) {
                value = tokenizer.nextToken ();
                // see note above re standalone
                if (!isUnparsedEntity (value))
                    error ("V-031", new Object [] { value });
                sawValue = true;
            }
            if (!sawValue)
                error ("V-040", null);

        } else if (Attribute.CDATA != attr.type())
            throw new InternalError (attr.type());
    }
*/
    /**
     * Tells whether the name refers to a declared external entity that
     * has a notation attached.
     *
     * @param name entity name to look up in the {@code entities} table
     * @return true only for an ExternalEntity whose notation is non-null
     */
    private boolean isUnparsedEntity (String name)
    {
        Object found = entities.getNonInterned (name);

        // instanceof is false for null, so an unknown name ends up here too
        if (found instanceof ExternalEntity) {
            ExternalEntity external = (ExternalEntity) found;
            return external.notation != null;
        }
        return false;
    }

    /**
     * Parses an element type declaration if one starts here and reports
     * its content model to the DTD handler between startContentModel()
     * and endContentModel().
     *
     * <p>A second declaration for an already declared element name is
     * reported through error("V-012"); parsing of the declaration then
     * continues as usual.
     *
     * @return false if the input does not start with "!ELEMENT"
     *      (nothing is reported in that case), true once the declaration
     *      has been parsed
     */
    private boolean maybeElementDecl ()
                    throws IOException, SAXException {

        // [45] elementDecl ::= '<!ELEMENT' S Name S contentspec S? '>'
        // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
        InputEntity start = peekDeclaration ("!ELEMENT");

        if (start == null)
            return false;

        String name = getMarkupDeclname ("F-015", true);

        // remember the name so that a later redeclaration is detected
        if (declaredElements.contains (name))
            error ("V-012", new Object [] { name });
        else
            declaredElements.add (name);

        short modelType;
        whitespace ("F-000");
        if (peek (strEMPTY)) {
            modelType = DTDEventListener.CONTENT_MODEL_EMPTY;
            dtdHandler.startContentModel (name, modelType);
        } else if (peek (strANY)) {
            modelType = DTDEventListener.CONTENT_MODEL_ANY;
            dtdHandler.startContentModel (name, modelType);
        } else {
            // mixed or children content: getMixedOrChildren() issues the
            // startContentModel() call itself and returns the model type
            modelType = getMixedOrChildren (name);
        }

        dtdHandler.endContentModel (name, modelType);

        maybeWhitespace ();
        char c = getc ();
        if (c != '>')
            fatal ("P-036", new Object [] { name, new Character (c) });

        // the declaration has to end in the entity it started in
        if (start != in)
            error ("V-013", null);

        return true;
    }

    // We're leaving the content model as a regular expression;
    // it's an efficient natural way to express such things, and
    // libraries often interpret them.  No whitespace in the
    // model we store, though!

	/**
	 * Parses the parenthesized form of a content spec (Mixed or children)
	 * and fires startContentModel for it.  The textual form of the model
	 * is accumulated in a fresh strTmp buffer.
	 *
	 * @param elementName name of the element being declared
	 * @return DTDEventListener.CONTENT_MODEL_MIXED when the group starts
	 *         with "#PCDATA", DTDEventListener.CONTENT_MODEL_CHILDREN otherwise
	 */
    private short getMixedOrChildren(String elementName/*Element element*/)
                   throws IOException, SAXException {

        // [47] children ::= (choice|seq) ('?'|'*'|'+')?
        strTmp = new StringBuffer ();

        nextChar ('(', "F-028", elementName);
        // entity that supplied the '('; handed down so the matching ')'
        // can be checked against it
        final InputEntity opener = in;
        maybeWhitespace ();
        strTmp.append ('(');

        final short modelType;
        if (!peek ("#PCDATA")) {
            modelType = DTDEventListener.CONTENT_MODEL_CHILDREN;
            dtdHandler.startContentModel (elementName, modelType);
            getcps (elementName, opener);
        } else {
            strTmp.append ("#PCDATA");
            modelType = DTDEventListener.CONTENT_MODEL_MIXED;
            dtdHandler.startContentModel (elementName, modelType);
            getMixed (elementName, opener);
        }
        return modelType;
    }

    // '(' S? already consumed
    // matching ')' must be in "start" entity if validating
    /**
     * Parses one choice or sequence group of a children content model and
     * reports it as startModelGroup / childElement / connector /
     * endModelGroup events.  Nested groups are handled by recursion.  The
     * text of the group is appended to strTmp as it is read.
     *
     * @param elementName name of the element being declared (used in
     *        diagnostics only)
     * @param start entity in which the opening paren of this group was
     *        read; V-014 is reported if the group ends in another entity
     */
    private void getcps(/*Element element,*/String elementName, InputEntity start) 
                         throws IOException, SAXException {

		// [48] cp ::= (Name|choice|seq) ('?'|'*'|'+')?
		// [49] choice ::= '(' S? cp (S? '|' S? cp)* S? ')'
		// [50] seq    ::= '(' S? cp (S? ',' S? cp)* S? ')'
		// "decided" turns true once the first connector has been read;
		// "type" then holds that connector ('|' or ',').
		boolean		   decided = false;
		char		   type = 0;
//		ContentModel       retval, temp, current;

//		retval = temp = current = null;
		
		dtdHandler.startModelGroup();
		
		do {
		    String	tag;

		    tag = maybeGetName ();
		    if (tag != null) {
				strTmp.append (tag);
//				temp = new ElementModel(tag);
//				getFrequency((RepeatableContent)temp);
///->
				dtdHandler.childElement(tag, getFrequency() );
///<-
		    } else if (peek ("(")) {
				InputEntity	next = in;
				strTmp.append ('(');
				maybeWhitespace();
//				temp = getcps(element, next);
//				getFrequency(temp);
///->
				// the recursive call reads the nested group's occurrence
				// indicator itself (see endModelGroup at the bottom)
				getcps(elementName,next);
///				getFrequency();		<- this looks like a bug
///<-
		    } else
				fatal ((type == 0) ? "P-039" :
					((type == ',') ? "P-037" : "P-038"),
				    new Object [] { new Character (getc ()) });

		    maybeWhitespace();
		    if (decided) {
				// every further separator must repeat the first connector
				char c = getc();

//				if (current != null) {
//				    current.addChild(temp);
//				}
				if (c == type) {
				    strTmp.append(type);
				    maybeWhitespace();
					reportConnector(type);
				    continue;
				} else if (c == '\u0029') {	// rparen
				    // push it back; the loop condition consumes it
				    ungetc();
				    continue;
				} else {
					fatal((type == 0) ? "P-041" : "P-040",
					new Object [] {
					    new Character (c),
					    new Character (type)
					    });
				}
		    } else {
				type = getc();
                switch(type) {
                    case '|':
                    case ',':
						reportConnector(type);
                        break;
                    default:
//                        retval = temp;
                        // Not a connector: push it back and let the loop
                        // condition look for the closing paren.
                        // NOTE(review): "type" keeps this character and
                        // "decided" stays false, so a second cp that follows
                        // without any connector appears to be accepted --
                        // confirm whether that leniency is intended.
                        ungetc();
                        continue;
                }
//				retval = (ContentModel)current;
				decided = true;
//				current.addChild(temp);
				strTmp.append (type);
		    }
		    maybeWhitespace ();
		} while (!peek (")"));
	
		if (in != start)
			error ("V-014", new Object [] { elementName });
		strTmp.append (')');
		
		// the occurrence indicator after the closing paren belongs to this group
		dtdHandler.endModelGroup(getFrequency());
//		return retval;
    }

	/**
	 * Reports a content model connector to the DTD handler.
	 *
	 * @param type '|' (reported as DTDEventListener.CHOICE) or ','
	 *        (reported as DTDEventListener.SEQUENCE)
	 * @throws Error if <code>type</code> is any other character; this is
	 *         an internal assertion, the message names the offending
	 *         character to make such a failure diagnosable
	 */
	private void reportConnector( char type ) throws SAXException {
	    switch(type) {
		case '|':
			dtdHandler.connector( DTDEventListener.CHOICE );
			return;
		case ',':
			dtdHandler.connector( DTDEventListener.SEQUENCE );
			return;
		default:
			// assertion failed: callers only pass '|' or ','
			throw new Error(
				"unexpected content model connector: '" + type
				+ "' (0x" + Integer.toHexString(type) + ")" );
		}
	}
	
    /**
     * Reads an optional occurrence indicator ('?', '+' or '*').  A
     * recognized indicator is appended to strTmp; any other character is
     * pushed back onto the input.
     *
     * @return the matching DTDEventListener.OCCURENCE_* constant, or
     *         OCCURENCE_ONCE when no indicator is present
     */
    private short getFrequency()
                 throws IOException, SAXException {

        final char indicator = getc ();
        final short occurrence;

        switch (indicator) {
        case '?':
            occurrence = DTDEventListener.OCCURENCE_ZERO_OR_ONE;
            break;
        case '+':
            occurrence = DTDEventListener.OCCURENCE_ONE_OR_MORE;
            break;
        case '*':
            occurrence = DTDEventListener.OCCURENCE_ZERO_OR_MORE;
            break;
        default:
            // not an indicator: leave the character for the caller
            ungetc ();
            return DTDEventListener.OCCURENCE_ONCE;
        }

        strTmp.append (indicator);
        return occurrence;
    }

    // '(' S? '#PCDATA' already consumed 
    // matching ')' must be in "start" entity if validating
    /**
     * Parses the remainder of a Mixed content model and fires one
     * mixedElement event per distinct element name.  A name listed twice
     * is reported as V-015 and not announced again.
     *
     * <pre>
     * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
     *              | '(' S? '#PCDATA'                   S? ')'
     * </pre>
     *
     * @param elementName name of the element being declared
     * @param start entity in which the opening paren was read; V-014 is
     *        reported if the closing paren is found in another entity
     */
    private void getMixed (String elementName,/*Element element,*/ InputEntity start)
                 throws IOException, SAXException {

        maybeWhitespace ();

        // "(#PCDATA)" or "(#PCDATA)*": no element names at all
        if (peek ("\u0029*") || peek ("\u0029")) {
            if (in != start)
                error ("V-014", new Object [] { elementName });
            strTmp.append (')');
            return;
        }

        final ArrayList seenNames = new ArrayList ();

        while (peek ("|")) {
            strTmp.append ('|');
            maybeWhitespace ();

            doLexicalPE = true;
            final String child = maybeGetName ();
            if (child == null) {
                fatal ("P-042", new Object []
                    { elementName, Integer.toHexString (getc ()) });
            }

            if (!seenNames.contains (child)) {
                seenNames.add (child);
                dtdHandler.mixedElement (child);
            } else {
                error ("V-015", new Object [] { child });
            }

            strTmp.append (child);
            maybeWhitespace ();
        }

        // with element names present the group has to end in ")*"
        if (!peek ("\u0029*")) {
            fatal ("P-043", new Object []
                { elementName, new Character (getc ()) });
        }
        if (in != start)
            error ("V-014", new Object [] { elementName });
        strTmp.append (')');
    }

    /**
     * Parses an attribute-list declaration if one starts at the current
     * input position, firing one attributeDecl event per attribute
     * definition.
     *
     * <pre>
     * [52] AttlistDecl ::= '&lt;!ATTLIST' S Name AttDef* S? '&gt;'
     * [53] AttDef      ::= S Name S AttType S DefaultDecl
     * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
     * </pre>
     *
     * Reported attribute use: USE_REQUIRED for "#REQUIRED", USE_FIXED for
     * "#FIXED" + value, USE_IMPLIED for "#IMPLIED" and USE_NORMAL for a
     * plain default value.  (USE_IMPLIED and USE_NORMAL used to be
     * swapped.)  Default values of non-CDATA attributes are whitespace
     * normalized as XML 1.0 section 3.3.3 requires; CDATA defaults are
     * passed on as parsed.  (That test used to be inverted.)
     *
     * @return false if the input does not start with an ATTLIST
     *         declaration, true once a declaration has been parsed
     */
    private boolean maybeAttlistDecl()
                    throws IOException, SAXException {

        InputEntity start = peekDeclaration ("!ATTLIST");

        if (start == null)
            return false;

        String elementName = getMarkupDeclname ("F-016", true);

        while (!peek (">")) {

            // [54] AttType ::= StringType | TokenizedType | EnumeratedType

            // look for global attribute definitions, don't expand for now...
            maybeWhitespace();
            char c = getc();
            if (c == '%') {
                String entityName = maybeGetName();
                if (entityName != null) {
                    nextChar (';', "F-021", entityName);
                    whitespace ("F-021");
                    continue;
                } else
                    fatal ("P-011");
            }

            ungetc();
            // look for attribute name otherwise
            String attName = maybeGetName ();
            if (attName == null) {
                fatal ("P-044", new Object [] { new Character (getc ()) });
            }
            whitespace ("F-001");

            String typeName;
            Vector values = null;       // notation/enumeration values

            // Note: typeName is always one of the TYPE_* constants (or
            // null), so it can be compared with "==" below.

            // [55] StringType ::= 'CDATA'
            if (peek (TYPE_CDATA))
                typeName = TYPE_CDATA;

            // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS'
            //          | 'ENTITY' | 'ENTITIES'
            //          | 'NMTOKEN' | 'NMTOKENS'
            // n.b. if "IDREFS" is there, both "ID" and "IDREF"
            // match peekahead ... so this order matters!
            else if (peek (TYPE_IDREFS))
                typeName = TYPE_IDREFS;
            else if (peek (TYPE_IDREF))
                typeName = TYPE_IDREF;
            else if (peek (TYPE_ID)) {
                typeName = TYPE_ID;
                // TODO: should V-016 (second ID attribute on the same
                // element) be checked here?
            } else if (peek (TYPE_ENTITY))
                typeName = TYPE_ENTITY;
            else if (peek (TYPE_ENTITIES))
                typeName = TYPE_ENTITIES;
            else if (peek (TYPE_NMTOKENS))
                typeName = TYPE_NMTOKENS;
            else if (peek (TYPE_NMTOKEN))
                typeName = TYPE_NMTOKEN;

            // [57] EnumeratedType ::= NotationType | Enumeration
            // [58] NotationType ::= 'NOTATION' S '(' S? Name
            //          (S? '|' S? Name)* S? ')'
            else if (peek (TYPE_NOTATION)) {
                typeName = TYPE_NOTATION;
                whitespace ("F-002");
                nextChar ('(', "F-029", null);
                maybeWhitespace ();

                values = new Vector();
                do {
                    String name;
                    if ((name = maybeGetName ()) == null)
                        fatal ("P-068");
                    // permit deferred declarations
                    if (notations.get (name) == null)
                        notations.put (name, name);
                    values.addElement (name);
                    maybeWhitespace ();
                    if (peek ("|"))
                        maybeWhitespace ();
                } while (!peek (")"));

            // [59] Enumeration ::= '(' S? Nmtoken (S? '|' Nmtoken)* S? ')'
            } else if (peek ("(")) {
                typeName = TYPE_ENUMERATION;

                maybeWhitespace ();

                values = new Vector();
                do {
                    String name = getNmtoken ();
                    values.addElement(name);
                    maybeWhitespace ();
                    if (peek ("|"))
                        maybeWhitespace ();
                } while (!peek (")"));
            } else {
                fatal ("P-045",
                    new Object [] { attName, new Character (getc ()) });
                // NOTREACHED: fatal() throws; keeps typeName definitely assigned
                typeName = null;
            }

            short attributeUse;
            String defaultValue = null;

            // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
            //          | (('#FIXED' S)? AttValue)
            whitespace ("F-003");
            if (peek ("#REQUIRED"))
                attributeUse = DTDEventListener.USE_REQUIRED;
            else if (peek ("#FIXED")) {
                if (typeName == TYPE_ID)
                    error ("V-017", new Object [] { attName });
                attributeUse = DTDEventListener.USE_FIXED;
                whitespace ("F-004");
                defaultValue = parseAttDefaultValue (typeName);
                // TODO: validate the syntax of non-CDATA default values
            } else if (peek ("#IMPLIED")) {
                attributeUse = DTDEventListener.USE_IMPLIED;
            } else {
                // no keyword: a plain default value must follow
                attributeUse = DTDEventListener.USE_NORMAL;
                if (typeName == TYPE_ID)
                    error ("V-018", new Object [] { attName });
                defaultValue = parseAttDefaultValue (typeName);
                // TODO: validate the syntax of non-CDATA default values
            }

            if (XmlLang.equals(attName)
                    && defaultValue != null
                    && !isXmlLang (defaultValue))
                error ("P-033", new Object [] { defaultValue });

            // TODO: isn't it an error to specify the same attribute twice?

            String[] v = (values != null)
                ? (String[]) values.toArray (new String[0])
                : null;
            dtdHandler.attributeDecl (elementName, attName, typeName, v,
                attributeUse, defaultValue);
            maybeWhitespace ();
        }
        // the closing '>' must come from the entity that held '<!ATTLIST'
        if (start != in)
            error ("V-013", null);
        return true;
    }

    /**
     * Reads the quoted default value of an attribute definition.
     * parseLiteral() leaves the literal in strTmp; for every type other
     * than CDATA it is additionally whitespace normalized.
     *
     * @param typeName one of the TYPE_* constants (compared by identity)
     * @return the default value to report
     */
    private String parseAttDefaultValue (String typeName)
                   throws IOException, SAXException {

        parseLiteral (false);
        if (typeName != TYPE_CDATA)
            return normalize (false);
        return strTmp.toString ();
    }

    // used when parsing literal attribute values,
    // or public identifiers.
    //
    // input in strTmp
    /**
     * Whitespace-normalizes the text held in strTmp: leading and trailing
     * whitespace is removed (via String.trim()) and every run of XML
     * whitespace inside is replaced by one space character.  On return
     * strTmp holds the normalized text as well.
     *
     * A single tab, CR or LF between two tokens is also turned into a
     * space in the returned string; previously such a character was
     * rewritten in strTmp only and the un-normalized string was returned.
     *
     * @param invalidIfNeeded not used by this method
     * @return the normalized text
     */
    private String normalize (boolean invalidIfNeeded) {

        final String raw = strTmp.toString ();
        final String trimmed = raw.trim ();

        // trim() can only shorten the string, so a length difference
        // means that something was stripped
        boolean changed = trimmed.length () != raw.length ();

        strTmp = new StringBuffer (trimmed.length ());
        for (int i = 0; i < trimmed.length (); i++) {
            char c = trimmed.charAt (i);
            if (!XmlChars.isSpace (c)) {
                strTmp.append (c);
                continue;
            }
            strTmp.append (' ');
            if (c != ' ')
                changed = true;     // tab/CR/LF became a space
            // swallow the rest of this whitespace run
            while (i + 1 < trimmed.length ()
                    && XmlChars.isSpace (trimmed.charAt (i + 1))) {
                i++;
                changed = true;
            }
        }
        // avoid allocating another string when nothing was altered
        return changed ? strTmp.toString () : trimmed;
    }

    /**
     * Parses a conditional section ("&lt;![INCLUDE[" or "&lt;![IGNORE[") if one
     * starts at the current input position.  The declarations inside an
     * INCLUDE section are processed like the external subset; the content
     * of an IGNORE section is skipped, honoring nested sections.
     *
     * @return false if the input does not start with "&lt;![", true once a
     *         section has been parsed
     * @throws SAXException on a missing or unknown keyword (P-046, P-048),
     *         on unparsable content (P-047), or from the handler
     */
    private boolean maybeConditionalSect ()
                    throws IOException, SAXException {

	// [61] conditionalSect ::= includeSect | ignoreSect

	if (!peek ("<!["))
	    return false;

	String		keyword;
	// entity in which the section was opened
	InputEntity	start = in;

	maybeWhitespace ();

	if ((keyword = maybeGetName ()) == null)
	    fatal ("P-046");
	maybeWhitespace ();
	nextChar ('[', "F-030", null);

	// [62] includeSect ::= '<![' S? 'INCLUDE' S? '['
	//				extSubsetDecl ']]>'
	if ("INCLUDE".equals (keyword)) {
	    for (;;) {
		// drop exhausted entities, but never pop past the one the
		// section started in
		while (in.isEOF () && in != start)
		    in = in.pop ();
		// still at EOF here means the starting entity itself ran out
		if (in.isEOF ()) {
			error ("V-020", null);
		}
		if (peek ("]]>"))
		    break;

		// same alternatives as in externalParameterEntity()
		doLexicalPE = false;
		if (maybeWhitespace ())
		    continue;
		if (maybePEReference ())
		    continue;
		doLexicalPE = true;
		if (maybeMarkupDecl () || maybeConditionalSect ())
		    continue;

		fatal ("P-047");
	    }

	// [63] ignoreSect ::= '<![' S? 'IGNORE' S? '['
	//			ignoreSectcontents ']]>'
	// [64] ignoreSectcontents ::= Ignore ('<!['
	//			ignoreSectcontents ']]>' Ignore)*
	// [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
	} else if ("IGNORE".equals (keyword)) {
	    // depth of open "<![" sections; the one just entered counts as 1
	    int	nestlevel = 1;
	    // ignoreSectcontents
	    doLexicalPE = false;
	    while (nestlevel > 0) {
		char c = getc ();	// will pop input entities
		if (c == '<') {
		    if (peek ("!["))
			nestlevel++;
		} else if (c == ']') {
		    if (peek ("]>"))
			nestlevel--;
		} else
		    continue;
	    }
	} else
	    fatal ("P-048", new Object [] { keyword });
	return true;
    }


    //
    // CHAPTER 4:  Physical Structures
    //

    // parse decimal or hex numeric character reference
    /**
     * Parses the numeric part of a character reference up to and
     * including the terminating ';'.  A leading 'x' selects hexadecimal
     * digits, otherwise decimal digits are expected.  (Presumably the
     * caller has already consumed the leading "&amp;#" -- confirm against
     * the call site.)
     *
     * The value no longer wraps around on overflow: once it exceeds
     * 0x10FFFF it stops growing, so an oversized reference such as
     * "&amp;#x100000041;" yields a number above the Unicode range instead
     * of silently turning into a valid character.  The remaining digits
     * are still syntax checked.
     *
     * @return the referenced code point, or some value greater than
     *         0x10FFFF if the number is too large to be a character
     * @throws SAXException P-049 (decimal) or P-050 (hex) on a character
     *         that is neither a digit of the radix nor ';'
     */
    private int parseCharNumber()
                throws IOException, SAXException {

        final int maxCodePoint = 0x10ffff;

        final boolean hex = (getc () == 'x');
        if (!hex)
            ungetc ();

        final int radix = hex ? 16 : 10;
        int retval = 0;

        for (;;) {
            char c = getc ();
            int digit;

            if (c == ';')
                return retval;

            if (c >= '0' && c <= '9')
                digit = c - '0';
            else if (hex && c >= 'a' && c <= 'f')
                digit = 10 + (c - 'a');
            else if (hex && c >= 'A' && c <= 'F')
                digit = 10 + (c - 'A');
            else {
                fatal (hex ? "P-050" : "P-049");
                continue;       // NOTREACHED: fatal() always throws
            }

            // saturate: 0x10ffff * 16 + 15 still fits in an int, and any
            // value above the limit is never multiplied again
            if (retval <= maxCodePoint)
                retval = retval * radix + digit;
        }
    }

    // parameter is a UCS-4 character ... i.e. not just 16 bit UNICODE,
    // though still subject to the 'Char' construct in XML
    /**
     * Stores the UTF-16 form of a UCS-4 character in charTmp.
     *
     * @param ucs4 the character; values up to 0xFFFF must satisfy
     *        XmlChars.isChar(), larger ones must not exceed 0x10FFFF
     * @return number of chars written to charTmp: 1, or 2 for a
     *         surrogate pair
     * @throws SAXException P-051 (with the value in hex) when the value
     *         is not acceptable
     */
    private int surrogatesToCharTmp(int ucs4) 
                throws SAXException {

        final boolean supplementary = ucs4 > 0xffff && ucs4 <= 0x0010ffff;

        if (supplementary) {
            // we represent these as UNICODE surrogate pairs
            final int offset = ucs4 - 0x10000;
            charTmp [0] = (char) (0xd800 | ((offset >> 10) & 0x03ff));
            charTmp [1] = (char) (0xdc00 | (offset & 0x03ff));
            return 2;
        }

        if (ucs4 <= 0xffff && XmlChars.isChar (ucs4)) {
            charTmp [0] = (char) ucs4;
            return 1;
        }

        fatal ("P-051", new Object [] { Integer.toHexString (ucs4) });
        // NOTREACHED
        return -1;
    }

    /**
     * Handles a parameter entity reference that appears where a complete
     * declaration could appear.  This is the SYNTACTIC version of the
     * construct; when processing external entities there is also a
     * LEXICAL version, see getc() and doLexicalPE.
     *
     * An internal entity is pushed as the new input; an external one is
     * pushed and then parsed with externalParameterEntity(); an
     * undeclared name is reported as V-022.
     *
     * @return false if the next character is not '%', true otherwise
     */
    private boolean maybePEReference()
                    throws IOException, SAXException {

        // [69] PEReference ::= '%' Name ';'
        if (!in.peekc ('%'))
            return false;

        final String peName = maybeGetName ();
        if (peName == null)
            fatal ("P-011");
        nextChar (';', "F-021", peName);

        final Object declared = params.get (peName);

        if (declared == null) {
            error ("V-022", new Object [] { peName });
        } else if (declared instanceof InternalEntity) {
            final InternalEntity internal = (InternalEntity) declared;
            pushReader (internal.buf, peName, false);
        } else if (declared instanceof ExternalEntity) {
            final ExternalEntity external = (ExternalEntity) declared;
            pushReader (external);
            externalParameterEntity (external);
        }
        return true;
    }

    /**
     * Parses a general or parameter entity declaration if one starts at
     * the current input position.  Only the first declaration of a name
     * is stored and reported; the handler is told about internal general,
     * external general and unparsed entities.  Parameter entities are
     * stored in "params" but not reported, except that an external
     * declaration carrying a notation is reported as an unparsed entity.
     *
     * @return false if the input does not start with an ENTITY
     *         declaration, true once a declaration has been parsed
     */
    private boolean maybeEntityDecl()
                    throws IOException, SAXException {

	// [70] EntityDecl ::= GEDecl | PEDecl
	// [71] GEDecl ::= '<!ENTITY' S       Name S EntityDef S? '>'
	// [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDEF     S? '>'
	// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
	// [74] PEDef     ::= EntityValue |  ExternalID
	//
	InputEntity	start = peekDeclaration ("!ENTITY");

	if (start == null)
	    return false;

	String		entityName;
	// table the declaration goes into: "params" for PEs, else "entities"
	SimpleHashtable	defns;
	ExternalEntity	externalId;
	boolean		doStore;

	// PE expansion gets selectively turned off several places:
	// in ENTITY declarations (here), in comments, in PIs.
	
	// Here, we allow PE entities to be declared, and allows
	// literals to include PE refs without the added spaces
	// required with their expansion in markup decls.

	doLexicalPE = false;
	whitespace ("F-005");
	if (in.peekc ('%')) {
	    whitespace ("F-006");
	    defns = params;
	} else
	    defns = entities;

	ungetc ();	// leave some whitespace
	doLexicalPE = true;
	entityName = getMarkupDeclname ("F-017", false);
	whitespace ("F-007");
	// null means that no PUBLIC/SYSTEM keyword follows, i.e. an internal entity
	externalId = maybeExternalID ();

	//
	// first definition sticks ... e.g. internal subset PEs are used
	// to override DTD defaults.  It's also an "error" to incorrectly
	// redefine builtin internal entities, but since reporting such
	// errors is optional we only give warnings ("just in case") for
	// non-parameter entities.
	//
	doStore = (defns.get (entityName) == null);
	if (!doStore && defns == entities)
	    warning ("P-054", new Object [] { entityName });
	
	// internal entities
	if (externalId == null) {
	    char		value [];
	    InternalEntity	entity;

	    doLexicalPE = false;		// "ab%bar;cd" -maybe-> "abcd"
	    parseLiteral (true);
	    doLexicalPE = true;
	    if (doStore) {
		// copy the literal out of strTmp
		value = new char [strTmp.length ()];
		if (value.length != 0)
		    strTmp.getChars (0, value.length, value, 0);
		entity = new InternalEntity (entityName, value);
		entity.isPE = (defns == params);
		entity.isFromInternalSubset = false;
		defns.put (entityName, entity);
		if (defns == entities)
		    dtdHandler.internalGeneralEntityDecl (entityName,
			new String (value));
	    }

	// external entities (including unparsed)
	} else {
	    // [76] NDataDecl ::= S 'NDATA' S Name
	    // (only looked for on general entities)
	    if (defns == entities && maybeWhitespace ()
		    && peek ("NDATA")) {
		externalId.notation = getMarkupDeclname ("F-018", false);

		// flag undeclared notation for checking after
		// the DTD is fully processed
		if (notations.get (externalId.notation) == null)
		    notations.put (externalId.notation, Boolean.TRUE);
	    }
	    externalId.name = entityName;
	    externalId.isPE = (defns == params);
	    externalId.isFromInternalSubset = false;
	    if (doStore) {
		defns.put (entityName, externalId);
		if (externalId.notation != null)
		    dtdHandler.unparsedEntityDecl (entityName,
			    externalId.publicId, externalId.systemId,
			    externalId.notation);
		else if (defns == entities)
		    dtdHandler.externalGeneralEntityDecl (entityName,
			    externalId.publicId, externalId.systemId);
	    }
	}
	maybeWhitespace ();
	nextChar ('>', "F-031", entityName);
	// the closing '>' must come from the entity that held '<!ENTITY'
	if (start != in)
	    error ("V-013", null);
	return true;
    }

    /**
     * Parses an external identifier if the input starts with "PUBLIC" or
     * "SYSTEM".
     *
     * <pre>
     * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
     *                   | 'PUBLIC' S PubidLiteral S SystemLiteral
     * </pre>
     *
     * @return a new ExternalEntity with its systemId set (and publicId,
     *         for the PUBLIC form), or null if neither keyword is present
     */
    private ExternalEntity maybeExternalID ()
                           throws IOException, SAXException {

        final boolean hasPublicId = peek ("PUBLIC");

        if (!hasPublicId && !peek ("SYSTEM"))
            return null;

        String publicId = null;
        if (hasPublicId) {
            whitespace ("F-009");
            publicId = parsePublicId ();
        }

        final ExternalEntity external = new ExternalEntity (in);
        external.publicId = publicId;
        whitespace ("F-008");
        external.systemId = parseSystemId ();
        return external;
    }

    /**
     * Parses a quoted system identifier and resolves it against the
     * system ID of the current input entity when it is relative.
     *
     * A reference counts as absolute (and is passed through untouched, so
     * that the entity resolver may handle URIs that are not URLs) when a
     * ':' appears before the first '/' or when there is a ':' and no '/'
     * at all, e.g. "urn:x:y".  The latter used to be mistaken for a
     * relative reference.  References beginning with '/' used to raise
     * InternalError; they now keep the scheme and authority of the base.
     *
     * Dot segments ("/./", "/xxx/../") are not removed here, that is left
     * to the code opening the URI.
     *
     * @return the system identifier, made absolute where possible
     * @throws SAXException P-055 if a relative reference cannot be
     *         resolved because the current entity has no system ID, or
     *         from the handler when P-056 (fragment identifier present)
     *         is reported
     */
    private String parseSystemId ()
                   throws IOException, SAXException {

        String uri = getQuotedString ("F-034", null);
        final int colon = uri.indexOf (':');
        final int slash = uri.indexOf ('/');

        // resolve relative URIs ... must do it here since
        // it's relative to the source file holding the URI!
        final boolean hasScheme =
            colon > 0 && (slash == -1 || colon < slash);

        if (!hasScheme) {
            String baseURI = in.getSystemId ();

            if (baseURI == null)
                fatal ("P-055", new Object [] { uri });
            if (uri.length () == 0)
                uri = ".";
            if (uri.charAt (0) != '/')
                uri = baseURI.substring (0, baseURI.lastIndexOf ('/') + 1)
                    + uri;
            else
                uri = resolveRootedReference (baseURI, uri);
        }
        // check for fragment ID in URI
        if (uri.indexOf ('#') != -1)
            error ("P-056", new Object [] { uri });
        return uri;
    }

    /**
     * Resolves a reference that begins with '/' against a base URI:
     * "//authority/path" keeps only the scheme of the base, "/path"
     * keeps its scheme and authority.
     */
    private static String resolveRootedReference (String baseURI, String ref) {

        final int colon = baseURI.indexOf (':');
        final int slash = baseURI.indexOf ('/');
        // length of "scheme:", or 0 if the base has no scheme
        int keep = (colon > 0 && (slash == -1 || colon < slash))
            ? colon + 1
            : 0;

        if (!ref.startsWith ("//") && baseURI.startsWith ("//", keep)) {
            // the base has an authority part: keep it as well
            final int pathStart = baseURI.indexOf ('/', keep + 2);
            keep = (pathStart == -1) ? baseURI.length () : pathStart;
        }
        return baseURI.substring (0, keep) + ref;
    }

    /**
     * Parses a text declaration if the input starts with "&lt;?xml".
     *
     * <pre>
     * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?&gt;'
     * </pre>
     *
     * @throws SAXException P-057 if the declaration is not closed by "?&gt;"
     */
    private void maybeTextDecl ()
                 throws IOException, SAXException {

        if (!peek ("<?xml"))
            return;

        readVersion (false, "1.0");
        readEncoding (true);
        maybeWhitespace ();

        final boolean closed = peek ("?>");
        if (!closed)
            fatal ("P-057");
    }

    /**
     * Parses the content of an external parameter entity that has just
     * been pushed as the current input: an optional text declaration
     * followed by markup declarations, conditional sections, parameter
     * entity references and whitespace, until that entity is exhausted.
     *
     * @param next the entity being parsed; not referenced by this method,
     *        which works on the current input "in" instead
     * @throws SAXException P-059 if parsing stops before the entity's
     *         end of input is reached
     */
    private void externalParameterEntity(ExternalEntity next)
                 throws IOException, SAXException {

	//
	// Reap the intended benefits of standalone declarations:
	// don't deal with external parameter entities, except to
	// validate the standalone declaration.
	//
	
	// n.b. "in external parameter entities" (and external
	// DTD subset, same grammar) parameter references can
	// occur "within" markup declarations ... expansions can
	// cross syntax rules.  Flagged here; affects getc().

	// [79] ExtPE ::= TextDecl? extSubsetDecl
	// [31] extSubsetDecl ::= ( markupdecl | conditionalSect
	//		| PEReference | S )*
	InputEntity	pe;

	// XXX if this returns false ...

	// remember the entity we were called for; "in" changes while
	// nested entities get pushed and popped
	pe = in;
	maybeTextDecl();
	while (!pe.isEOF()) {
	    // pop internal PEs (and whitespace before/after)
	    if (in.isEOF()) {
		in = in.pop();
		continue;
	    }
	    doLexicalPE = false;
	    if (maybeWhitespace())
		continue;
	    if (maybePEReference())
		continue;
	    doLexicalPE = true;
	    if (maybeMarkupDecl() || maybeConditionalSect())
		continue;
	    // nothing recognizable: leave the loop, reported below
	    break;
	}
	// if (in != pe) throw new InternalError("who popped my PE?");
	if (!pe.isEOF())
	    fatal("P-059", new Object [] { in.getName () });
    }

    /**
     * Reads the "encoding" pseudo-attribute, checks that its value is a
     * well-formed encoding name and warns (P-061) when it differs,
     * ignoring case, from the encoding reported by the current input.
     *
     * <pre>
     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
     * </pre>
     *
     * @param must passed on to maybeReadAttribute()
     * @throws SAXException P-060 on a character not allowed in EncName
     */
    private void readEncoding(boolean must)
                 throws IOException, SAXException {

        final String declared = maybeReadAttribute ("encoding", must);
        if (declared == null)
            return;

        final int length = declared.length ();
        for (int pos = 0; pos < length; pos++) {
            final char ch = declared.charAt (pos);
            final boolean letter =
                (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
            // digits and punctuation may not start the name
            final boolean trailer = pos != 0
                && ((ch >= '0' && ch <= '9')
                    || ch == '-' || ch == '_' || ch == '.');

            if (!letter && !trailer)
                fatal ("P-060", new Object [] { new Character (ch) });
        }

        //
        // This should be the encoding in use, and it's even an error for
        // it to be anything else (in certain cases that are impractical
        // to test, and may even be insufficient).  So, we do the best we
        // can, and warn if things look suspicious.  Note that Java doesn't
        // uniformly expose the encodings, and that the names it uses
        // internally are nonstandard.  Also, that the XML spec allows
        // such "errors" not to be reported at all.
        //
        final String actual = in.getEncoding ();
        if (actual == null)
            return;
        if (!declared.equalsIgnoreCase (actual))
            warning ("P-061", new Object [] { declared, actual });
    }

    /**
     * Parses a notation declaration if one starts at the current input
     * position.  The first declaration of a name is stored in "notations"
     * and reported through notationDecl; a name that already maps to an
     * ExternalEntity only triggers warning P-063.
     *
     * <pre>
     * [82] NotationDecl ::= '&lt;!NOTATION' S Name S
     *                       (ExternalID | PublicID) S? '&gt;'
     * [83] PublicID ::= 'PUBLIC' S PubidLiteral
     * </pre>
     *
     * @return false if the input does not start with a NOTATION
     *         declaration, true once a declaration has been parsed
     */
    private boolean maybeNotationDecl()
                    throws IOException, SAXException {

        final InputEntity start = peekDeclaration ("!NOTATION");
        if (start == null)
            return false;

        final String notationName = getMarkupDeclname ("F-019", false);
        final ExternalEntity decl = new ExternalEntity (in);

        whitespace ("F-011");
        if (peek ("PUBLIC")) {
            whitespace ("F-009");
            decl.publicId = parsePublicId ();
            // a system literal is optional after the public ID
            if (maybeWhitespace ()) {
                if (peek (">"))
                    ungetc ();      // leave '>' for nextChar() below
                else
                    decl.systemId = parseSystemId ();
            }
        } else if (peek ("SYSTEM")) {
            whitespace ("F-008");
            decl.systemId = parseSystemId ();
        } else {
            fatal ("P-062");
        }

        maybeWhitespace ();
        nextChar ('>', "F-032", notationName);
        if (start != in)
            error ("V-013", null);

        final String systemId = decl.systemId;
        if (systemId != null && systemId.indexOf ('#') != -1)
            error ("P-056", new Object [] { systemId });

        // entries of another type are placeholders put there by earlier
        // references to this notation name
        if (notations.get (notationName) instanceof ExternalEntity) {
            warning ("P-063", new Object [] { notationName });
        } else {
            notations.put (notationName, decl);
            dtdHandler.notationDecl (notationName, decl.publicId,
                decl.systemId);
        }
        return true;
    }


    ////////////////////////////////////////////////////////////////
    //
    //	UTILITIES
    //
    ////////////////////////////////////////////////////////////////

    /**
     * Reads the next input character.
     *
     * With doLexicalPE off this is a plain in.getc().  With it on,
     * exhausted entities are popped first, and a '%' is taken as the
     * start of a parameter entity reference: the entity is pushed as new
     * input, framed by a one-space pseudo entity before and after it, and
     * the first character of the newly pushed input is returned.
     *
     * @return the next character
     * @throws SAXException P-064 at the end of an entity that may not be
     *         popped, P-011 if no name follows '%', V-022 (fatal here)
     *         if the parameter entity is not declared
     */
    private char getc() throws IOException, SAXException {

	if (!doLexicalPE) {
  	    char c = in.getc ();
  	    return c;
  	}

	//
	// External parameter entities get funky processing of '%param;'
	// references.  It's not clearly defined in the XML spec; but it
	// boils down to having those refs be _lexical_ in most cases to
	// include partial syntax productions.  It also needs selective
	// enabling; "<!ENTITY % foo ...>" must work, for example, and
	// if "bar" is an empty string PE, "ab%bar;cd" becomes "abcd"
	// if it's expanded in a literal, else "ab  cd".  PEs also do
	// not expand within comments or PIs, and external PEs are only
	// allowed to have markup decls (and so aren't handled lexically).
	//
	// This PE handling should be merged into maybeWhitespace, where
	// it can be dealt with more consistently.
	//
	// Also, there are some validity constraints in this area.
	//
	char c;

	// doLexicalPE is known to be true from here on, so the tests of
	// it below always succeed
	while (in.isEOF ()) {
	    if (in.isInternal () || (doLexicalPE && !in.isDocument ()))
		in = in.pop ();
	    else {
		fatal ("P-064", new Object [] { in.getName () });
	    }
	}
	if ((c = in.getc ()) == '%' && doLexicalPE) {
	    // PE ref ::= '%' name ';'
	    String	name = maybeGetName ();
	    Object	entity;

	    if (name == null)
		fatal ("P-011");
	    nextChar (';', "F-021", name);
	    entity = params.get (name);

	    // push a magic "entity" before and after the
	    // real one, so ungetc() behaves uniformly
	    // (pushed last means read first: this one is read AFTER the entity)
	    pushReader (" ".toCharArray (), null, false);
	    if (entity instanceof InternalEntity)
		pushReader (((InternalEntity) entity).buf, name, false);
	    else if (entity instanceof ExternalEntity)
		// PEs can't be unparsed!
		// XXX if this returns false ...
		pushReader ((ExternalEntity) entity);
	    else if (entity == null)
		// see note in maybePEReference re making this be nonfatal.
		fatal ("V-022");
	    else
		throw new InternalError ();
	    // ... and this one is read BEFORE the entity
	    pushReader (" ".toCharArray (), null, false);
	    return in.getc ();
	}
	return c;
    }

    /**
     * Pushes back the character last read, by delegating to the
     * current input entity.
     */
    private void ungetc () {
        this.in.ungetc ();
    }

    /**
     * Tests whether the current input starts with <code>s</code>, by
     * delegating to in.peek(s, null).  Callers in this class rely on a
     * successful match having consumed the text.
     *
     * @param s the text to look for
     * @return whatever the input entity's peek() reports
     */
    private boolean peek (String s) 
                    throws IOException, SAXException { 

        final boolean matched = this.in.peek (s, null);
        return matched;
    }
    
    // Return the entity starting the specified declaration
    // (for validating declaration nesting) else null.

    /**
     * Tests whether the input starts with '&lt;' immediately followed by
     * <code>s</code>.  If only the '&lt;' matches it is pushed back.
     *
     * @param s declaration keyword including the '!', e.g. "!ELEMENT"
     * @return the input entity in which the declaration starts (used to
     *         validate declaration nesting), or null if there is no match
     */
    private InputEntity peekDeclaration (String s)
                        throws IOException, SAXException {

        if (!in.peekc ('<'))
            return null;

        final InputEntity opener = in;
        if (!in.peek (s, null)) {
            // some other markup: give the '<' back
            in.ungetc ();
            return null;
        }
        return opener;
    }

    /**
     * Requires the next character to be <code>c</code>, after popping
     * exhausted input entities (the document entity is never popped).
     *
     * @param c the expected character
     * @param location message ID of a text describing where the
     *        character is expected; used in the diagnostic
     * @param near a name to quote in the diagnostic, or null for none
     * @throws SAXException P-008 if another character is found
     */
    private void nextChar (char c, String location, String near)
                 throws IOException, SAXException {

        while (in.isEOF () && !in.isDocument ())
            in = in.pop ();

        if (in.peekc (c))
            return;

        final Character expected = new Character (c);
        final String where = messages.getMessage (locale, location);
        final String quoted = (near == null ? "" : ('"' + near + '"'));
        fatal ("P-008", new Object [] { expected, where, quoted });
    }
    

    /**
     * Makes an in-memory character buffer the current input; the previous
     * input is handed to the new entity so that it can be restored later.
     *
     * @param buf the characters to read
     * @param name entity name, may be null
     * @param isGeneral its negation is passed as the last argument of
     *        InputEntity.init()
     */
    private void pushReader (char buf [], String name, boolean isGeneral)
                 throws SAXException {

        final InputEntity pushed =
            InputEntity.getInputEntity (dtdHandler, locale);
        final boolean notGeneral = !isGeneral;

        pushed.init (buf, name, in, notGeneral);
        in = pushed;
    }

    /**
     * Opens an external entity through the entity resolver and makes it
     * the current input.
     *
     * @param next the external entity to read
     * @return always true
     * @throws IOException if the entity cannot be opened; the failure is
     *         reported to the handler as a fatal error first and the
     *         original exception is then rethrown
     */
    private boolean pushReader (ExternalEntity next)
                    throws IOException, SAXException {

        final InputEntity pushed =
            InputEntity.getInputEntity (dtdHandler, locale);
        InputSource source;

        try {
            source = next.getInputSource (resolver);
        } catch (IOException e) {
            final StringBuffer msg = new StringBuffer (
                "unable to open the external entity from :");
            msg.append (next.systemId);
            if (next.publicId != null)
                msg.append (" (public id:").append (next.publicId).append (")");

            final SAXParseException spe = new SAXParseException (
                msg.toString (),
                getPublicId (), getSystemId (),
                getLineNumber (), getColumnNumber (), e);
            dtdHandler.fatalError (spe);
            throw e;
        }

        pushed.init (source, next.name, in, next.isPE);
        in = pushed;
        return true;
    }

    /**
     * Returns the public identifier reported by the current input entity.
     *
     * @return the identifier, or null when there is no current input
     */
    public String getPublicId() {

        if (in == null)
            return null;
        return in.getPublicId ();
    }

    /**
     * Returns the system identifier reported by the current input entity.
     *
     * @return the identifier, or null when there is no current input
     */
    public String getSystemId() {

        if (in == null)
            return null;
        return in.getSystemId ();
    }

    /**
     * Returns the line number reported by the current input entity.
     *
     * @return the line number, or -1 when there is no current input
     */
    public int getLineNumber() {

        if (in == null)
            return -1;
        return in.getLineNumber ();
    }
	
    /**
     * Returns the column number reported by the current input entity.
     *
     * @return the column number, or -1 when there is no current input
     */
    public int getColumnNumber() {

        if (in == null)
            return -1;
        return in.getColumnNumber ();
    }

    // error handling convenience routines

    /**
     * Reports a warning at the current parse position to the DTD handler.
     *
     * @param messageId ID of the message in the message catalog
     * @param parameters message arguments, may be null
     * @throws SAXException if the handler throws
     */
    private void warning (String messageId, Object parameters [])
                 throws SAXException {

        final String text =
            messages.getMessage (locale, messageId, parameters);

        dtdHandler.warning (new SAXParseException (text,
            getPublicId (), getSystemId (),
            getLineNumber (), getColumnNumber ()));
    }

    /**
     * Reports an error to <code>dtdHandler</code>.  The message text is
     * looked up in the catalog for the current locale and wrapped in a
     * SAXParseException carrying the current public id, system id, line
     * and column.  This method contains no throw statement of its own.
     *
     * @param messageId catalog key of the diagnostic
     * @param parameters values handed to the catalog along with the key
     */
    void error (String messageId, Object parameters [])
         throws SAXException {

        String text = messages.getMessage (locale, messageId, parameters);
        String publicId = getPublicId ();
        String systemId = getSystemId ();
        int line = getLineNumber ();
        int column = getColumnNumber ();

        dtdHandler.error (
            new SAXParseException (text, publicId, systemId, line, column));
    }

    /**
     * Shorthand for a fatal error whose message takes no parameters;
     * delegates to {@link #fatal(String, Object[])} with a null
     * parameter array.
     *
     * @param messageId catalog key of the diagnostic
     */
    private void fatal (String messageId) throws SAXException {

        Object noParameters [] = null;
        fatal (messageId, noParameters);
    }

    /**
     * Reports a fatal error and aborts.  A SAXParseException is built
     * from the catalog message for the current locale and the current
     * public id, system id, line and column; it is first passed to
     * <code>dtdHandler.fatalError</code> and then thrown, so this method
     * never returns normally.
     *
     * @param messageId catalog key of the diagnostic
     * @param parameters values handed to the catalog along with the key
     * @throws SAXException always; the exception that was reported,
     *	unless the handler call itself throws first
     */
    private void fatal (String messageId, Object parameters [])
                 throws SAXException {

        String text = messages.getMessage (locale, messageId, parameters);
        String publicId = getPublicId ();
        String systemId = getSystemId ();
        int line = getLineNumber ();
        int column = getColumnNumber ();

        SAXParseException failure =
            new SAXParseException (text, publicId, systemId, line, column);

        dtdHandler.fatalError (failure);
        throw failure;
    }

    //
    // Map char arrays to strings ... cuts down both on memory and
    // CPU usage for element/attribute/other names that are reused.
    //
    // Documents typically repeat names a lot, so we more or less
    // intern all the strings within the document; since some strings
    // are repeated in multiple documents (e.g. stylesheets) we go
    // a bit further, and intern globally.
    //
    static class NameCache {
        //
        // Fixed-size bucket array; it is never resized here, so a size
        // that is too small only costs longer collision chains.  The
        // size is prime and meant to be comfortably larger than most
        // XML files need.
        //
        NameCacheEntry	hashtable [] = new NameCacheEntry [541];

        //
        // Common case: callers only want the cached String for the
        // first 'len' chars of 'value'.
        //
        String lookup (char value [], int len) {

            NameCacheEntry found = lookupEntry (value, len);
            return found.name;
        }

        //
        // Returns the cache entry for the first 'len' chars of 'value',
        // creating and chaining in a new entry when none matches.  The
        // entry exposes the chars as well as the string, for callers
        // that need to scan them (mostly for element end tags).
        //
        NameCacheEntry lookupEntry (char value [], int len) {

            // same 31-multiplier hash as before, folded into a
            // non-negative bucket number
            int hash = 0;
            int pos = 0;
            while (pos < len) {
                hash = 31 * hash + value [pos];
                pos++;
            }
            int slot = (hash & 0x7fffffff) % hashtable.length;

            // walk the collision chain looking for an existing entry
            NameCacheEntry candidate = hashtable [slot];
            while (candidate != null) {
                if (candidate.matches (value, len)) {
                    return candidate;
                }
                candidate = candidate.next;
            }

            // nothing cached yet: keep a private copy of the chars
            NameCacheEntry created = new NameCacheEntry ();
            char copy [] = new char [len];
            System.arraycopy (value, 0, copy, 0, len);
            created.chars = copy;

            //
            // NOTE:  JDK 1.1 has a fixed size string intern table,
            // with non-GC'd entries.  It can panic here; that's a
            // JDK problem, use 1.2 or later with many identifiers.
            //
            created.name = new String (copy).intern ();	// "global" intern

            // the new entry becomes the head of its bucket's chain
            created.next = hashtable [slot];
            hashtable [slot] = created;
            return created;
        }
    }

    //
    // One node in a NameCache collision chain: the cached string, the
    // characters it was built from, and the link to the next node.
    //
    static class NameCacheEntry {

        String		name;
        char		chars [];
        NameCacheEntry	next;

        //
        // True when this entry holds exactly 'len' chars and they equal
        // the first 'len' chars of 'value'.
        //
        boolean matches (char value [], int len) {

            if (len != chars.length) {
                return false;
            }

            // scan forward until the first difference or the end
            int pos = 0;
            while (pos < len && chars [pos] == value [pos]) {
                pos++;
            }
            return pos == len;
        }
    }

    //
    // Message catalog for diagnostics.
    //
    static final Catalog messages = new Catalog ();

    //
    // Message catalog type for this parser; its only member is a
    // constructor that hands DTDParser.class to MessageCatalog.
    // NOTE(review): presumably that class is used to locate the message
    // resources -- confirm against MessageCatalog.
    //
    static final class Catalog extends MessageCatalog {

        Catalog () {
            super (DTDParser.class);
        }
    }

}