/* -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- //------100-columns-wide------>|*/ /* * Copyright (c) 2002-2004 Extreme! Lab, Indiana University. All rights reserved. * * This software is open source. See the bottom of this file for the licence. * * $Id: MXParserNonValidating.java,v 1.6 2004/03/02 09:14:41 aslom Exp $ */ package org.xmlpull.mxp1; import java.io.IOException; import org.xmlpull.v1.XmlPullParserException; /** * Extend MXP parser to be full non validating XML 1.0 parser (added internal * DTD parsing and support for full XML 1.0 (or 1.1) character classes). * * @author <a href="http://www.extreme.indiana.edu/~aslom/">Aleksander * Slominski</a> */ public class MXParserNonValidating extends MXParserCachingStrings { private boolean processDocDecl; public MXParserNonValidating() { super(); } @Override public boolean getFeature(String name) { if (FEATURE_PROCESS_DOCDECL.equals(name)) { return processDocDecl; } else { return super.getFeature(name); } } @Override protected char[] lookuEntityReplacement(int entitNameLen) throws XmlPullParserException, IOException { if (!allStringsInterned) { final int hash = fastHash(buf, posStart, posEnd - posStart); LOOP: for (int i = entityEnd - 1; i >= 0; --i) { if (hash == entityNameHash[i] && entitNameLen == entityNameBuf[i].length) { final char[] entityBuf = entityNameBuf[i]; for (int j = 0; j < entitNameLen; j++) { if (buf[posStart + j] != entityBuf[j]) { continue LOOP; } } if (tokenize) { text = entityReplacement[i]; } return entityReplacementBuf[i]; } } } else { entityRefName = newString(buf, posStart, posEnd - posStart); for (int i = entityEnd - 1; i >= 0; --i) { // take advantage that interning for newStirng is enforced if (entityRefName == entityName[i]) { if (tokenize) { text = entityReplacement[i]; } return entityReplacementBuf[i]; } } } return null; } // will need to overwrite more() and processEntityRef ... @Override protected char more() throws IOException, XmlPullParserException { return super.more(); } @Override protected void parseDocdecl() throws XmlPullParserException, IOException { // make sure that tokenize flag is disabled temporarily!!!! final boolean oldTokenize = tokenize; try { // ASSUMPTION: seen <!D char ch = more(); if (ch != 'O') { throw new XmlPullParserException("expected <!DOCTYPE", this, null); } ch = more(); if (ch != 'C') { throw new XmlPullParserException("expected <!DOCTYPE", this, null); } ch = more(); if (ch != 'T') { throw new XmlPullParserException("expected <!DOCTYPE", this, null); } ch = more(); if (ch != 'Y') { throw new XmlPullParserException("expected <!DOCTYPE", this, null); } ch = more(); if (ch != 'P') { throw new XmlPullParserException("expected <!DOCTYPE", this, null); } ch = more(); if (ch != 'E') { throw new XmlPullParserException("expected <!DOCTYPE", this, null); } posStart = pos; // do simple and crude scanning for end of doctype // [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' // (markupdecl | DeclSep)* ']' S?)? '>' ch = requireNextS(); ch = readName(ch); ch = skipS(ch); // [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S // PubidLiteral S SystemLiteral if (ch == 'S' || ch == 'P') { ch = processExternalId(ch); ch = skipS(ch); } if (ch == '[') { processInternalSubset(); } ch = skipS(ch); if (ch != '>') { throw new XmlPullParserException( "expected > to finish <[DOCTYPE but got " + printable(ch), this, null); } posEnd = pos - 1; } finally { tokenize = oldTokenize; } } protected void processAttlistDecl(char ch) throws XmlPullParserException, IOException { // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' // [53] AttDef ::= S Name S AttType S DefaultDecl // [54] AttType ::= StringType | TokenizedType | EnumeratedType // [55] StringType ::= 'CDATA' // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | // 'ENTITIES' | 'NMTOKEN' // | 'NMTOKENS' // [57] EnumeratedType ::= NotationType | Enumeration // [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? // ')' // [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? // AttValue) // [WFC: No < in Attribute Values] // assert ch == 'A' } protected void processElementDecl(char ch) throws XmlPullParserException, IOException { // [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' // ???? [VC: Unique Element Type Declaration] // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children // [47] children ::= (choice | seq) ('?' | '*' | '+')? // [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? // [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' // [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' // | '(' S? '#PCDATA' S? ')' // assert ch == 'L' ch = requireNextS(); readName(ch); ch = requireNextS(); // readContentSpec(ch); } protected void processEntityDecl(char ch) throws XmlPullParserException, IOException { // [70] EntityDecl ::= GEDecl | PEDecl // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) // [74] PEDef ::= EntityValue | ExternalID // [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S // PubidLiteral S SystemLiteral // [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' // | "'" ([^%&'] | PEReference | Reference)* "'" // assert ch == 'N' } protected char processExternalId(char ch) throws XmlPullParserException, IOException { // [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S // PubidLiteral S SystemLiteral // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") // [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | // [-'()+,./:=?;!*#@$_%] // TODO return ch; } protected void processInternalSubset() throws XmlPullParserException, IOException { // [28] ... (markupdecl | DeclSep)* ']' // [WFC: External Subset] // [28a] DeclSep ::= PEReference | S // [WFC: PE Between Declarations] // [69] PEReference ::= '%' Name ';' //[WFC: No Recursion] [WFC: In DTD] while (true) { char ch = more(); // firs ttime called it will skip initial "[" if (ch == ']') { break; } if (ch == '%') { processPEReference(); } else if (isS(ch)) { ch = skipS(ch); } else { processMarkupDecl(ch); } } } protected void processMarkupDecl(char ch) throws XmlPullParserException, IOException { // [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | // NotationDecl | PI | Comment // [WFC: PEs in Internal Subset] // BIG SWITCH statement if (ch != '<') { throw new XmlPullParserException( "expected < for markupdecl in DTD not " + printable(ch), this, null); } ch = more(); if (ch == '?') { parsePI(); } else if (ch == '!') { ch = more(); if (ch == '-') { // note: if(tokenize == false) posStart/End is NOT changed!!!! parseComment(); } else { ch = more(); if (ch == 'A') { processAttlistDecl(ch); // A-TTLIST } else if (ch == 'E') { ch = more(); if (ch == 'L') { processElementDecl(ch); // EL-EMENT } else if (ch == 'N') { processEntityDecl(ch); // EN-TITY } else { throw new XmlPullParserException( "expected ELEMENT or ENTITY after <! in DTD not " + printable(ch), this, null); } } else if (ch == 'N') { processNotationDecl(ch); // N-OTATION } else { throw new XmlPullParserException( "expected markupdecl after <! in DTD not " + printable(ch), this, null); } } } else { throw new XmlPullParserException("expected markupdecl in DTD not " + printable(ch), this, null); } } protected void processNotationDecl(char ch) throws XmlPullParserException, IOException { // [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) // S? '>' // [83] PublicID ::= 'PUBLIC' S PubidLiteral // assert ch == 'N' } protected void processPEReference() throws XmlPullParserException, IOException { // TODO } protected char readName(char ch) throws XmlPullParserException, IOException { if (isNameStartChar(ch)) { throw new XmlPullParserException( "XML name must start with name start character not " + printable(ch), this, null); } while (isNameChar(ch)) { ch = more(); } return ch; } /** * This allows to change processing DOCDECL (controls if parser is * non-validating). */ @Override public void setFeature(String name, boolean state) throws XmlPullParserException { if (FEATURE_PROCESS_DOCDECL.equals(name)) { if (eventType != START_DOCUMENT) { throw new XmlPullParserException( "process DOCDECL feature can only be changed before parsing", this, null); } processDocDecl = state; if (state == false) { // } } else { super.setFeature(name, state); } } } /* * Indiana University Extreme! Lab Software License, Version 1.2 * * Copyright (c) 2002-2004 The Trustees of Indiana University. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1) All redistributions of source code must retain the above copyright notice, * the list of authors in the original source code, this list of conditions and * the disclaimer listed in this license; * * 2) All redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the disclaimer listed in this license in * the documentation and/or other materials provided with the distribution; * * 3) Any documentation included with all redistributions must include the * following acknowledgement: * * "This product includes software developed by the Indiana University Extreme! * Lab. For further information please visit http://www.extreme.indiana.edu/" * * Alternatively, this acknowledgment may appear in the software itself, and * wherever such third-party acknowledgments normally appear. * * 4) The name "Indiana University" or "Indiana University Extreme! Lab" shall * not be used to endorse or promote products derived from this software without * prior written permission from Indiana University. For written permission, * please contact http://www.extreme.indiana.edu/. * * 5) Products derived from this software may not use "Indiana * University" name nor may "Indiana University" appear in their name, without * prior written permission of the Indiana University. * * Indiana University provides no reassurances that the source code provided * does not infringe the patent or any other intellectual property rights of any * other entity. Indiana University disclaims any liability to any recipient for * claims brought by any other entity based on infringement of intellectual * property rights or otherwise. * * LICENSEE UNDERSTANDS THAT SOFTWARE IS PROVIDED "AS IS" FOR WHICH NO * WARRANTIES AS TO CAPABILITIES OR ACCURACY ARE MADE. INDIANA UNIVERSITY GIVES * NO WARRANTIES AND MAKES NO REPRESENTATION THAT SOFTWARE IS FREE OF * INFRINGEMENT OF THIRD PARTY PATENT, COPYRIGHT, OR OTHER PROPRIETARY RIGHTS. * INDIANA UNIVERSITY MAKES NO WARRANTIES THAT SOFTWARE IS FREE FROM "BUGS", * "VIRUSES", "TROJAN HORSES", "TRAP DOORS", "WORMS", OR OTHER HARMFUL CODE. * LICENSEE ASSUMES THE ENTIRE RISK AS TO THE PERFORMANCE OF SOFTWARE AND/OR * ASSOCIATED MATERIALS, AND TO THE PERFORMANCE AND VALIDITY OF INFORMATION * GENERATED USING SOFTWARE. */