/* -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- //------100-columns-wide------>|*/ /* * Copyright (c) 2002-2004 Extreme! Lab, Indiana University. All rights reserved. * * This software is open source. See the bottom of this file for the licence. * * $Id: MXParserNonValidating.java,v 1.6 2004/03/02 09:14:41 aslom Exp $ */ package org.xmlpull.mxp1; import java.io.IOException; import org.xmlpull.v1.XmlPullParserException; /** * Extend MXP parser to be full non validating XML 1.0 parser * (added internal DTD parsing and support for full XML 1.0 (or 1.1) character classes). * * @author <a href="http://www.extreme.indiana.edu/~aslom/">Aleksander Slominski</a> */ public class MXParserNonValidating extends MXParserCachingStrings { private boolean processDocDecl; public MXParserNonValidating() { super(); } /** * This allows to change processing DOCDECL (controls if parser is non-validating). */ public void setFeature(String name, boolean state) throws XmlPullParserException { if(FEATURE_PROCESS_DOCDECL.equals(name)) { if(eventType != START_DOCUMENT) throw new XmlPullParserException( "process DOCDECL feature can only be changed before parsing", this, null); processDocDecl = state; if(state == false) { // } } else { super.setFeature(name, state); } } public boolean getFeature(String name) { if(FEATURE_PROCESS_DOCDECL.equals(name)) { return processDocDecl; } else { return super.getFeature(name); } } // will need to overwrite more() and processEntityRef ... protected char more() throws IOException, XmlPullParserException { return super.more(); } protected char[] lookuEntityReplacement(int entitNameLen) throws XmlPullParserException, IOException { if(!allStringsInterned) { final int hash = fastHash(buf, posStart, posEnd - posStart); LOOP: for (int i = entityEnd - 1; i >= 0; --i) { if(hash == entityNameHash[ i ] && entitNameLen == entityNameBuf[ i ].length) { final char[] entityBuf = entityNameBuf[ i ]; for (int j = 0; j < entitNameLen; j++) { if(buf[posStart + j] != entityBuf[j]) continue LOOP; } if(tokenize) text = entityReplacement[ i ]; return entityReplacementBuf[ i ]; } } } else { entityRefName = newString(buf, posStart, posEnd - posStart); for (int i = entityEnd - 1; i >= 0; --i) { // take advantage that interning for newStirng is enforced if(entityRefName == entityName[ i ]) { if(tokenize) text = entityReplacement[ i ]; return entityReplacementBuf[ i ]; } } } return null; } protected void parseDocdecl() throws XmlPullParserException, IOException { //make sure that tokenize flag is disabled temporarily!!!! final boolean oldTokenize = tokenize; try { //ASSUMPTION: seen <!D char ch = more(); if(ch != 'O') throw new XmlPullParserException( "expected <!DOCTYPE", this, null); ch = more(); if(ch != 'C') throw new XmlPullParserException( "expected <!DOCTYPE", this, null); ch = more(); if(ch != 'T') throw new XmlPullParserException( "expected <!DOCTYPE", this, null); ch = more(); if(ch != 'Y') throw new XmlPullParserException( "expected <!DOCTYPE", this, null); ch = more(); if(ch != 'P') throw new XmlPullParserException( "expected <!DOCTYPE", this, null); ch = more(); if(ch != 'E') throw new XmlPullParserException( "expected <!DOCTYPE", this, null); posStart = pos; // do simple and crude scanning for end of doctype // [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' // (markupdecl | DeclSep)* ']' S?)? '>' ch = requireNextS(); int nameStart = pos; ch = readName(ch); int nameEnd = pos; ch = skipS(ch); // [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral if(ch == 'S' || ch == 'P') { ch = processExternalId(ch); ch = skipS(ch); } if(ch == '[') { processInternalSubset(); } ch = skipS(ch); if(ch != '>') { throw new XmlPullParserException( "expected > to finish <[DOCTYPE but got "+printable(ch), this, null); } posEnd = pos - 1; } finally { tokenize = oldTokenize; } } protected char processExternalId(char ch) throws XmlPullParserException, IOException { // [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") // [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] //TODO return ch; } protected void processInternalSubset() throws XmlPullParserException, IOException { // [28] ... (markupdecl | DeclSep)* ']' // [WFC: External Subset] // [28a] DeclSep ::= PEReference | S // [WFC: PE Between Declarations] // [69] PEReference ::= '%' Name ';' //[WFC: No Recursion] [WFC: In DTD] while(true) { char ch = more(); // firs ttime called it will skip initial "[" if(ch == ']') break; if(ch == '%') { processPEReference(); } else if(isS(ch)) { ch = skipS(ch); } else { processMarkupDecl(ch); } } } protected void processPEReference() throws XmlPullParserException, IOException { //TODO } protected void processMarkupDecl(char ch) throws XmlPullParserException, IOException { // [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment // [WFC: PEs in Internal Subset] //BIG SWITCH statement if(ch != '<') { throw new XmlPullParserException("expected < for markupdecl in DTD not "+printable(ch), this, null); } ch = more(); if(ch == '?') { parsePI(); } else if(ch == '!') { ch = more(); if(ch == '-') { // note: if(tokenize == false) posStart/End is NOT changed!!!! parseComment(); } else { ch = more(); if(ch == 'A') { processAttlistDecl(ch); //A-TTLIST } else if(ch == 'E') { ch = more(); if(ch == 'L') { processElementDecl(ch); //EL-EMENT } else if(ch == 'N') { processEntityDecl(ch); // EN-TITY } else { throw new XmlPullParserException( "expected ELEMENT or ENTITY after <! in DTD not "+printable(ch), this, null); } } else if(ch == 'N') { processNotationDecl(ch); //N-OTATION } else { throw new XmlPullParserException( "expected markupdecl after <! in DTD not "+printable(ch),this, null); } } } else { throw new XmlPullParserException("expected markupdecl in DTD not "+printable(ch), this, null); } } protected void processElementDecl(char ch) throws XmlPullParserException, IOException { //[45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>' //???? [VC: Unique Element Type Declaration] // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children // [47] children ::= (choice | seq) ('?' | '*' | '+')? // [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? // [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')' // [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')' // [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' // | '(' S? '#PCDATA' S? ')' //assert ch == 'L' ch = requireNextS(); readName(ch); ch = requireNextS(); // readContentSpec(ch); } protected void processAttlistDecl(char ch) throws XmlPullParserException, IOException { // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>' // [53] AttDef ::= S Name S AttType S DefaultDecl // [54] AttType ::= StringType | TokenizedType | EnumeratedType // [55] StringType ::= 'CDATA' // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' // | 'NMTOKENS' // [57] EnumeratedType ::= NotationType | Enumeration // [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' // [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) // [WFC: No < in Attribute Values] //assert ch == 'A' } protected void processEntityDecl(char ch) throws XmlPullParserException, IOException { // [70] EntityDecl ::= GEDecl | PEDecl // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>' // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>' // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) // [74] PEDef ::= EntityValue | ExternalID // [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral //[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' // | "'" ([^%&'] | PEReference | Reference)* "'" //assert ch == 'N' } protected void processNotationDecl(char ch) throws XmlPullParserException, IOException { // [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>' // [83] PublicID ::= 'PUBLIC' S PubidLiteral //assert ch == 'N' } protected char readName(char ch) throws XmlPullParserException, IOException { if(isNameStartChar(ch)) { throw new XmlPullParserException( "XML name must start with name start character not "+printable(ch), this, null); } while(isNameChar(ch)) { ch = more(); } return ch; } } /* * Indiana University Extreme! Lab Software License, Version 1.2 * * Copyright (c) 2002-2004 The Trustees of Indiana University. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * 1) All redistributions of source code must retain the above * copyright notice, the list of authors in the original source * code, this list of conditions and the disclaimer listed in this * license; * * 2) All redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the disclaimer * listed in this license in the documentation and/or other * materials provided with the distribution; * * 3) Any documentation included with all redistributions must include * the following acknowledgement: * * "This product includes software developed by the Indiana * University Extreme! Lab. For further information please visit * http://www.extreme.indiana.edu/" * * Alternatively, this acknowledgment may appear in the software * itself, and wherever such third-party acknowledgments normally * appear. * * 4) The name "Indiana University" or "Indiana University * Extreme! Lab" shall not be used to endorse or promote * products derived from this software without prior written * permission from Indiana University. For written permission, * please contact http://www.extreme.indiana.edu/. * * 5) Products derived from this software may not use "Indiana * University" name nor may "Indiana University" appear in their name, * without prior written permission of the Indiana University. * * Indiana University provides no reassurances that the source code * provided does not infringe the patent or any other intellectual * property rights of any other entity. Indiana University disclaims any * liability to any recipient for claims brought by any other entity * based on infringement of intellectual property rights or otherwise. * * LICENSEE UNDERSTANDS THAT SOFTWARE IS PROVIDED "AS IS" FOR WHICH * NO WARRANTIES AS TO CAPABILITIES OR ACCURACY ARE MADE. INDIANA * UNIVERSITY GIVES NO WARRANTIES AND MAKES NO REPRESENTATION THAT * SOFTWARE IS FREE OF INFRINGEMENT OF THIRD PARTY PATENT, COPYRIGHT, OR * OTHER PROPRIETARY RIGHTS. INDIANA UNIVERSITY MAKES NO WARRANTIES THAT * SOFTWARE IS FREE FROM "BUGS", "VIRUSES", "TROJAN HORSES", "TRAP * DOORS", "WORMS", OR OTHER HARMFUL CODE. LICENSEE ASSUMES THE ENTIRE * RISK AS TO THE PERFORMANCE OF SOFTWARE AND/OR ASSOCIATED MATERIALS, * AND TO THE PERFORMANCE AND VALIDITY OF INFORMATION GENERATED USING * SOFTWARE. */