/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.cocoon.components.sax; import java.util.ArrayList; import org.apache.cocoon.xml.DefaultLexicalHandler; import org.apache.cocoon.xml.XMLConsumer; import org.apache.cocoon.xml.XMLProducer; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; import org.xml.sax.ext.LexicalHandler; import org.xml.sax.helpers.AttributesImpl; import org.xml.sax.helpers.DefaultHandler; /** * This a simple xml compiler which takes a byte array as input. * If you want to reuse this interpreter make sure to call first {@link #recycle()} * and then set the new consumer for the sax events. * * @version $Id: AbstractXMLByteStreamInterpreter.java 587751 2007-10-24 02:41:36Z vgritsenko $ */ public abstract class AbstractXMLByteStreamInterpreter implements XMLProducer, XMLByteStreamConstants { private ArrayList list = new ArrayList(); protected static final ContentHandler EMPTY_CONTENT_HANDLER = new DefaultHandler(); /** The <code>ContentHandler</code> receiving SAX events. */ protected ContentHandler contentHandler = EMPTY_CONTENT_HANDLER; /** The <code>LexicalHandler</code> receiving SAX events. */ protected LexicalHandler lexicalHandler = DefaultLexicalHandler.NULL_HANDLER; /** * Set the <code>XMLConsumer</code> that will receive XML data. * <br> * This method will simply call <code>setContentHandler(consumer)</code> * and <code>setLexicalHandler(consumer)</code>. */ public void setConsumer(XMLConsumer consumer) { setContentHandler(consumer); setLexicalHandler(consumer); } /** * Set the <code>ContentHandler</code> that will receive XML data. * <br> * Subclasses may retrieve this <code>ContentHandler</code> instance * accessing the protected <code>super.contentHandler</code> field. */ public void setContentHandler(ContentHandler handler) { this.contentHandler = handler; } /** * Set the <code>LexicalHandler</code> that will receive XML data. * <br> * Subclasses may retrieve this <code>LexicalHandler</code> instance * accessing the protected <code>super.lexicalHandler</code> field. */ public void setLexicalHandler(LexicalHandler handler) { this.lexicalHandler = handler; } public void recycle() { this.contentHandler = EMPTY_CONTENT_HANDLER; this.lexicalHandler = DefaultLexicalHandler.NULL_HANDLER; this.list.clear(); } /** * This method needs to be used by sub classes to start the parsing of the byte stream * * @throws SAXException */ protected void parse() throws SAXException { this.list.clear(); this.checkProlog(); int event = -1; int lastEvent = -1; while ( ( event = readEvent() ) != -1 ) { lastEvent = event; switch (event) { case START_DOCUMENT: contentHandler.startDocument(); break; case END_DOCUMENT: contentHandler.endDocument(); break; case START_PREFIX_MAPPING: contentHandler.startPrefixMapping(this.readString(), this.readString()); break; case END_PREFIX_MAPPING: contentHandler.endPrefixMapping(this.readString()); break; case START_ELEMENT: int attributes = this.readAttributes(); AttributesImpl atts = new AttributesImpl(); for (int i = 0; i < attributes; i++) { atts.addAttribute(this.readString(), this.readString(), this.readString(), this.readString(), this.readString()); } contentHandler.startElement(this.readString(), this.readString(), this.readString(), atts); break; case END_ELEMENT: contentHandler.endElement(this.readString(), this.readString(), this.readString()); break; case CHARACTERS: char[] chars = this.readChars(); int len = chars.length; while (len > 0 && chars[len-1]==0) len--; if (len > 0) contentHandler.characters(chars, 0, len); break; case IGNORABLE_WHITESPACE: char[] spaces = this.readChars(); len = spaces.length; while (len > 0 && spaces[len-1]==0) len--; if (len > 0) contentHandler.characters(spaces, 0, len); break; case PROCESSING_INSTRUCTION: contentHandler.processingInstruction(this.readString(), this.readString()); break; case COMMENT: chars = this.readChars(); len = chars.length; while (len > 0 && chars[len-1]==0) len--; if (len > 0) lexicalHandler.comment(chars, 0, len); break; case LOCATOR: { String publicId = this.readString(); String systemId = this.readString(); int lineNumber = this.read(); int columnNumber = this.read(); org.xml.sax.helpers.LocatorImpl locator = new org.xml.sax.helpers.LocatorImpl(); locator.setPublicId(publicId); locator.setSystemId(systemId); locator.setLineNumber(lineNumber); locator.setColumnNumber(columnNumber); contentHandler.setDocumentLocator(locator); } break; case START_DTD: lexicalHandler.startDTD(this.readString(), this.readString(), this.readString()); break; case END_DTD: lexicalHandler.endDTD(); break; case START_CDATA: lexicalHandler.startCDATA(); break; case END_CDATA: lexicalHandler.endCDATA(); break; case SKIPPED_ENTITY: contentHandler.skippedEntity( this.readString() ); break; case START_ENTITY: lexicalHandler.startEntity( this.readString() ); break; case END_ENTITY: lexicalHandler.endEntity( this.readString() ); break; default: throw new SAXException ("parsing error: event not supported: " + event); } } if( lastEvent != END_DOCUMENT ) { throw new SAXException ("parsing error: premature end of stream (lastEvent was " + lastEvent + ")." ); } } protected int readEvent() throws SAXException { return this.read(); } private int readAttributes() throws SAXException { int ch1 = this.read(); int ch2 = this.read(); return ((ch1 << 8) + (ch2 << 0)); } private String readString() throws SAXException { int length = this.readWord(); int index; if (length >= 0x00008000) { // index value in 16-bits format index = length & 0x00007FFF; return (String) list.get(index); } else { if (length == 0x00007FFF) { length = this.readLong(); if (length >= 0x80000000) { // index value in 32-bits format index = length & 0x7fffffff; return (String) list.get(index); } } char[] chars = this.readChars(length); int len = chars.length; if (len > 0) { while (chars[len-1]==0) len--; } String str; if (len == 0) { str = ""; } else { str = new String(chars, 0, len); } list.add(str); return str; } } /** * The returned char array might contain any number of zero bytes * at the end */ private char[] readChars() throws SAXException { int length = this.readWord(); if (length == 0x00007FFF) { length = this.readLong(); } return this.readChars(length); } protected abstract int read() throws SAXException; protected abstract int read(byte[] b) throws SAXException; /** * The returned char array might contain any number of zero bytes * at the end */ private char[] readChars(int len) throws SAXException { char[] str = new char[len]; byte[] bytearr = new byte[len]; int c, char2, char3; int count = 0; int i = 0; this.readBytes(bytearr); while (count < len) { c = bytearr[count] & 0xff; switch (c >> 4) { case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7: // 0xxxxxxx count++; str[i++] = (char) c; break; case 12: case 13: // 110x xxxx 10xx xxxx count += 2; char2 = bytearr[count-1]; str[i++] = (char) (((c & 0x1F) << 6) | (char2 & 0x3F)); break; case 14: // 1110 xxxx 10xx xxxx 10xx xxxx count += 3; char2 = bytearr[count-2]; char3 = bytearr[count-1]; str[i++] = ((char)(((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | ((char3 & 0x3F) << 0))); break; default: // 10xx xxxx, 1111 xxxx throw new SAXException("UTFDataFormatException"); } } return str; } private void readBytes(byte[] b) throws SAXException { final int bytesRead = this.read( b ); if (bytesRead < b.length ) { throw new SAXException("End of is reached."); } } private int readWord() throws SAXException { int ch1 = this.read(); int ch2 = this.read(); return ((ch1 << 8) + (ch2 << 0)); } private int readLong() throws SAXException { int ch1 = this.read(); int ch2 = this.read(); int ch3 = this.read(); int ch4 = this.read(); return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0)); } private void checkProlog() throws SAXException { int valid = 0; int ch = 0; if ((ch = this.read()) == 'C') valid++; else throw new SAXException("Unrecognized file format (." + valid + "," + ch + ")"); if ((ch = this.read()) == 'X') valid++; else throw new SAXException("Unrecognized file format (." + valid + "," + ch + ")"); if ((ch = this.read()) == 'M') valid++; else throw new SAXException("Unrecognized file format (." + valid + "," + ch + ")"); if ((ch = this.read()) == 'L') valid++; else throw new SAXException("Unrecognized file format (." + valid + "," + ch + ")"); if ((ch = this.read()) == 1) valid++; else throw new SAXException("Unrecognized file format (." + valid + "," + ch + ")"); if ((ch = this.read()) == 0) valid++; else throw new SAXException("Unrecognized file format (." + valid + "," + ch + ")"); } }