/* GASH 2 XMLReader.java The Ganymede object storage system. Created: 7 March 2000 Module By: Jonathan Abbey, jonabbey@arlut.utexas.edu ----------------------------------------------------------------------- Ganymede Directory Management System Copyright (C) 1996-2013 The University of Texas at Austin Ganymede is a registered trademark of The University of Texas at Austin Contact information Web site: http://www.arlut.utexas.edu/gash2 Author Email: ganymede_author@arlut.utexas.edu Email mailing list: ganymede@arlut.utexas.edu US Mail: Computer Science Division Applied Research Laboratories The University of Texas at Austin PO Box 8029, Austin TX 78713-8029 Telephone: (512) 835-3200 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ package arlut.csd.Util; import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PipedOutputStream; import java.io.PrintWriter; import java.io.UnsupportedEncodingException;; import java.util.ArrayList; import java.util.List; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import org.xml.sax.Attributes; import org.xml.sax.helpers.DefaultHandler; /*------------------------------------------------------------------------------ class XMLReader ------------------------------------------------------------------------------*/ /** * <p>This class is intended to serve as a stream-oriented pull proxy, * allowing the Ganymede server to read XML entity and character data * from a SAX parser entity by entity, rather than through the use of * a callback interface, as is traditionally done with SAX.</p> * * <p>When instantiated, the XMLReader creates a background thread * that receives SAX events from the Java 1.4 JAXP SAX2 parser. These * SAX events are converted to {@link arlut.csd.Util.XMLItem XMLItem} * objects and saved in an internal buffer. 
The user of the XMLReader * class calls getNextItem() to retrieve these XMLItem objects from * the XMLReader buffer, in order of receipt.</p> * * <p>The background parse thread is throttled back as needed to avoid * overflowing the XMLReader's internal buffer.</p> */ public final class XMLReader extends org.xml.sax.helpers.DefaultHandler implements Runnable, java.io.Closeable { public final static boolean debug = false; private javax.xml.parsers.SAXParser parser; private org.xml.sax.InputSource inputSource; private org.xml.sax.Locator locator; private final XMLItem[] buffer; private int enqueuePtr = 0; private int dequeuePtr = 0; private int bufferContents = 0; private int bufferSize; /** * Set the lowWaterMark to something low on a single processor * system, to something high (equal to bufferSize?) on a * multi-processor native threads system. */ private int lowWaterMark; /** * Set the highWaterMark to something high if on a single processor * system, to something low (equal to 0) on a multi-processor * native threads system. */ private int highWaterMark; private Thread inputThread; private boolean done = false; private XMLItem pushback = null; private XMLElement halfElement; private SharedStringBuffer charBuffer = new SharedStringBuffer(); private boolean skipWhiteSpace; private PrintWriter err; private CircleBuffer circleBuffer = new CircleBuffer(30); /* -- */ /** * @param xmlFilename Name of the file to read * @param bufferSize How many items the XMLReader will buffer in its * data structures at one time * @param skipWhiteSpace If true, the no-param getNextItem() and peekNextItem() * methods will jump over any all-whitespace character data between other * elements. 
*/
  public XMLReader(String xmlFilename, int bufferSize, boolean skipWhiteSpace) throws IOException
  {
    // Delegates to the four-argument filename constructor, supplying a
    // PrintWriter that writes to System.err encoded as UTF-8.
    this(xmlFilename, bufferSize, skipWhiteSpace, new PrintWriter(new OutputStreamWriter(System.err, "UTF-8")));
  }

  /**
   * Creates an XMLReader on the given File, sending debugging/error
   * output to System.err (encoded as UTF-8).
   *
   * @param xmlFile A File object to read
   * @param bufferSize How many items the XMLReader will buffer in its
   * data structures at one time
   * @param skipWhiteSpace If true, the no-param getNextItem() and peekNextItem()
   * methods will jump over any all-whitespace character data between other
   * elements.
   */
  public XMLReader(File xmlFile, int bufferSize, boolean skipWhiteSpace) throws IOException
  {
    this(xmlFile, bufferSize, skipWhiteSpace, new PrintWriter(new OutputStreamWriter(System.err, "UTF-8")));
  }

  /**
   * Creates an XMLReader on the given File by opening a
   * FileInputStream on it and delegating to the FileInputStream
   * constructor.
   *
   * @param xmlFile A File object to read
   * @param bufferSize How many items the XMLReader will buffer in its
   * data structures at one time
   * @param skipWhiteSpace If true, the no-param getNextItem() and peekNextItem()
   * methods will jump over any all-whitespace character data between other
   * elements.
   * @param err A PrintWriter object to send debugging/error output to
   */
  public XMLReader(File xmlFile, int bufferSize, boolean skipWhiteSpace, PrintWriter err) throws IOException
  {
    this(new FileInputStream(xmlFile), bufferSize, skipWhiteSpace, err);
  }

  /**
   * Creates an XMLReader on the named file by opening a
   * FileInputStream on it and delegating to the FileInputStream
   * constructor.
   *
   * @param xmlFilename Name of the file to read
   * @param bufferSize How many items the XMLReader will buffer in its
   * data structures at one time
   * @param skipWhiteSpace If true, the no-param getNextItem() and peekNextItem()
   * methods will jump over any all-whitespace character data between other
   * elements.
   * @param err A PrintWriter object to send debugging/error output to
   */
  public XMLReader(String xmlFilename, int bufferSize, boolean skipWhiteSpace, PrintWriter err) throws IOException
  {
    this(new FileInputStream(xmlFilename), bufferSize, skipWhiteSpace, err);
  }

  /**
   * @param fileStream A FileInputStream opened on a file for us to read.
* @param bufferSize How many items the XMLReader will buffer in its * data structures at one time * @param skipWhiteSpace If true, the no-param getNextItem() and peekNextItem() * methods will jump over any all-whitespace character data between other * elements. * @param err A PrintWriter object to send debugging/error output to */ public XMLReader(FileInputStream fileStream, int bufferSize, boolean skipWhiteSpace, PrintWriter err) throws IOException { try { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(false); factory.setValidating(false); parser = factory.newSAXParser(); } catch (Throwable t) { throw new RuntimeException(t); } BufferedInputStream inStream = new BufferedInputStream(fileStream); inputSource = new InputSource(inStream); if (bufferSize < 20) { bufferSize = 20; } this.bufferSize = bufferSize; buffer = new XMLItem[bufferSize]; if (false) // optimize for single processor { this.highWaterMark = bufferSize - 5; this.lowWaterMark = 5; } else // optimize for multi-processor { this.highWaterMark = 0; this.lowWaterMark = bufferSize; } this.skipWhiteSpace = skipWhiteSpace; this.err = err; inputThread = new Thread(this); inputThread.start(); } /** * This constructor takes a PipeOutputStream as a parameter, creates a large * matching input pipe to read from, and spins off the XMLReader's parsing * thread to process data that is fed into the PipeOutputStream. * * @param sourcePipe the PipeOutputStream object that XML characters are * @param bufferSize How many items the XMLReader will buffer in its * data structures at one time * @param skipWhiteSpace If true, the no-param getNextItem() and peekNextItem() * methods will jump over any all-whitespace character data between other * elements. 
*/ public XMLReader(PipedOutputStream sourcePipe, int bufferSize, boolean skipWhiteSpace) throws IOException { this(sourcePipe, bufferSize, skipWhiteSpace, new PrintWriter(new OutputStreamWriter(System.err, "UTF-8"))); } /** * This constructor takes a PipeOutputStream as a parameter, creates a large * matching input pipe to read from, and spins off the XMLReader's parsing * thread to process data that is fed into the PipeOutputStream. * * @param sourcePipe the PipeOutputStream object that XML characters are * @param bufferSize How many items the XMLReader will buffer in its * data structures at one time * @param skipWhiteSpace If true, the no-param getNextItem() and peekNextItem() * methods will jump over any all-whitespace character data between other * elements. * @param err A PrintWriter object to send debugging/error output to */ public XMLReader(PipedOutputStream sourcePipe, int bufferSize, boolean skipWhiteSpace, PrintWriter err) throws IOException { try { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(false); factory.setValidating(false); parser = factory.newSAXParser(); } catch (Throwable t) { throw new RuntimeException(t); } BigPipedInputStream bpis = new BigPipedInputStream(sourcePipe, 65536); // 64k inputSource = new InputSource(bpis); this.bufferSize = bufferSize; buffer = new XMLItem[bufferSize]; this.skipWhiteSpace = skipWhiteSpace; this.err = err; inputThread = new Thread(this); inputThread.start(); } /** * <p>getNextItem() returns the next {@link arlut.csd.Util.XMLItem XMLItem} * from the XMLReader's buffer. 
If the background thread's parsing has fallen * behind, getNextItem() will block until either data is made available from * the parse thread, or the XMLReader is closed.</p> * * <p>getNextItem() returns null when there are no more XML elements or character * data to be read from the XMLReader stream.</p> * * @param skipWhiteSpaceChars if true, getNextItem() will silently eat any * all-whitespace character data. */ public XMLItem getNextItem(boolean skipWhiteSpaceChars) { XMLItem value = null; boolean finished = false; /* -- */ synchronized (buffer) { while (!finished) { finished = true; // assume we won't be seeing whitespace chars while (!done && pushback == null && bufferContents == 0) { try { buffer.wait(); } catch (InterruptedException ex) { throw new RuntimeException("interrupted, can't wait for buffer to fill."); } } if (debug && done) { err.println("XMLReader.getNextItem(): pushback == " + String.valueOf(pushback)); err.println("XMLReader.getNextItem(): bufferContents == " + bufferContents); err.flush(); return null; } if (done && pushback == null && bufferContents == 0) { return null; } if (pushback != null) { value = pushback; pushback = null; } else { value = dequeue(); // if we have drained the buffer below the low water // mark, wake up the SAX parser thread and let it // start filling us up again if (bufferContents <= lowWaterMark) { buffer.notifyAll(); } } if (skipWhiteSpaceChars) { // if we are skipping all-whitespace XMLCharData, we'll set // finished to false if containsNonWhitespace() returns false. if (value instanceof XMLCharData) { finished = ((XMLCharData) value).containsNonWhitespace(); } } } if (debug) { System.err.println("XMLReader.getNextItem() returning " + value); } return value; } } /** * <p>getNextItem() returns the next {@link arlut.csd.Util.XMLItem XMLItem} * from the XMLReader's buffer. 
If the background thread's parsing has fallen
   * behind, getNextItem() will block until either data is made available from
   * the parse thread, or the XMLReader is closed.</p>
   *
   * <p>getNextItem() returns null when there are no more XML elements or character
   * data to be read from the XMLReader stream.</p>
   *
   * <p>This variant applies the skipWhiteSpace setting this XMLReader
   * was constructed with.</p>
   */
  public XMLItem getNextItem()
  {
    final boolean eatWhiteSpace = this.skipWhiteSpace;

    return getNextItem(eatWhiteSpace);
  }

  /**
   * <p>peekNextItem() returns the next {@link arlut.csd.Util.XMLItem XMLItem}
   * from the XMLReader's buffer.  If the background thread's parsing has fallen
   * behind, peekNextItem() will block until either data is made available from
   * the parse thread, or the XMLReader is closed.</p>
   *
   * <p>peekNextItem() returns null when there are no more XML elements or character
   * data to be read from the XMLReader stream.</p>
   *
   * @param skipWhiteSpaceChars if true, peekNextItem() will silently eat any
   * all-whitespace character data.  Any all-whitespace character data eaten
   * in this way will be taken out of the XMLReader buffer, and no subsequent
   * peekNextItem() or getNextItem(), with skipWhiteSpaceChars true or false,
   * will return that item.
*/ public XMLItem peekNextItem(boolean skipWhiteSpaceChars) { XMLItem value = null; boolean finished = false; /* -- */ synchronized (buffer) { while (!finished) { finished = true; // unless we eat whitespace // wait until there's data to be had while (!done && pushback == null && bufferContents == 0) { try { buffer.wait(); } catch (InterruptedException ex) { throw new RuntimeException("interrupted, can't wait for buffer to fill."); } } // if we're out of data and there will be no more, exit if (done && pushback == null && bufferContents == 0) { return null; } // identify the next value if (pushback != null) { value = pushback; } else { value = buffer[dequeuePtr]; } if (skipWhiteSpaceChars) { if ((value instanceof XMLCharData) && !((XMLCharData) value).containsNonWhitespace()) { getNextItem(false); // consume the whitespace finished = false; // loop again } } } if (debug) { System.err.println("XMLReader.peekNextItem() returning " + value); } return value; } } /** * <p>peekNextItem() returns the next {@link arlut.csd.Util.XMLItem XMLItem} * from the XMLReader's buffer. If the background thread's parsing has fallen * behind, peekNextItem() will block until either data is made available from * the parse thread, or the XMLReader is closed.</p> * * <p>peekNextItem() returns null when there are no more XML elements or character * data to be read from the XMLReader stream.</p> */ public XMLItem peekNextItem() { return peekNextItem(this.skipWhiteSpace); } /** * <p>pushbackItem() may be used to push the most recently read * XMLItem back onto the XMLReader's buffer. The XMLReader code * guarantees that there will be room to handle a single item * pushback, but two pushbacks in a row with no getNextItem() call * in between will cause an exception to be thrown.</p> */ public void pushbackItem(XMLItem item) { synchronized (buffer) { if (pushback != null) { throw new RuntimeException("can't pushback.. 
buffer overflow"); } pushback = item; buffer.notifyAll(); // in case we have multiple threads consuming } } /** * <p>This method is intended to be called in the situation where we * have some text between an open and close tag, as in * '<open>Some string</open>'.</p> * * <p>getFollowingString() does not expect there to be any other XML * elements between the open and close element in the stream.</p> * * <p>getFollowingString() expects the openElement to have already * been consumed from the reader at the time that it is called, and * will consume the close element before returning.</p> * * <p>If there is no character data between openElement and the * matching closeElement, null will be returned.</p> */ public String getFollowingString(XMLItem openItem, boolean skipWhiteSpace) { String result = null; XMLElement openElement; String tagName; XMLItem nextItem; /* -- */ if (!(openItem instanceof XMLElement)) { throw new IllegalArgumentException("getFollowingString() needs to be given an XMLElement."); } openElement = (XMLElement) openItem; // if we have no character data between the open and close tag, // the reader will have reported the openItem as being an empty // element. if (openElement.isEmpty()) { return null; } // okay, we know there's something before we get to the close // element.. handle it. 
tagName = openElement.getName(); nextItem = getNextItem(skipWhiteSpace); if (nextItem instanceof XMLCharData) { if (skipWhiteSpace) { result = nextItem.getCleanString(); } else { result = nextItem.getString(); } } // and get to the close tag, skipping over whatever gets in our // way while (nextItem != null && !nextItem.matchesClose(tagName)) { //err.println(">>> " + tagName + " seeking: " + nextItem); nextItem = getNextItem(skipWhiteSpace); } if (nextItem == null) { IllegalArgumentException ex = new IllegalArgumentException("unexpected end of stream"); err.println(ex.getMessage()); } return result; } /** * <p>This method reads the next XMLItem from the reader stream and, * if it is an non-empty XMLElement, will return that element as the * root node of a tree of all elements contained under it. All * XMLItems in the tree will be linked using the getParent() and * getChildren() methods supported by every XMLItem class.</p> * * <p>If getNextTree returns a multi-node tree, all XMLCloseElements * read from the reader stream will be eaten, and will not appear in * the tree returned. The XMLCloseElements are used to determine * where the list of children should end, and so are implicitly * captured in the tree returned. If any XMLError or XMLEndDocument * items are found while searching for the completion of an open * element's tree, that will be returned directly, and all items * loaded from the reader in building the tree will be thrown away. 
* XMLWarning elements will be returned at the point at which they
   * were encountered in the tree parsing.</p>
   *
   * <p>This method is recursive, and so may cause a
   * StackOverflowError to be thrown if the XML under the startingItem
   * is extremely deeply nested.</p>
   *
   * <p>This variant of getNextTree() uses the default skipWhiteSpace
   * setting for this XMLReader.</p>
   */
  public XMLItem getNextTree()
  {
    // no starting item: the root is read from the stream
    final XMLItem noStartingItem = null;

    return getNextTree(noStartingItem, this.skipWhiteSpace);
  }

  /**
   * <p>This method takes an optional XMLItem and, if it is an
   * non-empty XMLElement, will return that element as the root node
   * of a tree of all elements contained under it.  All XMLItems in
   * the tree will be linked using the getParent() and getChildren()
   * methods supported by every XMLItem class.</p>
   *
   * <p>If getNextTree returns a multi-node tree, all XMLCloseElements
   * read from the reader stream will be eaten, and will not appear in
   * the tree returned.  The XMLCloseElements are used to determine
   * where the list of children should end, and so are implicitly
   * captured in the tree returned.  If any XMLError or XMLEndDocument
   * items are found while searching for the completion of an open
   * element's tree, that will be returned directly, and all items
   * loaded from the reader in building the tree will be thrown away.
   * XMLWarning elements will be returned at the point at which they
   * were encountered in the tree parsing.</p>
   *
   * <p>This method is recursive, and so may cause a
   * StackOverflowError to be thrown if the XML under the startingItem
   * is extremely deeply nested.</p>
   *
   * <p>Note that the startingItem is optional, and if it is present,
   * it must be the last XMLItem read from this
   * XMLReader.. getNextTree() assumes that the XMLReader is primed to
   * read the first XMLItem following the startingItem if startingItem
   * is provided.  If startingItem is not provided, getNextTree() will
   * read the next item from the XMLReader, and make that the root of
   * the tree returned.
If the next item is not a non-empty XML
   * element start tag, the next item will be returned by itself.</p>
   *
   * <p>This variant of getNextTree() uses the default skipWhiteSpace
   * setting for this XMLReader.</p>
   *
   * @param startingItem the item to use as the root of the tree, or
   * null to read the root from the stream
   */
  public XMLItem getNextTree(XMLItem startingItem)
  {
    final boolean eatWhiteSpace = this.skipWhiteSpace;

    return getNextTree(startingItem, eatWhiteSpace);
  }

  /**
   * <p>This method takes an optional XMLItem and, if it is an
   * non-empty XMLElement, will return that element as the root node
   * of a tree of all elements contained under it.  All XMLItems in
   * the tree will be linked using the getParent() and getChildren()
   * methods supported by every XMLItem class.</p>
   *
   * <p>If getNextTree returns a multi-node tree, all XMLCloseElements
   * read from the reader stream will be eaten, and will not appear in
   * the tree returned.  The XMLCloseElements are used to determine
   * where the list of children should end, and so are implicitly
   * captured in the tree returned.  If any XMLError or XMLEndDocument
   * items are found while searching for the completion of an open
   * element's tree, that will be returned directly, and all items
   * loaded from the reader in building the tree will be thrown away.
   * XMLWarning elements will be returned at the point at which they
   * were encountered in the tree parsing.</p>
   *
   * <p>This method is recursive, and so may cause a
   * StackOverflowError to be thrown if the XML under the startingItem
   * is extremely deeply nested.</p>
   *
   * <p>Note that the startingItem is optional, and if it is present,
   * it must be the last XMLItem read from this
   * XMLReader.. getNextTree() assumes that the XMLReader is primed to
   * read the first XMLItem following the startingItem if startingItem
   * is provided.  If startingItem is not provided, getNextTree() will
   * read the next item from the XMLReader, and make that the root of
   * the tree returned.
If the next item is not a non-empty XML * element start tag, the next item will be returned by itself.</p> */ public XMLItem getNextTree(XMLItem startingItem, boolean skipWhiteSpace) { XMLItem nextItem; /* -- */ if (startingItem == null) { startingItem = getNextItem(skipWhiteSpace); } if (!(startingItem instanceof XMLElement) || startingItem.isEmpty()) { return startingItem; } List<XMLItem> children = new ArrayList<XMLItem>(); while (true) { nextItem = getNextTree(null, skipWhiteSpace); // if we get an error or a pre-mature EOF, we just pass that up if (nextItem instanceof XMLError || nextItem instanceof XMLEndDocument) { startingItem.dissolve(); children = null; return nextItem; } // if we find the matching close, bundle up the children and // pass them up if (nextItem.matchesClose(startingItem.getName())) { if (children.size() > 0) { XMLItem[] childrenAry = new XMLItem[children.size()]; for (int i = 0; i < children.size(); i++) { childrenAry[i] = children.get(i); } startingItem.setChildren(childrenAry); } return startingItem; } nextItem.setParent(startingItem); children.add(nextItem); } } /** * <p>This method returns true if the next thing to be read in the * input stream is non-whitespace character data rather than an open * or close element tag.</p> * * <p>Calling this method has the side effect that if the next data * in the stream is a block of all-whitespace character data, that * all-whitespace character data will be silently eaten.</p> * * <p>This method goes well with getFollowingString(); you can call * this method first to verify that the next data is indeed char * data, then call getFollowingString() to get all of it.</p> */ public boolean isNextCharData() { XMLItem next = peekNextItem(true); return next instanceof XMLCharData; } /** * <p>close() causes the XMLReader to terminate its operations as * soon as possible. 
Once close() has been called, the background * XML parser will terminate with a SAXException the next time a SAX * callback is performed.</p> */ public void close() { synchronized (buffer) { done = true; buffer.notifyAll(); // to wake up any sleepers if the buffer is full } if (false) // XXX debug XXX { // bounce a runtime exception to get our stack trace try { throw new RuntimeException("XMLReader.close() called"); } catch (RuntimeException ex) { ex.printStackTrace(); } } } public boolean isDone() { return done; } public void run() { try { parser.parse(inputSource, this); } catch (SAXException ex) { if (!done) { // we don't want to bother printing out any content if // we've not got any content other than the // XMLStartDocument, so we'll gate on circleBuffer size > // 1 if (circleBuffer.getSize() > 1) { err.println("XMLReader parse error: " + ex.getMessage()); err.println("Leading context:"); err.println(circleBuffer.getContents()); err.flush(); } return; } } catch (IOException ex) { // if we're done and we've been reading data through a pipe, // we want to ignore the pipe broken error that the parser // seems to insist on running up against. if (!done) { err.println("XMLReader io error: " + ex.getMessage()); err.println("Leading context:"); err.println(circleBuffer.getContents()); err.flush(); throw new RuntimeException("XMLReader io error: " + ex.getMessage()); } } finally { // we've got to the end and either used the circleBuffer or // not. 
clear our reference to make sure we don't get a // lingering handle circleBuffer = null; close(); } } private final void pourIntoBuffer(XMLItem item) throws SAXException { try { enqueue(item); } catch (InterruptedException ex) { throw new SAXException("parse thread interrupted, can't wait for buffer to drain."); } // the buffer needs all xml items, since whitespace // is filtered out later in the processing chain, // but we have to filter out whitespace from our // debug trace buffer here if (skipWhiteSpace) { if (!(item instanceof XMLCharData)) { circleBuffer.add(item); } else { if (((XMLCharData) item).containsNonWhitespace()) { circleBuffer.add(item); } } } else { circleBuffer.add(item); } // if we have filled the buffer above the // high water mark, wake up the consumers // and let them start draining if (bufferContents >= highWaterMark) { buffer.notifyAll(); } } /** * <p>This is a private helper method used to move a completed * halfElement XMLElement (which stays half-completed until we know * whether the SAX parser will give us an immediately following * close element, in which case we want to mark the halfElement as * empty and eat the subsequent close) into the XMLReader's primary * buffer.</p> */ private final void completeElement() throws SAXException { if (halfElement != null) { XMLItem _item = halfElement; halfElement = null; pourIntoBuffer(_item); } } /** * <p>This is a private helper method used to finish processing * continuous character data. 
The Java SAX2 parser is perfectly * capable of generating multiple characters() calls during the * processing of a single continuous block of character data, but we * (the XMLReader class) only want to generate a single XMLCharData * object for our clients.</p> * * <p>When non-character data comes in, XMLReader makes sure to call * this method to get any accumulated character data flushed into * our XMLItem CircleBuffer.</p> */ private final void completeCharData() throws SAXException { if (charBuffer.length() != 0) { XMLItem _item = new XMLCharData(charBuffer.toString()); charBuffer.setLength(0); pourIntoBuffer(_item); } } /** * private enqueue method. Will block on the internal XMLItem * buffer if the circular buffer is full. */ private void enqueue(XMLItem item) throws InterruptedException { synchronized (buffer) { while (bufferContents >= bufferSize) { buffer.wait(); } buffer[enqueuePtr] = item; if (++enqueuePtr >= bufferSize) { enqueuePtr = 0; } bufferContents++; } } /** * private dequeue method. assumes that the calling code will check * bounds. */ private XMLItem dequeue() { synchronized (buffer) { XMLItem result = buffer[dequeuePtr]; buffer[dequeuePtr] = null; if (++dequeuePtr >= bufferSize) { dequeuePtr = 0; } bufferContents--; return result; } } // // // // SAX DefaultHandler overrides start here // // // /** * <p>The locator allows the application to determine the end * position of any document-related event, even if the parser is not * reporting an error. Typically, the application will use this * information for reporting its own errors (such as character * content that does not match an application's business rules). * The information returned by the locator is probably not * sufficient for use with a search engine.</p> * * <p>Note that the locator will return correct information only * during the invocation of the events in this interface. 
The * application should not attempt to use it at any other time.</p> * * @param locator An object that can return the location of * any SAX document event. * @see org.xml.sax.Locator */ public void setDocumentLocator(org.xml.sax.Locator locator) { this.locator = locator; } /** * Receive notification of the beginning of a document. * * <p>The SAX parser will invoke this method only once, before any * other methods in this interface or in DTDHandler (except for * setDocumentLocator).</p> * * @exception org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. */ public void startDocument() throws SAXException { synchronized (buffer) { while (!done && bufferContents >= bufferSize) { try { buffer.wait(); } catch (InterruptedException ex) { err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " + ex.getMessage()); throw new SAXException("parse thread interrupted, can't wait for buffer to drain."); } } if (done) { SAXException ex = new SAXException("parse thread halted.. app code closed XMLReader stream."); throw ex; } pourIntoBuffer(new XMLStartDocument()); } } /** * Receive notification of the end of a document. * * <p>The SAX parser will invoke this method only once, and it will * be the last method invoked during the parse. The parser shall * not invoke this method until it has either abandoned parsing * (because of an unrecoverable error) or reached the end of * input.</p> * * @exception org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. */ public void endDocument() throws SAXException { // note that the XML parser will close the input stream as needed // when the parser finishes. 
synchronized (buffer) { while (!done && bufferContents >= bufferSize) { try { buffer.wait(); } catch (InterruptedException ex) { err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " + ex.getMessage()); throw new SAXException("parse thread interrupted, can't wait for buffer to drain."); } } completeCharData(); completeElement(); if (done) { SAXException ex = new SAXException("parse thread halted.. app code closed XMLReader stream."); throw ex; } // we don't set done true here any more.. the finally in the run() method // should take care of that pourIntoBuffer(new XMLEndDocument()); } } /** * Receive notification of the beginning of an element. * * <p>The Parser will invoke this method at the beginning of every * element in the XML document; there will be a corresponding {@link * #endElement endElement} event for every startElement event (even * when the element is empty). All of the element's content will be * reported, in order, before the corresponding endElement * event.</p> * * <p>This event allows up to three name components for each * element:</p> * * <ol> * <li>the Namespace URI;</li> * <li>the local name; and</li> * <li>the qualified (prefixed) name.</li> * </ol> * * <p>Any or all of these may be provided, depending on the values * of the <var>http://xml.org/sax/features/namespaces</var> and the * <var>http://xml.org/sax/features/namespace-prefixes</var> * properties:</p> * * <ul> * <li>the Namespace URI and local name are required when * the namespaces property is <var>true</var> (the default), and are * optional when the namespaces property is <var>false</var> (if one is * specified, both must be);</li> * <li>the qualified name is required when the namespace-prefixes property * is <var>true</var>, and is optional when the namespace-prefixes property * is <var>false</var> (the default).</li> * </ul> * * <p>Note that the attribute list provided will contain only * attributes with explicit values (specified or defaulted): * 
#IMPLIED attributes will be omitted. The attribute list will * contain attributes used for Namespace declarations (xmlns* * attributes) only if the * <code>http://xml.org/sax/features/namespace-prefixes</code> * property is true (it is false by default, and support for a true * value is optional).</p> * * <p>Like {@link #characters characters()}, attribute values may * have characters that need more than one <code>char</code> * value.</p> * * @param uri the Namespace URI, or the empty string if the * element has no Namespace URI or if Namespace * processing is not being performed * @param localName the local name (without prefix), or the * empty string if Namespace processing is not being * performed * @param qName the qualified name (with prefix), or the * empty string if qualified names are not available * @param atts the attributes attached to the element. If * there are no attributes, it shall be an empty * Attributes object. The value of this object after * startElement returns is undefined * @throws org.xml.sax.SAXException any SAX exception, possibly * wrapping another exception * @see #endElement * @see org.xml.sax.Attributes * @see org.xml.sax.helpers.AttributesImpl */ public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { synchronized (buffer) { while (!done && bufferContents >= bufferSize) { try { buffer.wait(); } catch (InterruptedException ex) { err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " + ex.getMessage()); throw new SAXException("parse thread interrupted, can't wait for buffer to drain."); } } completeCharData(); completeElement(); if (done) { SAXException ex = new SAXException("parse thread halted.. 
app code closed XMLReader stream."); throw ex; } halfElement = new XMLElement(qName, atts); buffer.notifyAll(); } } /** * <p>Receive notification of the end of an element.</p> * * <p>The SAX parser will invoke this method at the end of every * element in the XML document; there will be a corresponding {@link * #startElement startElement} event for every endElement event * (even when the element is empty).</p> * * <p>For information on the names, see startElement.</p> * * @param uri the Namespace URI, or the empty string if the * element has no Namespace URI or if Namespace * processing is not being performed * @param localName the local name (without prefix), or the * empty string if Namespace processing is not being * performed * @param qName the qualified XML name (with prefix), or the * empty string if qualified names are not available * * @throws org.xml.sax.SAXException any SAX exception, possibly * wrapping another exception */ public void endElement(String uri, String localName, String qName) throws SAXException { synchronized (buffer) { while (!done && bufferContents >= bufferSize) { try { buffer.wait(); } catch (InterruptedException ex) { err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " + ex.getMessage()); throw new SAXException("parse thread interrupted, can't wait for buffer to drain."); } } completeCharData(); if (halfElement != null && halfElement.matches(qName)) { halfElement.setEmpty(); completeElement(); return; } completeElement(); if (done) { SAXException ex = new SAXException("parse thread halted.. app code closed XMLReader stream."); throw ex; } pourIntoBuffer(new XMLCloseElement(qName)); } } /** * <p>Receive notification of character data.</p> * * <p>The Parser will call this method to report each chunk of * character data. 
SAX parsers may return all contiguous character * data in a single chunk, or they may split it into several chunks; * however, all of the characters in any single event must come from * the same external entity so that the Locator provides useful * information.</p> * * <p>The application must not attempt to read from the array * outside of the specified range.</p> * * <p>Individual characters may consist of more than one Java * <code>char</code> value. There are two important cases where * this happens, because characters can't be represented in just * sixteen bits. In one case, characters are represented in a * <em>Surrogate Pair</em>, using two special Unicode values. Such * characters are in the so-called "Astral Planes", with a code * point above U+FFFF. A second case involves composite characters, * such as a base character combining with one or more accent * characters. </p> * * <p> Your code should not assume that algorithms using * <code>char</code>-at-a-time idioms will be working in character * units; in some cases they will split characters. This is * relevant wherever XML permits arbitrary characters, such as * attribute values, processing instruction data, and comments as * well as in data reported from this method. 
It's also generally * relevant whenever Java code manipulates internationalized text; * the issue isn't unique to XML.</p> * * <p>Note that some parsers will report whitespace in element * content using the {@link #ignorableWhitespace * ignorableWhitespace} method rather than this one (validating * parsers <em>must</em> do so).</p> * * @param ch the characters from the XML document * @param start the start position in the array * @param length the number of characters to read from the array * * @throws org.xml.sax.SAXException any SAX exception, possibly * wrapping another exception * * @see #ignorableWhitespace * @see org.xml.sax.Locator */ public void characters(char ch[], int start, int length) throws SAXException { synchronized (buffer) { while (!done && bufferContents >= bufferSize) { try { buffer.wait(); } catch (InterruptedException ex) { err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " + ex.getMessage()); throw new SAXException("parse thread interrupted, can't wait for buffer to drain."); } } completeElement(); if (done) { SAXException ex = new SAXException("parse thread halted.. 
app code closed XMLReader stream."); throw ex; } charBuffer.append(ch, start, length); } } /** * <p>Receive notification of ignorable whitespace in element * content.</p> * * <p>Validating Parsers must use this method to report each chunk * of whitespace in element content (see the W3C XML 1.0 * recommendation, section 2.10): non-validating parsers may also * use this method if they are capable of parsing and using content * models.</p> * * <p>SAX parsers may return all contiguous whitespace in a single * chunk, or they may split it into several chunks; however, all of * the characters in any single event must come from the same * external entity, so that the Locator provides useful * information.</p> * * <p>The application must not attempt to read from the array * outside of the specified range.</p> * * @param ch the characters from the XML document * @param start the start position in the array * @param length the number of characters to read from the array * * @throws org.xml.sax.SAXException any SAX exception, possibly * wrapping another exception * * @see #characters */ public void ignorableWhitespace(char ch[], int start, int length) throws SAXException { synchronized (buffer) { while (!done && bufferContents >= bufferSize) { try { buffer.wait(); } catch (InterruptedException ex) { err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " + ex.getMessage()); throw new SAXException("parse thread interrupted, can't wait for buffer to drain."); } } completeCharData(); completeElement(); if (done) { SAXException ex = new SAXException("parse thread halted.. app code closed XMLReader stream."); throw ex; } pourIntoBuffer(new XMLCharData(ch, start, length)); } } /** * <p>Receive notification of a warning.</p> * * <p>SAX parsers will use this method to report conditions that are * not errors or fatal errors as defined by the XML 1.0 * recommendation. 
The default behaviour is to take no action.</p> * * <p>The SAX parser must continue to provide normal parsing events * after invoking this method: it should still be possible for the * application to process the document through to the end.</p> * * @param exception The warning information encapsulated in a * SAX parse exception. * * @exception org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. * * @see org.xml.sax.SAXParseException */ public void warning(SAXParseException exception) throws SAXException { synchronized (buffer) { while (!done && bufferContents >= bufferSize) { try { buffer.wait(); } catch (InterruptedException ex) { err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " + ex.getMessage()); throw new SAXException("parse thread interrupted, can't wait for buffer to drain."); } } completeCharData(); completeElement(); if (done) { SAXException ex = new SAXException("parse thread halted.. app code closed XMLReader stream."); throw ex; } pourIntoBuffer(new XMLWarning(exception, locator)); } } /** * <p>Receive notification of a recoverable error.</p> * * <p>This corresponds to the definition of "error" in section 1.2 * of the W3C XML 1.0 Recommendation. For example, a validating * parser would use this callback to report the violation of a * validity constraint. The default behaviour is to take no * action.</p> * * <p>The SAX parser must continue to provide normal parsing events * after invoking this method: it should still be possible for the * application to process the document through to the end. If the * application cannot do so, then the parser should report a fatal * error even if the XML 1.0 recommendation does not require it to * do so.</p> * * @param exception The error information encapsulated in a * SAX parse exception. * * @exception org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. 
* * @see org.xml.sax.SAXParseException */ public void error(SAXParseException exception) throws SAXException { synchronized (buffer) { while (!done && bufferContents >= bufferSize) { try { buffer.wait(); } catch (InterruptedException ex) { err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " + ex.getMessage()); throw new SAXException("parse thread interrupted, can't wait for buffer to drain."); } } completeCharData(); completeElement(); if (done) { SAXException ex = new SAXException("parse thread halted.. app code closed XMLReader stream."); throw ex; } err.println("XML parsing error: " + exception.getMessage()); pourIntoBuffer(new XMLError(exception, locator, false)); } } /** * <p>Receive notification of a non-recoverable error.</p> * * <p>This corresponds to the definition of "fatal error" in section * 1.2 of the W3C XML 1.0 Recommendation. For example, a parser * would use this callback to report the violation of a * well-formedness constraint.</p> * * <p>The application must assume that the document is unusable * after the parser has invoked this method, and should continue (if * at all) only for the sake of collecting addition error messages: * in fact, SAX parsers are free to stop reporting any other events * once this method has been invoked.</p> * * @param exception The error information encapsulated in a * SAX parse exception. * * @exception org.xml.sax.SAXException Any SAX exception, possibly * wrapping another exception. 
*
   * @see org.xml.sax.SAXParseException
   */
  public void fatalError(SAXParseException exception) throws SAXException {
    synchronized (buffer) {
      while (!done && bufferContents >= bufferSize) {
        try {
          buffer.wait();
        } catch (InterruptedException ex) {
          err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " + ex.getMessage());

          // Re-assert the interrupt status that wait() cleared, and attach
          // the interruption as the embedded exception so it is not lost.
          Thread.currentThread().interrupt();

          throw new SAXException("parse thread interrupted, can't wait for buffer to drain.", ex);
        }
      }

      // Flush pending text and any pending element before the error
      // item is handed over.
      completeCharData();
      completeElement();

      if (done) {
        throw new SAXException("parse thread halted.. app code closed XMLReader stream.");
      }

      // Mark the reader as finished before the fatal XMLError item is
      // handed over; every other callback refuses to proceed once done
      // is set.
      done = true;

      err.println(exception.getMessage());

      pourIntoBuffer(new XMLError(exception, locator, true));
    }
  }
}