XMLReader.java example

Explorer
Ganymede-master
- doc
  - customization
  - synchronization
    - UNIXBuilderTask.java
- src
/*
   GASH 2

   XMLReader.java

   The Ganymede object storage system.

   Created: 7 March 2000

   Module By: Jonathan Abbey, jonabbey@arlut.utexas.edu

   -----------------------------------------------------------------------

   Ganymede Directory Management System

   Copyright (C) 1996-2013
   The University of Texas at Austin

   Ganymede is a registered trademark of The University of Texas at Austin

   Contact information

   Web site: http://www.arlut.utexas.edu/gash2
   Author Email: ganymede_author@arlut.utexas.edu
   Email mailing list: ganymede@arlut.utexas.edu

   US Mail:

   Computer Science Division
   Applied Research Laboratories
   The University of Texas at Austin
   PO Box 8029, Austin TX 78713-8029

   Telephone: (512) 835-3200

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.

*/

package arlut.csd.Util;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PipedOutputStream;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;

/*------------------------------------------------------------------------------
                                                                           class
                                                                       XMLReader

------------------------------------------------------------------------------*/

/**
 * <p>This class is intended to serve as a stream-oriented pull proxy,
 * allowing the Ganymede server to read XML entity and character data
 * from a SAX parser entity by entity, rather than through the use of
 * a callback interface, as is traditionally done with SAX.</p>
 *
 * <p>When instantiated, the XMLReader creates a background thread
 * that receives SAX events from the Java 1.4 JAXP SAX2 parser.  These
 * SAX events are converted to {@link arlut.csd.Util.XMLItem XMLItem}
 * objects and saved in an internal buffer.  The user of the XMLReader
 * class calls getNextItem() to retrieve these XMLItem objects from
 * the XMLReader buffer, in order of receipt.</p>
 *
 * <p>The background parse thread is throttled back as needed to avoid
 * overflowing the XMLReader's internal buffer.</p>
 */

public final class XMLReader extends org.xml.sax.helpers.DefaultHandler implements Runnable, java.io.Closeable {

  /**
   * Compile-time debugging switch.  When true, getNextItem() and
   * peekNextItem() print trace output describing what they return.
   */

  public final static boolean debug = false;

  // SAX parser created by the constructors and driven by run()
  private javax.xml.parsers.SAXParser parser;

  // the input that run() hands to the parser
  private org.xml.sax.InputSource inputSource;

  // most recent Locator given to us through setDocumentLocator()
  private org.xml.sax.Locator locator;

  // circular array of parsed items.  This array object is also the
  // monitor that every synchronized/wait/notifyAll in this class uses.
  private final XMLItem[] buffer;

  // index in buffer at which enqueue() will store the next item
  private int enqueuePtr = 0;

  // index in buffer from which dequeue() will take the next item
  private int dequeuePtr = 0;

  // number of items currently held in buffer
  private int bufferContents = 0;

  // capacity of buffer; enqueue() blocks once bufferContents reaches it
  private int bufferSize;

  /**
   * Set the lowWaterMark to something low on a single processor
   * system, to something high (equal to bufferSize?) on a
   * multi-processor native threads system.
   *
   * getNextItem() calls notifyAll() on the buffer whenever a dequeue
   * leaves bufferContents at or below this value.
   */

  private int lowWaterMark;

  /**
   * Set the highWaterMark to something high if on a single processor
   * system, to something low (equal to 0) on a multi-processor
   * native threads system.
   *
   * pourIntoBuffer() calls notifyAll() on the buffer whenever an
   * enqueue leaves bufferContents at or above this value.
   */

  private int highWaterMark;

  // background thread, created and started by the constructors, that
  // executes run()
  private Thread inputThread;

  // set to true by close(); once set, readers stop waiting for more
  // data and return null when nothing is left to hand out
  private boolean done = false;

  // single-item pushback slot filled by pushbackItem() and drained by
  // getNextItem() ahead of anything in buffer
  private XMLItem pushback = null;

  // an element that has been seen but not yet moved into buffer; see
  // completeElement()
  private XMLElement halfElement;

  // accumulates character data until completeCharData() turns it into
  // a single XMLCharData item
  private SharedStringBuffer charBuffer = new SharedStringBuffer();

  // default whitespace-skipping behaviour for the no-argument
  // getNextItem(), peekNextItem() and getNextTree() variants; also
  // consulted by pourIntoBuffer() when filling circleBuffer
  private boolean skipWhiteSpace;

  // destination for debugging and error output
  private PrintWriter err;

  // recently parsed items, printed as "Leading context" when run()
  // reports a parse or I/O error.  NOTE(review): 30 is presumably the
  // number of items retained -- confirm against CircleBuffer.
  private CircleBuffer circleBuffer = new CircleBuffer(30);

  /* -- */

  /**
   * Convenience constructor that reads from a named file and sends
   * debugging/error output to System.err, written as UTF-8.  All of
   * the real work is delegated to the four-argument filename
   * constructor.
   *
   * @param xmlFilename Name of the file to read
   * @param bufferSize How many items the XMLReader will buffer in its
   * data structures at one time
   * @param skipWhiteSpace If true, the no-param getNextItem() and peekNextItem()
   * methods will jump over any all-whitespace character data between other
   * elements.
   * @throws IOException if raised by the constructor this one
   * delegates to, or while creating the UTF-8 writer on System.err
   */

  public XMLReader(String xmlFilename, int bufferSize, boolean skipWhiteSpace) throws IOException
  {
    this(xmlFilename, bufferSize, skipWhiteSpace, new PrintWriter(new OutputStreamWriter(System.err, "UTF-8")));
  }

  /**
   * Convenience constructor that reads from a File and sends
   * debugging/error output to System.err, written as UTF-8.  All of
   * the real work is delegated to the four-argument File constructor.
   *
   * @param xmlFile A File object to read
   * @param bufferSize How many items the XMLReader will buffer in its
   * data structures at one time
   * @param skipWhiteSpace If true, the no-param getNextItem() and peekNextItem()
   * methods will jump over any all-whitespace character data between other
   * elements.
   * @throws IOException if raised by the constructor this one
   * delegates to, or while creating the UTF-8 writer on System.err
   */

  public XMLReader(File xmlFile, int bufferSize, boolean skipWhiteSpace) throws IOException
  {
    this(xmlFile, bufferSize, skipWhiteSpace, new PrintWriter(new OutputStreamWriter(System.err, "UTF-8")));
  }

  /**
   * Opens a FileInputStream on the given File and delegates to the
   * FileInputStream constructor.
   *
   * NOTE(review): nothing visible in this class closes the stream
   * opened here; confirm whether the SAX parser takes care of that.
   *
   * @param xmlFile A File object to read
   * @param bufferSize How many items the XMLReader will buffer in its
   * data structures at one time
   * @param skipWhiteSpace If true, the no-param getNextItem() and peekNextItem()
   * methods will jump over any all-whitespace character data between other
   * elements.
   * @param err A PrintWriter object to send debugging/error output to
   * @throws IOException if the file cannot be opened for reading
   */

  public XMLReader(File xmlFile, int bufferSize, boolean skipWhiteSpace, PrintWriter err) throws IOException
  {
    this(new FileInputStream(xmlFile), bufferSize, skipWhiteSpace, err);
  }

  /**
   * Opens a FileInputStream on the named file and delegates to the
   * FileInputStream constructor.
   *
   * NOTE(review): nothing visible in this class closes the stream
   * opened here; confirm whether the SAX parser takes care of that.
   *
   * @param xmlFilename Name of the file to read
   * @param bufferSize How many items the XMLReader will buffer in its
   * data structures at one time
   * @param skipWhiteSpace If true, the no-param getNextItem() and peekNextItem()
   * methods will jump over any all-whitespace character data between other
   * elements.
   * @param err A PrintWriter object to send debugging/error output to
   * @throws IOException if the file cannot be opened for reading
   */

  public XMLReader(String xmlFilename, int bufferSize, boolean skipWhiteSpace, PrintWriter err) throws IOException
  {
    this(new FileInputStream(xmlFilename), bufferSize, skipWhiteSpace, err);
  }

  /**
   * @param fileStream A FileInputStream opened on a file for us to read.
   * @param bufferSize How many items the XMLReader will buffer in its
   * data structures at one time
   * @param skipWhiteSpace If true, the no-param getNextItem() and peekNextItem()
   * methods will jump over any all-whitespace character data between other
   * elements.
   * @param err A PrintWriter object to send debugging/error output to
   */

  public XMLReader(FileInputStream fileStream, int bufferSize, boolean skipWhiteSpace, PrintWriter err) throws IOException
  {
    try
      {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(false);
        factory.setValidating(false);
        parser = factory.newSAXParser();
      }
    catch (Throwable t)
      {
        throw new RuntimeException(t);
      }

    BufferedInputStream inStream = new BufferedInputStream(fileStream);
    inputSource = new InputSource(inStream);

    if (bufferSize < 20)
      {
        bufferSize = 20;
      }

    this.bufferSize = bufferSize;

    buffer = new XMLItem[bufferSize];

    if (false) // optimize for single processor
      {
        this.highWaterMark = bufferSize - 5;
        this.lowWaterMark = 5;
      }
    else // optimize for multi-processor
      {
        this.highWaterMark = 0;
        this.lowWaterMark = bufferSize;
      }

    this.skipWhiteSpace = skipWhiteSpace;
    this.err = err;

    inputThread = new Thread(this);
    inputThread.start();
  }

  /**
   * This constructor takes a PipedOutputStream as a parameter and
   * sends debugging/error output to System.err, written as UTF-8.
   * All of the real work, including creating the matching input pipe
   * and starting the parse thread, is delegated to the four-argument
   * PipedOutputStream constructor.
   *
   * @param sourcePipe the PipedOutputStream object that XML characters are
   * written to by the code feeding this XMLReader
   * @param bufferSize How many items the XMLReader will buffer in its
   * data structures at one time
   * @param skipWhiteSpace If true, the no-param getNextItem() and peekNextItem()
   * methods will jump over any all-whitespace character data between other
   * elements.
   * @throws IOException if raised by the constructor this one
   * delegates to, or while creating the UTF-8 writer on System.err
   */

  public XMLReader(PipedOutputStream sourcePipe, int bufferSize,
                   boolean skipWhiteSpace) throws IOException
  {
    this(sourcePipe, bufferSize, skipWhiteSpace, new PrintWriter(new OutputStreamWriter(System.err, "UTF-8")));
  }

  /**
   * This constructor takes a PipedOutputStream as a parameter, creates a large
   * (64k) matching input pipe to read from, and spins off the XMLReader's parsing
   * thread to process data that is fed into the PipedOutputStream.
   *
   * <p>The buffer is sized and tuned exactly as in the
   * FileInputStream constructor: at least 20 slots, with the water
   * marks set so that producer and consumer wake each other after
   * every item.  Without the minimum size a bufferSize of zero would
   * leave the parse thread blocked forever on its first item, and a
   * negative bufferSize would fail while allocating the buffer.</p>
   *
   * @param sourcePipe the PipedOutputStream object that XML characters are
   * written to by the code feeding this XMLReader
   * @param bufferSize How many items the XMLReader will buffer in its
   * data structures at one time.  Values below 20 are raised to 20.
   * @param skipWhiteSpace If true, the no-param getNextItem() and peekNextItem()
   * methods will jump over any all-whitespace character data between other
   * elements.
   * @param err A PrintWriter object to send debugging/error output to
   * @throws IOException if the input pipe cannot be connected to sourcePipe
   * @throws RuntimeException wrapping whatever was thrown while the
   * SAX parser was being created
   */

  public XMLReader(PipedOutputStream sourcePipe, int bufferSize,
                   boolean skipWhiteSpace, PrintWriter err) throws IOException
  {
    try
      {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setNamespaceAware(false);
        factory.setValidating(false);
        parser = factory.newSAXParser();
      }
    catch (Throwable t)
      {
        throw new RuntimeException(t);
      }

    BigPipedInputStream bpis = new BigPipedInputStream(sourcePipe, 65536); // 64k
    inputSource = new InputSource(bpis);

    // same floor as the file-based constructor; a zero-length buffer
    // can never accept an item

    if (bufferSize < 20)
      {
        bufferSize = 20;
      }

    this.bufferSize = bufferSize;
    buffer = new XMLItem[bufferSize];

    // same multi-processor water marks as the file-based constructor

    this.highWaterMark = 0;
    this.lowWaterMark = bufferSize;

    this.skipWhiteSpace = skipWhiteSpace;
    this.err = err;

    inputThread = new Thread(this);

    inputThread.start();
  }

  /**
   * <p>getNextItem() returns the next {@link arlut.csd.Util.XMLItem XMLItem}
   * from the XMLReader's buffer, handing out a pushed-back item first
   * if there is one.  If the background thread's parsing has fallen
   * behind, getNextItem() will block until either data is made available from
   * the parse thread, or the XMLReader is closed.</p>
   *
   * <p>getNextItem() returns null when there are no more XML elements or character
   * data to be read from the XMLReader stream.  Items that were
   * already buffered when the reader was closed are still handed out
   * before null is returned.</p>
   *
   * @param skipWhiteSpaceChars if true, getNextItem() will silently eat any
   * all-whitespace character data.
   * @return the next item, or null if the stream is exhausted
   * @throws RuntimeException if the calling thread is interrupted
   * while waiting for data; the thread's interrupt status is restored
   * before the exception is thrown
   */

  public XMLItem getNextItem(boolean skipWhiteSpaceChars)
  {
    XMLItem value = null;
    boolean finished = false;

    /* -- */

    synchronized (buffer)
      {
        while (!finished)
          {
            finished = true;    // assume we won't be seeing whitespace chars

            while (!done && pushback == null && bufferContents == 0)
              {
                try
                  {
                    buffer.wait();
                  }
                catch (InterruptedException ex)
                  {
                    // keep the interrupt visible to our caller and
                    // keep the original exception as the cause

                    Thread.currentThread().interrupt();
                    throw new RuntimeException("interrupted, can't wait for buffer to fill.", ex);
                  }
              }

            // trace only.. we must not return here, since there may
            // still be a pushback or buffered items left to hand out
            // even though the reader has been closed

            if (debug && done)
              {
                err.println("XMLReader.getNextItem(): pushback == " + String.valueOf(pushback));
                err.println("XMLReader.getNextItem(): bufferContents == " + bufferContents);
                err.flush();
              }

            // closed and fully drained: end of stream

            if (done && pushback == null && bufferContents == 0)
              {
                return null;
              }

            if (pushback != null)
              {
                value = pushback;
                pushback = null;
              }
            else
              {
                value = dequeue();

                // if we have drained the buffer below the low water
                // mark, wake up the SAX parser thread and let it
                // start filling us up again

                if (bufferContents <= lowWaterMark)
                  {
                    buffer.notifyAll();
                  }
              }

            // an all-whitespace XMLCharData sends us around the loop
            // again when the caller asked us to skip those

            if (skipWhiteSpaceChars && value instanceof XMLCharData)
              {
                finished = ((XMLCharData) value).containsNonWhitespace();
              }
          }

        if (debug)
          {
            System.err.println("XMLReader.getNextItem() returning " + value);
          }

        return value;
      }
  }

  /**
   * <p>Returns the next {@link arlut.csd.Util.XMLItem XMLItem} from
   * the XMLReader's buffer, using the whitespace-skipping default
   * this XMLReader was constructed with.  Blocks while the parse
   * thread has nothing ready and the XMLReader has not been
   * closed.</p>
   *
   * @return the next item, or null when there are no more XML
   * elements or character data to be read
   */

  public XMLItem getNextItem()
  {
    final boolean defaultSkip = this.skipWhiteSpace;

    return getNextItem(defaultSkip);
  }

  /**
   * <p>peekNextItem() returns the next {@link arlut.csd.Util.XMLItem XMLItem}
   * from the XMLReader's buffer without removing it.  If the background thread's
   * parsing has fallen behind, peekNextItem() will block until either data is
   * made available from the parse thread, or the XMLReader is closed.</p>
   *
   * <p>peekNextItem() returns null when there are no more XML elements or character
   * data to be read from the XMLReader stream.</p>
   *
   * @param skipWhiteSpaceChars if true, peekNextItem() will silently eat any
   * all-whitespace character data.  Any all-whitespace character data eaten
   * in this way will be taken out of the XMLReader buffer, and no subsequent
   * peekNextItem() or getNextItem(), with skipWhiteSpaceChars true or false,
   * will return that item.
   * @return the item the next getNextItem() call would return, or
   * null if the stream is exhausted
   * @throws RuntimeException if the calling thread is interrupted
   * while waiting for data; the thread's interrupt status is restored
   * before the exception is thrown
   */

  public XMLItem peekNextItem(boolean skipWhiteSpaceChars)
  {
    XMLItem value = null;
    boolean finished = false;

    /* -- */

    synchronized (buffer)
      {
        while (!finished)
          {
            finished = true;    // unless we eat whitespace

            // wait until there's data to be had

            while (!done && pushback == null && bufferContents == 0)
              {
                try
                  {
                    buffer.wait();
                  }
                catch (InterruptedException ex)
                  {
                    // keep the interrupt visible to our caller and
                    // keep the original exception as the cause

                    Thread.currentThread().interrupt();
                    throw new RuntimeException("interrupted, can't wait for buffer to fill.", ex);
                  }
              }

            // if we're out of data and there will be no more, exit

            if (done && pushback == null && bufferContents == 0)
              {
                return null;
              }

            // identify the next value: a pushed-back item always
            // comes ahead of whatever is at the head of the buffer

            value = (pushback != null) ? pushback : buffer[dequeuePtr];

            if (skipWhiteSpaceChars
                && (value instanceof XMLCharData)
                && !((XMLCharData) value).containsNonWhitespace())
              {
                getNextItem(false); // consume the whitespace
                finished = false;   // loop again
              }
          }

        if (debug)
          {
            System.err.println("XMLReader.peekNextItem() returning " + value);
          }

        return value;
      }
  }

  /**
   * <p>Returns, without removing it, the next
   * {@link arlut.csd.Util.XMLItem XMLItem} from the XMLReader's
   * buffer, using the whitespace-skipping default this XMLReader was
   * constructed with.  Blocks while the parse thread has nothing
   * ready and the XMLReader has not been closed.</p>
   *
   * @return the upcoming item, or null when there are no more XML
   * elements or character data to be read
   */

  public XMLItem peekNextItem()
  {
    final boolean defaultSkip = this.skipWhiteSpace;

    return peekNextItem(defaultSkip);
  }

  /**
   * <p>Hands an item back to the XMLReader so that it is the next
   * thing returned by getNextItem() or peekNextItem().  There is room
   * for exactly one pushed-back item at a time.</p>
   *
   * @param item the item to return to the reader
   * @throws RuntimeException if an earlier pushed-back item has not
   * yet been read
   */

  public void pushbackItem(XMLItem item)
  {
    synchronized (buffer)
      {
        final boolean slotFree = (pushback == null);

        if (!slotFree)
          {
            throw new RuntimeException("can't pushback.. buffer overflow");
          }

        pushback = item;

        // there may be several consumer threads waiting for data

        buffer.notifyAll();
      }
  }

  /**
   * <p>This method is intended to be called in the situation where we
   * have some text between an open and close tag, as in
   * {@code <open>Some string</open>}.</p>
   *
   * <p>getFollowingString() does not expect there to be any other XML
   * elements between the open and close element in the stream; if
   * there are, they are read and discarded while looking for the
   * close element.</p>
   *
   * <p>getFollowingString() expects the openElement to have already
   * been consumed from the reader at the time that it is called, and
   * will consume the close element before returning.</p>
   *
   * <p>If the stream runs out before the close element is found, a
   * message is written to the error writer and flushed, and whatever
   * text was collected is still returned.</p>
   *
   * @param openItem the already-read open element; must be an XMLElement
   * @param skipWhiteSpace if true, all-whitespace character data is
   * skipped while reading and the text is returned through
   * getCleanString() rather than getString()
   * @return the character data immediately following the open
   * element, or null if openItem is an empty element or is not
   * directly followed by character data
   * @throws IllegalArgumentException if openItem is not an XMLElement
   */

  public String getFollowingString(XMLItem openItem, boolean skipWhiteSpace)
  {
    if (!(openItem instanceof XMLElement))
      {
        throw new IllegalArgumentException("getFollowingString() needs to be given an XMLElement.");
      }

    XMLElement openElement = (XMLElement) openItem;

    // if we have no character data between the open and close tag,
    // the reader will have reported the openItem as being an empty
    // element.

    if (openElement.isEmpty())
      {
        return null;
      }

    // okay, we know there's something before we get to the close
    // element..  handle it.

    String tagName = openElement.getName();
    XMLItem nextItem = getNextItem(skipWhiteSpace);
    String result = null;

    if (nextItem instanceof XMLCharData)
      {
        result = skipWhiteSpace ? nextItem.getCleanString() : nextItem.getString();
      }

    // and get to the close tag, skipping over whatever gets in our
    // way

    while (nextItem != null && !nextItem.matchesClose(tagName))
      {
        nextItem = getNextItem(skipWhiteSpace);
      }

    if (nextItem == null)
      {
        // err is not necessarily auto-flushing, so flush as the other
        // error reports in this class do, or the message may never
        // be seen

        err.println("unexpected end of stream");
        err.flush();
      }

    return result;
  }

  /**
   * <p>Reads the next XMLItem from the reader stream and, if it is a
   * non-empty XMLElement, returns that element as the root node of a
   * tree of all elements contained under it.  All XMLItems in the
   * tree are linked using the getParent() and getChildren() methods
   * supported by every XMLItem class.</p>
   *
   * <p>When a multi-node tree is returned, the XMLCloseElements read
   * along the way are consumed and do not appear in the tree; they
   * only mark where each list of children ends.  If an XMLError or
   * XMLEndDocument item turns up before an open element's tree is
   * complete, that item is returned directly and everything gathered
   * for the tree is thrown away.  XMLWarning items are left in the
   * tree at the point where they were encountered.</p>
   *
   * <p>The tree is built recursively, so extremely deeply nested XML
   * may cause a StackOverflowError.</p>
   *
   * <p>This variant of getNextTree() uses the default skipWhiteSpace
   * setting for this XMLReader.</p>
   */

  public XMLItem getNextTree()
  {
    // no starting item: the root is whatever comes next in the stream

    final XMLItem noStartingItem = null;

    return getNextTree(noStartingItem, this.skipWhiteSpace);
  }

  /**
   * <p>Takes an optional XMLItem and, if it is a non-empty
   * XMLElement, returns that element as the root node of a tree of
   * all elements contained under it.  All XMLItems in the tree are
   * linked using the getParent() and getChildren() methods supported
   * by every XMLItem class.</p>
   *
   * <p>When a multi-node tree is returned, the XMLCloseElements read
   * along the way are consumed and do not appear in the tree; they
   * only mark where each list of children ends.  If an XMLError or
   * XMLEndDocument item turns up before an open element's tree is
   * complete, that item is returned directly and everything gathered
   * for the tree is thrown away.  XMLWarning items are left in the
   * tree at the point where they were encountered.</p>
   *
   * <p>The tree is built recursively, so extremely deeply nested XML
   * under the startingItem may cause a StackOverflowError.</p>
   *
   * <p>If startingItem is given, it must be the last XMLItem read
   * from this XMLReader, so that the reader is positioned on the
   * first item following it.  If startingItem is null, the next item
   * is read from the XMLReader and becomes the root of the tree; if
   * that item is not a non-empty element start tag, it is returned
   * by itself.</p>
   *
   * <p>This variant of getNextTree() uses the default skipWhiteSpace
   * setting for this XMLReader.</p>
   */

  public XMLItem getNextTree(XMLItem startingItem)
  {
    final boolean defaultSkip = this.skipWhiteSpace;

    return getNextTree(startingItem, defaultSkip);
  }

  /**
   * <p>This method takes an optional XMLItem and, if it is a
   * non-empty XMLElement, will return that element as the root node
   * of a tree of all elements contained under it.  All XMLItems in
   * the tree will be linked using the getParent() and getChildren()
   * methods supported by every XMLItem class.</p>
   *
   * <p>If getNextTree returns a multi-node tree, all XMLCloseElements
   * read from the reader stream will be eaten, and will not appear in
   * the tree returned.  The XMLCloseElements are used to determine
   * where the list of children should end, and so are implicitly
   * captured in the tree returned.  If any XMLError or XMLEndDocument
   * items are found while searching for the completion of an open
   * element's tree, that will be returned directly, and all items
   * loaded from the reader in building the tree will be thrown away.
   * The same applies if the reader simply runs out of items (for
   * instance because it was closed): the partial tree is thrown away
   * and null is returned.  XMLWarning elements will be returned at
   * the point at which they were encountered in the tree parsing.</p>
   *
   * <p>This method is recursive, and so may cause a
   * StackOverflowError to be thrown if the XML under the startingItem
   * is extremely deeply nested.</p>
   *
   * <p>Note that the startingItem is optional, and if it is present,
   * it must be the last XMLItem read from this
   * XMLReader.. getNextTree() assumes that the XMLReader is primed to
   * read the first XMLItem following the startingItem if startingItem
   * is provided.  If startingItem is not provided, getNextTree() will
   * read the next item from the XMLReader, and make that the root of
   * the tree returned.  If the next item is not a non-empty XML
   * element start tag, the next item will be returned by itself.</p>
   *
   * @param startingItem the element to build a tree under, or null
   * to start from the next item in the stream
   * @param skipWhiteSpace if true, all-whitespace character data is
   * left out of the tree
   * @return the root of the completed tree; a lone item if the root
   * is not a non-empty element; the XMLError or XMLEndDocument that
   * cut the tree short; or null if the stream ran out
   */

  public XMLItem getNextTree(XMLItem startingItem, boolean skipWhiteSpace)
  {
    XMLItem nextItem;

    /* -- */

    if (startingItem == null)
      {
        startingItem = getNextItem(skipWhiteSpace);
      }

    if (!(startingItem instanceof XMLElement) || startingItem.isEmpty())
      {
        return startingItem;
      }

    List<XMLItem> children = new ArrayList<XMLItem>();

    while (true)
      {
        nextItem = getNextTree(null, skipWhiteSpace);

        // if we get an error or a pre-mature EOF, we just pass that
        // up.  A null means the reader had nothing more to give us,
        // which has to be treated the same way.. falling through
        // would dereference the null below.

        if (nextItem == null || nextItem instanceof XMLError || nextItem instanceof XMLEndDocument)
          {
            startingItem.dissolve();
            return nextItem;
          }

        // if we find the matching close, bundle up the children and
        // pass them up

        if (nextItem.matchesClose(startingItem.getName()))
          {
            if (!children.isEmpty())
              {
                startingItem.setChildren(children.toArray(new XMLItem[children.size()]));
              }

            return startingItem;
          }

        nextItem.setParent(startingItem);
        children.add(nextItem);
      }
  }

  /**
   * <p>Reports whether the next thing to be read from the input
   * stream is non-whitespace character data, as opposed to an open
   * or close element tag.</p>
   *
   * <p>As a side effect, a block of all-whitespace character data
   * sitting at the front of the stream is silently consumed.</p>
   *
   * <p>This pairs naturally with getFollowingString(): call this
   * first to confirm that character data is coming, then call
   * getFollowingString() to collect it.</p>
   *
   * @return true if the upcoming item is an XMLCharData
   */

  public boolean isNextCharData()
  {
    // peeking with whitespace skipping on discards any leading
    // all-whitespace block, so an XMLCharData seen here has content

    return peekNextItem(true) instanceof XMLCharData;
  }

  /**
   * <p>Asks the XMLReader to wind down.  The done flag is set and
   * every thread waiting on the item buffer is woken so that it can
   * notice the change.</p>
   *
   * <p>NOTE(review): the original documentation states that the
   * background XML parser will then terminate with a SAXException the
   * next time a SAX callback is performed; that depends on the SAX
   * callbacks, which should be checked to confirm it.</p>
   */

  public void close()
  {
    synchronized (buffer)
      {
        done = true;

        // wake anybody blocked on the buffer, reader or parser alike

        buffer.notifyAll();
      }

    if (false)                  // XXX debug XXX
      {
        // flip to true to see who is calling close()

        new RuntimeException("XMLReader.close() called").printStackTrace();
      }
  }

  /**
   * Reports whether this XMLReader has been closed, either explicitly
   * through close() or because the background parse has ended.
   *
   * <p>The flag is read while holding the buffer monitor, the same
   * monitor close() holds when it sets the flag, so that a caller on
   * another thread cannot see a stale value.</p>
   *
   * @return true once close() has been called
   */

  public boolean isDone()
  {
    synchronized (buffer)
      {
        return done;
      }
  }

  /**
   * Body of the background parse thread.  Runs the SAX parser over
   * the input source with this object as the handler, reports parse
   * and I/O failures to the error writer along with the most recently
   * parsed items, and always finishes by calling close().
   *
   * <p>Failures that occur after the reader has been closed are
   * ignored.</p>
   *
   * @throws RuntimeException if an I/O error occurs while the reader
   * is still open; the IOException is attached as the cause
   */

  public void run()
  {
    try
      {
        parser.parse(inputSource, this);
      }
    catch (SAXException ex)
      {
        // we don't want to bother printing out any content if
        // we've not got any content other than the
        // XMLStartDocument, so we'll gate on circleBuffer size >
        // 1

        if (!done && circleBuffer.getSize() > 1)
          {
            err.println("XMLReader parse error: " + ex.getMessage());
            err.println("Leading context:");
            err.println(circleBuffer.getContents());
            err.flush();
          }
      }
    catch (IOException ex)
      {
        // if we're done and we've been reading data through a pipe,
        // we want to ignore the pipe broken error that the parser
        // seems to insist on running up against.

        if (!done)
          {
            err.println("XMLReader io error: " + ex.getMessage());
            err.println("Leading context:");
            err.println(circleBuffer.getContents());
            err.flush();

            // keep the IOException attached so the stack trace
            // shows where the failure really came from

            throw new RuntimeException("XMLReader io error: " + ex.getMessage(), ex);
          }
      }
    finally
      {
        // we've got to the end and either used the circleBuffer or
        // not.  clear our reference to make sure we don't get a
        // lingering handle

        circleBuffer = null;
        close();
      }
  }

  /**
   * Private helper that adds a finished item to the XMLReader's
   * primary buffer, records it in the debugging context buffer, and
   * wakes any waiting consumers once the high water mark has been
   * reached.
   *
   * @param item the item to add
   * @throws SAXException if the parse thread is interrupted while
   * waiting for room in the buffer; the InterruptedException is
   * attached as the cause and the thread's interrupt status is
   * restored
   */

  private final void pourIntoBuffer(XMLItem item) throws SAXException
  {
    try
      {
        enqueue(item);
      }
    catch (InterruptedException ex)
      {
        Thread.currentThread().interrupt();
        throw new SAXException("parse thread interrupted, can't wait for buffer to drain.", ex);
      }

    // the buffer needs all xml items, since whitespace
    // is filtered out later in the processing chain,
    // but we have to filter out whitespace from our
    // debug trace buffer here

    boolean worthTracing = !skipWhiteSpace
      || !(item instanceof XMLCharData)
      || ((XMLCharData) item).containsNonWhitespace();

    if (worthTracing)
      {
        circleBuffer.add(item);
      }

    // if we have filled the buffer above the
    // high water mark, wake up the consumers
    // and let them start draining.
    //
    // notifyAll() may only be called by the owner of the monitor, so
    // we take it here rather than counting on every caller to be
    // holding it already.  The lock is reentrant, so this costs
    // nothing when the caller does hold it.

    synchronized (buffer)
      {
        if (bufferContents >= highWaterMark)
          {
            buffer.notifyAll();
          }
      }
  }

  /**
   * <p>Private helper that moves a pending halfElement into the
   * XMLReader's primary buffer.  An XMLElement stays "half" completed
   * until we know whether the SAX parser is about to give us its
   * close element straight away, in which case we want to mark the
   * element as empty and eat the close.</p>
   *
   * <p>Does nothing if there is no pending halfElement.</p>
   */

  private final void completeElement() throws SAXException
  {
    if (halfElement == null)
      {
        return;
      }

    XMLItem pending = halfElement;

    // clear the slot before pouring, as pouring may throw

    halfElement = null;
    pourIntoBuffer(pending);
  }

  /**
   * <p>Private helper that finishes off a run of continuous character
   * data.  The SAX parser is free to deliver one continuous block of
   * character data through several characters() calls, but clients
   * of the XMLReader should only ever see a single XMLCharData object
   * for it.</p>
   *
   * <p>Whatever has accumulated in charBuffer is turned into one
   * XMLCharData item and poured into the buffer, and charBuffer is
   * emptied.  Does nothing if charBuffer is empty.</p>
   */

  private final void completeCharData() throws SAXException
  {
    if (charBuffer.length() == 0)
      {
        return;
      }

    XMLItem accumulated = new XMLCharData(charBuffer.toString());

    // reset the accumulator before pouring, as pouring may throw

    charBuffer.setLength(0);
    pourIntoBuffer(accumulated);
  }

  /**
   * Private enqueue method.  Stores the item at the tail of the
   * circular buffer, waiting on the buffer monitor for as long as the
   * buffer is full.
   *
   * @param item the item to store
   * @throws InterruptedException if the thread is interrupted while
   * waiting for room
   */

  private void enqueue(XMLItem item) throws InterruptedException
  {
    synchronized (buffer)
      {
        while (bufferContents >= bufferSize)
          {
            buffer.wait();
          }

        final int slot = enqueuePtr;

        buffer[slot] = item;

        // advance the tail, wrapping at the end of the array

        enqueuePtr = (slot + 1 >= bufferSize) ? 0 : slot + 1;
        bufferContents++;
      }
  }

  /**
   * private dequeue method.  assumes that the calling code will check
   * bounds.
   */

  private XMLItem dequeue()
  {
    synchronized (buffer)
      {
        // take the item at the read position and null out the slot so
        // the array no longer holds a reference to it

        XMLItem head = buffer[dequeuePtr];
        buffer[dequeuePtr] = null;

        // advance the read position, wrapping back to slot 0 at the
        // end of the array

        dequeuePtr++;

        if (dequeuePtr >= bufferSize)
          {
            dequeuePtr = 0;
          }

        bufferContents -= 1;

        return head;
      }
  }

  //
  //
  //
  // SAX DefaultHandler overrides start here
  //
  //
  //

  /**
   * <p>The locator allows the application to determine the end
   * position of any document-related event, even if the parser is not
   * reporting an error.  Typically, the application will use this
   * information for reporting its own errors (such as character
   * content that does not match an application's business rules).
   * The information returned by the locator is probably not
   * sufficient for use with a search engine.</p>
   *
   * <p>Note that the locator will return correct information only
   * during the invocation of the events in this interface.  The
   * application should not attempt to use it at any other time.</p>
   *
   * @param locator An object that can return the location of
   *                any SAX document event.
   * @see org.xml.sax.Locator */

  public void setDocumentLocator(org.xml.sax.Locator locator)
  {
    // Remember the parser-supplied locator; warning(), error() and
    // fatalError() pass it along to the XMLWarning / XMLError items
    // they create.

    this.locator = locator;
  }

  /**
   * Receive notification of the beginning of a document.
   *
   * <p>The SAX parser will invoke this method only once, before any
   * other methods in this interface or in DTDHandler (except for
   * setDocumentLocator).</p>
   *
   * @exception org.xml.sax.SAXException Any SAX exception, possibly
   *            wrapping another exception.
   */

  public void startDocument() throws SAXException
  {
    synchronized (buffer)
      {
        // wait until the buffer has a free slot, or until done has
        // been set

        while (!done && bufferContents >= bufferSize)
          {
            try
              {
                buffer.wait();
              }
            catch (InterruptedException ex)
              {
                err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " +
                            ex.getMessage());

                // wait() cleared the thread's interrupt status when it
                // threw; restore it so code further up the parse
                // thread's stack can still see the interruption, and
                // keep the original exception as the cause

                Thread.currentThread().interrupt();

                throw new SAXException("parse thread interrupted, can't wait for buffer to drain.", ex);
              }
          }

        if (done)
          {
            throw new SAXException("parse thread halted.. app code closed XMLReader stream.");
          }

        pourIntoBuffer(new XMLStartDocument());
      }
  }

  /**
   * Receive notification of the end of a document.
   *
   * <p>The SAX parser will invoke this method only once, and it will
   * be the last method invoked during the parse.  The parser shall
   * not invoke this method until it has either abandoned parsing
   * (because of an unrecoverable error) or reached the end of
   * input.</p>
   *
   * @exception org.xml.sax.SAXException Any SAX exception, possibly
   *            wrapping another exception.
   */

  public void endDocument() throws SAXException
  {
    // note that the XML parser will close the input stream as needed
    // when the parser finishes.

    synchronized (buffer)
      {
        // wait until the buffer has a free slot, or until done has
        // been set

        while (!done && bufferContents >= bufferSize)
          {
            try
              {
                buffer.wait();
              }
            catch (InterruptedException ex)
              {
                err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " +
                            ex.getMessage());

                // wait() cleared the thread's interrupt status when it
                // threw; restore it so code further up the parse
                // thread's stack can still see the interruption, and
                // keep the original exception as the cause

                Thread.currentThread().interrupt();

                throw new SAXException("parse thread interrupted, can't wait for buffer to drain.", ex);
              }
          }

        // flush any pending character data and half-built element
        // ahead of the end-of-document marker

        completeCharData();
        completeElement();

        if (done)
          {
            throw new SAXException("parse thread halted.. app code closed XMLReader stream.");
          }

        // we don't set done true here any more.. the finally in the run() method
        // should take care of that

        pourIntoBuffer(new XMLEndDocument());
      }
  }

  /**
   * Receive notification of the beginning of an element.
   *
   * <p>The Parser will invoke this method at the beginning of every
   * element in the XML document; there will be a corresponding {@link
   * #endElement endElement} event for every startElement event (even
   * when the element is empty). All of the element's content will be
   * reported, in order, before the corresponding endElement
   * event.</p>
   *
   * <p>This event allows up to three name components for each
   * element:</p>
   *
   * <ol>
   * <li>the Namespace URI;</li>
   * <li>the local name; and</li>
   * <li>the qualified (prefixed) name.</li>
   * </ol>
   *
   * <p>Any or all of these may be provided, depending on the values
   * of the <var>http://xml.org/sax/features/namespaces</var> and the
   * <var>http://xml.org/sax/features/namespace-prefixes</var>
   * properties:</p>
   *
   * <ul>
   * <li>the Namespace URI and local name are required when
   * the namespaces property is <var>true</var> (the default), and are
   * optional when the namespaces property is <var>false</var> (if one is
   * specified, both must be);</li>
   * <li>the qualified name is required when the namespace-prefixes property
   * is <var>true</var>, and is optional when the namespace-prefixes property
   * is <var>false</var> (the default).</li>
   * </ul>
   *
   * <p>Note that the attribute list provided will contain only
   * attributes with explicit values (specified or defaulted):
   * #IMPLIED attributes will be omitted.  The attribute list will
   * contain attributes used for Namespace declarations (xmlns*
   * attributes) only if the
   * <code>http://xml.org/sax/features/namespace-prefixes</code>
   * property is true (it is false by default, and support for a true
   * value is optional).</p>
   *
   * <p>Like {@link #characters characters()}, attribute values may
   * have characters that need more than one <code>char</code>
   * value.</p>
   *
   * @param uri the Namespace URI, or the empty string if the
   *        element has no Namespace URI or if Namespace
   *        processing is not being performed
   * @param localName the local name (without prefix), or the
   *        empty string if Namespace processing is not being
   *        performed
   * @param qName the qualified name (with prefix), or the
   *        empty string if qualified names are not available
   * @param atts the attributes attached to the element.  If
   *        there are no attributes, it shall be an empty
   *        Attributes object.  The value of this object after
   *        startElement returns is undefined
   * @throws org.xml.sax.SAXException any SAX exception, possibly
   *            wrapping another exception
   * @see #endElement
   * @see org.xml.sax.Attributes
   * @see org.xml.sax.helpers.AttributesImpl
   */

  public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException
  {
    synchronized (buffer)
      {
        // wait until the buffer has a free slot, or until done has
        // been set

        while (!done && bufferContents >= bufferSize)
          {
            try
              {
                buffer.wait();
              }
            catch (InterruptedException ex)
              {
                err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " +
                            ex.getMessage());

                // wait() cleared the thread's interrupt status when it
                // threw; restore it so code further up the parse
                // thread's stack can still see the interruption, and
                // keep the original exception as the cause

                Thread.currentThread().interrupt();

                throw new SAXException("parse thread interrupted, can't wait for buffer to drain.", ex);
              }
          }

        // flush any pending character data and any previous
        // half-built element before starting a new one

        completeCharData();
        completeElement();

        if (done)
          {
            throw new SAXException("parse thread halted.. app code closed XMLReader stream.");
          }

        // the new element is only parked in halfElement here; it is
        // moved into the buffer by a later completeElement() call,
        // once we know whether an immediately following close makes
        // it an empty element

        halfElement = new XMLElement(qName, atts);

        // wake any threads waiting on the buffer monitor

        buffer.notifyAll();
      }
  }

  /**
   * <p>Receive notification of the end of an element.</p>
   *
   * <p>The SAX parser will invoke this method at the end of every
   * element in the XML document; there will be a corresponding {@link
   * #startElement startElement} event for every endElement event
   * (even when the element is empty).</p>
   *
   * <p>For information on the names, see startElement.</p>
   *
   * @param uri the Namespace URI, or the empty string if the
   *        element has no Namespace URI or if Namespace
   *        processing is not being performed
   * @param localName the local name (without prefix), or the
   *        empty string if Namespace processing is not being
   *        performed
   * @param qName the qualified XML name (with prefix), or the
   *        empty string if qualified names are not available
   *
   * @throws org.xml.sax.SAXException any SAX exception, possibly
   *         wrapping another exception
   */

  public void endElement(String uri, String localName, String qName) throws SAXException
  {
    synchronized (buffer)
      {
        // wait until the buffer has a free slot, or until done has
        // been set

        while (!done && bufferContents >= bufferSize)
          {
            try
              {
                buffer.wait();
              }
            catch (InterruptedException ex)
              {
                err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " +
                            ex.getMessage());

                // wait() cleared the thread's interrupt status when it
                // threw; restore it so code further up the parse
                // thread's stack can still see the interruption, and
                // keep the original exception as the cause

                Thread.currentThread().interrupt();

                throw new SAXException("parse thread interrupted, can't wait for buffer to drain.", ex);
              }
          }

        completeCharData();

        // if the still-pending element matches this close, mark it as
        // an empty element, flush it, and swallow the close event
        // rather than emitting a separate XMLCloseElement
        //
        // NOTE(review): this early return bypasses the done check
        // below -- confirm that is intended

        if (halfElement != null && halfElement.matches(qName))
          {
            halfElement.setEmpty();
            completeElement();
            return;
          }

        completeElement();

        if (done)
          {
            throw new SAXException("parse thread halted.. app code closed XMLReader stream.");
          }

        pourIntoBuffer(new XMLCloseElement(qName));
      }
  }

  /**
   * <p>Receive notification of character data.</p>
   *
   * <p>The Parser will call this method to report each chunk of
   * character data.  SAX parsers may return all contiguous character
   * data in a single chunk, or they may split it into several chunks;
   * however, all of the characters in any single event must come from
   * the same external entity so that the Locator provides useful
   * information.</p>
   *
   * <p>The application must not attempt to read from the array
   * outside of the specified range.</p>
   *
   * <p>Individual characters may consist of more than one Java
   * <code>char</code> value.  There are two important cases where
   * this happens, because characters can't be represented in just
   * sixteen bits.  In one case, characters are represented in a
   * <em>Surrogate Pair</em>, using two special Unicode values. Such
   * characters are in the so-called "Astral Planes", with a code
   * point above U+FFFF.  A second case involves composite characters,
   * such as a base character combining with one or more accent
   * characters. </p>
   *
   * <p> Your code should not assume that algorithms using
   * <code>char</code>-at-a-time idioms will be working in character
   * units; in some cases they will split characters.  This is
   * relevant wherever XML permits arbitrary characters, such as
   * attribute values, processing instruction data, and comments as
   * well as in data reported from this method.  It's also generally
   * relevant whenever Java code manipulates internationalized text;
   * the issue isn't unique to XML.</p>
   *
   * <p>Note that some parsers will report whitespace in element
   * content using the {@link #ignorableWhitespace
   * ignorableWhitespace} method rather than this one (validating
   * parsers <em>must</em> do so).</p>
   *
   * @param ch the characters from the XML document
   * @param start the start position in the array
   * @param length the number of characters to read from the array
   *
   * @throws org.xml.sax.SAXException any SAX exception, possibly
   *            wrapping another exception
   *
   * @see #ignorableWhitespace
   * @see org.xml.sax.Locator
   */

  public void characters(char ch[], int start, int length) throws SAXException
  {
    synchronized (buffer)
      {
        // wait until the buffer has a free slot, or until done has
        // been set

        while (!done && bufferContents >= bufferSize)
          {
            try
              {
                buffer.wait();
              }
            catch (InterruptedException ex)
              {
                err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " +
                            ex.getMessage());

                // wait() cleared the thread's interrupt status when it
                // threw; restore it so code further up the parse
                // thread's stack can still see the interruption, and
                // keep the original exception as the cause

                Thread.currentThread().interrupt();

                throw new SAXException("parse thread interrupted, can't wait for buffer to drain.", ex);
              }
          }

        // character data means any pending element has content, so
        // flush it as-is

        completeElement();

        if (done)
          {
            throw new SAXException("parse thread halted.. app code closed XMLReader stream.");
          }

        // only accumulate here; completeCharData() turns the
        // accumulated text into a single XMLCharData item later

        charBuffer.append(ch, start, length);
      }
  }

  /**
   * <p>Receive notification of ignorable whitespace in element
   * content.</p>
   *
   * <p>Validating Parsers must use this method to report each chunk
   * of whitespace in element content (see the W3C XML 1.0
   * recommendation, section 2.10): non-validating parsers may also
   * use this method if they are capable of parsing and using content
   * models.</p>
   *
   * <p>SAX parsers may return all contiguous whitespace in a single
   * chunk, or they may split it into several chunks; however, all of
   * the characters in any single event must come from the same
   * external entity, so that the Locator provides useful
   * information.</p>
   *
   * <p>The application must not attempt to read from the array
   * outside of the specified range.</p>
   *
   * @param ch the characters from the XML document
   * @param start the start position in the array
   * @param length the number of characters to read from the array
   *
   * @throws org.xml.sax.SAXException any SAX exception, possibly
   *            wrapping another exception
   *
   * @see #characters
   */

  public void ignorableWhitespace(char ch[], int start, int length) throws SAXException
  {
    synchronized (buffer)
      {
        // wait until the buffer has a free slot, or until done has
        // been set

        while (!done && bufferContents >= bufferSize)
          {
            try
              {
                buffer.wait();
              }
            catch (InterruptedException ex)
              {
                err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " +
                            ex.getMessage());

                // wait() cleared the thread's interrupt status when it
                // threw; restore it so code further up the parse
                // thread's stack can still see the interruption, and
                // keep the original exception as the cause

                Thread.currentThread().interrupt();

                throw new SAXException("parse thread interrupted, can't wait for buffer to drain.", ex);
              }
          }

        // flush any pending character data and half-built element
        // first

        completeCharData();
        completeElement();

        if (done)
          {
            throw new SAXException("parse thread halted.. app code closed XMLReader stream.");
          }

        // the whitespace goes out as its own XMLCharData item rather
        // than being appended to charBuffer

        pourIntoBuffer(new XMLCharData(ch, start, length));
      }
  }

  /**
   * <p>Receive notification of a warning.</p>
   *
   * <p>SAX parsers will use this method to report conditions that are
   * not errors or fatal errors as defined by the XML 1.0
   * recommendation.  The default behaviour is to take no action.</p>
   *
   * <p>The SAX parser must continue to provide normal parsing events
   * after invoking this method: it should still be possible for the
   * application to process the document through to the end.</p>
   *
   * @param exception The warning information encapsulated in a
   *                  SAX parse exception.
   *
   * @exception org.xml.sax.SAXException Any SAX exception, possibly
   *            wrapping another exception.
   *
   * @see org.xml.sax.SAXParseException
   */

  public void warning(SAXParseException exception) throws SAXException
  {
    synchronized (buffer)
      {
        // wait until the buffer has a free slot, or until done has
        // been set

        while (!done && bufferContents >= bufferSize)
          {
            try
              {
                buffer.wait();
              }
            catch (InterruptedException ex)
              {
                err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " +
                            ex.getMessage());

                // wait() cleared the thread's interrupt status when it
                // threw; restore it so code further up the parse
                // thread's stack can still see the interruption, and
                // keep the original exception as the cause

                Thread.currentThread().interrupt();

                throw new SAXException("parse thread interrupted, can't wait for buffer to drain.", ex);
              }
          }

        // flush any pending character data and half-built element
        // ahead of the warning item

        completeCharData();
        completeElement();

        if (done)
          {
            throw new SAXException("parse thread halted.. app code closed XMLReader stream.");
          }

        pourIntoBuffer(new XMLWarning(exception, locator));
      }
  }

  /**
   * <p>Receive notification of a recoverable error.</p>
   *
   * <p>This corresponds to the definition of "error" in section 1.2
   * of the W3C XML 1.0 Recommendation.  For example, a validating
   * parser would use this callback to report the violation of a
   * validity constraint.  The default behaviour is to take no
   * action.</p>
   *
   * <p>The SAX parser must continue to provide normal parsing events
   * after invoking this method: it should still be possible for the
   * application to process the document through to the end.  If the
   * application cannot do so, then the parser should report a fatal
   * error even if the XML 1.0 recommendation does not require it to
   * do so.</p>
   *
   * @param exception The error information encapsulated in a
   *                  SAX parse exception.
   *
   * @exception org.xml.sax.SAXException Any SAX exception, possibly
   *            wrapping another exception.
   *
   * @see org.xml.sax.SAXParseException
   */

  public void error(SAXParseException exception) throws SAXException
  {
    synchronized (buffer)
      {
        // wait until the buffer has a free slot, or until done has
        // been set

        while (!done && bufferContents >= bufferSize)
          {
            try
              {
                buffer.wait();
              }
            catch (InterruptedException ex)
              {
                err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " +
                            ex.getMessage());

                // wait() cleared the thread's interrupt status when it
                // threw; restore it so code further up the parse
                // thread's stack can still see the interruption, and
                // keep the original exception as the cause

                Thread.currentThread().interrupt();

                throw new SAXException("parse thread interrupted, can't wait for buffer to drain.", ex);
              }
          }

        // flush any pending character data and half-built element
        // ahead of the error item

        completeCharData();
        completeElement();

        if (done)
          {
            throw new SAXException("parse thread halted.. app code closed XMLReader stream.");
          }

        // report on the err stream and queue a non-fatal XMLError
        // (third constructor argument false)

        err.println("XML parsing error: " + exception.getMessage());
        pourIntoBuffer(new XMLError(exception, locator, false));
      }
  }

  /**
   * <p>Receive notification of a non-recoverable error.</p>
   *
   * <p>This corresponds to the definition of "fatal error" in section
   * 1.2 of the W3C XML 1.0 Recommendation.  For example, a parser
   * would use this callback to report the violation of a
   * well-formedness constraint.</p>
   *
   * <p>The application must assume that the document is unusable
   * after the parser has invoked this method, and should continue (if
   * at all) only for the sake of collecting additional error messages:
   * in fact, SAX parsers are free to stop reporting any other events
   * once this method has been invoked.</p>
   *
   * @param exception The error information encapsulated in a
   *                  SAX parse exception.
   *
   * @exception org.xml.sax.SAXException Any SAX exception, possibly
   *            wrapping another exception.
   *
   * @see org.xml.sax.SAXParseException
   */

  public void fatalError(SAXParseException exception) throws SAXException
  {
    synchronized (buffer)
      {
        // wait until the buffer has a free slot, or until done has
        // been set

        while (!done && bufferContents >= bufferSize)
          {
            try
              {
                buffer.wait();
              }
            catch (InterruptedException ex)
              {
                err.println("XMLReader parse thread interrupted, can't wait for buffer to drain: " +
                            ex.getMessage());

                // wait() cleared the thread's interrupt status when it
                // threw; restore it so code further up the parse
                // thread's stack can still see the interruption, and
                // keep the original exception as the cause

                Thread.currentThread().interrupt();

                throw new SAXException("parse thread interrupted, can't wait for buffer to drain.", ex);
              }
          }

        // flush any pending character data and half-built element
        // ahead of the error item

        completeCharData();
        completeElement();

        if (done)
          {
            throw new SAXException("parse thread halted.. app code closed XMLReader stream.");
          }

        // flag the reader as done before queueing the fatal XMLError
        // (third constructor argument true)

        done = true;
        err.println(exception.getMessage());
        pourIntoBuffer(new XMLError(exception, locator, true));
      }
  }
}