XMLWriter.java example

Explorer
commoncrawl-crawler-master
- src
  - com
    - dappit
      - Dapper
        parser
        CompressedDomBuilder.java
        DebugDocumentBuilder.java
        DocumentBuilder.java
        DomDocumentBuilder.java
        EnviromentController.java
        HTMLParser.java
        InstructionsPool.java
        LinkExtractionDocumentBuilder.java
        MozillaParser.java
        ParserException.java
        ParserInitializationException.java
        ParserInstruction.java
  - org
    - commoncrawl
package org.commoncrawl.util;

/*
 * XMLWriter.java
 * Copyright (C) 1999,2000,2001 The Free Software Foundation
 * 
 * This file is part of GNU JAXP, a library.
 *
 * GNU JAXP is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * GNU JAXP is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 * Linking this library statically or dynamically with other modules is
 * making a combined work based on this library.  Thus, the terms and
 * conditions of the GNU General Public License cover the whole
 * combination.
 *
 * As a special exception, the copyright holders of this library give you
 * permission to link this library with independent modules to produce an
 * executable, regardless of the license terms of these independent
 * modules, and to copy and distribute the resulting executable under
 * terms of your choice, provided that you also meet, for each linked
 * independent module, the terms and conditions of the license of that
 * module.  An independent module is a module which is not derived from
 * or based on this library.  If you modify this library, you may extend
 * this exception to your version of the library, but you are not
 * obliged to do so.  If you do not wish to do so, delete this
 * exception statement from your version. 
 */


import java.io.BufferedWriter;
import java.io.CharConversionException;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Stack;

import org.xml.sax.*;
import org.xml.sax.ext.*;
import org.xml.sax.helpers.*;


/**
 * This class is a SAX handler which writes all its input as a well formed
 * XML or XHTML document.  If driven using SAX2 events, this output may
 * include a recreated document type declaration, subject to limitations
 * of SAX (no internal subset exposed) or DOM (the important declarations,
 * with their documentation, are discarded).
 *
 * <p> By default, text is generated "as-is", but some optional modes
 * are supported.  Pretty-printing is supported, to make life easier
 * for people reading the output.  XHTML (1.0) output can be made
 * particularly pretty; all the built-in character entities are known.
 * Canonical XML can also be generated, assuming the input is properly
 * formed.
 *
 * <hr>
 *
 * <p> Some of the methods on this class are intended for applications to
 * use directly, rather than as pure SAX2 event callbacks.  Some of those
 * methods access the JavaBeans properties (used to tweak output formats,
 * for example canonicalization and pretty printing).  Subclasses
 * are expected to add new behaviors, not to modify current behavior, so
 * many such methods are final.</p>
 *
 * <p> The <em>write*()</em> methods may be slightly simpler for some
 * applications to use than direct callbacks.  For example, they support
 * a simple policy for encoding data items as the content of a single element.
 *
 * <p> To reuse an XMLWriter you must provide it with a new Writer, since
 * this handler closes the writer it was given as part of its endDocument()
 * handling.  (XML documents have an end of input, and the way to encode
 * that on a stream is to close it.) </p>
 *
 * <hr>
 *
 * <p> Note that any relative URIs in the source document, as found in
 * entity and notation declarations, ought to have been fully resolved by
 * the parser providing events to this handler.  This means that the
 * output text should only have fully resolved URIs, which may not be
 * the desired behavior in cases where later binding is desired. </p>
 *
 * <p> <em>Note that due to SAX2 defaults, you may need to manually
 * ensure that the input events are XML-conformant with respect to namespace
 * prefixes and declarations.  {@link gnu.xml.pipeline.NSFilter} is
 * one solution to this problem, in the context of processing pipelines.</em>
 * Something as simple as connecting this handler to a parser might not
 * generate the correct output.  Another workaround is to ensure that the
 * <em>namespace-prefixes</em> feature is always set to true, if you're
 * hooking this directly up to some XMLReader implementation.
 *
 * @see gnu.xml.pipeline.TextConsumer
 *
 * @author David Brownell
 */
public class XMLWriter
    implements ContentHandler, LexicalHandler, DTDHandler, DeclHandler
{
    // text prints/escapes differently depending on context
    //  CTX_ENTITY ... entity literal value
    //  CTX_ATTRIBUTE ... attribute literal value
    //  CTX_CONTENT ... content of an element
    //  CTX_UNPARSED ... CDATA, comment, PI, names, etc
    //  CTX_NAME ... name or nmtoken, no escapes possible
    private static final int  CTX_ENTITY = 1;	// entity literal value
    private static final int  CTX_ATTRIBUTE = 2;	// attribute literal value
    private static final int  CTX_CONTENT = 3;	// content of an element
    private static final int  CTX_UNPARSED = 4;	// CDATA, comment, PI, etc
    private static final int  CTX_NAME = 5;	// name or nmtoken, no escapes

// FIXME: names (element, attribute, PI, notation, etc) are not
// currently written out with range checks (escapeChars).
// In non-XHTML, some names can't be directly written; panic!

    private static String sysEOL;

    static {
        // Ask the platform for its line separator, but fall back to a
        // plain newline when the property can't be read or when its
        // value isn't one of the line ends that XML permits.
        String candidate;

        try {
            candidate = System.getProperty ("line.separator", "\n");
        } catch (SecurityException e) {
            candidate = "\n";
        }
        sysEOL = isLineEnd (candidate) ? candidate : "\n";
    }

    private static boolean isLineEnd (String eol)
    {
        // Only LF, CR, and CRLF count as line ends; null never does.
        if (eol == null)
            return false;

        switch (eol.length ()) {
          case 1:
            return eol.charAt (0) == '\n' || eol.charAt (0) == '\r';
          case 2:
            return eol.charAt (0) == '\r' && eol.charAt (1) == '\n';
          default:
            return false;
        }
    }

    // Destination for all output.  setWriter() wraps it in a
    // BufferedWriter; endDocument() closes it and sets it back to null.
    private Writer    out;
    // When true, characters() writes text using CTX_UNPARSED.
    private boolean   inCDATA;
    // Incremented by startElement(), decremented by endElement().
    private int     elementNestLevel;
    // Line ending used on output; see setEOL().
    private String    eol = sysEOL;

    // Set by setEncoding():  0xff80 for US-ASCII, 0xff00 for ISO-8859-1,
    // otherwise left at zero.  NOTE(review): presumably the bits which
    // mark a character as not encodable -- confirm against escapeChars().
    private short   dangerMask;
    // Allocated by setEncoding() only when dangerMask is nonzero.
    private StringBuffer  stringBuf;
    // Non-null once events have started arriving; the property setters
    // use this to refuse changes after parsing has begun.
    private Locator   locator;
    // Optional handler given first look at errors raised through fatal().
    private ErrorHandler  errHandler;

    private boolean   expandingEntities = false;
    // While nonzero, startElement(), endElement() and characters()
    // write nothing.  Reset to zero by startDocument().
    private int     entityNestLevel;
    private boolean   xhtml;
    // Set by startDocument() after writing the XHTML DOCTYPE; cleared
    // by startElement().
    private boolean   startedDoctype;
    // Name reported in the XML declaration; normalized by setEncoding().
    private String    encoding;

    private boolean   canonical;
    // NOTE(review): not referenced in this part of the file.
    private boolean   inDoctype;
    // Set by endElement() when elementNestLevel returns to zero.
    private boolean   inEpilogue;

    // pretty printing controls
    private boolean   prettyPrinting;
    // NOTE(review): column and noWrap are not referenced in this part
    // of the file; presumably maintained by the raw output routines.
    private int     column;
    private boolean   noWrap;
    // Stack of xml:space modes ("default" or "preserve"); startElement()
    // pushes one entry per element while pretty printing.
    private Stack   space = new Stack ();

    // this is not a hard'n'fast rule -- longer lines are OK,
    // but are to be avoided.  Here, prettyprinting is more to
    // show structure "cleanly" than to be precise about it.
    // better to have ragged layout than one line 24Kb long.
    private static final int  lineLength = 75;


    /**
     * Constructs this handler with System.out used to write SAX events
     * using the UTF-8 encoding.  Avoid using this except when you know
     * it's safe to close System.out at the end of the document.
     */
    public XMLWriter () throws IOException
    {
        // Delegates to the OutputStream constructor with standard output.
        this (System.out);
    }

    /**
     * Constructs a handler which writes all input to the output stream
     * in the UTF-8 encoding, and closes it when endDocument is called.
     * (Yes it's annoying that this throws an exception -- but there's
     * really no way around it, since it's barely possible a JDK may
     * exist somewhere that doesn't know how to emit UTF-8.)
     */
    public XMLWriter (OutputStream out)
    throws IOException
    {
        // Wrap the byte stream in a writer using Java's "UTF8" encoding
        // name, then hand off to the Writer constructor.
        this (new OutputStreamWriter (out, "UTF8"));
    }

    /**
     * Constructs a handler which writes all input to the writer, and then
     * closes the writer when the document ends.  If an XML declaration is
     * written onto the output, and this class can determine the name of
     * the character encoding for this writer, that encoding name will be
     * included in the XML declaration.
     *
     * <P> See the description of the constructor which takes an encoding
     * name for important information about selection of encodings.
     *
     * @param writer XML text is written to this writer.
     */
    public XMLWriter (Writer writer)
    {
        // No explicit encoding name:  it is derived from the writer
        // itself when that is possible.
        this (writer, null);
    }

    /**
     * Constructs a handler which writes all input to the writer, and then
     * closes the writer when the document ends.  If an XML declaration is
     * written onto the output, this class will use the specified encoding
     * name in that declaration.  If no encoding name is specified, no
     * encoding name will be declared unless this class can otherwise
     * determine the name of the character encoding for this writer.
     *
     * <P> At this time, only the UTF-8 ("UTF8") and UTF-16 ("Unicode")
     * output encodings are fully lossless with respect to XML data.  If you
     * use any other encoding you risk having your data be silently mangled
     * on output, as the standard Java character encoding subsystem silently
     * maps non-encodable characters to a question mark ("?") and will not
     * report such errors to applications.
     *
     * <p> For a few other encodings the risk can be reduced. If the writer is
     * a java.io.OutputStreamWriter, and uses either the ISO-8859-1 ("8859_1",
     * "ISO8859_1", etc) or US-ASCII ("ASCII") encodings, content which
     * can't be encoded in those encodings will be written safely.  Where
     * relevant, the XHTML entity names will be used; otherwise, numeric
     * character references will be emitted.
     *
     * <P> However, there remain a number of cases where substituting such
     * entity or character references is not an option.  Such references are
     * not usable within a DTD, comment, PI, or CDATA section.  Neither may
     * they be used when element, attribute, entity, or notation names have
     * the problematic characters.
     *
     * @param writer XML text is written to this writer.
     * @param encoding if non-null, and an XML declaration is written,
     *  this is the name that will be used for the character encoding.
     */
    public XMLWriter (Writer writer, String encoding)
    {
        // All of the setup is shared with handler reuse.
        this.setWriter (writer, encoding);
    }
    
    /**
     * Records the name of the output encoding, mapping the Java-specific
     * names this class knows about onto their official equivalents, and
     * sets up character escaping for US-ASCII and ISO-8859-1 output.
     * When no name is given and the current writer is an
     * OutputStreamWriter, the writer is asked for its encoding.
     *
     * @param encoding the encoding name, or null if the caller has
     *	not specified one.
     */
    private void setEncoding (String encoding)
    {
        // This runs again each time the handler is given a new writer;
        // the escaping mask chosen for an earlier encoding must not
        // leak into a document written with a different one.
        dangerMask = 0;

        if (encoding == null && out instanceof OutputStreamWriter)
            encoding = ((OutputStreamWriter)out).getEncoding ();

        if (encoding != null) {
            // Encoding names are ASCII; use a fixed locale so that the
            // default locale's case mapping rules (for example, the
            // Turkish dotted capital I) can't prevent the names below
            // from matching.
            encoding = encoding.toUpperCase (java.util.Locale.ENGLISH);

            // Use official encoding names where we know them,
            // avoiding the Java-only names.  When using common
            // encodings where we can easily tell if characters
            // are out of range, we'll escape out-of-range
            // characters using character refs for safety.

            // I _think_ these are all the main synonyms for these!
            if ("UTF8".equals (encoding)) {
                encoding = "UTF-8";
            } else if ("US-ASCII".equals (encoding)
                    || "ASCII".equals (encoding)) {
                dangerMask = (short) 0xff80;
                encoding = "US-ASCII";
            } else if ("ISO-8859-1".equals (encoding)
                    || "8859_1".equals (encoding)
                    || "ISO8859_1".equals (encoding)) {
                dangerMask = (short) 0xff00;
                encoding = "ISO-8859-1";
            } else if ("UNICODE".equals (encoding)
                    || "UNICODE-BIG".equals (encoding)
                    || "UNICODE-LITTLE".equals (encoding)) {
                encoding = "UTF-16";

                // TODO: UTF-16BE, UTF-16LE ... no BOM; what
                // release of JDK supports those Unicode names?
            }

            if (dangerMask != 0)
                stringBuf = new StringBuffer ();
        }

        this.encoding = encoding;
    }


    /**
     * Resets the handler to write a new text document.
     *
     * @param writer XML text is written to this writer.  It is wrapped
     *	in a BufferedWriter unless it already is one.  If this is null,
     *	no writer is installed, and one must be provided through another
     *	call to this method before a document can be written.
     * @param encoding if non-null, and an XML declaration is written,
     *	this is the name that will be used for the character encoding.
     *
     * @exception IllegalStateException if the current
     *	document hasn't yet ended (with {@link #endDocument})
     */
    final public void setWriter (Writer writer, String encoding)
    {
        if (out != null)
            throw new IllegalStateException (
                "can't change stream in mid course");
        out = writer;
        if (out != null) {
            // The encoding must be determined before the writer gets
            // wrapped, since that looks at the writer's own class.
            setEncoding (encoding);

            // Never wrap a null writer:  the BufferedWriter constructor
            // would throw NullPointerException, and startDocument()
            // reports a missing writer itself.
            if (!(out instanceof BufferedWriter))
                out = new BufferedWriter (out);
        }
        space.push ("default");
    }

    /**
     * Assigns the line ending style to be used on output.
     * @param eolString null to use the system default; else
     *	"\n", "\r", or "\r\n".
     * @exception IllegalArgumentException if the string is neither
     *	null nor one of those three line endings.
     */
    final public void setEOL (String eolString)
    {
        if (eolString == null)
            eol = sysEOL;
        else if (isLineEnd (eolString))
            // the test used to be inverted, which rejected every legal
            // line ending and accepted everything else
            eol = eolString;
        else
            throw new IllegalArgumentException (eolString);
    }

    /**
     * Assigns the error handler to be used to present most fatal
     * errors.
     */
    public void setErrorHandler (ErrorHandler handler)
    {
        // A null handler is fine; fatal() then just throws.
        this.errHandler = handler;
    }

    /**
     * Used internally and by subclasses, this encapsulates the logic
     * involved in reporting fatal errors.  It uses locator information
     * for good diagnostics, if available, and gives the application's
     * ErrorHandler the opportunity to handle the error before throwing
     * an exception.
     */
    protected void fatal (String message, Exception e)
    throws SAXException
    {
        // Attach position information when a locator is available.
        final SAXParseException problem = (locator != null)
            ? new SAXParseException (message, locator, e)
            : new SAXParseException (message, null, null, -1, -1, e);

        // The application's handler gets first look at the error; the
        // exception is thrown regardless once that handler returns.
        if (errHandler != null)
            errHandler.fatalError (problem);
        throw problem;
    }


    // JavaBeans properties

    /**
     * Controls whether the output should attempt to follow the "transitional"
     * XHTML rules so that it meets the "HTML Compatibility Guidelines"
     * appendix in the XHTML specification.  A "transitional" Document Type
     * Declaration (DTD) is placed near the beginning of the output document,
     * instead of whatever DTD would otherwise have been placed there, and
     * XHTML empty elements are printed specially.  When writing text in
     * US-ASCII or ISO-8859-1 encodings, the predefined XHTML internal
     * entity names are used (in preference to character references) when
     * writing content characters which can't be expressed in those encodings.
     *
     * <p> When this option is enabled, it is the caller's responsibility
     * to ensure that the input is otherwise valid as XHTML.  Things to
     * be careful of in all cases, as described in the appendix referenced
     * above, include:  <ul>
     *
     *  <li> Element and attribute names must be in lower case, both
     *    in the document and in any CSS style sheet.
     *  <li> All XML constructs must be valid as defined by the XHTML
     *    "transitional" DTD (including all familiar constructs,
     *    even deprecated ones).
     *  <li> The root element must be "html".
     *  <li> Elements that must be empty (such as <em>&lt;br&gt;</em>)
     *    must have no content.
     *  <li> Use both <em>lang</em> and <em>xml:lang</em> attributes
     *    when specifying language.
     *  <li> Similarly, use both <em>id</em> and <em>name</em> attributes
     *    when defining elements that may be referred to through
     *    URI fragment identifiers ... and make sure that the
     *    value is a legal NMTOKEN, since not all such HTML 4.0
     *    identifiers are valid in XML.
     *  <li> Be careful with character encodings; make sure you provide
     *    a <em><meta http-equiv="Content-type"
     *    content="text/xml;charset=..." /></em> element in
     *    the HTML "head" element, naming the same encoding
     *    used to create this handler.  Also, if that encoding
     *    is anything other than US-ASCII, make sure that if
     *    the document is given a MIME content type, it has
     *    a <em>charset=...</em> attribute with that encoding.
     *  </ul>
     *
     * <p> Additionally, some of the oldest browsers have additional
     * quirks, to address with guidelines such as: <ul>
     *
     *  <li> Processing instructions may be rendered, so avoid them.
     *    (Similarly for an XML declaration.)
     *  <li> Embedded style sheets and scripts should not contain XML
     *    markup delimiters:  &, <, and ]]> are trouble.
     *  <li> Attribute values should not have line breaks or multiple
     *    consecutive white space characters.
     *  <li> Use no more than one of the deprecated (transitional)
     *    <em><isindex></em> elements.
     *  <li> Some boolean attributes (such as <em>compact, checked,
     *    disabled, readonly, selected,</em> and more) confuse
     *    some browsers, since they only understand minimized
     *    versions which are illegal in XML.
     *  </ul>
     *
     * <p> Also, some characteristics of the resulting output may be
     * a function of whether the document is later given a MIME
     * content type of <em>text/html</em> rather than one indicating
     * XML (<em>application/xml</em> or <em>text/xml</em>).  Worse,
     * some browsers ignore MIME content types and prefer to rely on URI
     * name suffixes -- so an "index.xml" could always be XML, never
     * XHTML, no matter its MIME type.
     */
    final public void setXhtml (boolean value)
    {
        // A non-null locator means events have already begun to arrive.
        if (locator != null)
            throw new IllegalStateException ("started parsing");

        xhtml = value;

        // XHTML output can't also be canonical XML.
        if (value)
            canonical = false;
    }

    /**
     * Returns true if the output attempts to echo the input following
     * "transitional" XHTML rules and matching the "HTML Compatibility
     * Guidelines" so that an HTML version 3 browser can read the output
     * as HTML; returns false (the default) otherwise.
     */
    final public boolean isXhtml ()
    {
        // current XHTML output setting
        return this.xhtml;
    }

    /**
     * Controls whether the output text contains references to
     * entities (the default), or instead contains the expanded
     * values of those entities.
     */
    final public void setExpandingEntities (boolean value)
    {
        // A non-null locator means events have already begun to arrive.
        if (locator != null)
            throw new IllegalStateException ("started parsing");

        expandingEntities = value;

        // Canonical output is switched off once entity references
        // are to be kept in the output.
        if (!value)
            canonical = false;
    }

    /**
     * Returns true if the output will have no entity references;
     * returns false (the default) otherwise.
     */
    final public boolean isExpandingEntities ()
    {
        // current entity expansion setting
        return this.expandingEntities;
    }

    /**
     * Controls pretty-printing, which by default is not enabled
     * (and currently is most useful for XHTML output).
     * Pretty printing enables structural indentation, sorting of attributes
     * by name, line wrapping, and potentially other mechanisms for making
     * output more or less readable.
     *
     * <p> At this writing, structural indentation and line wrapping are
     * enabled when pretty printing is enabled and the <em>xml:space</em>
     * attribute has the value <em>default</em> (its other legal value is
     * <em>preserve</em>, as defined in the XML specification).  The three
     * XHTML element types which use another value are recognized by their
     * names (namespaces are ignored).
     *
     * <p> Also, for the record, the "pretty" aspect of printing here
     * is more to provide basic structure on outputs that would otherwise
     * risk being a single long line of text.  For now, expect the
     * structure to be ragged ... unless you'd like to submit a patch
     * to make this be more strictly formatted!
     *
     * @exception IllegalStateException thrown if this method is invoked
     *  after output has begun.
     */
    final public void setPrettyPrinting (boolean value)
    {
        // A non-null locator means events have already begun to arrive.
        if (locator != null)
            throw new IllegalStateException ("started parsing");

        prettyPrinting = value;

        // Pretty printing and canonical output exclude each other.
        if (value)
            canonical = false;
    }

    /**
     * Returns value of flag controlling pretty printing.
     */
    final public boolean isPrettyPrinting ()
    {
        // current pretty printing setting
        return this.prettyPrinting;
    }


    /**
     * Sets the output style to be canonicalized.  Input events must
     * meet requirements that are slightly more stringent than the
     * basic well-formedness ones, and include:  <ul>
     *
     *  <li> Namespace prefixes must not have been changed from those
     *  in the original document.  (This may only be ensured by setting
     *  the SAX2 XMLReader <em>namespace-prefixes</em> feature flag;
     *  by default, it is cleared.)
     *
     *  <li> Redundant namespace declaration attributes have been
     *  removed.  (If an ancestor element defines a namespace prefix
     *  and that declaration hasn't been overridden, an element must
     *  not redeclare it.)
     *
     *  <li> If comments are not to be included in the canonical output,
     *  they must first be removed from the input event stream; this
     *  is <em>Canonical XML with comments</em> by default.
     *
     *  <li> If the input character encoding was not UCS-based, the
     *  character data must have been normalized using Unicode
     *  Normalization Form C.  (UTF-8 and UTF-16 are UCS-based.)
     *
     *  <li> Attribute values must have been normalized, as is done
     *  by any conformant XML processor which processes all external
     *  parameter entities.
     *
     *  <li> Similarly, attribute value defaulting has been performed.
     *
     *  </ul>
     *
     * <p> Note that fragments of XML documents, as specified by an XPath
     * node set, may be canonicalized.  In such cases, elements may need
     * some fixup (for <em>xml:*</em> attributes and application-specific
     * context).
     *
     * @exception IllegalArgumentException if the output encoding
     *  is anything other than UTF-8.
     */
    final public void setCanonical (boolean value)
    {
        // Turning canonical output off touches nothing else.
        if (!value) {
            canonical = false;
            return;
        }

        if (!"UTF-8".equals (encoding))
            throw new IllegalArgumentException ("encoding != UTF-8");

        // Turning it on overrides the other output options:  no pretty
        // printing, no XHTML, entities expanded, newline line ends.
        canonical = true;
        prettyPrinting = false;
        xhtml = false;
        expandingEntities = true;
        eol = "\n";
    }


    /**
     * Returns value of flag controlling canonical output.
     */
    final public boolean isCanonical ()
    {
        // current canonical output setting
        return this.canonical;
    }


    /**
     * Flushes the output stream.  When this handler is used in long lived
     * pipelines, it can be important to flush buffered state, for example
     * so that it can reach the disk as part of a state checkpoint.
     */
    final public void flush ()
    throws IOException
    {
        // nothing to flush when there's no writer
        if (out == null)
            return;
        out.flush ();
    }


    // convenience routines

// FIXME:  probably want a subclass that holds a lot of these...
// and maybe more!
    
    /**
     * Writes the string as if characters() had been called on the contents
     * of the string.  This is particularly useful when applications act as
     * producers and write data directly to event consumers.
     */
    final public void write (String data)
    throws SAXException
    {
        // report the whole string as a single run of characters
        characters (data.toCharArray (), 0, data.length ());
    }


    /**
     * Writes an element that has content consisting of a single string.
     * @see #writeEmptyElement
     * @see #startElement
     */
    public void writeElement (
        String uri,
        String localName,
        String qName,
        Attributes atts,
        String content
    ) throws SAXException
    {
        boolean hasText = content != null && content.length () > 0;

        if (hasText) {
            // start tag, the text as one run of characters, end tag
            char text [] = content.toCharArray ();

            startElement (uri, localName, qName, atts);
            characters (text, 0, text.length);
            endElement (uri, localName, qName);
        } else {
            // no text at all:  write the element in its empty form
            writeEmptyElement (uri, localName, qName, atts);
        }
    }


    /**
     * Writes an element that has content consisting of a single integer,
     * encoded as a decimal string.
     * @see #writeEmptyElement
     * @see #startElement
     */
    public void writeElement (
        String uri,
        String localName,
        String qName,
        Attributes atts,
        int content
    ) throws SAXException
    {
        // format as decimal, then share the string-content logic
        String decimal = String.valueOf (content);

        writeElement (uri, localName, qName, atts, decimal);
    }


    // SAX1 ContentHandler
    /** <b>SAX1</b>:  provides parser status information */
    final public void setDocumentLocator (Locator l)
    {
        // kept for use in the diagnostics produced by fatal()
        this.locator = l;
    }


    // URL for the XHTML 1.0 "transitional" DTD, which validates against
    // all normal HTML constructs.  startDocument() writes it as the
    // system identifier of the DOCTYPE declaration in XHTML mode.
    private static final String xhtmlFullDTD =
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";

    
    /**
     * <b>SAX1</b>:  indicates the beginning of a document parse.
     * If you're writing (well formed) fragments of XML, neither
     * this nor endDocument should be called.
     */
    // NOT final
    public void startDocument ()
    throws SAXException
    {
        if (out == null)
            throw new IllegalStateException (
                "null Writer given to XMLWriter");

        // Not every parser supplies a locator.  Besides being used for
        // diagnostics, a non-null locator records that events are now
        // being sent to this object; since fragments get written too,
        // most other callbacks repeat this test.
        if (locator == null)
            locator = new LocatorImpl ();

        // The XML declaration is written only when the encoding is
        // known, and is left out of canonical and US-ASCII output.
        // (US-ASCII won't normally get mangled by web server confusion
        // about character encodings, and omitting the declaration keeps
        // the text from confusing elderly HTML parsers.)
        boolean asciiOutput = dangerMask == (short) 0xff80;
        boolean declare = !canonical && !asciiOutput && encoding != null;

        try {
            if (declare) {
                rawWrite ("<?xml version='1.0'");
                rawWrite (" encoding='" + encoding + "'");
                rawWrite ("?>");
                newline ();
            }

            if (xhtml) {
                // the transitional DOCTYPE; the URL matches the REC
                rawWrite ("<!DOCTYPE html PUBLIC");
                newline ();
                rawWrite ("  '-//W3C//DTD XHTML 1.0 Transitional//EN'");
                newline ();
                rawWrite ("  '");
                rawWrite (xhtmlFullDTD);
                rawWrite ("'>");
                newline ();
                newline ();

                // fake the rest of the handler into ignoring
                // everything until the root element, so any
                // XHTML DTD comments, PIs, etc are ignored
                startedDoctype = true;
            }

            entityNestLevel = 0;

        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * <b>SAX1</b>:  indicates the completion of a parse.
     * Note that all complete SAX event streams make this call, even
     * if an error is reported during a parse.
     *
     * <p> The writer is closed and this handler lets go of it, even
     * when the final writes or the close itself fail, so that a new
     * writer can always be assigned afterwards.
     *
     * @exception SAXException if the output can't be written or closed
     */
    // NOT final
    public void endDocument ()
    throws SAXException
    {
        try {
            try {
                if (!canonical) {
                    newline ();
                    newline ();
                }
            } finally {
                // don't leak the writer if those newlines failed
                out.close ();
            }
        } catch (IOException e) {
            fatal ("can't write", e);
        } finally {
            // Even after a failure this document is over; otherwise
            // setWriter() would refuse every later attempt at reuse.
            out = null;
            locator = null;
        }
    }

    // XHTML elements declared as EMPTY print differently
    final private static boolean isEmptyElementTag (String tag)
    {
        // Dispatch on the first letter so that most element names
        // are rejected after a single comparison.
        final char first = tag.charAt (0);

        if (first == 'a')
            return "area".equals (tag);
        if (first == 'b')
            return "base".equals (tag)
                || "basefont".equals (tag)
                || "br".equals (tag);
        if (first == 'c')
            return "col".equals (tag);
        if (first == 'f')
            return "frame".equals (tag);
        if (first == 'h')
            return "hr".equals (tag);
        if (first == 'i')
            return "img".equals (tag)
                || "input".equals (tag)
                || "isindex".equals (tag);
        if (first == 'l')
            return "link".equals (tag);
        if (first == 'm')
            return "meta".equals (tag);
        if (first == 'p')
            return "param".equals (tag);
        return false;
    }

    private static boolean indentBefore (String tag)
    {
        // basically indent before block content
        // and within structure like tables, lists
        // ... but not inline elements like "em", "b", "font"
        final char first = tag.charAt (0);

        if (first == 'a')
            return "applet".equals (tag);
        if (first == 'b')
            return "body".equals (tag) || "blockquote".equals (tag);
        if (first == 'c')
            return "center".equals (tag);
        if (first == 'f')
            return "frame".equals (tag) || "frameset".equals (tag);
        if (first == 'h')
            return "head".equals (tag);
        if (first == 'm')
            return "meta".equals (tag);
        if (first == 'o')
            return "object".equals (tag);
        if (first == 'p')
            return "param".equals (tag) || "pre".equals (tag);
        if (first == 's')
            return "style".equals (tag);
        if (first == 't')
            return "title".equals (tag)
                || "td".equals (tag)
                || "th".equals (tag);
        return false;
    }

    private static boolean spaceBefore (String tag)
    {
        // blank line AND INDENT before certain structural content
        final char first = tag.charAt (0);

        if (first == 'h')
            return "h1".equals (tag)
                || "h2".equals (tag)
                || "h3".equals (tag)
                || "h4".equals (tag)
                || "h5".equals (tag)
                || "h6".equals (tag)
                || "hr".equals (tag);
        if (first == 'l')
            return "li".equals (tag);
        if (first == 'o')
            return "ol".equals (tag);
        if (first == 'p')
            return "p".equals (tag);
        if (first == 't')
            return "table".equals (tag) || "tr".equals (tag);
        if (first == 'u')
            return "ul".equals (tag);
        return false;
    }

    // XHTML DTDs say these three have xml:space="preserve"
    private static boolean spacePreserve (String tag)
    {
        // a null name matches none of the three
        if (tag == null)
            return false;
        if (tag.equals ("pre"))
            return true;
        if (tag.equals ("style"))
            return true;
        return tag.equals ("script");
    }

    /**
     * <b>SAX2</b>:  ignored.  This handler does nothing with prefix
     * mapping events; the method body is empty.
     *
     * @param prefix the namespace prefix being declared (unused)
     * @param uri the namespace URI bound to that prefix (unused)
     */
    final public void startPrefixMapping (String prefix, String uri)
  {}

    /**
     * <b>SAX2</b>:  ignored.  This handler does nothing with prefix
     * mapping events; the method body is empty.
     *
     * @param prefix the namespace prefix going out of scope (unused)
     */
    final public void endPrefixMapping (String prefix)
  {}

    private void writeStartTag (
        String name,
        Attributes atts,
        boolean isEmpty
    ) throws SAXException, IOException
    {
        rawWrite ('<');
        rawWrite (name);

        // write out attributes ... sorting is particularly useful
        // with output that's been heavily defaulted.
        final int count = (atts == null) ? 0 : atts.getLength ();

        if (count != 0) {
            // order[k] is the index of the attribute to write k-th;
            // it starts out as the order in which they were provided
            int order [] = new int [count];

            for (int k = 0; k < count; k++)
                order [k] = k;

// FIXME:  canon xml demands xmlns nodes go first,
// and sorting by URI first (empty first) then localname
// it should maybe use a different sort

            if (canonical || prettyPrinting) {
                // insertion sort by attribute name:  slide larger names
                // up one slot until the right spot for this one opens
                for (int k = 1; k < count; k++) {
                    int moving = order [k];
                    String key = atts.getQName (moving);
                    int slot = k;

                    while (slot > 0
                            && key.compareTo (
                                atts.getQName (order [slot - 1])) < 0) {
                        order [slot] = order [slot - 1];
                        slot--;
                    }
                    order [slot] = moving;
                }
            }

            // write, sorted or no
            for (int k = 0; k < count; k++) {
                int which = order [k];
                String attName = atts.getQName (which);

                if (attName == null || attName.length () == 0)
                    throw new IllegalArgumentException ("no XML name");
                rawWrite (" ");
                rawWrite (attName);
                rawWrite ("=");
                writeQuotedValue (atts.getValue (which), CTX_ATTRIBUTE);
            }
        }

        if (isEmpty)
            rawWrite (" /");
        rawWrite ('>');
    }

    /**
     * <b>SAX2</b>:  indicates the start of an element.
     * When XHTML is in use, avoid attribute values with
     * line breaks or multiple whitespace characters, since
     * not all user agents handle them correctly.
     */
    final public void startElement (
        String uri,
        String localName,
        String qName,
        Attributes atts
    ) throws SAXException
    {
        // any element ends the "ignore until the root element" state
        // that startDocument() sets up for XHTML output
        startedDoctype = false;

        if (locator == null)
            locator = new LocatorImpl ();

        if (qName == null || "".equals (qName))
            throw new IllegalArgumentException ("no XML name");

        try {
            // nothing gets written while entityNestLevel is nonzero
            if (entityNestLevel != 0)
                return;

            if (prettyPrinting) {
                // Work out this element's xml:space mode:  XHTML names
                // that preserve space win, then an explicit attribute,
                // else whatever the enclosing element is using.
                String mode;

                if (xhtml && spacePreserve (qName))
                    mode = "preserve";
                else
                    mode = (atts == null)
                        ? null
                        : atts.getValue ("xml:space");
                if (mode == null)
                    mode = (String) space.peek ();
                space.push (mode);

                if ("default".equals (mode)) {
                    if (!xhtml)
                        doIndent ();
                    else if (spaceBefore (qName)) {
                        newline ();
                        doIndent ();
                    } else if (indentBefore (qName))
                        doIndent ();
                    // else it's inlined, modulo line length
                    // FIXME: incrementing element nest level
                    // for inlined elements causes ugliness
                }
            }

            elementNestLevel++;

            boolean writeAsEmpty = xhtml && isEmptyElementTag (qName);
            writeStartTag (qName, atts, writeAsEmpty);

// FIXME: if this is an XHTML "pre" element, turn
// off automatic wrapping.

        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * Writes an element that has no content.  In canonical mode this is
     * a start tag followed by an end tag, produced through
     * {@link #startElement} and {@link #endElement}; otherwise a single
     * empty-element tag is written directly.
     *
     * @param uri passed through to the element callbacks in canonical mode
     * @param localName passed through to the element callbacks in
     *	canonical mode
     * @param qName the element name to write
     * @param atts the element's attributes
     * @see #startElement
     */
    public void writeEmptyElement (
        String uri,
        String localName,
        String qName,
        Attributes atts
    ) throws SAXException
    {
        if (!canonical) {
            try {
                writeStartTag (qName, atts, true);
            } catch (IOException e) {
                fatal ("can't write", e);
            }
            return;
        }

        startElement (uri, localName, qName, atts);
        endElement (uri, localName, qName);
    }


    /**
     * <b>SAX2</b>:  indicates the end of an element and writes its end tag.
     *
     * <p>This method undoes exactly the bookkeeping that
     * {@link #startElement} performed for the same element:  nothing
     * happens while <code>entityNestLevel</code> is non-zero (the start
     * callback neither counted the element nor pushed a whitespace mode
     * in that case), and otherwise the nesting level is decremented and,
     * when pretty printing, the whitespace mode is popped.  For XHTML
     * elements that were written as empty-element tags, no end tag is
     * written but the bookkeeping is still undone.
     *
     * @param uri namespace URI; not used by this method
     * @param localName local name; not used by this method
     * @param qName the name written in the end tag; must be non-empty
     * @exception IllegalArgumentException if <code>qName</code> is null
     *	or empty
     * @exception SAXException as raised by <code>fatal</code> when
     *	writing fails or the whitespace stack is unexpectedly empty
     */
    final public void endElement (String uri, String localName, String qName)
    throws SAXException
    {
        if (qName == null || "".equals (qName))
            throw new IllegalArgumentException ("no XML name");

        try {
            // startElement() returns before touching elementNestLevel or
            // the whitespace stack in this case, so this must do likewise.
            if (entityNestLevel != 0)
                return;
            elementNestLevel--;

            // XHTML empty elements were already closed by their start tag
            if (!(xhtml && isEmptyElementTag (qName))) {
                rawWrite ("</");
                rawWrite (qName);
                rawWrite ('>');
            }

            // Pop only after the end tag is written, since line wrapping
            // of that tag is governed by this element's whitespace mode.
            if (prettyPrinting) {
                if (!space.empty ())
                    space.pop ();
                else
                    fatal ("stack discipline", null);
            }
            if (elementNestLevel == 0)
                inEpilogue = true;

        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * <b>SAX1</b>:  reports content characters.  The text is escaped as
     * unparsed data while inside a CDATA section and as ordinary content
     * otherwise; it is dropped while <code>entityNestLevel</code> is
     * non-zero.
     *
     * @param ch buffer holding the characters
     * @param start index of the first character to write
     * @param length number of characters to write
     */
    final public void characters (char ch [], int start, int length)
    throws SAXException
    {
        if (locator == null) {
            locator = new LocatorImpl ();
        }

        try {
            if (entityNestLevel == 0) {
                int context = inCDATA ? CTX_UNPARSED : CTX_CONTENT;

                escapeChars (ch, start, length, context);
            }
        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * <b>SAX1</b>:  reports ignorable whitespace, which is written the
     * same way as ordinary content.  It is dropped while
     * <code>entityNestLevel</code> is non-zero.
     *
     * @param ch buffer holding the whitespace
     * @param start index of the first character to write
     * @param length number of characters to write
     */
    final public void ignorableWhitespace (char ch [], int start, int length)
    throws SAXException
    {
        if (locator == null) {
            locator = new LocatorImpl ();
        }

        try {
            if (entityNestLevel == 0) {
                // don't forget to map NL to CRLF, CR, etc
                escapeChars (ch, start, length, CTX_CONTENT);
            }
        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * <b>SAX1</b>:  reports a PI.
     * This doesn't check for illegal target names, such as "xml" or "XML",
     * or namespace-incompatible ones like "big:dog"; the caller is
     * responsible for ensuring those names are legal.
     *
     * <p>Nothing is written for XHTML output once a doctype has been
     * started, nor while <code>entityNestLevel</code> is non-zero.
     *
     * @param target the PI target name
     * @param data the PI data; SAX permits this to be null when none was
     *	supplied, in which case only the target is written
     * @exception SAXException as raised by <code>fatal</code> when
     *	writing fails
     */
    final public void processingInstruction (String target, String data)
    throws SAXException
    {
        if (locator == null)
            locator = new LocatorImpl ();

        // don't print internal subset for XHTML
        if (xhtml && startedDoctype)
            return;

        // ancient HTML browsers might render these ... their loss.
        // to prevent:  "if (xhtml) return;".

        try {
            if (entityNestLevel != 0)
                return;
            if (canonical && inEpilogue)
                newline ();
            rawWrite ("<?");
            rawWrite (target);
            // A null data string used to fail with a NullPointerException;
            // treat it as "no data" and skip the separator too.
            if (data != null) {
                rawWrite (' ');
                // negative offset asks escapeChars for the whole array
                escapeChars (data.toCharArray (), -1, -1, CTX_UNPARSED);
            }
            rawWrite ("?>");
            if (elementNestLevel == 0 && !(canonical && inEpilogue))
                newline ();
        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * <b>SAX1</b>: indicates a non-expanded entity reference, which is
     * written back out as <code>&amp;name;</code>.
     *
     * @param name the name of the skipped entity
     */
    public void skippedEntity (String name)
    throws SAXException
    {
        try {
            rawWrite ("&");
            rawWrite (name);
            rawWrite (";");
        } catch (IOException writeFailure) {
            fatal ("can't write", writeFailure);
        }
    }

    // SAX2 LexicalHandler

    /**
     * <b>SAX2</b>:  called before parsing CDATA characters.  Ignored in
     * canonical mode; otherwise marks the writer as being inside a CDATA
     * section and, unless <code>entityNestLevel</code> is non-zero,
     * writes the section's opening delimiter.
     */
    final public void startCDATA ()
    throws SAXException
    {
        if (locator == null) {
            locator = new LocatorImpl ();
        }
        if (canonical) {
            return;
        }

        inCDATA = true;
        try {
            if (entityNestLevel == 0) {
                rawWrite ("<![CDATA[");
            }
        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * <b>SAX2</b>:  called after parsing CDATA characters.  Ignored in
     * canonical mode; otherwise clears the in-CDATA state and, unless
     * <code>entityNestLevel</code> is non-zero, writes the section's
     * closing delimiter.
     */
    final public void endCDATA ()
    throws SAXException
    {
        if (canonical) {
            return;
        }

        inCDATA = false;
        try {
            if (entityNestLevel == 0) {
                rawWrite ("]]>");
            }
        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * <b>SAX2</b>:  called when the doctype is partially parsed.
     * Note that this, like other doctype related calls, is ignored
     * when XHTML is in use.  In canonical mode the doctype state is
     * recorded but nothing is written.  The external identifier is
     * written only when entities are not being expanded.
     *
     * @param name the document type name
     * @param publicId the public identifier of the external subset,
     *	or null
     * @param systemId the system identifier of the external subset,
     *	or null
     */
    final public void startDTD (String name, String publicId, String systemId)
    throws SAXException
    {
        if (locator == null) {
            locator = new LocatorImpl ();
        }
        if (xhtml) {
            return;
        }

        try {
            startedDoctype = true;
            inDoctype = true;
            if (canonical) {
                return;
            }

            // decide up front which external ID (if any) gets written
            String externalId = null;

            if (!expandingEntities) {
                if (publicId != null) {
                    externalId =
                        "PUBLIC '" + publicId + "' '" + systemId + "' ";
                } else if (systemId != null) {
                    externalId = "SYSTEM '" + systemId + "' ";
                }
            }

            rawWrite ("<!DOCTYPE ");
            rawWrite (name);
            rawWrite (' ');
            if (externalId != null) {
                rawWrite (externalId);
            }
            rawWrite ('[');
            newline ();
        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * <b>SAX2</b>:  called after the doctype is parsed.  Always clears
     * the in-doctype state; the closing <code>]&gt;</code> is written
     * only when neither canonical nor XHTML output is in use.
     */
    final public void endDTD ()
    throws SAXException
    {
        inDoctype = false;

        if (!canonical && !xhtml) {
            try {
                rawWrite ("]>");
                newline ();
            } catch (IOException e) {
                fatal ("can't write", e);
            }
        }
    }

    /**
     * <b>SAX2</b>:  called before parsing a general entity in content.
     * Ignored for XHTML output and when entities are being expanded.
     * Otherwise the entity nesting level is incremented and, for an
     * outermost entity other than <code>[dtd]</code>, a reference to the
     * entity is written.  Names starting with "%" are written without a
     * leading ampersand and are followed by a line end.
     *
     * @param name the entity name, as reported by the parser
     */
    final public void startEntity (String name)
    throws SAXException
    {
        try {
            // Predefined XHTML entities (for characters) will get
            // mapped back later.
            if (xhtml || expandingEntities) {
                return;
            }

            entityNestLevel++;
            if (name.equals ("[dtd]") || entityNestLevel != 1) {
                return;
            }

            boolean parameterEntity = name.startsWith ("%");

            if (!parameterEntity) {
                rawWrite ('&');
            }
            rawWrite (name);
            rawWrite (';');
            if (parameterEntity) {
                newline ();
            }
        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * <b>SAX2</b>:  called after parsing a general entity in content.
     * Decrements the entity nesting level, except for XHTML output or
     * when entities are being expanded (the cases in which
     * {@link #startEntity} does not increment it).
     *
     * @param name the entity name; not used by this method
     */
    final public void endEntity (String name)
    throws SAXException
    {
        if (!xhtml && !expandingEntities) {
            entityNestLevel--;
        }
    }

    /**
     * <b>SAX2</b>:  called when comments are parsed.
     * When XHTML is used, the old HTML tradition of using comments
     * for inline CSS, or for JavaScript code, is discouraged.
     * This is because XML processors are encouraged to discard comments,
     * on the grounds that they are for users (and perhaps text
     * editors) not programs.  Instead, use external scripts.
     *
     * <p>Comments are not written for XHTML once a doctype has started,
     * for canonical output while inside the doctype, or while
     * <code>entityNestLevel</code> is non-zero.
     *
     * @param ch buffer holding the comment text
     * @param start index of the first character of the comment
     * @param length number of characters in the comment
     */
    final public void comment (char ch [], int start, int length)
    throws SAXException
    {
        if (locator == null) {
            locator = new LocatorImpl ();
        }

        // don't print internal subset for XHTML, nor comments in the
        // doctype for canon xml
        if ((xhtml && startedDoctype) || (canonical && inDoctype)) {
            return;
        }

        try {
            if (prettyPrinting && space.empty ()) {
                fatal ("stack discipline", null);
            }

            final boolean indenting =
                prettyPrinting && "default".equals (space.peek ());

            if (entityNestLevel != 0) {
                return;
            }

            final boolean canonicalEpilogue = canonical && inEpilogue;

            if (indenting) {
                doIndent ();
            }
            if (canonicalEpilogue) {
                newline ();
            }
            rawWrite ("<!--");
            escapeChars (ch, start, length, CTX_UNPARSED);
            rawWrite ("-->");
            if (indenting) {
                doIndent ();
            }
            if (elementNestLevel == 0 && !canonicalEpilogue) {
                newline ();
            }
        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    // SAX1 DTDHandler

    /**
     * <b>SAX1</b>:  called on notation declarations.  Writes a
     * <code>&lt;!NOTATION ...&gt;</code> declaration, but only once a
     * doctype has been started, and never for XHTML output or while
     * <code>entityNestLevel</code> is non-zero.
     *
     * @param name the notation name
     * @param publicId the notation's public identifier, or null
     * @param systemId the notation's system identifier, or null
     * @exception SAXException as raised by <code>fatal</code> when
     *	writing fails
     */
    final public void notationDecl (String name,
            String publicId, String systemId)
    throws SAXException
    {
        if (xhtml)
            return;
        try {
            // At this time, only SAX2 callbacks start these.
            if (!startedDoctype)
                return;

            if (entityNestLevel != 0)
                return;
            rawWrite ("<!NOTATION " + name + " ");
            if (publicId != null) {
                rawWrite ("PUBLIC \"" + publicId + '"');
                // XML requires whitespace between the public and the
                // system literal; without it the output is malformed.
                if (systemId != null)
                    rawWrite (' ');
            } else
                rawWrite ("SYSTEM ");
            if (systemId != null)
                rawWrite ('"' + systemId + '"');
            rawWrite (">");
            newline ();
        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * <b>SAX1</b>:  called on unparsed entity declarations.  Writes a
     * <code>&lt;!ENTITY ... NDATA ...&gt;</code> declaration, but only
     * once a doctype has been started, and never for XHTML output or
     * while <code>entityNestLevel</code> is non-zero.
     *
     * @param name the entity name
     * @param publicId the entity's public identifier, or null
     * @param systemId the entity's system identifier
     * @param notationName the name of the associated notation
     * @exception SAXException as raised by <code>fatal</code> when
     *	writing fails
     */
    final public void unparsedEntityDecl (String name,
        String publicId, String systemId,
        String notationName)
    throws SAXException
    {
        if (xhtml)
            return;
        try {
            // At this time, only SAX2 callbacks start these.
            if (!startedDoctype)  {
                // FIXME: write to temporary buffer, and make the start
                // of the root element write these declarations.
                return;
            }

            if (entityNestLevel != 0)
                return;
            rawWrite ("<!ENTITY " + name + " ");
            if (publicId != null)
                // trailing blank separates the public literal from the
                // system literal, as XML requires
                rawWrite ("PUBLIC \"" + publicId + "\" ");
            else
                rawWrite ("SYSTEM ");
            rawWrite ('"' + systemId + '"');
            rawWrite (" NDATA " + notationName + ">");
            newline ();
        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    // SAX2 DeclHandler

    /**
     * <b>SAX2</b>:  called on attribute declarations.  Writes a
     * single-attribute <code>&lt;!ATTLIST ...&gt;</code> declaration,
     * but only once a doctype has been started, and never for XHTML
     * output or while <code>entityNestLevel</code> is non-zero.
     *
     * @param eName name of the element the attribute belongs to
     * @param aName name of the attribute
     * @param type the attribute type, written as given
     * @param mode the default mode, written as given; may be null
     * @param value the default value, written quoted; may be null
     */
    final public void attributeDecl (String eName, String aName,
            String type, String mode, String value)
    throws SAXException
    {
        if (xhtml) {
            return;
        }

        try {
            // At this time, only SAX2 callbacks start these.
            if (!startedDoctype || entityNestLevel != 0) {
                return;
            }

            rawWrite ("<!ATTLIST " + eName + ' ' + aName + ' ');
            rawWrite (type);
            rawWrite (' ');
            if (mode != null) {
                rawWrite (mode + ' ');
            }
            if (value != null) {
                writeQuotedValue (value, CTX_ATTRIBUTE);
            }
            rawWrite ('>');
            newline ();
        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * <b>SAX2</b>:  called on element declarations.  Writes an
     * <code>&lt;!ELEMENT ...&gt;</code> declaration, but only once a
     * doctype has been started, and never for XHTML output or while
     * <code>entityNestLevel</code> is non-zero.
     *
     * @param name the element type name
     * @param model the content model, written as given
     */
    final public void elementDecl (String name, String model)
    throws SAXException
    {
        if (xhtml) {
            return;
        }

        try {
            // At this time, only SAX2 callbacks start these.
            if (!startedDoctype || entityNestLevel != 0) {
                return;
            }

            rawWrite ("<!ELEMENT " + name + ' ' + model + '>');
            newline ();
        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * <b>SAX2</b>:  called on external entity declarations.  Writes an
     * <code>&lt;!ENTITY ...&gt;</code> declaration carrying an external
     * identifier, but only once a doctype has been started, and never
     * for XHTML output or while <code>entityNestLevel</code> is non-zero.
     *
     * @param name the entity name; a leading "%" marks a parameter
     *	entity and is written as a separate "% " token
     * @param publicId the entity's public identifier, or null
     * @param systemId the entity's system identifier
     * @exception SAXException as raised by <code>fatal</code> when
     *	writing fails
     */
    final public void externalEntityDecl (
        String name,
        String publicId,
        String systemId)
    throws SAXException
    {
        if (xhtml)
            return;
        try {
            // At this time, only SAX2 callbacks start these.
            if (!startedDoctype)
                return;
            if (entityNestLevel != 0)
                return;
            rawWrite ("<!ENTITY ");
            if (name.startsWith ("%")) {
                rawWrite ("% ");
                rawWrite (name.substring (1));
            } else
                rawWrite (name);
            if (publicId != null)
                // trailing blank separates the public literal from the
                // system literal, as XML requires
                rawWrite (" PUBLIC \"" + publicId + "\" ");
            else
                rawWrite (" SYSTEM ");
            rawWrite ('"' + systemId + "\">");
            newline ();
        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * <b>SAX2</b>:  called on internal entity declarations.  Writes an
     * <code>&lt;!ENTITY ...&gt;</code> declaration with a quoted
     * replacement value, but only once a doctype has been started, and
     * never for XHTML output or while <code>entityNestLevel</code> is
     * non-zero.
     *
     * @param name the entity name; a leading "%" marks a parameter
     *	entity and is written as a separate "% " token
     * @param value the entity's replacement text
     */
    final public void internalEntityDecl (String name, String value)
    throws SAXException
    {
        if (xhtml) {
            return;
        }

        try {
            // At this time, only SAX2 callbacks start these.
            if (!startedDoctype || entityNestLevel != 0) {
                return;
            }

            rawWrite ("<!ENTITY ");
            if (!name.startsWith ("%")) {
                rawWrite (name);
            } else {
                rawWrite ("% ");
                rawWrite (name.substring (1));
            }
            rawWrite (' ');
            writeQuotedValue (value, CTX_ENTITY);
            rawWrite ('>');
            newline ();
        } catch (IOException e) {
            fatal ("can't write", e);
        }
    }

    /**
     * Writes a value enclosed in double quotes, escaping its characters
     * according to the given context.  Line wrapping is suppressed for
     * the duration of the call, and is re-enabled even when writing
     * fails so that a failure cannot leave the writer stuck in
     * no-wrap mode.
     *
     * @param value the text to quote; must not be null
     * @param code one of the <code>CTX_*</code> context codes, passed
     *	through to <code>escapeChars</code>
     */
    private void writeQuotedValue (String value, int code)
    throws SAXException, IOException
    {
        char buf [] = value.toCharArray ();

        // we can't add line breaks to attribute/entity/... values
        noWrap = true;
        try {
            rawWrite ('"');
            escapeChars (buf, 0, buf.length, code);
            rawWrite ('"');
        } finally {
            noWrap = false;
        }
    }
    
    // From "HTMLlat1x.ent" ... names of entities for ISO-8859-1
    // (Latin/1) characters, all codes:  160-255 (0xA0-0xFF).
    // Codes 128-159 have no assigned values.
    private static final String HTMLlat1x [] = {
  // 160
  "nbsp", "iexcl", "cent", "pound", "curren",
  "yen", "brvbar", "sect", "uml", "copy",

  // 170
  "ordf", "laquo", "not", "shy", "reg",
  "macr", "deg", "plusmn", "sup2", "sup3",

  // 180
  "acute", "micro", "para", "middot", "cedil",
  "sup1", "ordm", "raquo", "frac14", "frac12",

  // 190
  "frac34", "iquest", "Agrave", "Aacute", "Acirc",
  "Atilde", "Auml", "Aring", "AElig", "Ccedil",

  // 200
  "Egrave", "Eacute", "Ecirc", "Euml", "Igrave",
  "Iacute", "Icirc", "Iuml", "ETH", "Ntilde",

  // 210
  "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml",
  "times", "Oslash", "Ugrave", "Uacute", "Ucirc",

  // 220
  "Uuml", "Yacute", "THORN", "szlig", "agrave",
  "aacute", "acirc", "atilde", "auml", "aring",

  // 230
  "aelig", "ccedil", "egrave", "eacute", "ecirc",
  "euml", "igrave", "iacute", "icirc", "iuml",

  // 240
  "eth", "ntilde", "ograve", "oacute", "ocirc",
  "otilde", "ouml", "divide", "oslash", "ugrave",

  // 250
  "uacute", "ucirc", "uuml", "yacute", "thorn",
  "yuml"
    };

    // From "HTMLsymbolx.ent" ... some of the symbols that
    // we can conveniently handle.  Entities for the Greek.
    // alphabet (upper and lower cases) are compact.
    private static final String HTMLsymbolx_GR [] = {
  // 913
  "Alpha", "Beta", "Gamma", "Delta", "Epsilon",
  "Zeta", "Eta", "Theta", "Iota", "Kappa",

  // 923
  "Lambda", "Mu", "Nu", "Xi", "Omicron",
  "Pi", "Rho", null, "Sigma", "Tau",

  // 933
  "Upsilon", "Phi", "Chi", "Psi", "Omega"
    };

    private static final String HTMLsymbolx_gr [] = {
  // 945
  "alpha", "beta", "gamma", "delta", "epsilon",
  "zeta", "eta", "theta", "iota", "kappa",

  // 955
  "lambda", "mu", "nu", "xi", "omicron",
  "pi", "rho", "sigmaf", "sigma", "tau",

  // 965
  "upsilon", "phi", "chi", "psi", "omega"
    };


    /**
     * General routine to write text and substitute predefined
     * entities (XML, and a special case for XHTML) as needed.
     *
     * <p>The text is scanned one character at a time.  Characters that
     * can be written as-is are accumulated into a pending run; when a
     * character needs replacing, the pending run is flushed with
     * <code>rawWrite</code> and then either a line end or an
     * <code>&amp;name;</code> / <code>&amp;#xHEX;</code> reference is
     * written in its place.
     *
     * @param buf buffer holding the text
     * @param off index of the first character; a negative value means
     *	"use the whole buffer", in which case <code>len</code> is ignored
     * @param len number of characters to process
     * @param code one of the <code>CTX_*</code> context codes, selecting
     *	which characters get escaped
     * @exception CharConversionException (an IOException) if a character
     *	is rejected by the range checks below, or is flagged by
     *	<code>dangerMask</code> in the <code>CTX_UNPARSED</code> context
     */
    private void escapeChars (char buf [], int off, int len, int code)
    throws SAXException, IOException
    {
  // index (relative to "off") of the first character that has not
  // been written yet, i.e. the start of the pending run
  int first = 0;

  if (off < 0) {
      off = 0;
      len = buf.length;
  }
  for (int i = 0; i < len; i++) {
      String  esc;
      char  c = buf [off + i];

      // Inside this switch, "continue" resumes the enclosing for loop:
      // the character stays in the pending run and is written verbatim.
      // "break" falls through to the replacement code after the switch.
      switch (c) {
        // Note that CTX_ATTRIBUTE isn't explicitly tested here;
        // all syntax delimiters are escaped in CTX_ATTRIBUTE,
        // otherwise it's similar to CTX_CONTENT

        // ampersand flags entity references; entity replacement
        // text has unexpanded references, other text doesn't.
        case '&':
    if (code == CTX_ENTITY || code == CTX_UNPARSED)
        continue;
    esc = "amp";
    break;

        // attributes and text may NOT have literal '<', but
        // entities may have markup constructs
        case '<':
    if (code == CTX_ENTITY || code == CTX_UNPARSED)
        continue;
    esc = "lt";
    break;

        // as above re markup constructs; but otherwise
        // except when canonicalizing, this is for consistency
        case '>':
    if (code == CTX_ENTITY || code == CTX_UNPARSED)
        continue;
          esc = "gt";
    break;
        // apostrophes are escaped only in quoted values, and never
        // in canonical output
        case '\'':
    if (code == CTX_CONTENT || code == CTX_UNPARSED)
        continue;
    if (canonical)
        continue;
    esc = "apos";
    break;

        // needed when printing quoted attribute/entity values
        case '"':
    if (code == CTX_CONTENT || code == CTX_UNPARSED)
        continue;
    esc = "quot";
    break;

        // make line ends work per host OS convention
        case '\n':
    // "eol" doubles as a marker here; it is recognized by identity
    // after the switch and turned into a newline() call
    esc = eol;
    break;

        //
        // No other characters NEED special treatment ... except
        // for encoding-specific issues, like whether the character
        // can really be represented in that encoding.
        //
        default:
    //
    // There are characters we can never write safely; getting
    // them is an error.
    //
    //   (a) They're never legal in XML ... detected by range 
    //  checks, and (eventually) by remerging surrogate
    //  pairs on output.  (Easy error for apps to prevent.)
    //
    //   (b) This encoding can't represent them, and we
    //  can't make reference substitution (e.g. inside
    //  CDATA sections, names, PI data, etc).  (Hard for
    //  apps to prevent, except by using UTF-8 or UTF-16
    //  as their output encoding.)
    //
    // We know a very little bit about what characters
    // the US-ASCII and ISO-8859-1 encodings support.  For
    // other encodings we can't detect the second type of
    // error at all.  (Never an issue for UTF-8 or UTF-16.)
    //

// FIXME:  CR in CDATA is an error; in text, turn to a char ref

// FIXME:  CR/LF/TAB in attributes should become char refs

    // Reject: codes above U+FFFD, control characters other than
    // TAB/LF/CR, and (in unparsed context only, where no reference
    // can be substituted) characters flagged by dangerMask.
    // NOTE(review): dangerMask is declared outside this block; it
    // presumably has bits set for codes the output encoding cannot
    // represent -- confirm against its definition.
    if ((c > 0xfffd)
      || ((c < 0x0020) && !((c == 0x0009)
        || (c == 0x000A) || (c == 0x000D)))
      || (((c & dangerMask) != 0)
          && (code == CTX_UNPARSED))) {

        // if case (b) in CDATA, we might end the section,
        // write a reference, then restart ... possible
        // in one DOM L3 draft.

        throw new CharConversionException (
          "Illegal or non-writable character: U+"
          + Integer.toHexString (c));
    }

    //
    // If the output encoding represents the character
    // directly, let it do so!  Else we'll escape it.
    //
    if ((c & dangerMask) == 0)
        continue;
    esc = null;

    // Avoid numeric refs where symbolic ones exist, as
    // symbolic ones make more sense to humans reading!
    if (xhtml) {
        // all the HTMLlat1x.ent entities
        // (all the "ISO-8859-1" characters)
        if (c >= 160 && c <= 255)
      esc = HTMLlat1x [c - 160];

        // not quite half the HTMLsymbolx.ent entities
        // (a table entry may be null, which leaves esc null and
        // selects the numeric reference below)
        else if (c >= 913 && c <= 937)
      esc = HTMLsymbolx_GR [c - 913];
        else if (c >= 945 && c <= 969)
      esc = HTMLsymbolx_gr [c - 945];

        else switch (c) {
      // all of the HTMLspecialx.ent entities
      case  338: esc = "OElig"; break;
      case  339: esc = "oelig"; break;
      case  352: esc = "Scaron";  break;
      case  353: esc = "scaron";  break;
      case  376: esc = "Yuml";  break;
      case  710: esc = "circ";  break;
      case  732: esc = "tilde"; break;
      case 8194: esc = "ensp";  break;
      case 8195: esc = "emsp";  break;
      case 8201: esc = "thinsp";  break;
      case 8204: esc = "zwnj";  break;
      case 8205: esc = "zwj";   break;
      case 8206: esc = "lrm";   break;
      case 8207: esc = "rlm";   break;
      case 8211: esc = "ndash"; break;
      case 8212: esc = "mdash"; break;
      case 8216: esc = "lsquo"; break;
      case 8217: esc = "rsquo"; break;
      case 8218: esc = "sbquo"; break;
      case 8220: esc = "ldquo"; break;
      case 8221: esc = "rdquo"; break;
      case 8222: esc = "bdquo"; break;
      case 8224: esc = "dagger";  break;
      case 8225: esc = "Dagger";  break;
      case 8240: esc = "permil";  break;
      case 8249: esc = "lsaquo";  break;
      case 8250: esc = "rsaquo";  break;
      case 8364: esc = "euro";  break;

      // the other HTMLsymbolx.ent entities
      case  402: esc = "fnof";  break;
      case  977: esc = "thetasym";  break;
      case  978: esc = "upsih"; break;
      case  982: esc = "piv";   break;
      case 8226: esc = "bull";  break;
      case 8230: esc = "hellip";  break;
      case 8242: esc = "prime"; break;
      case 8243: esc = "Prime"; break;
      case 8254: esc = "oline"; break;
      case 8260: esc = "frasl"; break;
      case 8472: esc = "weierp";  break;
      case 8465: esc = "image"; break;
      case 8476: esc = "real";  break;
      case 8482: esc = "trade"; break;
      case 8501: esc = "alefsym"; break;
      case 8592: esc = "larr";  break;
      case 8593: esc = "uarr";  break;
      case 8594: esc = "rarr";  break;
      case 8595: esc = "darr";  break;
      case 8596: esc = "harr";  break;
      case 8629: esc = "crarr"; break;
      case 8656: esc = "lArr";  break;
      case 8657: esc = "uArr";  break;
      case 8658: esc = "rArr";  break;
      case 8659: esc = "dArr";  break;
      case 8660: esc = "hArr";  break;
      case 8704: esc = "forall";  break;
      case 8706: esc = "part";  break;
      case 8707: esc = "exist"; break;
      case 8709: esc = "empty"; break;
      case 8711: esc = "nabla"; break;
      case 8712: esc = "isin";  break;
      case 8713: esc = "notin"; break;
      case 8715: esc = "ni";    break;
      case 8719: esc = "prod";  break;
      case 8721: esc = "sum";   break;
      case 8722: esc = "minus"; break;
      case 8727: esc = "lowast";  break;
      case 8730: esc = "radic"; break;
      case 8733: esc = "prop";  break;
      case 8734: esc = "infin"; break;
      case 8736: esc = "ang";   break;
      case 8743: esc = "and";   break;
      case 8744: esc = "or";    break;
      case 8745: esc = "cap";   break;
      case 8746: esc = "cup";   break;
      case 8747: esc = "int";   break;
      case 8756: esc = "there4";  break;
      case 8764: esc = "sim";   break;
      case 8773: esc = "cong";  break;
      case 8776: esc = "asymp"; break;
      case 8800: esc = "ne";    break;
      case 8801: esc = "equiv"; break;
      case 8804: esc = "le";    break;
      case 8805: esc = "ge";    break;
      case 8834: esc = "sub";   break;
      case 8835: esc = "sup";   break;
      case 8836: esc = "nsub";  break;
      case 8838: esc = "sube";  break;
      case 8839: esc = "supe";  break;
      case 8853: esc = "oplus"; break;
      case 8855: esc = "otimes";  break;
      case 8869: esc = "perp";  break;
      case 8901: esc = "sdot";  break;
      case 8968: esc = "lceil"; break;
      case 8969: esc = "rceil"; break;
      case 8970: esc = "lfloor";  break;
      case 8971: esc = "rfloor";  break;
      case 9001: esc = "lang";  break;
      case 9002: esc = "rang";  break;
      case 9674: esc = "loz";   break;
      case 9824: esc = "spades";  break;
      case 9827: esc = "clubs"; break;
      case 9829: esc = "hearts";  break;
      case 9830: esc = "diams"; break;
        }
    }

    // else escape with numeric char refs
    if (esc == null) {
        stringBuf.setLength (0);
        stringBuf.append ("#x");
        stringBuf.append (Integer.toHexString (c).toUpperCase ());
        esc = stringBuf.toString ();

        // FIXME:  We don't write surrogate pairs correctly.
        // They should work as one ref per character, since
        // each pair is one character.  For reading back into
        // Unicode, it matters beginning in Unicode 3.1 ...
    }
    break;
      }
      // Character i is being replaced:  flush the pending run that
      // precedes it, then restart the run just after it.
      if (i != first)
    rawWrite (buf, off + first, i - first);
      first = i + 1;
      // deliberate identity comparison:  esc is the eol marker only
      // when it was assigned from eol in the '\n' case above
      if (esc == eol)
    newline ();
      else {
    rawWrite ('&');
    rawWrite (esc);
    rawWrite (';');
      }
  }
  // flush whatever remains after the last replaced character
  if (first < len)
      rawWrite (buf, off + first, len - first);
    }



    /**
     * Writes the configured line end sequence straight to the output
     * and resets the column counter to the start of the line.
     */
    private void newline ()
    throws SAXException, IOException
    {
        this.out.write (this.eol);
        this.column = 0;
    }

    /**
     * Starts a new line and indents it by two columns per element
     * nesting level.  The column counter is set to the indent width.
     * Each tab written accounts for eight columns; tabs are used only
     * while more than eight columns remain, and the rest is written as
     * pairs of spaces.
     */
    private void doIndent ()
    throws SAXException, IOException
    {
        final int width = elementNestLevel * 2;

        newline ();
        column = width;

        // track tabs only at line starts
        int remaining = width;

        for (; remaining > 8; remaining -= 8) {
            out.write ("\t");
        }
        for (; remaining > 0; remaining -= 2) {
            out.write ("  ");
        }
    }

    /**
     * Writes a single character without any escaping or line wrapping,
     * advancing the column counter by one.
     *
     * @param c the character to write
     */
    private void rawWrite (char c)
    throws IOException
    {
        this.out.write (c);
        this.column += 1;
    }

    /**
     * Writes a string without escaping.  When pretty printing in
     * "default" whitespace mode the text goes through the line-wrapping
     * character-array variant; otherwise it is written directly and the
     * column counter is advanced by its length.
     *
     * @param s the text to write
     */
    private void rawWrite (String s)
    throws SAXException, IOException
    {
        if (!prettyPrinting || !"default".equals (space.peek ())) {
            out.write (s);
            column += s.length ();
            return;
        }

        char chars [] = s.toCharArray ();

        rawWrite (chars, 0, chars.length);
    }

    // NOTE:  if xhtml, the REC gives some rules about whitespace
    // which we could follow ... notably, many places where conformant
    // agents "must" consolidate/normalize whitespace.  Line ends can
    // be removed there, etc.  This may not be the right place to do
    // such mappings though.

    // Line buffering may help clarify algorithms and improve results.

    // It's likely xml:space needs more attention.

    /**
     * Writes a run of characters without escaping, wrapping lines when
     * pretty printing in "default" whitespace mode.
     *
     * <p>When wrapping applies, text that does not fit in the space left
     * on the current line is broken just after a space or tab:
     * preferably the last one that still fits, otherwise the first one
     * beyond the line length.  Each break is followed by
     * <code>doIndent</code>.  If there is no such character, or wrapping
     * is suppressed through <code>noWrap</code>, the remaining text is
     * written on the current line.
     *
     * @param buf buffer holding the characters
     * @param offset index of the first character to write
     * @param length number of characters to write
     */
    private void rawWrite (char buf [], int offset, int length)
    throws SAXException, IOException
    {
        if (prettyPrinting && space.empty ()) {
            fatal ("stack discipline", null);
        }

        if (!(prettyPrinting && "default".equals (space.peek ()))) {
            out.write (buf, offset, length);
            column += length;
            return;
        }

        // we're pretty printing and want to fill lines out only
        // to the desired line length.
        while (length > 0) {
            int room = lineLength - column;

            // Do we even have a problem?
            if (room > length || noWrap) {
                out.write (buf, offset, length);
                column += length;
                return;
            }

            // index (relative to offset) of the whitespace character
            // to break after, or -1 if none was found
            int breakAt = -1;

            // look backwards from the end of the line, trying to fill
            // as much of the line as possible.
            for (int k = room - 1; k >= 0; k--) {
                char ch = buf [offset + k];

                if (ch == ' ' || ch == '\t') {
                    breakAt = k;
                    break;
                }
            }

            // no space character permitting break before target
            // line length is filled.  So, take the next one.
            if (breakAt < 0) {
                for (int k = Math.max (room, 0); k < length; k++) {
                    char ch = buf [offset + k];

                    if (ch == ' ' || ch == '\t') {
                        breakAt = k;
                        break;
                    }
                }
            }

            // no such luck.
            if (breakAt < 0) {
                out.write (buf, offset, length);
                column += length;
                break;
            }

            // write up to and including the whitespace, then continue
            // with the rest on a fresh, indented line
            int chunk = breakAt + 1;

            out.write (buf, offset, chunk);
            doIndent ();
            offset += chunk;
            length -= chunk;
        }
    }
}