// XmlOutput.java example (Explorer: optiq-master)
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.eigenbase.util;

import java.io.*;
import java.util.*;

import com.google.common.collect.Lists;

/**
 * Streaming XML output.
 *
 * <p>Use this class to write XML to any character stream (a
 * {@link Writer} or {@link PrintWriter}).
 * While the class itself is unstructured and doesn't enforce any DTD
 * specification, use of the class
 * does ensure that the output is syntactically valid XML.</p>
 */
public class XmlOutput {

  /**
   * The underlying output stream to which all XML is written. Both
   * constructors end up storing a {@link PrintWriter} here.
   */
  private final PrintWriter out;

  /**
   * Names of the tags that have been pushed by {@code endBeginTag} and not
   * yet popped by {@code endTag}. The tagStack is maintained to check that
   * tags are balanced.
   */
  private final List<String> tagStack = new ArrayList<String>();

  /**
   * Current indentation level, maintained to improve output quality. It is
   * raised when a start tag is completed and lowered when an end tag is
   * written.
   */
  private int indent;

  /**
   * Total number of tags written, used to monitor changes to the output.
   * Besides start tags and empty tags, the cdata, content and header
   * methods of this class also increment it.
   */
  private int tagsWritten;

  /**
   * Whether the output should be compacted. Compacted output is free of
   * extraneous whitespace and is designed for easier transport.
   */
  private boolean compact;

  /** @see #setIndentString */
  private String indentString = "\t";

  /** @see #setGlob */
  private boolean glob;

  /**
   * Whether we have started but not finished a start tag. This only happens
   * if <code>glob</code> is true. The start tag is automatically closed
   * when we start a child node. If there are no child nodes, {@link #endTag}
   * creates an empty tag.
   */
  private boolean inTag;

  /** @see #setAlwaysQuoteCData */
  private boolean alwaysQuoteCData;

  /**
   * Stored by {@link #setIgnorePcdata} and returned by
   * {@link #getIgnorePcdata}; no other method of this class reads it.
   *
   * @see #setIgnorePcdata
   */
  private boolean ignorePcdata;

  /**
   * Emits the indentation that precedes a line of output.
   *
   * <p>Prints the configured indent string {@code indent} times. Nothing is
   * printed while compact mode is on, or when {@code indent} is not
   * positive.
   *
   * @param out the PrintWriter to which to display output.
   * @param indent the degree of indentation.
   */
  private void displayIndent(PrintWriter out, int indent) {
    if (compact) {
      // compact output carries no indentation at all
      return;
    }
    int remaining = indent;
    while (remaining-- > 0) {
      out.print(indentString);
    }
  }

  /**
   * Constructs a new XmlOutput based on any {@link Writer}.
   *
   * <p>The writer is wrapped in a {@link PrintWriter} and construction is
   * delegated to {@link #XmlOutput(PrintWriter)}.
   *
   * @param out the writer to which this XmlOutput generates results.
   */
  public XmlOutput(Writer out) {
    // the second argument (true) enables auto-flush on the PrintWriter
    this(new PrintWriter(out, true));
  }

  /**
   * Constructs a new XmlOutput that writes directly to the given
   * {@link PrintWriter}.
   *
   * <p>The new instance starts at indentation level zero with no tags
   * written.
   *
   * @param out the writer to which this XmlOutput generates results.
   */
  public XmlOutput(PrintWriter out) {
    this.tagsWritten = 0;
    this.indent = 0;
    this.out = out;
  }

  /**
   * Sets or unsets the compact mode.  Compact mode causes the generated
   * XML to be free of extraneous whitespace and other unnecessary
   * characters.
   *
   * <p>While compact mode is on, no indentation is printed and the
   * tag-writing methods terminate tags with {@code print} rather than
   * {@code println}.
   *
   * @param compact true to turn on compact mode, or false to turn it off.
   */
  public void setCompact(boolean compact) {
    this.compact = compact;
  }

  /**
   * Returns whether compact mode is on.
   *
   * @return the value last passed to {@link #setCompact}, or false if it
   *   has never been called
   */
  public boolean getCompact() {
    return compact;
  }

  /**
   * Sets the string to print for each level of indentation. The default is a
   * tab. Set this to the empty string to achieve no indentation (note that
   * <code>{@link #setCompact}(true)</code> removes indentation <em>and</em>
   * newlines).
   *
   * @param indentString string printed once per indentation level; must not
   *   be <code>null</code>
   * @throws IllegalArgumentException if <code>indentString</code> is
   *   <code>null</code>
   */
  public void setIndentString(String indentString) {
    // PrintWriter prints a null String as the text "null", which would end
    // up in front of every indented line; reject it up front instead.
    if (indentString == null) {
      throw new IllegalArgumentException("indentString must not be null");
    }
    this.indentString = indentString;
  }

  /**
   * Sets whether to detect that tags are empty.
   *
   * <p>When on, a start tag is left unterminated after its attributes. It is
   * completed with <code>&gt;</code> as soon as a child node is started, or
   * written as an empty tag (<code>/&gt;</code>) by {@link #endTag} if
   * nothing was started in between.
   *
   * @param glob true to turn empty-tag detection on, false to turn it off
   */
  public void setGlob(boolean glob) {
    this.glob = glob;
  }

  /**
   * Sets whether to always quote cdata segments (even if they don't contain
   * special characters).
   *
   * <p>When on, {@link #cdata(String, boolean)} wraps its data in a CDATA
   * section even if it is called with <code>quote</code> set to false.
   *
   * @param alwaysQuoteCData true to always quote, false to quote on demand
   */
  public void setAlwaysQuoteCData(boolean alwaysQuoteCData) {
    this.alwaysQuoteCData = alwaysQuoteCData;
  }

  /**
   * Sets whether to ignore unquoted text, such as whitespace.
   *
   * <p>The flag is only stored; within this class it is read solely by
   * {@link #getIgnorePcdata}.
   *
   * @param ignorePcdata the new value of the flag
   */
  public void setIgnorePcdata(boolean ignorePcdata) {
    this.ignorePcdata = ignorePcdata;
  }

  /**
   * Returns whether unquoted text is to be ignored.
   *
   * @return the value last passed to {@link #setIgnorePcdata}, or false if
   *   it has never been called
   */
  public boolean getIgnorePcdata() {
    return ignorePcdata;
  }

  /**
   * Sends a string directly to the output stream, without escaping any
   * characters.  Use with caution!
   *
   * <p>The string is written as-is: no indentation is added, a pending
   * start tag is not completed, the tag count is not changed and the
   * stream is not explicitly flushed.
   *
   * @param s the raw text to write
   */
  public void print(String s) {
    out.print(s);
  }

  /**
   * Starts writing a new tag to the stream.  The tag's name must be given and
   * its attributes should be specified by a fully constructed AttrVector
   * object.
   *
   * <p>This is {@link #beginBeginTag}, followed by the attributes (if any),
   * followed by {@link #endBeginTag}.
   *
   * @param tagName the name of the tag to write.
   * @param attributes an XMLAttrVector containing the attributes to include
   *   in the tag; may be null, in which case no attributes are written.
   */
  public void beginTag(String tagName, XMLAttrVector attributes) {
    beginBeginTag(tagName);
    if (attributes != null) {
      // NOTE(review): emptyTag() prints a space before calling display();
      // this method does not -- confirm which of the two display() expects.
      attributes.display(out, indent);
    }
    endBeginTag(tagName);
  }

  /**
   * Writes the opening part of a start tag: the indentation, the
   * <code>&lt;</code> and the tag name. The tag is left open so that
   * attributes can follow; {@link #endBeginTag} finishes it.
   *
   * <p>If the parent's start tag is still pending, it is completed first.
   *
   * @param tagName the name of the tag to open
   */
  public void beginBeginTag(String tagName) {
    // complete the parent's start tag, if it is still open
    beginNode();
    displayIndent(out, indent);
    out.print("<" + tagName);
  }

  /**
   * Finishes a start tag begun with {@link #beginBeginTag}.
   *
   * <p>Unless empty-tag detection is on, the closing <code>&gt;</code> is
   * written now (followed by a line break unless compact). With empty-tag
   * detection on, the tag is merely marked as pending. In both cases the
   * stream is flushed, the tag name is pushed on the tag stack, and the
   * indentation level and the tag count go up by one.
   *
   * @param tagName the name of the tag being completed
   */
  public void endBeginTag(String tagName) {
    if (!glob) {
      if (compact) {
        out.print(">");
      } else {
        out.println(">");
      }
    } else {
      // leave the tag open; a child or endTag decides how it is closed
      inTag = true;
    }
    out.flush();
    Stacks.push(tagStack, tagName);
    ++indent;
    ++tagsWritten;
  }

  /**
   * Writes an attribute.
   *
   * <p>The output is a space, the name, and the value in double quotes with
   * its special characters escaped. Nothing is written if the value is null.
   * Presumably meant to be called between {@link #beginBeginTag} and
   * {@link #endBeginTag}; this method does not check that a tag is open.
   *
   * @param name the attribute name, written without escaping
   * @param value the attribute value, or null to write nothing
   */
  public void attribute(String name, String value) {
    printAtt(out, name, value);
  }

  /**
   * Completes the pending start tag, if there is one.
   *
   * <p>Writes the closing <code>&gt;</code> of the parent's start tag
   * (followed by a line break unless compact) and clears the pending state.
   * Does nothing when no start tag is pending.
   */
  public void beginNode() {
    if (!inTag) {
      return;
    }
    final boolean lineBreak = !compact;
    if (lineBreak) {
      out.println(">");
    } else {
      out.print(">");
    }
    inTag = false;
  }

  /**
   * Completes a tag.  This outputs the end tag corresponding to the
   * last exposed beginTag.  The tag name must match the name of the
   * corresponding beginTag.
   *
   * <p>If the start tag is still pending (the element had no children), it
   * is closed as an empty tag with <code>/&gt;</code> instead of writing a
   * separate end tag.
   *
   * @param tagName the name of the end tag to write.
   */
  public void endTag(String tagName) {
    // the tag stack verifies that this end tag pairs with its start tag
    Stacks.pop(tagStack, tagName);

    // step back out to the indentation level of the start tag
    indent--;
    if (!inTag) {
      displayIndent(out, indent);
      out.print("</" + tagName);
      if (compact) {
        out.print(">");
      } else {
        out.println(">");
      }
    } else {
      // still inside the start tag: collapse to an empty element
      if (compact) {
        out.print("/>");
      } else {
        out.println("/>");
      }
      inTag = false;
    }
    out.flush();
  }

  /**
   * Writes an empty tag to the stream.  An empty tag is one with no
   * tags inside it, although it may still have attributes.
   *
   * <p>A pending parent start tag is completed first. The stream is flushed
   * and the tag count incremented afterwards.
   *
   * @param tagName the name of the empty tag.
   * @param attributes an XMLAttrVector containing the attributes to
   * include in the tag; may be null.
   */
  public void emptyTag(String tagName, XMLAttrVector attributes) {
    // complete the parent's start tag, if it is still open
    beginNode();

    displayIndent(out, indent);
    out.print("<" + tagName);
    if (attributes != null) {
      out.print(" ");
      attributes.display(out, indent);
    }

    final boolean lineBreak = !compact;
    if (lineBreak) {
      out.println("/>");
    } else {
      out.print("/>");
    }
    out.flush();
    ++tagsWritten;
  }

  /**
   * Writes character data, quoting or encoding it only if needed.
   * Equivalent to {@link #cdata(String, boolean) cdata(data, false)}.
   *
   * <p>How the data is written depends on whether it contains any of these
   * special characters:
   * <ul>
   * <li><code>&amp;</code>
   * <li><code>&quot;</code>
   * <li><code>'</code>
   * <li><code>&lt;</code>
   * <li><code>&gt;</code>
   * <li>tab, line feed and carriage return
   * </ul>
   * The rules are:
   * <ul>
   * <li>If {@link #setAlwaysQuoteCData} is on, the data is wrapped in
   * <code>&lt;![CDATA[</code> ... <code>]]&gt;</code>.
   * <li>Otherwise, content containing no special characters is left as-is.
   * <li>Otherwise, the content is written with each special character
   * replaced by a numeric character reference (for example
   * <code>&amp;#60;</code> for <code>&lt;</code>).
   * </ul>
   *
   * <p>NOTE(review): earlier documentation of this method said that content
   * with special characters would be enclosed in a CDATA section; the
   * implementation only does that when quoting is forced.
   *
   * @param data string to write
   */
  public void cdata(String data) {
    cdata(data, false);
  }

  /**
   * Writes character data, optionally forcing a CDATA section
   * (see also {@link #cdata(String)}).
   *
   * <p>A pending start tag is completed first, and the data is preceded by
   * the current indentation. Then:
   * <ul>
   * <li>If <code>quote</code> is true or {@link #setAlwaysQuoteCData} is
   * on, the data is wrapped in <code>&lt;![CDATA[</code> ...
   * <code>]]&gt;</code> and followed by a line break. Any
   * <code>]]&gt;</code> inside the data is split across two adjacent CDATA
   * sections, so that it cannot terminate the section early.
   * <li>Otherwise, data without special characters is written as-is.
   * <li>Otherwise, each special character is written as a numeric character
   * reference.
   * </ul>
   * Finally the stream is flushed and the tag count is incremented.
   *
   * @param data string to write; null is treated as the empty string
   * @param quote if true, quote in a <code>&lt;![CDATA[</code>
   *        ... <code>]]&gt;</code> regardless of the content of
   *        <code>data</code>; if false, quote only if
   *        {@link #setAlwaysQuoteCData} is on
   */
  public void cdata(String data, boolean quote) {
    // complete the parent's start tag, if it is still open
    beginNode();
    if (data == null) {
      data = "";
    }

    // Display the result
    displayIndent(out, indent);
    if (quote || alwaysQuoteCData) {
      out.print("<![CDATA[");
      // CDATA sections do not nest: a literal "]]>" would end the section
      // prematurely. Close the section after "]]" and reopen it before ">".
      out.print(data.replace("]]>", "]]]]><![CDATA[>"));
      out.println("]]>");
    } else if (stringHasXMLSpecials(data)) {
      stringEncodeXML(data, out);
    } else {
      out.print(data);
    }

    out.flush();
    tagsWritten++;
  }

  /**
   * Writes a String tag; a tag containing nothing but a CDATA section.
   *
   * <p>The tag is written without attributes, and the data is written by
   * {@link #cdata(String)}, so it is quoted or encoded only if needed.
   *
   * @param name the name of the tag
   * @param data the text to write inside the tag
   */
  public void stringTag(String name, String data) {
    beginTag(name, null);
    cdata(data);
    endTag(name);
  }

  /**
   * Writes content, one output line per line of input.
   *
   * <p>Each line of <code>content</code> is written on a line of its own,
   * indented one level deeper than the current indentation. The text is
   * written verbatim; no characters are escaped. A pending start tag (see
   * {@link #setGlob}) is completed first, so that the text ends up inside
   * the element rather than inside its start tag.
   *
   * <p>The tag count is incremented even if <code>content</code> is null.
   *
   * @param content the text to write; if null, nothing is written
   */
  public void content(String content) {
    if (content != null) {
      // In glob mode the enclosing start tag may still be open. Close it
      // before writing text; otherwise the text would land inside the tag
      // and endTag would then terminate it with "/>".
      beginNode();
      indent++;
      BufferedReader in = new BufferedReader(new StringReader(content));
      try {
        String line;
        while ((line = in.readLine()) != null) {
          displayIndent(out, indent);
          out.println(line);
        }
      } catch (IOException ex) {
        // not expected when reading from an in-memory string
        throw new AssertionError(ex);
      }
      indent--;
      out.flush();
    }
    tagsWritten++;
  }

  /**
   * Writes the XML declaration, using the default version 1.0.
   *
   * <p>The declaration is followed by a line break (also in compact mode);
   * the stream is flushed and the tag count is incremented.
   */
  public void header() {
    out.println("<?xml version=\"1.0\" ?>");
    out.flush();
    tagsWritten++;
  }

  /**
   * Writes the XML declaration for the given version.
   *
   * <p>The declaration is followed by a line break (also in compact mode);
   * the stream is flushed and the tag count is incremented.
   *
   * @param version the version to declare; it is inserted between the
   *   quotes as-is, without escaping
   */
  public void header(String version) {
    final String declaration = "<?xml version=\"" + version + "\" ?>";
    out.println(declaration);
    out.flush();
    ++tagsWritten;
  }

  /**
   * Gets the total number of tags written.
   *
   * <p>The counter is incremented for start tags and empty tags, and also
   * by the cdata, content and header methods of this class.
   *
   * @return the total number of tags written to the XML stream.
   */
  public int numTagsWritten() {
    return tagsWritten;
  }

  /**
   * Prints an XML attribute as a space, the name, and the escaped value in
   * double quotes.
   *
   * @param pw the writer to print to
   * @param name the attribute name, printed as-is
   * @param val the attribute value; if null, nothing is printed (an empty
   *   string is still printed as an empty attribute)
   */
  private static void printAtt(PrintWriter pw, String name, String val) {
    if (val == null) {
      return;
    }
    pw.print(" " + name + "=\"" + escapeForQuoting(val) + "\"");
  }

  /**
   * Writes a String to a PrintWriter, encoded for XML output.
   *
   * <p>The characters <code>&lt;</code>, <code>&gt;</code>,
   * <code>&quot;</code>, <code>'</code>, <code>&amp;</code>, tab, line feed
   * and carriage return are written as decimal numeric character
   * references; every other character is written unchanged.
   *
   * @param input a String to convert.
   * @param out a PrintWriter to which to write the results.
   */
  private static void stringEncodeXML(String input, PrintWriter out) {
    final int length = input.length();
    for (int pos = 0; pos < length; pos++) {
      final char ch = input.charAt(pos);
      final boolean special =
          ch == '<' || ch == '>' || ch == '"' || ch == '\'' || ch == '&'
              || ch == '\t' || ch == '\n' || ch == '\r';
      if (special) {
        out.print("&#" + (int) ch + ";");
      } else {
        out.print(ch);
      }
    }
  }

  /**
   * Escapes a string so that it can be placed inside a double-quoted
   * attribute value, using {@link StringEscaper#XML_NUMERIC_ESCAPER}.
   *
   * @param val the raw value; must not be null
   * @return the escaped value
   */
  private static String escapeForQuoting(String val) {
    return StringEscaper.XML_NUMERIC_ESCAPER.escapeString(val);
  }

  /**
   * Returns whether a string contains any XML special characters.
   *
   * <p>If this function returns true, the string will need to be
   * encoded either using the stringEncodeXML function above or using a
   * CDATA section.  Note that MSXML has a nasty bug whereby whitespace
   * characters outside of a CDATA section are lost when parsing.  To
   * avoid hitting this bug, this method treats tab, line feed and carriage
   * return as "special" too.</p>
   *
   * @param input the String to scan for XML special characters.
   * @return true if the String contains any such characters.
   */
  private static boolean stringHasXMLSpecials(String input) {
    // markup characters plus the whitespace characters treated as special
    final String specials = "<>\"'&\t\n\r";
    for (int pos = 0, end = input.length(); pos < end; pos++) {
      if (specials.indexOf(input.charAt(pos)) >= 0) {
        return true;
      }
    }
    return false;
  }

  /**
   * Utility for replacing special characters
   * with escape sequences in strings.
   *
   * <p>A StringEscaper starts out as an identity transform in the "mutable"
   * state.  Call {@link #defineEscape} as many times as necessary to set up
   * mappings, and then call {@link #makeImmutable} before
   * actually applying the defined transform.  Or,
   * use one of the global mappings pre-defined here.</p>
   */
  static class StringEscaper implements Cloneable {
    private ArrayList<String> translationVector;
    private String [] translationTable;

    public static final StringEscaper XML_ESCAPER;
    public static final StringEscaper XML_NUMERIC_ESCAPER;
    public static final StringEscaper HTML_ESCAPER;
    public static final StringEscaper URL_ARG_ESCAPER;
    public static final StringEscaper URL_ESCAPER;

    /**
     * Identity transform
     */
    public StringEscaper() {
      translationVector = new ArrayList<String>();
    }

    /**
     * Map character "from" to escape sequence "to"
     */
    public void defineEscape(char from, String to) {
      int i = (int) from;
      if (i >= translationVector.size()) {
        // Extend list by adding the requisite number of nulls.
        final int count = i + 1 - translationVector.size();
        translationVector.addAll(Collections.<String>nCopies(count, null));
      }
      translationVector.set(i, to);
    }

    /**
     * Call this before attempting to escape strings; after this,
     * defineEscape may not be called again.
     */
    public void makeImmutable() {
      translationTable =
          translationVector.toArray(new String[translationVector.size()]);
      translationVector = null;
    }

    /**
     * Apply an immutable transformation to the given string.
     */
    public String escapeString(String s) {
      StringBuilder sb = null;
      int n = s.length();
      for (int i = 0; i < n; i++) {
        char c = s.charAt(i);
        String escape;
        // codes >= 128 (e.g. Euro sign) are always escaped
        if (c > 127) {
          escape = "&#" + Integer.toString(c) + ";";
        } else if (c >= translationTable.length) {
          escape = null;
        } else {
          escape = translationTable[c];
        }
        if (escape == null) {
          if (sb != null) {
            sb.append(c);
          }
        } else {
          if (sb == null) {
            sb = new StringBuilder(n * 2);
            sb.append(s.substring(0, i));
          }
          sb.append(escape);
        }
      }

      if (sb == null) {
        return s;
      } else {
        return sb.toString();
      }
    }

    protected StringEscaper clone() {
      StringEscaper clone = new StringEscaper();
      if (translationVector != null) {
        clone.translationVector = new ArrayList<String>(translationVector);
      }
      if (translationTable != null) {
        clone.translationTable = translationTable.clone();
      }
      return clone;
    }

    /**
     * Create a mutable escaper from an existing escaper, which may
     * already be immutable.
     */
    public StringEscaper getMutableClone() {
      StringEscaper clone = clone();
      if (clone.translationVector == null) {
        clone.translationVector = Lists.newArrayList(clone.translationTable);
        clone.translationTable = null;
      }
      return clone;
    }

    static {
      HTML_ESCAPER = new StringEscaper();
      HTML_ESCAPER.defineEscape('&', "&");
      HTML_ESCAPER.defineEscape('"', """);
//      htmlEscaper.defineEscape('\'',"'");
      HTML_ESCAPER.defineEscape('\'', "'");
      HTML_ESCAPER.defineEscape('<', "<");
      HTML_ESCAPER.defineEscape('>', ">");

      XML_NUMERIC_ESCAPER = new StringEscaper();
      XML_NUMERIC_ESCAPER.defineEscape('&', "&");
      XML_NUMERIC_ESCAPER.defineEscape('"', """);
      XML_NUMERIC_ESCAPER.defineEscape('\'', "'");
      XML_NUMERIC_ESCAPER.defineEscape('<', "<");
      XML_NUMERIC_ESCAPER.defineEscape('>', ">");

      URL_ARG_ESCAPER = new StringEscaper();
      URL_ARG_ESCAPER.defineEscape('?', "%3f");
      URL_ARG_ESCAPER.defineEscape('&', "%26");
      URL_ESCAPER = URL_ARG_ESCAPER.getMutableClone();
      URL_ESCAPER.defineEscape('%', "%%");
      URL_ESCAPER.defineEscape('"', "%22");
      URL_ESCAPER.defineEscape('\r', "+");
      URL_ESCAPER.defineEscape('\n', "+");
      URL_ESCAPER.defineEscape(' ', "+");
      URL_ESCAPER.defineEscape('#', "%23");

      HTML_ESCAPER.makeImmutable();
      XML_ESCAPER = HTML_ESCAPER;
      XML_NUMERIC_ESCAPER.makeImmutable();
      URL_ARG_ESCAPER.makeImmutable();
      URL_ESCAPER.makeImmutable();
    }
  }

  /**
   * List of attribute names and values.
   *
   * <p>In this file the class is a stub: it holds no attributes, and
   * {@link #display} always throws.
   */
  static class XMLAttrVector {
    /**
     * Not implemented.
     *
     * @param out the writer the attributes would be displayed to
     * @param indent the current indentation level
     * @throws UnsupportedOperationException always
     */
    public void display(PrintWriter out, int indent) {
      throw new UnsupportedOperationException();
    }
  }
}


// End XmlOutput.java