// XMLHelper.java example
//
// Explorer
// sqlpower-library-master
// - src
/*
 * Copyright (c) 2008, SQL Power Group Inc.
 *
 * This file is part of SQL Power Library.
 *
 * SQL Power Library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * SQL Power Library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
 */
package ca.sqlpower.xml;

import java.io.PrintWriter;

/**
 * XMLHelper is a simple utility for outputting indented XML markup. It escapes
 * all illegal XML characters using a custom (application-level) escaping
 * mechanism. The rationale behind this is to allow us to save such characters
 * (like from binary data in a project) into a project file and read them back
 * in again when loading a project.
 * <p>
 * Note that <i>all</i> data that passes through the methods in this class will
 * be escaped. There is no check as to whether or not the illegal characters
 * fall in a CDATA section, a tag name, an attribute name or value, and so on.
 * We consider this to be a reasonable simplification of the problem because
 * illegal XML characters are just that: illegal. If a tag name or attribute
 * value contained an illegal character, the resulting file would not be
 * well-formed XML. The fact that this class escapes everything that goes
 * through it won't break what would otherwise be well-formed XML; it will only
 * further mangle malformed XML. Of course, if the illegal characters fall in a
 * CDATA section or attribute value, the escaping will have saved the day: the
 * results will be well-formed XML.
 * <p>
 * NB: This class only handles characters that are <i>illegal</i> in XML. It
 * does not replace markup characters such as {@code <} or {@code &} with
 * entities, because callers pass complete markup through these methods.
 * <p>
 * NB: When loading back in the data that was written using the XMLHelper, the
 * escaped characters will have to be 'unescaped' again after going through the
 * XML parser. See {@link #escape(String)} for a description of the escape
 * format and {@link #unescape(String)} for the inverse operation.
 */
public class XMLHelper {

    /** Lowercase hex digits used to write the four-digit escaped form. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * The number of space characters written in front of the text by
     * {@link #print(PrintWriter, String)} and
     * {@link #println(PrintWriter, String)}. Callers adjust this field
     * directly as they enter and leave nested elements.
     */
    public int indent;

    /**
     * Creates a new XMLHelper with an initial indentation amount of 0.
     */
    public XMLHelper() {
        super();
    }

    /**
     * Prints {@link #indent} spaces to the given writer, followed by the
     * escaped form of the given text. No newline is written.
     *
     * @param out
     *            The writer to print to.
     * @param text
     *            The text to escape and print. A null value is printed as
     *            the string "null", as {@link PrintWriter#print(String)} does.
     */
    public void print(PrintWriter out, String text) {
        printIndent(out);
        out.print(escape(text));
    }

    /**
     * Prints the escaped form of <code>text</code> to the given writer with
     * no indentation and no trailing newline.
     *
     * @param out
     *            The writer to print to.
     * @param text
     *            The text to escape and print.
     */
    public void niprint(PrintWriter out, String text) {
        out.print(escape(text));
    }

    /**
     * Prints the escaped form of <code>text</code> followed by a newline to
     * the given writer, with no indentation.
     *
     * @param out
     *            The writer to print to.
     * @param text
     *            The text to escape and print.
     */
    public void niprintln(PrintWriter out, String text) {
        out.println(escape(text));
    }

    /**
     * Prints {@link #indent} spaces to the given writer, followed by the
     * escaped form of the given text, followed by a newline.
     *
     * @param out
     *            The writer to print to.
     * @param text
     *            The text to escape and print.
     */
    public void println(PrintWriter out, String text) {
        printIndent(out);
        out.println(escape(text));
    }

    /**
     * Writes one space per unit of {@link #indent}. A zero or negative
     * indent writes nothing.
     */
    private void printIndent(PrintWriter out) {
        for (int i = 0; i < indent; i++) {
            out.print(" ");
        }
    }

    /**
     * Takes a String argument and returns a string that escapes characters that
     * are illegal in an XML document according to the XML specification. The
     * set of valid XML characters is taken from the <a
     * href="http://www.w3.org/TR/REC-xml/">XML 1.0 specification</a>, section
     * 2.2. Additionally, the backslash character will be considered illegal if
     * it appears immediately before a lowercase u in the input string, so that
     * the output never contains an escape-like sequence that was not produced
     * by this method.
     * <p>
     * Illegal characters will be represented in the output in the "escaped
     * form," the string {@code \\uNNNN} where NNNN is the four-digit lowercase
     * hexadecimal value of the character. There will always be exactly four
     * characters following the \\u, and each of those four characters will be a
     * hex digit.
     * <p>
     * The input is examined one UTF-16 code unit at a time, so the two halves
     * of a surrogate pair are each written in the escaped form even though the
     * supplementary character they encode is legal XML. This is lossless:
     * {@link #unescape(String)} restores the original pair.
     * <p>
     * This escaping mechanism is not standard XML markup; it's
     * application-level data. No generic XML processor will unescape it on the
     * way in, so the job of unescaping lies with any application program that
     * wants to consume the XML data. The Architect handles this by wrapping a
     * SAX parser with a layer that detects and unescapes the \\u sequences.
     *
     * @param text
     *            The input string that we want to check for illegal
     *            characters. May be null.
     * @return A string identical to the input string, except any character
     *         values that fall outside the range of legal XML characters will
     *         appear in the 6-character escaped form described above. Returns
     *         null if the input is null, mirroring {@link #unescape(String)}.
     */
    static String escape(String text) {
        if (text == null) return null;

        final int n = text.length();
        if (n == 0) return "";

        // arbitrary amount of extra space for the escapes we may add
        StringBuilder sb = new StringBuilder(n + 10);

        for (int i = 0; i < n; i++) {
            char ch = text.charAt(i);

            boolean verbatim;
            if (ch == '\\') {
                // A backslash is only a problem when it would read back as
                // the start of one of our own escape sequences.
                verbatim = (i == n - 1) || text.charAt(i + 1) != 'u';
            } else {
                verbatim = isLegalXmlChar(ch);
            }

            if (verbatim) {
                sb.append(ch);
            } else {
                appendEscaped(sb, ch);
            }
        }
        return sb.toString();
    }

    /**
     * Tells whether the given UTF-16 code unit, taken on its own, is in the
     * Char production of XML 1.0 section 2.2: tab, line feed, carriage
     * return, 0x20-0xd7ff or 0xe000-0xfffd.
     */
    private static boolean isLegalXmlChar(char ch) {
        return ch == 0x09 || ch == 0x0a || ch == 0x0d
                || (ch >= 0x20 && ch <= 0xd7ff)
                || (ch >= 0xe000 && ch <= 0xfffd);
    }

    /**
     * Appends the 6-character escaped form of the given character: a
     * backslash, a lowercase u, and four lowercase hex digits.
     */
    private static void appendEscaped(StringBuilder sb, char ch) {
        sb.append("\\u");
        sb.append(HEX_DIGITS[(ch >> 12) & 0xf]);
        sb.append(HEX_DIGITS[(ch >> 8) & 0xf]);
        sb.append(HEX_DIGITS[(ch >> 4) & 0xf]);
        sb.append(HEX_DIGITS[ch & 0xf]);
    }

    /**
     * Unescapes the given text according to the format described in
     * {@link #escape(String)}: every backslash followed by a lowercase u and
     * exactly four hex digits (either case) is replaced by the character with
     * that value.
     * <p>
     * Text produced by {@link #escape(String)} never contains a backslash-u
     * pair that is not followed by four hex digits. If such a malformed
     * sequence is encountered anyway (for example in a hand-edited file), it
     * is copied to the output unchanged rather than causing an exception.
     *
     * @param text
     *            The String to unescape. If the String is null, then we
     *            return null.
     * @return The unescaped version of the input string. If the string is
     *         null, return null.
     */
    static String unescape(String text) {
        if (text == null) return null;

        final int n = text.length();
        StringBuilder unescapedText = new StringBuilder(n);

        int i = 0;
        while (i < n) {
            char ch = text.charAt(i);

            if (ch == '\\' && i + 1 < n && text.charAt(i + 1) == 'u') {
                int charVal = parseFourHexDigits(text, i + 2);
                if (charVal >= 0) {
                    unescapedText.append((char) charVal);
                    i += 6;
                    continue;
                }
                // Not a complete escape: fall through and keep it literally.
            }

            unescapedText.append(ch);
            i++;
        }

        return unescapedText.toString();
    }

    /**
     * Parses the four characters of <code>text</code> starting at
     * <code>start</code> as an unsigned hexadecimal number.
     *
     * @return The parsed value (0 to 0xffff), or -1 if fewer than four
     *         characters remain or any of them is not a hex digit. Sign
     *         characters are not accepted.
     */
    private static int parseFourHexDigits(String text, int start) {
        if (start + 4 > text.length()) return -1;

        int value = 0;
        for (int i = start; i < start + 4; i++) {
            int digit = Character.digit(text.charAt(i), 16);
            if (digit < 0) return -1;
            value = (value << 4) | digit;
        }
        return value;
    }
}