// StringEscapeUtils.java example
//
// Explorer
// wicket-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.wicket.util.string;

import java.io.IOException;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Locale;

/**
 * <p>
 * Escapes and unescapes <code>String</code>s for HTML and XML.
 * </p>
 * 
 * <p>
 * #ThreadSafe#
 * </p>
 * 
 * @author Apache Software Foundation
 * @author Apache Jakarta Turbine
 * @author Purple Technology
 * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
 * @author Antony Riley
 * @author Helge Tesgaard
 * @author <a href="mailto:sean@boohai.com">Sean Brown</a>
 * @author <a href="mailto:ggregory@seagullsw.com">Gary Gregory</a>
 * @author Phil Steitz
 * @author Pete Gieser
 * @since 2.0
 * @version $Id$
 */
// Copy from commons-lang ver. 2.6. Non-html/xml methods were removed
class StringEscapeUtils
{

	/** Message of the exception thrown when a caller supplies a <code>null</code> writer. */
	private static final String NULL_WRITER_MESSAGE = "The Writer must not be null.";

	/**
	 * <p>
	 * <code>StringEscapeUtils</code> instances should NOT be constructed in standard programming.
	 * Every operation is a static method, so the class should be used as:
	 * </p>
	 * 
	 * <pre>
	 * StringEscapeUtils.escapeHtml("foo");
	 * </pre>
	 * 
	 * <p>
	 * This constructor is public to permit tools that require a JavaBean instance to operate.
	 * </p>
	 */
	public StringEscapeUtils()
	{
		super();
	}

	/**
	 * <p>
	 * Returns an upper case hexadecimal <code>String</code> for the given character.
	 * </p>
	 * 
	 * <p>
	 * No method of this class currently calls this helper.
	 * </p>
	 * 
	 * @param ch
	 *            The character to convert.
	 * @return An upper case hexadecimal <code>String</code>
	 */
	private static String hex(char ch)
	{
		// A fixed locale keeps the upper-casing independent of the JVM default locale.
		return Integer.toHexString(ch).toUpperCase(Locale.ENGLISH);
	}

	/**
	 * <p>
	 * Rejects a <code>null</code> writer. Shared by all <code>Writer</code>-based methods so that
	 * they fail in the same way and with the same message.
	 * </p>
	 * 
	 * @param writer
	 *            the writer to validate
	 * @throws IllegalArgumentException
	 *             if the writer is null
	 */
	private static void requireWriter(Writer writer)
	{
		if (writer == null)
		{
			throw new IllegalArgumentException(NULL_WRITER_MESSAGE);
		}
	}

	// HTML and XML
	// --------------------------------------------------------------------------
	/**
	 * <p>
	 * Escapes the characters in a <code>String</code> using HTML entities.
	 * </p>
	 * 
	 * <p>
	 * For example, {@code "bread" & "butter"} becomes
	 * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
	 * </p>
	 * 
	 * <p>
	 * The escaping itself is delegated to <code>Entities.HTML40</code>. Note that the commonly used
	 * apostrophe escape ({@code &apos;}) is not a legal HTML 4.0 entity.
	 * </p>
	 * 
	 * @param str
	 *            the <code>String</code> to escape, may be null
	 * @return a new escaped <code>String</code>, <code>null</code> if null string input
	 * 
	 * @see #escapeHtml(Writer, String)
	 * @see #unescapeHtml(String)
	 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO
	 *      Entities</a>
	 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO
	 *      Latin-1</a>
	 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity
	 *      references</a>
	 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character
	 *      References</a>
	 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code
	 *      positions</a>
	 */
	public static String escapeHtml(String str)
	{
		if (str == null)
		{
			return null;
		}
		try
		{
			// Reserve headroom: the escaped text is expected to be longer than the input.
			StringWriter writer = new StringWriter((int)(str.length() * 1.5));
			escapeHtml(writer, str);
			return writer.toString();
		}
		catch (IOException ioe)
		{
			// should be impossible: StringWriter itself never throws IOException
			throw new RuntimeException(ioe);
		}
	}

	/**
	 * <p>
	 * Escapes the characters in a <code>String</code> using HTML entities and writes them to a
	 * <code>Writer</code>.
	 * </p>
	 * 
	 * <p>
	 * For example, {@code "bread" & "butter"} becomes
	 * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
	 * </p>
	 * 
	 * <p>
	 * The escaping itself is delegated to <code>Entities.HTML40</code>. Note that the commonly used
	 * apostrophe escape ({@code &apos;}) is not a legal HTML 4.0 entity. Nothing is written when
	 * the input string is <code>null</code>.
	 * </p>
	 * 
	 * @param writer
	 *            the writer receiving the escaped string, not null
	 * @param string
	 *            the <code>String</code> to escape, may be null
	 * @throws IllegalArgumentException
	 *             if the writer is null
	 * @throws IOException
	 *             when <code>Writer</code> passed throws the exception from calls to the
	 *             {@link Writer#write(int)} methods.
	 * 
	 * @see #escapeHtml(String)
	 * @see #unescapeHtml(String)
	 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO
	 *      Entities</a>
	 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO
	 *      Latin-1</a>
	 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity
	 *      references</a>
	 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character
	 *      References</a>
	 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code
	 *      positions</a>
	 */
	public static void escapeHtml(Writer writer, String string) throws IOException
	{
		// The writer is validated first so a null writer is reported even for null input.
		requireWriter(writer);
		if (string == null)
		{
			return;
		}
		Entities.HTML40.escape(writer, string);
	}

	// -----------------------------------------------------------------------
	/**
	 * <p>
	 * Unescapes a string containing entity escapes to a string containing the actual Unicode
	 * characters corresponding to the escapes. The work is delegated to
	 * <code>Entities.HTML40</code>.
	 * </p>
	 * 
	 * <p>
	 * For example, the string {@code "&lt;Fran&ccedil;ais&gt;"} will become
	 * {@code "<Français>"}.
	 * </p>
	 * 
	 * <p>
	 * If an entity is unrecognized, it is left alone, and inserted verbatim into the result string.
	 * e.g. {@code "&gt;&zzzz;x"} will become {@code ">&zzzz;x"}.
	 * </p>
	 * 
	 * @param str
	 *            the <code>String</code> to unescape, may be null
	 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
	 * @see #unescapeHtml(Writer, String)
	 * @see #escapeHtml(String)
	 */
	public static String unescapeHtml(String str)
	{
		if (str == null)
		{
			return null;
		}
		try
		{
			// Replacing an entity reference by the character it names is not expected to
			// lengthen the text, so the input length is a sufficient initial capacity. It is
			// only a sizing hint: the writer still grows on demand.
			StringWriter writer = new StringWriter(str.length());
			unescapeHtml(writer, str);
			return writer.toString();
		}
		catch (IOException ioe)
		{
			// should be impossible: StringWriter itself never throws IOException
			throw new RuntimeException(ioe);
		}
	}

	/**
	 * <p>
	 * Unescapes a string containing entity escapes and writes the actual Unicode characters
	 * corresponding to the escapes to a <code>Writer</code>. The work is delegated to
	 * <code>Entities.HTML40</code>.
	 * </p>
	 * 
	 * <p>
	 * For example, the string {@code "&lt;Fran&ccedil;ais&gt;"} will become
	 * {@code "<Français>"}.
	 * </p>
	 * 
	 * <p>
	 * If an entity is unrecognized, it is left alone, and inserted verbatim into the result string.
	 * e.g. {@code "&gt;&zzzz;x"} will become {@code ">&zzzz;x"}. Nothing is written when the input
	 * string is <code>null</code>.
	 * </p>
	 * 
	 * @param writer
	 *            the writer receiving the unescaped string, not null
	 * @param string
	 *            the <code>String</code> to unescape, may be null
	 * @throws IllegalArgumentException
	 *             if the writer is null
	 * @throws IOException
	 *             if an IOException occurs
	 * @see #unescapeHtml(String)
	 * @see #escapeHtml(String)
	 */
	public static void unescapeHtml(Writer writer, String string) throws IOException
	{
		requireWriter(writer);
		if (string == null)
		{
			return;
		}
		Entities.HTML40.unescape(writer, string);
	}

	// -----------------------------------------------------------------------
	/**
	 * <p>
	 * Escapes the characters in a <code>String</code> using XML entities and writes the result to
	 * a <code>Writer</code>.
	 * </p>
	 * 
	 * <p>
	 * For example, {@code "bread" & "butter"} becomes
	 * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
	 * </p>
	 * 
	 * <p>
	 * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or
	 * external entities. Nothing is written when the input string is <code>null</code>.
	 * </p>
	 * 
	 * <p>
	 * NOTE(review): the commons-lang 2.6 original states that characters greater than 0x7f are
	 * escaped to a numerical equivalent. That behaviour lives in <code>Entities.XML</code> and
	 * should be confirmed there.
	 * </p>
	 * 
	 * @param writer
	 *            the writer receiving the escaped string, not null
	 * @param str
	 *            the <code>String</code> to escape, may be null
	 * @throws IllegalArgumentException
	 *             if the writer is null
	 * @throws IOException
	 *             if there is a problem writing
	 * @see #escapeXml(String)
	 * @see #unescapeXml(java.lang.String)
	 */
	public static void escapeXml(Writer writer, String str) throws IOException
	{
		requireWriter(writer);
		if (str == null)
		{
			return;
		}
		Entities.XML.escape(writer, str);
	}

	/**
	 * <p>
	 * Escapes the characters in a <code>String</code> using XML entities.
	 * </p>
	 * 
	 * <p>
	 * For example, {@code "bread" & "butter"} becomes
	 * {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
	 * </p>
	 * 
	 * <p>
	 * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or
	 * external entities.
	 * </p>
	 * 
	 * <p>
	 * NOTE(review): the commons-lang 2.6 original states that characters greater than 0x7f are
	 * escaped to a numerical equivalent. That behaviour lives in <code>Entities.XML</code> and
	 * should be confirmed there.
	 * </p>
	 * 
	 * @param str
	 *            the <code>String</code> to escape, may be null
	 * @return a new escaped <code>String</code>, <code>null</code> if null string input
	 * @see #escapeXml(Writer, String)
	 * @see #unescapeXml(java.lang.String)
	 */
	public static String escapeXml(String str)
	{
		if (str == null)
		{
			return null;
		}
		return Entities.XML.escape(str);
	}

	// -----------------------------------------------------------------------
	/**
	 * <p>
	 * Unescapes a string containing XML entity escapes and writes the actual Unicode characters
	 * corresponding to the escapes to a <code>Writer</code>.
	 * </p>
	 * 
	 * <p>
	 * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or
	 * external entities. Nothing is written when the input string is <code>null</code>.
	 * </p>
	 * 
	 * <p>
	 * NOTE(review): the commons-lang 2.6 original states that numerical character codes are
	 * unescaped to their respective unicode characters. That behaviour lives in
	 * <code>Entities.XML</code> and should be confirmed there.
	 * </p>
	 * 
	 * @param writer
	 *            the writer receiving the unescaped string, not null
	 * @param str
	 *            the <code>String</code> to unescape, may be null
	 * @throws IllegalArgumentException
	 *             if the writer is null
	 * @throws IOException
	 *             if there is a problem writing
	 * @see #unescapeXml(String)
	 * @see #escapeXml(String)
	 */
	public static void unescapeXml(Writer writer, String str) throws IOException
	{
		requireWriter(writer);
		if (str == null)
		{
			return;
		}
		Entities.XML.unescape(writer, str);
	}

	/**
	 * <p>
	 * Unescapes a string containing XML entity escapes to a string containing the actual Unicode
	 * characters corresponding to the escapes.
	 * </p>
	 * 
	 * <p>
	 * Supports only the five basic XML entities (gt, lt, quot, amp, apos). Does not support DTDs or
	 * external entities.
	 * </p>
	 * 
	 * <p>
	 * NOTE(review): the commons-lang 2.6 original states that numerical character codes are
	 * unescaped to their respective unicode characters. That behaviour lives in
	 * <code>Entities.XML</code> and should be confirmed there.
	 * </p>
	 * 
	 * @param str
	 *            the <code>String</code> to unescape, may be null
	 * @return a new unescaped <code>String</code>, <code>null</code> if null string input
	 * @see #unescapeXml(Writer, String)
	 * @see #escapeXml(String)
	 */
	public static String unescapeXml(String str)
	{
		if (str == null)
		{
			return null;
		}
		return Entities.XML.unescape(str);
	}
}