/*
* Copyright (C) 2011 eXo Platform SAS.
*
* This is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.exoplatform.commons.utils;
import java.io.StringWriter;
import java.io.Writer;
import org.gatein.common.io.WriterCharWriter;
import org.gatein.common.text.CharWriter;
import org.gatein.common.text.EncodingException;
import org.gatein.common.text.EntityEncoder;
import org.gatein.common.util.ParameterValidation;
/**
 * Encoder that turns the characters of a string into their HTML entity representation.
 * <p>
 * A character is written as a named entity (<code>&amp;name;</code>) when the inherited lookup table knows
 * one for it, and as a hexadecimal numeric character reference (<code>&amp;#xHH;</code>) otherwise. ASCII
 * letters and digits, as well as the characters of the "immune" set that applies to the chosen encoding
 * method, are copied through unchanged.
 *
 * @author <a href="mailto:trongtt@gmail.com">Trong Tran</a>
 * @version $Revision$
 */
public class HTMLEntityEncoder extends EntityEncoder {

    /** Lazily created shared instance; volatile so that the unsynchronized read in getInstance() is safe. */
    private static volatile HTMLEntityEncoder singletonInstance;

    /**
     * Character set that are immune from encoding in HTML
     */
    private static final char[] IMMUNE_HTML = { ',', '.', '-', '_', ' ' };

    /**
     * Character set that are immune from encoding in HTML Attribute
     */
    private static final char[] IMMUNE_HTMLATTR = { ',', '.', '-', '_' };

    /** Prefix of a hexadecimal numeric character reference. */
    private static final String HEX_ENTITY_PREFIX = "&#x";

    /** Highest character value covered by the precomputed hex table. */
    private static final int HEX_TABLE_MAX = 0xFF;

    /**
     * Precomputed hex strings for the characters 0x00..0xFF; <code>null</code> marks a character that must not
     * be encoded. The content is constant, so a single table is shared by all instances.
     */
    private static final String[] HEX_TO_ENTITY = buildHexEntityNumberArray();

    /**
     * Returns the shared encoder instance, creating it on first use.
     *
     * @return the shared encoder
     */
    public static HTMLEntityEncoder getInstance() {
        HTMLEntityEncoder instance = singletonInstance;
        if (instance == null) {
            synchronized (HTMLEntityEncoder.class) {
                instance = singletonInstance;
                if (instance == null) {
                    instance = new HTMLEntityEncoder();
                    singletonInstance = instance;
                }
            }
        }
        return instance;
    }

    /**
     * Encodes the input unless {@link #isEncoded(String)} reports that it already looks encoded.
     *
     * @param input the text to encode, must not be null
     * @return the encoded text, or the input itself when it already seems to be encoded
     * @throws IllegalArgumentException if the input is null
     */
    public String encodeIfNotEncoded(String input) {
        ParameterValidation.throwIllegalArgExceptionIfNull(input, "String");
        return isEncoded(input) ? input : encode(input);
    }

    /**
     * Best-effort basis for determining whether an input is already encoded. Only the first
     * <code>&amp;...;</code> sequence of the input is inspected.
     *
     * @param input the text to verify
     * @return whether or not the text seems to be encoded
     */
    public final boolean isEncoded(String input) {
        int entityStart = input.indexOf('&');
        if (entityStart == -1) {
            return false; // no & at all, so, definitely not encoded
        }

        int entityEnd = input.indexOf(';', entityStart);
        if (entityEnd == -1) {
            return false; // no ; after the &, so, definitely not encoded
        }

        String firstEntity = input.substring(entityStart + 1, entityEnd);
        if (firstEntity.isEmpty()) {
            // we have something like "&;", so, certainly not encoded... if it were encoded,
            // the first ampersand would have been replaced by &amp;, and it would have been caught
            return false;
        }

        // A positive result of the inherited reverse lookup is taken as "this is a known entity name".
        if (reverse(firstEntity) > 0) {
            return true;
        }

        // we have not found a positive indication that firstEntity is indeed something that translates into a
        // single char, so, if it starts with "#x", then it's a unicode number... otherwise, let's be
        // conservative and report that this is not encoded
        return firstEntity.startsWith("#x");
    }

    /**
     * Encode data for use in HTML
     *
     * @param input the string to encode for HTML
     * @return input encoded for HTML
     * @throws IllegalArgumentException if the input is null
     */
    public String encodeHTML(String input) {
        return encode(input, IMMUNE_HTML);
    }

    /**
     * Encode data for use in HTML attributes.
     *
     * @param input the string to encode for a HTML attribute
     * @return input encoded for use as value of a HTML attribute
     * @throws IllegalArgumentException if the input is null
     */
    public String encodeHTMLAttribute(String input) {
        return encode(input, IMMUNE_HTMLATTR);
    }

    /**
     * Encodes the given range of characters using the HTML immune set.
     *
     * @param chars the array to encode
     * @param off the offset in the chars array
     * @param len the length of chars to encode
     * @param writer the writer to use
     * @throws EncodingException
     */
    @Override
    public void safeEncode(char[] chars, int off, int len, CharWriter writer) throws EncodingException {
        safeEncode(chars, off, len, writer, IMMUNE_HTML);
    }

    /**
     * Encodes the characters in <code>[off, off + len)</code> and writes the result to the writer. Runs of
     * characters that need no encoding are copied in one piece.
     *
     * @param chars the array to encode
     * @param off the offset in the chars array
     * @param len the length of chars to encode
     * @param writer the writer to use
     * @param immune the characters array are immune from encoding
     * @throws EncodingException
     */
    private void safeEncode(char[] chars, int off, int len, CharWriter writer, char[] immune) throws EncodingException {
        // Index of the first char that has not been written yet
        int previous = off;

        // Exclusive end of the range to encode
        int to = off + len;

        // Perform lookup char by char
        for (int current = off; current < to; current++) {
            char c = chars[current];

            if (isImmutable(immune, c)) {
                continue;
            }

            String replacement = lookupEntityName(c);
            if (replacement != null) {
                // Flush the pending unencoded chars, then write the named entity
                writer.append(chars, previous, current - previous);
                writer.append('&').append(replacement).append(';');
                previous = current + 1;
            } else if (Character.isHighSurrogate(c) && current + 1 < to && Character.isLowSurrogate(chars[current + 1])) {
                // A supplementary character must be written as ONE reference to its code point: references
                // to the individual surrogate halves are invalid in HTML.
                int codePoint = Character.toCodePoint(c, chars[current + 1]);
                writer.append(chars, previous, current - previous);
                writer.append(HEX_ENTITY_PREFIX).append(Integer.toHexString(codePoint)).append(';');
                // Skip the low surrogate that was just consumed
                current++;
                previous = current + 1;
            } else {
                String hex = lookupHexEntityNumber(c);
                if (hex != null) {
                    // Flush the pending unencoded chars, then write the numeric character reference
                    writer.append(chars, previous, current - previous);
                    writer.append(HEX_ENTITY_PREFIX).append(hex).append(';');
                    previous = current + 1;
                }
            }
        }

        // Flush what is left, but never beyond the requested range
        writer.append(chars, previous, to - previous);
    }

    /**
     * Looks up the entity name of a character through the inherited lookup.
     *
     * @param c the character to look up
     * @return the result of the inherited lookup; <code>null</code> is treated by this class as "no named entity"
     */
    public final String lookupEntityName(char c) {
        return lookup(c);
    }

    /**
     * Returns the hexadecimal number to use in a numeric character reference for the given character.
     *
     * @param c the character to look up
     * @return the hex string of the character, or <code>null</code> when the character is an ASCII letter or
     *         digit and therefore must not be encoded
     */
    public final String lookupHexEntityNumber(char c) {
        if (c <= HEX_TABLE_MAX) {
            return HEX_TO_ENTITY[c];
        }
        return Integer.toHexString(c);
    }

    /**
     * Tells whether the character is part of the given immune set.
     *
     * @param array the immune characters
     * @param c the character to test
     * @return true if the character is contained in the array
     */
    private boolean isImmutable(char[] array, char c) {
        for (char candidate : array) {
            if (candidate == c) {
                return true;
            }
        }
        return false;
    }

    /**
     * Encodes a whole string with the given immune set.
     *
     * @param input the text to encode, must not be null
     * @param immutable the characters that are immune from encoding
     * @return the encoded text
     * @throws IllegalArgumentException if the input is null
     */
    private String encode(String input, char[] immutable) {
        ParameterValidation.throwIllegalArgExceptionIfNull(input, "String");

        Writer sw = new StringWriter();
        CharWriter charWriter = new WriterCharWriter(sw);
        safeEncode(input.toCharArray(), 0, input.length(), charWriter, immutable);
        return sw.toString();
    }

    /**
     * Build an array to store the hex string for characters to be encoded. If the character shouldn't be
     * encoded, then store null.
     *
     * @return An array containing characters in hex string that are to be encoded.
     */
    private static String[] buildHexEntityNumberArray() {
        String[] array = new String[HEX_TABLE_MAX + 1];

        for (int c = 0; c <= HEX_TABLE_MAX; c++) {
            boolean digit = c >= 0x30 && c <= 0x39;
            boolean upperCaseLetter = c >= 0x41 && c <= 0x5A;
            boolean lowerCaseLetter = c >= 0x61 && c <= 0x7A;

            // Alphanumeric characters stay as they are; everything else gets its hex value
            array[c] = (digit || upperCaseLetter || lowerCaseLetter) ? null : Integer.toHexString(c);
        }

        return array;
    }
}