/* This code is part of Freenet. It is distributed under the GNU General * Public License, version 2 (or at your option any later version). See * http://www.gnu.org/ for further details of the GPL. */ package freenet.support; import java.util.HashMap; import java.util.Map; /** * Encodes any character mentioned with a substitute in the HTML spec. This * includes nulls, <>, quotes, but not control characters. It should be * safe to put the output of this function into a web page; if it is not * then we have big problems. Because we encode quotes it should also be * safe to include it inside attributes. I am not certain where the list in * HTMLEntities came from, but the list of potentially markup-significant * characters in [X]HTML is *really* small. * * Originally from com.websiteasp.ox pasckage. * * @author avian (Yves Lempereur) * @author Unique Person@w3nO30p4p9L81xKTXbCaQBOvUww (via Frost) */ public class HTMLEncoder { public final static CharTable charTable = new CharTable(HTMLEntities.encodeMap); public static String encode(String s) { int n = s.length(); StringBuilder sb = new StringBuilder(n); encodeToBuffer(n, s, sb); return sb.toString(); } public static void encodeToBuffer(String s, StringBuilder sb) { encodeToBuffer(s.length(), s, sb); } private static void encodeToBuffer(int n, String s, StringBuilder sb) { for (int i = 0; i < n; i++) { char c = s.charAt(i); String entity; if(Character.isLetterOrDigit(c)){ //only special characters need checking sb.append(c); } else if((entity = charTable.get(c))!=null){ sb.append('&'); sb.append(entity); sb.append(';'); } else{ sb.append(c); } } } /** * Encode String so it is safe to be used in XML attribute value and text. * * HTMLEncode.encode() use some HTML-specific entities (e.g. &) hence not suitable for * generic XML. */ public static String encodeXML(String s) { // Extensible Markup Language (XML) 1.0 (Fifth Edition) // [10] AttValue ::= '"' ([^<&"] | Reference)* '"' // | "'" ([^<&'] | Reference)* "'" // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) s = s.replace("&", "&"); s = s.replace("\"", """); s = s.replace("'", "'"); s = s.replace("<", "<"); s = s.replace(">", ">"); // CharData can't contain ']]>' return s; } private final static class CharTable{ private char[] chars; private String[] strings; private int modulo = 0; public CharTable(HashMap<Character, String> map){ int[] keys = new int[map.size()]; int keyIndex = 0; int max = 0; for (Character key : map.keySet()) { int val = key.charValue(); keys[keyIndex++] = val; if(val > max) max = val; } modulo = map.size(); int[] collisionTable = new int[max+1]; //using integers instead of booleans (no cleanup) boolean ok=false; while (!ok) { ++modulo; //try a higher modulo ok = true; for (int i = 0; ok && i < keys.length; ++i){ keyIndex = keys[i]%modulo; //try this modulo if (collisionTable[keyIndex] == modulo){ //is this value already used ok = false; } else{ collisionTable[keyIndex] = modulo; } } } //System.out.println("The modulo is:" + modulo); //was The modulo is:1474 chars = new char[modulo]; strings = new String[modulo]; for (Map.Entry<Character,String> entry : map.entrySet()) { Character character = entry.getKey(); keyIndex = character.charValue()%modulo; chars[keyIndex] = character.charValue(); strings[keyIndex] = entry.getValue(); } if (chars[0] == 0 && strings[0] != null) chars[0] = 1; } public String get(char key){ return chars[key%modulo] == key? strings[key%modulo]:null; } } }