package folioxml.utils; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * This class offers methods to decode and encode html entities. * * @author Michael Yagudaev * @version 1.2 April 9, 2011 * retrieved from http://www.yagudaev.com/programming/java/7-jsp-escaping-html */ public class HtmlEntities { private static Map<String, Character> map = new LinkedHashMap<String, Character>(); static { map.put(""", (char) 34); map.put("&", (char) 38); map.put("<", (char) 60); map.put(">", (char) 62); map.put(" ", (char) 32);//160 removed because of bug in text map.put("¡", (char) 161); map.put("¢", (char) 162); map.put("£", (char) 163); map.put("¤", (char) 164); map.put("¥", (char) 165); map.put("¦", (char) 166); map.put("§", (char) 167); map.put("¨", (char) 168); map.put("©", (char) 169); map.put("ª", (char) 170); map.put("«", (char) 171); map.put("¬", (char) 172); map.put("­", (char) 173); map.put("®", (char) 174); map.put("¯", (char) 175); map.put("°", (char) 176); map.put("±", (char) 177); map.put("²", (char) 178); map.put("³", (char) 179); map.put("´", (char) 180); map.put("µ", (char) 181); map.put("¶", (char) 182); map.put("·", (char) 183); map.put("¸", (char) 184); map.put("¹", (char) 185); map.put("º", (char) 186); map.put("»", (char) 187); map.put("¼", (char) 188); map.put("½", (char) 189); map.put("¾", (char) 190); map.put("¿", (char) 191); map.put("×", (char) 215); map.put("÷", (char) 247); map.put("À", (char) 192); map.put("Á", (char) 193); map.put("Â", (char) 194); map.put("Ã", (char) 195); map.put("Ä", (char) 196); map.put("Å", (char) 197); map.put("Æ", (char) 198); map.put("Ç", (char) 199); map.put("È", (char) 200); map.put("É", (char) 201); map.put("Ê", (char) 202); map.put("Ë", (char) 203); map.put("Ì", (char) 204); map.put("Í", (char) 205); map.put("Î", (char) 206); map.put("Ï", (char) 207); map.put("Ð", (char) 208); map.put("Ñ", (char) 209); map.put("Ò", (char) 210); map.put("Ó", (char) 211); map.put("Ô", (char) 212); map.put("Õ", (char) 213); map.put("Ö", (char) 214); map.put("Ø", (char) 216); map.put("Ù", (char) 217); map.put("Ú", (char) 218); map.put("Û", (char) 219); map.put("Ü", (char) 220); map.put("Ý", (char) 221); map.put("Þ", (char) 222); map.put("ß", (char) 223); map.put("à", (char) 224); map.put("á", (char) 225); map.put("â", (char) 226); map.put("ã", (char) 227); map.put("ä", (char) 228); map.put("å", (char) 229); map.put("æ", (char) 230); map.put("ç", (char) 231); map.put("è", (char) 232); map.put("é", (char) 233); map.put("ê", (char) 234); map.put("ë", (char) 235); map.put("ì", (char) 236); map.put("í", (char) 237); map.put("î", (char) 238); map.put("ï", (char) 239); map.put("ð", (char) 240); map.put("ñ", (char) 241); map.put("ò", (char) 242); map.put("ó", (char) 243); map.put("ô", (char) 244); map.put("õ", (char) 245); map.put("ö", (char) 246); map.put("ø", (char) 248); map.put("ù", (char) 249); map.put("ú", (char) 250); map.put("û", (char) 251); map.put("ü", (char) 252); map.put("ý", (char) 253); map.put("þ", (char) 254); map.put("ÿ", (char) 255); map.put("…", (char) 8230);//"\u2026".charAt(0));//(char)8230);//8230 map.put("—", (char) 8212);//"\u2014".charAt(0));//(char)8212);//8212 map.put(" ", (char) 32); //160 map.put("", (char) "\uF6E1".charAt(0));//user-defined character } /** * Find the Html Entity and convert it back to a regular character if the * entity exists, otherwise return the same string. * * @param str * @return Character represented by HTML Entity or the same string if unknown entity. */ private static String fromHtmlEntity(String str) { Character ch = map.get(str); return (ch != null) ? ch.toString() : str; } /** * Finds the value and returns the key that corresponds to that value. If value not found * returns null. * * @param value The value to be found. * @return The key corresponding to the value that was found or null if value not found. */ private static String findValue(char value) { Set<String> keySet = map.keySet(); Iterator<String> i = keySet.iterator(); String key = i.next(); // key boolean found = false; String result = null; while (i.hasNext() && !found) { if (map.get(key).charValue() == value) { found = true; result = key; } key = i.next(); } return result; } /** * Converts special characters in ASCII into html entities (e.g. & -> &) * * @param encode The string to be encoded. * @return The encoded string with HTML entities. */ public static String encode(String encode) { StringBuilder str = new StringBuilder(encode); String key; int i = 0; // loop over all the characters in the string while (i < str.length()) { // try matching a character to an entity key = findValue(str.charAt(i)); if (key != null) { str.replace(i, i + 1, key); i += key.length(); } else { i++; } } return str.toString(); } /** * Converts html entities (e.g. &) into real characters (ASCII characters, e.g. & -> &) * * @param decode A string to be decoded. * @return The string decoded with no HTML entities. */ public static String decode(String decode) { StringBuilder str = new StringBuilder(decode); Matcher m = Pattern.compile("&[A-Za-z]+;").matcher(str); String replaceStr = null; int matchPointer = 0; while (m.find(matchPointer)) { // check if we have a corresponding key in our map replaceStr = fromHtmlEntity(m.group()); str.replace(m.start(), m.end(), replaceStr); matchPointer = m.start() + replaceStr.length(); } return str.toString(); } }