package folioxml.utils;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Static helpers that convert between named HTML entities (for example {@code &amp;}) and the
 * characters they stand for.
 *
 * <p>The table covers the four markup-significant ASCII characters, the Latin-1 range
 * (161-255), {@code &hellip;} and {@code &mdash;}. {@code &nbsp;} is intentionally mapped to a
 * regular space (code 32) rather than to U+00A0; as a consequence {@link #encode(String)}
 * turns every ordinary space into {@code &nbsp;}.
 *
 * @author Michael Yagudaev
 * @version 1.2 April 9, 2011
 * retrieved from http://www.yagudaev.com/programming/java/7-jsp-escaping-html
 */
public class HtmlEntities {

    /** Matches a named entity: '&', a letter, further letters or digits, then ';'. */
    private static final Pattern ENTITY_PATTERN = Pattern.compile("&[A-Za-z][A-Za-z0-9]*;");

    /** Entity name (including the leading '&' and trailing ';') to decoded character. */
    private static final Map<String, Character> CHAR_BY_ENTITY =
            new LinkedHashMap<String, Character>();

    /** Character to the first entity name registered for it; used by {@link #encode(String)}. */
    private static final Map<Character, String> ENTITY_BY_CHAR =
            new LinkedHashMap<Character, String>();

    static {
        register("&quot;", 34);
        register("&amp;", 38);
        register("&lt;", 60);
        register("&gt;", 62);
        register("&nbsp;", 32); // 160 removed because of bug in text
        register("&iexcl;", 161);
        register("&cent;", 162);
        register("&pound;", 163);
        register("&curren;", 164);
        register("&yen;", 165);
        register("&brvbar;", 166);
        register("&sect;", 167);
        register("&uml;", 168);
        register("&copy;", 169);
        register("&ordf;", 170);
        register("&laquo;", 171);
        register("&not;", 172);
        register("&shy;", 173);
        register("&reg;", 174);
        register("&macr;", 175);
        register("&deg;", 176);
        register("&plusmn;", 177);
        register("&sup2;", 178);
        register("&sup3;", 179);
        register("&acute;", 180);
        register("&micro;", 181);
        register("&para;", 182);
        register("&middot;", 183);
        register("&cedil;", 184);
        register("&sup1;", 185);
        register("&ordm;", 186);
        register("&raquo;", 187);
        register("&frac14;", 188);
        register("&frac12;", 189);
        register("&frac34;", 190);
        register("&iquest;", 191);
        register("&times;", 215);
        register("&divide;", 247);
        register("&Agrave;", 192);
        register("&Aacute;", 193);
        register("&Acirc;", 194);
        register("&Atilde;", 195);
        register("&Auml;", 196);
        register("&Aring;", 197);
        register("&AElig;", 198);
        register("&Ccedil;", 199);
        register("&Egrave;", 200);
        register("&Eacute;", 201);
        register("&Ecirc;", 202);
        register("&Euml;", 203);
        register("&Igrave;", 204);
        register("&Iacute;", 205);
        register("&Icirc;", 206);
        register("&Iuml;", 207);
        register("&ETH;", 208);
        register("&Ntilde;", 209);
        register("&Ograve;", 210);
        register("&Oacute;", 211);
        register("&Ocirc;", 212);
        register("&Otilde;", 213);
        register("&Ouml;", 214);
        register("&Oslash;", 216);
        register("&Ugrave;", 217);
        register("&Uacute;", 218);
        register("&Ucirc;", 219);
        register("&Uuml;", 220);
        register("&Yacute;", 221);
        register("&THORN;", 222);
        register("&szlig;", 223);
        register("&agrave;", 224);
        register("&aacute;", 225);
        register("&acirc;", 226);
        register("&atilde;", 227);
        register("&auml;", 228);
        register("&aring;", 229);
        register("&aelig;", 230);
        register("&ccedil;", 231);
        register("&egrave;", 232);
        register("&eacute;", 233);
        register("&ecirc;", 234);
        register("&euml;", 235);
        register("&igrave;", 236);
        register("&iacute;", 237);
        register("&icirc;", 238);
        register("&iuml;", 239);
        register("&eth;", 240);
        register("&ntilde;", 241);
        register("&ograve;", 242);
        register("&oacute;", 243);
        register("&ocirc;", 244);
        register("&otilde;", 245);
        register("&ouml;", 246);
        register("&oslash;", 248);
        register("&ugrave;", 249);
        register("&uacute;", 250);
        register("&ucirc;", 251);
        register("&uuml;", 252);
        register("&yacute;", 253);
        register("&thorn;", 254);
        register("&yuml;", 255);
        register("&hellip;", 8230);
        register("&mdash;", 8212);
        // NOTE(review): the original table had a second whitespace-looking key mapped to 32 with
        // a trailing "//160" remark here. It is restored as a repeat of &nbsp; (a no-op);
        // confirm against the upstream file that it was not a different space entity.
        register("&nbsp;", 32); // 160

        // NOTE(review): the entity name for the user-defined (private-use) character U+F6E1 is
        // empty or non-printing in this copy of the file. An empty name can never match
        // ENTITY_PATTERN, and it is kept out of ENTITY_BY_CHAR so that encode() does not replace
        // U+F6E1 with nothing. Confirm the intended name against the upstream file.
        CHAR_BY_ENTITY.put("", Character.valueOf('\uF6E1'));
    }

    /**
     * Adds one entity to both lookup tables. When several entities share a character, the one
     * registered first is the one {@link #encode(String)} emits.
     *
     * @param entity entity text including the leading '&' and trailing ';'
     * @param code   UTF-16 code unit the entity decodes to
     */
    private static void register(String entity, int code) {
        Character ch = Character.valueOf((char) code);
        CHAR_BY_ENTITY.put(entity, ch);
        if (!ENTITY_BY_CHAR.containsKey(ch)) {
            ENTITY_BY_CHAR.put(ch, entity);
        }
    }

    /**
     * Looks up an entity and returns the character it stands for.
     *
     * @param str entity text such as {@code &amp;}
     * @return the decoded character as a one-character string, or {@code str} itself when the
     *         entity is not in the table
     */
    private static String fromHtmlEntity(String str) {
        Character ch = CHAR_BY_ENTITY.get(str);
        return (ch != null) ? ch.toString() : str;
    }

    /**
     * Returns the entity that encodes the given character.
     *
     * @param value the character to look up
     * @return the entity text for {@code value}, or {@code null} when the character has no
     *         entity in the table
     */
    private static String findValue(char value) {
        return ENTITY_BY_CHAR.get(Character.valueOf(value));
    }

    /**
     * Replaces every character that has an entry in the entity table with its named entity,
     * e.g. {@code <} becomes {@code &lt;} and {@code &} becomes {@code &amp;}. Because
     * {@code &nbsp;} is mapped to code 32, ordinary spaces come out as {@code &nbsp;}.
     *
     * @param encode the string to be encoded
     * @return the encoded string with HTML entities
     * @throws NullPointerException if {@code encode} is {@code null}
     */
    public static String encode(String encode) {
        StringBuilder out = new StringBuilder(encode.length());
        for (int i = 0; i < encode.length(); i++) {
            char c = encode.charAt(i);
            String entity = findValue(c);
            if (entity != null) {
                out.append(entity);
            } else {
                out.append(c);
            }
        }
        return out.toString();
    }

    /**
     * Replaces every known named entity with its character, e.g. {@code &amp;} becomes
     * {@code &}. Unknown entities are left untouched, and replacement text is never rescanned,
     * so {@code &amp;lt;} decodes to {@code &lt;}.
     *
     * @param decode a string to be decoded
     * @return the string with all known HTML entities replaced
     * @throws NullPointerException if {@code decode} is {@code null}
     */
    public static String decode(String decode) {
        Matcher m = ENTITY_PATTERN.matcher(decode);
        StringBuilder out = new StringBuilder(decode.length());
        int copiedUpTo = 0;
        while (m.find()) {
            // Copy the literal text before the entity, then the decoded (or unchanged) entity.
            out.append(decode, copiedUpTo, m.start());
            out.append(fromHtmlEntity(m.group()));
            copiedUpTo = m.end();
        }
        out.append(decode, copiedUpTo, decode.length());
        return out.toString();
    }
}