package jd.nutils.encoding;
import java.util.Hashtable;
/**
* Collection of static methods to convert special and extended characters into
* HTML entities and vice versa.<br>
* <br>
* Copyright (c) 2004-2005 Tecnick.com S.r.l (www.tecnick.com) Via Ugo Foscolo
* n.19 - 09045 Quartu Sant'Elena (CA) - ITALY - www.tecnick.com -
* info@tecnick.com<br>
* Project homepage: <a href="http://htmlentities.sourceforge.net"
* target="_blank">http://htmlentities.sourceforge.net</a><br>
* License: http://www.gnu.org/copyleft/lesser.html LGPL
*
* @author Nicola Asuni [www.tecnick.com].
* @version 1.0.004
*
*
* Changes by JD-Team:
*
* added htmlTotal Method
*/
public class HTMLEntities {
/**
* Translation table for HTML entities.<br>
* reference: W3C - Character entity references in HTML 4 [<a
* href="http://www.w3.org/TR/html401/sgml/entities.html"
* target="_blank">http://www.w3.org/TR/html401/sgml/entities.html</a>].
*/
private static final Object[][] html_entities_table = { { "Á", 193 }, { "á", 225 }, { "Â", 194 }, { "â", 226 }, { "´", 180 }, { "Æ", 198 }, { "æ", 230 }, { "À", 192 }, { "à", 224 }, { "ℵ", 8501 }, { "Α", 913 }, { "α", 945 }, { "&", 38 }, { "∧", 8743 }, { "∠", 8736 }, { "Å", 197 }, { "å", 229 }, { "≈", 8776 }, { "Ã", 195 }, { "ã", 227 }, { "Ä", 196 }, { "ä", 228 }, { "„", 8222 }, { "Β", 914 }, { "β", 946 }, { "¦", 166 }, { "•", 8226 }, { "∩", 8745 }, { "Ç", 199 }, { "ç", 231 }, { "¸", 184 }, { "¢", 162 }, { "Χ", 935 }, { "χ", 967 }, { "ˆ", 710 }, { "♣", 9827 }, { "≅", 8773 }, { "©", 169 }, { "↵", 8629 }, { "∪", 8746 }, { "¤", 164 },
{ "†", 8224 }, { "‡", 8225 }, { "↓", 8595 }, { "⇓", 8659 }, { "°", 176 }, { "Δ", 916 }, { "δ", 948 }, { "♦", 9830 }, { "÷", 247 }, { "É", 201 }, { "é", 233 }, { "Ê", 202 }, { "ê", 234 }, { "È", 200 }, { "è", 232 }, { "∅", 8709 }, { " ", 8195 }, { " ", 8194 }, { "Ε", 917 }, { "ε", 949 }, { "≡", 8801 }, { "Η", 919 }, { "η", 951 }, { "Ð", 208 }, { "ð", 240 }, { "Ë", 203 }, { "ë", 235 }, { "€", 8364 }, { "∃", 8707 }, { "ƒ", 402 }, { "∀", 8704 }, { "½", 189 }, { "¼", 188 }, { "¾", 190 }, { "⁄", 8260 }, { "Γ", 915 }, { "γ", 947 }, { "≥", 8805 }, { "↔", 8596 }, { "⇔", 8660 }, { "♥", 9829 }, { "…", 8230 }, { "Í", 205 },
{ "í", 237 }, { "Î", 206 }, { "î", 238 }, { "¡", 161 }, { "Ì", 204 }, { "ì", 236 }, { "ℑ", 8465 }, { "∞", 8734 }, { "∫", 8747 }, { "Ι", 921 }, { "ι", 953 }, { "¿", 191 }, { "∈", 8712 }, { "Ï", 207 }, { "ï", 239 }, { "Κ", 922 }, { "κ", 954 }, { "Λ", 923 }, { "λ", 955 }, { "〈", 9001 }, { "«", 171 }, { "←", 8592 }, { "⇐", 8656 }, { "⌈", 8968 }, { "“", 8220 }, { "≤", 8804 }, { "⌊", 8970 }, { "∗", 8727 }, { "◊", 9674 }, { "", 8206 }, { "‹", 8249 }, { "‘", 8216 }, { "¯", 175 }, { "—", 8212 }, { "µ", 181 }, { "·", 183 }, { "−", 8722 }, { "Μ", 924 }, { "μ", 956 }, { "∇", 8711 }, { " ", 160 }, { "–", 8211 }, { "≠", 8800 }, { "∋", 8715 },
{ "¬", 172 }, { "∉", 8713 }, { "⊄", 8836 }, { "Ñ", 209 }, { "ñ", 241 }, { "Ν", 925 }, { "ν", 957 }, { "Ó", 211 }, { "ó", 243 }, { "Ô", 212 }, { "ô", 244 }, { "Œ", 338 }, { "œ", 339 }, { "Ò", 210 }, { "ò", 242 }, { "‾", 8254 }, { "Ω", 937 }, { "ω", 969 }, { "Ο", 927 }, { "ο", 959 }, { "⊕", 8853 }, { "∨", 8744 }, { "ª", 170 }, { "º", 186 }, { "Ø", 216 }, { "ø", 248 }, { "Õ", 213 }, { "õ", 245 }, { "⊗", 8855 }, { "Ö", 214 }, { "ö", 246 }, { "¶", 182 }, { "∂", 8706 }, { "‰", 8240 }, { "⊥", 8869 }, { "Φ", 934 }, { "φ", 966 }, { "Π", 928 }, { "π", 960 }, { "ϖ", 982 }, { "±", 177 }, { "£", 163 }, { "′", 8242 }, { "″", 8243 },
{ "∏", 8719 }, { "∝", 8733 }, { "Ψ", 936 }, { "ψ", 968 }, { "√", 8730 }, { "〉", 9002 }, { "»", 187 }, { "→", 8594 }, { "⇒", 8658 }, { "⌉", 8969 }, { "”", 8221 }, { "ℜ", 8476 }, { "®", 174 }, { "⌋", 8971 }, { "Ρ", 929 }, { "ρ", 961 }, { "", 8207 }, { "›", 8250 }, { "’", 8217 }, { "‚", 8218 }, { "Š", 352 }, { "š", 353 }, { "⋅", 8901 }, { "§", 167 }, { "", 173 }, { "Σ", 931 }, { "σ", 963 }, { "ς", 962 }, { "∼", 8764 }, { "♠", 9824 }, { "⊂", 8834 }, { "⊆", 8838 }, { "∑", 8721 }, { "¹", 185 }, { "²", 178 }, { "³", 179 }, { "⊃", 8835 }, { "⊇", 8839 }, { "ß", 223 }, { "Τ", 932 }, { "τ", 964 }, { "∴", 8756 }, { "Θ", 920 }, { "θ", 952 },
{ "ϑ", 977 }, { " ", 8201 }, { "Þ", 222 }, { "þ", 254 }, { "˜", 732 }, { "×", 215 }, { "™", 8482 }, { "Ú", 218 }, { "ú", 250 }, { "↑", 8593 }, { "⇑", 8657 }, { "Û", 219 }, { "û", 251 }, { "Ù", 217 }, { "ù", 249 }, { "¨", 168 }, { "ϒ", 978 }, { "Υ", 933 }, { "υ", 965 }, { "Ü", 220 }, { "ü", 252 }, { "℘", 8472 }, { "Ξ", 926 }, { "ξ", 958 }, { "Ý", 221 }, { "ý", 253 }, { "¥", 165 }, { "ÿ", 255 }, { "Ÿ", 376 }, { "Ζ", 918 }, { "ζ", 950 }, { "", 8205 }, { "", 8204 } };
/**
* Map to convert extended characters in html entities.
*/
private static final Hashtable<Integer, String> htmlentities_map = new Hashtable<Integer, String>();
/**
* Map to convert html entities in exteden characters.
*/
private static final Hashtable<String, Integer> unhtmlentities_map = new Hashtable<String, Integer>();
// ==========================================================================
// ====
// METHODS
// ==========================================================================
// ====
/**
* Get the html entities translation table.
*
* @return translation table
*/
public static Object[][] getEntitiesTable() {
return HTMLEntities.html_entities_table;
}
/**
* Replace & characters with & HTML entities.
*
* @param str
* the input string
* @return string with replaced characters
*/
public static String htmlAmpersand(final String str) {
return str.replaceAll("&", "&");
}
/**
* Replace < > characters with < > entities.
*
* @param str
* the input string
* @return string with replaced characters
*/
public static String htmlAngleBrackets(String str) {
str = str.replaceAll("<", "<");
str = str.replaceAll(">", ">");
return str;
}
/**
* Replace double quotes characters with HTML entities.
*
* @param str
* the input string
* @return string with replaced double quotes
*/
public static String htmlDoubleQuotes(String str) {
str = str.replaceAll("[\"]", """);
str = str.replaceAll("", """);
str = str.replaceAll("", """);
return str;
}
/**
* Convert special and extended characters into HTML entitities.
*
* @param str
* input string
* @return formatted string
* @see #unhtmlentities(String)
*/
public static String htmlentities(final String str) {
if (str == null) { return ""; }
// initialize html translation maps table the first time is called
if (HTMLEntities.htmlentities_map.isEmpty()) {
HTMLEntities.initializeEntitiesTables();
}
final StringBuilder buf = new StringBuilder(); // the otput string
// buffer
for (int i = 0; i < str.length(); ++i) {
final char ch = str.charAt(i);
final String entity = HTMLEntities.htmlentities_map.get(new Integer(ch)); // get
// equivalent
// html
// entity
if (entity == null) { // if entity has not been found
if (ch > 128) { // check if is an extended character
buf.append("" + (int) ch + ";"); // convert extended
// character
} else {
buf.append(ch); // append the character as is
}
} else {
buf.append(entity); // append the html entity
}
}
return buf.toString();
}
// methods to convert special characters
/**
* Replace single and double quotes characters with HTML entities.
*
* @param str
* the input string
* @return string with replaced quotes
*/
public static String htmlQuotes(String str) {
str = HTMLEntities.htmlDoubleQuotes(str); // convert double quotes
str = HTMLEntities.htmlSingleQuotes(str); // convert single quotes
return str;
}
/**
* Replace single quotes characters with HTML entities.
*
* @param str
* the input string
* @return string with replaced single quotes
*/
public static String htmlSingleQuotes(String str) {
str = str.replaceAll("[\']", "’");
str = str.replaceAll("'", "’");
str = str.replaceAll("", "’");
str = str.replaceAll("", "’");
return str;
}
/**
* @author JD-Team coalado
* @param format
* @return
*/
public static String htmlTotal(String format) {
format = HTMLEntities.htmlentities(format);
format = HTMLEntities.htmlAmpersand(format);
format = HTMLEntities.htmlAngleBrackets(format);
format = HTMLEntities.htmlDoubleQuotes(format);
format = HTMLEntities.htmlQuotes(format);
format = HTMLEntities.htmlSingleQuotes(format);
return format;
}
/**
* Initialize HTML entities table.
*/
private static void initializeEntitiesTables() {
// initialize html translation maps
for (int i = 0; i < HTMLEntities.html_entities_table.length; ++i) {
HTMLEntities.htmlentities_map.put((Integer) HTMLEntities.html_entities_table[i][1], (String) HTMLEntities.html_entities_table[i][0]);
HTMLEntities.unhtmlentities_map.put((String) HTMLEntities.html_entities_table[i][0], (Integer) HTMLEntities.html_entities_table[i][1]);
}
}
/**
* Replace & HTML entities with & characters.
*
* @param str
* the input string
* @return string with replaced entities
*/
public static String unhtmlAmpersand(final String str) {
return str.replaceAll("&", "&");
}
/**
* Replace < > entities with < > characters.
*
* @param str
* the input string
* @return string with replaced entities
*/
public static String unhtmlAngleBrackets(String str) {
str = str.replaceAll("<", "<");
str = str.replaceAll(">", ">");
return str;
}
/**
* Replace single quotes HTML entities with equivalent character.
*
* @param str
* the input string
* @return string with replaced single quotes
*/
public static String unhtmlDoubleQuotes(final String str) {
return str.replaceAll(""", "\"");
}
/**
* Convert HTML entities to special and extended unicode characters
* equivalents.
*
* @param str
* input string
* @return formatted string
* @see #htmlentities(String)
*/
public static String unhtmlentities(final String str) {
if (str == null) { return null; }
// initialize html translation maps table the first time is called
if (HTMLEntities.htmlentities_map.isEmpty()) {
HTMLEntities.initializeEntitiesTables();
}
final StringBuilder buf = new StringBuilder();
for (int i = 0; i < str.length(); ++i) {
final char ch = str.charAt(i);
if (ch == '&') {
final int semi = str.indexOf(';', i + 1);
if (semi == -1 || semi - i > 7) {
buf.append(ch);
continue;
}
final String entity = str.substring(i, semi + 1);
Integer iso;
if (entity.charAt(1) == ' ') {
buf.append(ch);
continue;
}
if (entity.charAt(1) == '#') {
if (entity.charAt(2) == 'x') {
iso = new Integer(Integer.parseInt(entity.substring(3, entity.length() - 1), 16));
} else {
iso = new Integer(entity.substring(2, entity.length() - 1));
}
} else {
iso = HTMLEntities.unhtmlentities_map.get(entity);
}
if (iso == null) {
buf.append(entity);
} else {
buf.append((char) iso.intValue());
}
i = semi;
} else {
buf.append(ch);
}
}
return buf.toString();
}
/**
* Replace single and double quotes HTML entities with equivalent
* characters.
*
* @param str
* the input string
* @return string with replaced quotes
*/
public static String unhtmlQuotes(String str) {
str = HTMLEntities.unhtmlDoubleQuotes(str); // convert double quotes
str = HTMLEntities.unhtmlSingleQuotes(str); // convert single quotes
return str;
}
/**
* Replace single quotes HTML entities with equivalent character.
*
* @param str
* the input string
* @return string with replaced single quotes
*/
public static String unhtmlSingleQuotes(final String str) {
return str.replaceAll("’", "\'");
}
/**
* Initialize HTML translation maps.
*/
public HTMLEntities() {
HTMLEntities.initializeEntitiesTables();
}
}