package jd.nutils.encoding;

import java.util.Hashtable;

/**
 * Collection of static methods to convert special and extended characters into
 * HTML entities and vice versa.<br>
 * <br>
 * Copyright (c) 2004-2005 Tecnick.com S.r.l (www.tecnick.com) Via Ugo Foscolo
 * n.19 - 09045 Quartu Sant'Elena (CA) - ITALY - www.tecnick.com -
 * info@tecnick.com<br>
 * Project homepage: <a href="http://htmlentities.sourceforge.net"
 * target="_blank">http://htmlentities.sourceforge.net</a><br>
 * License: http://www.gnu.org/copyleft/lesser.html LGPL
 * <p>
 * The lookup maps are filled once in the static initializer and are never
 * modified afterwards, so all methods may be called without prior set-up.
 *
 * @author Nicola Asuni [www.tecnick.com].
 * @version 1.0.004
 *
 *
 *          Changes by JD-Team:
 *
 *          added htmlTotal Method
 */
public class HTMLEntities {

    /**
     * Translation table for HTML entities: each row is
     * <code>{ String entity, Integer codePoint }</code>.<br>
     * reference: W3C - Character entity references in HTML 4 [<a
     * href="http://www.w3.org/TR/html401/sgml/entities.html"
     * target="_blank">http://www.w3.org/TR/html401/sgml/entities.html</a>].
     * <p>
     * The markup-significant entities ({@code &lt;}, {@code &gt;},
     * {@code &quot;}) are not part of this table; they are handled by
     * {@link #htmlAngleBrackets(String)} and {@link #htmlDoubleQuotes(String)}.
     */
    private static final Object[][] ENTITY_TABLE = {
        { "&Aacute;", 193 }, { "&aacute;", 225 }, { "&Acirc;", 194 }, { "&acirc;", 226 },
        { "&acute;", 180 }, { "&AElig;", 198 }, { "&aelig;", 230 }, { "&Agrave;", 192 },
        { "&agrave;", 224 }, { "&alefsym;", 8501 }, { "&Alpha;", 913 }, { "&alpha;", 945 },
        { "&amp;", 38 }, { "&and;", 8743 }, { "&ang;", 8736 }, { "&Aring;", 197 },
        { "&aring;", 229 }, { "&asymp;", 8776 }, { "&Atilde;", 195 }, { "&atilde;", 227 },
        { "&Auml;", 196 }, { "&auml;", 228 }, { "&bdquo;", 8222 }, { "&Beta;", 914 },
        { "&beta;", 946 }, { "&brvbar;", 166 }, { "&bull;", 8226 }, { "&cap;", 8745 },
        { "&Ccedil;", 199 }, { "&ccedil;", 231 }, { "&cedil;", 184 }, { "&cent;", 162 },
        { "&Chi;", 935 }, { "&chi;", 967 }, { "&circ;", 710 }, { "&clubs;", 9827 },
        { "&cong;", 8773 }, { "&copy;", 169 }, { "&crarr;", 8629 }, { "&cup;", 8746 },
        { "&curren;", 164 }, { "&dagger;", 8224 }, { "&Dagger;", 8225 }, { "&darr;", 8595 },
        { "&dArr;", 8659 }, { "&deg;", 176 }, { "&Delta;", 916 }, { "&delta;", 948 },
        { "&diams;", 9830 }, { "&divide;", 247 }, { "&Eacute;", 201 }, { "&eacute;", 233 },
        { "&Ecirc;", 202 }, { "&ecirc;", 234 }, { "&Egrave;", 200 }, { "&egrave;", 232 },
        { "&empty;", 8709 }, { "&emsp;", 8195 }, { "&ensp;", 8194 }, { "&Epsilon;", 917 },
        { "&epsilon;", 949 }, { "&equiv;", 8801 }, { "&Eta;", 919 }, { "&eta;", 951 },
        { "&ETH;", 208 }, { "&eth;", 240 }, { "&Euml;", 203 }, { "&euml;", 235 },
        { "&euro;", 8364 }, { "&exist;", 8707 }, { "&fnof;", 402 }, { "&forall;", 8704 },
        { "&frac12;", 189 }, { "&frac14;", 188 }, { "&frac34;", 190 }, { "&frasl;", 8260 },
        { "&Gamma;", 915 }, { "&gamma;", 947 }, { "&ge;", 8805 }, { "&harr;", 8596 },
        { "&hArr;", 8660 }, { "&hearts;", 9829 }, { "&hellip;", 8230 }, { "&Iacute;", 205 },
        { "&iacute;", 237 }, { "&Icirc;", 206 }, { "&icirc;", 238 }, { "&iexcl;", 161 },
        { "&Igrave;", 204 }, { "&igrave;", 236 }, { "&image;", 8465 }, { "&infin;", 8734 },
        { "&int;", 8747 }, { "&Iota;", 921 }, { "&iota;", 953 }, { "&iquest;", 191 },
        { "&isin;", 8712 }, { "&Iuml;", 207 }, { "&iuml;", 239 }, { "&Kappa;", 922 },
        { "&kappa;", 954 }, { "&Lambda;", 923 }, { "&lambda;", 955 }, { "&lang;", 9001 },
        { "&laquo;", 171 }, { "&larr;", 8592 }, { "&lArr;", 8656 }, { "&lceil;", 8968 },
        { "&ldquo;", 8220 }, { "&le;", 8804 }, { "&lfloor;", 8970 }, { "&lowast;", 8727 },
        { "&loz;", 9674 }, { "&lrm;", 8206 }, { "&lsaquo;", 8249 }, { "&lsquo;", 8216 },
        { "&macr;", 175 }, { "&mdash;", 8212 }, { "&micro;", 181 }, { "&middot;", 183 },
        { "&minus;", 8722 }, { "&Mu;", 924 }, { "&mu;", 956 }, { "&nabla;", 8711 },
        { "&nbsp;", 160 }, { "&ndash;", 8211 }, { "&ne;", 8800 }, { "&ni;", 8715 },
        { "&not;", 172 }, { "&notin;", 8713 }, { "&nsub;", 8836 }, { "&Ntilde;", 209 },
        { "&ntilde;", 241 }, { "&Nu;", 925 }, { "&nu;", 957 }, { "&Oacute;", 211 },
        { "&oacute;", 243 }, { "&Ocirc;", 212 }, { "&ocirc;", 244 }, { "&OElig;", 338 },
        { "&oelig;", 339 }, { "&Ograve;", 210 }, { "&ograve;", 242 }, { "&oline;", 8254 },
        { "&Omega;", 937 }, { "&omega;", 969 }, { "&Omicron;", 927 }, { "&omicron;", 959 },
        { "&oplus;", 8853 }, { "&or;", 8744 }, { "&ordf;", 170 }, { "&ordm;", 186 },
        { "&Oslash;", 216 }, { "&oslash;", 248 }, { "&Otilde;", 213 }, { "&otilde;", 245 },
        { "&otimes;", 8855 }, { "&Ouml;", 214 }, { "&ouml;", 246 }, { "&para;", 182 },
        { "&part;", 8706 }, { "&permil;", 8240 }, { "&perp;", 8869 }, { "&Phi;", 934 },
        { "&phi;", 966 }, { "&Pi;", 928 }, { "&pi;", 960 }, { "&piv;", 982 },
        { "&plusmn;", 177 }, { "&pound;", 163 }, { "&prime;", 8242 }, { "&Prime;", 8243 },
        { "&prod;", 8719 }, { "&prop;", 8733 }, { "&Psi;", 936 }, { "&psi;", 968 },
        { "&radic;", 8730 }, { "&rang;", 9002 }, { "&raquo;", 187 }, { "&rarr;", 8594 },
        { "&rArr;", 8658 }, { "&rceil;", 8969 }, { "&rdquo;", 8221 }, { "&real;", 8476 },
        { "&reg;", 174 }, { "&rfloor;", 8971 }, { "&Rho;", 929 }, { "&rho;", 961 },
        { "&rlm;", 8207 }, { "&rsaquo;", 8250 }, { "&rsquo;", 8217 }, { "&sbquo;", 8218 },
        { "&Scaron;", 352 }, { "&scaron;", 353 }, { "&sdot;", 8901 }, { "&sect;", 167 },
        { "&shy;", 173 }, { "&Sigma;", 931 }, { "&sigma;", 963 }, { "&sigmaf;", 962 },
        { "&sim;", 8764 }, { "&spades;", 9824 }, { "&sub;", 8834 }, { "&sube;", 8838 },
        { "&sum;", 8721 }, { "&sup1;", 185 }, { "&sup2;", 178 }, { "&sup3;", 179 },
        { "&sup;", 8835 }, { "&supe;", 8839 }, { "&szlig;", 223 }, { "&Tau;", 932 },
        { "&tau;", 964 }, { "&there4;", 8756 }, { "&Theta;", 920 }, { "&theta;", 952 },
        { "&thetasym;", 977 }, { "&thinsp;", 8201 }, { "&THORN;", 222 }, { "&thorn;", 254 },
        { "&tilde;", 732 }, { "&times;", 215 }, { "&trade;", 8482 }, { "&Uacute;", 218 },
        { "&uacute;", 250 }, { "&uarr;", 8593 }, { "&uArr;", 8657 }, { "&Ucirc;", 219 },
        { "&ucirc;", 251 }, { "&Ugrave;", 217 }, { "&ugrave;", 249 }, { "&uml;", 168 },
        { "&upsih;", 978 }, { "&Upsilon;", 933 }, { "&upsilon;", 965 }, { "&Uuml;", 220 },
        { "&uuml;", 252 }, { "&weierp;", 8472 }, { "&Xi;", 926 }, { "&xi;", 958 },
        { "&Yacute;", 221 }, { "&yacute;", 253 }, { "&yen;", 165 }, { "&yuml;", 255 },
        { "&Yuml;", 376 }, { "&Zeta;", 918 }, { "&zeta;", 950 }, { "&zwj;", 8205 },
        { "&zwnj;", 8204 }
    };

    /**
     * Largest accepted distance between the '&amp;' and the ';' of an entity.
     * The longest table entry ("&amp;thetasym;") and the longest useful
     * numeric references ("&amp;#x10FFFF;", "&amp;#1114111;") all span 9.
     */
    private static final int MAX_ENTITY_SPAN = 9;

    /** Map from code point to named HTML entity. */
    private static final Hashtable<Integer, String> CODE_POINT_TO_ENTITY = new Hashtable<Integer, String>();

    /** Map from named HTML entity to code point. */
    private static final Hashtable<String, Integer> ENTITY_TO_CODE_POINT = new Hashtable<String, Integer>();

    static {
        // Eager, one-time fill: callers never observe a half-populated map.
        HTMLEntities.initializeEntitiesTables();
    }

    /**
     * Retained for backward compatibility only; the translation maps are
     * already populated by the static initializer.
     */
    public HTMLEntities() {
        // nothing to do
    }

    /**
     * Get the html entities translation table.
     *
     * @return a copy of the translation table; each row is
     *         <code>{ String entity, Integer codePoint }</code>. Changes to
     *         the returned array do not affect this class.
     */
    public static Object[][] getEntitiesTable() {
        final Object[][] copy = new Object[HTMLEntities.ENTITY_TABLE.length][];
        for (int i = 0; i < copy.length; ++i) {
            copy[i] = HTMLEntities.ENTITY_TABLE[i].clone();
        }
        return copy;
    }

    /**
     * Replace {@code &} characters with {@code &amp;} HTML entities.
     *
     * @param str
     *            the input string (must not be null)
     * @return string with replaced characters
     */
    public static String htmlAmpersand(final String str) {
        return str.replace("&", "&amp;");
    }

    /**
     * Replace {@code <} and {@code >} characters with {@code &lt;} and
     * {@code &gt;} entities.
     *
     * @param str
     *            the input string (must not be null)
     * @return string with replaced characters
     */
    public static String htmlAngleBrackets(String str) {
        str = str.replace("<", "&lt;");
        str = str.replace(">", "&gt;");
        return str;
    }

    /**
     * Replace double quote characters with the {@code &quot;} entity. Besides
     * the ASCII quote, the typographic quotes U+201C / U+201D and their legacy
     * numeric references {@code &#147;} / {@code &#148;} are normalised to
     * {@code &quot;} as well.
     *
     * @param str
     *            the input string (must not be null)
     * @return string with replaced double quotes
     */
    public static String htmlDoubleQuotes(String str) {
        str = str.replace("\"", "&quot;");
        str = str.replace("\u201C", "&quot;");
        str = str.replace("\u201D", "&quot;");
        str = str.replace("&#147;", "&quot;");
        str = str.replace("&#148;", "&quot;");
        return str;
    }

    /**
     * Convert special and extended characters into HTML entities. Characters
     * with a named entity in the translation table use that name; every other
     * non-ASCII code point becomes a decimal numeric reference. The input is
     * processed per code point, so a surrogate pair yields one reference.
     *
     * @param str
     *            input string, may be null
     * @return formatted string; the empty string if {@code str} is null
     * @see #unhtmlentities(String)
     */
    public static String htmlentities(final String str) {
        if (str == null) {
            return "";
        }
        final StringBuilder buf = new StringBuilder(str.length() + 16);
        int i = 0;
        while (i < str.length()) {
            final int codePoint = str.codePointAt(i);
            i += Character.charCount(codePoint);
            final String entity = HTMLEntities.CODE_POINT_TO_ENTITY.get(Integer.valueOf(codePoint));
            if (entity != null) {
                buf.append(entity);
            } else if (codePoint > 127) {
                // no named entity: fall back to a numeric character reference
                buf.append("&#").append(codePoint).append(';');
            } else {
                buf.append((char) codePoint);
            }
        }
        return buf.toString();
    }

    /**
     * Replace single and double quotes characters with HTML entities.
     *
     * @param str
     *            the input string (must not be null)
     * @return string with replaced quotes
     */
    public static String htmlQuotes(String str) {
        str = HTMLEntities.htmlDoubleQuotes(str);
        str = HTMLEntities.htmlSingleQuotes(str);
        return str;
    }

    /**
     * Replace single quote characters with the {@code &rsquo;} entity. The
     * ASCII apostrophe, the typographic quotes U+2018 / U+2019 and the numeric
     * references {@code &#039;}, {@code &#39;}, {@code &#145;} and
     * {@code &#146;} are all mapped to {@code &rsquo;}.
     *
     * @param str
     *            the input string (must not be null)
     * @return string with replaced single quotes
     */
    public static String htmlSingleQuotes(String str) {
        str = str.replace("'", "&rsquo;");
        str = str.replace("\u2018", "&rsquo;");
        str = str.replace("\u2019", "&rsquo;");
        str = str.replace("&#039;", "&rsquo;");
        str = str.replace("&#39;", "&rsquo;");
        str = str.replace("&#145;", "&rsquo;");
        str = str.replace("&#146;", "&rsquo;");
        return str;
    }

    /**
     * Apply {@link #htmlentities(String)}, {@link #htmlAmpersand(String)},
     * {@link #htmlAngleBrackets(String)} and {@link #htmlQuotes(String)}, in
     * that order.
     * <p>
     * NOTE(review): because {@code htmlAmpersand} runs after
     * {@code htmlentities}, the ampersand of every entity produced by the
     * first step is escaped again (e.g. "&amp;" becomes
     * {@code &amp;amp;}). This ordering is kept unchanged because callers may
     * rely on it - confirm whether the double escaping is intended.
     *
     * @author JD-Team coalado
     * @param format
     *            the input string, may be null
     * @return the escaped string; the empty string if {@code format} is null
     */
    public static String htmlTotal(String format) {
        format = HTMLEntities.htmlentities(format);
        format = HTMLEntities.htmlAmpersand(format);
        format = HTMLEntities.htmlAngleBrackets(format);
        // htmlQuotes covers both quote kinds; repeating the individual quote
        // passes afterwards would find nothing left to replace.
        format = HTMLEntities.htmlQuotes(format);
        return format;
    }

    /**
     * Fill both lookup maps from the translation table.
     */
    private static void initializeEntitiesTables() {
        for (int i = 0; i < HTMLEntities.ENTITY_TABLE.length; ++i) {
            final String entity = (String) HTMLEntities.ENTITY_TABLE[i][0];
            final Integer codePoint = (Integer) HTMLEntities.ENTITY_TABLE[i][1];
            HTMLEntities.CODE_POINT_TO_ENTITY.put(codePoint, entity);
            HTMLEntities.ENTITY_TO_CODE_POINT.put(entity, codePoint);
        }
    }

    /**
     * Replace {@code &amp;} HTML entities with {@code &} characters.
     *
     * @param str
     *            the input string (must not be null)
     * @return string with replaced entities
     */
    public static String unhtmlAmpersand(final String str) {
        return str.replace("&amp;", "&");
    }

    /**
     * Replace {@code &lt;} and {@code &gt;} entities with {@code <} and
     * {@code >} characters.
     *
     * @param str
     *            the input string (must not be null)
     * @return string with replaced entities
     */
    public static String unhtmlAngleBrackets(String str) {
        str = str.replace("&lt;", "<");
        str = str.replace("&gt;", ">");
        return str;
    }

    /**
     * Replace {@code &quot;} HTML entities with the double quote character.
     *
     * @param str
     *            the input string (must not be null)
     * @return string with replaced double quotes
     */
    public static String unhtmlDoubleQuotes(final String str) {
        return str.replace("&quot;", "\"");
    }

    /**
     * Convert HTML entities to special and extended unicode characters
     * equivalents. Named entities from the translation table and decimal or
     * hexadecimal numeric references are decoded; anything else that merely
     * looks like an entity (unknown name, malformed or out-of-range number)
     * is copied to the output unchanged.
     *
     * @param str
     *            input string, may be null
     * @return formatted string; null if {@code str} is null
     * @see #htmlentities(String)
     */
    public static String unhtmlentities(final String str) {
        if (str == null) {
            return null;
        }
        final StringBuilder buf = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); ++i) {
            final char ch = str.charAt(i);
            if (ch != '&') {
                buf.append(ch);
                continue;
            }
            final int semi = HTMLEntities.findEntityEnd(str, i);
            final int codePoint = semi < 0 ? -1 : HTMLEntities.decodeEntity(str.substring(i, semi + 1));
            if (codePoint < 0) {
                // Not an entity: emit only the '&' and keep scanning right
                // after it, so an entity that follows is still decoded.
                buf.append(ch);
            } else {
                buf.appendCodePoint(codePoint);
                i = semi;
            }
        }
        return buf.toString();
    }

    /**
     * Find the ';' that closes an entity candidate starting at
     * {@code ampIndex}. Returns the index of the ';', or -1 if none occurs
     * within {@link #MAX_ENTITY_SPAN} characters or another '&amp;' comes
     * first.
     */
    private static int findEntityEnd(final String str, final int ampIndex) {
        final int last = Math.min(str.length() - 1, ampIndex + HTMLEntities.MAX_ENTITY_SPAN);
        for (int j = ampIndex + 1; j <= last; ++j) {
            final char c = str.charAt(j);
            if (c == ';') {
                return j;
            }
            if (c == '&') {
                return -1;
            }
        }
        return -1;
    }

    /**
     * Decode one entity candidate of the form "&amp;...;". Returns the code
     * point, or -1 if the candidate is neither a known named entity nor a
     * valid numeric reference.
     */
    private static int decodeEntity(final String entity) {
        if (entity.charAt(1) == '#') {
            return HTMLEntities.parseNumericReference(entity);
        }
        final Integer codePoint = HTMLEntities.ENTITY_TO_CODE_POINT.get(entity);
        return codePoint == null ? -1 : codePoint.intValue();
    }

    /**
     * Parse a numeric reference ("&amp;#NNN;" or "&amp;#xHHH;", 'x' in either
     * case). Returns the code point, or -1 if the digits are missing, invalid
     * or outside the Unicode range.
     */
    private static int parseNumericReference(final String entity) {
        final char marker = entity.charAt(2);
        final boolean hex = marker == 'x' || marker == 'X';
        final int radix = hex ? 16 : 10;
        final String digits = entity.substring(hex ? 3 : 2, entity.length() - 1);
        // The first character must be a digit: this rejects the empty string
        // and the sign characters that Integer.parseInt would accept.
        if (digits.length() == 0 || Character.digit(digits.charAt(0), radix) < 0) {
            return -1;
        }
        try {
            final int value = Integer.parseInt(digits, radix);
            return Character.isValidCodePoint(value) ? value : -1;
        } catch (final NumberFormatException e) {
            return -1;
        }
    }

    /**
     * Replace single and double quotes HTML entities with equivalent
     * characters.
     *
     * @param str
     *            the input string (must not be null)
     * @return string with replaced quotes
     */
    public static String unhtmlQuotes(String str) {
        str = HTMLEntities.unhtmlDoubleQuotes(str);
        str = HTMLEntities.unhtmlSingleQuotes(str);
        return str;
    }

    /**
     * Replace {@code &rsquo;} HTML entities with the ASCII apostrophe.
     *
     * @param str
     *            the input string (must not be null)
     * @return string with replaced single quotes
     */
    public static String unhtmlSingleQuotes(final String str) {
        return str.replace("&rsquo;", "'");
    }
}