/** * Copyright (c) 2004-2005 Tecnick.com S.r.l (www.tecnick.com) Via Ugo Foscolo * n.19 - 09045 Quartu Sant'Elena (CA) - ITALY - www.tecnick.com - * info@tecnick.com<br/> * Project homepage: <a href="http://htmlentities.sourceforge.net" target="_blank">http://htmlentities.sourceforge.net</a><br/> * License: http://www.gnu.org/copyleft/lesser.html LGPL */ package com.tecnick.htmlutils.htmlentities; import java.util.Hashtable; /** * Collection of static methods to convert special and extended * characters into HTML entitities and vice versa.<br/><br/> * @author Nicola Asuni [www.tecnick.com]. * @version 1.0.004 */ public class HTMLEntities { /** * Translation table for HTML entities.<br/> * reference: W3C - Character entity references in HTML 4 [<a href="http://www.w3.org/TR/html401/sgml/entities.html" target="_blank">http://www.w3.org/TR/html401/sgml/entities.html</a>]. */ private static final Object[][] html_entities_table = { { new String("Á"), new Integer(193) }, { new String("á"), new Integer(225) }, { new String("Â"), new Integer(194) }, { new String("â"), new Integer(226) }, { new String("´"), new Integer(180) }, { new String("Æ"), new Integer(198) }, { new String("æ"), new Integer(230) }, { new String("À"), new Integer(192) }, { new String("à"), new Integer(224) }, { new String("ℵ"), new Integer(8501) }, { new String("Α"), new Integer(913) }, { new String("α"), new Integer(945) }, { new String("&"), new Integer(38) }, { new String("∧"), new Integer(8743) }, { new String("∠"), new Integer(8736) }, { new String("Å"), new Integer(197) }, { new String("å"), new Integer(229) }, { new String("≈"), new Integer(8776) }, { new String("Ã"), new Integer(195) }, { new String("ã"), new Integer(227) }, { new String("Ä"), new Integer(196) }, { new String("ä"), new Integer(228) }, { new String("„"), new Integer(8222) }, { new String("Β"), new Integer(914) }, { new String("β"), new Integer(946) }, { new String("¦"), new Integer(166) }, { new String("•"), new Integer(8226) }, { new String("∩"), new Integer(8745) }, { new String("Ç"), new Integer(199) }, { new String("ç"), new Integer(231) }, { new String("¸"), new Integer(184) }, { new String("¢"), new Integer(162) }, { new String("Χ"), new Integer(935) }, { new String("χ"), new Integer(967) }, { new String("ˆ"), new Integer(710) }, { new String("♣"), new Integer(9827) }, { new String("≅"), new Integer(8773) }, { new String("©"), new Integer(169) }, { new String("↵"), new Integer(8629) }, { new String("∪"), new Integer(8746) }, { new String("¤"), new Integer(164) }, { new String("†"), new Integer(8224) }, { new String("‡"), new Integer(8225) }, { new String("↓"), new Integer(8595) }, { new String("⇓"), new Integer(8659) }, { new String("°"), new Integer(176) }, { new String("Δ"), new Integer(916) }, { new String("δ"), new Integer(948) }, { new String("♦"), new Integer(9830) }, { new String("÷"), new Integer(247) }, { new String("É"), new Integer(201) }, { new String("é"), new Integer(233) }, { new String("Ê"), new Integer(202) }, { new String("ê"), new Integer(234) }, { new String("È"), new Integer(200) }, { new String("è"), new Integer(232) }, { new String("∅"), new Integer(8709) }, { new String(" "), new Integer(8195) }, { new String(" "), new Integer(8194) }, { new String("Ε"), new Integer(917) }, { new String("ε"), new Integer(949) }, { new String("≡"), new Integer(8801) }, { new String("Η"), new Integer(919) }, { new String("η"), new Integer(951) }, { new String("Ð"), new Integer(208) }, { new String("ð"), new Integer(240) }, { new String("Ë"), new Integer(203) }, { new String("ë"), new Integer(235) }, { new String("€"), new Integer(8364) }, { new String("∃"), new Integer(8707) }, { new String("ƒ"), new Integer(402) }, { new String("∀"), new Integer(8704) }, { new String("½"), new Integer(189) }, { new String("¼"), new Integer(188) }, { new String("¾"), new Integer(190) }, { new String("⁄"), new Integer(8260) }, { new String("Γ"), new Integer(915) }, { new String("γ"), new Integer(947) }, { new String("≥"), new Integer(8805) }, { new String("↔"), new Integer(8596) }, { new String("⇔"), new Integer(8660) }, { new String("♥"), new Integer(9829) }, { new String("…"), new Integer(8230) }, { new String("Í"), new Integer(205) }, { new String("í"), new Integer(237) }, { new String("Î"), new Integer(206) }, { new String("î"), new Integer(238) }, { new String("¡"), new Integer(161) }, { new String("Ì"), new Integer(204) }, { new String("ì"), new Integer(236) }, { new String("ℑ"), new Integer(8465) }, { new String("∞"), new Integer(8734) }, { new String("∫"), new Integer(8747) }, { new String("Ι"), new Integer(921) }, { new String("ι"), new Integer(953) }, { new String("¿"), new Integer(191) }, { new String("∈"), new Integer(8712) }, { new String("Ï"), new Integer(207) }, { new String("ï"), new Integer(239) }, { new String("Κ"), new Integer(922) }, { new String("κ"), new Integer(954) }, { new String("Λ"), new Integer(923) }, { new String("λ"), new Integer(955) }, { new String("⟨"), new Integer(9001) }, { new String("«"), new Integer(171) }, { new String("←"), new Integer(8592) }, { new String("⇐"), new Integer(8656) }, { new String("⌈"), new Integer(8968) }, { new String("“"), new Integer(8220) }, { new String("≤"), new Integer(8804) }, { new String("⌊"), new Integer(8970) }, { new String("∗"), new Integer(8727) }, { new String("◊"), new Integer(9674) }, { new String("‎"), new Integer(8206) }, { new String("‹"), new Integer(8249) }, { new String("‘"), new Integer(8216) }, { new String("¯"), new Integer(175) }, { new String("—"), new Integer(8212) }, { new String("µ"), new Integer(181) }, { new String("·"), new Integer(183) }, { new String("−"), new Integer(8722) }, { new String("Μ"), new Integer(924) }, { new String("μ"), new Integer(956) }, { new String("∇"), new Integer(8711) }, { new String(" "), new Integer(160) }, { new String("–"), new Integer(8211) }, { new String("≠"), new Integer(8800) }, { new String("∋"), new Integer(8715) }, { new String("¬"), new Integer(172) }, { new String("∉"), new Integer(8713) }, { new String("⊄"), new Integer(8836) }, { new String("Ñ"), new Integer(209) }, { new String("ñ"), new Integer(241) }, { new String("Ν"), new Integer(925) }, { new String("ν"), new Integer(957) }, { new String("Ó"), new Integer(211) }, { new String("ó"), new Integer(243) }, { new String("Ô"), new Integer(212) }, { new String("ô"), new Integer(244) }, { new String("Œ"), new Integer(338) }, { new String("œ"), new Integer(339) }, { new String("Ò"), new Integer(210) }, { new String("ò"), new Integer(242) }, { new String("‾"), new Integer(8254) }, { new String("Ω"), new Integer(937) }, { new String("ω"), new Integer(969) }, { new String("Ο"), new Integer(927) }, { new String("ο"), new Integer(959) }, { new String("⊕"), new Integer(8853) }, { new String("∨"), new Integer(8744) }, { new String("ª"), new Integer(170) }, { new String("º"), new Integer(186) }, { new String("Ø"), new Integer(216) }, { new String("ø"), new Integer(248) }, { new String("Õ"), new Integer(213) }, { new String("õ"), new Integer(245) }, { new String("⊗"), new Integer(8855) }, { new String("Ö"), new Integer(214) }, { new String("ö"), new Integer(246) }, { new String("¶"), new Integer(182) }, { new String("∂"), new Integer(8706) }, { new String("‰"), new Integer(8240) }, { new String("⊥"), new Integer(8869) }, { new String("Φ"), new Integer(934) }, { new String("φ"), new Integer(966) }, { new String("Π"), new Integer(928) }, { new String("π"), new Integer(960) }, { new String("ϖ"), new Integer(982) }, { new String("±"), new Integer(177) }, { new String("£"), new Integer(163) }, { new String("′"), new Integer(8242) }, { new String("″"), new Integer(8243) }, { new String("∏"), new Integer(8719) }, { new String("∝"), new Integer(8733) }, { new String("Ψ"), new Integer(936) }, { new String("ψ"), new Integer(968) }, { new String("""), new Integer(34) }, { new String("√"), new Integer(8730) }, { new String("⟩"), new Integer(9002) }, { new String("»"), new Integer(187) }, { new String("→"), new Integer(8594) }, { new String("⇒"), new Integer(8658) }, { new String("⌉"), new Integer(8969) }, { new String("”"), new Integer(8221) }, { new String("ℜ"), new Integer(8476) }, { new String("®"), new Integer(174) }, { new String("⌋"), new Integer(8971) }, { new String("Ρ"), new Integer(929) }, { new String("ρ"), new Integer(961) }, { new String("‏"), new Integer(8207) }, { new String("›"), new Integer(8250) }, { new String("’"), new Integer(8217) }, { new String("‚"), new Integer(8218) }, { new String("Š"), new Integer(352) }, { new String("š"), new Integer(353) }, { new String("⋅"), new Integer(8901) }, { new String("§"), new Integer(167) }, { new String("­"), new Integer(173) }, { new String("Σ"), new Integer(931) }, { new String("σ"), new Integer(963) }, { new String("ς"), new Integer(962) }, { new String("∼"), new Integer(8764) }, { new String("♠"), new Integer(9824) }, { new String("⊂"), new Integer(8834) }, { new String("⊆"), new Integer(8838) }, { new String("∑"), new Integer(8721) }, { new String("¹"), new Integer(185) }, { new String("²"), new Integer(178) }, { new String("³"), new Integer(179) }, { new String("⊃"), new Integer(8835) }, { new String("⊇"), new Integer(8839) }, { new String("ß"), new Integer(223) }, { new String("Τ"), new Integer(932) }, { new String("τ"), new Integer(964) }, { new String("∴"), new Integer(8756) }, { new String("Θ"), new Integer(920) }, { new String("θ"), new Integer(952) }, { new String("ϑ"), new Integer(977) }, { new String(" "), new Integer(8201) }, { new String("Þ"), new Integer(222) }, { new String("þ"), new Integer(254) }, { new String("˜"), new Integer(732) }, { new String("×"), new Integer(215) }, { new String("™"), new Integer(8482) }, { new String("Ú"), new Integer(218) }, { new String("ú"), new Integer(250) }, { new String("↑"), new Integer(8593) }, { new String("⇑"), new Integer(8657) }, { new String("Û"), new Integer(219) }, { new String("û"), new Integer(251) }, { new String("Ù"), new Integer(217) }, { new String("ù"), new Integer(249) }, { new String("¨"), new Integer(168) }, { new String("ϒ"), new Integer(978) }, { new String("Υ"), new Integer(933) }, { new String("υ"), new Integer(965) }, { new String("Ü"), new Integer(220) }, { new String("ü"), new Integer(252) }, { new String("℘"), new Integer(8472) }, { new String("Ξ"), new Integer(926) }, { new String("ξ"), new Integer(958) }, { new String("Ý"), new Integer(221) }, { new String("ý"), new Integer(253) }, { new String("¥"), new Integer(165) }, { new String("ÿ"), new Integer(255) }, { new String("Ÿ"), new Integer(376) }, { new String("Ζ"), new Integer(918) }, { new String("ζ"), new Integer(950) }, { new String("‍"), new Integer(8205) }, { new String("‌"), new Integer(8204) } }; /** * Map to convert extended characters in html entities. */ private static final Hashtable htmlentities_map = new Hashtable(); /** * Map to convert html entities in exteden characters. */ private static final Hashtable unhtmlentities_map = new Hashtable(); //============================================================================== // METHODS //============================================================================== /** * Initialize HTML translation maps. */ public HTMLEntities() { initializeEntitiesTables(); } /** * Initialize HTML entities table. */ private static void initializeEntitiesTables() { // initialize html translation maps for (int i = 0; i < html_entities_table.length; ++i) { htmlentities_map.put(html_entities_table[i][1], html_entities_table[i][0]); unhtmlentities_map.put(html_entities_table[i][0], html_entities_table[i][1]); } } /** * Get the html entities translation table. * * @return translation table */ public static Object[][] getEntitiesTable() { return html_entities_table; } /** * Convert special and extended characters into HTML entitities. * @param str input string * @return formatted string * @see #unhtmlentities(String) */ public static String htmlentities(String str) { if (str == null) { return ""; } //initialize html translation maps table the first time is called if (htmlentities_map.isEmpty()) { initializeEntitiesTables(); } StringBuffer buf = new StringBuffer(); //the otput string buffer for (int i = 0; i < str.length(); ++i) { char ch = str.charAt(i); String entity = (String) htmlentities_map.get(new Integer((int) ch)); //get equivalent html entity if (entity == null) { //if entity has not been found if (((int) ch) > 128) { //check if is an extended character buf.append("&#" + ((int) ch) + ";"); //convert extended character } else { buf.append(ch); //append the character as is } } else { buf.append(entity); //append the html entity } } return buf.toString(); } /** * Convert HTML entities to special and extended unicode characters * equivalents. * @param str input string * @return formatted string * @see #htmlentities(String) */ public static String unhtmlentities(String str) { //initialize html translation maps table the first time is called if (htmlentities_map.isEmpty()) { initializeEntitiesTables(); } StringBuffer buf = new StringBuffer(); for (int i = 0; i < str.length(); ++i) { char ch = str.charAt(i); if (ch == '&') { int semi = str.indexOf(';', i + 1); if ((semi == -1) || ((semi-i) > 7)){ buf.append(ch); continue; } String entity = str.substring(i, semi + 1); Integer iso; if (entity.charAt(1) == ' ') { buf.append(ch); continue; } if (entity.charAt(1) == '#') { if (entity.charAt(2) == 'x') { iso = new Integer(Integer.parseInt(entity.substring(3, entity.length() - 1), 16)); } else { iso = new Integer(entity.substring(2, entity.length() - 1)); } } else { iso = (Integer) unhtmlentities_map.get(entity); } if (iso == null) { buf.append(entity); } else { buf.append((char) (iso.intValue())); } i = semi; } else { buf.append(ch); } } return buf.toString(); } // methods to convert special characters /** * Replace single quotes characters with HTML entities. * * @param str the input string * @return string with replaced single quotes */ public static String htmlSingleQuotes(String str) { str = str.replaceAll("[\']", "’"); str = str.replaceAll("'", "’"); str = str.replaceAll("‘", "’"); str = str.replaceAll("’", "’"); return str; } /** * Replace single quotes HTML entities with equivalent character. * * @param str the input string * @return string with replaced single quotes */ public static String unhtmlSingleQuotes(String str) { return str.replaceAll("’", "\'"); } /** * Replace double quotes characters with HTML entities. * * @param str the input string * @return string with replaced double quotes */ public static String htmlDoubleQuotes(String str) { str = str.replaceAll("[\"]", """); str = str.replaceAll("“", """); str = str.replaceAll("”", """); return str; } /** * Replace single quotes HTML entities with equivalent character. * * @param str the input string * @return string with replaced single quotes */ public static String unhtmlDoubleQuotes(String str) { return str.replaceAll(""", "\""); } /** * Replace single and double quotes characters with HTML entities. * * @param str the input string * @return string with replaced quotes */ public static String htmlQuotes(String str) { str = htmlDoubleQuotes(str); //convert double quotes str = htmlSingleQuotes(str); //convert single quotes return str; } /** * Replace single and double quotes HTML entities with equivalent characters. * * @param str the input string * @return string with replaced quotes */ public static String unhtmlQuotes(String str) { str = unhtmlDoubleQuotes(str); //convert double quotes str = unhtmlSingleQuotes(str); //convert single quotes return str; } /** * Replace < > characters with &lt; &gt; entities. * * @param str the input string * @return string with replaced characters */ public static String htmlAngleBrackets(String str) { str = str.replaceAll("<", "<"); str = str.replaceAll(">", ">"); return str; } /** * Replace &lt; &gt; entities with < > characters. * * @param str the input string * @return string with replaced entities */ public static String unhtmlAngleBrackets(String str) { str = str.replaceAll("<", "<"); str = str.replaceAll(">", ">"); return str; } /** * Replace & characters with &amp; HTML entities. * * @param str the input string * @return string with replaced characters */ public static String htmlAmpersand(String str) { return str.replaceAll("&", "&"); } /** * Replace &amp; HTML entities with & characters. * * @param str the input string * @return string with replaced entities */ public static String unhtmlAmpersand(String str) { return str.replaceAll("&", "&"); } }