// HTMLEntity.java example
//
// Explorer
// firetweet-master
/*
 * Copyright 2007 Yusuke Yamamoto
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package twitter4j.http;

import java.util.HashMap;
import java.util.Map;

public final class HTMLEntity {
	private static Map<String, String> entityEscapeMap = new HashMap<String, String>();

	private static Map<String, String> escapeEntityMap = new HashMap<String, String>();

	static {
		final String[][] entities = { { " ", " "/*
														 * no-break space =
														 * non-breaking space
														 */, "\u00A0" }, { "¡", "¡"/*
																							 * inverted
																							 * exclamation
																							 * mark
																							 */, "\u00A1" },
				{ "¢", "¢"/* cent sign */, "\u00A2" }, { "£", "£"/*
																					 * pound
																					 * sign
																					 */, "\u00A3" },
				{ "¤", "¤"/* currency sign */, "\u00A4" }, { "¥", "¥"/*
																						 * yen
																						 * sign
																						 * =
																						 * yuan
																						 * sign
																						 */, "\u00A5" },
				{ "¦", "¦"/* broken bar = broken vertical bar */, "\u00A6" },
				{ "§", "§"/* section sign */, "\u00A7" }, { "¨", "¨"/*
																						 * diaeresis
																						 * =
																						 * spacing
																						 * diaeresis
																						 */, "\u00A8" },
				{ "©", "©"/* copyright sign */, "\u00A9" }, { "ª", "ª"/*
																						 * feminine
																						 * ordinal
																						 * indicator
																						 */, "\u00AA" },
				{ "«", "«"/*
									 * left-pointing double angle quotation mark
									 * = left pointing guillemet
									 */, "\u00AB" }, { "¬", "¬"/*
																		 * not
																		 * sign
																		 * =
																		 * discretionary
																		 * hyphen
																		 */, "\u00AC" },
				{ "", ""/* soft hyphen = discretionary hyphen */, "\u00AD" },
				{ "®", "®"/*
									 * registered sign = registered trade mark
									 * sign
									 */, "\u00AE" }, { "¯", "¯"/*
																		 * macron
																		 * =
																		 * spacing
																		 * macron
																		 * =
																		 * overline
																		 * = APL
																		 * overbar
																		 */, "\u00AF" },
				{ "°", "°"/* degree sign */, "\u00B0" }, { "±", "±"/*
																						 * plus
																						 * -
																						 * minus
																						 * sign
																						 * =
																						 * plus
																						 * -
																						 * or
																						 * -
																						 * minus
																						 * sign
																						 */, "\u00B1" },
				{ "²", "²"/*
									 * superscript two = superscript digit two =
									 * squared
									 */, "\u00B2" }, { "³", "³"/*
																		 * superscript
																		 * three
																		 * =
																		 * superscript
																		 * digit
																		 * three
																		 * =
																		 * cubed
																		 */, "\u00B3" },
				{ "´", "´"/* acute accent = spacing acute */, "\u00B4" },
				{ "µ", "µ"/* micro sign */, "\u00B5" }, { "¶", "¶"/*
																					 * pilcrow
																					 * sign
																					 * =
																					 * paragraph
																					 * sign
																					 */, "\u00B6" },
				{ "·", "·"/*
									 * middle dot = Georgian comma = Greek
									 * middle dot
									 */, "\u00B7" }, { "¸", "¸"/*
																		 * cedilla
																		 * =
																		 * spacing
																		 * cedilla
																		 */, "\u00B8" },
				{ "¹", "¹"/*
									 * superscript one = superscript digit one
									 */, "\u00B9" }, { "º", "º"/*
																		 * masculine
																		 * ordinal
																		 * indicator
																		 */, "\u00BA" },
				{ "»", "»"/*
									 * right-pointing double angle quotation
									 * mark = right pointing guillemet
									 */, "\u00BB" }, { "¼", "¼"/*
																			 * vulgar
																			 * fraction
																			 * one
																			 * quarter
																			 * =
																			 * fraction
																			 * one
																			 * quarter
																			 */, "\u00BC" },
				{ "½", "½"/*
									 * vulgar fraction one half = fraction one
									 * half
									 */, "\u00BD" }, { "¾", "¾"/*
																			 * vulgar
																			 * fraction
																			 * three
																			 * quarters
																			 * =
																			 * fraction
																			 * three
																			 * quarters
																			 */, "\u00BE" },
				{ "¿", "¿"/*
									 * inverted question mark = turned question
									 * mark
									 */, "\u00BF" }, { "À", "À"/*
																			 * latin
																			 * capital
																			 * letter
																			 * A
																			 * with
																			 * grave
																			 * =
																			 * latin
																			 * capital
																			 * letter
																			 * A
																			 * grave
																			 */, "\u00C0" },
				{ "Á", "Á"/* latin capital letter A with acute */, "\u00C1" },
				{ "Â", "Â"/*
									 * latin capital letter A with circumflex
									 */, "\u00C2" }, { "Ã", "Ã"/*
																			 * latin
																			 * capital
																			 * letter
																			 * A
																			 * with
																			 * tilde
																			 */, "\u00C3" },
				{ "Ä", "Ä"/*
									 * latin capital letter A with diaeresis
									 */, "\u00C4" }, { "Å", "Å"/*
																		 * latin
																		 * capital
																		 * letter
																		 * A
																		 * with
																		 * ring
																		 * above
																		 * =
																		 * latin
																		 * capital
																		 * letter
																		 * A
																		 * ring
																		 */, "\u00C5" },
				{ "Æ", "Æ"/*
									 * latin capital letter AE = latin capital
									 * ligature AE
									 */, "\u00C6" }, { "Ç", "Ç"/*
																			 * latin
																			 * capital
																			 * letter
																			 * C
																			 * with
																			 * cedilla
																			 */, "\u00C7" },
				{ "È", "È"/* latin capital letter E with grave */, "\u00C8" },
				{ "É", "É"/* latin capital letter E with acute */, "\u00C9" },
				{ "Ê", "Ê"/*
									 * latin capital letter E with circumflex
									 */, "\u00CA" }, { "Ë", "Ë"/*
																		 * latin
																		 * capital
																		 * letter
																		 * E
																		 * with
																		 * diaeresis
																		 */, "\u00CB" },
				{ "Ì", "Ì"/* latin capital letter I with grave */, "\u00CC" },
				{ "Í", "Í"/* latin capital letter I with acute */, "\u00CD" },
				{ "Î", "Î"/*
									 * latin capital letter I with circumflex
									 */, "\u00CE" }, { "Ï", "Ï"/*
																		 * latin
																		 * capital
																		 * letter
																		 * I
																		 * with
																		 * diaeresis
																		 */, "\u00CF" },
				{ "Ð", "Ð"/* latin capital letter ETH */, "\u00D0" }, { "Ñ", "Ñ"/*
																									 * latin
																									 * capital
																									 * letter
																									 * N
																									 * with
																									 * tilde
																									 */, "\u00D1" },
				{ "Ò", "Ò"/* latin capital letter O with grave */, "\u00D2" },
				{ "Ó", "Ó"/* latin capital letter O with acute */, "\u00D3" },
				{ "Ô", "Ô"/*
									 * latin capital letter O with circumflex
									 */, "\u00D4" }, { "Õ", "Õ"/*
																			 * latin
																			 * capital
																			 * letter
																			 * O
																			 * with
																			 * tilde
																			 */, "\u00D5" },
				{ "Ö", "Ö"/*
									 * latin capital letter O with diaeresis
									 */, "\u00D6" }, { "×", "×"/*
																		 * multiplication
																		 * sign
																		 */, "\u00D7" },
				{ "Ø", "Ø"/*
									 * latin capital letter O with stroke =
									 * latin capital letter O slash
									 */, "\u00D8" }, { "Ù", "Ù"/*
																			 * latin
																			 * capital
																			 * letter
																			 * U
																			 * with
																			 * grave
																			 */, "\u00D9" },
				{ "Ú", "Ú"/* latin capital letter U with acute */, "\u00DA" },
				{ "Û", "Û"/*
									 * latin capital letter U with circumflex
									 */, "\u00DB" }, { "Ü", "Ü"/*
																		 * latin
																		 * capital
																		 * letter
																		 * U
																		 * with
																		 * diaeresis
																		 */, "\u00DC" },
				{ "Ý", "Ý"/* latin capital letter Y with acute */, "\u00DD" },
				{ "Þ", "Þ"/* latin capital letter THORN */, "\u00DE" },
				{ "ß", "ß"/*
									 * latin small letter sharp s = ess-zed
									 */, "\u00DF" }, { "à", "à"/*
																			 * latin
																			 * small
																			 * letter
																			 * a
																			 * with
																			 * grave
																			 * =
																			 * latin
																			 * small
																			 * letter
																			 * a
																			 * grave
																			 */, "\u00E0" },
				{ "á", "á"/*
									 * latin small letter a with acute
									 */, "\u00E1" }, { "â", "â"/*
																		 * latin
																		 * small
																		 * letter
																		 * a
																		 * with
																		 * circumflex
																		 */, "\u00E2" },
				{ "ã", "ã"/* latin small letter a with tilde */, "\u00E3" },
				{ "ä", "ä"/* latin small letter a with diaeresis */, "\u00E4" },
				{ "å", "å"/*
									 * latin small letter a with ring above =
									 * latin small letter a ring
									 */, "\u00E5" }, { "æ", "æ"/*
																		 * latin
																		 * small
																		 * letter
																		 * ae =
																		 * latin
																		 * small
																		 * ligature
																		 * ae
																		 */, "\u00E6" },
				{ "ç", "ç"/* latin small letter c with cedilla */, "\u00E7" },
				{ "è", "è"/* latin small letter e with grave */, "\u00E8" },
				{ "é", "é"/* latin small letter e with acute */, "\u00E9" },
				{ "ê", "ê"/*
									 * latin small letter e with circumflex
									 */, "\u00EA" }, { "ë", "ë"/*
																		 * latin
																		 * small
																		 * letter
																		 * e
																		 * with
																		 * diaeresis
																		 */, "\u00EB" },
				{ "ì", "ì"/* latin small letter i with grave */, "\u00EC" },
				{ "í", "í"/* latin small letter i with acute */, "\u00ED" },
				{ "î", "î"/*
									 * latin small letter i with circumflex
									 */, "\u00EE" }, { "ï", "ï"/*
																		 * latin
																		 * small
																		 * letter
																		 * i
																		 * with
																		 * diaeresis
																		 */, "\u00EF" },
				{ "ð", "ð"/* latin small letter eth */, "\u00F0" }, { "ñ", "ñ"/*
																									 * latin
																									 * small
																									 * letter
																									 * n
																									 * with
																									 * tilde
																									 */, "\u00F1" },
				{ "ò", "ò"/* latin small letter o with grave */, "\u00F2" },
				{ "ó", "ó"/* latin small letter o with acute */, "\u00F3" },
				{ "ô", "ô"/*
									 * latin small letter o with circumflex
									 */, "\u00F4" }, { "õ", "õ"/*
																			 * latin
																			 * small
																			 * letter
																			 * o
																			 * with
																			 * tilde
																			 */, "\u00F5" },
				{ "ö", "ö"/* latin small letter o with diaeresis */, "\u00F6" },
				{ "÷", "÷"/* division sign */, "\u00F7" }, { "ø", "ø"/*
																							 * latin
																							 * small
																							 * letter
																							 * o
																							 * with
																							 * stroke
																							 * =
																							 * latin
																							 * small
																							 * letter
																							 * o
																							 * slash
																							 */, "\u00F8" },
				{ "ù", "ù"/* latin small letter u with grave */, "\u00F9" },
				{ "ú", "ú"/* latin small letter u with acute */, "\u00FA" },
				{ "û", "û"/*
									 * latin small letter u with circumflex
									 */, "\u00FB" }, { "ü", "ü"/*
																		 * latin
																		 * small
																		 * letter
																		 * u
																		 * with
																		 * diaeresis
																		 */, "\u00FC" },
				{ "ý", "ý"/* latin small letter y with acute */, "\u00FD" },
				{ "þ", "þ"/* latin small letter thorn with */, "\u00FE" },
				{ "ÿ", "ÿ"/* latin small letter y with diaeresis */, "\u00FF" },
				{ "ƒ", "ƒ"/*
									 * latin small f with hook = function =
									 * florin
									 */, "\u0192" }
				/* Greek */
				, { "Α", "Α"/* greek capital letter alpha */, "\u0391" },
				{ "Β", "Β"/* greek capital letter beta */, "\u0392" }, { "Γ", "Γ"/*
																									 * greek
																									 * capital
																									 * letter
																									 * gamma
																									 */, "\u0393" },
				{ "Δ", "Δ"/* greek capital letter delta */, "\u0394" },
				{ "Ε", "Ε"/* greek capital letter epsilon */, "\u0395" },
				{ "Ζ", "Ζ"/* greek capital letter zeta */, "\u0396" }, { "Η", "Η"/*
																									 * greek
																									 * capital
																									 * letter
																									 * eta
																									 */, "\u0397" },
				{ "Θ", "Θ"/* greek capital letter theta */, "\u0398" }, { "Ι", "Ι"/*
																									 * greek
																									 * capital
																									 * letter
																									 * iota
																									 */, "\u0399" },
				{ "Κ", "Κ"/* greek capital letter kappa */, "\u039A" },
				{ "Λ", "Λ"/* greek capital letter lambda */, "\u039B" }, { "Μ", "Μ"/*
																									 * greek
																									 * capital
																									 * letter
																									 * mu
																									 */, "\u039C" },
				{ "Ν", "Ν"/* greek capital letter nu */, "\u039D" }, { "Ξ", "Ξ"/*
																							 * greek
																							 * capital
																							 * letter
																							 * xi
																							 */, "\u039E" },
				{ "Ο", "Ο"/* greek capital letter omicron */, "\u039F" },
				{ "Π", "Π"/* greek capital letter pi */, "\u03A0" }, { "Ρ", "Ρ"/*
																							 * greek
																							 * capital
																							 * letter
																							 * rho
																							 */, "\u03A1" }
				/* there is no Sigmaf and no \u03A2 */
				, { "Σ", "Σ"/* greek capital letter sigma */, "\u03A3" },
				{ "Τ", "Τ"/* greek capital letter tau */, "\u03A4" },
				{ "Υ", "Υ"/* greek capital letter upsilon */, "\u03A5" },
				{ "Φ", "Φ"/* greek capital letter phi */, "\u03A6" }, { "Χ", "Χ"/*
																									 * greek
																									 * capital
																									 * letter
																									 * chi
																									 */, "\u03A7" },
				{ "Ψ", "Ψ"/* greek capital letter psi */, "\u03A8" }, { "Ω", "Ω"/*
																									 * greek
																									 * capital
																									 * letter
																									 * omega
																									 */, "\u03A9" },
				{ "α", "α"/* greek small letter alpha */, "\u03B1" }, { "β", "β"/*
																									 * greek
																									 * small
																									 * letter
																									 * beta
																									 */, "\u03B2" },
				{ "γ", "γ"/* greek small letter gamma */, "\u03B3" }, { "δ", "δ"/*
																									 * greek
																									 * small
																									 * letter
																									 * delta
																									 */, "\u03B4" },
				{ "ε", "ε"/* greek small letter epsilon */, "\u03B5" },
				{ "ζ", "ζ"/* greek small letter zeta */, "\u03B6" }, { "η", "η"/*
																								 * greek
																								 * small
																								 * letter
																								 * eta
																								 */, "\u03B7" },
				{ "θ", "θ"/* greek small letter theta */, "\u03B8" }, { "ι", "ι"/*
																									 * greek
																									 * small
																									 * letter
																									 * iota
																									 */, "\u03B9" },
				{ "κ", "κ"/* greek small letter kappa */, "\u03BA" }, { "λ", "λ"/*
																									 * greek
																									 * small
																									 * letter
																									 * lambda
																									 */, "\u03BB" },
				{ "μ", "μ"/* greek small letter mu */, "\u03BC" }, { "ν", "ν"/*
																							 * greek
																							 * small
																							 * letter
																							 * nu
																							 */, "\u03BD" },
				{ "ξ", "ξ"/* greek small letter xi */, "\u03BE" }, { "ο", "ο"/*
																								 * greek
																								 * small
																								 * letter
																								 * omicron
																								 */, "\u03BF" },
				{ "π", "π"/* greek small letter pi */, "\u03C0" }, { "ρ", "ρ"/*
																							 * greek
																							 * small
																							 * letter
																							 * rho
																							 */, "\u03C1" },
				{ "ς", "ς"/* greek small letter final sigma */, "\u03C2" },
				{ "σ", "σ"/* greek small letter sigma */, "\u03C3" }, { "τ", "τ"/*
																									 * greek
																									 * small
																									 * letter
																									 * tau
																									 */, "\u03C4" },
				{ "υ", "υ"/* greek small letter upsilon */, "\u03C5" },
				{ "φ", "φ"/* greek small letter phi */, "\u03C6" }, { "χ", "χ"/*
																								 * greek
																								 * small
																								 * letter
																								 * chi
																								 */, "\u03C7" },
				{ "ψ", "ψ"/* greek small letter psi */, "\u03C8" }, { "ω", "ω"/*
																									 * greek
																									 * small
																									 * letter
																									 * omega
																									 */, "\u03C9" },
				{ "ϑ", "ϑ"/* greek small letter theta symbol */, "\u03D1" },
				{ "ϒ", "ϒ"/* greek upsilon with hook symbol */, "\u03D2" }, { "ϖ", "ϖ"/*
																										 * greek
																										 * pi
																										 * symbol
																										 */, "\u03D6" }
				/* General Punctuation */
				, { "•", "•"/* bullet = black small circle */, "\u2022" }
				/* bullet is NOT the same as bullet operator ,"\u2219 */
				, { "…", "…"/*
										 * horizontal ellipsis = three dot
										 * leader
										 */, "\u2026" }, { "′", "′"/*
																				 * prime
																				 * =
																				 * minutes
																				 * =
																				 * feet
																				 */, "\u2032" },
				{ "″", "″"/* double prime = seconds = inches */, "\u2033" },
				{ "‾", "‾"/* overline = spacing overscore */, "\u203E" }, { "⁄", "⁄"/*
																										 * fraction
																										 * slash
																										 */, "\u2044" }
				/* Letterlike Symbols */
				, { "℘", "℘"/*
										 * script capital P = power set =
										 * Weierstrass p
										 */, "\u2118" }, { "ℑ", "ℑ"/*
																				 * blackletter
																				 * capital
																				 * I
																				 * =
																				 * imaginary
																				 * part
																				 */, "\u2111" },
				{ "ℜ", "ℜ"/*
									 * blackletter capital R = real part symbol
									 */, "\u211C" }, { "™", "™"/*
																			 * trade
																			 * mark
																			 * sign
																			 */, "\u2122" },
				{ "ℵ", "ℵ"/*
										 * alef symbol = first transfinite
										 * cardinal
										 */, "\u2135" }
				/* alef symbol is NOT the same as hebrew letter alef ,"\u05D0"} */
				/* Arrows */
				, { "←", "←"/* leftwards arrow */, "\u2190" }, { "↑", "↑"/*
																								 * upwards
																								 * arrow
																								 */, "\u2191" },
				{ "→", "→"/* rightwards arrow */, "\u2192" }, { "↓", "↓"/*
																							 * downwards
																							 * arrow
																							 */, "\u2193" },
				{ "↔", "↔"/* left right arrow */, "\u2194" }, { "↵", "↵"/*
																							 * downwards
																							 * arrow
																							 * with
																							 * corner
																							 * leftwards
																							 * =
																							 * carriage
																							 * return
																							 */, "\u21B5" },
				{ "⇐", "⇐"/* leftwards double arrow */, "\u21D0" }
				/*
				 * Unicode does not say that lArr is the same as the 'is implied
				 * by' arrow but also does not have any other character for that
				 * function. So ? lArr can be used for 'is implied by' as
				 * ISOtech suggests
				 */
				, { "⇑", "⇑"/* upwards double arrow */, "\u21D1" }, { "⇒", "⇒"/*
																									 * rightwards
																									 * double
																									 * arrow
																									 */, "\u21D2" }
				/*
				 * Unicode does not say this is the 'implies' character but does
				 * not have another character with this function so ? rArr can
				 * be used for 'implies' as ISOtech suggests
				 */
				, { "⇓", "⇓"/* downwards double arrow */, "\u21D3" }, { "⇔", "⇔"/*
																										 * left
																										 * right
																										 * double
																										 * arrow
																										 */, "\u21D4" }
				/* Mathematical Operators */
				, { "∀", "∀"/* for all */, "\u2200" }, { "∂", "∂"/*
																						 * partial
																						 * differential
																						 */, "\u2202" },
				{ "∃", "∃"/* there exists */, "\u2203" }, { "∅", "∅"/*
																						 * empty
																						 * set
																						 * =
																						 * null
																						 * set
																						 * =
																						 * diameter
																						 */, "\u2205" },
				{ "∇", "∇"/* nabla = backward difference */, "\u2207" },
				{ "∈", "∈"/* element of */, "\u2208" }, { "∉", "∉"/*
																						 * not
																						 * an
																						 * element
																						 * of
																						 */, "\u2209" },
				{ "∋", "∋"/* contains as member */, "\u220B" }
				/* should there be a more memorable name than 'ni'? */
				, { "∏", "∏"/* n-ary product = product sign */, "\u220F" }
				/* prod is NOT the same character as ,"\u03A0"} */
				, { "∑", "∑"/* n-ary sumation */, "\u2211" }
				/* sum is NOT the same character as ,"\u03A3"} */
				, { "−", "−"/* minus sign */, "\u2212" }, { "∗", "∗"/*
																							 * asterisk
																							 * operator
																							 */, "\u2217" },
				{ "√", "√"/* square root = radical sign */, "\u221A" },
				{ "∝", "∝"/* proportional to */, "\u221D" }, { "∞", "∞"/* infinity */, "\u221E" },
				{ "∠", "∠"/* angle */, "\u2220" }, { "∧", "∧"/*
																				 * logical
																				 * and
																				 * =
																				 * wedge
																				 */, "\u2227" },
				{ "∨", "∨"/* logical or = vee */, "\u2228" }, { "∩", "∩"/*
																							 * intersection
																							 * =
																							 * cap
																							 */, "\u2229" },
				{ "∪", "∪"/* union = cup */, "\u222A" }, { "∫", "∫"/* integral */, "\u222B" },
				{ "∴", "∴"/* therefore */, "\u2234" }, { "∼", "∼"/*
																						 * tilde
																						 * operator
																						 * =
																						 * varies
																						 * with
																						 * =
																						 * similar
																						 * to
																						 */, "\u223C" }
				/*
				 * tilde operator is NOT the same character as the tilde
				 * ,"\u007E"}
				 */
				, { "≅", "≅"/* approximately equal to */, "\u2245" },
				{ "≈", "≈"/* almost equal to = asymptotic to */, "\u2248" },
				{ "≠", "≠"/* not equal to */, "\u2260" }, { "≡", "≡"/*
																						 * identical
																						 * to
																						 */, "\u2261" },
				{ "≤", "≤"/* less-than or equal to */, "\u2264" }, { "≥", "≥"/*
																								 * greater
																								 * -
																								 * than
																								 * or
																								 * equal
																								 * to
																								 */, "\u2265" },
				{ "⊂", "⊂"/* subset of */, "\u2282" }, { "⊃", "⊃"/*
																					 * superset
																					 * of
																					 */, "\u2283" }
				/* note that nsup 'not a superset of ,"\u2283"} */
				, { "⊆", "⊆"/* subset of or equal to */, "\u2286" }, { "⊇", "⊇"/*
																									 * superset
																									 * of
																									 * or
																									 * equal
																									 * to
																									 */, "\u2287" },
				{ "⊕", "⊕"/* circled plus = direct sum */, "\u2295" },
				{ "⊗", "⊗"/* circled times = vector product */, "\u2297" },
				{ "⊥", "⊥"/*
									 * up tack = orthogonal to = perpendicular
									 */, "\u22A5" }, { "⋅", "⋅"/*
																		 * dot
																		 * operator
																		 */, "\u22C5" }
				/*
				 * dot operator is NOT the same character as ,"\u00B7"} /*
				 * Miscellaneous Technical
				 */
				, { "⌈", "⌈"/* left ceiling = apl upstile */, "\u2308" },
				{ "⌉", "⌉"/* right ceiling */, "\u2309" }, { "⌊", "⌊"/*
																							 * left
																							 * floor
																							 * =
																							 * apl
																							 * downstile
																							 */, "\u230A" },
				{ "⌋", "⌋"/* right floor */, "\u230B" }, { "⟨", "〈"/*
																							 * left
																							 * -
																							 * pointing
																							 * angle
																							 * bracket
																							 * =
																							 * bra
																							 */, "\u2329" }
				/* lang is NOT the same character as ,"\u003C"} */
				, { "⟩", "〉"/*
										 * right-pointing angle bracket = ket
										 */, "\u232A" }
				/* rang is NOT the same character as ,"\u003E"} */
				/* Geometric Shapes */
				, { "◊", "◊"/* lozenge */, "\u25CA" }
				/* Miscellaneous Symbols */
				, { "♠", "♠"/* black spade suit */, "\u2660" }
				/* black here seems to mean filled as opposed to hollow */
				, { "♣", "♣"/* black club suit = shamrock */, "\u2663" },
				{ "♥", "♥"/* black heart suit = valentine */, "\u2665" },
				{ "♦", "♦"/* black diamond suit */, "\u2666" }, { """, """ /*
																							 * quotation
																							 * mark
																							 * =
																							 * APL
																							 * quote
																							 */, "\"" },
				{ "&", "&" /* ampersand */, "\u0026" }, { "<", "<" /*
																					 * less
																					 * -
																					 * than
																					 * sign
																					 */, "\u003C" },
				{ ">", ">" /* greater-than sign */, "\u003E" }
				/* Latin Extended-A */
				, { "Œ", "Œ" /* latin capital ligature OE */, "\u0152" },
				{ "œ", "œ" /* latin small ligature oe */, "\u0153" }
				/*
				 * ligature is a misnomer this is a separate character in some
				 * languages
				 */
				, { "Š", "Š" /*
										 * latin capital letter S with caron
										 */, "\u0160" }, { "š", "š" /*
																				 * latin
																				 * small
																				 * letter
																				 * s
																				 * with
																				 * caron
																				 */, "\u0161" },
				{ "Ÿ", "Ÿ" /*
									 * latin capital letter Y with diaeresis
									 */, "\u0178" }
				/* Spacing Modifier Letters */
				, { "ˆ", "ˆ" /* modifier letter circumflex accent */, "\u02C6" },
				{ "˜", "˜" /* small tilde */, "\u02DC" }
				/* General Punctuation */
				, { " ", " "/* en space */, "\u2002" }, { " ", " "/*
																						 * em
																						 * space
																						 */, "\u2003" },
				{ " ", " "/* thin space */, "\u2009" }, { "‌", "‌"/*
																							 * zero
																							 * width
																							 * non
																							 * -
																							 * joiner
																							 */, "\u200C" },
				{ "‍", "‍"/* zero width joiner */, "\u200D" }, { "‎", "‎"/*
																							 * left
																							 * -
																							 * to
																							 * -
																							 * right
																							 * mark
																							 */, "\u200E" },
				{ "‏", "‏"/* right-to-left mark */, "\u200F" }, { "–", "–"/*
																								 * en
																								 * dash
																								 */, "\u2013" },
				{ "—", "—"/* em dash */, "\u2014" }, { "‘", "‘"/*
																					 * left
																					 * single
																					 * quotation
																					 * mark
																					 */, "\u2018" },
				{ "’", "’"/* right single quotation mark */, "\u2019" },
				{ "‚", "‚"/* single low-9 quotation mark */, "\u201A" },
				{ "“", "“"/* left double quotation mark */, "\u201C" },
				{ "”", "”"/* right double quotation mark */, "\u201D" },
				{ "„", "„"/* double low-9 quotation mark */, "\u201E" },
				{ "†", "†"/* dagger */, "\u2020" }, { "‡", "‡"/*
																						 * double
																						 * dagger
																						 */, "\u2021" },
				{ "‰", "‰"/* per mille sign */, "\u2030" }, { "‹", "‹"/*
																								 * single
																								 * left
																								 * -
																								 * pointing
																								 * angle
																								 * quotation
																								 * mark
																								 */, "\u2039" }
				/* lsaquo is proposed but not yet ISO standardized */
				, { "›", "›"/*
										 * single right-pointing angle quotation
										 * mark
										 */, "\u203A" }
				/* rsaquo is proposed but not yet ISO standardized */
				, { "€", "€" /* euro sign */, "\u20AC" } };
		for (final String[] entity : entities) {
			entityEscapeMap.put(entity[2], entity[0]);
			escapeEntityMap.put(entity[0], entity[2]);
			escapeEntityMap.put(entity[1], entity[2]);
		}
	}

	public static String escape(final String original) {
		final StringBuffer buf = new StringBuffer(original);
		escape(buf);
		return buf.toString();
	}

	public static void escape(final StringBuffer original) {
		int index = 0;
		String escaped;
		while (index < original.length()) {
			escaped = entityEscapeMap.get(original.substring(index, index + 1));
			if (escaped != null) {
				original.replace(index, index + 1, escaped);
				index += escaped.length();
			} else {
				index++;
			}
		}
	}

	public static String unescape(final String original) {
		String returnValue = null;
		if (original != null) {
			final StringBuffer buf = new StringBuffer(original);
			unescape(buf);
			returnValue = buf.toString();
		}
		return returnValue;
	}

	public static void unescape(final StringBuffer original) {
		int index = 0;
		int semicolonIndex;
		String escaped;
		String entity;
		while (index < original.length()) {
			index = original.indexOf("&", index);
			if (-1 == index) {
				break;
			}
			semicolonIndex = original.indexOf(";", index);
			if (-1 != semicolonIndex) {
				escaped = original.substring(index, semicolonIndex + 1);
				entity = escapeEntityMap.get(escaped);
				if (entity != null) {
					original.replace(index, semicolonIndex + 1, entity);
				}
				index++;
			} else {
				break;
			}
		}
	}

}