/*
* Copyright 2007 Yusuke Yamamoto
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package twitter4j.http;
import java.util.HashMap;
import java.util.Map;
/**
 * Converts between characters and their HTML 4 character entity references.
 * <p>
 * {@code escape} replaces every character listed in the entity table with its
 * named entity (for example {@code <} becomes {@code &lt;}). {@code unescape}
 * replaces both named entities and their decimal numeric forms (for example
 * {@code &lt;} and {@code &#60;}) with the character they stand for. Entities
 * that are not in the table are left untouched.
 * <p>
 * The table is written with ASCII entity names and Unicode escapes only, so
 * the meaning of this file does not depend on the source encoding.
 */
public final class HTMLEntity {
    /** Maps a one-character string to its named entity, e.g. {@code "<"} to {@code "&lt;"}. */
    private static final Map<String, String> entityEscapeMap = new HashMap<String, String>();
    /** Maps named and decimal numeric entities to the one-character string they represent. */
    private static final Map<String, String> escapeEntityMap = new HashMap<String, String>();
    /** Length of the longest key stored in {@link #escapeEntityMap}. */
    private static final int longestEntityLength;

    static {
        // Each row is { named entity, character }. The decimal numeric entity
        // is derived from the character's code point in the loop below, so the
        // two forms cannot drift apart.
        final String[][] entities = {
            /* Latin-1 */
            {"&nbsp;", "\u00A0"}, // no-break space = non-breaking space
            {"&iexcl;", "\u00A1"}, // inverted exclamation mark
            {"&cent;", "\u00A2"}, // cent sign
            {"&pound;", "\u00A3"}, // pound sign
            {"&curren;", "\u00A4"}, // currency sign
            {"&yen;", "\u00A5"}, // yen sign = yuan sign
            {"&brvbar;", "\u00A6"}, // broken bar = broken vertical bar
            {"&sect;", "\u00A7"}, // section sign
            {"&uml;", "\u00A8"}, // diaeresis = spacing diaeresis
            {"&copy;", "\u00A9"}, // copyright sign
            {"&ordf;", "\u00AA"}, // feminine ordinal indicator
            {"&laquo;", "\u00AB"}, // left-pointing double angle quotation mark = left pointing guillemet
            {"&not;", "\u00AC"}, // not sign = discretionary hyphen
            {"&shy;", "\u00AD"}, // soft hyphen = discretionary hyphen
            {"&reg;", "\u00AE"}, // registered sign = registered trade mark sign
            {"&macr;", "\u00AF"}, // macron = spacing macron = overline = APL overbar
            {"&deg;", "\u00B0"}, // degree sign
            {"&plusmn;", "\u00B1"}, // plus-minus sign = plus-or-minus sign
            {"&sup2;", "\u00B2"}, // superscript two = superscript digit two = squared
            {"&sup3;", "\u00B3"}, // superscript three = superscript digit three = cubed
            {"&acute;", "\u00B4"}, // acute accent = spacing acute
            {"&micro;", "\u00B5"}, // micro sign
            {"&para;", "\u00B6"}, // pilcrow sign = paragraph sign
            {"&middot;", "\u00B7"}, // middle dot = Georgian comma = Greek middle dot
            {"&cedil;", "\u00B8"}, // cedilla = spacing cedilla
            {"&sup1;", "\u00B9"}, // superscript one = superscript digit one
            {"&ordm;", "\u00BA"}, // masculine ordinal indicator
            {"&raquo;", "\u00BB"}, // right-pointing double angle quotation mark = right pointing guillemet
            {"&frac14;", "\u00BC"}, // vulgar fraction one quarter = fraction one quarter
            {"&frac12;", "\u00BD"}, // vulgar fraction one half = fraction one half
            {"&frac34;", "\u00BE"}, // vulgar fraction three quarters = fraction three quarters
            {"&iquest;", "\u00BF"}, // inverted question mark = turned question mark
            {"&Agrave;", "\u00C0"}, // latin capital letter A with grave = latin capital letter A grave
            {"&Aacute;", "\u00C1"}, // latin capital letter A with acute
            {"&Acirc;", "\u00C2"}, // latin capital letter A with circumflex
            {"&Atilde;", "\u00C3"}, // latin capital letter A with tilde
            {"&Auml;", "\u00C4"}, // latin capital letter A with diaeresis
            {"&Aring;", "\u00C5"}, // latin capital letter A with ring above = latin capital letter A ring
            {"&AElig;", "\u00C6"}, // latin capital letter AE = latin capital ligature AE
            {"&Ccedil;", "\u00C7"}, // latin capital letter C with cedilla
            {"&Egrave;", "\u00C8"}, // latin capital letter E with grave
            {"&Eacute;", "\u00C9"}, // latin capital letter E with acute
            {"&Ecirc;", "\u00CA"}, // latin capital letter E with circumflex
            {"&Euml;", "\u00CB"}, // latin capital letter E with diaeresis
            {"&Igrave;", "\u00CC"}, // latin capital letter I with grave
            {"&Iacute;", "\u00CD"}, // latin capital letter I with acute
            {"&Icirc;", "\u00CE"}, // latin capital letter I with circumflex
            {"&Iuml;", "\u00CF"}, // latin capital letter I with diaeresis
            {"&ETH;", "\u00D0"}, // latin capital letter ETH
            {"&Ntilde;", "\u00D1"}, // latin capital letter N with tilde
            {"&Ograve;", "\u00D2"}, // latin capital letter O with grave
            {"&Oacute;", "\u00D3"}, // latin capital letter O with acute
            {"&Ocirc;", "\u00D4"}, // latin capital letter O with circumflex
            {"&Otilde;", "\u00D5"}, // latin capital letter O with tilde
            {"&Ouml;", "\u00D6"}, // latin capital letter O with diaeresis
            {"&times;", "\u00D7"}, // multiplication sign
            {"&Oslash;", "\u00D8"}, // latin capital letter O with stroke = latin capital letter O slash
            {"&Ugrave;", "\u00D9"}, // latin capital letter U with grave
            {"&Uacute;", "\u00DA"}, // latin capital letter U with acute
            {"&Ucirc;", "\u00DB"}, // latin capital letter U with circumflex
            {"&Uuml;", "\u00DC"}, // latin capital letter U with diaeresis
            {"&Yacute;", "\u00DD"}, // latin capital letter Y with acute
            {"&THORN;", "\u00DE"}, // latin capital letter THORN
            {"&szlig;", "\u00DF"}, // latin small letter sharp s = ess-zed
            {"&agrave;", "\u00E0"}, // latin small letter a with grave = latin small letter a grave
            {"&aacute;", "\u00E1"}, // latin small letter a with acute
            {"&acirc;", "\u00E2"}, // latin small letter a with circumflex
            {"&atilde;", "\u00E3"}, // latin small letter a with tilde
            {"&auml;", "\u00E4"}, // latin small letter a with diaeresis
            {"&aring;", "\u00E5"}, // latin small letter a with ring above = latin small letter a ring
            {"&aelig;", "\u00E6"}, // latin small letter ae = latin small ligature ae
            {"&ccedil;", "\u00E7"}, // latin small letter c with cedilla
            {"&egrave;", "\u00E8"}, // latin small letter e with grave
            {"&eacute;", "\u00E9"}, // latin small letter e with acute
            {"&ecirc;", "\u00EA"}, // latin small letter e with circumflex
            {"&euml;", "\u00EB"}, // latin small letter e with diaeresis
            {"&igrave;", "\u00EC"}, // latin small letter i with grave
            {"&iacute;", "\u00ED"}, // latin small letter i with acute
            {"&icirc;", "\u00EE"}, // latin small letter i with circumflex
            {"&iuml;", "\u00EF"}, // latin small letter i with diaeresis
            {"&eth;", "\u00F0"}, // latin small letter eth
            {"&ntilde;", "\u00F1"}, // latin small letter n with tilde
            {"&ograve;", "\u00F2"}, // latin small letter o with grave
            {"&oacute;", "\u00F3"}, // latin small letter o with acute
            {"&ocirc;", "\u00F4"}, // latin small letter o with circumflex
            {"&otilde;", "\u00F5"}, // latin small letter o with tilde
            {"&ouml;", "\u00F6"}, // latin small letter o with diaeresis
            {"&divide;", "\u00F7"}, // division sign
            {"&oslash;", "\u00F8"}, // latin small letter o with stroke = latin small letter o slash
            {"&ugrave;", "\u00F9"}, // latin small letter u with grave
            {"&uacute;", "\u00FA"}, // latin small letter u with acute
            {"&ucirc;", "\u00FB"}, // latin small letter u with circumflex
            {"&uuml;", "\u00FC"}, // latin small letter u with diaeresis
            {"&yacute;", "\u00FD"}, // latin small letter y with acute
            {"&thorn;", "\u00FE"}, // latin small letter thorn
            {"&yuml;", "\u00FF"}, // latin small letter y with diaeresis
            {"&fnof;", "\u0192"}, // latin small f with hook = function = florin
            /* Greek */
            {"&Alpha;", "\u0391"}, // greek capital letter alpha
            {"&Beta;", "\u0392"}, // greek capital letter beta
            {"&Gamma;", "\u0393"}, // greek capital letter gamma
            {"&Delta;", "\u0394"}, // greek capital letter delta
            {"&Epsilon;", "\u0395"}, // greek capital letter epsilon
            {"&Zeta;", "\u0396"}, // greek capital letter zeta
            {"&Eta;", "\u0397"}, // greek capital letter eta
            {"&Theta;", "\u0398"}, // greek capital letter theta
            {"&Iota;", "\u0399"}, // greek capital letter iota
            {"&Kappa;", "\u039A"}, // greek capital letter kappa
            {"&Lambda;", "\u039B"}, // greek capital letter lambda
            {"&Mu;", "\u039C"}, // greek capital letter mu
            {"&Nu;", "\u039D"}, // greek capital letter nu
            {"&Xi;", "\u039E"}, // greek capital letter xi
            {"&Omicron;", "\u039F"}, // greek capital letter omicron
            {"&Pi;", "\u03A0"}, // greek capital letter pi
            {"&Rho;", "\u03A1"}, // greek capital letter rho
            // there is no Sigmaf and no U+03A2
            {"&Sigma;", "\u03A3"}, // greek capital letter sigma
            {"&Tau;", "\u03A4"}, // greek capital letter tau
            {"&Upsilon;", "\u03A5"}, // greek capital letter upsilon
            {"&Phi;", "\u03A6"}, // greek capital letter phi
            {"&Chi;", "\u03A7"}, // greek capital letter chi
            {"&Psi;", "\u03A8"}, // greek capital letter psi
            {"&Omega;", "\u03A9"}, // greek capital letter omega
            {"&alpha;", "\u03B1"}, // greek small letter alpha
            {"&beta;", "\u03B2"}, // greek small letter beta
            {"&gamma;", "\u03B3"}, // greek small letter gamma
            {"&delta;", "\u03B4"}, // greek small letter delta
            {"&epsilon;", "\u03B5"}, // greek small letter epsilon
            {"&zeta;", "\u03B6"}, // greek small letter zeta
            {"&eta;", "\u03B7"}, // greek small letter eta
            {"&theta;", "\u03B8"}, // greek small letter theta
            {"&iota;", "\u03B9"}, // greek small letter iota
            {"&kappa;", "\u03BA"}, // greek small letter kappa
            {"&lambda;", "\u03BB"}, // greek small letter lambda
            {"&mu;", "\u03BC"}, // greek small letter mu
            {"&nu;", "\u03BD"}, // greek small letter nu
            {"&xi;", "\u03BE"}, // greek small letter xi
            {"&omicron;", "\u03BF"}, // greek small letter omicron
            {"&pi;", "\u03C0"}, // greek small letter pi
            {"&rho;", "\u03C1"}, // greek small letter rho
            {"&sigmaf;", "\u03C2"}, // greek small letter final sigma
            {"&sigma;", "\u03C3"}, // greek small letter sigma
            {"&tau;", "\u03C4"}, // greek small letter tau
            {"&upsilon;", "\u03C5"}, // greek small letter upsilon
            {"&phi;", "\u03C6"}, // greek small letter phi
            {"&chi;", "\u03C7"}, // greek small letter chi
            {"&psi;", "\u03C8"}, // greek small letter psi
            {"&omega;", "\u03C9"}, // greek small letter omega
            {"&thetasym;", "\u03D1"}, // greek small letter theta symbol
            {"&upsih;", "\u03D2"}, // greek upsilon with hook symbol
            {"&piv;", "\u03D6"}, // greek pi symbol
            /* General Punctuation */
            {"&bull;", "\u2022"}, // bullet = black small circle (NOT the bullet operator U+2219)
            {"&hellip;", "\u2026"}, // horizontal ellipsis = three dot leader
            {"&prime;", "\u2032"}, // prime = minutes = feet
            {"&Prime;", "\u2033"}, // double prime = seconds = inches
            {"&oline;", "\u203E"}, // overline = spacing overscore
            {"&frasl;", "\u2044"}, // fraction slash
            /* Letterlike Symbols */
            {"&weierp;", "\u2118"}, // script capital P = power set = Weierstrass p
            {"&image;", "\u2111"}, // blackletter capital I = imaginary part
            {"&real;", "\u211C"}, // blackletter capital R = real part symbol
            {"&trade;", "\u2122"}, // trade mark sign
            {"&alefsym;", "\u2135"}, // alef symbol = first transfinite cardinal (NOT hebrew letter alef U+05D0)
            /* Arrows */
            {"&larr;", "\u2190"}, // leftwards arrow
            {"&uarr;", "\u2191"}, // upwards arrow
            {"&rarr;", "\u2192"}, // rightwards arrow
            {"&darr;", "\u2193"}, // downwards arrow
            {"&harr;", "\u2194"}, // left right arrow
            {"&crarr;", "\u21B5"}, // downwards arrow with corner leftwards = carriage return
            // Unicode does not say that lArr is the same as the 'is implied by'
            // arrow but also does not have any other character for that
            // function, so lArr can be used for 'is implied by' as ISOtech suggests.
            {"&lArr;", "\u21D0"}, // leftwards double arrow
            {"&uArr;", "\u21D1"}, // upwards double arrow
            // Unicode does not say this is the 'implies' character but does not
            // have another character with this function, so rArr can be used
            // for 'implies' as ISOtech suggests.
            {"&rArr;", "\u21D2"}, // rightwards double arrow
            {"&dArr;", "\u21D3"}, // downwards double arrow
            {"&hArr;", "\u21D4"}, // left right double arrow
            /* Mathematical Operators */
            {"&forall;", "\u2200"}, // for all
            {"&part;", "\u2202"}, // partial differential
            {"&exist;", "\u2203"}, // there exists
            {"&empty;", "\u2205"}, // empty set = null set = diameter
            {"&nabla;", "\u2207"}, // nabla = backward difference
            {"&isin;", "\u2208"}, // element of
            {"&notin;", "\u2209"}, // not an element of
            {"&ni;", "\u220B"}, // contains as member
            {"&prod;", "\u220F"}, // n-ary product = product sign (NOT the same character as U+03A0)
            {"&sum;", "\u2211"}, // n-ary summation (NOT the same character as U+03A3)
            {"&minus;", "\u2212"}, // minus sign
            {"&lowast;", "\u2217"}, // asterisk operator
            {"&radic;", "\u221A"}, // square root = radical sign
            {"&prop;", "\u221D"}, // proportional to
            {"&infin;", "\u221E"}, // infinity
            {"&ang;", "\u2220"}, // angle
            {"&and;", "\u2227"}, // logical and = wedge
            {"&or;", "\u2228"}, // logical or = vee
            {"&cap;", "\u2229"}, // intersection = cap
            {"&cup;", "\u222A"}, // union = cup
            {"&int;", "\u222B"}, // integral
            {"&there4;", "\u2234"}, // therefore
            {"&sim;", "\u223C"}, // tilde operator = varies with = similar to (NOT the tilde U+007E)
            {"&cong;", "\u2245"}, // approximately equal to
            {"&asymp;", "\u2248"}, // almost equal to = asymptotic to
            {"&ne;", "\u2260"}, // not equal to
            {"&equiv;", "\u2261"}, // identical to
            {"&le;", "\u2264"}, // less-than or equal to
            {"&ge;", "\u2265"}, // greater-than or equal to
            {"&sub;", "\u2282"}, // subset of
            {"&sup;", "\u2283"}, // superset of
            {"&sube;", "\u2286"}, // subset of or equal to
            {"&supe;", "\u2287"}, // superset of or equal to
            {"&oplus;", "\u2295"}, // circled plus = direct sum
            {"&otimes;", "\u2297"}, // circled times = vector product
            {"&perp;", "\u22A5"}, // up tack = orthogonal to = perpendicular
            {"&sdot;", "\u22C5"}, // dot operator (NOT the same character as U+00B7)
            /* Miscellaneous Technical */
            {"&lceil;", "\u2308"}, // left ceiling = apl upstile
            {"&rceil;", "\u2309"}, // right ceiling
            {"&lfloor;", "\u230A"}, // left floor = apl downstile
            {"&rfloor;", "\u230B"}, // right floor
            {"&lang;", "\u2329"}, // left-pointing angle bracket = bra (NOT the same character as U+003C)
            {"&rang;", "\u232A"}, // right-pointing angle bracket = ket (NOT the same character as U+003E)
            /* Geometric Shapes */
            {"&loz;", "\u25CA"}, // lozenge
            /* Miscellaneous Symbols ("black" here seems to mean filled as opposed to hollow) */
            {"&spades;", "\u2660"}, // black spade suit
            {"&clubs;", "\u2663"}, // black club suit = shamrock
            {"&hearts;", "\u2665"}, // black heart suit = valentine
            {"&diams;", "\u2666"}, // black diamond suit
            /* Markup-significant characters */
            {"&quot;", "\""}, // quotation mark = APL quote
            {"&amp;", "\u0026"}, // ampersand
            {"&lt;", "\u003C"}, // less-than sign
            {"&gt;", "\u003E"}, // greater-than sign
            /* Latin Extended-A */
            {"&OElig;", "\u0152"}, // latin capital ligature OE
            // "ligature" is a misnomer: this is a separate character in some languages
            {"&oelig;", "\u0153"}, // latin small ligature oe
            {"&Scaron;", "\u0160"}, // latin capital letter S with caron
            {"&scaron;", "\u0161"}, // latin small letter s with caron
            {"&Yuml;", "\u0178"}, // latin capital letter Y with diaeresis
            /* Spacing Modifier Letters */
            {"&circ;", "\u02C6"}, // modifier letter circumflex accent
            {"&tilde;", "\u02DC"}, // small tilde
            /* General Punctuation */
            {"&ensp;", "\u2002"}, // en space
            {"&emsp;", "\u2003"}, // em space
            {"&thinsp;", "\u2009"}, // thin space
            {"&zwnj;", "\u200C"}, // zero width non-joiner
            {"&zwj;", "\u200D"}, // zero width joiner
            {"&lrm;", "\u200E"}, // left-to-right mark
            {"&rlm;", "\u200F"}, // right-to-left mark
            {"&ndash;", "\u2013"}, // en dash
            {"&mdash;", "\u2014"}, // em dash
            {"&lsquo;", "\u2018"}, // left single quotation mark
            {"&rsquo;", "\u2019"}, // right single quotation mark
            {"&sbquo;", "\u201A"}, // single low-9 quotation mark
            {"&ldquo;", "\u201C"}, // left double quotation mark
            {"&rdquo;", "\u201D"}, // right double quotation mark
            {"&bdquo;", "\u201E"}, // double low-9 quotation mark
            {"&dagger;", "\u2020"}, // dagger
            {"&Dagger;", "\u2021"}, // double dagger
            {"&permil;", "\u2030"}, // per mille sign
            {"&lsaquo;", "\u2039"}, // single left-pointing angle quotation mark (proposed, not yet ISO standardized)
            {"&rsaquo;", "\u203A"}, // single right-pointing angle quotation mark (proposed, not yet ISO standardized)
            {"&euro;", "\u20AC"} // euro sign
        };

        int longest = 0;
        for (final String[] entity : entities) {
            final String name = entity[0];
            final String character = entity[1];
            // Every table character is a single UTF-16 code unit, so its
            // decimal numeric entity is simply the value of that char.
            final String numeric = "&#" + (int) character.charAt(0) + ";";
            entityEscapeMap.put(character, name);
            escapeEntityMap.put(name, character);
            escapeEntityMap.put(numeric, character);
            longest = Math.max(longest, Math.max(name.length(), numeric.length()));
        }
        longestEntityLength = longest;
    }

    /**
     * Returns a copy of the given text in which every character that has a
     * named entity in the table is replaced by that entity.
     *
     * @param original the text to escape, may be {@code null}
     * @return the escaped text, or {@code null} if {@code original} is {@code null}
     */
    public static String escape(final String original) {
        // Mirror unescape(String): a null input yields null instead of an NPE.
        if (original == null) {
            return null;
        }
        final StringBuffer buf = new StringBuffer(original);
        escape(buf);
        return buf.toString();
    }

    /**
     * Escapes the given buffer in place, replacing every character that has a
     * named entity in the table by that entity.
     *
     * @param original the buffer to modify
     */
    public static void escape(final StringBuffer original) {
        int index = 0;
        while (index < original.length()) {
            final String escaped = entityEscapeMap.get(String.valueOf(original.charAt(index)));
            if (escaped == null) {
                index++;
            } else {
                original.replace(index, index + 1, escaped);
                // Skip the inserted entity so its leading '&' is not escaped again.
                index += escaped.length();
            }
        }
    }

    /**
     * Returns a copy of the given text in which every known named or decimal
     * numeric entity is replaced by the character it represents.
     *
     * @param original the text to unescape, may be {@code null}
     * @return the unescaped text, or {@code null} if {@code original} is {@code null}
     */
    public static String unescape(final String original) {
        if (original == null) {
            return null;
        }
        final StringBuffer buf = new StringBuffer(original);
        unescape(buf);
        return buf.toString();
    }

    /**
     * Unescapes the given buffer in place. Each span running from an
     * ampersand to the next semicolon is looked up in the entity table and,
     * if known, replaced by its character; unknown spans are left as they are.
     *
     * @param original the buffer to modify
     */
    public static void unescape(final StringBuffer original) {
        int index = original.indexOf("&");
        while (index != -1) {
            final int semicolonIndex = original.indexOf(";", index);
            if (semicolonIndex == -1) {
                // No terminator anywhere after this point: nothing left to decode.
                break;
            }
            // Candidate length is (semicolonIndex - index + 1); anything longer
            // than the longest known entity cannot match, so skip the lookup.
            if (semicolonIndex - index < longestEntityLength) {
                final String entity = escapeEntityMap.get(original.substring(index, semicolonIndex + 1));
                if (entity != null) {
                    original.replace(index, semicolonIndex + 1, entity);
                }
            }
            // Resume after the current position so a decoded '&' is never
            // re-interpreted as the start of another entity.
            index = original.indexOf("&", index + 1);
        }
    }
}