/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.util.xml;
import java.util.HashMap;
/**
 * Entity filter for XML.
 * <p>
 * Converts named entities to the characters they stand for when a source
 * file is read, and converts characters back to named entities when a
 * translation is written.
 * <p>
 * The lookup tables hold the five predefined XML entities ({@code apos},
 * {@code quot}, {@code amp}, {@code lt}, {@code gt}) together with the named
 * character entities listed below (Latin-1, Greek, punctuation, arrows,
 * mathematical symbols, ...). Names are stored without the surrounding
 * {@code &} and {@code ;}.
 *
 * @author Keith Godfrey
 * @author Maxym Mykhalchuk
 */
public class DefaultEntityFilter {

    /** Maps a Unicode code point to its entity name (without {@code &} and {@code ;}). */
    private static final HashMap<Integer, String> CHAR_MAP = new HashMap<Integer, String>(512);

    /** Maps an entity name (without {@code &} and {@code ;}) to its Unicode code point. */
    private static final HashMap<String, Integer> ESC_MAP = new HashMap<String, Integer>(512);

    static {
        // CHECKSTYLE:OFF
        addMapEntry('\'', "apos");
        addMapEntry('"', "quot");
        addMapEntry('&', "amp");
        addMapEntry('<', "lt");
        addMapEntry('>', "gt");
        addMapEntry(160, "nbsp");
        addMapEntry(161, "iexcl");
        addMapEntry(162, "cent");
        addMapEntry(163, "pound");
        addMapEntry(164, "curren");
        addMapEntry(165, "yen");
        addMapEntry(166, "brvbar");
        addMapEntry(167, "sect");
        addMapEntry(168, "uml");
        addMapEntry(169, "copy");
        addMapEntry(170, "ordf");
        addMapEntry(171, "laquo");
        addMapEntry(172, "not");
        addMapEntry(173, "shy");
        addMapEntry(174, "reg");
        addMapEntry(175, "macr");
        addMapEntry(176, "deg");
        addMapEntry(177, "plusmn");
        addMapEntry(178, "sup2");
        addMapEntry(179, "sup3");
        addMapEntry(180, "acute");
        addMapEntry(181, "micro");
        addMapEntry(182, "para");
        addMapEntry(183, "middot");
        addMapEntry(184, "cedil");
        addMapEntry(185, "sup1");
        addMapEntry(186, "ordm");
        addMapEntry(187, "raquo");
        addMapEntry(188, "frac14");
        addMapEntry(189, "frac12");
        addMapEntry(190, "frac34");
        addMapEntry(191, "iquest");
        addMapEntry(192, "Agrave");
        addMapEntry(193, "Aacute");
        addMapEntry(194, "Acirc");
        addMapEntry(195, "Atilde");
        addMapEntry(196, "Auml");
        addMapEntry(197, "Aring");
        addMapEntry(198, "AElig");
        addMapEntry(199, "Ccedil");
        addMapEntry(200, "Egrave");
        addMapEntry(201, "Eacute");
        addMapEntry(202, "Ecirc");
        addMapEntry(203, "Euml");
        addMapEntry(204, "Igrave");
        addMapEntry(205, "Iacute");
        addMapEntry(206, "Icirc");
        addMapEntry(207, "Iuml");
        addMapEntry(208, "ETH");
        addMapEntry(209, "Ntilde");
        addMapEntry(210, "Ograve");
        addMapEntry(211, "Oacute");
        addMapEntry(212, "Ocirc");
        addMapEntry(213, "Otilde");
        addMapEntry(214, "Ouml");
        addMapEntry(215, "times");
        addMapEntry(216, "Oslash");
        addMapEntry(217, "Ugrave");
        addMapEntry(218, "Uacute");
        addMapEntry(219, "Ucirc");
        addMapEntry(220, "Uuml");
        addMapEntry(221, "Yacute");
        addMapEntry(222, "THORN");
        addMapEntry(223, "szlig");
        addMapEntry(224, "agrave");
        addMapEntry(225, "aacute");
        addMapEntry(226, "acirc");
        addMapEntry(227, "atilde");
        addMapEntry(228, "auml");
        addMapEntry(229, "aring");
        addMapEntry(230, "aelig");
        addMapEntry(231, "ccedil");
        addMapEntry(232, "egrave");
        addMapEntry(233, "eacute");
        addMapEntry(234, "ecirc");
        addMapEntry(235, "euml");
        addMapEntry(236, "igrave");
        addMapEntry(237, "iacute");
        addMapEntry(238, "icirc");
        addMapEntry(239, "iuml");
        addMapEntry(240, "eth");
        addMapEntry(241, "ntilde");
        addMapEntry(242, "ograve");
        addMapEntry(243, "oacute");
        addMapEntry(244, "ocirc");
        addMapEntry(245, "otilde");
        addMapEntry(246, "ouml");
        addMapEntry(247, "divide");
        addMapEntry(248, "oslash");
        addMapEntry(249, "ugrave");
        addMapEntry(250, "uacute");
        addMapEntry(251, "ucirc");
        addMapEntry(252, "uuml");
        addMapEntry(253, "yacute");
        addMapEntry(254, "thorn");
        addMapEntry(255, "yuml");
        addMapEntry(338, "OElig");
        addMapEntry(339, "oelig");
        addMapEntry(352, "Scaron");
        addMapEntry(353, "scaron");
        addMapEntry(376, "Yuml");
        addMapEntry(402, "fnof");
        addMapEntry(710, "circ");
        addMapEntry(732, "tilde");
        addMapEntry(913, "Alpha");
        addMapEntry(914, "Beta");
        addMapEntry(915, "Gamma");
        addMapEntry(916, "Delta");
        addMapEntry(917, "Epsilon");
        addMapEntry(918, "Zeta");
        addMapEntry(919, "Eta");
        addMapEntry(920, "Theta");
        addMapEntry(921, "Iota");
        addMapEntry(922, "Kappa");
        addMapEntry(923, "Lambda");
        addMapEntry(924, "Mu");
        addMapEntry(925, "Nu");
        addMapEntry(926, "Xi");
        addMapEntry(927, "Omicron");
        addMapEntry(928, "Pi");
        addMapEntry(929, "Rho");
        addMapEntry(931, "Sigma");
        addMapEntry(932, "Tau");
        addMapEntry(933, "Upsilon");
        addMapEntry(934, "Phi");
        addMapEntry(935, "Chi");
        addMapEntry(936, "Psi");
        addMapEntry(937, "Omega");
        addMapEntry(945, "alpha");
        addMapEntry(946, "beta");
        addMapEntry(947, "gamma");
        addMapEntry(948, "delta");
        addMapEntry(949, "epsilon");
        addMapEntry(950, "zeta");
        addMapEntry(951, "eta");
        addMapEntry(952, "theta");
        addMapEntry(953, "iota");
        addMapEntry(954, "kappa");
        addMapEntry(955, "lambda");
        addMapEntry(956, "mu");
        addMapEntry(957, "nu");
        addMapEntry(958, "xi");
        addMapEntry(959, "omicron");
        addMapEntry(960, "pi");
        addMapEntry(961, "rho");
        addMapEntry(962, "sigmaf");
        addMapEntry(963, "sigma");
        addMapEntry(964, "tau");
        addMapEntry(965, "upsilon");
        addMapEntry(966, "phi");
        addMapEntry(967, "chi");
        addMapEntry(968, "psi");
        addMapEntry(969, "omega");
        addMapEntry(977, "thetasym");
        addMapEntry(978, "upsih");
        addMapEntry(982, "piv");
        addMapEntry(8194, "ensp");
        addMapEntry(8195, "emsp");
        addMapEntry(8201, "thinsp");
        addMapEntry(8204, "zwnj");
        addMapEntry(8205, "zwj");
        addMapEntry(8206, "lrm");
        addMapEntry(8207, "rlm");
        addMapEntry(8211, "ndash");
        addMapEntry(8212, "mdash");
        addMapEntry(8216, "lsquo");
        addMapEntry(8217, "rsquo");
        addMapEntry(8218, "sbquo");
        addMapEntry(8220, "ldquo");
        addMapEntry(8221, "rdquo");
        addMapEntry(8222, "bdquo");
        addMapEntry(8224, "dagger");
        addMapEntry(8225, "Dagger");
        addMapEntry(8226, "bull");
        addMapEntry(8230, "hellip");
        addMapEntry(8240, "permil");
        addMapEntry(8242, "prime");
        addMapEntry(8243, "Prime");
        addMapEntry(8249, "lsaquo");
        addMapEntry(8250, "rsaquo");
        addMapEntry(8254, "oline");
        addMapEntry(8260, "frasl");
        addMapEntry(8364, "euro");
        addMapEntry(8465, "image");
        addMapEntry(8472, "weierp");
        addMapEntry(8476, "real");
        addMapEntry(8482, "trade");
        addMapEntry(8501, "alefsym");
        addMapEntry(8592, "larr");
        addMapEntry(8593, "uarr");
        addMapEntry(8594, "rarr");
        addMapEntry(8595, "darr");
        addMapEntry(8596, "harr");
        addMapEntry(8629, "crarr");
        addMapEntry(8656, "lArr");
        addMapEntry(8657, "uArr");
        addMapEntry(8658, "rArr");
        addMapEntry(8659, "dArr");
        addMapEntry(8660, "hArr");
        addMapEntry(8704, "forall");
        addMapEntry(8706, "part");
        addMapEntry(8707, "exist");
        addMapEntry(8709, "empty");
        addMapEntry(8711, "nabla");
        addMapEntry(8712, "isin");
        addMapEntry(8713, "notin");
        addMapEntry(8715, "ni");
        addMapEntry(8719, "prod");
        addMapEntry(8721, "sum");
        addMapEntry(8722, "minus");
        addMapEntry(8727, "lowast");
        addMapEntry(8730, "radic");
        addMapEntry(8733, "prop");
        addMapEntry(8734, "infin");
        addMapEntry(8736, "ang");
        addMapEntry(8743, "and");
        addMapEntry(8744, "or");
        addMapEntry(8745, "cap");
        addMapEntry(8746, "cup");
        addMapEntry(8747, "int");
        addMapEntry(8756, "there4");
        addMapEntry(8764, "sim");
        addMapEntry(8773, "cong");
        addMapEntry(8776, "asymp");
        addMapEntry(8800, "ne");
        addMapEntry(8801, "equiv");
        addMapEntry(8804, "le");
        addMapEntry(8805, "ge");
        addMapEntry(8834, "sub");
        addMapEntry(8835, "sup");
        addMapEntry(8836, "nsub");
        addMapEntry(8838, "sube");
        addMapEntry(8839, "supe");
        addMapEntry(8853, "oplus");
        addMapEntry(8855, "otimes");
        addMapEntry(8869, "perp");
        addMapEntry(8901, "sdot");
        addMapEntry(8968, "lceil");
        addMapEntry(8969, "rceil");
        addMapEntry(8970, "lfloor");
        addMapEntry(8971, "rfloor");
        addMapEntry(9001, "lang");
        addMapEntry(9002, "rang");
        addMapEntry(9674, "loz");
        addMapEntry(9824, "spades");
        addMapEntry(9827, "clubs");
        addMapEntry(9829, "hearts");
        addMapEntry(9830, "diams");
        // CHECKSTYLE:ON
    }

    /**
     * Registers one entity in both lookup directions.
     *
     * @param val
     *            Unicode code point the entity stands for
     * @param name
     *            entity name without the leading {@code &} and trailing
     *            {@code ;}
     */
    private static void addMapEntry(int val, String name) {
        ESC_MAP.put(name, val);
        CHAR_MAP.put(val, name);
    }

    /**
     * Converts plaintext symbol to XML entity.
     *
     * @param cp
     *            Unicode code point to convert
     * @return {@code &name;} when the code point has a registered entity
     *         name, otherwise the code point itself as a string
     * @throws IllegalArgumentException
     *             if {@code cp} has no registered entity name and is not a
     *             valid Unicode code point (raised by
     *             {@link Character#toChars(int)})
     */
    public String convertToEntity(int cp) {
        String name = CHAR_MAP.get(cp);
        if (name != null) {
            return "&" + name + ";";
        }
        return String.valueOf(Character.toChars(cp));
    }

    /**
     * Converts XML entity to plaintext character. If the entity cannot be
     * converted, returns 0.
     * <p>
     * The argument is first looked up as an entity name (for example
     * {@code "amp"}); if no such name is registered it is parsed as a decimal
     * number. A number that is not a valid Unicode code point (negative, or
     * above {@link Character#MAX_CODE_POINT}) is treated as unconvertible.
     *
     * @param escapeSequence
     *            entity name or decimal character code, without the
     *            surrounding {@code &} and {@code ;}; may be {@code null}
     * @return the code point the entity stands for, or 0 if it cannot be
     *         converted
     */
    public int convertToSymbol(String escapeSequence) {
        if (escapeSequence == null) {
            return 0;
        }
        Integer known = ESC_MAP.get(escapeSequence);
        if (known != null) {
            return known;
        }
        try {
            int parsed = Integer.parseInt(escapeSequence);
            // Reject numbers that are not code points, so callers never
            // receive a value that Character.toChars() would refuse.
            return Character.isValidCodePoint(parsed) ? parsed : 0;
        } catch (NumberFormatException e) {
            // Unconvertable Entity
            return 0;
        }
    }
}