HTMLUnicodeConversionMaps.java example

Explorer
jabref-master
- src
package org.jabref.logic.util.strings;

import java.util.HashMap;
import java.util.Map;

public class HTMLUnicodeConversionMaps {

    // most of the LaTeX commands can be read at http://en.wikibooks.org/wiki/LaTeX/Accents
    // The symbols can be seen at http://www.fileformat.info/info/unicode/char/a4/index.htm. Replace "a4" with the U+ number
    // http://detexify.kirelabs.org/classify.html and http://www.ctan.org/tex-archive/info/symbols/comprehensive/ might help to find the right LaTeX command
    // http://llg.cubic.org/docs/ent2latex.html and http://www.w3.org/TR/xml-entity-names/byalpha.html are also useful
    // NOTE(review): the code that fills the maps below is not visible in this part of the file.
    // The per-map descriptions are inferred from the field names and key/value types only;
    // confirm them against the code that populates the maps.
    // All seven fields are public and hold mutable HashMap instances: the references are final,
    // but any caller can still add to or remove from the maps.

    /** String -> String. Presumably HTML entity name -> LaTeX command (TODO confirm). */
    public static final Map<String, String> HTML_LATEX_CONVERSION_MAP = new HashMap<>();
    /** Integer -> String. Presumably numeric character code of an accent -> LaTeX accent (TODO confirm). */
    public static final Map<Integer, String> ESCAPED_ACCENTS = new HashMap<>();
    /** String -> String. Presumably Unicode accent character -> LaTeX accent (TODO confirm). */
    public static final Map<String, String> UNICODE_ESCAPED_ACCENTS = new HashMap<>();
    /** Integer -> String. Presumably decimal number of an HTML entity -> LaTeX command (TODO confirm). */
    public static final Map<Integer, String> NUMERICAL_LATEX_CONVERSION_MAP = new HashMap<>();
    /** String -> String. Presumably Unicode character -> LaTeX command (TODO confirm). */
    public static final Map<String, String> UNICODE_LATEX_CONVERSION_MAP = new HashMap<>();
    /** String -> String. Presumably LaTeX command -> HTML entity (TODO confirm). */
    public static final Map<String, String> LATEX_HTML_CONVERSION_MAP = new HashMap<>();
    /** String -> String. Presumably LaTeX command -> Unicode character (TODO confirm). */
    public static final Map<String, String> LATEX_UNICODE_CONVERSION_MAP = new HashMap<>();

    /*   Portions © International Organization for Standardization 1986:
     Permission to copy in any form is granted for use with
     conforming SGML systems and applications as defined in
     ISO 8879, provided this notice is included in all copies.
     */

    // See also http://www.w3.org/Math/characters/unicode.xml
    // An array of arrays of strings, each row in the format:
    // {"decimal number of HTML entity", "text HTML entity", "corresponding LaTeX command"}
    // A field may be left empty (""); an empty field is then not included
    private static final String[][] CONVERSION_LIST = new String[][] {{"160", "nbsp", "{~}"}, // no-break space = non-breaking space,
            //                                 U+00A0 ISOnum
            {"161", "iexcl", "{\\textexclamdown}"}, // inverted exclamation mark, U+00A1 ISOnum
            {"162", "cent", "{\\textcent}"}, // cent sign, U+00A2 ISOnum
            {"163", "pound", "{\\pounds}"}, // pound sign, U+00A3 ISOnum
            {"164", "curren", "{\\textcurrency}"}, // currency sign, U+00A4 ISOnum
            {"165", "yen", "{\\textyen}"}, // yen sign = yuan sign, U+00A5 ISOnum
            {"166", "brvbar", "{\\textbrokenbar}"}, // broken bar = broken vertical bar,
            //                                 U+00A6 ISOnum
            {"167", "sect", "{{\\S}}"}, // section sign, U+00A7 ISOnum
            {"168", "uml", "{\\\"{}}"}, // diaeresis = spacing diaeresis,
            //                                 U+00A8 ISOdia
            {"169", "copy", "{\\copyright}"}, // copyright sign, U+00A9 ISOnum
            {"170", "ordf", "{\\textordfeminine}"}, // feminine ordinal indicator, U+00AA ISOnum
            {"171", "laquo", "{\\guillemotleft}"}, // left-pointing double angle quotation mark
            //                                 = left pointing guillemet, U+00AB ISOnum
            {"172", "not", "$\\neg$"}, // not sign, U+00AC ISOnum
            {"173", "shy", "\\-"}, // soft hyphen = discretionary hyphen,
            //                                 U+00AD ISOnum
            {"174", "reg", "{\\textregistered}"}, // registered sign = registered trade mark sign,
            //                                 U+00AE ISOnum
            {"175", "macr", "{\\={}}"}, // macron = spacing macron = overline
            //                                 = APL overbar, U+00AF ISOdia
            {"176", "deg", "{$^{\\circ}$}"}, // degree sign, U+00B0 ISOnum
            {"177", "plusmn", "$\\pm$"}, // plus-minus sign = plus-or-minus sign,
            //                                 U+00B1 ISOnum
            {"178", "sup2", "\\textsuperscript{2}"}, // superscript two = superscript digit two
            //                                 = squared, U+00B2 ISOnum
            {"179", "sup3", "\\textsuperscript{3}"}, // superscript three = superscript digit three
            //                                 = cubed, U+00B3 ISOnum
            {"180", "acute", "{\\'{}}"}, // acute accent = spacing acute,
            //                                 U+00B4 ISOdia
            {"181", "micro", "$\\mu$"}, // micro sign, U+00B5 ISOnum
            {"", "mu", "$\\mu$"}, // micro sign, U+00B5 ISOnum
            {"182", "para", "{{\\P}}"}, // pilcrow sign = paragraph sign,
            //                                 U+00B6 ISOnum
            {"183", "middot", "$\\cdot$"}, // middle dot = Georgian comma
            //                                 = Greek middle dot, U+00B7 ISOnum
            {"184", "cedil", "{\\c{}}"}, // cedilla = spacing cedilla, U+00B8 ISOdia
            {"185", "sup1", "\\textsuperscript{1}"}, // superscript one = superscript digit one,
            //                                 U+00B9 ISOnum
            {"186", "ordm", "{\\textordmasculine}"}, // masculine ordinal indicator,
            //                                 U+00BA ISOnum
            {"187", "raquo", "{\\guillemotright}"}, // right-pointing double angle quotation mark
            //                                 = right pointing guillemet, U+00BB ISOnum
            {"188", "frac14", "$\\sfrac{1}{4}$"}, // vulgar fraction one quarter
            //                                 = fraction one quarter, U+00BC ISOnum
            {"189", "frac12", "$\\sfrac{1}{2}$"}, // vulgar fraction one half
            //                                 = fraction one half, U+00BD ISOnum
            {"190", "frac34", "$\\sfrac{3}{4}$"}, // vulgar fraction three quarters
            //                                 = fraction three quarters, U+00BE ISOnum
            {"191", "iquest", "{\\textquestiondown}"}, // inverted question mark
            //                                 = turned question mark, U+00BF ISOnum
            {"192", "Agrave", "{{\\`{A}}}"}, // latin capital letter A with grave
            //                                 = latin capital letter A grave,
            //                                 U+00C0 ISOlat1
            {"193", "Aacute", "{{\\'{A}}}"}, // latin capital letter A with acute,
            //                                 U+00C1 ISOlat1
            {"194", "Acirc", "{{\\^{A}}}"}, // latin capital letter A with circumflex,
            //                                 U+00C2 ISOlat1
            {"195", "Atilde", "{{\\~{A}}}"}, // latin capital letter A with tilde,
            //                                 U+00C3 ISOlat1
            {"196", "Auml", "{{\\\"{A}}}"}, // latin capital letter A with diaeresis,
            //                                 U+00C4 ISOlat1
            {"197", "Aring", "{{\\AA}}"}, // latin capital letter A with ring above
            //                                 = latin capital letter A ring,
            //                                 U+00C5 ISOlat1
            {"198", "AElig", "{{\\AE}}"}, // latin capital letter AE
            //                                 = latin capital ligature AE,
            //                                 U+00C6 ISOlat1
            {"199", "Ccedil", "{{\\c{C}}}"}, // latin capital letter C with cedilla,
            //                                 U+00C7 ISOlat1
            {"200", "Egrave", "{{\\`{E}}}"}, // latin capital letter E with grave,
            //                                 U+00C8 ISOlat1
            {"201", "Eacute", "{{\\'{E}}}"}, // latin capital letter E with acute,
            //                                 U+00C9 ISOlat1
            {"202", "Ecirc", "{{\\^{E}}}"}, // latin capital letter E with circumflex,
            //                                 U+00CA ISOlat1
            {"203", "Euml", "{{\\\"{E}}}"}, // latin capital letter E with diaeresis,
            //                                 U+00CB ISOlat1
            {"204", "Igrave", "{{\\`{I}}}"}, // latin capital letter I with grave,
            //                                 U+00CC ISOlat1
            {"205", "Iacute", "{{\\'{I}}}"}, // latin capital letter I with acute,
            //                                 U+00CD ISOlat1
            {"206", "Icirc", "{{\\^{I}}}"}, // latin capital letter I with circumflex,
            //                                 U+00CE ISOlat1
            {"207", "Iuml", "{{\\\"{I}}}"}, // latin capital letter I with diaeresis,
            //                                 U+00CF ISOlat1
            {"208", "ETH", "{{\\DH}}"}, // latin capital letter ETH, U+00D0 ISOlat1
            {"209", "Ntilde", "{{\\~{N}}}"}, // latin capital letter N with tilde,
            //                                 U+00D1 ISOlat1
            {"210", "Ograve", "{{\\`{O}}}"}, // latin capital letter O with grave,
            //                                 U+00D2 ISOlat1
            {"211", "Oacute", "{{\\'{O}}}"}, // latin capital letter O with acute,
            //                                 U+00D3 ISOlat1
            {"212", "Ocirc", "{{\\^{O}}}"}, // latin capital letter O with circumflex,
            //                                 U+00D4 ISOlat1
            {"213", "Otilde", "{{\\~{O}}}"}, // latin capital letter O with tilde,
            //                                 U+00D5 ISOlat1
            {"214", "Ouml", "{{\\\"{O}}}"}, // latin capital letter O with diaeresis,
            //                                 U+00D6 ISOlat1
            {"215", "times", "$\\times$"}, // multiplication sign, U+00D7 ISOnum
            {"216", "Oslash", "{{\\O}}"}, // latin capital letter O with stroke
            //                                 = latin capital letter O slash,
            //                                 U+00D8 ISOlat1
            {"217", "Ugrave", "{{\\`{U}}}"}, // latin capital letter U with grave,
            //                                 U+00D9 ISOlat1
            {"218", "Uacute", "{{\\'{U}}}"}, // latin capital letter U with acute,
            //                                 U+00DA ISOlat1
            {"219", "Ucirc", "{{\\^{U}}}"}, // latin capital letter U with circumflex,
            //                                 U+00DB ISOlat1
            {"220", "Uuml", "{{\\\"{U}}}"}, // latin capital letter U with diaeresis,
            //                                 U+00DC ISOlat1
            {"221", "Yacute", "{{\\'{Y}}}"}, // latin capital letter Y with acute,
            //                                 U+00DD ISOlat1
            {"222", "THORN", "{{\\TH}}"}, // latin capital letter THORN,
            //                                 U+00DE ISOlat1
            {"223", "szlig", "{\\ss}"}, // latin small letter sharp s = ess-zed,
            //                                 U+00DF ISOlat1
            {"224", "agrave", "{\\`{a}}"}, // latin small letter a with grave
            //                                 = latin small letter a grave,
            //                                 U+00E0 ISOlat1
            {"225", "aacute", "{\\'{a}}"}, // latin small letter a with acute,
            //                                 U+00E1 ISOlat1
            {"226", "acirc", "{\\^{a}}"}, // latin small letter a with circumflex,
            //                                 U+00E2 ISOlat1
            {"227", "atilde", "{\\~{a}}"}, // latin small letter a with tilde,
            //                                 U+00E3 ISOlat1
            {"228", "auml", "{\\\"{a}}"}, // latin small letter a with diaeresis,
            //                                 U+00E4 ISOlat1
            {"229", "aring", "{{\\aa}}"}, // latin small letter a with ring above
            //                                 = latin small letter a ring,
            //                                 U+00E5 ISOlat1
            {"230", "aelig", "{\\ae}"}, // latin small letter ae
            //                                 = latin small ligature ae, U+00E6 ISOlat1
            {"231", "ccedil", "{\\c{c}}"}, // latin small letter c with cedilla,
            //                                 U+00E7 ISOlat1
            {"232", "egrave", "{\\`{e}}"}, // latin small letter e with grave,
            //                                 U+00E8 ISOlat1
            {"233", "eacute", "{\\'{e}}"}, // latin small letter e with acute,
            //                                 U+00E9 ISOlat1
            {"234", "ecirc", "{\\^{e}}"}, // latin small letter e with circumflex,
            //                                 U+00EA ISOlat1
            {"235", "euml", "{\\\"{e}}"}, // latin small letter e with diaeresis,
            //                                 U+00EB ISOlat1
            {"236", "igrave", "{\\`{i}}"}, // latin small letter i with grave,
            //                                 U+00EC ISOlat1
            {"237", "iacute", "{\\'{i}}"}, // latin small letter i with acute,
            //                                 U+00ED ISOlat1
            {"238", "icirc", "{\\^{i}}"}, // latin small letter i with circumflex,
            //                                 U+00EE ISOlat1
            {"239", "iuml", "{\\\"{i}}"}, // latin small letter i with diaeresis,
            //                                 U+00EF ISOlat1
            {"240", "eth", "{\\dh}"}, // latin small letter eth, U+00F0 ISOlat1
            {"241", "ntilde", "{\\~{n}}"}, // latin small letter n with tilde,
            //                                 U+00F1 ISOlat1
            {"242", "ograve", "{\\`{o}}"}, // latin small letter o with grave,
            //                                 U+00F2 ISOlat1
            {"243", "oacute", "{\\'{o}}"}, // latin small letter o with acute,
            //                                 U+00F3 ISOlat1
            {"244", "ocirc", "{\\^{o}}"}, // latin small letter o with circumflex,
            //                                 U+00F4 ISOlat1
            {"245", "otilde", "{\\~{o}}"}, // latin small letter o with tilde,
            //                                 U+00F5 ISOlat1
            {"246", "ouml", "{\\\"{o}}"}, // latin small letter o with diaeresis,
            //                                 U+00F6 ISOlat1
            {"247", "divide", "$\\div$"}, // division sign, U+00F7 ISOnum
            {"248", "oslash", "{\\o}"}, // latin small letter o with stroke,
            //                                 = latin small letter o slash,
            //                                 U+00F8 ISOlat1
            {"249", "ugrave", "{\\`{u}}"}, // latin small letter u with grave,
            //                                 U+00F9 ISOlat1
            {"250", "uacute", "{\\'{u}}"}, // latin small letter u with acute,
            //                                 U+00FA ISOlat1
            {"251", "ucirc", "{\\^{u}}"}, // latin small letter u with circumflex,
            //                                 U+00FB ISOlat1
            {"252", "uuml", "{\\\"{u}}"}, // latin small letter u with diaeresis,
            //                                 U+00FC ISOlat1
            {"253", "yacute", "{\\'{y}}"}, // latin small letter y with acute,
            //                                 U+00FD ISOlat1
            {"254", "thorn", "{\\th}"}, // latin small letter thorn,
            //                                 U+00FE ISOlat1
            {"255", "yuml", "{\\\"{y}}"}, // latin small letter y with diaeresis,
            //                                 U+00FF ISOlat1

            /* Greek */
            {"913", "Alpha", "{{$\\Alpha$}}"}, // greek capital letter alpha, U+0391
            {"914", "Beta", "{{$\\Beta$}}"}, // greek capital letter beta, U+0392
            {"915", "Gamma", "{{$\\Gamma$}}"}, // greek capital letter gamma,
            //                                   U+0393 ISOgrk3
            {"916", "Delta", "{{$\\Delta$}}"}, // greek capital letter delta,
            //                                   U+0394 ISOgrk3
            {"917", "Epsilon", "{{$\\Epsilon$}}"}, // greek capital letter epsilon, U+0395
            {"918", "Zeta", "{{$\\Zeta$}}"}, // greek capital letter zeta, U+0396
            {"919", "Eta", "{{$\\Eta$}}"}, // greek capital letter eta, U+0397
            {"920", "Theta", "{{$\\Theta$}}"}, // greek capital letter theta,
            //                                   U+0398 ISOgrk3
            {"921", "Iota", "{{$\\Iota$}}"}, // greek capital letter iota, U+0399
            {"922", "Kappa", "{{$\\Kappa$}}"}, // greek capital letter kappa, U+039A
            {"923", "Lambda", "{{$\\Lambda$}}"}, // greek capital letter lambda,
            //                                   U+039B ISOgrk3
            {"924", "Mu", "{{$\\Mu$}}"}, // greek capital letter mu, U+039C
            {"925", "Nu", "{{$\\Nu$}}"}, // greek capital letter nu, U+039D
            {"926", "Xi", "{{$\\Xi$}}"}, // greek capital letter xi, U+039E ISOgrk3
            {"927", "Omicron", "{{$\\Omicron$}}"}, // greek capital letter omicron, U+039F
            {"928", "Pi", "{{$\\Pi$}}"}, // greek capital letter pi, U+03A0 ISOgrk3
            {"929", "Rho", "{{$\\Rho$}}"}, // greek capital letter rho, U+03A1
            /* there is no Sigmaf, and no U+03A2 character either */
            {"931", "Sigma", "{{$\\Sigma$}}"}, // greek capital letter sigma,
            //                                   U+03A3 ISOgrk3
            {"932", "Tau", "{{$\\Tau$}}"}, // greek capital letter tau, U+03A4
            {"933", "Upsilon", "{{$\\Upsilon$}}"}, // greek capital letter upsilon,
            //                                   U+03A5 ISOgrk3
            {"934", "Phi", "{{$\\Phi$}}"}, // greek capital letter phi,
            //                                   U+03A6 ISOgrk3
            {"935", "Chi", "{{$\\Chi$}}"}, // greek capital letter chi, U+03A7
            {"936", "Psi", "{{$\\Psi$}}"}, // greek capital letter psi,
            //                                   U+03A8 ISOgrk3
            {"937", "Omega", "{{$\\Omega$}}"}, // greek capital letter omega,
            //                                   U+03A9 ISOgrk3

            {"945", "alpha", "$\\alpha$"}, // greek small letter alpha,
            //                                   U+03B1 ISOgrk3
            {"946", "beta", "$\\beta$"}, // greek small letter beta, U+03B2 ISOgrk3
            {"947", "gamma", "$\\gamma$"}, // greek small letter gamma,
            //                                   U+03B3 ISOgrk3
            {"948", "delta", "$\\delta$"}, // greek small letter delta,
            //                                   U+03B4 ISOgrk3
            {"949", "epsilon", "$\\epsilon$"}, // greek small letter epsilon,
            //                                   U+03B5 ISOgrk3
            {"950", "zeta", "$\\zeta$"}, // greek small letter zeta, U+03B6 ISOgrk3
            {"951", "eta", "$\\eta$"}, // greek small letter eta, U+03B7 ISOgrk3
            {"952", "theta", "$\\theta$"}, // greek small letter theta,
            //                                   U+03B8 ISOgrk3
            {"953", "iota", "$\\iota$"}, // greek small letter iota, U+03B9 ISOgrk3
            {"954", "kappa", "$\\kappa$"}, // greek small letter kappa,
            //                                   U+03BA ISOgrk3
            {"955", "lambda", "$\\lambda$"}, // greek small letter lambda,
            //                                   U+03BB ISOgrk3
            {"956", "mu", "$\\mu$"}, // greek small letter mu, U+03BC ISOgrk3
            {"957", "nu", "$\\nu$"}, // greek small letter nu, U+03BD ISOgrk3
            {"958", "xi", "$\\xi$"}, // greek small letter xi, U+03BE ISOgrk3
            {"959", "omicron", "$\\omicron$"}, // greek small letter omicron, U+03BF NEW
            {"960", "pi", "$\\pi$"}, // greek small letter pi, U+03C0 ISOgrk3
            {"961", "rho", "$\\rho$"}, // greek small letter rho, U+03C1 ISOgrk3
            {"962", "sigmaf", "$\\varsigma$"}, // greek small letter final sigma,
            //                                   U+03C2 ISOgrk3
            {"963", "sigma", "$\\sigma$"}, // greek small letter sigma,
            //                                   U+03C3 ISOgrk3
            {"964", "tau", "$\\tau$"}, // greek small letter tau, U+03C4 ISOgrk3
            {"965", "upsilon", "$\\upsilon$"}, // greek small letter upsilon,
            {"", "upsi", "$\\upsilon$"}, // alias
            //                                   U+03C5 ISOgrk3
            {"966", "phi", "$\\phi$"}, // greek small letter phi, U+03C6 ISOgrk3
            {"967", "chi", "$\\chi$"}, // greek small letter chi, U+03C7 ISOgrk3
            {"968", "psi", "$\\psi$"}, // greek small letter psi, U+03C8 ISOgrk3
            {"969", "omega", "$\\omega$"}, // greek small letter omega,
            //                                   U+03C9 ISOgrk3
            {"977", "thetasym", "$\\vartheta$"}, // greek small letter theta symbol,
            {"", "thetav", "$\\vartheta$"}, // greek small letter theta symbol,
            {"", "vartheta", "$\\vartheta$"}, // greek small letter theta symbol,
            //                                   U+03D1 NEW
            {"978", "upsih", "{{$\\Upsilon$}}"}, // greek upsilon with hook symbol,
            //                                   U+03D2 NEW
            {"982", "piv", "$\\varpi$"}, // greek pi symbol (variant pi), U+03D6 ISOgrk3

            /* General Punctuation */
            {"8226", "bull", "$\\bullet$"}, // bullet = black small circle,
            //                                    U+2022 ISOpub
            /* bullet is NOT the same as bullet operator, U+2219 */
            {"8230", "hellip", "{\\ldots}"}, // horizontal ellipsis = three dot leader,
            //                                    U+2026 ISOpub
            {"8242", "prime", "$\\prime$"}, // prime = minutes = feet, U+2032 ISOtech
            {"8243", "Prime", "$\\prime\\prime$"}, // double prime = seconds = inches,
            //                                    U+2033 ISOtech
            {"8254", "oline", "{\\={}}"}, // overline = spacing overscore,
            //                                    U+203E NEW
            {"8260", "frasl", "/"}, // fraction slash, U+2044 NEW

            /* Letterlike Symbols */
            {"8472", "weierp", "$\\wp$"}, // script capital P = power set
            //                                    = Weierstrass p, U+2118 ISOamso
            {"8465", "image", "{{$\\Im$}}"}, // blackletter capital I = imaginary part,
            //                                    U+2111 ISOamso
            {"8476", "real", "{{$\\Re$}}"}, // blackletter capital R = real part symbol,
            //                                    U+211C ISOamso
            {"8482", "trade", "{\\texttrademark}"}, // trade mark sign, U+2122 ISOnum
            {"8501", "alefsym", "$\\aleph$"}, // alef symbol = first transfinite cardinal,
            //                                    U+2135 NEW
            /*    alef symbol is NOT the same as hebrew letter alef,
             U+05D0 although the same glyph could be used to depict both characters */
            /* Arrows */
            {"8592", "larr", "$\\leftarrow$"}, // leftwards arrow, U+2190 ISOnum
            {"8593", "uarr", "$\\uparrow$"}, // upwards arrow, U+2191 ISOnum
            {"8594", "rarr", "$\\rightarrow$"}, // rightwards arrow, U+2192 ISOnum
            {"8595", "darr", "$\\downarrow$"}, // downwards arrow, U+2193 ISOnum
            {"8596", "harr", "$\\leftrightarrow$"}, // left right arrow, U+2194 ISOamsa
            {"8629", "crarr", "$\\dlsh$"}, // downwards arrow with corner leftwards
            //                                    = carriage return, U+21B5 NEW - require mathabx
            {"8656", "lArr", "{{$\\Leftarrow$}}"}, // leftwards double arrow, U+21D0 ISOtech
            /*  ISO 10646 does not say that lArr is the same as the 'is implied by' arrow
             but also does not have any other character for that function. So ? lArr can
             be used for 'is implied by' as ISOtech suggests */
            {"8657", "uArr", "{{$\\Uparrow$}}"}, // upwards double arrow, U+21D1 ISOamsa
            {"8658", "rArr", "{{$\\Rightarrow$}}"}, // rightwards double arrow,
            //                                     U+21D2 ISOtech
            /*   ISO 10646 does not say this is the 'implies' character but does not have
             another character with this function so ?
             rArr can be used for 'implies' as ISOtech suggests */
            {"8659", "dArr", "{{$\\Downarrow$}}"}, // downwards double arrow, U+21D3 ISOamsa
            {"8660", "hArr", "{{$\\Leftrightarrow$}}"}, // left right double arrow,
            //                                     U+21D4 ISOamsa

            /* Mathematical Operators */
            {"8704", "forall", "$\\forall$"}, // for all, U+2200 ISOtech
            {"8706", "part", "$\\partial$"}, // partial differential, U+2202 ISOtech
            {"8707", "exist", "$\\exists$"}, // there exists, U+2203 ISOtech
            {"8709", "empty", "$\\emptyset$"}, // empty set = null set = diameter,
            //                                    U+2205 ISOamso
            {"8711", "nabla", "$\\nabla$"}, // nabla = backward difference,
            //                                    U+2207 ISOtech
            {"8712", "isin", "$\\in$"}, // element of, U+2208 ISOtech
            {"8713", "notin", "$\\notin$"}, // not an element of, U+2209 ISOtech
            {"8715", "ni", "$\\ni$"}, // contains as member, U+220B ISOtech
            /* should there be a more memorable name than 'ni'? */
            {"8719", "prod", "$\\prod$"}, // n-ary product = product sign,
            //                                    U+220F ISOamsb
            /*    prod is NOT the same character as U+03A0 'greek capital letter pi' though
             the same glyph might be used for both  */
            {"8721", "sum", "$\\sum$"}, // n-ary summation, U+2211 ISOamsb
            /*    sum is NOT the same character as U+03A3 'greek capital letter sigma'
             though the same glyph might be used for both */
            {"8722", "minus", "$-$"}, // minus sign, U+2212 ISOtech
            {"8727", "lowast", "$\\ast$"}, // asterisk operator, U+2217 ISOtech
            {"8730", "radic", "$\\sqrt{}$"}, // square root = radical sign,
            //                                    U+221A ISOtech
            {"8733", "prop", "$\\propto$"}, // proportional to, U+221D ISOtech
            {"8734", "infin", "$\\infty$"}, // infinity, U+221E ISOtech
            {"8736", "ang", "$\\angle$"}, // angle, U+2220 ISOamso
            {"8743", "and", "$\\land$"}, // logical and = wedge, U+2227 ISOtech
            {"8744", "or", "$\\lor$"}, // logical or = vee, U+2228 ISOtech
            {"8745", "cap", "$\\cap$"}, // intersection = cap, U+2229 ISOtech
            {"8746", "cup", "$\\cup$"}, // union = cup, U+222A ISOtech
            {"8747", "int", "$\\int$"}, // integral, U+222B ISOtech
            {"8756", "there4", "$\\therefore$"}, // therefore, U+2234 ISOtech; AMSSymb
            {"8764", "sim", "$\\sim$"}, // tilde operator = varies with = similar to,
            //                                    U+223C ISOtech
            /*  tilde operator is NOT the same character as the tilde, U+007E,
             although the same glyph might be used to represent both   */
            {"8773", "cong", "$\\cong$"}, // approximately equal to, U+2245 ISOtech
            {"8776", "asymp", "$\\approx$"}, // almost equal to = asymptotic to,
            //                                    U+2248 ISOamsr
            {"8800", "ne", "$\\neq$"}, // not equal to, U+2260 ISOtech
            {"8801", "equiv", "$\\equiv$"}, // identical to, U+2261 ISOtech
            {"8804", "le", "$\\leq$"}, // less-than or equal to, U+2264 ISOtech
            {"8805", "ge", "$\\geq$"}, // greater-than or equal to,
            //                                    U+2265 ISOtech
            {"8834", "sub", "$\\subset$"}, // subset of, U+2282 ISOtech
            {"8835", "sup", "$\\supset$"}, // superset of, U+2283 ISOtech
            /*    note that nsup, 'not a superset of, U+2285' is not covered by the Symbol
             font encoding and is not included. Should it be, for symmetry?
             It is in ISOamsn   */
            {"8836", "nsub", "$\\not\\subset$"}, // not a subset of, U+2284 ISOamsn
            {"8838", "sube", "$\\subseteq$"}, // subset of or equal to, U+2286 ISOtech
            {"8839", "supe", "$\\supseteq$"}, // superset of or equal to,
            //                                    U+2287 ISOtech
            {"8853", "oplus", "$\\oplus$"}, // circled plus = direct sum,
            //                                    U+2295 ISOamsb
            {"8855", "otimes", "$\\otimes$"}, // circled times = vector product,
            //                                    U+2297 ISOamsb
            {"8869", "perp", "$\\perp$"}, // up tack = orthogonal to = perpendicular,
            //                                    U+22A5 ISOtech
            {"8901", "sdot", "$\\cdot$"}, // dot operator, U+22C5 ISOamsb
            /* dot operator is NOT the same character as U+00B7 middle dot */
            {"8968", "lceil", "$\\lceil$"}, // left ceiling = apl upstile,
            //                                    U+2308 ISOamsc
            {"8969", "rceil", "$\\rceil$"}, // right ceiling, U+2309 ISOamsc
            {"8970", "lfloor", "$\\lfloor$"}, // left floor = apl downstile,
            //                                    U+230A ISOamsc
            {"8971", "rfloor", "$\\rfloor$"}, // right floor, U+230B ISOamsc

            /* Miscellaneous Technical */
            {"9001", "lang", "$\\langle$"}, // left-pointing angle bracket = bra,
            //                                    U+2329 ISOtech
            /*    lang is NOT the same character as U+003C 'less than'
             or U+2039 'single left-pointing angle quotation mark' */
            {"9002", "rang", "$\\rangle$"}, // right-pointing angle bracket = ket,
            //                                    U+232A ISOtech
            /*    rang is NOT the same character as U+003E 'greater than'
             or U+203A 'single right-pointing angle quotation mark' */
            /* Geometric Shapes */
            {"9674", "loz", "$\\lozenge$"}, // lozenge, U+25CA ISOpub

            /* Miscellaneous Symbols */
            {"9824", "spades", "$\\spadesuit$"}, // black spade suit, U+2660 ISOpub
            /* black here seems to mean filled as opposed to hollow */
            {"9827", "clubs", "$\\clubsuit$"}, // black club suit = shamrock,
            //                                    U+2663 ISOpub
            {"9829", "hearts", "$\\heartsuit$"}, // black heart suit = valentine,
            //                                    U+2665 ISOpub
            {"9830", "diams", "$\\diamondsuit$"}, // black diamond suit, U+2666 ISOpub
            {"34", "quot", "\""}, // quotation mark = APL quote,
            //                                   U+0022 ISOnum
            {"38", "amp", "\\&"}, // ampersand, U+0026 ISOnum
            {"60", "lt", "$<$"}, // less-than sign, U+003C ISOnum
            {"62", "gt", "$>$"}, // greater-than sign, U+003E ISOnum


            /* General Punctuation */
            {"8194", "ensp", "\\hspace{0.5em}"}, // en space, U+2002 ISOpub
            {"8195", "emsp", "\\hspace{1em}"}, // em space, U+2003 ISOpub
            {"8201", "thinsp", "\\hspace{0.167em}"}, // thin space, U+2009 ISOpub
            {"8202", "", "\\hspace{0.1em}"}, // hair space, U+200A ISOpub
            {"8204", "zwnj", "\\/{}"}, // zero width non-joiner,
            //                                   U+200C NEW RFC 2070
            {"8205", "zwj", ""}, // zero width joiner, U+200D NEW RFC 2070
            {"8206", "lrm", ""}, // left-to-right mark, U+200E NEW RFC 2070
            {"8207", "rlm", ""}, // right-to-left mark, U+200F NEW RFC 2070
            {"8211", "ndash", "--"}, // en dash, U+2013 ISOpub
            {"8212", "mdash", "---"}, // em dash, U+2014 ISOpub
            {"8216", "lsquo", "{\\textquoteleft}"}, // left single quotation mark,
            //                                   U+2018 ISOnum
            {"8217", "rsquo", "{\\textquoteright}"}, // right single quotation mark,
            //                                   U+2019 ISOnum
            {"8218", "sbquo", "{\\quotesinglbase}"}, // single low-9 quotation mark, U+201A NEW
            {"8220", "ldquo", "{\\textquotedblleft}"}, // left double quotation mark,
            //                                   U+201C ISOnum
            {"8221", "rdquo", "{\\textquotedblright}"}, // right double quotation mark,
            //                                   U+201D ISOnum
            {"8222", "bdquo", "{\\quotedblbase}"}, // double low-9 quotation mark, U+201E NEW
            {"8224", "dagger", "{\\dag}"}, // dagger, U+2020 ISOpub
            {"8225", "Dagger", "{\\ddag}"}, // double dagger, U+2021 ISOpub
            {"8240", "permil", "{\\textperthousand}"}, // per mille sign, U+2030 ISOtech
            {"8249", "lsaquo", "{\\guilsinglleft}"}, // single left-pointing angle quotation mark,
            //                                   U+2039 ISO proposed
            /* lsaquo is proposed but not yet ISO standardized */
            {"8250", "rsaquo", "{\\guilsinglright}"}, // single right-pointing angle quotation mark,
            //                                   U+203A ISO proposed
            /* rsaquo is proposed but not yet ISO standardized */
            {"8364", "euro", "{\\texteuro}"}, // euro sign, U+20AC NEW

            /* Manually added */
            {"35", "", "\\#"}, // Hash
            {"36", "dollar", "\\$"}, // Dollar
            {"37", "percnt", "\\%"}, // Percent
            {"39", "apos", "'"}, // Apostrophe
            {"40", "lpar", "("}, // Left bracket
            {"41", "rpar", ")"}, // Right bracket
            {"42", "", "*"}, // Asterisk
            {"43", "plus", "+"}, // Plus
            {"44", "comma", ","}, // Comma
            {"45", "hyphen", "-"}, // Hyphen
            {"46", "period", "."}, // Period
            {"47", "slash", "/"}, // Slash (solidus)
            {"58", "colon", ":"}, // Colon
            {"59", "semi", ";"}, // Semi colon
            {"61", "equals", "="}, // Equals to
            {"91", "lsqb", "["}, // Left square bracket
            {"92", "bsol", "{\\textbackslash}"}, // Backslash
            {"93", "rsqb", "]"}, // Right square bracket
            {"94", "Hat", "{\\^{}}"}, // Circumflex
            {"95", "lowbar", "\\_"}, // Underscore
            {"96", "grave", "{\\`{}}"}, // Grave
            {"123", "lbrace", "\\{"}, // Left curly bracket
            {"", "lcub", "\\{"}, // Left curly bracket
            {"124", "vert", "|"}, // Vertical bar
            {"", "verbar", "|"}, // Vertical bar
            {"", "VerticalLine", "|"}, // Vertical bar
            {"125", "rbrace", "\\}"}, // Right curly bracket
            {"", "rcub", "\\}"}, // Right curly bracket
            // {"138", "", "{{\\v{S}}}"}, // Line tabulation set
            // {"141", "", ""}, // Reverse line feed
            {"145", "", "`"}, // Apostrophe
            {"146", "", "'"}, // Apostrophe
            {"147", "", "``"}, // Quotation mark
            {"148", "", "''"}, // Quotation mark
            {"150", "", "--"}, // En dash
            // {"154", "", "{\\v{s}}"}, // Single character introducer
            {"256", "", "{{\\={A}}}"}, // capital A with macron
            {"257", "", "{\\={a}}"}, // small a with macron
            {"258", "", "{{\\u{A}}}"}, // capital A with breve
            {"259", "", "{\\u{a}}"}, // small a with breve
            {"260", "Aogon", "{{\\k{A}}}"}, // capital A with ogonek
            {"261", "aogon", "{\\k{a}}"}, // small a with ogonek
            {"262", "Cacute", "{{\\'{C}}}"}, // capital C with acute
            {"263", "cacute", "{\\'{c}}"}, // small C with acute
            {"264", "Ccirc", "{{\\^{C}}}"}, // capital C with circumflex
            {"265", "ccirc", "{\\^{c}}"}, // small C with circumflex
            {"266", "Cdot", "{{\\.{C}}}"}, // capital C with dot above
            {"267", "cdot", "{\\.{c}}"}, // small C with dot above
            {"268", "Ccaron", "{{\\v{C}}}"}, // capital C with caron
            {"269", "ccaron", "{\\v{c}}"}, // small C with caron
            {"270", "", "{{\\v{D}}}"}, // capital D with caron
            {"271", "", "{\\v{d}}"}, // small d with caron
            {"272", "Dstrok", "{{\\DJ}}"}, // capital D with stroke
            {"273", "dstrok", "{{\\dj}}"}, // small d with stroke
            {"274", "", "{{\\={E}}}"}, // capital E with macron
            {"275", "", "{\\={e}}"}, // small e with macron
            {"276", "", "{{\\u{E}}}"}, // capital E with breve
            {"277", "", "{\\u{e}}"}, // small e with breve
            {"278", "", "{{\\.{E}}}"}, // capital E with dot above
            {"279", "", "{\\.{e}}"}, // small e with dot above
            {"280", "Eogon", "{{\\k{E}}}"}, // capital E with ogonek
            {"281", "eogon", "{\\k{e}}"}, // small e with ogonek
            {"282", "", "{{\\v{E}}}"}, // capital E with caron
            {"283", "", "{\\v{e}}"}, // small e with caron
            {"284", "", "{{\\^{G}}}"}, // capital G with circumflex
            {"285", "", "{\\^{g}}"}, // small g with circumflex
            {"286", "", "{{\\u{G}}}"}, // capital G with breve
            {"287", "", "{\\u{g}}"}, // small g with breve
            {"288", "", "{{\\.{G}}}"}, // capital G with dot above
            {"289", "", "{\\.{g}}"}, // small g with dot above
            {"290", "", "{{\\c{G}}}"}, // capital G with cedilla
            {"291", "", "{\\c{g}}"}, // small g with cedilla
            {"292", "", "{{\\^{H}}}"}, // capital H with circumflex
            {"293", "", "{\\^{h}}"}, // small h with circumflex
            {"294", "", "{{\\B{H}}}"}, // capital H with stroke
            {"295", "", "{\\B{h}}"}, // small h with stroke
            {"296", "", "{{\\~{I}}}"}, // capital I with tilde
            {"297", "", "{\\~{\\i}}"}, // small i with tilde
            {"298", "Imacr", "{{\\={I}}}"}, // capital I with macron
            {"299", "imacr", "{\\={\\i}}"}, // small i with macron
            {"300", "", "{{\\u{I}}}"}, // capital I with breve
            {"301", "", "{\\u{\\i}}"}, // small i with breve
            {"302", "Iogon", "{{\\k{I}}}"}, // capital I with ogonek
            {"303", "iogon", "{\\k{i}}"}, // small i with ogonek
            {"304", "Idot", "{{\\.{I}}}"}, // capital I with dot above
            {"305", "inodot", "{\\i}"}, // Small i without the dot
            {"", "imath", "{\\i}"}, // Small i without the dot
            {"306", "", "{{\\IJ}}"}, // Dutch di-graph IJ
            {"307", "", "{{\\ij}}"}, // Dutch di-graph ij
            {"308", "", "{{\\^{J}}}"}, // capital J with circumflex
            {"309", "", "{\\^{\\j}}"}, // small j with circumflex
            {"310", "", "{{\\c{K}}}"}, // capital K with cedilla
            {"311", "", "{\\c{k}}"}, // small k with cedilla
            {"312", "", "{\\textkra}"}, // Letter kra
            {"313", "", "{{\\'{L}}}"}, // capital L with acute
            {"314", "", "{\\'{l}}"}, // small l with acute
            {"315", "", "{{\\c{L}}}"}, // capital L with cedilla
            {"316", "", "{\\c{l}}"}, // small l with cedilla
            {"317", "", "{{\\v{L}}}"}, // capital L with caron
            {"318", "", "{\\v{l}}"}, // small l with caron
            //{"319", "Lmidot", "{\\Lmidot}"}, // upper case L with mid dot
            //{"320", "lmidot", "{\\lmidot}"}, // lower case l with mid dot
            {"321", "Lstrok", "{{\\L}}"}, // upper case L with stroke
            {"322", "lstrok", "{{\\l}}"}, // lower case l with stroke
            {"323", "Nacute", "{{\\'{N}}}"}, // upper case N with acute
            {"324", "nacute", "{{\\'{n}}}"}, // lower case n with acute
            {"325", "", "{{\\c{N}}}"}, // capital N with cedilla
            {"326", "", "{\\c{n}}"}, // small n with cedilla
            {"327", "", "{{\\v{N}}}"}, // capital N with caron
            {"328", "", "{\\v{n}}"}, // small n with caron
            {"329", "", "{'n}"}, // small n preceded with apostroph
            {"330", "", "{{\\NG}}"}, // upper case letter Eng
            {"331", "", "{{\\ng}}"}, // lower case letter Eng
            {"332", "Omacro", "{{\\={O}}}"}, // the capital letter O with macron
            {"333", "omacro", "{\\={o}}"}, // the small letter o with macron
            {"334", "", "{{\\u{O}}}"}, // the capital letter O with breve
            {"335", "", "{\\u{o}}"}, // the small letter o with breve
            {"336", "", "{{\\H{O}}}"}, // the capital letter O with double acute
            {"337", "", "{\\H{o}}"}, // the small letter o with double acute
            {"338", "OElig", "{{\\OE}}"}, // OE-ligature
            {"339", "oelig", "{{\\oe}}"}, // oe-ligature
            {"340", "", "{{\\'{R}}}"}, // upper case R with acute
            {"341", "", "{{\\'{r}}}"}, // lower case r with acute
            {"342", "", "{{\\c{R}}}"}, // upper case R with cedilla
            {"343", "", "{{\\c{r}}}"}, // lower case r with cedilla
            {"344", "", "{{\\v{R}}}"}, // upper case R with caron
            {"345", "", "{{\\v{r}}}"}, // lower case r with caron
            {"346", "", "{{\\'{S}}}"}, // upper case S with acute
            {"347", "", "{{\\'{s}}}"}, // lower case s with acute
            {"348", "Scirc", "{{\\^{S}}}"}, // upper case S with circumflex
            {"349", "scirc", "{\\^{s}}"}, // lower case s with circumflex
            {"350", "Scedil", "{{\\c{S}}}"}, // upper case S with cedilla
            {"351", "scedil", "{\\c{s}}"}, // lower case s with cedilla
            {"352", "Scaron", "{{\\v{S}}}"}, // latin capital letter S with caron,
            {"353", "scaron", "{\\v{s}}"}, // latin small letter s with caron,
            {"354", "", "{{\\c{T}}}"}, // upper case T with cedilla
            {"355", "", "{{\\c{T}}}"}, // lower case t with cedilla
            {"356", "", "{{\\v{T}}}"}, // latin capital letter T with caron,
            {"357", "", "{\\v{t}}"}, // latin small letter t with caron,
            {"358", "", "{{\\B{T}}}"}, // latin capital letter T with stroke,
            {"359", "", "{\\B{t}}"}, // latin small letter t with stroke,
            {"360", "", "{{\\~{U}}}"}, // capital U with tilde
            {"361", "", "{\\~{u}}"}, // small u with tilde
            {"362", "", "{{\\={U}}}"}, // capital U with macron
            {"363", "", "{\\={u}}"}, // small u with macron
            {"364", "", "{{\\u{U}}}"}, // capital U with breve
            {"365", "", "{\\u{u}}"}, // small u with breve
            {"366", "", "{{\\r{U}}}"}, // capital U with ring
            {"367", "", "{\\r{u}}"}, // small u with ring
            {"368", "", "{{\\={U}}}"}, // capital U with double acute
            {"369", "", "{\\={u}}"}, // small u with double acute
            {"370", "Uogon", "{{\\k{U}}}"}, // capital U with ogonek
            {"371", "uogon", "{\\k{u}}"}, // small u with ogonek
            {"372", "", "{{\\^{W}}}"}, // capital W with circumflex
            {"373", "", "{\\^{w}}"}, // small w with circumflex
            {"374", "", "{{\\^{Y}}}"}, // capital Y with circumflex
            {"375", "", "{\\^{y}}"}, // small y with circumflex
            {"376", "Yuml", "{{\\\"{Y}}}"}, // latin capital letter Y with diaeresis,
            {"377", "", "{{\\'{Z}}}"}, // capital Z with acute
            {"378", "", "{\\'{z}}"}, // small z with acute
            {"379", "", "{{\\.{Z}}}"}, // capital Z with dot above
            {"380", "", "{\\.{z}}"}, // small z with dot above
            {"381", "Zcaron", "{{\\v{Z}}}"}, // capital Z with caron
            {"382", "zcaron", "{\\v{z}}"}, // small z with caron
            // {"383", "", ""}, // long s
            {"384", "", "{\\B{b}}"}, // small b with stroke

            {"402", "fnof", "\\textit{f}"}, // latin small f with hook = function

            {"405", "", "{{\\hv}}"}, // small letter Hv

            {"416", "", "{{\\OHORN}}"}, // capital O with horn
            {"417", "", "{{\\ohorn}}"}, // small o with horn

            {"431", "", "{{\\UHORN}}"}, // capital U with horn
            {"432", "", "{{\\uhorn}}"}, // small u with horn

            {"490", "Oogon", "{{\\k{O}}}"}, // capital letter O with ogonek
            {"491", "oogon", "{\\k{o}}"}, // small letter o with ogonek
            {"492", "", "{{\\k{\\={O}}}}"}, // capital letter O with ogonek and macron
            {"493", "", "{\\k{\\={o}}}"}, // small letter o with ogonek and macron

            {"536", "", "{{\\cb{S}}}"}, // capital letter S with comma below, require combelow
            {"537", "", "{\\cb{s}}"}, // small letter S with comma below, require combelow
            {"538", "", "{{\\cb{T}}}"}, // capital letter T with comma below, require combelow
            {"539", "", "{\\cb{t}}"}, // small letter T with comma below, require combelow
            {"710", "circ", "{\\^{}}"}, // modifier letter circumflex accent,
            {"726", "", "+"}, // Modifier plus sign
            {"727", "", "-"}, // Modifier minus sign
            {"728", "breve", "{\\u{}}"}, // Breve
            {"", "Breve", "{\\u{}}"}, // Breve
            {"729", "dot", "{\\.{}}"}, // Dot above
            {"730", "ring", "{\\r{}}"}, // Ring above
            {"731", "ogon", "{\\k{}}"}, // Ogonek
            {"732", "tilde", "\\~{}"}, // Small tilde
            {"733", "dblac", "{{\\H{}}}"}, // Double acute
            {"949", "epsi", "$\\epsilon$"}, // Epsilon - double check
            {"1013", "epsiv", "$\\varepsilonup$"}, // lunate epsilon, requires txfonts
            //{"1055", "", "{{\\cyrchar\\CYRP}}"}, // Cyrillic capital Pe
            //{"1082", "", "{\\cyrchar\\cyrk}"}, // Cyrillic small Ka
            // {"2013", "", ""},    // NKO letter FA -- Maybe en dash = 0x2013?
            // {"2014", "", ""},    // NKO letter FA -- Maybe em dash = 0x2014?
            {"8192", "", "\\hspace{0.5em}"}, // en quad
            {"8193", "", "\\hspace{1em}"}, // em quad
            {"8196", "", "\\hspace{0.333em}"}, // Three-Per-Em Space
            {"8197", "", "\\hspace{0.25em}"}, // Four-Per-Em Space
            {"8198", "", "\\hspace{0.167em}"}, // Six-Per-Em Space
            {"8208", "hyphen", "-"}, // Hyphen
            {"8229", "nldr", "\\.{}\\.{}"}, // Double dots - en leader
            {"8241", "", "{\\textpertenthousand}"}, // per ten thousands sign
            {"8244", "", "{$\\prime\\prime\\prime$}"}, // triple prime
            {"8251", "", "{\\textreferencemark}"}, {"8253", "", "{\\textinterrobang}"},
            {"8320", "", "$_{0}$"}, // sub-script 0
            {"8321", "", "$_{1}$"}, // sub-script 1
            {"8322", "", "$_{2}$"}, // sub-script 2
            {"8323", "", "$_{3}$"}, // sub-script 3
            {"8324", "", "$_{4}$"}, // sub-script 4
            {"8325", "", "$_{5}$"}, // sub-script 5
            {"8326", "", "$_{6}$"}, // sub-script 6
            {"8327", "", "$_{7}$"}, // sub-script 7
            {"8328", "", "$_{8}$"}, // sub-script 8
            {"8329", "", "$_{9}$"}, // sub-script 9
            {"8330", "", "$_{+}$"}, // sub-script +
            {"8331", "", "$_{-}$"}, // sub-script -
            {"8332", "", "$_{-}$"}, // sub-script =
            {"8333", "", "$_{(}$"}, // sub-script (
            {"8334", "", "$_{)}$"}, // sub-script )
            {"8450", "complexes", "$\\mathbb{C}$"}, // double struck capital C -- requires e.g. amsfonts
            {"8451", "", "{\\textcelsius}"}, // Degree Celsius
            {"8459", "Hscr", "{{$\\mathcal{H}$}}"}, // script capital H -- possibly use \mathscr
            {"8460", "Hfr", "{{$\\mathbb{H}$}}"}, // black letter capital H -- requires e.g. amsfonts
            {"8466", "Lscr", "{{$\\mathcal{L}$}}"}, // script capital L -- possibly use \mathscr
            {"8467", "ell", "{$\\ell$}"}, // script small l
            {"8469", "naturals", "{{$\\mathbb{N}$}}"}, // double struck capital N -- requires e.g. amsfonts
            {"8474", "Qopf", "{{$\\mathbb{Q}$}}"}, // double struck capital Q -- requires e.g. amsfonts
            {"8477", "reals", "{{$\\mathbb{R}$}}"}, // double struck capital R -- requires e.g. amsfonts
            {"8486", "", "${{\\Omega}}$"}, // Omega
            {"8491", "angst", "{{\\AA}}"}, // Angstrom
            {"8496", "Escr", "{{$\\mathcal{E}$}}"}, // script capital E
            {"8531", "frac13", "$\\sfrac{1}{3}$"}, // Vulgar fraction one third
            {"8532", "frac23", "$\\sfrac{2}{3}$"}, // Vulgar fraction two thirds
            {"8533", "frac15", "$\\sfrac{1}{5}$"}, // Vulgar fraction one fifth
            {"8534", "frac25", "$\\sfrac{2}{5}$"}, // Vulgar fraction two fifths
            {"8535", "frac35", "$\\sfrac{3}{5}$"}, // Vulgar fraction three fifths
            {"8536", "frac45", "$\\sfrac{4}{5}$"}, // Vulgar fraction four fifths
            {"8537", "frac16", "$\\sfrac{1}{6}$"}, // Vulgar fraction one sixth
            {"8538", "frac56", "$\\sfrac{5}{6}$"}, // Vulgar fraction five sixths
            {"8539", "frac18", "$\\sfrac{1}{8}$"}, // Vulgar fraction one eighth
            {"8540", "frac38", "$\\sfrac{3}{8}$"}, // Vulgar fraction three eighths
            {"8541", "frac58", "$\\sfrac{5}{8}$"}, // Vulgar fraction five eighths
            {"8542", "frac78", "$\\sfrac{7}{8}$"}, // Vulgar fraction seven eighths
            {"8710", "", "$\\triangle$"}, // Increment - could use a more appropriate symbol
            {"8714", "", "$\\in$"}, // Small element in
            {"8723", "mp", "$\\mp$"}, // Minus-plus
            {"8729", "bullet", "$\\bullet$"}, // Bullet operator
            {"8741", "", "$\\parallel$"}, // Parallel to
            {"8758", "ratio", ":"}, // Colon/ratio
            {"8771", "sime", "$\\simeq$"}, // almost equal to = asymptotic to,
            {"8776", "ap", "$\\approx$"}, // almost equal to = asymptotic to,
            {"8810", "ll", "$\\ll$"}, // Much less than
            {"", "Lt", "$\\ll$"}, // Much less than
            {"8811", "gg", "$\\gg$"}, // Much greater than
            {"", "Gt", "$\\gg$"}, // Much greater than
            {"8818", "lsim", "$\\lesssim$"}, // Less than or equivalent to
            {"8819", "gsim", "$\\gtrsim$"}, // Greater than or equivalent to
            {"8862", "boxplus", "$\\boxplus$"}, // Boxed plus -- requires amssymb
            {"8863", "boxminus", "$\\boxminus$"}, // Boxed minus -- requires amssymb
            {"8864", "boxtimes", "$\\boxtimes$"}, // Boxed times -- requires amssymb
            {"8882", "vltri", "$\\triangleleft$"}, // Left triangle
            {"8883", "vrtri", "$\\triangleright$"}, // Right triangle
            {"8896", "xwedge", "$\\bigwedge$"}, // Big wedge
            {"8897", "xvee", "$\\bigvee$"}, // Big vee
            {"8942", "vdots", "$\\vdots$"}, // vertical ellipsis U+22EE
            {"8943", "cdots", "$\\cdots$"}, // midline horizontal ellipsis U+22EF
            /*{"8944", "", "$\\ddots$"}, // up right diagonal ellipsis U+22F0 */
            {"8945", "ddots", "$\\ddots$"}, // down right diagonal ellipsis U+22F1

            {"9426", "circledc", "{\\copyright}"}, // circled small letter C
            {"9633", "square", "$\\square$"}, // White square
            {"9651", "xutri", "$\\bigtriangleup$"}, // White up-pointing big triangle
            {"9653", "utri", "$\\triangle$"}, // White up-pointing small triangle -- \vartriangle probably
            // better but requires amssymb
            {"10877", "les", "$\\leqslant$"}, // Less than slanted equal -- requires amssymb
            {"10878", "ges", "$\\geqslant$"}, // Less than slanted equal -- requires amssymb
            {"64256", "", "ff"}, // ff ligature (which LaTeX solves by itself)
            {"64257", "", "fi"}, // fi ligature (which LaTeX solves by itself)
            {"64258", "", "fl"}, // fl ligature (which LaTeX solves by itself)
            {"64259", "", "ffi"}, // ffi ligature (which LaTeX solves by itself)
            {"64260", "", "ffl"}, // ffl ligature (which LaTeX solves by itself)
            {"119978", "Oscr", "$\\mathcal{O}$"}, // script capital O -- possibly use \mathscr
            {"119984", "Uscr", "$\\mathcal{U}$"} // script capital U -- possibly use \mathscr

    };

    // List of combining accents, one row per Unicode combining character, in the format:
    // {"decimal code point of the combining character", "LaTeX accent command without the leading backslash"}
    // The static initializer of this class fills ESCAPED_ACCENTS (code point -> command) and
    // UNICODE_ESCAPED_ACCENTS (command -> combining character) from these rows.
    // NOTE: the commands "=", "b" and "subdoublebar" are each listed for two code points. The reverse
    // map is filled with put(), so for those commands the later row (773, 818 and 839) is the one kept.
    private static final String[][] ACCENT_LIST = new String[][] {{"768", "`"}, // Grave
            {"769", "'"}, // Acute
            {"770", "^"}, // Circumflex
            {"771", "~"}, // Tilde
            {"772", "="}, // Macron
            {"773", "="}, // Overline - not completely correct
            {"774", "u"}, // Breve
            {"775", "."}, // Dot above
            {"776", "\""}, // Diaeresis
            {"777", "h"}, // Hook above
            {"778", "r"}, // Ring
            {"779", "H"}, // Double acute
            {"780", "v"}, // Caron
            {"781", "|"}, // Vertical line above
            {"782", "U"}, // Double vertical line above
            {"783", "G"}, // Double grave
            {"784", "textdotbreve"}, // Candrabindu
            {"785", "t"}, // Inverted breve
            //        {"786", ""},    // Turned comma above
            //        {"787", ""},    // Comma above
            {"788", "textrevcommaabove"}, // Reversed comma above
            {"789", "textcommaabover"}, // Comma above right
            {"790", "textsubgrave"}, // Grave accent below -requires tipa
            {"791", "textsubacute"}, // Acute accent below - requires tipa
            {"792", "textadvancing"}, // Left tack below - requires tipa
            {"793", "textretracting"}, // Right tack below - requires tipa
            {"794", "textlangleabove"}, // Left angle above
            {"795", "textrighthorn"}, // Horn
            {"796", "textsublhalfring"}, // Left half ring below - requires tipa
            {"797", "textraising"}, // Up tack below - requires tipa
            {"798", "textlowering"}, // Down tack below - requires tipa
            {"799", "textsubplus"}, // Plus sign below - requires tipa
            {"800", "textsubbar"}, // Minus sign below
            {"801", "textpalhookbelow"}, // Palatalized hook below
            {"802", "M"}, // Retroflex hook below - textrethookbelow?
            {"803", "d"}, // Dot below
            {"804", "textsubumlaut"}, // Diaeresis below - requires tipa
            {"805", "textsubring"}, // Ring below - requires tipa
            {"806", "cb"}, // Comma below - requires combelow
            {"807", "c"}, // Cedilla
            {"808", "k"}, // Ogonek
            {"809", "textsyllabic"}, // Vertical line below - requires tipa
            {"810", "textsubbridge"}, // Bridge below - requires tipa
            {"811", "textsubw"}, // Inverted double arch below - requires tipa
            {"812", "textsubwedge"}, // Caron below
            {"813", "textsubcircum"}, // Circumflex accent below - requires tipa
            {"814", "textsubbreve"}, // Breve below
            {"815", "textsubarch"}, // Inverted breve below - requires tipa
            {"816", "textsubtilde"}, // Tilde below - requires tipa
            {"817", "b"}, // Macron below - not completely correct
            {"818", "b"}, // Underline
            {"819", "subdoublebar"}, // Double low line -- requires extraipa
            {"820", "textsuperimposetilde"}, // Tilde overlay - requires tipa
            {"821", "B"}, // Short stroke overlay - textsstrokethru?
            {"822", "textlstrokethru"}, // Long stroke overlay
            {"823", "textsstrikethru"}, // Short solidus overlay
            {"824", "textlstrikethru"}, // Long solidus overlay
            {"825", "textsubrhalfring"}, // Right half ring below - requires tipa
            {"826", "textinvsubbridge"}, // inverted bridge below - requires tipa
            {"827", "textsubsquare"}, // Square below - requires tipa
            {"828", "textseagull"}, // Seagull below - requires tipa
            {"829", "textovercross"}, // X above - requires tipa
            //        {"830", ""},    // Vertical tilde
            //        {"831", ""},    // Double overline
            //        {"832", ""},    // Grave tone mark
            //        {"833", ""},    // Acute tone mark
            //        {"834", ""},    // Greek perispomeni
            //        {"835", ""},    // Greek koronis
            //        {"836", ""},    // Greek dialytika tonos
            //        {"837", ""},    // Greek ypogegrammeni
            {"838", "overbridge"}, // Bridge above - requires extraipa
            {"839", "subdoublebar"}, // Equals sign below - requires extraipa
            {"840", "subdoublevert"}, // Double vertical line below - requires extraipa
            {"841", "subcorner"}, // Left angle below - requires extraipa
            {"842", "crtilde"}, // Not tilde above - requires extraipa
            {"843", "dottedtilde"}, // Homothetic above - requires extraipa
            {"844", "doubletilde"}, // Almost equal to above - requires extraipa
            {"845", "spreadlips"}, // Left right arrow below - requires extraipa
            {"846", "whistle"}, // Upwards arrow below - requires extraipa
            {"861", "textdoublebreve"}, // Double breve
            {"862", "textdoublemacron"}, // Double macron
            {"863", "textdoublemacronbelow"}, // Double macron below
            {"864", "textdoubletilde"}, // Double tilde
            {"865", "texttoptiebar"}, // Double inverted breve
            {"866", "sliding"}, // Double rightwards arrow below - requires extraipa
    };

    static {
        for (String[] aConversionList : CONVERSION_LIST) {
            if (!(aConversionList[2].isEmpty())) {
                String strippedLaTeX = cleanLaTeX(aConversionList[2]);
                if (!(aConversionList[1].isEmpty())) {
                    HTML_LATEX_CONVERSION_MAP.put("&" + aConversionList[1] + ";", aConversionList[2]);
                    if (!strippedLaTeX.isEmpty()) {
                        LATEX_HTML_CONVERSION_MAP.put(strippedLaTeX, "&" + aConversionList[1] + ";");
                    }
                } else if (!(aConversionList[0].isEmpty()) && !strippedLaTeX.isEmpty()) {
                    LATEX_HTML_CONVERSION_MAP.put(strippedLaTeX, "&#" + aConversionList[0] + ";");
                }
                if (!(aConversionList[0].isEmpty())) {
                    NUMERICAL_LATEX_CONVERSION_MAP.put(Integer.decode(aConversionList[0]), aConversionList[2]);
                    if (Integer.decode(aConversionList[0]) > 128) {
                        String unicodeSymbol = String.valueOf(Character.toChars(Integer.decode(aConversionList[0])));
                        UNICODE_LATEX_CONVERSION_MAP.put(unicodeSymbol, aConversionList[2]);
                        if (!strippedLaTeX.isEmpty()) {
                            LATEX_UNICODE_CONVERSION_MAP.put(strippedLaTeX, unicodeSymbol);
                        }
                    }
                }
            }
        }
        for (String[] anAccentList : ACCENT_LIST) {
            ESCAPED_ACCENTS.put(Integer.decode(anAccentList[0]), anAccentList[1]);
            UNICODE_ESCAPED_ACCENTS.put(anAccentList[1],
                    String.valueOf(Character.toChars(Integer.decode(anAccentList[0]))));
        }
        // Manually added values which are killed by cleanLaTeX
        LATEX_HTML_CONVERSION_MAP.put("$", "$");
        LATEX_UNICODE_CONVERSION_MAP.put("$", "$");

        // Manual corrections
        LATEX_HTML_CONVERSION_MAP.put("AA", "Å"); // Overwritten by Å which is less supported
        LATEX_UNICODE_CONVERSION_MAP.put("AA", "Å"); // Overwritten by Ångstrom symbol
        LATEX_UNICODE_CONVERSION_MAP.put("'n", "ń");

        // Manual additions
        // Support relax to the extent that it is simply removed
        LATEX_HTML_CONVERSION_MAP.put("relax", "");
        LATEX_UNICODE_CONVERSION_MAP.put("relax", "");

    }

    /**
     * Private constructor: the class only exposes static conversion maps, so it is never instantiated.
     */
    private HTMLUnicodeConversionMaps() {
    }

    /**
     * Removes every backslash, opening brace, closing brace and dollar sign from a LaTeX string.
     * All other characters are kept in their original order.
     *
     * @param escapedString the LaTeX command as written in the conversion table
     * @return the input without any of the characters {@code \ { } $}; may be empty
     */
    private static String cleanLaTeX(String escapedString) {
        StringBuilder kept = new StringBuilder(escapedString.length());
        for (int index = 0; index < escapedString.length(); index++) {
            char current = escapedString.charAt(index);
            switch (current) {
                case '\\':
                case '{':
                case '}':
                case '$':
                    // LaTeX markup character: drop it
                    break;
                default:
                    kept.append(current);
                    break;
            }
        }
        return kept.toString();
    }

}