package org.jabref.logic.util.strings; import java.util.HashMap; import java.util.Map; public class HTMLUnicodeConversionMaps { // most of the LaTeX commands can be read at http://en.wikibooks.org/wiki/LaTeX/Accents // The symbols can be seen at http://www.fileformat.info/info/unicode/char/a4/index.htm. Replace "a4" with the U+ number // http://detexify.kirelabs.org/classify.html and http://www.ctan.org/tex-archive/info/symbols/comprehensive/ might help to find the right LaTeX command // http://llg.cubic.org/docs/ent2latex.html and http://www.w3.org/TR/xml-entity-names/byalpha.html are also useful public static final Map<String, String> HTML_LATEX_CONVERSION_MAP = new HashMap<>(); public static final Map<Integer, String> ESCAPED_ACCENTS = new HashMap<>(); public static final Map<String, String> UNICODE_ESCAPED_ACCENTS = new HashMap<>(); public static final Map<Integer, String> NUMERICAL_LATEX_CONVERSION_MAP = new HashMap<>(); public static final Map<String, String> UNICODE_LATEX_CONVERSION_MAP = new HashMap<>(); public static final Map<String, String> LATEX_HTML_CONVERSION_MAP = new HashMap<>(); public static final Map<String, String> LATEX_UNICODE_CONVERSION_MAP = new HashMap<>(); /* Portions © International Organization for Standardization 1986: Permission to copy in any form is granted for use with conforming SGML systems and applications as defined in ISO 8879, provided this notice is included in all copies. 
*/ // as well as http://www.w3.org/Math/characters/unicode.xml // An array of arrays of strings in the format: // {"decimal number of HTML entity", "text HTML entity", "corresponding LaTeX command"} // Leaving a field empty is OK as it then will not be included private static final String[][] CONVERSION_LIST = new String[][] {{"160", "nbsp", "{~}"}, // no-break space = non-breaking space, // U+00A0 ISOnum {"161", "iexcl", "{\\textexclamdown}"}, // inverted exclamation mark, U+00A1 ISOnum {"162", "cent", "{\\textcent}"}, // cent sign, U+00A2 ISOnum {"163", "pound", "{\\pounds}"}, // pound sign, U+00A3 ISOnum {"164", "curren", "{\\textcurrency}"}, // currency sign, U+00A4 ISOnum {"165", "yen", "{\\textyen}"}, // yen sign = yuan sign, U+00A5 ISOnum {"166", "brvbar", "{\\textbrokenbar}"}, // broken bar = broken vertical bar, // U+00A6 ISOnum {"167", "sect", "{{\\S}}"}, // section sign, U+00A7 ISOnum {"168", "uml", "{\\\"{}}"}, // diaeresis = spacing diaeresis, // U+00A8 ISOdia {"169", "copy", "{\\copyright}"}, // copyright sign, U+00A9 ISOnum {"170", "ordf", "{\\textordfeminine}"}, // feminine ordinal indicator, U+00AA ISOnum {"171", "laquo", "{\\guillemotleft}"}, // left-pointing double angle quotation mark // = left pointing guillemet, U+00AB ISOnum {"172", "not", "$\\neg$"}, // not sign, U+00AC ISOnum {"173", "shy", "\\-"}, // soft hyphen = discretionary hyphen, // U+00AD ISOnum {"174", "reg", "{\\textregistered}"}, // registered sign = registered trade mark sign, // U+00AE ISOnum {"175", "macr", "{\\={}}"}, // macron = spacing macron = overline // = APL overbar, U+00AF ISOdia {"176", "deg", "{$^{\\circ}$}"}, // degree sign, U+00B0 ISOnum {"177", "plusmn", "$\\pm$"}, // plus-minus sign = plus-or-minus sign, // U+00B1 ISOnum {"178", "sup2", "\\textsuperscript{2}"}, // superscript two = superscript digit two // = squared, U+00B2 ISOnum {"179", "sup3", "\\textsuperscript{3}"}, // superscript three = superscript digit three // = cubed, U+00B3 ISOnum {"180", "acute", 
"{\\'{}}"}, // acute accent = spacing acute, // U+00B4 ISOdia {"181", "micro", "$\\mu$"}, // micro sign, U+00B5 ISOnum {"", "mu", "$\\mu$"}, // micro sign, U+00B5 ISOnum {"182", "para", "{{\\P}}"}, // pilcrow sign = paragraph sign, // U+00B6 ISOnum {"183", "middot", "$\\cdot$"}, // middle dot = Georgian comma // = Greek middle dot, U+00B7 ISOnum {"184", "cedil", "{\\c{}}"}, // cedilla = spacing cedilla, U+00B8 ISOdia {"185", "sup1", "\\textsuperscript{1}"}, // superscript one = superscript digit one, // U+00B9 ISOnum {"186", "ordm", "{\\textordmasculine}"}, // masculine ordinal indicator, // U+00BA ISOnum {"187", "raquo", "{\\guillemotright}"}, // right-pointing double angle quotation mark // = right pointing guillemet, U+00BB ISOnum {"188", "frac14", "$\\sfrac{1}{4}$"}, // vulgar fraction one quarter // = fraction one quarter, U+00BC ISOnum {"189", "frac12", "$\\sfrac{1}{2}$"}, // vulgar fraction one half // = fraction one half, U+00BD ISOnum {"190", "frac34", "$\\sfrac{3}{4}$"}, // vulgar fraction three quarters // = fraction three quarters, U+00BE ISOnum {"191", "iquest", "{\\textquestiondown}"}, // inverted question mark // = turned question mark, U+00BF ISOnum {"192", "Agrave", "{{\\`{A}}}"}, // latin capital letter A with grave // = latin capital letter A grave, // U+00C0 ISOlat1 {"193", "Aacute", "{{\\'{A}}}"}, // latin capital letter A with acute, // U+00C1 ISOlat1 {"194", "Acirc", "{{\\^{A}}}"}, // latin capital letter A with circumflex, // U+00C2 ISOlat1 {"195", "Atilde", "{{\\~{A}}}"}, // latin capital letter A with tilde, // U+00C3 ISOlat1 {"196", "Auml", "{{\\\"{A}}}"}, // latin capital letter A with diaeresis, // U+00C4 ISOlat1 {"197", "Aring", "{{\\AA}}"}, // latin capital letter A with ring above // = latin capital letter A ring, // U+00C5 ISOlat1 {"198", "AElig", "{{\\AE}}"}, // latin capital letter AE // = latin capital ligature AE, // U+00C6 ISOlat1 {"199", "Ccedil", "{{\\c{C}}}"}, // latin capital letter C with cedilla, // U+00C7 ISOlat1 {"200", 
"Egrave", "{{\\`{E}}}"}, // latin capital letter E with grave, // U+00C8 ISOlat1 {"201", "Eacute", "{{\\'{E}}}"}, // latin capital letter E with acute, // U+00C9 ISOlat1 {"202", "Ecirc", "{{\\^{E}}}"}, // latin capital letter E with circumflex, // U+00CA ISOlat1 {"203", "Euml", "{{\\\"{E}}}"}, // latin capital letter E with diaeresis, // U+00CB ISOlat1 {"204", "Igrave", "{{\\`{I}}}"}, // latin capital letter I with grave, // U+00CC ISOlat1 {"205", "Iacute", "{{\\'{I}}}"}, // latin capital letter I with acute, // U+00CD ISOlat1 {"206", "Icirc", "{{\\^{I}}}"}, // latin capital letter I with circumflex, // U+00CE ISOlat1 {"207", "Iuml", "{{\\\"{I}}}"}, // latin capital letter I with diaeresis, // U+00CF ISOlat1 {"208", "ETH", "{{\\DH}}"}, // latin capital letter ETH, U+00D0 ISOlat1 {"209", "Ntilde", "{{\\~{N}}}"}, // latin capital letter N with tilde, // U+00D1 ISOlat1 {"210", "Ograve", "{{\\`{O}}}"}, // latin capital letter O with grave, // U+00D2 ISOlat1 {"211", "Oacute", "{{\\'{O}}}"}, // latin capital letter O with acute, // U+00D3 ISOlat1 {"212", "Ocirc", "{{\\^{O}}}"}, // latin capital letter O with circumflex, // U+00D4 ISOlat1 {"213", "Otilde", "{{\\~{O}}}"}, // latin capital letter O with tilde, // U+00D5 ISOlat1 {"214", "Ouml", "{{\\\"{O}}}"}, // latin capital letter O with diaeresis, // U+00D6 ISOlat1 {"215", "times", "$\\times$"}, // multiplication sign, U+00D7 ISOnum {"216", "Oslash", "{{\\O}}"}, // latin capital letter O with stroke // = latin capital letter O slash, // U+00D8 ISOlat1 {"217", "Ugrave", "{{\\`{U}}}"}, // latin capital letter U with grave, // U+00D9 ISOlat1 {"218", "Uacute", "{{\\'{U}}}"}, // latin capital letter U with acute, // U+00DA ISOlat1 {"219", "Ucirc", "{{\\^{U}}}"}, // latin capital letter U with circumflex, // U+00DB ISOlat1 {"220", "Uuml", "{{\\\"{U}}}"}, // latin capital letter U with diaeresis, // U+00DC ISOlat1 {"221", "Yacute", "{{\\'{Y}}}"}, // latin capital letter Y with acute, // U+00DD ISOlat1 {"222", "THORN", 
"{{\\TH}}"}, // latin capital letter THORN, // U+00DE ISOlat1 {"223", "szlig", "{\\ss}"}, // latin small letter sharp s = ess-zed, // U+00DF ISOlat1 {"224", "agrave", "{\\`{a}}"}, // latin small letter a with grave // = latin small letter a grave, // U+00E0 ISOlat1 {"225", "aacute", "{\\'{a}}"}, // latin small letter a with acute, // U+00E1 ISOlat1 {"226", "acirc", "{\\^{a}}"}, // latin small letter a with circumflex, // U+00E2 ISOlat1 {"227", "atilde", "{\\~{a}}"}, // latin small letter a with tilde, // U+00E3 ISOlat1 {"228", "auml", "{\\\"{a}}"}, // latin small letter a with diaeresis, // U+00E4 ISOlat1 {"229", "aring", "{{\\aa}}"}, // latin small letter a with ring above // = latin small letter a ring, // U+00E5 ISOlat1 {"230", "aelig", "{\\ae}"}, // latin small letter ae // = latin small ligature ae, U+00E6 ISOlat1 {"231", "ccedil", "{\\c{c}}"}, // latin small letter c with cedilla, // U+00E7 ISOlat1 {"232", "egrave", "{\\`{e}}"}, // latin small letter e with grave, // U+00E8 ISOlat1 {"233", "eacute", "{\\'{e}}"}, // latin small letter e with acute, // U+00E9 ISOlat1 {"234", "ecirc", "{\\^{e}}"}, // latin small letter e with circumflex, // U+00EA ISOlat1 {"235", "euml", "{\\\"{e}}"}, // latin small letter e with diaeresis, // U+00EB ISOlat1 {"236", "igrave", "{\\`{i}}"}, // latin small letter i with grave, // U+00EC ISOlat1 {"237", "iacute", "{\\'{i}}"}, // latin small letter i with acute, // U+00ED ISOlat1 {"238", "icirc", "{\\^{i}}"}, // latin small letter i with circumflex, // U+00EE ISOlat1 {"239", "iuml", "{\\\"{i}}"}, // latin small letter i with diaeresis, // U+00EF ISOlat1 {"240", "eth", "{\\dh}"}, // latin small letter eth, U+00F0 ISOlat1 {"241", "ntilde", "{\\~{n}}"}, // latin small letter n with tilde, // U+00F1 ISOlat1 {"242", "ograve", "{\\`{o}}"}, // latin small letter o with grave, // U+00F2 ISOlat1 {"243", "oacute", "{\\'{o}}"}, // latin small letter o with acute, // U+00F3 ISOlat1 {"244", "ocirc", "{\\^{o}}"}, // latin small letter o with 
circumflex, // U+00F4 ISOlat1 {"245", "otilde", "{\\~{o}}"}, // latin small letter o with tilde, // U+00F5 ISOlat1 {"246", "ouml", "{\\\"{o}}"}, // latin small letter o with diaeresis, // U+00F6 ISOlat1 {"247", "divide", "$\\div$"}, // division sign, U+00F7 ISOnum {"248", "oslash", "{\\o}"}, // latin small letter o with stroke, // = latin small letter o slash, // U+00F8 ISOlat1 {"249", "ugrave", "{\\`{u}}"}, // latin small letter u with grave, // U+00F9 ISOlat1 {"250", "uacute", "{\\'{u}}"}, // latin small letter u with acute, // U+00FA ISOlat1 {"251", "ucirc", "{\\^{u}}"}, // latin small letter u with circumflex, // U+00FB ISOlat1 {"252", "uuml", "{\\\"{u}}"}, // latin small letter u with diaeresis, // U+00FC ISOlat1 {"253", "yacute", "{\\'{y}}"}, // latin small letter y with acute, // U+00FD ISOlat1 {"254", "thorn", "{\\th}"}, // latin small letter thorn, // U+00FE ISOlat1 {"255", "yuml", "{\\\"{y}}"}, // latin small letter y with diaeresis, // U+00FF ISOlat1 /* Greek */ {"913", "Alpha", "{{$\\Alpha$}}"}, // greek capital letter alpha, U+0391 {"914", "Beta", "{{$\\Beta$}}"}, // greek capital letter beta, U+0392 {"915", "Gamma", "{{$\\Gamma$}}"}, // greek capital letter gamma, // U+0393 ISOgrk3 {"916", "Delta", "{{$\\Delta$}}"}, // greek capital letter delta, // U+0394 ISOgrk3 {"917", "Epsilon", "{{$\\Epsilon$}}"}, // greek capital letter epsilon, U+0395 {"918", "Zeta", "{{$\\Zeta$}}"}, // greek capital letter zeta, U+0396 {"919", "Eta", "{{$\\Eta$}}"}, // greek capital letter eta, U+0397 {"920", "Theta", "{{$\\Theta$}}"}, // greek capital letter theta, // U+0398 ISOgrk3 {"921", "Iota", "{{$\\Iota$}}"}, // greek capital letter iota, U+0399 {"922", "Kappa", "{{$\\Kappa$}}"}, // greek capital letter kappa, U+039A {"923", "Lambda", "{{$\\Lambda$}}"}, // greek capital letter lambda, // U+039B ISOgrk3 {"924", "Mu", "{{$\\Mu$}}"}, // greek capital letter mu, U+039C {"925", "Nu", "{{$\\Nu$}}"}, // greek capital letter nu, U+039D {"926", "Xi", "{{$\\Xi$}}"}, // greek 
capital letter xi, U+039E ISOgrk3 {"927", "Omicron", "{{$\\Omicron$}}"}, // greek capital letter omicron, U+039F {"928", "Pi", "{{$\\Pi$}}"}, // greek capital letter pi, U+03A0 ISOgrk3 {"929", "Rho", "{{$\\Rho$}}"}, // greek capital letter rho, U+03A1 /* there is no Sigmaf, and no U+03A2 character either */ {"931", "Sigma", "{{$\\Sigma$}}"}, // greek capital letter sigma, // U+03A3 ISOgrk3 {"932", "Tau", "{{$\\Tau$}}"}, // greek capital letter tau, U+03A4 {"933", "Upsilon", "{{$\\Upsilon$}}"}, // greek capital letter upsilon, // U+03A5 ISOgrk3 {"934", "Phi", "{{$\\Phi$}}"}, // greek capital letter phi, // U+03A6 ISOgrk3 {"935", "Chi", "{{$\\Chi$}}"}, // greek capital letter chi, U+03A7 {"936", "Psi", "{{$\\Psi$}}"}, // greek capital letter psi, // U+03A8 ISOgrk3 {"937", "Omega", "{{$\\Omega$}}"}, // greek capital letter omega, // U+03A9 ISOgrk3 {"945", "alpha", "$\\alpha$"}, // greek small letter alpha, // U+03B1 ISOgrk3 {"946", "beta", "$\\beta$"}, // greek small letter beta, U+03B2 ISOgrk3 {"947", "gamma", "$\\gamma$"}, // greek small letter gamma, // U+03B3 ISOgrk3 {"948", "delta", "$\\delta$"}, // greek small letter delta, // U+03B4 ISOgrk3 {"949", "epsilon", "$\\epsilon$"}, // greek small letter epsilon, // U+03B5 ISOgrk3 {"950", "zeta", "$\\zeta$"}, // greek small letter zeta, U+03B6 ISOgrk3 {"951", "eta", "$\\eta$"}, // greek small letter eta, U+03B7 ISOgrk3 {"952", "theta", "$\\theta$"}, // greek small letter theta, // U+03B8 ISOgrk3 {"953", "iota", "$\\iota$"}, // greek small letter iota, U+03B9 ISOgrk3 {"954", "kappa", "$\\kappa$"}, // greek small letter kappa, // U+03BA ISOgrk3 {"955", "lambda", "$\\lambda$"}, // greek small letter lambda, // U+03BB ISOgrk3 {"956", "mu", "$\\mu$"}, // greek small letter mu, U+03BC ISOgrk3 {"957", "nu", "$\\nu$"}, // greek small letter nu, U+03BD ISOgrk3 {"958", "xi", "$\\xi$"}, // greek small letter xi, U+03BE ISOgrk3 {"959", "omicron", "$\\omicron$"}, // greek small letter omicron, U+03BF NEW {"960", "pi", "$\\phi$"}, 
// greek small letter pi, U+03C0 ISOgrk3 {"961", "rho", "$\\rho$"}, // greek small letter rho, U+03C1 ISOgrk3 {"962", "sigmaf", "$\\varsigma$"}, // greek small letter final sigma, // U+03C2 ISOgrk3 {"963", "sigma", "$\\sigma$"}, // greek small letter sigma, // U+03C3 ISOgrk3 {"964", "tau", "$\\tau$"}, // greek small letter tau, U+03C4 ISOgrk3 {"965", "upsilon", "$\\upsilon$"}, // greek small letter upsilon, {"", "upsi", "$\\upsilon$"}, // alias // U+03C5 ISOgrk3 {"966", "phi", "$\\phi$"}, // greek small letter phi, U+03C6 ISOgrk3 {"967", "chi", "$\\chi$"}, // greek small letter chi, U+03C7 ISOgrk3 {"968", "psi", "$\\psi$"}, // greek small letter psi, U+03C8 ISOgrk3 {"969", "omega", "$\\omega$"}, // greek small letter omega, // U+03C9 ISOgrk3 {"977", "thetasym", "$\\vartheta$"}, // greek small letter theta symbol, {"", "thetav", "$\\vartheta$"}, // greek small letter theta symbol, {"", "vartheta", "$\\vartheta$"}, // greek small letter theta symbol, // U+03D1 NEW {"978", "upsih", "{{$\\Upsilon$}}"}, // greek upsilon with hook symbol, // U+03D2 NEW {"982", "piv", "$\\varphi$"}, // greek pi symbol, U+03D6 ISOgrk3 /* General Punctuation */ {"8226", "bull", "$\\bullet$"}, // bullet = black small circle, // U+2022 ISOpub /* bullet is NOT the same as bullet operator, U+2219 */ {"8230", "hellip", "{\\ldots}"}, // horizontal ellipsis = three dot leader, // U+2026 ISOpub {"8242", "prime", "$\\prime$"}, // prime = minutes = feet, U+2032 ISOtech {"8243", "Prime", "$\\prime\\prime$"}, // double prime = seconds = inches, // U+2033 ISOtech {"8254", "oline", "{\\={}}"}, // overline = spacing overscore, // U+203E NEW {"8260", "frasl", "/"}, // fraction slash, U+2044 NEW /* Letterlike Symbols */ {"8472", "weierp", "$\\wp$"}, // script capital P = power set // = Weierstrass p, U+2118 ISOamso {"8465", "image", "{{$\\Im$}}"}, // blackletter capital I = imaginary part, // U+2111 ISOamso {"8476", "real", "{{$\\Re$}}"}, // blackletter capital R = real part symbol, // U+211C ISOamso 
{"8482", "trade", "{\\texttrademark}"}, // trade mark sign, U+2122 ISOnum {"8501", "alefsym", "$\\aleph$"}, // alef symbol = first transfinite cardinal, // U+2135 NEW /* alef symbol is NOT the same as hebrew letter alef, U+05D0 although the same glyph could be used to depict both characters */ /* Arrows */ {"8592", "larr", "$\\leftarrow$"}, // leftwards arrow, U+2190 ISOnum {"8593", "uarr", "$\\uparrow$"}, // upwards arrow, U+2191 ISOnum {"8594", "rarr", "$\\rightarrow$"}, // rightwards arrow, U+2192 ISOnum {"8595", "darr", "$\\downarrow$"}, // downwards arrow, U+2193 ISOnum {"8596", "harr", "$\\leftrightarrow$"}, // left right arrow, U+2194 ISOamsa {"8629", "crarr", "$\\dlsh$"}, // downwards arrow with corner leftwards // = carriage return, U+21B5 NEW - require mathabx {"8656", "lArr", "{{$\\Leftarrow$}}"}, // leftwards double arrow, U+21D0 ISOtech /* ISO 10646 does not say that lArr is the same as the 'is implied by' arrow but also does not have any other character for that function. So ? lArr can be used for 'is implied by' as ISOtech suggests */ {"8657", "uArr", "{{$\\Uparrow$}}"}, // upwards double arrow, U+21D1 ISOamsa {"8658", "rArr", "{{$\\Rightarrow$}}"}, // rightwards double arrow, // U+21D2 ISOtech /* ISO 10646 does not say this is the 'implies' character but does not have another character with this function so ? 
rArr can be used for 'implies' as ISOtech suggests */ {"8659", "dArr", "{{$\\Downarrow$}}"}, // downwards double arrow, U+21D3 ISOamsa {"8660", "hArr", "{{$\\Leftrightarrow$}}"}, // left right double arrow, // U+21D4 ISOamsa /* Mathematical Operators */ {"8704", "forall", "$\\forall$"}, // for all, U+2200 ISOtech {"8706", "part", "$\\partial$"}, // partial differential, U+2202 ISOtech {"8707", "exist", "$\\exists$"}, // there exists, U+2203 ISOtech {"8709", "empty", "$\\emptyset$"}, // empty set = null set = diameter, // U+2205 ISOamso {"8711", "nabla", "$\\nabla$"}, // nabla = backward difference, // U+2207 ISOtech {"8712", "isin", "$\\in$"}, // element of, U+2208 ISOtech {"8713", "notin", "$\\notin$"}, // not an element of, U+2209 ISOtech {"8715", "ni", "$\\ni$"}, // contains as member, U+220B ISOtech /* should there be a more memorable name than 'ni'? */ {"8719", "prod", "$\\prod$"}, // n-ary product = product sign, // U+220F ISOamsb /* prod is NOT the same character as U+03A0 'greek capital letter pi' though the same glyph might be used for both */ {"8721", "sum", "$\\sum$"}, // n-ary sumation, U+2211 ISOamsb /* sum is NOT the same character as U+03A3 'greek capital letter sigma' though the same glyph might be used for both */ {"8722", "minus", "$-$"}, // minus sign, U+2212 ISOtech {"8727", "lowast", "$\\ast$"}, // asterisk operator, U+2217 ISOtech {"8730", "radic", "$\\sqrt{}$"}, // square root = radical sign, // U+221A ISOtech {"8733", "prop", "$\\propto$"}, // proportional to, U+221D ISOtech {"8734", "infin", "$\\infty$"}, // infinity, U+221E ISOtech {"8736", "ang", "$\\angle$"}, // angle, U+2220 ISOamso {"8743", "and", "$\\land$"}, // logical and = wedge, U+2227 ISOtech {"8744", "or", "$\\lor$"}, // logical or = vee, U+2228 ISOtech {"8745", "cap", "$\\cap$"}, // intersection = cap, U+2229 ISOtech {"8746", "cup", "$\\cup$"}, // union = cup, U+222A ISOtech {"8747", "int", "$\\int$"}, // integral, U+222B ISOtech {"8756", "there4", "$\\therefore$"}, // 
therefore, U+2234 ISOtech; AMSSymb {"8764", "sim", "$\\sim$"}, // tilde operator = varies with = similar to, // U+223C ISOtech /* tilde operator is NOT the same character as the tilde, U+007E, although the same glyph might be used to represent both */ {"8773", "cong", "$\\cong$"}, // approximately equal to, U+2245 ISOtech {"8776", "asymp", "$\\approx$"}, // almost equal to = asymptotic to, // U+2248 ISOamsr {"8800", "ne", "$\\neq$"}, // not equal to, U+2260 ISOtech {"8801", "equiv", "$\\equiv$"}, // identical to, U+2261 ISOtech {"8804", "le", "$\\leq$"}, // less-than or equal to, U+2264 ISOtech {"8805", "ge", "$\\geq$"}, // greater-than or equal to, // U+2265 ISOtech {"8834", "sub", "$\\subset$"}, // subset of, U+2282 ISOtech {"8835", "sup", "$\\supset$"}, // superset of, U+2283 ISOtech /* note that nsup, 'not a superset of, U+2283' is not covered by the Symbol font encoding and is not included. Should it be, for symmetry? It is in ISOamsn */ {"8836", "nsub", "$\\not\\subset$"}, // not a subset of, U+2284 ISOamsn {"8838", "sube", "$\\subseteq$"}, // subset of or equal to, U+2286 ISOtech {"8839", "supe", "$\\supseteq$"}, // superset of or equal to, // U+2287 ISOtech {"8853", "oplus", "$\\oplus$"}, // circled plus = direct sum, // U+2295 ISOamsb {"8855", "otimes", "$\\otimes$"}, // circled times = vector product, // U+2297 ISOamsb {"8869", "perp", "$\\perp$"}, // up tack = orthogonal to = perpendicular, // U+22A5 ISOtech {"8901", "sdot", "$\\cdot$"}, // dot operator, U+22C5 ISOamsb /* dot operator is NOT the same character as U+00B7 middle dot */ {"8968", "lceil", "$\\lceil$"}, // left ceiling = apl upstile, // U+2308 ISOamsc {"8969", "rceil", "$\\rceil$"}, // right ceiling, U+2309 ISOamsc {"8970", "lfloor", "$\\lfloor$"}, // left floor = apl downstile, // U+230A ISOamsc {"8971", "rfloor", "$\\rfloor$"}, // right floor, U+230B ISOamsc /* Miscellaneous Technical */ {"9001", "lang", "$\\langle$"}, // left-pointing angle bracket = bra, // U+2329 ISOtech /* lang is NOT 
the same character as U+003C 'less than' or U+2039 'single left-pointing angle quotation mark' */ {"9002", "rang", "$\\rangle$"}, // right-pointing angle bracket = ket, // U+232A ISOtech /* rang is NOT the same character as U+003E 'greater than' or U+203A 'single right-pointing angle quotation mark' */ /* Geometric Shapes */ {"9674", "loz", "$\\lozenge$"}, // lozenge, U+25CA ISOpub /* Miscellaneous Symbols */ {"9824", "spades", "$\\spadesuit$"}, // black spade suit, U+2660 ISOpub /* black here seems to mean filled as opposed to hollow */ {"9827", "clubs", "$\\clubsuit$"}, // black club suit = shamrock, // U+2663 ISOpub {"9829", "hearts", "$\\heartsuit$"}, // black heart suit = valentine, // U+2665 ISOpub {"9830", "diams", "$\\diamondsuit$"}, // black diamond suit, U+2666 ISOpub {"34", "quot", "\""}, // quotation mark = APL quote, // U+0022 ISOnum {"38", "amp", "\\&"}, // ampersand, U+0026 ISOnum {"60", "lt", "$<$"}, // less-than sign, U+003C ISOnum {"62", "gt", "$>$"}, // greater-than sign, U+003E ISOnum /* General Punctuation */ {"8194", "ensp", "\\hspace{0.5em}"}, // en space, U+2002 ISOpub {"8195", "emsp", "\\hspace{1em}"}, // em space, U+2003 ISOpub {"8201", "thinsp", "\\hspace{0.167em}"}, // thin space, U+2009 ISOpub {"8202", "", "\\hspace{0.1em}"}, // hair space, U+2010 ISOpub {"8204", "zwnj", "\\/{}"}, // zero width non-joiner, // U+200C NEW RFC 2070 {"8205", "zwj", ""}, // zero width joiner, U+200D NEW RFC 2070 {"8206", "lrm", ""}, // left-to-right mark, U+200E NEW RFC 2070 {"8207", "rlm", ""}, // right-to-left mark, U+200F NEW RFC 2070 {"8211", "ndash", "--"}, // en dash, U+2013 ISOpub {"8212", "mdash", "---"}, // em dash, U+2014 ISOpub {"8216", "lsquo", "{\\textquoteleft}"}, // left single quotation mark, // U+2018 ISOnum {"8217", "rsquo", "{\\textquoteright}"}, // right single quotation mark, // U+2019 ISOnum {"8218", "sbquo", "{\\quotesinglbase}"}, // single low-9 quotation mark, U+201A NEW {"8220", "ldquo", "{\\textquotedblleft}"}, // left double 
quotation mark, // U+201C ISOnum {"8221", "rdquo", "{\\textquotedblright}"}, // right double quotation mark, // U+201D ISOnum {"8222", "bdquo", "{\\quotedblbase}"}, // double low-9 quotation mark, U+201E NEW {"8224", "dagger", "{\\dag}"}, // dagger, U+2020 ISOpub {"8225", "Dagger", "{\\ddag}"}, // double dagger, U+2021 ISOpub {"8240", "permil", "{\\textperthousand}"}, // per mille sign, U+2030 ISOtech {"8249", "lsaquo", "{\\guilsinglleft}"}, // single left-pointing angle quotation mark, // U+2039 ISO proposed /* lsaquo is proposed but not yet ISO standardized */ {"8250", "rsaquo", "{\\guilsinglright}"}, // single right-pointing angle quotation mark, // U+203A ISO proposed /* rsaquo is proposed but not yet ISO standardized */ {"8364", "euro", "{\\texteuro}"}, // euro sign, U+20AC NEW /* Manually added */ {"35", "", "\\#"}, // Hash {"36", "dollar", "\\$"}, // Dollar {"37", "percnt", "\\%"}, // Percent {"39", "apos", "'"}, // Apostrophe {"40", "lpar", "("}, // Left bracket {"41", "rpar", ")"}, // Right bracket {"42", "", "*"}, // Asterisk {"43", "plus", "+"}, // Plus {"44", "comma", ","}, // Comma {"45", "hyphen", "-"}, // Hyphen {"46", "period", "."}, // Period {"47", "slash", "/"}, // Slash (solidus) {"58", "colon", ":"}, // Colon {"59", "semi", ";"}, // Semi colon {"61", "equals", "="}, // Equals to {"91", "lsqb", "["}, // Left square bracket {"92", "bsol", "{\\textbackslash}"}, // Backslash {"93", "rsqb", "]"}, // Right square bracket {"94", "Hat", "{\\^{}}"}, // Circumflex {"95", "lowbar", "\\_"}, // Underscore {"96", "grave", "{\\`{}}"}, // Grave {"123", "lbrace", "\\{"}, // Left curly bracket {"", "lcub", "\\{"}, // Left curly bracket {"124", "vert", "|"}, // Vertical bar {"", "verbar", "|"}, // Vertical bar {"", "VerticalLine", "|"}, // Vertical bar {"125", "rbrace", "\\}"}, // Right curly bracket {"", "rcub", "\\}"}, // Right curly bracket // {"138", "", "{{\\v{S}}}"}, // Line tabulation set // {"141", "", ""}, // Reverse line feed {"145", "", "`"}, // 
Apostrophe {"146", "", "'"}, // Apostrophe {"147", "", "``"}, // Quotation mark {"148", "", "''"}, // Quotation mark {"150", "", "--"}, // En dash // {"154", "", "{\\v{s}}"}, // Single character introducer {"256", "", "{{\\={A}}}"}, // capital A with macron {"257", "", "{\\={a}}"}, // small a with macron {"258", "", "{{\\u{A}}}"}, // capital A with breve {"259", "", "{\\u{a}}"}, // small a with breve {"260", "Aogon", "{{\\k{A}}}"}, // capital A with ogonek {"261", "aogon", "{\\k{a}}"}, // small a with ogonek {"262", "Cacute", "{{\\'{C}}}"}, // capital C with acute {"263", "cacute", "{\\'{c}}"}, // small C with acute {"264", "Ccirc", "{{\\^{C}}}"}, // capital C with circumflex {"265", "ccirc", "{\\^{c}}"}, // small C with circumflex {"266", "Cdot", "{{\\.{C}}}"}, // capital C with dot above {"267", "cdot", "{\\.{c}}"}, // small C with dot above {"268", "Ccaron", "{{\\v{C}}}"}, // capital C with caron {"269", "ccaron", "{\\v{c}}"}, // small C with caron {"270", "", "{{\\v{D}}}"}, // capital D with caron {"271", "", "{\\v{d}}"}, // small d with caron {"272", "Dstrok", "{{\\DJ}}"}, // capital D with stroke {"273", "dstrok", "{{\\dj}}"}, // small d with stroke {"274", "", "{{\\={E}}}"}, // capital E with macron {"275", "", "{\\={e}}"}, // small e with macron {"276", "", "{{\\u{E}}}"}, // capital E with breve {"277", "", "{\\u{e}}"}, // small e with breve {"278", "", "{{\\.{E}}}"}, // capital E with dot above {"279", "", "{\\.{e}}"}, // small e with dot above {"280", "Eogon", "{{\\k{E}}}"}, // capital E with ogonek {"281", "eogon", "{\\k{e}}"}, // small e with ogonek {"282", "", "{{\\v{E}}}"}, // capital E with caron {"283", "", "{\\v{e}}"}, // small e with caron {"284", "", "{{\\^{G}}}"}, // capital G with circumflex {"285", "", "{\\^{g}}"}, // small g with circumflex {"286", "", "{{\\u{G}}}"}, // capital G with breve {"287", "", "{\\u{g}}"}, // small g with breve {"288", "", "{{\\.{G}}}"}, // capital G with dot above {"289", "", "{\\.{g}}"}, // small g with dot above 
{"290", "", "{{\\c{G}}}"}, // capital G with cedilla {"291", "", "{\\c{g}}"}, // small g with cedilla {"292", "", "{{\\^{H}}}"}, // capital H with circumflex {"293", "", "{\\^{h}}"}, // small h with circumflex {"294", "", "{{\\B{H}}}"}, // capital H with stroke {"295", "", "{\\B{h}}"}, // small h with stroke {"296", "", "{{\\~{I}}}"}, // capital I with tilde {"297", "", "{\\~{\\i}}"}, // small i with tilde {"298", "Imacr", "{{\\={I}}}"}, // capital I with macron {"299", "imacr", "{\\={\\i}}"}, // small i with macron {"300", "", "{{\\u{I}}}"}, // capital I with breve {"301", "", "{\\u{\\i}}"}, // small i with breve {"302", "Iogon", "{{\\k{I}}}"}, // capital I with ogonek {"303", "iogon", "{\\k{i}}"}, // small i with ogonek {"304", "Idot", "{{\\.{I}}}"}, // capital I with dot above {"305", "inodot", "{\\i}"}, // Small i without the dot {"", "imath", "{\\i}"}, // Small i without the dot {"306", "", "{{\\IJ}}"}, // Dutch di-graph IJ {"307", "", "{{\\ij}}"}, // Dutch di-graph ij {"308", "", "{{\\^{J}}}"}, // capital J with circumflex {"309", "", "{\\^{\\j}}"}, // small j with circumflex {"310", "", "{{\\c{K}}}"}, // capital K with cedilla {"311", "", "{\\c{k}}"}, // small k with cedilla {"312", "", "{\\textkra}"}, // Letter kra {"313", "", "{{\\'{L}}}"}, // capital L with acute {"314", "", "{\\'{l}}"}, // small l with acute {"315", "", "{{\\c{L}}}"}, // capital L with cedilla {"316", "", "{\\c{l}}"}, // small l with cedilla {"317", "", "{{\\v{L}}}"}, // capital L with caron {"318", "", "{\\v{l}}"}, // small l with caron //{"319", "Lmidot", "{\\Lmidot}"}, // upper case L with mid dot //{"320", "lmidot", "{\\lmidot}"}, // lower case l with mid dot {"321", "Lstrok", "{{\\L}}"}, // upper case L with stroke {"322", "lstrok", "{{\\l}}"}, // lower case l with stroke {"323", "Nacute", "{{\\'{N}}}"}, // upper case N with acute {"324", "nacute", "{{\\'{n}}}"}, // lower case n with acute {"325", "", "{{\\c{N}}}"}, // capital N with cedilla {"326", "", "{\\c{n}}"}, // small n with 
// cedilla
            // Row format: {decimal number of HTML entity, text HTML entity, LaTeX command};
            // an empty field means that key is not included.
            {"327", "", "{{\\v{N}}}"}, // capital N with caron
            {"328", "", "{\\v{n}}"}, // small n with caron
            {"329", "", "{'n}"}, // small n preceded with apostroph
            {"330", "", "{{\\NG}}"}, // upper case letter Eng
            {"331", "", "{{\\ng}}"}, // lower case letter Eng
            {"332", "Omacro", "{{\\={O}}}"}, // the capital letter O with macron
            {"333", "omacro", "{\\={o}}"}, // the small letter o with macron
            {"334", "", "{{\\u{O}}}"}, // the capital letter O with breve
            {"335", "", "{\\u{o}}"}, // the small letter o with breve
            {"336", "", "{{\\H{O}}}"}, // the capital letter O with double acute
            {"337", "", "{\\H{o}}"}, // the small letter o with double acute
            {"338", "OElig", "{{\\OE}}"}, // OE-ligature
            {"339", "oelig", "{{\\oe}}"}, // oe-ligature
            {"340", "", "{{\\'{R}}}"}, // upper case R with acute
            {"341", "", "{{\\'{r}}}"}, // lower case r with acute
            {"342", "", "{{\\c{R}}}"}, // upper case R with cedilla
            {"343", "", "{{\\c{r}}}"}, // lower case r with cedilla
            {"344", "", "{{\\v{R}}}"}, // upper case R with caron
            {"345", "", "{{\\v{r}}}"}, // lower case r with caron
            {"346", "", "{{\\'{S}}}"}, // upper case S with acute
            {"347", "", "{{\\'{s}}}"}, // lower case s with acute
            {"348", "Scirc", "{{\\^{S}}}"}, // upper case S with circumflex
            {"349", "scirc", "{\\^{s}}"}, // lower case s with circumflex
            {"350", "Scedil", "{{\\c{S}}}"}, // upper case S with cedilla
            {"351", "scedil", "{\\c{s}}"}, // lower case s with cedilla
            {"352", "Scaron", "{{\\v{S}}}"}, // latin capital letter S with caron,
            {"353", "scaron", "{\\v{s}}"}, // latin small letter s with caron,
            {"354", "", "{{\\c{T}}}"}, // upper case T with cedilla
            // Fixed: the lowercase entry repeated the uppercase \c{T} of entity 354.
            {"355", "", "{\\c{t}}"}, // lower case t with cedilla
            {"356", "", "{{\\v{T}}}"}, // latin capital letter T with caron,
            {"357", "", "{\\v{t}}"}, // latin small letter t with caron,
            {"358", "", "{{\\B{T}}}"}, // latin capital letter T with stroke,
            {"359", "", "{\\B{t}}"}, // latin small letter t with stroke,
            {"360", "", "{{\\~{U}}}"}, // capital U with tilde
            {"361", "", "{\\~{u}}"}, // small u with tilde
            {"362", "", "{{\\={U}}}"}, // capital U with macron
            {"363", "", "{\\={u}}"}, // small u with macron
            {"364", "", "{{\\u{U}}}"}, // capital U with breve
            {"365", "", "{\\u{u}}"}, // small u with breve
            {"366", "", "{{\\r{U}}}"}, // capital U with ring
            {"367", "", "{\\r{u}}"}, // small u with ring
            // Fixed: double acute is \H (as used for entities 336/337), not the macron \=
            // that entities 362/363 already use.
            {"368", "", "{{\\H{U}}}"}, // capital U with double acute
            {"369", "", "{\\H{u}}"}, // small u with double acute
            {"370", "Uogon", "{{\\k{U}}}"}, // capital U with ogonek
            {"371", "uogon", "{\\k{u}}"}, // small u with ogonek
            {"372", "", "{{\\^{W}}}"}, // capital W with circumflex
            {"373", "", "{\\^{w}}"}, // small w with circumflex
            {"374", "", "{{\\^{Y}}}"}, // capital Y with circumflex
            {"375", "", "{\\^{y}}"}, // small y with circumflex
            {"376", "Yuml", "{{\\\"{Y}}}"}, // latin capital letter Y with diaeresis,
            {"377", "", "{{\\'{Z}}}"}, // capital Z with acute
            {"378", "", "{\\'{z}}"}, // small z with acute
            {"379", "", "{{\\.{Z}}}"}, // capital Z with dot above
            {"380", "", "{\\.{z}}"}, // small z with dot above
            {"381", "Zcaron", "{{\\v{Z}}}"}, // capital Z with caron
            {"382", "zcaron", "{\\v{z}}"}, // small z with caron
            // {"383", "", ""}, // long s
            {"384", "", "{\\B{b}}"}, // small b with stroke
            {"402", "fnof", "\\textit{f}"}, // latin small f with hook = function
            {"405", "", "{{\\hv}}"}, // small letter Hv
            {"416", "", "{{\\OHORN}}"}, // capital O with horn
            {"417", "", "{{\\ohorn}}"}, // small o with horn
            {"431", "", "{{\\UHORN}}"}, // capital U with horn
            {"432", "", "{{\\uhorn}}"}, // small u with horn
            {"490", "Oogon", "{{\\k{O}}}"}, // capital letter O with ogonek
            {"491", "oogon", "{\\k{o}}"}, // small letter o with ogonek
            {"492", "", "{{\\k{\\={O}}}}"}, // capital letter O with ogonek and macron
            {"493", "", "{\\k{\\={o}}}"}, // small letter o with ogonek and macron
            {"536", "", "{{\\cb{S}}}"}, // capital letter S with comma below, require combelow
            {"537", "", "{\\cb{s}}"}, // small letter S with comma below, require combelow
            {"538", "",
"{{\\cb{T}}}"}, // capital letter T with comma below, require combelow {"539", "", "{\\cb{t}}"}, // small letter T with comma below, require combelow {"710", "circ", "{\\^{}}"}, // modifier letter circumflex accent, {"726", "", "+"}, // Modifier plus sign {"727", "", "-"}, // Modifier minus sign {"728", "breve", "{\\u{}}"}, // Breve {"", "Breve", "{\\u{}}"}, // Breve {"729", "dot", "{\\.{}}"}, // Dot above {"730", "ring", "{\\r{}}"}, // Ring above {"731", "ogon", "{\\k{}}"}, // Ogonek {"732", "tilde", "\\~{}"}, // Small tilde {"733", "dblac", "{{\\H{}}}"}, // Double acute {"949", "epsi", "$\\epsilon$"}, // Epsilon - double check {"1013", "epsiv", "$\\varepsilonup$"}, // lunate epsilon, requires txfonts //{"1055", "", "{{\\cyrchar\\CYRP}}"}, // Cyrillic capital Pe //{"1082", "", "{\\cyrchar\\cyrk}"}, // Cyrillic small Ka // {"2013", "", ""}, // NKO letter FA -- Maybe en dash = 0x2013? // {"2014", "", ""}, // NKO letter FA -- Maybe em dash = 0x2014? {"8192", "", "\\hspace{0.5em}"}, // en quad {"8193", "", "\\hspace{1em}"}, // em quad {"8196", "", "\\hspace{0.333em}"}, // Three-Per-Em Space {"8197", "", "\\hspace{0.25em}"}, // Four-Per-Em Space {"8198", "", "\\hspace{0.167em}"}, // Six-Per-Em Space {"8208", "hyphen", "-"}, // Hyphen {"8229", "nldr", "\\.{}\\.{}"}, // Double dots - en leader {"8241", "", "{\\textpertenthousand}"}, // per ten thousands sign {"8244", "", "{$\\prime\\prime\\prime$}"}, // triple prime {"8251", "", "{\\textreferencemark}"}, {"8253", "", "{\\textinterrobang}"}, {"8320", "", "$_{0}$"}, // sub-script 0 {"8321", "", "$_{1}$"}, // sub-script 1 {"8322", "", "$_{2}$"}, // sub-script 2 {"8323", "", "$_{3}$"}, // sub-script 3 {"8324", "", "$_{4}$"}, // sub-script 4 {"8325", "", "$_{5}$"}, // sub-script 5 {"8326", "", "$_{6}$"}, // sub-script 6 {"8327", "", "$_{7}$"}, // sub-script 7 {"8328", "", "$_{8}$"}, // sub-script 8 {"8329", "", "$_{9}$"}, // sub-script 9 {"8330", "", "$_{+}$"}, // sub-script + {"8331", "", "$_{-}$"}, // sub-script - {"8332", 
"", "$_{-}$"}, // sub-script = {"8333", "", "$_{(}$"}, // sub-script ( {"8334", "", "$_{)}$"}, // sub-script ) {"8450", "complexes", "$\\mathbb{C}$"}, // double struck capital C -- requires e.g. amsfonts {"8451", "", "{\\textcelsius}"}, // Degree Celsius {"8459", "Hscr", "{{$\\mathcal{H}$}}"}, // script capital H -- possibly use \mathscr {"8460", "Hfr", "{{$\\mathbb{H}$}}"}, // black letter capital H -- requires e.g. amsfonts {"8466", "Lscr", "{{$\\mathcal{L}$}}"}, // script capital L -- possibly use \mathscr {"8467", "ell", "{$\\ell$}"}, // script small l {"8469", "naturals", "{{$\\mathbb{N}$}}"}, // double struck capital N -- requires e.g. amsfonts {"8474", "Qopf", "{{$\\mathbb{Q}$}}"}, // double struck capital Q -- requires e.g. amsfonts {"8477", "reals", "{{$\\mathbb{R}$}}"}, // double struck capital R -- requires e.g. amsfonts {"8486", "", "${{\\Omega}}$"}, // Omega {"8491", "angst", "{{\\AA}}"}, // Angstrom {"8496", "Escr", "{{$\\mathcal{E}$}}"}, // script capital E {"8531", "frac13", "$\\sfrac{1}{3}$"}, // Vulgar fraction one third {"8532", "frac23", "$\\sfrac{2}{3}$"}, // Vulgar fraction two thirds {"8533", "frac15", "$\\sfrac{1}{5}$"}, // Vulgar fraction one fifth {"8534", "frac25", "$\\sfrac{2}{5}$"}, // Vulgar fraction two fifths {"8535", "frac35", "$\\sfrac{3}{5}$"}, // Vulgar fraction three fifths {"8536", "frac45", "$\\sfrac{4}{5}$"}, // Vulgar fraction four fifths {"8537", "frac16", "$\\sfrac{1}{6}$"}, // Vulgar fraction one sixth {"8538", "frac56", "$\\sfrac{5}{6}$"}, // Vulgar fraction five sixths {"8539", "frac18", "$\\sfrac{1}{8}$"}, // Vulgar fraction one eighth {"8540", "frac38", "$\\sfrac{3}{8}$"}, // Vulgar fraction three eighths {"8541", "frac58", "$\\sfrac{5}{8}$"}, // Vulgar fraction five eighths {"8542", "frac78", "$\\sfrac{7}{8}$"}, // Vulgar fraction seven eighths {"8710", "", "$\\triangle$"}, // Increment - could use a more appropriate symbol {"8714", "", "$\\in$"}, // Small element in {"8723", "mp", "$\\mp$"}, // Minus-plus {"8729", 
"bullet", "$\\bullet$"}, // Bullet operator {"8741", "", "$\\parallel$"}, // Parallel to {"8758", "ratio", ":"}, // Colon/ratio {"8771", "sime", "$\\simeq$"}, // almost equal to = asymptotic to, {"8776", "ap", "$\\approx$"}, // almost equal to = asymptotic to, {"8810", "ll", "$\\ll$"}, // Much less than {"", "Lt", "$\\ll$"}, // Much less than {"8811", "gg", "$\\gg$"}, // Much greater than {"", "Gt", "$\\gg$"}, // Much greater than {"8818", "lsim", "$\\lesssim$"}, // Less than or equivalent to {"8819", "gsim", "$\\gtrsim$"}, // Greater than or equivalent to {"8862", "boxplus", "$\\boxplus$"}, // Boxed plus -- requires amssymb {"8863", "boxminus", "$\\boxminus$"}, // Boxed minus -- requires amssymb {"8864", "boxtimes", "$\\boxtimes$"}, // Boxed times -- requires amssymb {"8882", "vltri", "$\\triangleleft$"}, // Left triangle {"8883", "vrtri", "$\\triangleright$"}, // Right triangle {"8896", "xwedge", "$\\bigwedge$"}, // Big wedge {"8897", "xvee", "$\\bigvee$"}, // Big vee {"8942", "vdots", "$\\vdots$"}, // vertical ellipsis U+22EE {"8943", "cdots", "$\\cdots$"}, // midline horizontal ellipsis U+22EF /*{"8944", "", "$\\ddots$"}, // up right diagonal ellipsis U+22F0 */ {"8945", "ddots", "$\\ddots$"}, // down right diagonal ellipsis U+22F1 {"9426", "circledc", "{\\copyright}"}, // circled small letter C {"9633", "square", "$\\square$"}, // White square {"9651", "xutri", "$\\bigtriangleup$"}, // White up-pointing big triangle {"9653", "utri", "$\\triangle$"}, // White up-pointing small triangle -- \vartriangle probably // better but requires amssymb {"10877", "les", "$\\leqslant$"}, // Less than slanted equal -- requires amssymb {"10878", "ges", "$\\geqslant$"}, // Less than slanted equal -- requires amssymb {"64256", "", "ff"}, // ff ligature (which LaTeX solves by itself) {"64257", "", "fi"}, // fi ligature (which LaTeX solves by itself) {"64258", "", "fl"}, // fl ligature (which LaTeX solves by itself) {"64259", "", "ffi"}, // ffi ligature (which LaTeX solves by 
itself) {"64260", "", "ffl"}, // ffl ligature (which LaTeX solves by itself) {"119978", "Oscr", "$\\mathcal{O}$"}, // script capital O -- possibly use \mathscr {"119984", "Uscr", "$\\mathcal{U}$"} // script capital U -- possibly use \mathscr }; // List of combining accents private static final String[][] ACCENT_LIST = new String[][] {{"768", "`"}, // Grave {"769", "'"}, // Acute {"770", "^"}, // Circumflex {"771", "~"}, // Tilde {"772", "="}, // Macron {"773", "="}, // Overline - not completely correct {"774", "u"}, // Breve {"775", "."}, // Dot above {"776", "\""}, // Diaeresis {"777", "h"}, // Hook above {"778", "r"}, // Ring {"779", "H"}, // Double acute {"780", "v"}, // Caron {"781", "|"}, // Vertical line above {"782", "U"}, // Double vertical line above {"783", "G"}, // Double grave {"784", "textdotbreve"}, // Candrabindu {"785", "t"}, // Inverted breve // {"786", ""}, // Turned comma above // {"787", ""}, // Comma above {"788", "textrevcommaabove"}, // Reversed comma above {"789", "textcommaabover"}, // Comma above right {"790", "textsubgrave"}, // Grave accent below -requires tipa {"791", "textsubacute"}, // Acute accent below - requires tipa {"792", "textadvancing"}, // Left tack below - requires tipa {"793", "textretracting"}, // Right tack below - requires tipa {"794", "textlangleabove"}, // Left angle above {"795", "textrighthorn"}, // Horn {"796", "textsublhalfring"}, // Left half ring below - requires tipa {"797", "textraising"}, // Up tack below - requires tipa {"798", "textlowering"}, // Down tack below - requires tipa {"799", "textsubplus"}, // Plus sign below - requires tipa {"800", "textsubbar"}, // Minus sign below {"801", "textpalhookbelow"}, // Palatalized hook below {"802", "M"}, // Retroflex hook below - textrethookbelow? 
{"803", "d"}, // Dot below {"804", "textsubumlaut"}, // Diaeresis below - requires tipa {"805", "textsubring"}, // Ring below - requires tipa {"806", "cb"}, // Comma below - requires combelow {"807", "c"}, // Cedilla {"808", "k"}, // Ogonek {"809", "textsyllabic"}, // Vertical line below - requires tipa {"810", "textsubbridge"}, // Bridge below - requires tipa {"811", "textsubw"}, // Inverted double arch below - requires tipa {"812", "textsubwedge"}, // Caron below {"813", "textsubcircum"}, // Circumflex accent below - requires tipa {"814", "textsubbreve"}, // Breve below {"815", "textsubarch"}, // Inverted breve below - requires tipa {"816", "textsubtilde"}, // Tilde below - requires tipa {"817", "b"}, // Macron below - not completely correct {"818", "b"}, // Underline {"819", "subdoublebar"}, // Double low line -- requires extraipa {"820", "textsuperimposetilde"}, // Tilde overlay - requires tipa {"821", "B"}, // Short stroke overlay - textsstrokethru? {"822", "textlstrokethru"}, // Long stroke overlay {"823", "textsstrikethru"}, // Short solidus overlay {"824", "textlstrikethru"}, // Long solidus overlay {"825", "textsubrhalfring"}, // Right half ring below - requires tipa {"826", "textinvsubbridge"}, // inverted bridge below - requires tipa {"827", "textsubsquare"}, // Square below - requires tipa {"828", "textseagull"}, // Seagull below - requires tipa {"829", "textovercross"}, // X above - requires tipa // {"830", ""}, // Vertical tilde // {"831", ""}, // Double overline // {"832", ""}, // Grave tone mark // {"833", ""}, // Acute tone mark // {"834", ""}, // Greek perispomeni // {"835", ""}, // Greek koronis // {"836", ""}, // Greek dialytika tonos // {"837", ""}, // Greek ypogegrammeni {"838", "overbridge"}, // Bridge above - requires extraipa {"839", "subdoublebar"}, // Equals sign below - requires extraipa {"840", "subdoublevert"}, // Double vertical line below - requires extraipa {"841", "subcorner"}, // Left angle below - requires extraipa {"842", 
"crtilde"}, // Not tilde above - requires extraipa {"843", "dottedtilde"}, // Homothetic above - requires extraipa {"844", "doubletilde"}, // Almost equal to above - requires extraipa {"845", "spreadlips"}, // Left right arrow below - requires extraipa {"846", "whistle"}, // Upwards arrow below - requires extraipa {"861", "textdoublebreve"}, // Double breve {"862", "textdoublemacron"}, // Double macron {"863", "textdoublemacronbelow"}, // Double macron below {"864", "textdoubletilde"}, // Double tilde {"865", "texttoptiebar"}, // Double inverted breve {"866", "sliding"}, // Double rightwards arrow below - requires extraipa }; static { for (String[] aConversionList : CONVERSION_LIST) { if (!(aConversionList[2].isEmpty())) { String strippedLaTeX = cleanLaTeX(aConversionList[2]); if (!(aConversionList[1].isEmpty())) { HTML_LATEX_CONVERSION_MAP.put("&" + aConversionList[1] + ";", aConversionList[2]); if (!strippedLaTeX.isEmpty()) { LATEX_HTML_CONVERSION_MAP.put(strippedLaTeX, "&" + aConversionList[1] + ";"); } } else if (!(aConversionList[0].isEmpty()) && !strippedLaTeX.isEmpty()) { LATEX_HTML_CONVERSION_MAP.put(strippedLaTeX, "&#" + aConversionList[0] + ";"); } if (!(aConversionList[0].isEmpty())) { NUMERICAL_LATEX_CONVERSION_MAP.put(Integer.decode(aConversionList[0]), aConversionList[2]); if (Integer.decode(aConversionList[0]) > 128) { String unicodeSymbol = String.valueOf(Character.toChars(Integer.decode(aConversionList[0]))); UNICODE_LATEX_CONVERSION_MAP.put(unicodeSymbol, aConversionList[2]); if (!strippedLaTeX.isEmpty()) { LATEX_UNICODE_CONVERSION_MAP.put(strippedLaTeX, unicodeSymbol); } } } } } for (String[] anAccentList : ACCENT_LIST) { ESCAPED_ACCENTS.put(Integer.decode(anAccentList[0]), anAccentList[1]); UNICODE_ESCAPED_ACCENTS.put(anAccentList[1], String.valueOf(Character.toChars(Integer.decode(anAccentList[0])))); } // Manually added values which are killed by cleanLaTeX LATEX_HTML_CONVERSION_MAP.put("$", "$"); LATEX_UNICODE_CONVERSION_MAP.put("$", "$"); // 
Manual corrections LATEX_HTML_CONVERSION_MAP.put("AA", "Å"); // Overwritten by Å which is less supported LATEX_UNICODE_CONVERSION_MAP.put("AA", "Å"); // Overwritten by Ångstrom symbol LATEX_UNICODE_CONVERSION_MAP.put("'n", "ń"); // Manual additions // Support relax to the extent that it is simply removed LATEX_HTML_CONVERSION_MAP.put("relax", ""); LATEX_UNICODE_CONVERSION_MAP.put("relax", ""); } private HTMLUnicodeConversionMaps() { } private static String cleanLaTeX(String escapedString) { // Get rid of \{}$ from the LaTeX-string return escapedString.replaceAll("[\\\\\\{\\}\\$]", ""); } }