/********************************************************************************* * TotalCross Software Development Kit * * Copyright (C) 2003-2004 Pierre G. Richard * * Copyright (C) 2003-2012 SuperWaba Ltda. * * All Rights Reserved * * * * This library and virtual machine is distributed in the hope that it will * * be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * * * This file is covered by the GNU LESSER GENERAL PUBLIC LICENSE VERSION 3.0 * * A copy of this license is located in file license.txt at the root of this * * SDK or can be downloaded here: * * http://www.gnu.org/licenses/lgpl-3.0.txt * * * *********************************************************************************/ package totalcross.ui.html; /** * <code>NamedEntitiesDereferencer</code> provides an extremely fast way * to map Named Entities References to their corresponding Unicode value. * <P> * <b>Note:</b> the Entity table described below comes from the official XHTML * entity reference list.  Also, notice that entity names are * case sensitive. */ public class NamedEntitiesDereferencer { /* This table was generated by Jaxo's GenStatTable utility - do not edit! */ /** GenStatTable double-hash list * - input file: file:/D:/u/pgr/com/jaxo/html/HtmlHRefProps.txt * - gen date: Aug 31, 2003 6:49:55 PM CEST * - 0 error(s), 0 warning(s) */ private static final int entries[][] = { { -1824316352, 0x160, // Scaron -1304807232, 0xe8, // egrave -1105492192, 0x230a, // lfloor -908183520, 0x161, // scaron 114240, 0x2282, // sub 3433440, 0xb6, // para 70470880, 0xce, // Icirc 100023552, 0xee, // icirc 104663296, 0x96, // ndash 2074027232, 0xc8 // Egrave },{ -1074341375, 0xb7, // middot -902468895, 0x3c2, // sigmaf 76305729, 0x3a9, // Omega 105858401, 0x3c9 // omega },{ -1366144414, 0xe7, // ccedil 119522, 0xa5, // yen 95576866, 0x2666, // diams 2012690050, 0xc7 // Ccedil },{ -1425292669, 0xe1, // aacute 3555, 0x2228, // or 3343619, 0xaf, // macr 3449699, 0x221d, // prop 3526211, 0xa7, // sect 3541923, 0xb9, // sup1 105832675, 0x203e, // oline 109946563, 0xdf, // szlig 1953541795, 0xc1 // Aacute },{ -1419323836, 0xe0, // agrave -1220935388, 0x2026, // hellip 67716, 0x3a7, // Chi 98468, 0x3c7, // chi 113860, 0xad, // shy 3541924, 0xb2, // sup2 92646980, 0xb4, // acute 103278564, 0x2018, // lsquo 103890628, 0xb5, // micro 104574660, 0x2207, // nabla 110621028, 0x2122, // trade 1959510628, 0xc0 // Agrave },{ 2469413, 0xd6, // Ouml 3059493, 0x2245, // cong 3117317, 0x2003, // emsp 3419557, 0xaa, // ordf 3422725, 0xf6, // ouml 3541541, 0x2286, // sube 3541925, 0xb3, // sup3 79882757, 0x3a3, // Sigma 94536933, 0xb8, // cedil 100097093, 0xa1, // iexcl 109435429, 0x3c3 // sigma },{ 80550, 0x3a8, // Psi 99334, 0xb0, // deg 111302, 0x3c8, // psi 3118278, 0x2002, // ensp 62191590, 0xc6, // AElig 76012006, 0xd4, // Ocirc 92697574, 0xe6, // aelig 100348102, 0x221e, // infin 105564678, 0xf4, // ocirc 110364486, 0xd7 // times },{ -2026225689, 0x39b, // Lambda -1110092857, 0x3bb, // lambda 2535, 0x39d, // Nu 3527, 0x3bd, // nu 107431, 0x200e, // lrm 68567943, 0x393, // Gamma 98120615, 0x3b3 // gamma },{ -791593944, 0x2118, // weierp 2504, 0x39c, // Mu 3496, 0x3bc, // mu 83848, 0x3a4, // Tau 114600, 0x3c4, // tau 3049896, 0xa2, // cent 3492904, 0x232a // rang },{ -1922900887, 0xd5, // Otilde -1006768055, 0xf5, // otilde 93596489, 0x201e, // bdquo 105954953, 0x2295, // oplus 1115315049, 0x3d1 // thetasym },{ -1951530038, 0xd1, // Ntilde -1535503510, 0x3b5, // epsilon -1339299958, 0x2020, // dagger -1035397206, 0xf1, // ntilde 93127274, 0x2248, // asymp 108819690, 0x2019, // rsquo 129149770, 0x395, // Epsilon 2039534506, 0x2021 // Dagger },{ -1654325877, 0xdd, // Yacute -922037109, 0x203a, // rsaquo -738193045, 0xfd, // yacute 114251, 0x2211, // sum 2290667, 0xcf, // Iuml 3243979, 0xef, // iuml 72265003, 0x39a, // Kappa 101817675, 0x3ba // kappa },{ 3419564, 0xba, // ordm 3551660, 0x21d1, // uArr 3582412, 0x2191, // uarr 81553132, 0xdb, // Ucirc 106857100, 0xa3, // pound 111105804, 0xfb // ucirc },{ -1380416467, 0xa6, // brvbar -1221256979, 0x2665, // hearts 121037, 0x200d, // zwj 3124973, 0x20ac, // euro 96634189, 0x2205 // empty },{ -1923745138, 0xd8, // Oslash -1096866130, 0x2217, // lowast -1007612306, 0xf8, // oslash -896191506, 0x2660, // spades 114254, 0x2283, // sup 3314158, 0x2329, // lang 97692206, 0x2044 // frasl },{ -1768842481, 0xda, // Uacute -1266526065, 0xbd, // frac12 -1266526001, 0xbe, // frac34 -852709649, 0xfa, // uacute 104431, 0x222b, // int 2171503, 0xcb, // Euml 3124815, 0xeb, // euml 3241935, 0x2208, // isin 3462287, 0x21d2, // rArr 3493039, 0x2192, // rarr 108270575, 0x221a // radic },{ -1762873648, 0xd9, // Ugrave -874817968, 0x2234, // there4 -846740816, 0xf9, // ugrave 2066960, 0x392, // Beta 3020272, 0x3b2, // beta 103901296, 0x2212 // minus },{ -1266526063, 0xbc, // frac14 -1093812015, 0x2039, // lsaquo 2833, 0x39e, // Xi 3825, 0x3be, // xi 80209, 0x3a6, // Phi 110961, 0x3c6, // phi 3374865, 0xa0, // nbsp 63529457, 0xc5, // Aring 93082129, 0xe5, // aring 102790001, 0x2308 // lceil },{ 70002, 0x397, // Eta 98258, 0x2229, // cap 100754, 0x3b7, // eta 3391250, 0x2284, // nsub 94921618, 0x21b5 // crarr },{ -991722477, 0x2030, // permil 109267, 0xac, // not 113011, 0x200f, // rlm 2052339, 0xc4, // Auml 2285107, 0x399, // Iota 3000915, 0x27, // apos 3005651, 0xe4, // auml 3035411, 0x2022, // bull 3238419, 0x3b9, // iota 3433459, 0x2202, // part 3437299, 0x22a5, // perp 102831699, 0x201c // ldquo },{ 112788, 0xae, // reg 115924, 0xa8, // uml 75120884, 0x152, // OElig 105626868, 0x153 // oelig },{ -1940617387, 0xd3, // Oacute -1024484555, 0xf3, // oacute 3059573, 0xa9, // copy 3283541, 0x21d0, // lArr 3314293, 0x2190 // larr },{ -1934648554, 0xd2, // Ograve -1018515722, 0xf2, // ograve -874702154, 0x2009, // thinsp 3525622, 0x22c5, // sdot 96757814, 0x2261, // equiv 102742326, 0xab // laquo },{ -1407576169, 0xe3, // atilde -1006767049, 0x2297, // otimes 3511, 0x2260, // ne 96727, 0x2227, // and 107351, 0x25ca, // loz 113879, 0x223c, // sim 3053847, 0x2c6, // circ 3449687, 0x220f, // prod 3541975, 0x2287, // supe 79799255, 0xde, // THORN 96955127, 0x2203, // exist 108331127, 0x2309, // rceil 110337015, 0xfe, // thorn 111502423, 0x3d2, // upsih 1971258295, 0xc3 // Atilde },{ -1268790216, 0x2200, // forall 2781944, 0x396, // Zeta 3735256, 0x3b6, // zeta 63082712, 0xc2, // Acirc 65915800, 0x394, // Delta 92635384, 0xe2, // acirc 95468472, 0x3b4, // delta 105008952, 0x2209, // notin 110363480, 0x2dc // tilde },{ -1345696935, 0x3bf, // omicron -1331463047, 0xf7, // divide -1180962279, 0xbf, // iquest 2585, 0x3a0, // Pi 3449, 0x2264, // le 3577, 0x3c0, // pi 68985, 0xd0, // ETH 82137, 0x3a1, // Rho 100761, 0xf0, // eth 112889, 0x3c1, // rho 3164377, 0x21d4, // hArr 3195129, 0x2194, // harr 3752377, 0x200c, // zwnj 108372825, 0x201d, // rdquo 318956345, 0x39f // Omicron },{ -933717286, 0x230b, // rfloor -220346502, 0x3c5, // upsilon 96730, 0x2220, // ang 1444306778, 0x3a5 // Upsilon },{ -2112392293, 0xcd, // Iacute -1349120421, 0xa4, // curren -1196259461, 0xed, // iacute -985162565, 0xb1, // plusmn -918079173, 0x2135, // alefsym 3515, 0x220b, // ni 2767323, 0x178, // Yuml 3720635, 0xff, // yuml 100313435, 0x2111 // image },{ -2106423460, 0xcc, // Igrave -1190290628, 0xec, // igrave 66776796, 0xca, // Ecirc 96329468, 0xea, // ecirc 108283452, 0xbb, // raquo 109236764, 0x201a // sbquo },{ 111005, 0x3d6, // piv 3045213, 0x21d3, // dArr 3075965, 0x2193, // darr 94761597, 0x2663 // clubs },{ 3294, 0x2265, // ge 98878, 0x222a, // cup 3496350, 0x211c, // real 63357246, 0x391, // Alpha 80774782, 0x398, // Theta 92909918, 0x3b1, // alpha 110327454, 0x3b8 // theta },{ -1310776065, 0xe9, // eacute 2648159, 0xdc, // Uuml 3147935, 0x192, // fnof 3601471, 0xfc, // uuml 77382239, 0x2033, // Prime 103739775, 0x97, // mdash 106934911, 0x2032, // prime 2068058399, 0xc9 // Eacute } }; /** * Get the code associated to a key. * * @param b byte array containing the key * @param offset position of the first byte of the key in the array * @param count number of bytes composing the key * @return the corresponding character value, or 0 if invalid */ public static char toCode(byte b[], int offset, int count) { int key = 0; // compute the key associated to the series of bytes while (count-- > 0) { byte ch = b[offset++]; key = (key << 5) - key + ch; } int[] bucket = entries[key & 0x1F]; // open the bucket with it for (int i=0; i < bucket.length; i += 2) { int j = bucket[i]; if (j >= key) { if (j == key) return (char)bucket[i+1]; break; } } return 0; // which is an invalid unicode character } }