/********************************************************************************* * TotalCross Software Development Kit * * Copyright (C) 2003-2004 Pierre G. Richard * * Copyright (C) 2003-2012 SuperWaba Ltda. * * All Rights Reserved * * * * This library and virtual machine is distributed in the hope that it will * * be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * * * This file is covered by the GNU LESSER GENERAL PUBLIC LICENSE VERSION 3.0 * * A copy of this license is located in file license.txt at the root of this * * SDK or can be downloaded here: * * http://www.gnu.org/licenses/lgpl-3.0.txt * * * *********************************************************************************/ package totalcross.ui.html; /** * <code>NamedEntitiesDereferencer</code> provides an extremely fast way * to map the set of known HTML tags to unique integer values. * <P> * <b>Note:</b> the Elements described below comes from the HTML 4.01 DTD * with IE extensions.  Also, notice that tag names are case * insensitive. */ public class TagDereferencer { /** Code identifying an unknown, possibly invalid, tag */ public static final int UNKNOWN = 0; /** Code identifying a "A" tag */ public static final int A = 1; /** Code identifying a "ABBR" tag */ public static final int ABBR = 2; /** Code identifying a "ACRONYM" tag */ public static final int ACRONYM = 3; /** Code identifying a "ADDRESS" tag */ public static final int ADDRESS = 4; /** Code identifying a "APPLET" tag */ public static final int APPLET = 5; /** Code identifying a "AREA" tag */ public static final int AREA = 6; /** Code identifying a "B" tag */ public static final int B = 7; /** Code identifying a "BASE" tag */ public static final int BASE = 8; /** Code identifying a "BASEFONT" tag */ public static final int BASEFONT = 9; /** Code identifying a "BDO" tag */ public static final int BDO = 10; /** Code identifying a "BIG" tag */ public static final int BIG = 11; /** Code identifying a "BLOCKQUOTE" tag */ public static final int BLOCKQUOTE = 12; /** Code identifying a "BODY" tag */ public static final int BODY = 13; /** Code identifying a "BR" tag */ public static final int BR = 14; /** Code identifying a "BUTTON" tag */ public static final int BUTTON = 15; /** Code identifying a "CAPTION" tag */ public static final int CAPTION = 16; /** Code identifying a "CENTER" tag */ public static final int CENTER = 17; /** Code identifying a "CITE" tag */ public static final int CITE = 18; /** Code identifying a "CODE" tag */ public static final int CODE = 19; /** Code identifying a "COL" tag */ public static final int COL = 20; /** Code identifying a "COLGROUP" tag */ public static final int COLGROUP = 21; /** Code identifying a "COMMENT" tag */ public static final int COMMENT = 22; /** Code identifying a "DD" tag */ public static final int DD = 23; /** Code identifying a "DEL" tag */ public static final int DEL = 24; /** Code identifying a "DFN" tag */ public static final int DFN = 25; /** Code identifying a "DIR" tag */ public static final int DIR = 26; /** Code identifying a "DIV" tag */ public static final int DIV = 27; /** Code identifying a "DL" tag */ public static final int DL = 28; /** Code identifying a "DT" tag */ public static final int DT = 29; /** Code identifying a "EM" tag */ public static final int EM = 30; /** Code identifying a "EMBED" tag */ public static final int EMBED = 31; /** Code identifying a "FIELDSET" tag */ public static final int FIELDSET = 32; /** Code identifying a "FONT" tag */ public static final int FONT = 33; /** Code identifying a "FORM" tag */ public static final int FORM = 34; /** Code identifying a "FRAME" tag */ public static final int FRAME = 35; /** Code identifying a "FRAMESET" tag */ public static final int FRAMESET = 36; /** Code identifying a "H1" tag */ public static final int H1 = 37; /** Code identifying a "H2" tag */ public static final int H2 = 38; /** Code identifying a "H3" tag */ public static final int H3 = 39; /** Code identifying a "H4" tag */ public static final int H4 = 40; /** Code identifying a "H5" tag */ public static final int H5 = 41; /** Code identifying a "H6" tag */ public static final int H6 = 42; /** Code identifying a "HEAD" tag */ public static final int HEAD = 43; /** Code identifying a "HR" tag */ public static final int HR = 44; /** Code identifying a "HTML" tag */ public static final int HTML = 45; /** Code identifying a "I" tag */ public static final int I = 46; /** Code identifying a "IFRAME" tag */ public static final int IFRAME = 47; /** Code identifying a "IMG" tag */ public static final int IMG = 48; /** Code identifying a "INPUT" tag */ public static final int INPUT = 49; /** Code identifying a "INS" tag */ public static final int INS = 50; /** Code identifying a "ISINDEX" tag */ public static final int ISINDEX = 51; /** Code identifying a "KBD" tag */ public static final int KBD = 52; /** Code identifying a "LABEL" tag */ public static final int LABEL = 53; /** Code identifying a "LEGEND" tag */ public static final int LEGEND = 54; /** Code identifying a "LI" tag */ public static final int LI = 55; /** Code identifying a "LINK" tag */ public static final int LINK = 56; /** Code identifying a "MAP" tag */ public static final int MAP = 57; /** Code identifying a "MENU" tag */ public static final int MENU = 58; /** Code identifying a "META" tag */ public static final int META = 59; /** Code identifying a "NOFRAMES" tag */ public static final int NOFRAMES = 60; /** Code identifying a "NOSCRIPT" tag */ public static final int NOSCRIPT = 61; /** Code identifying a "OBJECT" tag */ public static final int OBJECT = 62; /** Code identifying a "OL" tag */ public static final int OL = 63; /** Code identifying a "OPTGROUP" tag */ public static final int OPTGROUP = 64; /** Code identifying a "OPTION" tag */ public static final int OPTION = 65; /** Code identifying a "P" tag */ public static final int P = 66; /** Code identifying a "PARAM" tag */ public static final int PARAM = 67; /** Code identifying a "PLAINTEXT" tag */ public static final int PLAINTEXT = 68; /** Code identifying a "PRE" tag */ public static final int PRE = 69; /** Code identifying a "Q" tag */ public static final int Q = 70; /** Code identifying a "S" tag */ public static final int S = 71; /** Code identifying a "SAMP" tag */ public static final int SAMP = 72; /** Code identifying a "SCRIPT" tag */ public static final int SCRIPT = 73; /** Code identifying a "SELECT" tag */ public static final int SELECT = 74; /** Code identifying a "SMALL" tag */ public static final int SMALL = 75; /** Code identifying a "SPAN" tag */ public static final int SPAN = 76; /** Code identifying a "STRIKE" tag */ public static final int STRIKE = 77; /** Code identifying a "STRONG" tag */ public static final int STRONG = 78; /** Code identifying a "STYLE" tag */ public static final int STYLE = 79; /** Code identifying a "SUB" tag */ public static final int SUB = 80; /** Code identifying a "SUP" tag */ public static final int SUP = 81; /** Code identifying a "TABLE" tag */ public static final int TABLE = 82; /** Code identifying a "TBODY" tag */ public static final int TBODY = 83; /** Code identifying a "TD" tag */ public static final int TD = 84; /** Code identifying a "TEXTAREA" tag */ public static final int TEXTAREA = 85; /** Code identifying a "TFOOT" tag */ public static final int TFOOT = 86; /** Code identifying a "TH" tag */ public static final int TH = 87; /** Code identifying a "THEAD" tag */ public static final int THEAD = 88; /** Code identifying a "TITLE" tag */ public static final int TITLE = 89; /** Code identifying a "TR" tag */ public static final int TR = 90; /** Code identifying a "TT" tag */ public static final int TT = 91; /** Code identifying a "U" tag */ public static final int U = 92; /** Code identifying a "UL" tag */ public static final int UL = 93; /** Code identifying a "VAR" tag */ public static final int VAR = 94; /** Code identifying a "WBR" tag */ public static final int WBR = 95; /** Code identifying a "XMP" tag */ public static final int XMP = 96; /* This table was generated by Jaxo's GenStatTable utility - do not edit! */ /** GenStatTable double-hash list * - input file: file:/D:/u/newdev/SuperWaba/jaxo/html/HtmlTagProps.txt * - gen date: Sep 1, 2003 9:13:55 AM CEST * - 0 error(s), 0 warning(s) */ private static final int entries[][] = { { -938331488, 0x9, // BASEFONT 2176, 0x17, // DD 2688, 0x5b, // TT 65760, 0xb, // BIG 66912, 0x14, // COL 82464, 0x50, // SUB 2213344, 0x2b // HEAD },{ 65, 0x1 // A },{ 66, 0x7, // B 2044322, 0xd, // BODY 79739586, 0x56 // TFOOT },{ -445489757, 0x3, // ACRONYM 72611, 0x30, // IMG 79491, 0x45 // PRE },{ -2137885020, 0x2f, // IFRAME 2163908, 0x22 // FORM },{ 2362885, 0x3b // META },{ 1270556102, 0x10 // CAPTION },{ 84743, 0x5e, // VAR 85735, 0x5f, // WBR 79011047, 0x4b, // SMALL 1975348647, 0x3c // NOFRAMES },{ -1485680184, 0x33, // ISINDEX -146362072, 0x20, // FIELDSET 2184, 0x1c, // DL 2216, 0x1e // EM },{ 73, 0x2e, // I 2281, 0x25 // H1 },{ 2282, 0x26, // H2 2314, 0x2c, // HR 2551626, 0x4c, // SPAN 69820330, 0x31 // INPUT },{ -1854356277, 0x49, // SCRIPT 2283, 0x27, // H3 67563, 0x18, // DEL 2228139, 0x2d // HTML },{ -1960789556, 0x3d, // NOSCRIPT 2284, 0x28, // H4 67596, 0x19, // DFN 701853612, 0x40 // OPTGROUP },{ -2053244915, 0x36, // LEGEND 2285, 0x29, // H5 65613, 0xa, // BDO 67693, 0x1a, // DIR 74189, 0x34, // KBD 2017421, 0x6, // AREA 2074093, 0x13, // CODE 67154253, 0x23, // FRAME 75898989, 0x43 // PARAM },{ 2286, 0x2a, // H6 72654, 0x32, // INS 82478, 0x51, // SUP 79578030, 0x52 // TABLE },{ 2163791, 0x21, // FONT 1788294671, 0xc // BLOCKQUOTE },{ 80, 0x42, // P 2128, 0xe, // BR 2192, 0x1d, // DT 2672, 0x54 // TD },{ 81, 0x46, // Q 67697, 0x1b, // DIV 2001969, 0x2, // ABBR 2031313, 0x8, // BASE 2537585, 0x48, // SAMP 79242641, 0x4f // STYLE },{ -1838656590, 0x4d, // STRIKE 1970608946, 0xf // BUTTON },{ 83, 0x47 // S },{ -429709356, 0x4, // ADDRESS 2676, 0x57, // TH 72189652, 0x35, // LABEL 79789108, 0x58 // THEAD },{ -1956807563, 0x41, // OPTION -862326827, 0x24, // FRAMESET 85, 0x5c, // U 1984282709, 0x11 // CENTER },{ 79620086, 0x53 // TBODY },{ -1838650729, 0x4e, // STRONG 2711, 0x5d, // UL 2068823, 0x12, // CITE 464861655, 0x44 // PLAINTEXT },{ 79833656, 0x59 // TITLE },{ 66082489, 0x1f // EMBED },{ -220616902, 0x55, // TEXTAREA 2336762, 0x38, // LINK 1937235034, 0x5 // APPLET },{ 87035, 0x60 // XMP },{ -1852692228, 0x4a, // SELECT 76092, 0x39 // MAP },{ 2429, 0x37, // LI 2525, 0x3f // OL },{ 2686, 0x5a // TR },{ -1970038977, 0x3e, // OBJECT 2362719, 0x3a, // MENU 146429183, 0x15, // COLGROUP 1668381247, 0x16 // COMMENT } }; /** * Returns the hash code of the given string, automatically converting the chars to upper case. * * @param b * @param offset * @param count * @return The hash code. */ public static int hashCode(byte b[], int offset, int count) { int key = 0; // compute the key associated to the series of bytes while (count-- > 0) { byte ch = b[offset++]; // Warning: following line upper cases assuming latin-1 ASCII if (('a' <= ch) && (ch <= 'z')) // flsobral@tc111_12: reverted back to upper case conversion and fixed documentation. ch -= ('a'-'A'); key = (key << 5) - key + ch; } return key; } /** * Get the code associated to a key. * * @param b byte array containing the key * @param offset position of the first byte of the key in the array * @param count number of bytes composing the key * @return the corresponding character value, or 0 if invalid */ public static char toCode(byte b[], int offset, int count) { int key = hashCode(b,offset,count); int[] bucket = entries[key & 0x1F]; // open the bucket with it for (int i=0; i < bucket.length; i += 2) { int j = bucket[i]; if (j >= key) { if (j == key) return (char)bucket[i+1]; break; } } return 0; // which is an invalid unicode character } }