package org.basex.build.xml; import static org.basex.build.BuildText.*; import static org.basex.core.Text.*; import static org.basex.util.Token.*; import static org.basex.util.XMLToken.*; import java.io.IOException; import java.util.Arrays; import org.basex.build.BuildException; import org.basex.build.BuildText.Type; import org.basex.core.Progress; import org.basex.core.Prop; import org.basex.io.IO; import org.basex.io.IOContent; import org.basex.io.in.TextInput; import org.basex.util.TokenBuilder; import org.basex.util.Util; import org.basex.util.hash.TokenMap; /** * This class scans an XML document and creates atomic tokens. * * @author BaseX Team 2005-12, BSD License * @author Christian Gruen * @author Andreas Weiler */ final class XMLScanner extends Progress { /** Entities. */ private static final String[] ENTITIES = { "amp", "&", "apos", "'", "quot", "\"", "lt", "<", "gt", ">" }; /** PublicID characters. */ private static final byte[] PUBIDTOK = token(" \n'()+,/=?;!*#@$%"); /** Question mark. */ private static final byte[] QUESTION = { '?' }; /** Ampersand entity. */ private static final byte[] AMPER = { '&' }; /** Scanning states. */ private enum State { /** Content state. */ CONTENT, /** Tag state. */ TAG, /** Attribute state. */ ATT, /** Quoted state. */ QUOTE, } /** Character buffer for the current token. */ final TokenBuilder token = new TokenBuilder(); /** Document encoding. */ final String encoding; /** Current token type. */ Type type; /** Index for all entity names. */ private final TokenMap ents = new TokenMap(); /** Index for all PEReferences. */ private final TokenMap pents = new TokenMap(); /** DTD flag. */ private final boolean dtd; /** Current scanner state. */ private State state = State.CONTENT; /** Opening tag found. */ private boolean prolog = true; /** Parameter entity parsing. */ private boolean pe; /** Tag flag. */ private boolean tag; /** Current quote character. */ private int quote; /** XML input. */ private TextInput input; /** * Initializes the scanner. * @param f input file * @param pr database properties * @throws IOException I/O exception */ XMLScanner(final IO f, final Prop pr) throws IOException { input = new TextInput(f); for(int e = 0; e < ENTITIES.length; e += 2) { ents.add(token(ENTITIES[e]), token(ENTITIES[e + 1])); } dtd = pr.is(Prop.DTD); String enc = null; // process document declaration... if(consume(DOCDECL)) { if(s()) { if(!version()) error(DECLSTART); boolean s = s(); enc = encoding(); if(enc != null) { if(!s) error(WSERROR); s = s(); } if(sddecl() != null && !s) error(WSERROR); s(); final int ch = nextChar(); if(ch != '?' || nextChar() != '>') error(DECLWRONG); } else { prev(5); } } encoding = enc == null ? UTF8 : enc; if(!s(consume())) prev(1); } /** * Reads and interprets the next token from the input stream. * @return true if the document scanning has been completed * @throws IOException I/O exception */ boolean more() throws IOException { // gets next character from the input stream token.reset(); final int ch = consume(); if(ch == 0) { type = Type.EOF; return false; } // checks the scanner state switch(state) { case CONTENT: scanCONTENT(ch); break; case TAG: case ATT: scanTAG(ch); break; case QUOTE: scanATTVALUE(ch); } return true; } /** * Finishes file scanning. * @throws IOException I/O exception */ void close() throws IOException { input.close(); if(prolog) error(DOCEMPTY); } /** * Scans XML content. * @param ch current character * @throws IOException I/O exception */ private void scanCONTENT(final int ch) throws IOException { // parse TEXT if(!tag && (ch != '<' || isCDATA())) { content(ch); return; } // parse a TAG tag = false; final int c = nextChar(); // parse comments etc... if(c == '!') { if(consume(DOCTYPE)) { type = Type.DTD; dtd(); } else { type = Type.COMMENT; if(!consume('-') || !consume('-')) error(COMMDASH); comment(); } return; } // checking a PI if(c == '?') { type = Type.PI; pi(); return; } prolog = false; state = State.TAG; // closing tag... if(c == '/') { type = Type.L_BR_CLOSE; return; } // opening tag... type = Type.L_BR; prev(1); } /** * Scans an XML tag. * @param ch current character * @throws IOException I/O exception */ private void scanTAG(final int ch) throws IOException { int c = ch; // scan tag end... if(c == '>') { type = Type.R_BR; state = State.CONTENT; } else if(c == '=') { // scan equal sign... type = Type.EQ; } else if(c == '\'' || c == '"') { // scan quote... type = Type.QUOTE; state = State.QUOTE; quote = c; } else if(c == '/') { // scan empty tag end... type = Type.CLOSE_R_BR; if((c = nextChar()) == '>') { state = State.CONTENT; } else { token.add(c); error(CLOSING); } } else if(s(c)) { // scan whitespace... type = Type.WS; } else if(isStartChar(c)) { // scan tag name... type = state == State.ATT ? Type.ATTNAME : Type.TAGNAME; do token.add(c); while(isChar(c = nextChar())); prev(1); state = State.ATT; } else { // undefined character... error(CHARACTER, (char) c); } } /** * Scans a quoted token. * @param ch current character * @throws IOException I/O exception */ private void scanATTVALUE(final int ch) throws IOException { if(ch == quote) { type = Type.QUOTE; state = State.ATT; } else { type = Type.ATTVALUE; attValue(ch); prev(1); } } /** * Scans an attribute value. [10] * @param ch current character * @throws IOException I/O exception */ private void attValue(final int ch) throws IOException { boolean wrong = false; int c = ch; do { if(c == 0) error(ATTCLOSE, (char) c); wrong |= c == '\'' || c == '"'; if(c == '<') error(wrong ? ATTCLOSE : ATTCHAR, (char) c); if(c == 0x0A) c = ' '; if(c == '&') { // verify... final byte[] r = ref(true); if(r.length == 1) token.add(r); else if(!input.add(r, false)) error(RECENT); } else { token.add(c); } } while((c = consume()) != quote); } /** * Scans XML text. * @param ch current character * @throws IOException I/O exception */ private void content(final int ch) throws IOException { type = Type.TEXT; boolean ws = true; boolean f = true; int c = ch; while(c != 0) { if(c != '<') { if(ws) ws = ws(c); if(c == '&') { // verify... final byte[] r = ref(true); if(r.length == 1) token.add(r); else if(!input.add(r, false)) error(RECENT); } else { if(c == ']') { if(consume() == ']') { if(consume() == '>') error(CONTCDATA); prev(1); } prev(1); } token.add(c); } } else { if(!f && !isCDATA()) { tag = true; prev(1); return; } ws = false; cDATA(); } c = consume(); f = false; } if(ws) type = Type.EOF; } /** * Checks input for CDATA section... <![DATA[...]]>. * @return true for CDATA * @throws IOException I/O exception */ private boolean isCDATA() throws IOException { if(!consume('!')) return false; if(!consume('[')) { prev(1); return false; } if(!consume(CDATA)) error(CDATASEC); return true; } /** * Scans CDATA. * @throws IOException I/O exception */ private void cDATA() throws IOException { int ch; while(true) { while((ch = nextChar()) != ']') token.add(ch); if(consume(']')) { if(consume('>')) return; prev(1); } token.add(ch); } } /** * Scans a comment. * @throws IOException I/O exception */ private void comment() throws IOException { do { final int ch = nextChar(); if(ch == '-') { if(consume('-')) { check('>'); return; } } token.add(ch); } while(true); } /** * Scans a processing instruction. * @throws IOException I/O exception */ private void pi() throws IOException { final byte[] tok = name(true); if(eq(lc(tok), XML)) error(PIRES); token.add(tok); int ch = nextChar(); if(ch != '?' && !ws(ch)) error(PITEXT); do { while(ch != '?') { token.add(ch); ch = nextChar(); } if((ch = consume()) == '>') return; token.add('?'); } while(true); } /** * Scans whitespaces. * @return true for whitespaces * @throws IOException I/O exception */ private boolean s() throws IOException { final int ch = consume(); if(s(ch)) return true; prev(1); return false; } /** * Checks input for whitespaces; if none are found, throws an error. * @throws IOException I/O exception */ private void checkS() throws IOException { if(!s()) error(NOWS, (char) consume()); } /** * Checks input for the specified character. * @param ch character to be found * @throws IOException I/O exception */ private void check(final char ch) throws IOException { final int c = consume(); if(c != ch) error(WRONGCHAR, ch, (char) c); } /** * Checks input for the specified token. * @param tok token to be found * @throws IOException I/O exception */ private void check(final byte[] tok) throws IOException { if(!consume(tok)) error(WRONGCHAR, tok, (char) consume()); } /** * Scans whitespaces. * @param ch current character * @return true for whitespaces * @throws IOException I/O exception */ private boolean s(final int ch) throws IOException { int c = ch; if(ws(c)) { do c = consume(); while(ws(c)); prev(1); return true; } return false; } /** * Consumes a quote. * @return found quote * @throws IOException I/O exception */ private int qu() throws IOException { final int qu = consume(); if(qu != '\'' && qu != '"') error(SCANQUOTE, (char) qu); return qu; } /** * Scans a reference. [67] * @param f dissolve entities * @return entity * @throws IOException I/O exception */ private byte[] ref(final boolean f) throws IOException { // scans numeric entities if(consume('#')) { // [66] final TokenBuilder ent = new TokenBuilder(); int b = 10; int ch = nextChar(); ent.add(ch); if(ch == 'x') { b = 16; ent.add(ch = nextChar()); } int n = 0; do { final boolean m = ch >= '0' && ch <= '9'; final boolean h = b == 16 && (ch >= 'a' && ch <= 'f' || ch >= 'A' && ch <= 'F'); if(!m && !h) { completeRef(ent); return QUESTION; } n *= b; n += ch & 15; if(!m) n += 9; ent.add(ch = nextChar()); } while(ch != ';'); if(!valid(n)) return QUESTION; ent.reset(); ent.add(n); return ent.finish(); } // scans predefined entities [68] final byte[] name = name(false); if(!consume(';')) return QUESTION; if(!f) return concat(AMPER, name, SEMI); byte[] en = ents.get(name); if(en == null) { // unknown entity: try HTML entities if(HTMLENTS.size() == 0) { for(int s = 0; s < HTMLENTITIES.length; s += 2) { HTMLENTS.add(token(HTMLENTITIES[s]), token(HTMLENTITIES[s + 1])); } } en = HTMLENTS.get(name); } return en == null ? QUESTION : en; } /** * Scans a PEReference. [69] * @return entity * @throws IOException I/O exception */ private byte[] peRef() throws IOException { // scans predefined entities final byte[] name = name(true); consume(';'); final byte[] en = pents.get(name); if(en != null) return en; return name; } /** * Adds some characters to the entity. * @param ent token builder * @throws IOException I/O exception */ private void completeRef(final TokenBuilder ent) throws IOException { int ch = consume(); while(ent.size() < 10 && ch >= ' ' && ch != ';') { ent.add(ch); ch = consume(); } } /** * Reads next character or throws an exception if all bytes have been read. * @return next character * @throws IOException I/O exception */ private int nextChar() throws IOException { final int ch = consume(); if(ch == 0) error(UNCLOSED, token); return ch; } /** * Jumps the specified number of characters back. * @param p number of characters */ private void prev(final int p) { input.prev(p); } /** * Reads next character or throws an exception if all bytes have been read. * @return next character * @throws IOException I/O exception */ private int consume() throws IOException { while(true) { final int ch = input.read(); if(ch < 0) return 0; if(ch > 0 && ch < ' ' && !ws(ch)) error(XMLCHAR, ch); if(ch == '%' && pe) { // [69] final byte[] key = name(true); final byte[] val = pents.get(key); if(val == null) error(UNKNOWNPE, key); check(';'); input.add(val, true); } else if(ch != 0x0D) { return ch; } } } /** * Consumes the specified character. * @param ch character to be found * @return true if token was found * @throws IOException I/O exception */ private boolean consume(final char ch) throws IOException { if(consume() == ch) return true; prev(1); return false; } /** * Consumes the specified token. * @param tok token to be found * @return true if token was found * @throws IOException I/O exception */ private boolean consume(final byte[] tok) throws IOException { for(int t = 0; t < tok.length; ++t) { final int ch = consume(); if(ch != tok[t]) { prev(t + 1); return false; } } return true; } /** * Consumes an XML name. [5] * @param f force parsing * @return name * @throws IOException I/O exception */ private byte[] name(final boolean f) throws IOException { final TokenBuilder name = new TokenBuilder(); int c = consume(); if(!isStartChar(c)) { if(f) error(INVNAME); prev(1); return null; } do name.add(c); while(isChar(c = nextChar())); prev(1); return name.finish(); } /** * Consumes an Nmtoken. [7] * @throws IOException I/O exception */ private void nmtoken() throws IOException { final TokenBuilder name = new TokenBuilder(); int c; while(isChar(c = nextChar())) name.add(c); prev(1); if(name.size() == 0) error(INVNAME); } /** * Scans doc type definitions. [28] * @throws IOException I/O exception */ private void dtd() throws IOException { if(!prolog) error(TYPEAFTER); if(!s()) error(ERRDT); name(true); // parse root tag s(); externalID(true, true); s(); while(consume('[')) { s(); while(markupDecl()); s(); check(']'); s(); } check('>'); } /** * Scans an external ID. * @param f full flag * @param r root flag * @return id * @throws IOException I/O exception */ private byte[] externalID(final boolean f, final boolean r) throws IOException { byte[] cont = null; final boolean pub = consume(PUBLIC); if(pub || consume(SYSTEM)) { checkS(); if(pub) { pubidLit(); if(f) checkS(); } final int qu = consume(); // [11] if(qu == '\'' || qu == '"') { int ch; final TokenBuilder tok = new TokenBuilder(); while((ch = nextChar()) != qu) tok.add(ch); if(!f) return null; final String name = string(tok.finish()); if(!dtd && r) return cont; final TextInput tin = input; try { final IO file = input.input().merge(name); cont = file.read(); input = new TextInput(new IOContent(cont, name)); } catch(final IOException ex) { Util.debug(ex); throw error(NOT_PARSED_X, name); } if(consume(XDECL)) { check(XML); s(); if(version()) checkS(); s(); if(encoding() == null) error(TEXTENC); ch = nextChar(); if(s(ch)) ch = nextChar(); if(ch != '?' || nextChar() != '>') error(DECLWRONG); cont = Arrays.copyOfRange(cont, input.pos(), cont.length); } s(); if(r) { extSubsetDecl(); if(!consume((char) 0)) error(INVEND); } input = tin; } else { if(f) error(SCANQUOTE, (char) qu); prev(1); } } return cont; } /** * Scans an public ID literal. [12] * @throws IOException I/O exception */ private void pubidLit() throws IOException { final int qu = qu(); int ch; while((ch = nextChar()) != qu) { if(!isChar(ch) && !contains(PUBIDTOK, ch)) error(PUBID, (char) ch); } } /** * Scans an external subset declaration. [31] * @throws IOException I/O exception */ private void extSubsetDecl() throws IOException { while(true) { s(); if(markupDecl()) continue; if(!consume(COND)) return; s(); // [61] final boolean incl = consume(INCL); if(!incl) check(IGNO); s(); check('['); if(incl) { extSubsetDecl(); check(CONE); } else { int c = 1; while(c != 0) { if(consume(COND)) ++c; else if(consume(CONE)) --c; else if(consume() == 0) error(INVEND); } } } } /** * Scans a markup declaration. [29] * @return true if a declaration was found * @throws IOException I/O exception */ private boolean markupDecl() throws IOException { if(consume(ENT)) { // [70] checkS(); if(consume('%')) { // [72] PEDecl checkS(); final byte[] key = name(true); checkS(); byte[] val = entityValue(true); //[74] if(val == null) { val = externalID(true, false); if(val == null) error(INVEND); } s(); pents.add(key, val); } else { // [71] GEDecl final byte[] key = name(true); checkS(); byte[] val = entityValue(false); // [73] EntityDef if(val == null) { val = externalID(true, false); if(val == null) error(INVEND); if(s()) { check(ND); checkS(); name(true); } } s(); ents.add(key, val); } check('>'); pe = true; } else if(consume(ELEM)) { // [45] checkS(); name(true); checkS(); pe = true; if(!consume(EMP) && !consume(ANY)) { // [46] if(consume('(')) { s(); if(consume(PC)) { // [51] s(); boolean alt = false; while(consume('|')) { s(); name(true); s(); alt = true; } check(')'); if(!consume('*') && alt) error(INVEND); } else { cp(); s(); //check(')'); // to be fixed... while(!consume(')')) consume(); //input.prev(1); occ(); } } else { error(INVEND); } } s(); check('>'); } else if(consume(ATTL)) { // [52] pe = true; checkS(); name(true); s(); while(name(false) != null) { // [53] checkS(); if(!consume(CD) && !consume(IDRS) && !consume(IDR) && !consume(ID) && !consume(ENTS) && !consume(ENT1) && !consume(NMTS) && !consume(NMT)) { // [56] if(consume(NOT)) { // [57,58] checkS(); check('('); s(); name(true); s(); while(consume('|')) { s(); name(true); s(); } check(')'); } else { // [59] check('('); s(); nmtoken(); s(); while(consume('|')) { s(); nmtoken(); s(); } check(')'); } } // [54] pe = true; checkS(); if(!consume(REQ) && !consume(IMP)) { // [60] if(consume(FIX)) checkS(); quote = qu(); attValue(consume()); } s(); } check('>'); } else if(consume(NOTA)) { // [82] checkS(); name(true); s(); externalID(false, false); s(); check('>'); } else if(consume(COMS)) { comment(); } else if(consume(XML)) { pi(); } else { return false; } s(); pe = false; return true; } /** * Scans a mixed value and children. [47-50] * @throws IOException I/O exception */ private void cp() throws IOException { s(); final byte[] name = name(false); if(name == null) { check('('); s(); cp(); } else { occ(); } s(); if(consume('|') || consume(',')) { cp(); s(); } if(name == null) { check(')'); occ(); } } /** * Scans occurrences. * @throws IOException I/O exception */ private void occ() throws IOException { if(!consume('+') && !consume('?')) consume('*'); } /** * Scans an entity value. [9] * @param p pe reference flag * @return value * @throws IOException I/O exception */ private byte[] entityValue(final boolean p) throws IOException { final int qu = consume(); if(qu != '\'' && qu != '"') { prev(1); return null; } TokenBuilder tok = new TokenBuilder(); int ch; while((ch = nextChar()) != qu) { if(ch == '&') tok.add(ref(false)); else if(ch == '%') { if(!p) error(INVPE); tok.add(peRef()); } else { tok.add(ch); } } final TextInput tmp = input; input = new TextInput(new IOContent(tok.finish())); tok = new TokenBuilder(); while((ch = consume()) != 0) { if(ch == '&') tok.add(ref(false)); else tok.add(ch); } input = tmp; return tok.finish(); } /** * Scans a document version. * @return true if version was found * @throws IOException I/O exception */ private boolean version() throws IOException { if(!consume(VERS)) return false; s(); check('='); s(); final int d = qu(); if(!consume(VERS10) && !consume(VERS11)) error(DECLVERSION); check((char) d); return true; } /** * Scans a document encoding. * @return encoding * @throws IOException I/O exception */ private String encoding() throws IOException { if(!consume(ENCOD)) return null; s(); check('='); s(); final TokenBuilder enc = new TokenBuilder(); final int d = qu(); int ch = nextChar(); if(letter(ch) && ch != '_') { while(letterOrDigit(ch) || ch == '.' || ch == '-') { enc.add(ch); ch = nextChar(); } prev(1); } check((char) d); if(enc.size() == 0) error(DECLENCODE, enc); final String e = string(enc.finish()); input.encoding(e); return e; } /** * Scans a standalone flag. * @return flag * @throws IOException I/O exception */ private byte[] sddecl() throws IOException { if(!consume(STANDALONE)) return null; s(); check('='); s(); final int d = qu(); byte[] sd = token(YES); if(!consume(sd)) { sd = token(NO); if(!consume(sd)) error(DECLSTANDALONE); } check((char) d); return sd; } /** * Throws an error. * @param e error message * @param a error arguments * @return build exception (indicates that an error is raised) * @throws IOException I/O exception */ private BuildException error(final String e, final Object... a) throws IOException { throw new BuildException(det() + COLS + e, a); } @Override public String det() { return Util.info(SCANPOS_X_X, input.input().path(), input.line()); } @Override public double prog() { return (double) input.pos() / input.length(); } /** Index for all HTML entities. */ private static final TokenMap HTMLENTS = new TokenMap(); /** HTML entities. */ private static final String[] HTMLENTITIES = { "Aacute", "\u00c1", "aacute", "\u00e1", "Acirc", "\u00c2", "acirc", "\u00e2", "acute", "\u00b4", "AElig", "\u00c6", "aelig", "\u00e6", "Agrave", "\u00c0", "agrave", "\u00e0", "alefsym", "\u2135", "Alpha", "\u0391", "alpha", "\u03b1", "and", "\u2227", "ang", "\u2220", "Aring", "\u00c5", "aring", "\u00e5", "asymp", "\u2248", "Atilde", "\u00c3", "atilde", "\u00e3", "Auml", "\u00c4", "auml", "\u00e4", "bdquo", "\u201e", "Beta", "\u0392", "beta", "\u03b2", "brvbar", "\u00a6", "bull", "\u2022", "cap", "\u2229", "Ccedil", "\u00c7", "ccedil", "\u00e7", "cedil", "\u00b8", "cent", "\u00a2", "Chi", "\u03a7", "chi", "\u03c7", "circ", "\u02c6", "clubs", "\u2663", "cong", "\u2245", "copy", "\u00a9", "crarr", "\u21b5", "cup", "\u222a", "curren", "\u00a4", "dagger", "\u2020", "Dagger", "\u2021", "darr", "\u2193", "dArr", "\u21d3", "deg", "\u00b0", "Delta", "\u0394", "delta", "\u03b4", "diams", "\u2666", "divide", "\u00f7", "Eacute", "\u00c9", "eacute", "\u00e9", "Ecirc", "\u00ca", "ecirc", "\u00ea", "Egrave", "\u00c8", "egrave", "\u00e8", "empty", "\u2205", "emsp", "\u2003", "ensp", "\u2002", "Epsilon", "\u0395", "epsilon", "\u03b5", "equiv", "\u2261", "Eta", "\u0397", "eta", "\u03b7", "ETH", "\u00d0", "eth", "\u00f0", "Euml", "\u00cb", "euml", "\u00eb", "euro", "\u20ac", "exist", "\u2203", "fnof", "\u0192", "forall", "\u2200", "frac12", "\u00bd", "frac14", "\u00bc", "frac34", "\u00be", "frasl", "\u2044", "Gamma", "\u0393", "gamma", "\u03b3", "ge", "\u2265", "harr", "\u2194", "hArr", "\u21d4", "hearts", "\u2665", "hellip", "\u2026", "Iacute", "\u00cd", "iacute", "\u00ed", "Icirc", "\u00ce", "icirc", "\u00ee", "iexcl", "\u00a1", "Igrave", "\u00cc", "igrave", "\u00ec", "image", "\u2111", "infin", "\u221e", "int", "\u222b", "Iota", "\u0399", "iota", "\u03b9", "iquest", "\u00bf", "isin", "\u2208", "Iuml", "\u00cf", "iuml", "\u00ef", "Kappa", "\u039a", "kappa", "\u03ba", "Lambda", "\u039b", "lambda", "\u03bb", "lang", "\u2329", "laquo", "\u00ab", "larr", "\u2190", "lArr", "\u21d0", "lceil", "\u2308", "ldquo", "\u201c", "le", "\u2264", "lfloor", "\u230a", "lowast", "\u2217", "loz", "\u25ca", "lrm", "\u200e", "lsaquo", "\u2039", "lsquo", "\u2018", "macr", "\u00af", "mdash", "\u2014", "micro", "\u00b5", "middot", "\u00b7", "minus", "\u2212", "Mu", "\u039c", "mu", "\u03bc", "nabla", "\u2207", "nbsp", "\u00a0", "ndash", "\u2013", "ne", "\u2260", "ni", "\u220b", "not", "\u00ac", "notin", "\u2209", "nsub", "\u2284", "Ntilde", "\u00d1", "ntilde", "\u00f1", "Nu", "\u039d", "nu", "\u03bd", "Oacute", "\u00d3", "oacute", "\u00f3", "Ocirc", "\u00d4", "ocirc", "\u00f4", "OElig", "\u0152", "oelig", "\u0153", "Ograve", "\u00d2", "ograve", "\u00f2", "oline", "\u203e", "Omega", "\u03a9", "omega", "\u03c9", "Omicron", "\u039f", "omicron", "\u03bf", "oplus", "\u2295", "or", "\u2228", "ordf", "\u00aa", "ordm", "\u00ba", "Oslash", "\u00d8", "oslash", "\u00f8", "Otilde", "\u00d5", "otilde", "\u00f5", "otimes", "\u2297", "Ouml", "\u00d6", "ouml", "\u00f6", "para", "\u00b6", "part", "\u2202", "permil", "\u2030", "perp", "\u22a5", "Phi", "\u03a6", "phi", "\u03c6", "Pi", "\u03a0", "pi", "\u03c0", "piv", "\u03d6", "plusmn", "\u00b1", "pound", "\u00a3", "prime", "\u2032", "Prime", "\u2033", "prod", "\u220f", "prop", "\u221d", "Psi", "\u03a8", "psi", "\u03c8", "radic", "\u221a", "rang", "\u232a", "raquo", "\u00bb", "rarr", "\u2192", "rArr", "\u21d2", "rceil", "\u2309", "rdquo", "\u201d", "real", "\u211c", "reg", "\u00ae", "rfloor", "\u230b", "Rho", "\u03a1", "rho", "\u03c1", "rlm", "\u200f", "rsaquo", "\u203a", "rsquo", "\u2019", "sbquo", "\u201a", "Scaron", "\u0160", "scaron", "\u0161", "sdot", "\u22c5", "sect", "\u00a7", "shy", "\u00ad", "Sigma", "\u03a3", "sigma", "\u03c3", "sigmaf", "\u03c2", "sim", "\u223c", "spades", "\u2660", "sub", "\u2282", "sube", "\u2286", "sum", "\u2211", "sup", "\u2283", "sup1", "\u00b9", "sup2", "\u00b2", "sup3", "\u00b3", "supe", "\u2287", "szlig", "\u00df", "Tau", "\u03a4", "tau", "\u03c4", "there4", "\u2234", "Theta", "\u0398", "theta", "\u03b8", "thetasym", "\u03d1", "thinsp", "\u2009", "THORN", "\u00de", "thorn", "\u00fe", "tilde", "\u02dc", "times", "\u00d7", "trade", "\u2122", "Uacute", "\u00da", "uacute", "\u00fa", "uarr", "\u2191", "uArr", "\u21d1", "Ucirc", "\u00db", "ucirc", "\u00fb", "Ugrave", "\u00d9", "ugrave", "\u00f9", "uml", "\u00a8", "upsih", "\u03d2", "Upsilon", "\u03a5", "upsilon", "\u03c5", "Uuml", "\u00dc", "uuml", "\u00fc", "weierp", "\u2118", "Xi", "\u039e", "xi", "\u03be", "Yacute", "\u00dd", "yacute", "\u00fd", "yen", "\u00a5", "yuml", "\u00ff", "Yuml", "\u0178", "Zeta", "\u0396", "zeta", "\u03b6", "zwj", "\u200d", "zwnj", "\u200c" }; }