/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- * * The contents of this file are subject to the Netscape Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/NPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. * * The Original Code is Rhino code, released * May 6, 1999. * * The Initial Developer of the Original Code is Netscape * Communications Corporation. Portions created by Netscape are * Copyright (C) 1997-1999 Netscape Communications Corporation. All * Rights Reserved. * * Contributor(s): * Roger Lawrence * Mike McCabe * * Alternatively, the contents of this file may be used under the * terms of the GNU Public License (the "GPL"), in which case the * provisions of the GPL are applicable instead of those above. * If you wish to allow use of your version of this file only * under the terms of the GPL and not to allow others to use your * version of this file under the NPL, indicate your decision by * deleting the provisions above and replace them with the notice * and other provisions required by the GPL. If you do not delete * the provisions above, a recipient may use your version of this * file under either the NPL or the GPL. */ // Modified by Google package com.google.gwt.dev.js.rhino; import java.io.*; import java.util.HashMap; import java.util.Map; /** * This class implements the JavaScript scanner. * * It is based on the C source files jsscan.c and jsscan.h * in the jsref package. */ public class TokenStream { private static final Map<String, Integer> KEYWORDS = new HashMap<String, Integer>(); /* * JSTokenStream flags, mirroring those in jsscan.h. These are used * by the parser to change/check the state of the scanner. */ final static int TSF_NEWLINES = 1 << 0, // tokenize newlines TSF_FUNCTION = 1 << 1, // scanning inside function body TSF_RETURN_EXPR = 1 << 2, // function has 'return expr;' TSF_RETURN_VOID = 1 << 3, // function has 'return;' TSF_REGEXP = 1 << 4, // looking for a regular expression TSF_DIRTYLINE = 1 << 5; // stuff other than whitespace since // start of line /* * For chars - because we need something out-of-range * to check. (And checking EOF by exception is annoying.) * Note distinction from EOF token type! */ private final static int EOF_CHAR = -1; /** * Token types. These values correspond to JSTokenType values in * jsscan.c. */ public final static int // start enum ERROR = -1, // well-known as the only code < EOF EOF = 0, // end of file token - (not EOF_CHAR) EOL = 1, // end of line // Beginning here are interpreter bytecodes. Their values // must not exceed 127. POPV = 2, ENTERWITH = 3, LEAVEWITH = 4, RETURN = 5, GOTO = 6, IFEQ = 7, IFNE = 8, DUP = 9, SETNAME = 10, BITOR = 11, BITXOR = 12, BITAND = 13, EQ = 14, NE = 15, LT = 16, LE = 17, GT = 18, GE = 19, LSH = 20, RSH = 21, URSH = 22, ADD = 23, SUB = 24, MUL = 25, DIV = 26, MOD = 27, BITNOT = 28, NEG = 29, NEW = 30, DELPROP = 31, TYPEOF = 32, NAMEINC = 33, PROPINC = 34, ELEMINC = 35, NAMEDEC = 36, PROPDEC = 37, ELEMDEC = 38, GETPROP = 39, SETPROP = 40, GETELEM = 41, SETELEM = 42, CALL = 43, NAME = 44, NUMBER = 45, STRING = 46, ZERO = 47, ONE = 48, NULL = 49, THIS = 50, FALSE = 51, TRUE = 52, SHEQ = 53, // shallow equality (===) SHNE = 54, // shallow inequality (!==) CLOSURE = 55, REGEXP = 56, POP = 57, POS = 58, VARINC = 59, VARDEC = 60, BINDNAME = 61, THROW = 62, IN = 63, INSTANCEOF = 64, GOSUB = 65, RETSUB = 66, CALLSPECIAL = 67, GETTHIS = 68, NEWTEMP = 69, USETEMP = 70, GETBASE = 71, GETVAR = 72, SETVAR = 73, UNDEFINED = 74, TRY = 75, ENDTRY = 76, NEWSCOPE = 77, TYPEOFNAME = 78, ENUMINIT = 79, ENUMNEXT = 80, GETPROTO = 81, GETPARENT = 82, SETPROTO = 83, SETPARENT = 84, SCOPE = 85, GETSCOPEPARENT = 86, THISFN = 87, JTHROW = 88, // End of interpreter bytecodes SEMI = 89, // semicolon LB = 90, // left and right brackets RB = 91, LC = 92, // left and right curlies (braces) RC = 93, LP = 94, // left and right parentheses GWT = 95, COMMA = 96, // comma operator ASSIGN = 97, // assignment ops (= += -= etc.) HOOK = 98, // conditional (?:) COLON = 99, OR = 100, // logical or (||) AND = 101, // logical and (&&) EQOP = 102, // equality ops (== !=) RELOP = 103, // relational ops (< <= > >=) SHOP = 104, // shift ops (<< >> >>>) UNARYOP = 105, // unary prefix operator INC = 106, // increment/decrement (++ --) DEC = 107, DOT = 108, // member operator (.) PRIMARY = 109, // true, false, null, this FUNCTION = 110, // function keyword EXPORT = 111, // export keyword IMPORT = 112, // import keyword IF = 113, // if keyword ELSE = 114, // else keyword SWITCH = 115, // switch keyword CASE = 116, // case keyword DEFAULT = 117, // default keyword WHILE = 118, // while keyword DO = 119, // do keyword FOR = 120, // for keyword BREAK = 121, // break keyword CONTINUE = 122, // continue keyword VAR = 123, // var keyword WITH = 124, // with keyword CATCH = 125, // catch keyword FINALLY = 126, // finally keyword /** Added by Mike - these are JSOPs in the jsref, but I * don't have them yet in the java implementation... * so they go here. Also whatever I needed. * Most of these go in the 'op' field when returning * more general token types, eg. 'DIV' as the op of 'ASSIGN'. */ NOP = 128, // NOP NOT = 129, // etc. PRE = 130, // for INC, DEC nodes. POST = 131, /** * For JSOPs associated with keywords... * eg. op = THIS; token = PRIMARY */ VOID = 132, /* types used for the parse tree - these never get returned * by the scanner. */ BLOCK = 133, // statement block ARRAYLIT = 134, // array literal OBJLIT = 135, // object literal LABEL = 136, // label TARGET = 137, LOOP = 138, ENUMDONE = 139, EXPRSTMT = 140, PARENT = 141, CONVERT = 142, JSR = 143, NEWLOCAL = 144, USELOCAL = 145, DEBUGGER = 146, SCRIPT = 147, // top-level node for entire script LAST_TOKEN = 147, NUMBER_INT = 148, // This value is only used as a return value for getTokenHelper, // which is only called from getToken and exists to avoid an excessive // recursion problem if a number of lines in a row are comments. RETRY_TOKEN = 65535; // end enum public static String tokenToName(int token) { if (Context.printTrees || Context.printICode) { switch (token) { case ERROR: return "error"; case EOF: return "eof"; case EOL: return "eol"; case POPV: return "popv"; case ENTERWITH: return "enterwith"; case LEAVEWITH: return "leavewith"; case RETURN: return "return"; case GOTO: return "goto"; case IFEQ: return "ifeq"; case IFNE: return "ifne"; case DUP: return "dup"; case SETNAME: return "setname"; case BITOR: return "bitor"; case BITXOR: return "bitxor"; case BITAND: return "bitand"; case EQ: return "eq"; case NE: return "ne"; case LT: return "lt"; case LE: return "le"; case GT: return "gt"; case GE: return "ge"; case LSH: return "lsh"; case RSH: return "rsh"; case URSH: return "ursh"; case ADD: return "add"; case SUB: return "sub"; case MUL: return "mul"; case DIV: return "div"; case MOD: return "mod"; case BITNOT: return "bitnot"; case NEG: return "neg"; case NEW: return "new"; case DELPROP: return "delprop"; case TYPEOF: return "typeof"; case NAMEINC: return "nameinc"; case PROPINC: return "propinc"; case ELEMINC: return "eleminc"; case NAMEDEC: return "namedec"; case PROPDEC: return "propdec"; case ELEMDEC: return "elemdec"; case GETPROP: return "getprop"; case SETPROP: return "setprop"; case GETELEM: return "getelem"; case SETELEM: return "setelem"; case CALL: return "call"; case NAME: return "name"; case NUMBER_INT: return "integer"; case NUMBER: return "double"; case STRING: return "string"; case ZERO: return "zero"; case ONE: return "one"; case NULL: return "null"; case THIS: return "this"; case FALSE: return "false"; case TRUE: return "true"; case SHEQ: return "sheq"; case SHNE: return "shne"; case CLOSURE: return "closure"; case REGEXP: return "object"; case POP: return "pop"; case POS: return "pos"; case VARINC: return "varinc"; case VARDEC: return "vardec"; case BINDNAME: return "bindname"; case THROW: return "throw"; case IN: return "in"; case INSTANCEOF: return "instanceof"; case GOSUB: return "gosub"; case RETSUB: return "retsub"; case CALLSPECIAL: return "callspecial"; case GETTHIS: return "getthis"; case NEWTEMP: return "newtemp"; case USETEMP: return "usetemp"; case GETBASE: return "getbase"; case GETVAR: return "getvar"; case SETVAR: return "setvar"; case UNDEFINED: return "undefined"; case TRY: return "try"; case ENDTRY: return "endtry"; case NEWSCOPE: return "newscope"; case TYPEOFNAME: return "typeofname"; case ENUMINIT: return "enuminit"; case ENUMNEXT: return "enumnext"; case GETPROTO: return "getproto"; case GETPARENT: return "getparent"; case SETPROTO: return "setproto"; case SETPARENT: return "setparent"; case SCOPE: return "scope"; case GETSCOPEPARENT: return "getscopeparent"; case THISFN: return "thisfn"; case JTHROW: return "jthrow"; case SEMI: return "semi"; case LB: return "lb"; case RB: return "rb"; case LC: return "lc"; case RC: return "rc"; case LP: return "lp"; case GWT: return "gwt"; case COMMA: return "comma"; case ASSIGN: return "assign"; case HOOK: return "hook"; case COLON: return "colon"; case OR: return "or"; case AND: return "and"; case EQOP: return "eqop"; case RELOP: return "relop"; case SHOP: return "shop"; case UNARYOP: return "unaryop"; case INC: return "inc"; case DEC: return "dec"; case DOT: return "dot"; case PRIMARY: return "primary"; case FUNCTION: return "function"; case EXPORT: return "export"; case IMPORT: return "import"; case IF: return "if"; case ELSE: return "else"; case SWITCH: return "switch"; case CASE: return "case"; case DEFAULT: return "default"; case WHILE: return "while"; case DO: return "do"; case FOR: return "for"; case BREAK: return "break"; case CONTINUE: return "continue"; case VAR: return "var"; case WITH: return "with"; case CATCH: return "catch"; case FINALLY: return "finally"; case NOP: return "nop"; case NOT: return "not"; case PRE: return "pre"; case POST: return "post"; case VOID: return "void"; case BLOCK: return "block"; case ARRAYLIT: return "arraylit"; case OBJLIT: return "objlit"; case LABEL: return "label"; case TARGET: return "target"; case LOOP: return "loop"; case ENUMDONE: return "enumdone"; case EXPRSTMT: return "exprstmt"; case PARENT: return "parent"; case CONVERT: return "convert"; case JSR: return "jsr"; case NEWLOCAL: return "newlocal"; case USELOCAL: return "uselocal"; case SCRIPT: return "script"; } return "<unknown="+token+">"; } return ""; } /* This function uses the cached op, string and number fields in * TokenStream; if getToken has been called since the passed token * was scanned, the op or string printed may be incorrect. */ public String tokenToString(int token) { if (Context.printTrees) { String name = tokenToName(token); switch (token) { case UNARYOP: case ASSIGN: case PRIMARY: case EQOP: case SHOP: case RELOP: return name + " " + tokenToName(this.op); case STRING: case REGEXP: case NAME: return name + " `" + this.string + "'"; case NUMBER_INT: return "NUMBER_INT " + (int) this.number; case NUMBER: return "NUMBER " + this.number; } return name; } return ""; } static { KEYWORDS.put("break", BREAK); KEYWORDS.put("case", CASE); KEYWORDS.put("continue", CONTINUE); KEYWORDS.put("default", DEFAULT); KEYWORDS.put("delete", DELPROP); KEYWORDS.put("do", DO); KEYWORDS.put("else", ELSE); KEYWORDS.put("export", EXPORT); KEYWORDS.put("false", PRIMARY | (FALSE << 8)); KEYWORDS.put("for", FOR); KEYWORDS.put("function", FUNCTION); KEYWORDS.put("if", IF); KEYWORDS.put("in", RELOP | (IN << 8)); KEYWORDS.put("new", NEW); KEYWORDS.put("null", PRIMARY | (NULL << 8)); KEYWORDS.put("return", RETURN); KEYWORDS.put("switch", SWITCH); KEYWORDS.put("this", PRIMARY | (THIS << 8)); KEYWORDS.put("true", PRIMARY | (TRUE << 8)); KEYWORDS.put("typeof", UNARYOP | (TYPEOF << 8)); KEYWORDS.put("var", VAR); KEYWORDS.put("void", UNARYOP | (VOID << 8)); KEYWORDS.put("while", WHILE); KEYWORDS.put("with", WITH); KEYWORDS.put("catch", CATCH); KEYWORDS.put("debugger", DEBUGGER); KEYWORDS.put("finally", FINALLY); KEYWORDS.put("import", IMPORT); KEYWORDS.put("instanceof", RELOP | (INSTANCEOF << 8)); KEYWORDS.put("throw", THROW); KEYWORDS.put("try", TRY); } private int stringToKeyword(String name) { Integer id = KEYWORDS.get(name); if (id == null) return EOF; this.op = id >> 8; return id & 0xff; } public TokenStream(Reader in, String sourceName, int lineno) { this.in = new LineBuffer(in, lineno); this.pushbackToken = EOF; this.sourceName = sourceName; flags = 0; secondToLastPosition = new CodePosition(lineno, 0); lastPosition = new CodePosition(lineno, 0); } /* return and pop the token from the stream if it matches... * otherwise return null */ public boolean matchToken(int toMatch) throws IOException { int token = getToken(); if (token == toMatch) return true; // didn't match, push back token tokenno--; this.pushbackToken = token; return false; } public void ungetToken(int tt) { if (this.pushbackToken != EOF && tt != ERROR) { String message = Context.getMessage2("msg.token.replaces.pushback", tokenToString(tt), tokenToString(this.pushbackToken)); throw new RuntimeException(message); } this.pushbackToken = tt; tokenno--; } public int peekToken() throws IOException { int result = getToken(); this.pushbackToken = result; tokenno--; return result; } public int peekTokenSameLine() throws IOException { int result; flags |= TSF_NEWLINES; // SCAN_NEWLINES from jsscan.h result = peekToken(); flags &= ~TSF_NEWLINES; // HIDE_NEWLINES from jsscan.h if (this.pushbackToken == EOL) this.pushbackToken = EOF; return result; } private static boolean isAlpha(int c) { return ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')); } static boolean isDigit(int c) { return (c >= '0' && c <= '9'); } static int xDigitToInt(int c) { if ('0' <= c && c <= '9') { return c - '0'; } if ('a' <= c && c <= 'f') { return c - ('a' - 10); } if ('A' <= c && c <= 'F') { return c - ('A' - 10); } return -1; } /* As defined in ECMA. jsscan.c uses C isspace() (which allows * \v, I think.) note that code in in.read() implicitly accepts * '\r' == \u000D as well. */ public static boolean isJSSpace(int c) { return (c == '\u0020' || c == '\u0009' || c == '\u000C' || c == '\u000B' || c == '\u00A0' || Character.getType((char)c) == Character.SPACE_SEPARATOR); } private void skipLine() throws IOException { // skip to end of line int c; while ((c = in.read()) != EOF_CHAR && c != '\n') { } in.unread(); } public int getToken() throws IOException { int c; do { c = getTokenHelper(); } while (c == RETRY_TOKEN); updatePosition(); return c; } private int getTokenHelper() throws IOException { int c; tokenno++; // Check for pushed-back token if (this.pushbackToken != EOF) { int result = this.pushbackToken; this.pushbackToken = EOF; return result; } // Eat whitespace, possibly sensitive to newlines. do { c = in.read(); if (c == '\n') { flags &= ~TSF_DIRTYLINE; if ((flags & TSF_NEWLINES) != 0) break; } } while (isJSSpace(c) || c == '\n'); if (c == EOF_CHAR) return EOF; if (c != '-' && c != '\n') flags |= TSF_DIRTYLINE; // identifier/keyword/instanceof? // watch out for starting with a <backslash> boolean identifierStart; boolean isUnicodeEscapeStart = false; if (c == '\\') { c = in.read(); if (c == 'u') { identifierStart = true; isUnicodeEscapeStart = true; stringBufferTop = 0; } else { identifierStart = false; c = '\\'; in.unread(); } } else { identifierStart = Character.isJavaIdentifierStart((char)c); if (identifierStart) { stringBufferTop = 0; addToString(c); } // bruce: special handling of JSNI signatures // - it would be nice to handle Unicode escapes in the future // if (c == '@') { stringBufferTop = 0; addToString(c); return jsniMatchReference(); } } if (identifierStart) { boolean containsEscape = isUnicodeEscapeStart; for (;;) { if (isUnicodeEscapeStart) { // strictly speaking we should probably push-back // all the bad characters if the <backslash>uXXXX // sequence is malformed. But since there isn't a // correct context(is there?) for a bad Unicode // escape sequence in an identifier, we can report // an error here. int escapeVal = 0; for (int i = 0; i != 4; ++i) { c = in.read(); escapeVal = (escapeVal << 4) | xDigitToInt(c); // Next check takes care about c < 0 and bad escape if (escapeVal < 0) { break; } } if (escapeVal < 0) { reportTokenError("msg.invalid.escape", null); return ERROR; } addToString(escapeVal); isUnicodeEscapeStart = false; } else { c = in.read(); if (c == '\\') { c = in.read(); if (c == 'u') { isUnicodeEscapeStart = true; containsEscape = true; } else { reportTokenError("msg.illegal.character", null); return ERROR; } } else { if (!Character.isJavaIdentifierPart((char)c)) { break; } addToString(c); } } } in.unread(); String str = getStringFromBuffer(); if (!containsEscape) { // OPT we shouldn't have to make a string (object!) to // check if it's a keyword. // Return the corresponding token if it's a keyword int result = stringToKeyword(str); if (result != EOF) { return result; } } this.string = str; return NAME; } // is it a number? if (isDigit(c) || (c == '.' && isDigit(in.peek()))) { stringBufferTop = 0; int base = 10; if (c == '0') { c = in.read(); if (c == 'x' || c == 'X') { base = 16; c = in.read(); } else if (isDigit(c)) { base = 8; } else { addToString('0'); } } if (base == 16) { while (0 <= xDigitToInt(c)) { addToString(c); c = in.read(); } } else { while ('0' <= c && c <= '9') { /* * We permit 08 and 09 as decimal numbers, which * makes our behavior a superset of the ECMA * numeric grammar. We might not always be so * permissive, so we warn about it. */ if (base == 8 && c >= '8') { Object[] errArgs = { c == '8' ? "8" : "9" }; reportTokenWarning("msg.bad.octal.literal", errArgs); base = 10; } addToString(c); c = in.read(); } } boolean isInteger = true; if (base == 10 && (c == '.' || c == 'e' || c == 'E')) { isInteger = false; if (c == '.') { do { addToString(c); c = in.read(); } while (isDigit(c)); } if (c == 'e' || c == 'E') { addToString(c); c = in.read(); if (c == '+' || c == '-') { addToString(c); c = in.read(); } if (!isDigit(c)) { reportTokenError("msg.missing.exponent", null); return ERROR; } do { addToString(c); c = in.read(); } while (isDigit(c)); } } in.unread(); String numString = getStringFromBuffer(); double dval; if (base == 10 && !isInteger) { try { // Use Java conversion to number from string... dval = (Double.valueOf(numString)).doubleValue(); } catch (NumberFormatException ex) { Object[] errArgs = { ex.getMessage() }; reportTokenError("msg.caught.nfe", errArgs); return ERROR; } } else { dval = ScriptRuntime.stringToNumber(numString, 0, base); } this.number = dval; if (isInteger && this.number >= Integer.MIN_VALUE && this.number <= Integer.MAX_VALUE) { return NUMBER_INT; } return NUMBER; } // is it a string? if (c == '"' || c == '\'') { // We attempt to accumulate a string the fast way, by // building it directly out of the reader. But if there // are any escaped characters in the string, we revert to // building it out of a StringBuffer. int quoteChar = c; int val = 0; stringBufferTop = 0; c = in.read(); strLoop: while (c != quoteChar) { if (c == '\n' || c == EOF_CHAR) { in.unread(); reportTokenError("msg.unterminated.string.lit", null); return ERROR; } if (c == '\\') { // We've hit an escaped character c = in.read(); switch (c) { case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; // \v a late addition to the ECMA spec, // it is not in Java, so use 0xb case 'v': c = 0xb; break; case 'u': { /* * Get 4 hex digits; if the u escape is not * followed by 4 hex digits, use 'u' + the literal * character sequence that follows. */ int escapeStart = stringBufferTop; addToString('u'); int escapeVal = 0; for (int i = 0; i != 4; ++i) { c = in.read(); escapeVal = (escapeVal << 4) | xDigitToInt(c); if (escapeVal < 0) { continue strLoop; } addToString(c); } // prepare for replace of stored 'u' sequence // by escape value stringBufferTop = escapeStart; c = escapeVal; } break; case 'x': { /* Get 2 hex digits, defaulting to 'x' + literal * sequence, as above. */ c = in.read(); int escapeVal = xDigitToInt(c); if (escapeVal < 0) { addToString('x'); continue strLoop; } else { int c1 = c; c = in.read(); escapeVal = (escapeVal << 4) | xDigitToInt(c); if (escapeVal < 0) { addToString('x'); addToString(c1); continue strLoop; } else { // got 2 hex digits c = escapeVal; } } } break; case '\n': // Remove line terminator c = in.read(); continue strLoop; default: if ('0' <= c && c < '8') { val = c - '0'; c = in.read(); if ('0' <= c && c < '8') { val = 8 * val + c - '0'; c = in.read(); if ('0' <= c && c < '8' && val <= 037) { // c is 3rd char of octal sequence only if // the resulting val <= 0377 val = 8 * val + c - '0'; c = in.read(); } } in.unread(); c = val; } } } addToString(c); c = in.read(); } this.string = getStringFromBuffer(); return STRING; } switch (c) { case '\n': return EOL; case ';': return SEMI; case '[': return LB; case ']': return RB; case '{': return LC; case '}': return RC; case '(': return LP; case ')': return GWT; case ',': return COMMA; case '?': return HOOK; case ':': return COLON; case '.': return DOT; case '|': if (in.match('|')) { return OR; } else if (in.match('=')) { this.op = BITOR; return ASSIGN; } else { return BITOR; } case '^': if (in.match('=')) { this.op = BITXOR; return ASSIGN; } else { return BITXOR; } case '&': if (in.match('&')) { return AND; } else if (in.match('=')) { this.op = BITAND; return ASSIGN; } else { return BITAND; } case '=': if (in.match('=')) { if (in.match('=')) this.op = SHEQ; else this.op = EQ; return EQOP; } else { this.op = NOP; return ASSIGN; } case '!': if (in.match('=')) { if (in.match('=')) this.op = SHNE; else this.op = NE; return EQOP; } else { this.op = NOT; return UNARYOP; } case '<': /* NB:treat HTML begin-comment as comment-till-eol */ if (in.match('!')) { if (in.match('-')) { if (in.match('-')) { skipLine(); return RETRY_TOKEN; // in place of 'goto retry' } in.unread(); } in.unread(); } if (in.match('<')) { if (in.match('=')) { this.op = LSH; return ASSIGN; } else { this.op = LSH; return SHOP; } } else { if (in.match('=')) { this.op = LE; return RELOP; } else { this.op = LT; return RELOP; } } case '>': if (in.match('>')) { if (in.match('>')) { if (in.match('=')) { this.op = URSH; return ASSIGN; } else { this.op = URSH; return SHOP; } } else { if (in.match('=')) { this.op = RSH; return ASSIGN; } else { this.op = RSH; return SHOP; } } } else { if (in.match('=')) { this.op = GE; return RELOP; } else { this.op = GT; return RELOP; } } case '*': if (in.match('=')) { this.op = MUL; return ASSIGN; } else { return MUL; } case '/': // is it a // comment? if (in.match('/')) { skipLine(); return RETRY_TOKEN; } if (in.match('*')) { while ((c = in.read()) != -1 && !(c == '*' && in.match('/'))) { ; // empty loop body } if (c == EOF_CHAR) { reportTokenError("msg.unterminated.comment", null); return ERROR; } return RETRY_TOKEN; // `goto retry' } // is it a regexp? if ((flags & TSF_REGEXP) != 0) { stringBufferTop = 0; while ((c = in.read()) != '/') { if (c == '\n' || c == EOF_CHAR) { in.unread(); reportTokenError("msg.unterminated.re.lit", null); return ERROR; } if (c == '\\') { addToString(c); c = in.read(); } addToString(c); } int reEnd = stringBufferTop; while (true) { if (in.match('g')) addToString('g'); else if (in.match('i')) addToString('i'); else if (in.match('m')) addToString('m'); else break; } if (isAlpha(in.peek())) { reportTokenError("msg.invalid.re.flag", null); return ERROR; } this.string = new String(stringBuffer, 0, reEnd); this.regExpFlags = new String(stringBuffer, reEnd, stringBufferTop - reEnd); return REGEXP; } if (in.match('=')) { this.op = DIV; return ASSIGN; } else { return DIV; } case '%': this.op = MOD; if (in.match('=')) { return ASSIGN; } else { return MOD; } case '~': this.op = BITNOT; return UNARYOP; case '+': if (in.match('=')) { this.op = ADD; return ASSIGN; } else if (in.match('+')) { return INC; } else { return ADD; } case '-': if (in.match('=')) { this.op = SUB; c = ASSIGN; } else if (in.match('-')) { if (0 == (flags & TSF_DIRTYLINE)) { // treat HTML end-comment after possible whitespace // after line start as comment-utill-eol if (in.match('>')) { skipLine(); return RETRY_TOKEN; } } c = DEC; } else { c = SUB; } flags |= TSF_DIRTYLINE; return c; default: reportTokenError("msg.illegal.character", null); return ERROR; } } private void skipWhitespace() throws IOException { int tmp; do { tmp = in.read(); } while (isJSSpace(tmp) || tmp == '\n'); // Reposition back to first non whitespace char. in.unread(); } private int jsniMatchReference() throws IOException { // First, read the type name whose member is being accessed. if (!jsniMatchQualifiedTypeName('.', ':')) { return ERROR; } // Now we must the second colon. // int c = in.read(); if (c != ':') { in.unread(); reportTokenError("msg.jsni.expected.char", new String[] {":"}); return ERROR; } addToString(c); // Skip whitespace starting after ::. skipWhitespace(); // Finish by reading the field or method signature. if (!jsniMatchMethodSignatureOrFieldName()) { return ERROR; } this.string = new String(stringBuffer, 0, stringBufferTop); return NAME; } private boolean jsniMatchParamListSignature() throws IOException { // Assume the opening '(' has already been read. // Read param type signatures until we see a closing ')'. skipWhitespace(); // First check for the special case of * as the parameter list, indicating // a wildcard if (in.peek() == '*') { addToString(in.read()); if (in.peek() != ')') { reportTokenError("msg.jsni.expected.char", new String[] {")"}); } addToString(in.read()); return true; } // Otherwise, loop through reading one param type at a time do { // Skip whitespace between parameters. skipWhitespace(); int c = in.read(); if (c == ')') { // Finished successfully. // addToString(c); return true; } in.unread(); } while (jsniMatchParamTypeSignature()); // If we made it here, we can assume that there was an invalid type // signature that was already reported and that the offending char // was already unread. // return false; } private boolean jsniMatchParamTypeSignature() throws IOException { int c = in.read(); switch (c) { case 'Z': case 'B': case 'C': case 'S': case 'I': case 'J': case 'F': case 'D': // Primitive type id. addToString(c); return true; case 'L': // Class/Interface type prefix. addToString(c); return jsniMatchQualifiedTypeName('/', ';'); case '[': // Array type prefix. addToString(c); return jsniMatchParamArrayTypeSignature(); default: in.unread(); reportTokenError("msg.jsni.expected.param.type", null); return false; } } private boolean jsniMatchParamArrayTypeSignature() throws IOException { // Assume the leading '[' has already been read. // What follows must be another param type signature. // return jsniMatchParamTypeSignature(); } private boolean jsniMatchMethodSignatureOrFieldName() throws IOException { int c = in.read(); // We must see an ident start here. // if (!Character.isJavaIdentifierStart((char)c)) { in.unread(); reportTokenError("msg.jsni.expected.identifier", null); return false; } addToString(c); for (;;) { c = in.read(); if (Character.isJavaIdentifierPart((char)c)) { addToString(c); } else if (c == '(') { // This means we're starting a JSNI method signature. // addToString(c); if (jsniMatchParamListSignature()) { // Finished a method signature with success. // Assume the callee unread the last char. // return true; } else { // Assume the callee reported the error and unread the last char. // return false; } } else { // We don't know this char, so it finishes the token. // in.unread(); return true; } } } /** * This method is called to match the fully-qualified type name that * should appear after the '@' in a JSNI reference. * @param sepChar the character that will separate the Java idents * (either a '.' or '/') * @param endChar the character that indicates the end of the */ private boolean jsniMatchQualifiedTypeName(char sepChar, char endChar) throws IOException { int c = in.read(); // Whether nested or not, we must see an ident start here. // if (!Character.isJavaIdentifierStart((char)c)) { in.unread(); reportTokenError("msg.jsni.expected.identifier", null); return false; } // Now actually add the first ident char. // addToString(c); // And append any other ident chars. // for (;;) { c = in.read(); if (Character.isJavaIdentifierPart((char)c)) { addToString(c); } else { break; } } // Arrray-type reference while (c == '[') { if (']' == in.peek()) { addToString('['); addToString(in.read()); c = in.read(); } else { break; } } // We have a non-ident char to classify. // if (c == sepChar) { addToString(c); if (jsniMatchQualifiedTypeName(sepChar, endChar)) { // We consumed up to the endChar, so we finished with total success. // return true; } else { // Assume that the nested call reported the syntax error and // unread the last character. // return false; } } else if (c == endChar) { // Matched everything up to the specified end char. // addToString(c); return true; } else { // This is an unknown char that finishes the token. // in.unread(); return true; } } private String getStringFromBuffer() { return new String(stringBuffer, 0, stringBufferTop); } private void addToString(int c) { if (stringBufferTop == stringBuffer.length) { char[] tmp = new char[stringBuffer.length * 2]; System.arraycopy(stringBuffer, 0, tmp, 0, stringBufferTop); stringBuffer = tmp; } stringBuffer[stringBufferTop++] = (char)c; } /** * Positions hold offset of an corresponding token's end. * So lastPosition holds an offset of char that is next to last token. * * Use secondToLastPosition for error reporting outside of TokenStream, because * usually we want to report beginning of erroneous token, * which is end of second to last read token. */ public void reportSyntaxError(String messageProperty, Object[] args) { String message = Context.getMessage(messageProperty, args); Context.reportError(message, secondToLastPosition, lastPosition); } /** * Token errors are reported before tokes is read, * so use lastPosition for reporting. * @see #reportSyntaxError */ private void reportTokenError(String messageProperty, Object[] args) { String message = Context.getMessage(messageProperty, args); Context.reportError(message, lastPosition, new CodePosition(getLineno(), getOffset())); } private void reportTokenWarning(String messageProperty, Object[] args) { String message = Context.getMessage(messageProperty, args); Context.reportWarning(message, lastPosition, new CodePosition(getLineno(), getOffset())); } /** * Updates last two known positions (for error reporting). */ private void updatePosition() { CodePosition currentPosition = new CodePosition(getLineno(), getOffset()); if (currentPosition.compareTo(lastPosition) > 0) { secondToLastPosition = lastPosition; lastPosition = currentPosition; } } public String getSourceName() { return sourceName; } public int getLineno() { return in.getLineno(); } public int getOp() { return op; } public String getString() { return string; } public double getNumber() { return number; } public String getLine() { return in.getLine(); } public int getOffset() { return in.getOffset(); } public int getTokenno() { return tokenno; } public boolean eof() { return in.eof(); } // instance variables private LineBuffer in; /* for TSF_REGEXP, etc. * should this be manipulated by gettor/settor functions? * should it be passed to getToken(); */ int flags; String regExpFlags; private String sourceName; private int pushbackToken; private int tokenno; CodePosition secondToLastPosition; CodePosition lastPosition; private int op; // Set this to an inital non-null value so that the Parser has // something to retrieve even if an error has occured and no // string is found. Fosters one class of error, but saves lots of // code. private String string = ""; private double number; private char[] stringBuffer = new char[128]; private int stringBufferTop; }