/** * Copyright (c) 2012-2016 AndrĂ© Bargull * Alle Rechte vorbehalten / All Rights Reserved. Use is subject to license terms. * * <https://github.com/anba/es6draft> */ package com.github.anba.es6draft.parser; import static com.github.anba.es6draft.parser.Characters.*; import static com.github.anba.es6draft.parser.NumberParser.*; import com.github.anba.es6draft.parser.ParserException.ExceptionType; import com.github.anba.es6draft.runtime.internal.CompatibilityOption; import com.github.anba.es6draft.runtime.internal.Messages; /** * Lexer for ECMAScript source code * <ul> * <li>10 ECMAScript Language: Source Code * <li>11 ECMAScript Language: Lexical Grammar * </ul> */ final class TokenStream { private final Parser parser; private final TokenStreamInput input; /** current line number */ private int line; /** start position of current line */ private int linestart; /** start position of current token, includes leading whitespace and comments */ private int position; /** start position of next token, includes leading whitespace and comments */ private int nextPosition; // token data /** current token in stream */ private Token current; /** next token in stream */ private Token next; /** line terminator preceding current token? */ private boolean hasCurrentLineTerminator; /** line terminator preceding next token? */ private boolean hasLineTerminator; /** start line/column info for current token */ private long sourcePosition; /** start line/column info for next token */ private long nextSourcePosition; // literal data private final StrBuffer buffer; private String string = null; private double number = 0; private boolean hasEscape = false; /** * Resets and returns the internal character buffer. * * @return the character buffer */ private StrBuffer buffer() { StrBuffer buffer = this.buffer; buffer.clear(); return buffer; } /** * Returns {@code true} if the compatibility option is enabled. 
* * @param option * the compatibility option * @return {@code true} if the compatibility option is enabled. */ private boolean isEnabled(CompatibilityOption option) { return parser.isEnabled(option); } /** * Returns {@code true} if parsing module code. * * @return {@code true} if parsing module code */ private boolean isModule() { return parser.isModule(); } /** * Updates line state information for line breaks within literals, does <strong>not</strong> set * the {@link #hasLineTerminator} flag. */ private void incrementLine() { line += 1; linestart = input.position(); } /** * Updates the line state information, must not be used for line breaks within literals. */ private void incrementLineAndUpdate() { line += 1; linestart = input.position(); hasLineTerminator = true; } /** * Sets the source position (line / column information) for the next token. */ private void updateSourcePosition() { nextSourcePosition = ((long) (input.position() - linestart) << 32) | line; } /** * Public constructor, token stream still needs to be initialized by calling the * {@link #initialize()} method. * * @param parser * the parser instance * @param input * the token stream instance */ public TokenStream(Parser parser, TokenStreamInput input) { this.parser = parser; this.input = input; this.buffer = new StrBuffer(input.length()); } /** * Return the start position of current token, includes leading whitespace and comments. Also * needed to reset the token stream. * * @return the token start position * @see #reset(long, long) */ public int position() { return position; } /** * Returns the last character from the input source. * * @return the last character */ public char lastChar() { return (char) input.lastChar(); } /** * Returns the raw source characters from the underlying input source. 
* * @param from * the start position (inclusive) * @param to * the end position (exclusive) * @return the source characters in the given range */ public String range(int from, int to) { return input.range(from, to); } /** * Returns the encoded line information, needed to reset the token stream. * * @return the current line information * @see #reset(long, long) */ public long lineinfo() { return ((long) line << 32) | linestart; } /** * Returns the encoded line/column information of the current source position. * * @return the current line/column information */ public long sourcePosition() { return sourcePosition; } /** * Returns the encoded start line/column information for current token. * * @return the begin line/column information */ public long beginPosition() { return sourcePosition; } /** * Returns the encoded end line/column information for current token. * * @return the end line/column information */ public long endPosition() { // add one to make columns 1-indexed return ((long) (1 + position - linestart) << 32) | line; } /** * Returns the encoded end line/column information for current position. * * @return the end line/column information */ public long rawEndPosition() { // add one to make columns 1-indexed return ((long) (1 + input.position() - linestart) << 32) | line; } /** * Initializes this token stream, needs to be called before fetching any tokens. * * @return this token stream */ public TokenStream initialize() { return initialize(parser.getSourceLine()); } /** * Initializes this token stream, needs to be called before fetching any tokens. 
* * @param line * the start line number * @return this token stream */ public TokenStream initialize(int line) { // set internal state to default values this.hasLineTerminator = true; this.hasCurrentLineTerminator = true; this.position = input.position(); this.line = line; this.linestart = input.position(); this.current = scanTokenNoComment(); this.sourcePosition = nextSourcePosition; this.nextPosition = input.position(); this.next = null; return this; } /** * Resets this token stream to the requested position. * * @param position * the new position * @param lineinfo * the new line information * @see #position() * @see #lineinfo() */ public void reset(long position, long lineinfo) { // reset character stream input.reset((int) position); // reset internal state this.hasLineTerminator = false; this.hasCurrentLineTerminator = true; this.position = input.position(); this.current = scanTokenNoComment(); this.sourcePosition = nextSourcePosition; this.nextPosition = input.position(); this.next = null; // reset line state last, effectively ignoring any changes from scanTokenNoComment() this.line = (int) (lineinfo >>> 32); this.linestart = (int) lineinfo; } /** * Returns the string data of the current token. * * @return the current string data */ public String getString() { if (string == null) { string = buffer.toString(); } return string; } /** * Returns the string data of the next token. * * @return the next string data */ public String getNextString() { if (next == null) { peekToken(); } return buffer.toString(); } /** * Returns <code>true</code> if the current token is a string literal which contains an escape * sequence. * * @return {@code true} if the string literal contains an escape sequence */ public boolean hasEscape() { return hasEscape; } /** * Returns the number data of the current token. * * @return the current number data */ public double getNumber() { return number; } /** * Returns the current line number. 
*
     * @return the line number
     */
    public int getLine() {
        return line;
    }

    /**
     * Returns the current column number, i.e. the input position relative to the line start.
     *
     * @return the column number
     */
    public int getColumn() {
        int offset = input.position();
        return offset - linestart;
    }

    /**
     * Returns <code>true</code> if there is a line terminator before the current token.
     *
     * @return {@code true} if there is a line terminator
     */
    public boolean hasCurrentLineTerminator() {
        assert current != null;
        return hasCurrentLineTerminator;
    }

    /**
     * Returns <code>true</code> if there is a line terminator before the next token. The next token
     * must already have been peeked.
     *
     * @return {@code true} if there is a line terminator
     */
    public boolean hasNextLineTerminator() {
        assert next != null;
        return hasLineTerminator;
    }

    /* token operations */

    /**
     * Advances the token stream to the next token.
     *
     * @return the next token
     */
    public Token nextToken() {
        if (next == null) {
            // no lookahead available, scan the following token now
            hasLineTerminator = false;
            nextPosition = input.position();
            next = scanTokenNoComment();
        }

        // promote the lookahead data to the current token
        current = next;
        position = nextPosition;
        sourcePosition = nextSourcePosition;
        hasCurrentLineTerminator = hasLineTerminator;
        string = null;

        // clear the lookahead state
        next = null;
        nextPosition = input.position();
        hasLineTerminator = false;
        return current;
    }

    /**
     * Returns the current token.
     *
     * @return the current token
     */
    public Token currentToken() {
        return current;
    }

    /**
     * Peeks the next token in this token stream.
* * @return the next token */ public Token peekToken() { assert !(current == Token.DIV || current == Token.ASSIGN_DIV || current == Token.ERROR); if (next == null) { switch (current) { case NAME: case ESCAPED_NAME: case ESCAPED_RESERVED_WORD: case ESCAPED_STRICT_RESERVED_WORD: case ESCAPED_YIELD: case ESCAPED_ASYNC: case ESCAPED_AWAIT: case ESCAPED_LET: case STRING: string = getString(); default: } hasLineTerminator = false; nextPosition = input.position(); next = scanTokenNoComment(); } return next; } /* lexer operations */ /** * <strong>[11.8.5] Regular Expression Literals</strong> * * <pre> * RegularExpressionLiteral :: * / RegularExpressionBody / RegularExpressionFlags * RegularExpressionBody :: * RegularExpressionFirstChar RegularExpressionChars * RegularExpressionChars :: * [empty] * RegularExpressionChars RegularExpressionChar * RegularExpressionFirstChar :: * RegularExpressionNonTerminator but not one of * or \ or / or [ * RegularExpressionBackslashSequence * RegularExpressionClass * RegularExpressionChar :: * RegularExpressionNonTerminator but not one of \ or / or [ * RegularExpressionBackslashSequence * RegularExpressionClass * RegularExpressionBackslashSequence :: * \ RegularExpressionNonTerminator * RegularExpressionNonTerminator :: * SourceCharacter but not LineTerminator * RegularExpressionClass :: * [ RegularExpressionClassChars ] * RegularExpressionClassChars :: * [empty] * RegularExpressionClassChars RegularExpressionClassChar * RegularExpressionClassChar :: * RegularExpressionNonTerminator but not one of ] or \ * RegularExpressionBackslashSequence * RegularExpressionFlags :: * [empty] * RegularExpressionFlags IdentifierPart * </pre> * * @param start * the start token of the regular expression literal, either {@link Token#DIV} or * {@link Token#ASSIGN_DIV} * @return the regular expression pattern */ public String readRegularExpression(Token start) { assert start == Token.DIV || start == Token.ASSIGN_DIV; assert next == null : "regular expression in 
lookahead"; final int EOF = TokenStreamInput.EOF; TokenStreamInput input = this.input; StrBuffer buffer = buffer(); if (start == Token.ASSIGN_DIV) { buffer.append('='); } else { int c = input.peek(0); if (c == '/' || c == '*') { throw error(Messages.Key.InvalidRegExpLiteral); } } boolean inClass = false; for (;;) { int c = input.getChar(); if (c == '\\') { // escape sequence buffer.append(c); c = input.getChar(); } else if (c == '[') { inClass = true; } else if (c == ']') { inClass = false; } else if (c == '/' && !inClass) { return buffer.toString(); } if (c == EOF || isLineTerminator(c)) { throw error(Messages.Key.UnterminatedRegExpLiteral); } buffer.append(c); } } /** * <strong>[11.8.5] Regular Expression Literals</strong> * * <pre> * RegularExpressionFlags :: * [empty] * RegularExpressionFlags IdentifierPart * </pre> * * @return the regular expression literal flags */ public String readRegularExpressionFlags() { TokenStreamInput input = this.input; StrBuffer buffer = buffer(); for (;;) { int c = input.get(); if (!isIdentifierPart(c)) { if (c == '\\' && match('u')) { readUnicodeEscape(); throw error(Messages.Key.UnicodeEscapeInRegExpFlags); } input.unget(c); return buffer.toString(); } buffer.appendCodePoint(c); } } // /** * <strong>[11.8.6] Template Literal Lexical Components</strong> * * <pre> * Template :: * NoSubstitutionTemplate * TemplateHead * NoSubstitutionTemplate :: * ` TemplateCharacters<span><sub>opt</sub></span>` * TemplateHead :: * ` TemplateCharacters<span><sub>opt</sub></span>${ * TemplateSubstitutionTail :: * TemplateMiddle * TemplateTail * TemplateMiddle :: * } TemplateCharacters<span><sub>opt</sub></span>${ * TemplateTail :: * } TemplateCharacters<span><sub>opt</sub></span>` * TemplateCharacters :: * TemplateCharacter TemplateCharacters<span><sub>opt</sub></span> * TemplateCharacter :: * $ [LA ≠ { ] * \ EscapeSequence * LineContinuation * LineTerminatorSequence * SourceCharacter but not one of ` or \ or $ or LineTerminator * </pre> * * @param 
startToken * the start token of the template literal, either {@link Token#TEMPLATE} or * {@link Token#RC} * @return string tuple {cooked, raw} for the template literal */ public String[] readTemplateLiteral(Token startToken) { assert startToken == Token.TEMPLATE || startToken == Token.RC; assert currentToken() == startToken; assert next == null : "template literal in lookahead"; final int EOF = TokenStreamInput.EOF; TokenStreamInput input = this.input; StringBuilder raw = new StringBuilder(); StrBuffer buffer = buffer(); int pos = input.position(); int rawPos = input.position(); for (;;) { int c = input.getChar(); if (c == EOF) { throw eofError(Messages.Key.UnterminatedTemplateLiteral); } if (c == '`') { current = Token.TEMPLATE; buffer.append(input, pos, input.position() - 1); raw.append(input.range(rawPos, input.position() - 1)); return new String[] { buffer.toString(), raw.toString() }; } if (c == '$' && match('{')) { current = Token.LC; buffer.append(input, pos, input.position() - 2); raw.append(input.range(rawPos, input.position() - 2)); return new String[] { buffer.toString(), raw.toString() }; } if (c != '\\') { if (isLineTerminator(c)) { // line terminator sequence if (c == '\r') { // normalize \r and \r\n to \n buffer.append(input, pos, input.position() - 1); buffer.append('\n'); raw.append(input.range(rawPos, input.position() - 1)).append('\n'); match('\n'); pos = rawPos = input.position(); } incrementLine(); } continue; } buffer.append(input, pos, input.position() - 1); c = input.getChar(); if (c == EOF) { throw eofError(Messages.Key.UnterminatedTemplateLiteral); } if (isLineTerminator(c)) { // line continuation if (c == '\r') { // normalize \r and \r\n to \n raw.append(input.range(rawPos, input.position() - 1)).append('\n'); match('\n'); rawPos = input.position(); } incrementLine(); } else { buffer.appendCodePoint(readTemplateEscapeSequence(c)); } pos = input.position(); } } /** * <strong>[11.8.4] String Literals</strong> * * <pre> * EscapeSequence :: * 
CharacterEscapeSequence * 0 [lookahead ∉ DecimalDigit] * HexEscapeSequence * UnicodeEscapeSequence * CharacterEscapeSequence :: * SingleEscapeCharacter * NonEscapeCharacter * SingleEscapeCharacter :: one of * ' " \ b f n r t v * NonEscapeCharacter :: * SourceCharacter but not one of EscapeCharacter or LineTerminator * EscapeCharacter :: * SingleEscapeCharacter * DecimalDigit * x * u * HexEscapeSequence :: * x HexDigit HexDigit * UnicodeEscapeSequence :: * u HexDigit HexDigit HexDigit HexDigit * u{ HexDigits } * </pre> * * @param c * the start character * @return the escaped character */ private int readTemplateEscapeSequence(int c) { TokenStreamInput input = this.input; switch (c) { case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\u000B'; break; case 'x': c = (hexDigit(input.getChar()) << 4) | hexDigit(input.getChar()); if (c < 0) { throw error(Messages.Key.InvalidHexEscape); } break; case 'u': c = readUnicodeEscape(); break; case '0': if (isDecimalDigit(input.peek(0))) { throw error(Messages.Key.InvalidNULLEscape); } c = '\0'; break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': throw error(Messages.Key.OctalEscapeSequence); case '"': case '\'': case '\\': default: // fall-through } return c; } // /** * <strong>[11] ECMAScript Language: Lexical Grammar</strong> * * <pre> * InputElementDiv :: * WhiteSpace * LineTerminator * Comment * Token * DivPunctuator * RightBracePunctuator * InputElementRegExp :: * WhiteSpace * LineTerminator * Comment * Token * RightBracePunctuator * RegularExpressionLiteral * InputElementTemplateTail :: * WhiteSpace * LineTerminator * Comment * Token * DivPunctuator * TemplateSubstitutionTail * </pre> * * @return the next token */ private Token scanTokenNoComment() { Token tok; do { tok = scanToken(); } while (tok == Token.COMMENT); return tok; } /** * <strong>[11.5] Token</strong> * * <pre> * 
* Token ::
     *     IdentifierName
     *     Punctuator
     *     NumericLiteral
     *     StringLiteral
     *     Template
     * </pre>
     *
     * @return the next token
     */
    private Token scanToken() {
        TokenStreamInput input = this.input;

        // skip whitespace and line terminators in front of the token
        int c;
        for (;;) {
            c = input.get();
            if (c == TokenStreamInput.EOF) {
                return Token.EOF;
            } else if (c <= 0x20) {
                if (c == 0x09 || c == 0x0B || c == 0x0C || c == 0x20) {
                    // skip over whitespace
                    continue;
                }
                if (c == '\n') {
                    incrementLineAndUpdate();
                    continue;
                }
                if (c == '\r') {
                    // \r\n counts as a single line break
                    match('\n');
                    incrementLineAndUpdate();
                    continue;
                }
            } else if (c >= 0xA0) {
                if (isWhitespace(c)) {
                    // skip over whitespace
                    continue;
                }
                if (isLineTerminator(c)) {
                    incrementLineAndUpdate();
                    continue;
                }
            }
            break;
        }
        updateSourcePosition();

        switch (c) {
        case '\'':
        case '"':
            return readString(c);
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return readNumberLiteral(c);
        case 'A':
        case 'B':
        case 'C':
        case 'D':
        case 'E':
        case 'F':
        case 'G':
        case 'H':
        case 'I':
        case 'J':
        case 'K':
        case 'L':
        case 'M':
        case 'N':
        case 'O':
        case 'P':
        case 'Q':
        case 'R':
        case 'S':
        case 'T':
        case 'U':
        case 'V':
        case 'W':
        case 'X':
        case 'Y':
        case 'Z':
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'e':
        case 'f':
        case 'g':
        case 'h':
        case 'i':
        case 'j':
        case 'k':
        case 'l':
        case 'm':
        case 'n':
        case 'o':
        case 'p':
        case 'q':
        case 'r':
        case 's':
        case 't':
        case 'u':
        case 'v':
        case 'w':
        case 'x':
        case 'y':
        case 'z':
        case '$':
        case '_':
            return readIdentifier(c, false);
        case '{':
            return Token.LC;
        case '}':
            return Token.RC;
        case '(':
            return Token.LP;
        case ')':
            return Token.RP;
        case '[':
            return Token.LB;
        case ']':
            return Token.RB;
        case '.':
            switch (input.peek(0)) {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                return readNumberLiteral(c);
            case '.':
                if (input.peek(1) == '.') {
                    mustMatch('.');
                    mustMatch('.');
                    return Token.TRIPLE_DOT;
                }
            }
            return Token.DOT;
        case ';':
            return Token.SEMI;
        case ',':
            return Token.COMMA;
        case '~':
            return Token.BITNOT;
        case '?':
            return Token.HOOK;
        case ':':
            return Token.COLON;
        case '<':
            if (match('<')) {
                return match('=') ? Token.ASSIGN_SHL : Token.SHL;
            }
            if (match('=')) {
                return Token.LE;
            }
            if (input.peek(0) == '!' && input.peek(1) == '-' && input.peek(2) == '-'
                    && isEnabled(CompatibilityOption.HTMLComments) && !isModule()) {
                // html start-comment
                mustMatch('!');
                mustMatch('-');
                mustMatch('-');
                readSingleLineComment();
                return Token.COMMENT;
            }
            return Token.LT;
        case '>':
            if (match('>')) {
                if (match('>')) {
                    return match('=') ? Token.ASSIGN_USHR : Token.USHR;
                }
                return match('=') ? Token.ASSIGN_SHR : Token.SHR;
            }
            return match('=') ? Token.GE : Token.GT;
        case '=':
            if (match('=')) {
                return match('=') ? Token.SHEQ : Token.EQ;
            }
            return match('>') ? Token.ARROW : Token.ASSIGN;
        case '!':
            if (match('=')) {
                return match('=') ? Token.SHNE : Token.NE;
            }
            return Token.NOT;
        case '+':
            if (match('+')) {
                return Token.INC;
            }
            return match('=') ? Token.ASSIGN_ADD : Token.ADD;
        case '-':
            if (match('-')) {
                if (input.peek(0) == '>' && hasLineTerminator
                        && isEnabled(CompatibilityOption.HTMLComments) && !isModule()) {
                    // html end-comment at line start
                    mustMatch('>');
                    readSingleLineComment();
                    return Token.COMMENT;
                }
                return Token.DEC;
            }
            return match('=') ? Token.ASSIGN_SUB : Token.SUB;
        case '*':
            if (input.peek(0) == '*' && isEnabled(CompatibilityOption.Exponentiation)) {
                mustMatch('*');
                return match('=') ? Token.ASSIGN_EXP : Token.EXP;
            }
            return match('=') ? Token.ASSIGN_MUL : Token.MUL;
        case '%':
            return match('=') ? Token.ASSIGN_MOD : Token.MOD;
        case '/':
            if (match('=')) {
                return Token.ASSIGN_DIV;
            }
            if (match('/')) {
                readSingleLineComment();
                return Token.COMMENT;
            }
            if (match('*')) {
                readMultiLineComment();
                return Token.COMMENT;
            }
            return Token.DIV;
        case '&':
            if (match('&')) {
                return Token.AND;
            }
            return match('=') ? Token.ASSIGN_BITAND : Token.BITAND;
        case '|':
            if (match('|')) {
                return Token.OR;
            }
            return match('=') ? Token.ASSIGN_BITOR : Token.BITOR;
        case '^':
            return match('=') ? Token.ASSIGN_BITXOR : Token.BITXOR;
        case '`':
            return Token.TEMPLATE;
        case '@':
            return isEnabled(CompatibilityOption.Decorator) ? Token.AT : Token.ERROR;
        case '\\':
            // identifier starting with a unicode escape
            mustMatch('u');
            c = readUnicodeEscape();
            if (isIdentifierStart(c)) {
                return readIdentifier(c, true);
            }
            throw error(Messages.Key.InvalidUnicodeEscapedIdentifierStart);
        default:
            return isIdentifierStart(c) ? readIdentifier(c, false) : Token.ERROR;
        }
    }

    /**
     * <strong>[11.4] Comments</strong>
     *
     * <pre>
     * SingleLineComment ::
     *     // SingleLineCommentChars<span><sub>opt</sub></span>
     * SingleLineCommentChars ::
     *     SingleLineCommentChar SingleLineCommentChars<span><sub>opt</sub></span>
     * SingleLineCommentChar ::
     *     SourceCharacter but not LineTerminator
     * </pre>
     *
     * @return the comment token
     */
    private Token readSingleLineComment() {
        final int EOF = TokenStreamInput.EOF;
        TokenStreamInput input = this.input;
        int c;
        while ((c = input.getChar()) != EOF) {
            if (isLineTerminator(c)) {
                // EOL is not part of the single-line comment!
                input.ungetChar(c);
                break;
            }
        }
        return Token.COMMENT;
    }

    /**
     * <strong>[11.4] Comments</strong>
     *
     * <pre>
     * MultiLineComment ::
     *     /* MultiLineCommentChars<span><sub>opt</sub></span> *&#47;
     * MultiLineCommentChars ::
     *     MultiLineNotAsteriskChar MultiLineCommentChars<span><sub>opt</sub></span>
     *     * PostAsteriskCommentChars<span><sub>opt</sub></span>
     * PostAsteriskCommentChars ::
     *     MultiLineNotForwardSlashOrAsteriskChar MultiLineCommentChars<span><sub>opt</sub></span>
     *     * PostAsteriskCommentChars<span><sub>opt</sub></span>
     * MultiLineNotAsteriskChar ::
     *     SourceCharacter but not *
     * MultiLineNotForwardSlashOrAsteriskChar ::
     *     SourceCharacter but not one of / or *
     * </pre>
     *
     * @return the comment token
     */
    private Token readMultiLineComment() {
        final int EOF = TokenStreamInput.EOF;
        TokenStreamInput input = this.input;
        scan: for (;;) {
            int c = input.getChar();
            // consume a run of asterisks, a directly following '/' closes the comment
            while (c == '*') {
                c = input.getChar();
                if (c == '/') {
                    break scan;
                }
            }
            if (isLineTerminator(c)) {
                if (c == '\r') {
                    match('\n');
                }
                // line breaks in comments also set the line terminator flag
                incrementLineAndUpdate();
            }
            if (c == EOF) {
                throw eofError(Messages.Key.UnterminatedComment);
            }
        }
        return Token.COMMENT;
    }

    /**
     * <strong>[11.6] Names and Keywords</strong>
     *
     * <pre>
     * Identifier ::
     *     IdentifierName but not ReservedWord
     * IdentifierName ::
     *     IdentifierStart
     *     IdentifierName IdentifierPart
     * </pre>
     *
     * @param c
     *            the start character of the identifier
     * @param hasEscape
     *            the flag for escaped identifiers
     * @return the identifier token
     */
    private Token readIdentifier(int c, boolean hasEscape) {
        assert isIdentifierStart(c);

        TokenStreamInput input = this.input;
        StrBuffer name = this.buffer();
        name.appendCodePoint(c);
        for (;;) {
            c = input.get();
            if (c == '\\') {
                // unicode escape within the identifier
                hasEscape = true;
                mustMatch('u');
                c = readUnicodeEscape();
                if (!isIdentifierPart(c)) {
                    throw error(Messages.Key.InvalidUnicodeEscapedIdentifierPart);
                }
            } else if (!isIdentifierPart(c)) {
                input.unget(c);
                break;
            }
            name.appendCodePoint(c);
        }

        Token tok = readReservedWord(name.array(), name.length());
        return hasEscape ? Token.toEscapedNameToken(tok) : tok;
    }

    /**
     * <strong>[11.8.4] String Literals</strong>
     *
     * <pre>
     * UnicodeEscapeSequence ::
     *     u HexDigit HexDigit HexDigit HexDigit
     *     u{ HexDigits }
     * </pre>
     *
     * @return the unicode escape sequence value
     */
    private int readUnicodeEscape() {
        TokenStreamInput input = this.input;
        int value;
        int c = input.getChar();
        if (c == '{') {
            // braced form: accumulate hex digits until '}', an invalid digit or an overflow
            int acc = 0;
            c = input.getChar();
            do {
                acc = (acc << 4) | hexDigit(c);
            } while ((acc >= 0 && acc <= 0x10FFFF) && (c = input.getChar()) != '}');
            value = (c == '}') ? acc : -1;
        } else {
            // fixed form: exactly four hex digits, the first one was already read
            value = (hexDigit(c) << 12) | (hexDigit(input.getChar()) << 8)
                    | (hexDigit(input.getChar()) << 4) | hexDigit(input.getChar());
        }
        if (value < 0 || value > 0x10FFFF) {
            throw error(Messages.Key.InvalidUnicodeEscape);
        }
        return value;
    }

    /**
     * Classifies the given name as reserved word token or {@link Token#NAME}.
     *
     * @param name
     *            the identifier name
     * @return the token type for the identifier
     */
    static Token readReservedWord(String name) {
        int length = name.length();
        // the shortcut mirrors the length bounds checked by the array based variant
        boolean candidate = length >= 2 && length <= 10;
        return candidate ? readReservedWord(name.toCharArray(), length) : Token.NAME;
    }

    /**
     * <strong>[11.6.2] Reserved Words</strong>
     *
     * <pre>
     * ReservedWord ::
     *     Keyword
     *     FutureReservedWord
     *     NullLiteral
     *     BooleanLiteral
     * </pre>
     *
     * <strong>[11.6.2.1] Keywords</strong>
     *
     * <pre>
     * Keyword :: one of
     *     break       do          in          typeof
     *     case        else        instanceof  var
     *     catch       export      new         void
     *     class       extends     return      while
     *     const       finally     super       with
     *     continue    for         switch      yield
     *     debugger    function    this
     *     default     if          throw
     *     delete      import      try
     * </pre>
     *
     * <strong>[11.6.2.2] Future Reserved Words</strong>
     *
     * <pre>
     * FutureReservedWord :: one of
     *     enum
     * </pre>
     *
     * <pre>
     * StrictFutureReservedWord :: one of
     *     implements  package     protected   static
     *     interface   private     public
     * </pre>
     *
     * <pre>
     * ContextualKeyword :: one of
     *     let
     * </pre>
     *
     * <strong>[11.8.1] Null Literals</strong>
     *
     * <pre>
     * NullLiteral ::
     *     null
     * </pre>
     *
     * <strong>[11.8.2] Boolean Literals</strong>
     *
     * <pre>
     * BooleanLiteral ::
     *     true
     *     false
     * </pre>
     *
     * @param cbuf
     *            the character array
     * @param length
     *            the number of characters to
*            read
     * @return the token type for the identifier
     */
    private static Token readReservedWord(char[] cbuf, int length) {
        if (length < 2 || length > 10)
            return Token.NAME;
        // Select the single candidate by first character, length and (where needed) second
        // character; the candidate is confirmed by a full comparison below.
        char c0 = cbuf[0], c1 = cbuf[1];
        Token test = null;
        switch (c0) {
        case 'a':
            // async, await
            if (length == 5)
                test = (c1 == 's' ? Token.ASYNC : Token.AWAIT);
            break;
        case 'b':
            // break
            if (length == 5)
                test = Token.BREAK;
            break;
        case 'c':
            // case, catch, continue, class, const
            if (length == 4)
                test = Token.CASE;
            else if (length == 5)
                test = (c1 == 'a' ? Token.CATCH : c1 == 'l' ? Token.CLASS : Token.CONST);
            else if (length == 8)
                test = Token.CONTINUE;
            break;
        case 'd':
            // debugger, default, delete, do
            if (length == 2)
                test = Token.DO;
            else if (length == 6)
                test = Token.DELETE;
            else if (length == 7)
                test = Token.DEFAULT;
            else if (length == 8)
                test = Token.DEBUGGER;
            break;
        case 'e':
            // else, enum, export, extends
            if (length == 4)
                test = (c1 == 'l' ? Token.ELSE : Token.ENUM);
            else if (length == 6)
                test = Token.EXPORT;
            else if (length == 7)
                test = Token.EXTENDS;
            break;
        case 'f':
            // finally, for, function, false
            if (length == 3)
                test = Token.FOR;
            else if (length == 5)
                test = Token.FALSE;
            else if (length == 7)
                test = Token.FINALLY;
            else if (length == 8)
                test = Token.FUNCTION;
            break;
        case 'i':
            // if, in, instanceof, import, implements, interface
            if (length == 2)
                test = (c1 == 'f' ? Token.IF : Token.IN);
            else if (length == 6)
                test = Token.IMPORT;
            else if (length == 9)
                test = Token.INTERFACE;
            else if (length == 10)
                test = (c1 == 'n' ? Token.INSTANCEOF : Token.IMPLEMENTS);
            break;
        case 'l':
            // let
            if (length == 3)
                test = Token.LET;
            break;
        case 'n':
            // new, null
            if (length == 3)
                test = Token.NEW;
            else if (length == 4)
                test = Token.NULL;
            break;
        case 'p':
            // package, private, protected, public
            if (length == 6)
                test = Token.PUBLIC;
            else if (length == 7)
                test = (c1 == 'a' ? Token.PACKAGE : Token.PRIVATE);
            else if (length == 9)
                test = Token.PROTECTED;
            break;
        case 'r':
            // return
            if (length == 6)
                test = Token.RETURN;
            break;
        case 's':
            // switch, super, static
            if (length == 5)
                test = Token.SUPER;
            else if (length == 6)
                test = (c1 == 'w' ? Token.SWITCH : Token.STATIC);
            break;
        case 't':
            // this, throw, try, typeof, true
            if (length == 3)
                test = Token.TRY;
            else if (length == 4)
                test = (c1 == 'h' ? Token.THIS : Token.TRUE);
            else if (length == 5)
                test = Token.THROW;
            else if (length == 6)
                test = Token.TYPEOF;
            break;
        case 'v':
            // var, void
            if (length == 3)
                test = Token.VAR;
            else if (length == 4)
                test = Token.VOID;
            break;
        case 'w':
            // while, with
            if (length == 4)
                test = Token.WITH;
            else if (length == 5)
                test = Token.WHILE;
            break;
        case 'y':
            // yield
            if (length == 5)
                test = Token.YIELD;
            break;
        }
        if (test != null && equals(cbuf, test.getName())) {
            return test;
        }
        return Token.NAME;
    }

    /**
     * Compares the leading characters of {@code cbuf} with {@code test}. The caller must ensure
     * that {@code cbuf} holds at least {@code test.length()} characters.
     *
     * @param cbuf
     *            the character array
     * @param test
     *            the string to compare against
     * @return {@code true} if the first {@code test.length()} characters are equal
     */
    private static boolean equals(char[] cbuf, String test) {
        for (int i = 0, length = test.length(); i < length; ++i) {
            if (cbuf[i] != test.charAt(i))
                return false;
        }
        return true;
    }

    /**
     * <strong>[11.8.4] String Literals</strong>
     *
     * <pre>
     * StringLiteral ::
     *     " DoubleStringCharacters<span><sub>opt</sub></span> "
     *     ' SingleStringCharacters<span><sub>opt</sub></span> '
     * DoubleStringCharacters ::
     *     DoubleStringCharacter DoubleStringCharacters<span><sub>opt</sub></span>
     * SingleStringCharacters ::
     *     SingleStringCharacter SingleStringCharacters<span><sub>opt</sub></span>
     * DoubleStringCharacter ::
     *     SourceCharacter but not one of " or \ or LineTerminator
     *     \ EscapeSequence
     *     LineContinuation
     * SingleStringCharacter ::
     *     SourceCharacter but not one of ' or \ or LineTerminator
     *     \ EscapeSequence
     *     LineContinuation
     * LineContinuation ::
     *     \ LineTerminatorSequence
     * </pre>
     *
     * The literal's value is collected in the shared character buffer and the escape flag reported
     * by {@link #hasEscape()} is updated.
     *
     * @param quoteChar
     *            the quotation character for the string literal
     * @return the string literal token
     */
    private Token readString(int quoteChar) {
        assert quoteChar == '"' || quoteChar == '\'';

        final int EOF = TokenStreamInput.EOF;
        TokenStreamInput input = this.input;
        // start offset of the input range which has not yet been copied into the buffer
        int start = input.position();
        StrBuffer buffer = this.buffer();
        hasEscape = false;
        for (;;) {
            int c = input.getChar();
            if (c == EOF) {
                throw eofError(Messages.Key.UnterminatedStringLiteral);
            }
            if (c == quoteChar) {
                // closing quote: copy the pending characters, excluding the quote itself
                buffer.append(input, start, input.position() - 1);
                break;
            }
            if (isLineTerminator(c)) {
                throw error(Messages.Key.UnterminatedStringLiteral);
            }
            if (c != '\\') {
                continue;
            }
            // copy the pending characters in front of the backslash
            buffer.append(input, start, input.position() - 1);
            // EscapeSequence or LineContinuation
            hasEscape = true;
            c = input.getChar();
            if (c == EOF) {
                // Input ends directly after the backslash: report the unterminated literal here
                // instead of handing the EOF marker to the escape sequence decoder.
                throw eofError(Messages.Key.UnterminatedStringLiteral);
            }
            if (isLineTerminator(c)) {
                if (c == '\r') {
                    // treat a \r\n sequence as a single line break
                    match('\n');
                }
                incrementLine();
            } else {
                buffer.appendCodePoint(readStringEscapeSequence(c));
            }
            start = input.position();
        }
        return Token.STRING;
    }

    /**
     * <strong>[11.8.4] String Literals</strong>
     *
     * <pre>
     * EscapeSequence ::
     *     CharacterEscapeSequence
     *     0  [lookahead &notin; DecimalDigit]
     *     HexEscapeSequence
     *     UnicodeEscapeSequence
     * CharacterEscapeSequence ::
     *     SingleEscapeCharacter
     *     NonEscapeCharacter
     * SingleEscapeCharacter ::  one of
     *     ' "  \  b f n r t v
     * NonEscapeCharacter ::
     *     SourceCharacter but not one of EscapeCharacter or LineTerminator
     * EscapeCharacter ::
     *     SingleEscapeCharacter
     *     DecimalDigit
     *     x
     *     u
     * HexEscapeSequence ::
     *     x HexDigit HexDigit
     * UnicodeEscapeSequence ::
     *     u HexDigit HexDigit HexDigit HexDigit
     *     u{ HexDigits }
     * </pre>
     *
     * <strong>[B.1.2] String Literals</strong>
     *
     * <pre>
     * EscapeSequence ::
     *     CharacterEscapeSequence
     *     LegacyOctalEscapeSequence
     *     HexEscapeSequence
     *     UnicodeEscapeSequence
     * </pre>
     *
     * @param c
     *            the start character
     * @return the escaped character
     */
    private int readStringEscapeSequence(int c) {
        TokenStreamInput input = this.input;
        switch (c) {
        case 'b':
            c = '\b';
            break;
        case 'f':
            c = '\f';
            break;
        case 'n':
            c = '\n';
            break;
        case 'r':
            c = '\r';
            break;
        case 't':
            c = '\t';
            break;
        case 'v':
            c = '\u000B';
            break;
        case 'x':
            c = (hexDigit(input.getChar()) << 4) | hexDigit(input.getChar());
            if (c < 0) {
                throw error(Messages.Key.InvalidHexEscape);
            }
            break;
        case 'u':
            c = readUnicodeEscape();
            break;
        case '0':
            if (isDecimalDigit(input.peek(0))) {
                if (!isEnabled(CompatibilityOption.OctalEscapeSequence)) {
                    throw error(Messages.Key.InvalidNULLEscape);
                }
                c = readLegacyOctalEscape(c);
            } else {
                c = '\0';
            }
            break;
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
            if (!isEnabled(CompatibilityOption.OctalEscapeSequence)) {
                throw error(Messages.Key.OctalEscapeSequence);
            }
            c = readLegacyOctalEscape(c);
            break;
        case '8':
        case '9':
            // FIXME: spec bug - undefined behaviour for \8 and \9
            if (!isEnabled(CompatibilityOption.OctalEscapeSequence)) {
                throw error(Messages.Key.OctalEscapeSequence);
            }
            // fall-through
        case '"':
        case '\'':
        case '\\':
        default:
            // fall-through
        }
        return c;
    }

    /**
     * <strong>[B.1.2] String Literals</strong>
     *
     * <pre>
     * LegacyOctalEscapeSequence ::
     *     OctalDigit [lookahead &notin; OctalDigit]
     *     ZeroToThree OctalDigit [lookahead &notin; OctalDigit]
     *     FourToSeven OctalDigit
     *     ZeroToThree OctalDigit OctalDigit
     * ZeroToThree :: one of
     *     0 1 2 3
     * FourToSeven :: one of
     *     4 5 6 7
     * </pre>
     *
     * @param c
     *            the start character of the octal escape sequence
     * @return the octal escape value
     */
    private int readLegacyOctalEscape(int c) {
        assert '0' <= c && c <= '7';
        strictModeError(Messages.Key.StrictModeOctalEscapeSequence);
        int d = (c - '0');
        c = input.getChar();
        if (isOctalDigit(c)) {
            d = d * 8 + (c - '0');
            // a third digit is only consumed if the first digit was in the range 0-3
            if (d <= 037) {
                c = input.getChar();
                if (isOctalDigit(c)) {
                    d = d * 8 + (c - '0');
                } else {
                    input.ungetChar(c);
                }
            }
        } else {
            input.ungetChar(c);
        }
        return d;
    }

    /**
     * <strong>[11.8.3] Numeric Literals</strong>
     *
     * <pre>
     * NumericLiteral ::
     *     DecimalLiteral
     *     BinaryIntegerLiteral
     *     OctalIntegerLiteral
     *     HexIntegerLiteral
     *     LegacyOctalIntegerLiteral
     * </pre>
     *
     * The parsed value is stored in the {@code number} field.
     *
     * @param c
     *            the start character of the decimal integer literal
     * @return the number token
     */
    private Token readNumberLiteral(int c) {
        if (c == '0') {
            // a leading zero may introduce a radix prefix or a legacy octal literal
            int d = input.getChar();
            if (d == 'x' || d == 'X') {
                number = readHexIntegerLiteral();
            } else if (d == 'b' || d == 'B') {
                number = readBinaryIntegerLiteral();
            } else if (d == 'o' || d == 'O') {
                number = readOctalIntegerLiteral();
            } else if (isDecimalDigit(d)) {
                if (isEnabled(CompatibilityOption.LegacyOctalIntegerLiteral)) {
                    input.ungetChar(d);
                    number = readLegacyOctalIntegerLiteral();
                } else {
                    throw error(Messages.Key.InvalidNumberLiteral);
                }
            } else {
                input.ungetChar(d);
                number = readDecimalLiteral(c);
            }
        } else {
            number = readDecimalLiteral(c);
        }
        return Token.NUMBER;
    }

    /**
     * <strong>[11.8.3] Numeric Literals</strong>
     *
     * <pre>
     * HexIntegerLiteral ::
     *     0x HexDigits
     *     0X HexDigits
     * HexDigits ::
     *     HexDigit
     *     HexDigits HexDigit
     * </pre>
     *
     * @return the hexadecimal integer literal
     */
    private double readHexIntegerLiteral() {
        TokenStreamInput input = this.input;
        StrBuffer buffer = this.buffer();
        int c;
        while (isHexDigit(c = input.get())) {
            buffer.append(c);
        }
        // the literal must not be directly followed by a digit or an identifier start
        if (isDecimalDigitOrIdentifierStart(c)) {
            throw error(Messages.Key.InvalidHexIntegerLiteral);
        }
        input.unget(c);
        if (buffer.length() == 0) {
            throw error(Messages.Key.InvalidHexIntegerLiteral);
        }
        return parseHex(buffer.array(), buffer.length());
    }

    /**
     * <strong>[11.8.3] Numeric Literals</strong>
     *
     * <pre>
     * BinaryIntegerLiteral ::
     *     0b BinaryDigit
     *     0B BinaryDigit
     *     BinaryIntegerLiteral BinaryDigit
     * </pre>
     *
     * @return the binary integer literal
     */
    private double readBinaryIntegerLiteral() {
        TokenStreamInput input = this.input;
        StrBuffer buffer = this.buffer();
        int c;
        while (isBinaryDigit(c = input.get())) {
            buffer.append(c);
        }
        // the literal must not be directly followed by a digit or an identifier start
        if (isDecimalDigitOrIdentifierStart(c)) {
            throw error(Messages.Key.InvalidBinaryIntegerLiteral);
        }
        input.unget(c);
        if (buffer.length() == 0) {
            throw error(Messages.Key.InvalidBinaryIntegerLiteral);
        }
        return parseBinary(buffer.array(), buffer.length());
    }

    /**
     * <strong>[11.8.3] Numeric Literals</strong>
     *
     * <pre>
     * OctalIntegerLiteral ::
     *     0o OctalDigit
     *     0O OctalDigit
     *     OctalIntegerLiteral OctalDigit
     * </pre>
     *
     * @return the octal integer literal
     */
private double readOctalIntegerLiteral() { TokenStreamInput input = this.input; StrBuffer buffer = this.buffer(); int c; while (isOctalDigit(c = input.get())) { buffer.append(c); } if (isDecimalDigitOrIdentifierStart(c)) { throw error(Messages.Key.InvalidOctalIntegerLiteral); } input.unget(c); if (buffer.length() == 0) { throw error(Messages.Key.InvalidOctalIntegerLiteral); } return parseOctal(buffer.array(), buffer.length()); } /** * <strong>[B.1.1] Numeric Literals</strong> * * <pre> * LegacyOctalIntegerLiteral :: * 0 OctalDigit * LegacyOctalIntegerLiteral OctalDigit * </pre> * * @return the octal integer literal */ private double readLegacyOctalIntegerLiteral() { TokenStreamInput input = this.input; StrBuffer buffer = this.buffer(); int c; while (isOctalDigit(c = input.get())) { buffer.append(c); } if (c == '8' || c == '9') { // invalid octal integer literal -> treat as decimal literal in non-strict mode strictModeError(Messages.Key.StrictModeDecimalLeadingZero); return readDecimalLiteral(c, false); } strictModeError(Messages.Key.StrictModeOctalIntegerLiteral); if (isDecimalDigitOrIdentifierStart(c)) { throw error(Messages.Key.InvalidOctalIntegerLiteral); } input.unget(c); assert buffer.length() != 0; return parseOctal(buffer.array(), buffer.length()); } /** * <strong>[11.8.3] Numeric Literals</strong> * * <pre> * DecimalLiteral :: * DecimalIntegerLiteral . DecimalDigits<span><sub>opt</sub></span> ExponentPart<span><sub>opt</sub></span> * . 
DecimalDigits ExponentPart<span><sub>opt</sub></span> * DecimalIntegerLiteral ExponentPart<span><sub>opt</sub></span> * DecimalIntegerLiteral :: * 0 * NonZeroDigit DecimalDigits<span><sub>opt</sub></span> * DecimalDigits :: * DecimalDigit * DecimalDigits DecimalDigit * NonZeroDigit :: one of * 1 2 3 4 5 6 7 8 9 * ExponentPart :: * ExponentIndicator SignedInteger * ExponentIndicator :: one of * e E * SignedInteger :: * DecimalDigits * + DecimalDigits * - DecimalDigits * </pre> * * <strong>[B.1.1] Numeric Literals</strong> * * <pre> * DecimalIntegerLiteral :: * 0 * NonZeroDigit DecimalDigits<span><sub>opt</sub></span> * NonOctalDecimalIntegerLiteral * NonOctalDecimalIntegerLiteral :: * 0 NonOctalDigit * LegacyOctalLikeDecimalIntegerLiteral NonOctalDigit * NonOctalDecimalIntegerLiteral DecimalDigit * LegacyOctalLikeDecimalIntegerLiteral :: * 0 OctalDigit * LegacyOctalLikeDecimalIntegerLiteral OctalDigit * NonOctalDigit :: one of * 8 9 * </pre> * * @param c * the start character of the decimal integer literal * @return the decimal integer literal */ private double readDecimalLiteral(int c) { return readDecimalLiteral(c, true); } private double readDecimalLiteral(int c, boolean resetBuffer) { assert c == '.' || isDecimalDigit(c); boolean isInteger = true; TokenStreamInput input = this.input; StrBuffer buffer = resetBuffer ? this.buffer() : this.buffer; if (c != '.' 
&& c != '0') { buffer.append(c); while (isDecimalDigit(c = input.get())) { buffer.append(c); } } else if (c == '0') { buffer.append(c); c = input.get(); } if (c == '.') { isInteger = false; buffer.append(c); while (isDecimalDigit(c = input.get())) { buffer.append(c); } } if (c == 'e' || c == 'E') { isInteger = false; buffer.append(c); c = input.get(); if (c == '+' || c == '-') { buffer.append(c); c = input.get(); } if (!isDecimalDigit(c)) { throw error(Messages.Key.InvalidNumberLiteral); } buffer.append(c); while (isDecimalDigit(c = input.get())) { buffer.append(c); } } if (isDecimalDigitOrIdentifierStart(c)) { throw error(Messages.Key.InvalidNumberLiteral); } input.unget(c); if (isInteger) { return parseInteger(buffer.array(), buffer.length()); } return parseDecimal(buffer.array(), buffer.length()); } /** * Returns <code>true</code> if {@code c} is either a decimal digit or an identifier start * character. * * @param c * the character to test * @return {@code true} if the character is either a decimal digit or an identifier start * character */ private boolean isDecimalDigitOrIdentifierStart(int c) { return isDecimalDigit(c) || isIdentifierStart(c); } /** * Throws a {@link ParserException}. * * @param messageKey * the error message key * @param args * the error message arguments * @return the parser exception */ private ParserException error(Messages.Key messageKey, String... args) { throw new ParserException(ExceptionType.SyntaxError, parser.getSourceName(), getLine(), getColumn(), messageKey, args); } /** * Throws a {@link ParserEOFException}. * * @param messageKey * the error message key * @param args * the error message arguments * @return the parser exception */ private ParserException eofError(Messages.Key messageKey, String... args) { throw new ParserEOFException(parser.getSourceName(), getLine(), getColumn(), messageKey, args); } /** * Reports a strict-mode error. 
* * @param messageKey * the error message key * @param args * the error message arguments * @return the parser exception */ private void strictModeError(Messages.Key messageKey, String... args) { // Report the error from the start position of the currently parsed token. long sourcePosition = nextSourcePosition; parser.reportStrictModeError(ExceptionType.SyntaxError, sourcePosition, messageKey, args); } /** * Returns <code>true</code> and advances the source position if the current character is * {@code c}. Otherwise returns <code>false</code> and does not advance the source position. * * @param c * the character to test * @return {@code true} if the current character matches */ private boolean match(char c) { return input.match(c); } /** * Advances the source position if the current character is {@code c}. Otherwise throws a parser * exception. * * @param c * the character to test */ private void mustMatch(char c) { if (input.getChar() != c) { throw error(Messages.Key.IllegalCharacter, String.valueOf(c)); } } }