/* * 10/08/2004 * * JavaScriptTokenMaker.java - An object that can take a chunk of text and * return a linked list of <code>Token</code>s representing it in the * JavaScript programming language. * Copyright (C) 2004 Robert Futrell * robert_futrell at users.sourceforge.net * http://fifesoft.com/rsyntaxtextarea * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ package org.fife.ui.rsyntaxtextarea.modes; import javax.swing.text.Segment; import org.fife.ui.rsyntaxtextarea.*; /** * A token maker that turns text into a linked list of <code>Token</code>s for syntax highlighting in the JavaScript * programming language. * * @author Robert Futrell * @version 0.1 */ public class JavaScriptTokenMaker extends AbstractTokenMaker { protected final String operators = "+-*/%!=<>^&|?:"; protected final String separators = "()[]{}"; protected final String separators2 = ".,;"; // Characters you don't want syntax highlighted but separate // identifiers. protected final String hexCharacters = "0123456789ABCDEFabcdef"; protected final String numberEndChars = "FfLl"; // Characters used to specify literal number types. private int currentTokenStart; private int currentTokenType; /** * Constructor. */ public JavaScriptTokenMaker() { super(); // Initializes tokensToHighlight. } /** * Checks the token to give it the exact ID it deserves before being passed up to the super method. * * @param segment * <code>Segment</code> to get text from. * @param start * Start offset in <code>segment</code> of token. * @param end * End offset in <code>segment</code> of token. * @param tokenType * The token's type. * @param startOffset * The offset in the document at which the token occurs. */ public void addToken(Segment segment, int start, int end, int tokenType, int startOffset) { switch (tokenType) { // Since reserved words, functions, and data types are all passed // into here as "identifiers," we have to see what the token // really is... case Token.IDENTIFIER: int value = wordsToHighlight.get(segment, start, end); if (value != -1) tokenType = value; break; case Token.WHITESPACE: case Token.SEPARATOR: case Token.OPERATOR: case Token.ERROR_IDENTIFIER: case Token.ERROR_NUMBER_FORMAT: case Token.ERROR_STRING_DOUBLE: case Token.ERROR_CHAR: case Token.COMMENT_EOL: case Token.COMMENT_MULTILINE: case Token.LITERAL_BOOLEAN: case Token.LITERAL_NUMBER_DECIMAL_INT: case Token.LITERAL_NUMBER_FLOAT: case Token.LITERAL_NUMBER_HEXADECIMAL: case Token.LITERAL_STRING_DOUBLE_QUOTE: case Token.LITERAL_CHAR: break; default: new Exception("Unknown tokenType: '" + tokenType + "'"). printStackTrace(); tokenType = Token.IDENTIFIER; break; } super.addToken(segment, start, end, tokenType, startOffset); } /** * Returns the text to place at the beginning and end of a line to "comment" it in a this programming language. * * @return The start and end strings to add to a line to "comment" it out. */ public String[] getLineCommentStartAndEnd() { return new String[] { "//", null }; } /** * Returns the words to highlight for the JavaScript programming language. * * @return A <code>TokenMap</code> containing the words to highlight for the JavaScript programming language. * @see org.fife.ui.rsyntaxtextarea.AbstractTokenMaker#getWordsToHighlight */ public TokenMap getWordsToHighlight() { TokenMap tokenMap = new TokenMap(52); int reservedWord = Token.RESERVED_WORD; tokenMap.put("abstract", reservedWord); tokenMap.put("as", reservedWord); tokenMap.put("break", reservedWord); tokenMap.put("case", reservedWord); tokenMap.put("catch", reservedWord); tokenMap.put("class", reservedWord); tokenMap.put("const", reservedWord); tokenMap.put("continue", reservedWord); tokenMap.put("debugger", reservedWord); tokenMap.put("default", reservedWord); tokenMap.put("delete", reservedWord); tokenMap.put("do", reservedWord); tokenMap.put("else", reservedWord); tokenMap.put("enum", reservedWord); tokenMap.put("export", reservedWord); tokenMap.put("extends", reservedWord); tokenMap.put("final", reservedWord); tokenMap.put("finally", reservedWord); tokenMap.put("for", reservedWord); tokenMap.put("function", reservedWord); tokenMap.put("goto", reservedWord); tokenMap.put("if", reservedWord); tokenMap.put("implements", reservedWord); tokenMap.put("import", reservedWord); tokenMap.put("in", reservedWord); tokenMap.put("instanceof", reservedWord); tokenMap.put("interface", reservedWord); tokenMap.put("item", reservedWord); tokenMap.put("namespace", reservedWord); tokenMap.put("native", reservedWord); tokenMap.put("new", reservedWord); tokenMap.put("null", reservedWord); tokenMap.put("package", reservedWord); tokenMap.put("private", reservedWord); tokenMap.put("protected", reservedWord); tokenMap.put("public", reservedWord); tokenMap.put("return", reservedWord); tokenMap.put("static", reservedWord); tokenMap.put("super", reservedWord); tokenMap.put("switch", reservedWord); tokenMap.put("synchronized", reservedWord); tokenMap.put("this", reservedWord); tokenMap.put("throw", reservedWord); tokenMap.put("throws", reservedWord); tokenMap.put("transient", reservedWord); tokenMap.put("try", reservedWord); tokenMap.put("typeof", reservedWord); tokenMap.put("var", reservedWord); tokenMap.put("void", reservedWord); tokenMap.put("while", reservedWord); tokenMap.put("with", reservedWord); int literalBoolean = Token.LITERAL_BOOLEAN; tokenMap.put("false", literalBoolean); tokenMap.put("true", literalBoolean); int dataType = Token.DATA_TYPE; tokenMap.put("boolean", dataType); tokenMap.put("byte", dataType); tokenMap.put("char", dataType); tokenMap.put("double", dataType); tokenMap.put("float", dataType); tokenMap.put("int", dataType); tokenMap.put("long", dataType); tokenMap.put("short", dataType); return tokenMap; } /** * Returns <code>true</code> always as JavaScript uses curly braces to denote code blocks. * * @return <code>true</code> always. */ public boolean getCurlyBracesDenoteCodeBlocks() { return true; } /** * Returns <code>true</code> if the specified token is a '<tt>{</tt>' or '<tt>(</tt>' separator. * * @param t * The token the previous line ends with. * @return Whether the next line should be indented after this token. */ public boolean getShouldIndentNextLineAfter(Token t) { if (t != null && t.textCount == 1) { char ch = t.text[t.textOffset]; return ch == '{' || ch == '('; } return false; } /** * Returns the first token in the linked list of tokens generated from <code>text</code>. This method must be * implemented by subclasses so they can correctly implement syntax highlighting. * * @param text * The text from which to get tokens. * @param initialTokenType * The token type we should start with. * @param startOffset * The offset into the document at which <code>text</code> starts. * @return The first <code>Token</code> in a linked list representing the syntax highlighted text. */ public Token getTokenList(Segment text, int initialTokenType, int startOffset) { resetTokenList(); char[] array = text.array; int offset = text.offset; int count = text.count; int end = offset + count; // See, when we find a token, its starting position is always of the // form: 'startOffset + (currentTokenStart-offset)'; but since // startOffset and offset are constant, tokens' starting positions // become: 'newStartOffset+currentTokenStart' for one less subtraction // operation. int newStartOffset = startOffset - offset; currentTokenStart = offset; currentTokenType = initialTokenType; boolean backslash = false; boolean numContainsExponent = false; boolean numContainsEndCharacter = false; for (int i = offset; i < end; i++) { char c = array[i]; switch (currentTokenType) { case Token.NULL: currentTokenStart = i; // Starting a new token here. switch (c) { case ' ': case '\t': currentTokenType = Token.WHITESPACE; break; case '"': if (backslash) { // Escaped double quote => call '"' an identifier.. addToken(text, currentTokenStart, i, Token.IDENTIFIER, newStartOffset + currentTokenStart); backslash = false; } else { currentTokenType = Token.ERROR_STRING_DOUBLE; } break; case '\'': if (backslash) { // Escaped single quote => call '\'' an identifier. addToken(text, currentTokenStart, i, Token.IDENTIFIER, newStartOffset + currentTokenStart); backslash = false; } else { currentTokenType = Token.ERROR_CHAR; } break; case '\\': addToken(text, currentTokenStart, i, Token.IDENTIFIER, newStartOffset + currentTokenStart); currentTokenType = Token.NULL; backslash = !backslash; break; default: if (RSyntaxUtilities.isDigit(c)) { currentTokenType = Token.LITERAL_NUMBER_DECIMAL_INT; break; } else if (RSyntaxUtilities.isLetter(c) || c == '_') { currentTokenType = Token.IDENTIFIER; break; } int indexOf = operators.indexOf(c, 0); if (indexOf > -1) { currentTokenStart = i; currentTokenType = Token.OPERATOR; break; } indexOf = separators.indexOf(c, 0); if (indexOf > -1) { addToken(text, currentTokenStart, i, Token.SEPARATOR, newStartOffset + currentTokenStart); currentTokenType = Token.NULL; break; } indexOf = separators2.indexOf(c, 0); if (indexOf > -1) { addToken(text, currentTokenStart, i, Token.IDENTIFIER, newStartOffset + currentTokenStart); currentTokenType = Token.NULL; break; } else { currentTokenType = Token.ERROR_IDENTIFIER; break; } } // End of switch (c). break; case Token.WHITESPACE: switch (c) { case ' ': case '\t': break; // Still whitespace. case '\\': addToken(text, currentTokenStart, i - 1, Token.WHITESPACE, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; backslash = true; // Previous char whitespace => this must be first backslash. break; case '"': // Don't need to worry about backslashes as previous char is space. addToken(text, currentTokenStart, i - 1, Token.WHITESPACE, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_STRING_DOUBLE; backslash = false; break; case '\'': // Don't need to worry about backslashes as previous char is space. addToken(text, currentTokenStart, i - 1, Token.WHITESPACE, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_CHAR; backslash = false; break; default: // Add the whitespace token and start anew. addToken(text, currentTokenStart, i - 1, Token.WHITESPACE, newStartOffset + currentTokenStart); currentTokenStart = i; if (RSyntaxUtilities.isDigit(c)) { currentTokenType = Token.LITERAL_NUMBER_DECIMAL_INT; break; } else if (RSyntaxUtilities.isLetter(c) || c == '_') { currentTokenType = Token.IDENTIFIER; break; } int indexOf = operators.indexOf(c, 0); if (indexOf > -1) { currentTokenStart = i; currentTokenType = Token.OPERATOR; break; } indexOf = separators.indexOf(c, 0); if (indexOf > -1) { addToken(text, i, i, Token.SEPARATOR, newStartOffset + i); currentTokenType = Token.NULL; break; } indexOf = separators2.indexOf(c, 0); if (indexOf > -1) { addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; break; } else { currentTokenType = Token.ERROR_IDENTIFIER; } } // End of switch (c). break; default: // Should never happen case Token.IDENTIFIER: switch (c) { case ' ': case '\t': addToken(text, currentTokenStart, i - 1, Token.IDENTIFIER, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.WHITESPACE; break; case '"': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart, i - 1, Token.IDENTIFIER, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_STRING_DOUBLE; backslash = false; break; case '\'': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart, i - 1, Token.IDENTIFIER, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_CHAR; backslash = false; break; case '\\': addToken(text, currentTokenStart, i - 1, Token.IDENTIFIER, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; backslash = true; break; default: if (RSyntaxUtilities.isLetterOrDigit(c) || c == '_') { break; // Still an identifier of some type. } int indexOf = operators.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.IDENTIFIER, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.OPERATOR; break; } indexOf = separators.indexOf(c, 0); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.IDENTIFIER, newStartOffset + currentTokenStart); addToken(text, i, i, Token.SEPARATOR, newStartOffset + i); currentTokenType = Token.NULL; break; } indexOf = separators2.indexOf(c, 0); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.IDENTIFIER, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; break; } else { currentTokenType = Token.ERROR_IDENTIFIER; } } // End of switch (c). break; case Token.LITERAL_NUMBER_DECIMAL_INT: // Reset our boolean states if we only have one digit char before // the current one. if (currentTokenStart == i - 1) { numContainsExponent = false; numContainsEndCharacter = false; } switch (c) { case ' ': case '\t': addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.WHITESPACE; break; case '"': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_STRING_DOUBLE; backslash = false; break; case '\'': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_CHAR; backslash = false; break; case '\\': addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; backslash = true; break; default: if (RSyntaxUtilities.isDigit(c)) { break; // Still a literal number. } else if (c == 'e') { // Exponent. if (numContainsExponent == false) { numContainsExponent = true; } else { currentTokenType = Token.ERROR_NUMBER_FORMAT; } break; } else if (c == '.') { // Decimal point. if (numContainsExponent == true) { currentTokenType = Token.ERROR_NUMBER_FORMAT; } else { currentTokenType = Token.LITERAL_NUMBER_FLOAT; } break; } int indexOf = numberEndChars.indexOf(c); if (indexOf > -1) { // Numbers can end in 'f','F','l','L', etc. if (numContainsEndCharacter == true) { currentTokenType = Token.ERROR_NUMBER_FORMAT; } else { numContainsEndCharacter = true; } break; } indexOf = operators.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.OPERATOR; break; } indexOf = separators.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset + currentTokenStart); addToken(text, i, i, Token.SEPARATOR, newStartOffset + i); currentTokenType = Token.NULL; break; } indexOf = separators2.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; break; } // Otherwise, the token is an error. currentTokenType = Token.ERROR_NUMBER_FORMAT; } // End of switch (c). break; case Token.LITERAL_NUMBER_FLOAT: switch (c) { case ' ': case '\t': addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_FLOAT, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.WHITESPACE; break; case '"': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_FLOAT, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_STRING_DOUBLE; backslash = false; break; case '\'': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_FLOAT, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_CHAR; backslash = false; break; case '\\': addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_FLOAT, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; backslash = true; break; default: if (RSyntaxUtilities.isDigit(c)) { break; // Still a literal number. } else if (c == 'e') { // Exponent. if (numContainsExponent == false) { numContainsExponent = true; } else { currentTokenType = Token.ERROR_NUMBER_FORMAT; } break; } else if (c == '.') { // Second decimal point; must catch now because it's a "separator" below. currentTokenType = Token.ERROR_NUMBER_FORMAT; break; } int indexOf = numberEndChars.indexOf(c); if (indexOf > -1) { // Numbers can end in 'f','F','l','L', etc. if (numContainsEndCharacter == true) { currentTokenType = Token.ERROR_NUMBER_FORMAT; } else { numContainsEndCharacter = true; } break; } indexOf = operators.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_FLOAT, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.OPERATOR; break; } indexOf = separators.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_FLOAT, newStartOffset + currentTokenStart); addToken(text, i, i, Token.SEPARATOR, newStartOffset + i); currentTokenType = Token.NULL; break; } indexOf = separators2.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_FLOAT, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; break; } // Otherwise, the token is an error. currentTokenType = Token.ERROR_NUMBER_FORMAT; } // End of switch (c). break; case Token.LITERAL_NUMBER_HEXADECIMAL: switch (c) { case ' ': case '\t': addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_HEXADECIMAL, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.WHITESPACE; break; case '"': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_HEXADECIMAL, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_STRING_DOUBLE; backslash = false; break; case '\'': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_HEXADECIMAL, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_CHAR; backslash = false; break; case '\\': addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_HEXADECIMAL, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; backslash = true; break; default: if (c == 'e') { // Exponent. if (numContainsExponent == false) { numContainsExponent = true; } else { currentTokenType = Token.ERROR_NUMBER_FORMAT; } break; } int indexOf = hexCharacters.indexOf(c); if (indexOf > -1) { break; // Still a hexadecimal number. } indexOf = numberEndChars.indexOf(c); if (indexOf > -1) { // Numbers can end in 'f','F','l','L', etc. if (numContainsEndCharacter == true) { currentTokenType = Token.ERROR_NUMBER_FORMAT; } else { numContainsEndCharacter = true; } break; } indexOf = operators.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_HEXADECIMAL, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.OPERATOR; break; } indexOf = separators.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_HEXADECIMAL, newStartOffset + currentTokenStart); addToken(text, i, i, Token.SEPARATOR, newStartOffset + i); currentTokenType = Token.NULL; break; } indexOf = separators2.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.LITERAL_NUMBER_HEXADECIMAL, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; break; } // Otherwise, the token is an error. currentTokenType = Token.ERROR_NUMBER_FORMAT; } // End of switch (c). break; case Token.COMMENT_MULTILINE: // Find either end of MLC or end of the current line. while (i < end - 1) { if (array[i] == '*' && array[i + 1] == '/') { addToken(text, currentTokenStart, i + 1, Token.COMMENT_MULTILINE, newStartOffset + currentTokenStart); i = i + 1; currentTokenType = Token.NULL; backslash = false; // Backslashes can't accumulate before and after a comment... break; } i++; } break; case Token.COMMENT_EOL: i = end - 1; addToken(text, currentTokenStart, i, Token.COMMENT_EOL, newStartOffset + currentTokenStart); // We need to set token type to null so at the bottom we don't add one more token. currentTokenType = Token.NULL; break; // We need this state because comments always start with '/', which is an operator. // Note that when we enter this state, the PREVIOUS character was an operator. case Token.OPERATOR: if (array[i - 1] == '/') { // Possibility of comments. if (c == '*') { currentTokenType = Token.COMMENT_MULTILINE; break; } else if (c == '/') { currentTokenType = Token.COMMENT_EOL; i = end - 1; // Since we know the rest of the line is in this token. } else { // We MUST add the token at the previous char now; if we don't and let // operators accumulate before we print them, we will mess up syntax // highlighting if we get an end-of-line comment. addToken(text, currentTokenStart, i - 1, Token.OPERATOR, newStartOffset + currentTokenStart); currentTokenType = Token.NULL; i = i - 1; } } else { addToken(text, currentTokenStart, i - 1, Token.OPERATOR, newStartOffset + currentTokenStart); // Hack to keep code size down... i--; currentTokenType = Token.NULL; } break; case Token.ERROR_IDENTIFIER: switch (c) { case ' ': case '\t': addToken(text, currentTokenStart, i - 1, Token.ERROR_IDENTIFIER, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.WHITESPACE; break; case '"': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart, i - 1, Token.ERROR_IDENTIFIER, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_STRING_DOUBLE; backslash = false; break; case '\'': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart, i - 1, Token.ERROR_IDENTIFIER, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_CHAR; backslash = false; break; case ';': addToken(text, currentTokenStart, i - 1, Token.ERROR_IDENTIFIER, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; break; case '\\': addToken(text, currentTokenStart, i - 1, Token.ERROR_IDENTIFIER, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; backslash = true; // Must be first backslash in a row since previous character is identifier char. break; default: int indexOf = operators.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.ERROR_IDENTIFIER, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.OPERATOR; } indexOf = separators.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.ERROR_IDENTIFIER, newStartOffset + currentTokenStart); addToken(text, i, i, Token.SEPARATOR, newStartOffset + i); currentTokenType = Token.NULL; } indexOf = separators2.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.ERROR_IDENTIFIER, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; } } // End of switch (c). break; case Token.ERROR_NUMBER_FORMAT: switch (c) { case ' ': case '\t': addToken(text, currentTokenStart, i - 1, Token.ERROR_NUMBER_FORMAT, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.WHITESPACE; break; case '"': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart, i - 1, Token.ERROR_NUMBER_FORMAT, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_STRING_DOUBLE; backslash = false; break; case '\'': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart, i - 1, Token.ERROR_NUMBER_FORMAT, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_CHAR; backslash = false; break; case ';': addToken(text, currentTokenStart, i - 1, Token.ERROR_NUMBER_FORMAT, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; break; case '\\': addToken(text, currentTokenStart, i - 1, Token.ERROR_NUMBER_FORMAT, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; backslash = true; // Must be first backslash in a row since previous char is a number char. break; default: // Could be going into hexadecimal. int indexOf = hexCharacters.indexOf(c); if (indexOf > -1 && (i - currentTokenStart == 2 && array[i - 1] == 'x' && array[i - 2] == '0')) { currentTokenType = Token.LITERAL_NUMBER_HEXADECIMAL; break; } indexOf = operators.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.ERROR_NUMBER_FORMAT, newStartOffset + currentTokenStart); currentTokenStart = i; currentTokenType = Token.OPERATOR; } indexOf = separators.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.ERROR_NUMBER_FORMAT, newStartOffset + currentTokenStart); addToken(text, i, i, Token.SEPARATOR, newStartOffset + i); currentTokenType = Token.NULL; } indexOf = separators2.indexOf(c); if (indexOf > -1) { addToken(text, currentTokenStart, i - 1, Token.ERROR_NUMBER_FORMAT, newStartOffset + currentTokenStart); addToken(text, i, i, Token.IDENTIFIER, newStartOffset + i); currentTokenType = Token.NULL; } } // End of switch (c). break; case Token.ERROR_CHAR: if (c == '\\') { backslash = !backslash; // Okay because if we got in here, backslash was initially false. } else { if (c == '\'' && !backslash) { addToken(text, currentTokenStart, i, Token.LITERAL_CHAR, newStartOffset + currentTokenStart); currentTokenType = Token.NULL; // backslash is definitely false when we leave. } backslash = false; // Need to set backslash to false here as a character was typed. } // Otherwise, we're still an unclosed char... break; case Token.ERROR_STRING_DOUBLE: if (c == '\\') { backslash = !backslash; // Okay because if we got in here, backslash was initially false. } else { if (c == '"' && !backslash) { addToken(text, currentTokenStart, i, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset + currentTokenStart); currentTokenType = Token.NULL; // backslash is definitely false when we leave. } backslash = false; // Need to set backslash to false here as a character was typed. } // Otherwise, we're still an unclosed string... break; } // End of switch (currentTokenType). } // End of for (int i=offset; i<end; i++). // Deal with the (possibly there) last token. if (currentTokenType != Token.NULL) { addToken(text, currentTokenStart, end - 1, currentTokenType, newStartOffset + currentTokenStart); } if (currentTokenType != Token.COMMENT_MULTILINE) { addNullToken(); } // Return the first token in our linked list. return firstToken; } }