/* * 03/16/2004 * * UnixShellTokenMaker.java - Scanner for UNIX shell scripts. * * This library is distributed under a modified BSD license. See the included * RSyntaxTextArea.License.txt file for details. */ package org.fife.ui.rsyntaxtextarea.modes; import javax.swing.text.Segment; import org.fife.ui.rsyntaxtextarea.*; /** * A token maker that turns text into a linked list of <code>Token</code>s * for syntax highlighting UNIX shell scripts. * * @author Robert Futrell * @version 0.1 */ public class UnixShellTokenMaker extends AbstractTokenMaker { protected final String operators = "=|><&"; protected final String separators = "()[]"; protected final String separators2 = ".,;"; // Characters you don't want syntax highlighted but separate identifiers. protected final String shellVariables = "#-?$!*@_"; // Characters that are part of "$<char>" shell variables; e.g., "$_". private int currentTokenStart; private int currentTokenType; /** * Constructor. */ public UnixShellTokenMaker() { super(); // Initializes tokensToHighlight. } /** * Checks the token to give it the exact ID it deserves before * being passed up to the super method. * * @param segment <code>Segment</code> to get text from. * @param start Start offset in <code>segment</code> of token. * @param end End offset in <code>segment</code> of token. * @param tokenType The token's type. * @param startOffset The offset in the document at which the token occurs. */ @Override public void addToken(Segment segment, int start, int end, int tokenType, int startOffset) { switch (tokenType) { // Since reserved words, functions, and data types are all passed into here // as "identifiers," we have to see what the token really is... case Token.IDENTIFIER: int value = wordsToHighlight.get(segment, start,end); if (value!=-1) tokenType = value; break; case Token.WHITESPACE: case Token.SEPARATOR: case Token.OPERATOR: case Token.LITERAL_NUMBER_DECIMAL_INT: case Token.LITERAL_STRING_DOUBLE_QUOTE: case Token.LITERAL_CHAR: case Token.LITERAL_BACKQUOTE: case Token.COMMENT_EOL: case Token.PREPROCESSOR: case Token.VARIABLE: break; default: tokenType = Token.IDENTIFIER; break; } super.addToken(segment, start, end, tokenType, startOffset); } /** * {@inheritDoc} */ @Override public String[] getLineCommentStartAndEnd(int languageIndex) { return new String[] { "#", null }; } /** * Returns whether tokens of the specified type should have "mark * occurrences" enabled for the current programming language. * * @param type The token type. * @return Whether tokens of this type should have "mark occurrences" * enabled. */ @Override public boolean getMarkOccurrencesOfTokenType(int type) { return type==Token.IDENTIFIER || type==Token.VARIABLE; } /** * Returns the words to highlight for UNIX shell scripts. * * @return A <code>TokenMap</code> containing the words to highlight for * UNIX shell scripts. * @see org.fife.ui.rsyntaxtextarea.AbstractTokenMaker#getWordsToHighlight */ @Override public TokenMap getWordsToHighlight() { TokenMap tokenMap = new TokenMap(); int reservedWord = Token.RESERVED_WORD; tokenMap.put("case", reservedWord); tokenMap.put("do", reservedWord); tokenMap.put("done", reservedWord); tokenMap.put("elif", reservedWord); tokenMap.put("else", reservedWord); tokenMap.put("esac", reservedWord); tokenMap.put("fi", reservedWord); tokenMap.put("for", reservedWord); tokenMap.put("if", reservedWord); tokenMap.put("in", reservedWord); tokenMap.put("select", reservedWord); tokenMap.put("then", reservedWord); tokenMap.put("until", reservedWord); tokenMap.put("while", reservedWord); int function = Token.FUNCTION; tokenMap.put("addbib", function); tokenMap.put("admin", function); tokenMap.put("alias", function); tokenMap.put("apropos", function); tokenMap.put("ar", function); tokenMap.put("at", function); tokenMap.put("awk", function); tokenMap.put("banner", function); tokenMap.put("basename", function); tokenMap.put("batch", function); tokenMap.put("bg", function); tokenMap.put("biff", function); tokenMap.put("bin-mail", function); tokenMap.put("binmail", function); tokenMap.put("break", function); tokenMap.put("cal", function); tokenMap.put("calendar", function); tokenMap.put("cancel", function); tokenMap.put("cat", function); tokenMap.put("cb", function); tokenMap.put("cc", function); tokenMap.put("cd", function); tokenMap.put("cdc", function); tokenMap.put("chdir", function); tokenMap.put("checkeq", function); tokenMap.put("checknr", function); tokenMap.put("chfn", function); tokenMap.put("chgrp", function); tokenMap.put("chmod", function); tokenMap.put("chown", function); tokenMap.put("chsh", function); tokenMap.put("clear", function); tokenMap.put("cmp", function); tokenMap.put("colcrt", function); tokenMap.put("comb", function); tokenMap.put("comm", function); tokenMap.put("command", function); tokenMap.put("compress", function); tokenMap.put("continue", function); tokenMap.put("cp", function); tokenMap.put("cpio", function); tokenMap.put("cpp", function); tokenMap.put("crontab", function); tokenMap.put("csh", function); tokenMap.put("ctags", function); tokenMap.put("cut", function); tokenMap.put("cvs", function); tokenMap.put("date", function); tokenMap.put("dbx", function); tokenMap.put("delta", function); tokenMap.put("deroff", function); tokenMap.put("df", function); tokenMap.put("diff", function); tokenMap.put("dtree", function); tokenMap.put("du", function); tokenMap.put("e", function); tokenMap.put("echo", function); tokenMap.put("ed", function); tokenMap.put("edit", function); tokenMap.put("enscript", function); tokenMap.put("eqn", function); tokenMap.put("error", function); tokenMap.put("eval", function); tokenMap.put("ex", function); tokenMap.put("exec", function); tokenMap.put("exit", function); tokenMap.put("expand", function); tokenMap.put("export", function); tokenMap.put("expr", function); tokenMap.put("false", function); tokenMap.put("fc", function); tokenMap.put("fg", function); tokenMap.put("file", function); tokenMap.put("find", function); tokenMap.put("finger", function); tokenMap.put("fmt", function); tokenMap.put("fmt_mail", function); tokenMap.put("fold", function); tokenMap.put("ftp", function); tokenMap.put("function", function); tokenMap.put("gcore", function); tokenMap.put("get", function); tokenMap.put("getopts", function); tokenMap.put("gprof", function); tokenMap.put("grep", function); tokenMap.put("groups", function); tokenMap.put("gunzip", function); tokenMap.put("gzip", function); tokenMap.put("hashcheck", function); tokenMap.put("hashmake", function); tokenMap.put("head", function); tokenMap.put("help", function); tokenMap.put("history", function); tokenMap.put("imake", function); tokenMap.put("indent", function); tokenMap.put("install", function); tokenMap.put("jobs", function); tokenMap.put("join", function); tokenMap.put("kill", function); tokenMap.put("last", function); tokenMap.put("ld", function); tokenMap.put("leave", function); tokenMap.put("less", function); tokenMap.put("let", function); tokenMap.put("lex", function); tokenMap.put("lint", function); tokenMap.put("ln", function); tokenMap.put("login", function); tokenMap.put("look", function); tokenMap.put("lookbib", function); tokenMap.put("lorder", function); tokenMap.put("lp", function); tokenMap.put("lpq", function); tokenMap.put("lpr", function); tokenMap.put("lprm", function); tokenMap.put("ls", function); tokenMap.put("mail", function); tokenMap.put("Mail", function); tokenMap.put("make", function); tokenMap.put("man", function); tokenMap.put("md", function); tokenMap.put("mesg", function); tokenMap.put("mkdir", function); tokenMap.put("mkstr", function); tokenMap.put("more", function); tokenMap.put("mount", function); tokenMap.put("mv", function); tokenMap.put("nawk", function); tokenMap.put("neqn", function); tokenMap.put("nice", function); tokenMap.put("nm", function); tokenMap.put("nroff", function); tokenMap.put("od", function); tokenMap.put("page", function); tokenMap.put("passwd", function); tokenMap.put("paste", function); tokenMap.put("pr", function); tokenMap.put("print", function); tokenMap.put("printf", function); tokenMap.put("printenv", function); tokenMap.put("prof", function); tokenMap.put("prs", function); tokenMap.put("prt", function); tokenMap.put("ps", function); tokenMap.put("ptx", function); tokenMap.put("pwd", function); tokenMap.put("quota", function); tokenMap.put("ranlib", function); tokenMap.put("rcp", function); tokenMap.put("rcs", function); tokenMap.put("rcsdiff", function); tokenMap.put("read", function); tokenMap.put("readonly", function); tokenMap.put("red", function); tokenMap.put("return", function); tokenMap.put("rev", function); tokenMap.put("rlogin", function); tokenMap.put("rm", function); tokenMap.put("rmdel", function); tokenMap.put("rmdir", function); tokenMap.put("roffbib", function); tokenMap.put("rsh", function); tokenMap.put("rup", function); tokenMap.put("ruptime", function); tokenMap.put("rusers", function); tokenMap.put("rwall", function); tokenMap.put("rwho", function); tokenMap.put("sact", function); tokenMap.put("sccs", function); tokenMap.put("sccsdiff", function); tokenMap.put("script", function); tokenMap.put("sed", function); tokenMap.put("set", function); tokenMap.put("setgroups", function); tokenMap.put("setsenv", function); tokenMap.put("sh", function); tokenMap.put("shift", function); tokenMap.put("size", function); tokenMap.put("sleep", function); tokenMap.put("sort", function); tokenMap.put("sortbib", function); tokenMap.put("spell", function); tokenMap.put("split", function); tokenMap.put("ssh", function); tokenMap.put("strings", function); tokenMap.put("strip", function); tokenMap.put("stty", function); tokenMap.put("su", function); tokenMap.put("sudo", function); tokenMap.put("symorder", function); tokenMap.put("tabs", function); tokenMap.put("tail", function); tokenMap.put("talk", function); tokenMap.put("tar", function); tokenMap.put("tbl", function); tokenMap.put("tee", function); tokenMap.put("telnet", function); tokenMap.put("test", function); tokenMap.put("tftp", function); tokenMap.put("time", function); tokenMap.put("times", function); tokenMap.put("touch", function); tokenMap.put("trap", function); tokenMap.put("troff", function); tokenMap.put("true", function); tokenMap.put("tsort", function); tokenMap.put("tty", function); tokenMap.put("type", function); tokenMap.put("typeset", function); tokenMap.put("ue", function); tokenMap.put("ul", function); tokenMap.put("ulimit", function); tokenMap.put("umask", function); tokenMap.put("unalias", function); tokenMap.put("uncompress", function); tokenMap.put("unexpand", function); tokenMap.put("unget", function); tokenMap.put("unifdef", function); tokenMap.put("uniq", function); tokenMap.put("units", function); tokenMap.put("unset", function); tokenMap.put("uptime", function); tokenMap.put("users", function); tokenMap.put("uucp", function); tokenMap.put("uudecode", function); tokenMap.put("uuencode", function); tokenMap.put("uulog", function); tokenMap.put("uuname", function); tokenMap.put("uusend", function); tokenMap.put("uux", function); tokenMap.put("vacation", function); tokenMap.put("val", function); tokenMap.put("vedit", function); tokenMap.put("vgrind", function); tokenMap.put("vi", function); tokenMap.put("view", function); tokenMap.put("vtroff", function); tokenMap.put("w", function); tokenMap.put("wait", function); tokenMap.put("wall", function); tokenMap.put("wc", function); tokenMap.put("wait", function); tokenMap.put("what", function); tokenMap.put("whatis", function); tokenMap.put("whence", function); tokenMap.put("whereis", function); tokenMap.put("which", function); tokenMap.put("who", function); tokenMap.put("whoami", function); tokenMap.put("write", function); tokenMap.put("xargs", function); tokenMap.put("xstr", function); tokenMap.put("yacc", function); tokenMap.put("yes", function); tokenMap.put("zcat", function); return tokenMap; } /** * Returns a list of tokens representing the given text. * * @param text The text to break into tokens. * @param startTokenType The token with which to start tokenizing. * @param startOffset The offset at which the line of tokens begins. * @return A linked list of tokens representing <code>text</code>. */ public Token getTokenList(Segment text, int startTokenType, final int startOffset) { resetTokenList(); char[] array = text.array; int offset = text.offset; int count = text.count; int end = offset + count; // See, when we find a token, its starting position is always of the form: // 'startOffset + (currentTokenStart-offset)'; but since startOffset and // offset are constant, tokens' starting positions become: // 'newStartOffset+currentTokenStart' for one less subraction operation. int newStartOffset = startOffset - offset; currentTokenStart = offset; currentTokenType = startTokenType; boolean backslash = false; //beginning: for (int i=offset; i<end; i++) { char c = array[i]; switch (currentTokenType) { case Token.NULL: currentTokenStart = i; // Starting a new token here. switch (c) { case ' ': case '\t': currentTokenType = Token.WHITESPACE; break; case '`': if (backslash) { // Escaped back quote => call '`' an identifier.. addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart); backslash = false; } else { currentTokenType = Token.LITERAL_BACKQUOTE; } break; case '"': if (backslash) { // Escaped double quote => call '"' an identifier.. addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart); backslash = false; } else { currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE; } break; case '\'': if (backslash) { // Escaped single quote => call '\'' an identifier. addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart); backslash = false; } else { currentTokenType = Token.LITERAL_CHAR; } break; case '\\': addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; backslash = !backslash; break; case '$': if (backslash) { // Escaped dollar sign => call '$' an identifier.. addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart); backslash = false; } else { currentTokenType = Token.VARIABLE; } break; case '#': backslash = false; currentTokenType = Token.COMMENT_EOL; break; default: if (RSyntaxUtilities.isDigit(c)) { currentTokenType = Token.LITERAL_NUMBER_DECIMAL_INT; break; } else if (RSyntaxUtilities.isLetter(c) || c=='/' || c=='_') { currentTokenType = Token.IDENTIFIER; break; } int indexOf = operators.indexOf(c,0); if (indexOf>-1) { addToken(text, currentTokenStart,i, Token.OPERATOR, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; break; } indexOf = separators.indexOf(c,0); if (indexOf>-1) { addToken(text, currentTokenStart,i, Token.SEPARATOR, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; break; } indexOf = separators2.indexOf(c,0); if (indexOf>-1) { addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; break; } else { currentTokenType = Token.IDENTIFIER; break; } } // End of switch (c). break; case Token.WHITESPACE: switch (c) { case ' ': case '\t': break; // Still whitespace. case '\\': addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart); addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i); currentTokenType = Token.NULL; backslash = true; // Previous char whitespace => this must be first backslash. break; case '`': // Don't need to worry about backslashes as previous char is space. addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.LITERAL_BACKQUOTE; backslash = false; break; case '"': // Don't need to worry about backslashes as previous char is space. addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE; backslash = false; break; case '\'': // Don't need to worry about backslashes as previous char is space. addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.LITERAL_CHAR; backslash = false; break; case '$': // Don't need to worry about backslashes as previous char is space. addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.VARIABLE; backslash = false; break; case '#': addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.COMMENT_EOL; break; default: // Add the whitespace token and start anew. addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart); currentTokenStart = i; if (RSyntaxUtilities.isDigit(c)) { currentTokenType = Token.LITERAL_NUMBER_DECIMAL_INT; break; } else if (RSyntaxUtilities.isLetter(c) || c=='/' || c=='_') { currentTokenType = Token.IDENTIFIER; break; } int indexOf = operators.indexOf(c,0); if (indexOf>-1) { addToken(text, i,i, Token.OPERATOR, newStartOffset+i); currentTokenType = Token.NULL; break; } indexOf = separators.indexOf(c,0); if (indexOf>-1) { addToken(text, i,i, Token.SEPARATOR, newStartOffset+i); currentTokenType = Token.NULL; break; } indexOf = separators2.indexOf(c,0); if (indexOf>-1) { addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i); currentTokenType = Token.NULL; break; } else { currentTokenType = Token.IDENTIFIER; } } // End of switch (c). break; default: // Should never happen case Token.IDENTIFIER: switch (c) { case ' ': case '\t': addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.WHITESPACE; break; case '/': // Special-case to colorize commands like "echo" in "/bin/echo" addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart); currentTokenStart = i+1; currentTokenType = Token.NULL; break; case '`': // Don't need to worry about backslashes as previous char is space. addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.LITERAL_BACKQUOTE; backslash = false; break; case '"': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE; backslash = false; break; case '\'': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.LITERAL_CHAR; backslash = false; break; case '\\': addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i); currentTokenType = Token.NULL; backslash = true; break; case '$': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.VARIABLE; backslash = false; break; case '=': // Special case here; when you have "identifier=<value>" in shell, "identifier" is a variable. addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart); addToken(text, i,i, Token.OPERATOR, newStartOffset+i); currentTokenType = Token.NULL; break; default: if (RSyntaxUtilities.isLetterOrDigit(c) || c=='/' || c=='_') { break; // Still an identifier of some type. } int indexOf = operators.indexOf(c); if (indexOf>-1) { addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); addToken(text, i,i, Token.OPERATOR, newStartOffset+i); currentTokenType = Token.NULL; break; } indexOf = separators.indexOf(c,0); if (indexOf>-1) { addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); addToken(text, i,i, Token.SEPARATOR, newStartOffset+i); currentTokenType = Token.NULL; break; } indexOf = separators2.indexOf(c,0); if (indexOf>-1) { addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i); currentTokenType = Token.NULL; break; } // Otherwise, we're still an identifier (?). } // End of switch (c). break; case Token.LITERAL_NUMBER_DECIMAL_INT: switch (c) { case ' ': case '\t': addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.WHITESPACE; break; case '`': // Don't need to worry about backslashes as previous char is space. addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.LITERAL_BACKQUOTE; backslash = false; break; case '"': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE; backslash = false; break; case '\'': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.LITERAL_CHAR; backslash = false; break; case '$': // Don't need to worry about backslashes as previous char is non-backslash. addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.VARIABLE; backslash = false; break; case '\\': addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart); addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i); currentTokenType = Token.NULL; backslash = true; break; default: if (RSyntaxUtilities.isDigit(c)) { break; // Still a literal number. } int indexOf = operators.indexOf(c); if (indexOf>-1) { addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart); addToken(text, i,i, Token.OPERATOR, newStartOffset+i); currentTokenType = Token.NULL; break; } indexOf = separators.indexOf(c); if (indexOf>-1) { addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart); addToken(text, i,i, Token.SEPARATOR, newStartOffset+i); currentTokenType = Token.NULL; break; } indexOf = separators2.indexOf(c); if (indexOf>-1) { addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart); addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i); currentTokenType = Token.NULL; break; } // Otherwise, remember this was a number and start over. addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart); i--; currentTokenType = Token.NULL; } // End of switch (c). break; case Token.VARIABLE: // Note that we first arrive here AFTER the '$' character. // First check if the variable name is enclosed in '{' and '}' characters. if (c=='{') { while (++i<end) { if (array[i]=='}') { addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; break; } } // End of while (++i<end). if (i==end) { // Happens when '}' wasn't found... addToken(text, currentTokenStart,end-1, Token.VARIABLE, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; } break; } // End of if (i<end-1 && array[i+1]=='{'). // If we didn't find the '{' character, find the end of the variable... while (i<end) { c = array[i]; // Not needed the first iteration, but can't think of a better way to do it... if (!RSyntaxUtilities.isLetterOrDigit(c) && shellVariables.indexOf(c)==-1 && c!='_') { addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart); i--; currentTokenType = Token.NULL; break; } i++; } // This only happens if we never found the end of the variable in the loop above. if (i==end) { addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; } break; case Token.COMMENT_EOL: // If we got here, then the line != "#" only, so check for "#!". if (c=='!') currentTokenType = Token.PREPROCESSOR; i = end - 1; addToken(text, currentTokenStart,i, currentTokenType, newStartOffset+currentTokenStart); // We need to set token type to null so at the bottom we don't add one more token. currentTokenType = Token.NULL; break; case Token.LITERAL_CHAR: if (c=='\\') { backslash = !backslash; // Okay because if we got in here, backslash was initially false. } else { if (c=='\'' && !backslash) { addToken(text, currentTokenStart,i, Token.LITERAL_CHAR, newStartOffset+currentTokenStart); currentTokenStart = i + 1; currentTokenType = Token.NULL; // backslash is definitely false when we leave. } backslash = false; // Need to set backslash to false here as a character was typed. } // Otherwise, we're still an unclosed char literal... break; case Token.LITERAL_BACKQUOTE: switch (c) { case '\\': backslash = !backslash; break; case '`': if (!backslash) { addToken(text, currentTokenStart,i, Token.LITERAL_BACKQUOTE, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; // backslash is definitely false when we leave. break; } backslash = false; break; // Variable in the backquote string... case '$': if (backslash==true) { backslash = false; break; } // Add the string up-to the variable. addToken(text, currentTokenStart,i-1, Token.LITERAL_BACKQUOTE, newStartOffset+currentTokenStart); currentTokenType = Token.VARIABLE; currentTokenStart = i; // First check if the variable name is enclosed in '{' and '}' characters. if (i<end-1 && array[i+1]=='{') { i++; // Now we're on the '{' char. while (++i<end) { if (array[i]=='}') { addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart); i++; if (i<end) { c = array[i]; if (c=='`') { // The only rub - back quote right after variable. addToken(text, i,i, Token.LITERAL_BACKQUOTE, newStartOffset+i); currentTokenType = Token.NULL; break; } else { // Continue on with the string. currentTokenStart = i; currentTokenType = Token.LITERAL_BACKQUOTE; i--; break; } } else { // i==end = "trick" this method so that the string is continued to the next line. currentTokenStart = i; currentTokenType = Token.LITERAL_BACKQUOTE; break; // So we don't hit the condition below. } } // End of if (array[i]=='}'). } // End of while (++i<end). if (i==end) { // Happens when '}' wasn't found... addToken(text, currentTokenStart,end-1, Token.VARIABLE, newStartOffset+currentTokenStart); currentTokenStart = end; // ??? currentTokenType = Token.LITERAL_BACKQUOTE; break; } } // End of if (i<end-1 && array[i+1]=='{'). // If we reached the end of the variable, get out. if (currentTokenType==Token.NULL || currentTokenType==Token.LITERAL_BACKQUOTE) break; // If we didn't find the '{' character, find the end of the variable... // Increment first to skip the '$'. while (++i<end) { c = array[i]; if (!RSyntaxUtilities.isLetterOrDigit(c) && shellVariables.indexOf(c)==-1 && c!='_') { addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart); if (c=='`') { // The only rub. addToken(text, i,i, Token.LITERAL_BACKQUOTE, newStartOffset+i); currentTokenType = Token.NULL; break; } else { currentTokenStart = i; currentTokenType = Token.LITERAL_BACKQUOTE; i--; break; } } } // This only happens if we never found the end of the variable in the loop above. // We "trick" this method so that the backquote string token is at the end. if (i==end) { addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.LITERAL_BACKQUOTE; } break; // Otherwise, we're still in an unclosed string... default: backslash = false; // Need to set backslash to false here as a character was typed. } // End of switch (c). break; case Token.LITERAL_STRING_DOUBLE_QUOTE: switch (c) { case '\\': backslash = !backslash; break; case '"': if (!backslash) { addToken(text, currentTokenStart,i, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; // backslash is definitely false when we leave. break; } backslash = false; break; // Variable in the double-quoted string... case '$': if (backslash==true) { backslash = false; break; } // Add the string up-to the variable. addToken(text, currentTokenStart,i-1, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+currentTokenStart); currentTokenType = Token.VARIABLE; currentTokenStart = i; // First check if the variable name is enclosed in '{' and '}' characters. if (i<end-1 && array[i+1]=='{') { i++; // Now we're on the '{' char. while (++i<end) { if (array[i]=='}') { addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart); i++; if (i<end) { c = array[i]; if (c=='"') { // The only rub - double-quote right after variable. addToken(text, i,i, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+i); currentTokenType = Token.NULL; break; } else { // Continue on with the string. currentTokenStart = i; currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE; i--; break; } } else { // i==end = "trick" this method so that the string is continued to the next line. currentTokenStart = i; currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE; break; // So we don't hit the condition below. } } // End of if (array[i]=='}'). } // End of while (++i<end). if (i==end) { // Happens when '}' wasn't found... addToken(text, currentTokenStart,end-1, Token.VARIABLE, newStartOffset+currentTokenStart); currentTokenStart = end; // ??? currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE; break; } } // End of if (i<end-1 && array[i+1]=='{'). // If we reached the end of the variable, get out. if (currentTokenType==Token.NULL || currentTokenType==Token.LITERAL_STRING_DOUBLE_QUOTE) break; // If we didn't find the '{' character, find the end of the variable... // Increment first to skip the '$'. while (++i<end) { c = array[i]; if (!RSyntaxUtilities.isLetterOrDigit(c) && shellVariables.indexOf(c)==-1 && c!='_') { addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart); if (c=='"') { // The only rub. addToken(text, i,i, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+i); currentTokenType = Token.NULL; break; } else { currentTokenStart = i; currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE; i--; break; } } } // This only happens if we never found the end of the variable in the loop above. // We "trick" this method so that the double-quote string token is at the end. if (i==end) { addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE; } break; // Otherwise, we're still in an unclosed string... default: backslash = false; // Need to set backslash to false here as a character was typed. } // End of switch (c). break; } // End of switch (currentTokenType). } // End of for (int i=offset; i<end; i++). switch (currentTokenType) { // Remember what token type to begin the next line with. case Token.LITERAL_BACKQUOTE: case Token.LITERAL_STRING_DOUBLE_QUOTE: case Token.LITERAL_CHAR: addToken(text, currentTokenStart,end-1, currentTokenType, newStartOffset+currentTokenStart); break; // Do nothing if everything was okay. case Token.NULL: addNullToken(); break; // All other token types don't continue to the next line... default: addToken(text, currentTokenStart,end-1, currentTokenType, newStartOffset+currentTokenStart); addNullToken(); } // Return the first token in our linked list. return firstToken; } }