UnixShellTokenMaker.java example

Explorer
domainmath-ide-master
/*
 * 03/16/2004
 *
 * UnixShellTokenMaker.java - Scanner for UNIX shell scripts.
 * 
 * This library is distributed under a modified BSD license.  See the included
 * RSyntaxTextArea.License.txt file for details.
 */
package org.fife.ui.rsyntaxtextarea.modes;

import javax.swing.text.Segment;

import org.fife.ui.rsyntaxtextarea.*;


/**
 * A token maker that turns text into a linked list of <code>Token</code>s
 * for syntax highlighting UNIX shell scripts.
 *
 * @author Robert Futrell
 * @version 0.1
 */
public class UnixShellTokenMaker extends AbstractTokenMaker {

	// Single characters that the scanner emits as Token.OPERATOR tokens.
	protected final String operators = "=|><&";
	// Single characters that the scanner emits as Token.SEPARATOR tokens.
	protected final String separators = "()[]";
	protected final String separators2 = ".,;";			// Characters you don't want syntax highlighted but separate identifiers.
	protected final String shellVariables = "#-?$!*@_";	// Characters that are part of "$<char>" shell variables; e.g., "$_".


	// Index into the segment's character array at which the token currently being scanned starts.
	private int currentTokenStart;
	// Type of the token currently being scanned; Token.NULL means the next character starts a new token.
	private int currentTokenType;


	/**
	 * Constructor.
	 */
	public UnixShellTokenMaker() {
		super();	// Initializes tokensToHighlight.
	}


	/**
	 * Refines a token's type and then hands the token to the superclass.
	 * A token reported as an identifier is looked up in the table of words
	 * to highlight and, if found, takes on the type stored there.  A token
	 * type this scanner does not expect is reported with a stack trace and
	 * then treated as a plain identifier.
	 *
	 * @param segment <code>Segment</code> to get text from.
	 * @param start Start offset in <code>segment</code> of token.
	 * @param end End offset in <code>segment</code> of token.
	 * @param tokenType The token's type, as determined by the scanner.
	 * @param startOffset The offset in the document at which the token occurs.
	 */
	@Override
	public void addToken(Segment segment, int start, int end, int tokenType, int startOffset) {

		int resolvedType = tokenType;

		if (tokenType==Token.IDENTIFIER) {
			// Reserved words and functions reach this method as plain
			// "identifiers", so check whether this one has a special type.
			int mapped = wordsToHighlight.get(segment, start, end);
			if (mapped!=-1) {
				resolvedType = mapped;
			}
		}
		else {
			// Token types that are passed through untouched.
			boolean expected =
					tokenType==Token.WHITESPACE ||
					tokenType==Token.SEPARATOR ||
					tokenType==Token.OPERATOR ||
					tokenType==Token.LITERAL_NUMBER_DECIMAL_INT ||
					tokenType==Token.LITERAL_STRING_DOUBLE_QUOTE ||
					tokenType==Token.LITERAL_CHAR ||
					tokenType==Token.LITERAL_BACKQUOTE ||
					tokenType==Token.COMMENT_EOL ||
					tokenType==Token.PREPROCESSOR ||
					tokenType==Token.VARIABLE;
			if (!expected) {
				// Make the problem visible, but keep going with a harmless type.
				Exception unknownType = new Exception("Unknown tokenType: '" + tokenType + "'");
				unknownType.printStackTrace();
				resolvedType = Token.IDENTIFIER;
			}
		}

		super.addToken(segment, start, end, resolvedType, startOffset);

	}


	/**
	 * Returns the text to place at the beginning and end of a line to
	 * "comment" it out in this programming language.
	 *
	 * @return A two-element array: the comment start string (<code>"#"</code>)
	 *         followed by the comment end string, which is <code>null</code>.
	 */
	@Override
	public String[] getLineCommentStartAndEnd() {
		String[] startAndEnd = new String[2];
		startAndEnd[0] = "#";
		// startAndEnd[1] is deliberately left null.
		return startAndEnd;
	}


	/**
	 * Returns whether tokens of the specified type should have "mark
	 * occurrences" enabled for the current programming language.  Here
	 * that is the case for identifiers and variables only.
	 *
	 * @param type The token type.
	 * @return <code>true</code> if <code>type</code> is
	 *         <code>Token.IDENTIFIER</code> or <code>Token.VARIABLE</code>,
	 *         <code>false</code> otherwise.
	 */
	@Override
	public boolean getMarkOccurrencesOfTokenType(int type) {
		switch (type) {
			case Token.IDENTIFIER:
			case Token.VARIABLE:
				return true;
			default:
				return false;
		}
	}


	/**
	 * Returns the words to highlight for UNIX shell scripts.  Shell keywords
	 * are registered as <code>Token.RESERVED_WORD</code>; commands and
	 * builtins are registered as <code>Token.FUNCTION</code>.  Each word is
	 * registered exactly once.
	 *
	 * @return A <code>TokenMap</code> containing the words to highlight for
	 *         UNIX shell scripts.
	 * @see org.fife.ui.rsyntaxtextarea.AbstractTokenMaker#getWordsToHighlight
	 */
	@Override
	public TokenMap getWordsToHighlight() {

		// Shell control-flow keywords.
		final String[] reservedWords = {
			"case", "do", "done", "elif", "else", "esac", "fi", "for", "if",
			"in", "select", "then", "until", "while",
		};

		// Commands and builtins.  Lookups are by exact text, which is why
		// both "mail" and "Mail" are listed.
		final String[] functions = {
			"addbib", "admin", "alias", "apropos", "ar", "at", "awk",
			"banner", "basename", "batch", "bg", "biff", "bin-mail",
			"binmail", "break",
			"cal", "calendar", "cancel", "cat", "cb", "cc", "cd", "cdc",
			"chdir", "checkeq", "checknr", "chfn", "chgrp", "chmod", "chown",
			"chsh", "clear", "cmp", "colcrt", "comb", "comm", "command",
			"compress", "continue", "cp", "cpio", "cpp", "crontab", "csh",
			"ctags", "cut", "cvs",
			"date", "dbx", "delta", "deroff", "df", "diff", "dtree", "du",
			"e", "echo", "ed", "edit", "enscript", "eqn", "error", "eval",
			"ex", "exec", "exit", "expand", "export", "expr",
			"false", "fc", "fg", "file", "find", "finger", "fmt", "fmt_mail",
			"fold", "ftp", "function",
			"gcore", "get", "getopts", "gprof", "grep", "groups", "gunzip",
			"gzip",
			"hashcheck", "hashmake", "head", "help", "history",
			"imake", "indent", "install",
			"jobs", "join",
			"kill",
			"last", "ld", "leave", "less", "let", "lex", "lint", "ln",
			"login", "look", "lookbib", "lorder", "lp", "lpq", "lpr", "lprm",
			"ls",
			"mail", "Mail", "make", "man", "md", "mesg", "mkdir", "mkstr",
			"more", "mount", "mv",
			"nawk", "neqn", "nice", "nm", "nroff",
			"od",
			"page", "passwd", "paste", "pr", "print", "printf", "printenv",
			"prof", "prs", "prt", "ps", "ptx", "pwd",
			"quota",
			"ranlib", "rcp", "rcs", "rcsdiff", "read", "readonly", "red",
			"return", "rev", "rlogin", "rm", "rmdel", "rmdir", "roffbib",
			"rsh", "rup", "ruptime", "rusers", "rwall", "rwho",
			"sact", "sccs", "sccsdiff", "script", "sed", "set", "setgroups",
			"setsenv", "sh", "shift", "size", "sleep", "sort", "sortbib",
			"spell", "split", "ssh", "strings", "strip", "stty", "su",
			"sudo", "symorder",
			"tabs", "tail", "talk", "tar", "tbl", "tee", "telnet", "test",
			"tftp", "time", "times", "touch", "trap", "troff", "true",
			"tsort", "tty", "type", "typeset",
			"ue", "ul", "ulimit", "umask", "unalias", "uncompress",
			"unexpand", "unget", "unifdef", "uniq", "units", "unset",
			"uptime", "users", "uucp", "uudecode", "uuencode", "uulog",
			"uuname", "uusend", "uux",
			"vacation", "val", "vedit", "vgrind", "vi", "view", "vtroff",
			"w", "wait", "wall", "wc", "what", "whatis", "whence", "whereis",
			"which", "who", "whoami", "write",
			"xargs", "xstr",
			"yacc", "yes",
			"zcat",
		};

		TokenMap tokenMap = new TokenMap();

		for (String word : reservedWords) {
			tokenMap.put(word, Token.RESERVED_WORD);
		}
		for (String word : functions) {
			tokenMap.put(word, Token.FUNCTION);
		}

		return tokenMap;

	}


	/**
	 * Returns a list of tokens representing the given text.
	 *
	 * @param text The text to break into tokens.
	 * @param startTokenType The token with which to start tokenizing.
	 * @param startOffset The offset at which the line of tokens begins.
	 * @return A linked list of tokens representing <code>text</code>.
	 */
	public Token getTokenList(Segment text, int startTokenType, final int startOffset) {

		resetTokenList();

		char[] array = text.array;
		int offset = text.offset;
		int count = text.count;
		int end = offset + count;

		// When we find a token, its starting position is always of the form
		// 'startOffset + (currentTokenStart-offset)'.  Since startOffset and
		// offset are constant, a token's starting position can be computed as
		// 'newStartOffset+currentTokenStart', saving one subtraction per token.
		int newStartOffset = startOffset - offset;

		currentTokenStart = offset;
		currentTokenType  = startTokenType;
		boolean backslash = false;

//beginning:
		for (int i=offset; i<end; i++) {

			char c = array[i];

			switch (currentTokenType) {

				case Token.NULL:

					currentTokenStart = i;	// Starting a new token here.

					switch (c) {

						case ' ':
						case '\t':
							currentTokenType = Token.WHITESPACE;
							break;

						case '`':
							if (backslash) { // Escaped back quote => call '`' an identifier..
								addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								backslash = false;
							}
							else {
								currentTokenType = Token.LITERAL_BACKQUOTE;
							}
							break;

						case '"':
							if (backslash) { // Escaped double quote => call '"' an identifier..
								addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								backslash = false;
							}
							else {
								currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
							}
							break;

						case '\'':
							if (backslash) { // Escaped single quote => call '\'' an identifier.
								addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								backslash = false;
							}
							else {
								currentTokenType = Token.LITERAL_CHAR;
							}
							break;

						case '\\':
							addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenType = Token.NULL;
							backslash = !backslash;
							break;

						case '$':
							if (backslash) { // Escaped dollar sign => call '$' an identifier..
								addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								backslash = false;
							}
							else {
								currentTokenType = Token.VARIABLE;
							}
							break;

						case '#':
							backslash = false;
							currentTokenType = Token.COMMENT_EOL;
							break;

						default:
							if (RSyntaxUtilities.isDigit(c)) {
								currentTokenType = Token.LITERAL_NUMBER_DECIMAL_INT;
								break;
							}
							else if (RSyntaxUtilities.isLetter(c) || c=='/' || c=='_') {
								currentTokenType = Token.IDENTIFIER;
								break;
							}
							int indexOf = operators.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i, Token.OPERATOR, newStartOffset+currentTokenStart);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i, Token.SEPARATOR, newStartOffset+currentTokenStart);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators2.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								currentTokenType = Token.NULL;
								break;
							}
							else {
								currentTokenType = Token.IDENTIFIER;
								break;
							}

					} // End of switch (c).

					break;

				case Token.WHITESPACE:

					switch (c) {

						case ' ':
						case '\t':
							break;	// Still whitespace.

						case '\\':
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
							currentTokenType = Token.NULL;
							backslash = true; // Previous char whitespace => this must be first backslash.
							break;

						case '`': // Don't need to worry about backslashes as previous char is space.
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_BACKQUOTE;
							backslash = false;
							break;

						case '"': // Don't need to worry about backslashes as previous char is space.
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
							backslash = false;
							break;

						case '\'': // Don't need to worry about backslashes as previous char is space.
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_CHAR;
							backslash = false;
							break;
						
						case '$': // Don't need to worry about backslashes as previous char is space.
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.VARIABLE;
							backslash = false;
							break;

						case '#':
							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.COMMENT_EOL;
							break;

						default:	// Add the whitespace token and start anew.

							addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart);
							currentTokenStart = i;

							if (RSyntaxUtilities.isDigit(c)) {
								currentTokenType = Token.LITERAL_NUMBER_DECIMAL_INT;
								break;
							}
							else if (RSyntaxUtilities.isLetter(c) || c=='/' || c=='_') {
								currentTokenType = Token.IDENTIFIER;
								break;
							}
							int indexOf = operators.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, i,i, Token.OPERATOR, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators2.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							else {
								currentTokenType = Token.IDENTIFIER;
							}

					} // End of switch (c).

					break;

				default: // Should never happen
				case Token.IDENTIFIER:

					switch (c) {

						case ' ':
						case '\t':
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.WHITESPACE;
							break;

						case '/': // Special-case to colorize commands like "echo" in "/bin/echo"
							addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i+1;
							currentTokenType = Token.NULL;
							break;

						case '`': // Don't need to worry about backslashes as previous char is space.
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_BACKQUOTE;
							backslash = false;
							break;

						case '"': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
							backslash = false;
							break;

						case '\'': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_CHAR;
							backslash = false;
							break;

						case '\\':
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
							currentTokenType = Token.NULL;
							backslash = true;
							break;

						case '$': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.VARIABLE;
							backslash = false;
							break;
						
						case '=': // Special case here; when you have "identifier=<value>" in shell, "identifier" is a variable.
							addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart);
							addToken(text, i,i, Token.OPERATOR, newStartOffset+i);
							currentTokenType = Token.NULL;
							break;

						default:
							if (RSyntaxUtilities.isLetterOrDigit(c) || c=='/' || c=='_') {
								break;	// Still an identifier of some type.
							}
							int indexOf = operators.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.OPERATOR, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators2.indexOf(c,0);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							// Otherwise, we're still an identifier (?).

					} // End of switch (c).

					break;

				case Token.LITERAL_NUMBER_DECIMAL_INT:

					switch (c) {

						case ' ':
						case '\t':
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.WHITESPACE;
							break;

						case '`': // Don't need to worry about backslashes as previous char is space.
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_BACKQUOTE;
							backslash = false;
							break;

						case '"': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
							backslash = false;
							break;

						case '\'': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.LITERAL_CHAR;
							backslash = false;
							break;

						case '$': // Don't need to worry about backslashes as previous char is non-backslash.
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
							currentTokenStart = i;
							currentTokenType = Token.VARIABLE;
							backslash = false;
							break;

						case '\\':
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
							addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
							currentTokenType = Token.NULL;
							backslash = true;
							break;

						default:

							if (RSyntaxUtilities.isDigit(c)) {
								break;	// Still a literal number.
							}
							int indexOf = operators.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.OPERATOR, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.SEPARATOR, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}
							indexOf = separators2.indexOf(c);
							if (indexOf>-1) {
								addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
								addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i);
								currentTokenType = Token.NULL;
								break;
							}

							// Otherwise, remember this was a number and start over.
							addToken(text, currentTokenStart,i-1, Token.LITERAL_NUMBER_DECIMAL_INT, newStartOffset+currentTokenStart);
							i--;
							currentTokenType = Token.NULL;

					} // End of switch (c).

					break;
				
				case Token.VARIABLE:

					// Note that we first arrive here AFTER the '$' character.
					// First check if the variable name is enclosed in '{' and '}' characters.
					if (c=='{') {
						while (++i<end) {
							if (array[i]=='}') {
								addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart);
								currentTokenType = Token.NULL;
								break;
							}
						} // End of while (++i<end).
						if (i==end) { // Happens when '}' wasn't found...
							addToken(text, currentTokenStart,end-1, Token.VARIABLE, newStartOffset+currentTokenStart);
							currentTokenType = Token.NULL;
						}
						break;
					} // End of if (i<end-1 && array[i+1]=='{').

					// If we didn't find the '{' character, find the end of the variable...
					while (i<end) {
						c = array[i];	// Not needed the first iteration, but can't think of a better way to do it...
						if (!RSyntaxUtilities.isLetterOrDigit(c) && shellVariables.indexOf(c)==-1 && c!='_') {
							addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart);
							i--;
							currentTokenType = Token.NULL;
							break;
						}
						i++;
					}
								
					// This only happens if we never found the end of the variable in the loop above.
					if (i==end) {
						addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart);
						currentTokenType = Token.NULL;
					}
								
					break;

				case Token.COMMENT_EOL:
					// If we got here, then the line != "#" only, so check for "#!".
					if (c=='!')
						currentTokenType = Token.PREPROCESSOR;
					i = end - 1;
					addToken(text, currentTokenStart,i, currentTokenType, newStartOffset+currentTokenStart);
					// We need to set token type to null so at the bottom we don't add one more token.
					currentTokenType = Token.NULL;
					
					break;

				case Token.LITERAL_CHAR:

						if (c=='\\') {
							backslash = !backslash; // Okay because if we got in here, backslash was initially false.
						}
						else {
							if (c=='\'' && !backslash) {
								addToken(text, currentTokenStart,i, Token.LITERAL_CHAR, newStartOffset+currentTokenStart);
								currentTokenStart = i + 1;
								currentTokenType = Token.NULL;
								// backslash is definitely false when we leave.
							}

							backslash = false; // Need to set backslash to false here as a character was typed.

						}
						// Otherwise, we're still an unclosed char literal...

						break;

				case Token.LITERAL_BACKQUOTE:

						switch (c) {
								
							case '\\':
								backslash = !backslash;
								break;
								
							case '`':
								if (!backslash) {
									addToken(text, currentTokenStart,i, Token.LITERAL_BACKQUOTE, newStartOffset+currentTokenStart);
									currentTokenType = Token.NULL;
									// backslash is definitely false when we leave.
									break;
								}
								backslash = false;
								break;
							
							// Variable in the backquote string...
							case '$':

								if (backslash==true) {
									backslash = false;
									break;
								}
							
								// Add the string up-to the variable.
								addToken(text, currentTokenStart,i-1, Token.LITERAL_BACKQUOTE, newStartOffset+currentTokenStart);
								currentTokenType = Token.VARIABLE;
								currentTokenStart = i;
								
								// First check if the variable name is enclosed in '{' and '}' characters.
								if (i<end-1 && array[i+1]=='{') {
									i++; // Now we're on the '{' char.
									while (++i<end) {
										if (array[i]=='}') {
											addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart);
											i++;
											if (i<end) {
												c = array[i];
												if (c=='`') { // The only rub - back quote right after variable.
													addToken(text, i,i, Token.LITERAL_BACKQUOTE, newStartOffset+i);
													currentTokenType = Token.NULL;
													break;
												}
												else { // Continue on with the string.
													currentTokenStart = i;
													currentTokenType = Token.LITERAL_BACKQUOTE;
													i--;
													break;
												}
											}
											else { // i==end = "trick" this method so that the string is continued to the next line.
												currentTokenStart = i;
												currentTokenType = Token.LITERAL_BACKQUOTE;
												break; // So we don't hit the condition below.
											}
										} // End of if (array[i]=='}').
									} // End of while (++i<end).
									if (i==end) { // Happens when '}' wasn't found...
										addToken(text, currentTokenStart,end-1, Token.VARIABLE, newStartOffset+currentTokenStart);
										currentTokenStart = end; // ???
										currentTokenType = Token.LITERAL_BACKQUOTE;
										break;
									}
								} // End of if (i<end-1 && array[i+1]=='{').
								
								// If we reached the end of the variable, get out.
								if (currentTokenType==Token.NULL || currentTokenType==Token.LITERAL_BACKQUOTE)
									break;

								// If we didn't find the '{' character, find the end of the variable...
								// Increment first to skip the '$'.
								while (++i<end) {
									c = array[i];
									if (!RSyntaxUtilities.isLetterOrDigit(c) && shellVariables.indexOf(c)==-1 && c!='_') {
										addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart);
										if (c=='`') { // The only rub.
											addToken(text, i,i, Token.LITERAL_BACKQUOTE, newStartOffset+i);
											currentTokenType = Token.NULL;
											break;
										}
										else {
											currentTokenStart = i;
											currentTokenType = Token.LITERAL_BACKQUOTE;
											i--;
											break;
										}
									}
								}
								
								// This only happens if we never found the end of the variable in the loop above.
								// We "trick" this method so that the backquote string token is at the end.
								if (i==end) {
									addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart);
									currentTokenStart = i;
									currentTokenType = Token.LITERAL_BACKQUOTE;
								}
								
								break;
								
							// Otherwise, we're still in an unclosed string...
							default:
								backslash = false; // Need to set backslash to false here as a character was typed.

						} // End of switch (c).
				
						break;

				case Token.LITERAL_STRING_DOUBLE_QUOTE:
				
						switch (c) {
								
							case '\\':
								backslash = !backslash;
								break;
								
							case '"':
								if (!backslash) {
									addToken(text, currentTokenStart,i, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+currentTokenStart);
									currentTokenType = Token.NULL;
									// backslash is definitely false when we leave.
									break;
								}
								backslash = false;
								break;
							
							// Variable in the double-quoted string...
							case '$':
							
								if (backslash==true) {
									backslash = false;
									break;
								}
							
								// Add the string up-to the variable.
								addToken(text, currentTokenStart,i-1, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+currentTokenStart);
								currentTokenType = Token.VARIABLE;
								currentTokenStart = i;
								
								// First check if the variable name is enclosed in '{' and '}' characters.
								if (i<end-1 && array[i+1]=='{') {
									i++; // Now we're on the '{' char.
									while (++i<end) {
										if (array[i]=='}') {
											addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart);
											i++;
											if (i<end) {
												c = array[i];
												if (c=='"') { // The only rub - double-quote right after variable.
													addToken(text, i,i, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+i);
													currentTokenType = Token.NULL;
													break;
												}
												else { // Continue on with the string.
													currentTokenStart = i;
													currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
													i--;
													break;
												}
											}
											else { // i==end = "trick" this method so that the string is continued to the next line.
												currentTokenStart = i;
												currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
												break; // So we don't hit the condition below.
											}
										} // End of if (array[i]=='}').
									} // End of while (++i<end).
									if (i==end) { // Happens when '}' wasn't found...
										addToken(text, currentTokenStart,end-1, Token.VARIABLE, newStartOffset+currentTokenStart);
										currentTokenStart = end; // ???
										currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
										break;
									}
								} // End of if (i<end-1 && array[i+1]=='{').
								
								// If we reached the end of the variable, get out.
								if (currentTokenType==Token.NULL || currentTokenType==Token.LITERAL_STRING_DOUBLE_QUOTE)
									break;

								// If we didn't find the '{' character, find the end of the variable...
								// Increment first to skip the '$'.
								while (++i<end) {
									c = array[i];
									if (!RSyntaxUtilities.isLetterOrDigit(c) && shellVariables.indexOf(c)==-1 && c!='_') {
										addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart);
										if (c=='"') { // The only rub.
											addToken(text, i,i, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+i);
											currentTokenType = Token.NULL;
											break;
										}
										else {
											currentTokenStart = i;
											currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
											i--;
											break;
										}
									}
								}
								
								// This only happens if we never found the end of the variable in the loop above.
								// We "trick" this method so that the double-quote string token is at the end.
								if (i==end) {
									addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart);
									currentTokenStart = i;
									currentTokenType = Token.LITERAL_STRING_DOUBLE_QUOTE;
								}
								
								break;
								
							// Otherwise, we're still in an unclosed string...
							default:
								backslash = false; // Need to set backslash to false here as a character was typed.

						} // End of switch (c).
				
						break;

			} // End of switch (currentTokenType).

		} // End of for (int i=offset; i<end; i++).

		switch (currentTokenType) {

			// Remember what token type to begin the next line with.
			case Token.LITERAL_BACKQUOTE:
			case Token.LITERAL_STRING_DOUBLE_QUOTE:
			case Token.LITERAL_CHAR:
						addToken(text, currentTokenStart,end-1, currentTokenType, newStartOffset+currentTokenStart);
						break;

			// The line ended outside of any token; just terminate the list with a null token.
			case Token.NULL:
						addNullToken();
						break;

			// All other token types don't continue to the next line...
			default:
						addToken(text, currentTokenStart,end-1, currentTokenType, newStartOffset+currentTokenStart);
						addNullToken();

		}

		// Return the first token in our linked list.
		return firstToken;

	}


}