/* * 03/07/2004 * * WindowsBatchTokenMaker.java - Scanner for Windows batch files. * * This library is distributed under a modified BSD license. See the included * RSyntaxTextArea.License.txt file for details. */ package org.fife.ui.rsyntaxtextarea.modes; import javax.swing.text.Segment; import org.fife.ui.rsyntaxtextarea.*; /** * A token maker that turns text into a linked list of * <code>Token</code>s for syntax highlighting Microsoft * Windows batch files. * * @author Robert Futrell * @version 0.1 */ public class WindowsBatchTokenMaker extends AbstractTokenMaker { protected final String operators = "@:*<>=?"; private int currentTokenStart; private int currentTokenType; private VariableType varType; /** * Constructor. */ public WindowsBatchTokenMaker() { super(); // Initializes tokensToHighlight. } /** * Checks the token to give it the exact ID it deserves before * being passed up to the super method. * * @param segment <code>Segment</code> to get text from. * @param start Start offset in <code>segment</code> of token. * @param end End offset in <code>segment</code> of token. * @param tokenType The token's type. * @param startOffset The offset in the document at which the token occurs. */ @Override public void addToken(Segment segment, int start, int end, int tokenType, int startOffset) { switch (tokenType) { // Since reserved words, functions, and data types are all passed // into here as "identifiers," we have to see what the token // really is... case Token.IDENTIFIER: int value = wordsToHighlight.get(segment, start,end); if (value!=-1) tokenType = value; break; } super.addToken(segment, start, end, tokenType, startOffset); } /** * {@inheritDoc} */ @Override public String[] getLineCommentStartAndEnd(int languageIndex) { return new String[] { "rem ", null }; } /** * Returns whether tokens of the specified type should have "mark * occurrences" enabled for the current programming language. * * @param type The token type. * @return Whether tokens of this type should have "mark occurrences" * enabled. */ @Override public boolean getMarkOccurrencesOfTokenType(int type) { return type==Token.IDENTIFIER || type==Token.VARIABLE; } /** * Returns the words to highlight for Windows batch files. * * @return A <code>TokenMap</code> containing the words to highlight for * Windows batch files. * @see org.fife.ui.rsyntaxtextarea.AbstractTokenMaker#getWordsToHighlight */ @Override public TokenMap getWordsToHighlight() { TokenMap tokenMap = new TokenMap(true); // Ignore case. int reservedWord = Token.RESERVED_WORD; // Batch-file specific stuff (?) tokenMap.put("goto", reservedWord); tokenMap.put("if", reservedWord); tokenMap.put("shift", reservedWord); tokenMap.put("start", reservedWord); // General command line stuff tokenMap.put("ansi.sys", reservedWord); tokenMap.put("append", reservedWord); tokenMap.put("arp", reservedWord); tokenMap.put("assign", reservedWord); tokenMap.put("assoc", reservedWord); tokenMap.put("at", reservedWord); tokenMap.put("attrib", reservedWord); tokenMap.put("break", reservedWord); tokenMap.put("cacls", reservedWord); tokenMap.put("call", reservedWord); tokenMap.put("cd", reservedWord); tokenMap.put("chcp", reservedWord); tokenMap.put("chdir", reservedWord); tokenMap.put("chkdsk", reservedWord); tokenMap.put("chknfts", reservedWord); tokenMap.put("choice", reservedWord); tokenMap.put("cls", reservedWord); tokenMap.put("cmd", reservedWord); tokenMap.put("color", reservedWord); tokenMap.put("comp", reservedWord); tokenMap.put("compact", reservedWord); tokenMap.put("control", reservedWord); tokenMap.put("convert", reservedWord); tokenMap.put("copy", reservedWord); tokenMap.put("ctty", reservedWord); tokenMap.put("date", reservedWord); tokenMap.put("debug", reservedWord); tokenMap.put("defrag", reservedWord); tokenMap.put("del", reservedWord); tokenMap.put("deltree", reservedWord); tokenMap.put("dir", reservedWord); tokenMap.put("diskcomp", reservedWord); tokenMap.put("diskcopy", reservedWord); tokenMap.put("do", reservedWord); tokenMap.put("doskey", reservedWord); tokenMap.put("dosshell", reservedWord); tokenMap.put("drivparm", reservedWord); tokenMap.put("echo", reservedWord); tokenMap.put("edit", reservedWord); tokenMap.put("edlin", reservedWord); tokenMap.put("emm386", reservedWord); tokenMap.put("erase", reservedWord); tokenMap.put("exist", reservedWord); tokenMap.put("exit", reservedWord); tokenMap.put("expand", reservedWord); tokenMap.put("extract", reservedWord); tokenMap.put("fasthelp", reservedWord); tokenMap.put("fc", reservedWord); tokenMap.put("fdisk", reservedWord); tokenMap.put("find", reservedWord); tokenMap.put("for", reservedWord); tokenMap.put("format", reservedWord); tokenMap.put("ftp", reservedWord); tokenMap.put("graftabl", reservedWord); tokenMap.put("help", reservedWord); tokenMap.put("ifshlp.sys", reservedWord); tokenMap.put("in", reservedWord); tokenMap.put("ipconfig", reservedWord); tokenMap.put("keyb", reservedWord); tokenMap.put("kill", reservedWord); tokenMap.put("label", reservedWord); tokenMap.put("lh", reservedWord); tokenMap.put("loadfix", reservedWord); tokenMap.put("loadhigh", reservedWord); tokenMap.put("lock", reservedWord); tokenMap.put("md", reservedWord); tokenMap.put("mem", reservedWord); tokenMap.put("mkdir", reservedWord); tokenMap.put("mklink", reservedWord); tokenMap.put("mode", reservedWord); tokenMap.put("more", reservedWord); tokenMap.put("move", reservedWord); tokenMap.put("msav", reservedWord); tokenMap.put("msd", reservedWord); tokenMap.put("mscdex", reservedWord); tokenMap.put("nbtstat", reservedWord); tokenMap.put("net", reservedWord); tokenMap.put("netstat", reservedWord); tokenMap.put("nlsfunc", reservedWord); tokenMap.put("not", reservedWord); tokenMap.put("nslookup", reservedWord); tokenMap.put("path", reservedWord); tokenMap.put("pathping", reservedWord); tokenMap.put("pause", reservedWord); tokenMap.put("ping", reservedWord); tokenMap.put("power", reservedWord); tokenMap.put("print", reservedWord); tokenMap.put("prompt", reservedWord); tokenMap.put("pushd", reservedWord); tokenMap.put("popd", reservedWord); tokenMap.put("qbasic", reservedWord); tokenMap.put("rd", reservedWord); tokenMap.put("ren", reservedWord); tokenMap.put("rename", reservedWord); tokenMap.put("rmdir", reservedWord); tokenMap.put("route", reservedWord); tokenMap.put("sc", reservedWord); tokenMap.put("scandisk", reservedWord); tokenMap.put("scandreg", reservedWord); tokenMap.put("set", reservedWord); tokenMap.put("setx", reservedWord); tokenMap.put("setver", reservedWord); tokenMap.put("share", reservedWord); tokenMap.put("shutdown", reservedWord); tokenMap.put("smartdrv", reservedWord); tokenMap.put("sort", reservedWord); tokenMap.put("subset", reservedWord); tokenMap.put("switches", reservedWord); tokenMap.put("sys", reservedWord); tokenMap.put("time", reservedWord); tokenMap.put("tracert", reservedWord); tokenMap.put("tree", reservedWord); tokenMap.put("type", reservedWord); tokenMap.put("undelete", reservedWord); tokenMap.put("unformat", reservedWord); tokenMap.put("unlock", reservedWord); tokenMap.put("ver", reservedWord); tokenMap.put("verify", reservedWord); tokenMap.put("vol", reservedWord); tokenMap.put("xcopy", reservedWord); return tokenMap; } /** * Returns a list of tokens representing the given text. * * @param text The text to break into tokens. * @param startTokenType The token with which to start tokenizing. * @param startOffset The offset at which the line of tokens begins. * @return A linked list of tokens representing <code>text</code>. */ public Token getTokenList(Segment text, int startTokenType, final int startOffset) { resetTokenList(); char[] array = text.array; int offset = text.offset; int count = text.count; int end = offset + count; // See, when we find a token, its starting position is always of the form: // 'startOffset + (currentTokenStart-offset)'; but since startOffset and // offset are constant, tokens' starting positions become: // 'newStartOffset+currentTokenStart' for one less subtraction operation. int newStartOffset = startOffset - offset; currentTokenStart = offset; currentTokenType = startTokenType; //beginning: for (int i=offset; i<end; i++) { char c = array[i]; switch (currentTokenType) { case Token.NULL: currentTokenStart = i; // Starting a new token here. switch (c) { case ' ': case '\t': currentTokenType = Token.WHITESPACE; break; case '"': currentTokenType = Token.ERROR_STRING_DOUBLE; break; case '%': currentTokenType = Token.VARIABLE; break; // The "separators". case '(': case ')': addToken(text, currentTokenStart,i, Token.SEPARATOR, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; break; // The "separators2". case ',': case ';': addToken(text, currentTokenStart,i, Token.IDENTIFIER, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; break; // Newer version of EOL comments, or a label case ':': // If this will be the first token added, it is // a new-style comment or a label if (firstToken==null) { if (i<end-1 && array[i+1]==':') { // new-style comment currentTokenType = Token.COMMENT_EOL; } else { // Label currentTokenType = Token.PREPROCESSOR; } } else { // Just a colon currentTokenType = Token.IDENTIFIER; } break; default: // Just to speed things up a tad, as this will usually be the case (if spaces above failed). if (RSyntaxUtilities.isLetterOrDigit(c) || c=='\\') { currentTokenType = Token.IDENTIFIER; break; } int indexOf = operators.indexOf(c,0); if (indexOf>-1) { addToken(text, currentTokenStart,i, Token.OPERATOR, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; break; } else { currentTokenType = Token.IDENTIFIER; break; } } // End of switch (c). break; case Token.WHITESPACE: switch (c) { case ' ': case '\t': break; // Still whitespace. case '"': addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_STRING_DOUBLE; break; case '%': addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.VARIABLE; break; // The "separators". case '(': case ')': addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart); addToken(text, i,i, Token.SEPARATOR, newStartOffset+i); currentTokenType = Token.NULL; break; // The "separators2". case ',': case ';': addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart); addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i); currentTokenType = Token.NULL; break; // Newer version of EOL comments, or a label case ':': addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart); currentTokenStart = i; // If the previous (whitespace) token was the first token // added, this is a new-style comment or a label if (firstToken.getNextToken()==null) { if (i<end-1 && array[i+1]==':') { // new-style comment currentTokenType = Token.COMMENT_EOL; } else { // Label currentTokenType = Token.PREPROCESSOR; } } else { // Just a colon currentTokenType = Token.IDENTIFIER; } break; default: // Add the whitespace token and start anew. addToken(text, currentTokenStart,i-1, Token.WHITESPACE, newStartOffset+currentTokenStart); currentTokenStart = i; // Just to speed things up a tad, as this will usually be the case (if spaces above failed). if (RSyntaxUtilities.isLetterOrDigit(c) || c=='\\') { currentTokenType = Token.IDENTIFIER; break; } int indexOf = operators.indexOf(c,0); if (indexOf>-1) { addToken(text, currentTokenStart,i, Token.OPERATOR, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; break; } else { currentTokenType = Token.IDENTIFIER; } } // End of switch (c). break; default: // Should never happen case Token.IDENTIFIER: switch (c) { case ' ': case '\t': // Check for REM comments. if (i-currentTokenStart==3 && (array[i-3]=='r' || array[i-3]=='R') && (array[i-2]=='e' || array[i-2]=='E') && (array[i-1]=='m' || array[i-1]=='M')) { currentTokenType = Token.COMMENT_EOL; break; } addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.WHITESPACE; break; case '"': addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.ERROR_STRING_DOUBLE; break; case '%': addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); currentTokenStart = i; currentTokenType = Token.VARIABLE; break; // Should be part of identifiers, but not at end of "REM". case '\\': // Check for REM comments. if (i-currentTokenStart==3 && (array[i-3]=='r' || array[i-3]=='R') && (array[i-2]=='e' || array[i-2]=='E') && (array[i-1]=='m' || array[i-1]=='M')) { currentTokenType = Token.COMMENT_EOL; } break; case '.': case '_': break; // Characters good for identifiers. // The "separators". case '(': case ')': addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); addToken(text, i,i, Token.SEPARATOR, newStartOffset+i); currentTokenType = Token.NULL; break; // The "separators2". case ',': case ';': addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); addToken(text, i,i, Token.IDENTIFIER, newStartOffset+i); currentTokenType = Token.NULL; break; default: // Just to speed things up a tad, as this will usually be the case. if (RSyntaxUtilities.isLetterOrDigit(c) || c=='\\') { break; } int indexOf = operators.indexOf(c); if (indexOf>-1) { addToken(text, currentTokenStart,i-1, Token.IDENTIFIER, newStartOffset+currentTokenStart); addToken(text, i,i, Token.OPERATOR, newStartOffset+i); currentTokenType = Token.NULL; break; } // Otherwise, fall through and assume we're still okay as an IDENTIFIER... } // End of switch (c). break; case Token.COMMENT_EOL: i = end - 1; addToken(text, currentTokenStart,i, Token.COMMENT_EOL, newStartOffset+currentTokenStart); // We need to set token type to null so at the bottom we don't add one more token. currentTokenType = Token.NULL; break; case Token.PREPROCESSOR: // Used for labels i = end - 1; addToken(text, currentTokenStart,i, Token.PREPROCESSOR, newStartOffset+currentTokenStart); // We need to set token type to null so at the bottom we don't add one more token. currentTokenType = Token.NULL; break; case Token.ERROR_STRING_DOUBLE: if (c=='"') { addToken(text, currentTokenStart,i, Token.LITERAL_STRING_DOUBLE_QUOTE, newStartOffset+currentTokenStart); currentTokenStart = i + 1; currentTokenType = Token.NULL; } // Otherwise, we're still an unclosed string... break; case Token.VARIABLE: if (i==currentTokenStart+1) { // first character after '%'. varType = VariableType.NORMAL_VAR; switch (c) { case '{': varType = VariableType.BRACKET_VAR; break; case '~': varType = VariableType.TILDE_VAR; break; case '%': varType = VariableType.DOUBLE_PERCENT_VAR; break; default: if (RSyntaxUtilities.isLetter(c) || c=='_' || c==' ') { // No tab, just space; spaces are okay in variable names. break; } else if (RSyntaxUtilities.isDigit(c)) { // Single-digit command-line argument ("%1"). addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; break; } else { // Anything else, ???. addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart); // ??? i--; currentTokenType = Token.NULL; break; } } // End of switch (c). } else { // Character other than first after the '%'. switch (varType) { case BRACKET_VAR: if (c=='}') { addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; } break; case TILDE_VAR: if (!RSyntaxUtilities.isLetterOrDigit(c)) { addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart); i--; currentTokenType = Token.NULL; } break; case DOUBLE_PERCENT_VAR: // Can be terminated with "%%", or (essentially) a space. // substring chars are valid if (c=='%') { if (i<end-1 && array[i+1]=='%') { i++; addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; } } else if (!RSyntaxUtilities.isLetterOrDigit(c) && c!=':' && c!='~' && c!=',' && c!='-') { addToken(text, currentTokenStart,i-1, Token.VARIABLE, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; i--; } break; default: if (c=='%') { addToken(text, currentTokenStart,i, Token.VARIABLE, newStartOffset+currentTokenStart); currentTokenType = Token.NULL; } break; } } break; } // End of switch (currentTokenType). } // End of for (int i=offset; i<end; i++). // Deal with the (possibly there) last token. if (currentTokenType != Token.NULL) { // Check for REM comments. if (end-currentTokenStart==3 && (array[end-3]=='r' || array[end-3]=='R') && (array[end-2]=='e' || array[end-2]=='E') && (array[end-1]=='m' || array[end-1]=='M')) { currentTokenType = Token.COMMENT_EOL; } addToken(text, currentTokenStart,end-1, currentTokenType, newStartOffset+currentTokenStart); } addNullToken(); // Return the first token in our linked list. return firstToken; } private enum VariableType { BRACKET_VAR, TILDE_VAR, NORMAL_VAR, DOUBLE_PERCENT_VAR; // Escaped '%' var, special highlighting rules? } }