package net.sourceforge.texlipse.editor.partitioner; import org.eclipse.jface.text.IDocument; import org.eclipse.jface.text.rules.ICharacterScanner; import org.eclipse.jface.text.rules.IPartitionTokenScanner; import org.eclipse.jface.text.rules.IToken; import org.eclipse.jface.text.rules.Token; /** * This scanner recognizes math, verbatim and comments. */ public class FastLaTeXPartitionScanner implements IPartitionTokenScanner { public final static String TEX_DEFAULT = "__tex_default"; public final static String TEX_COMMENT = "__tex_commentPartition"; public static final String TEX_MATH = "__tex_mathPartition"; public static final String TEX_CURLY_BRACKETS = "__tex_curlyBracketPartition"; public static final String TEX_SQUARE_BRACKETS = "__tex_squareBracketPartition"; public static final String TEX_VERBATIM = "__tex_VerbatimPartition"; public static final String TEX_TIKZPIC = "__tex_TikzPartition"; public static final String[] TEX_PARTITION_TYPES = new String[] { IDocument.DEFAULT_CONTENT_TYPE, TEX_COMMENT, TEX_MATH, TEX_CURLY_BRACKETS, TEX_SQUARE_BRACKETS, TEX_VERBATIM, TEX_TIKZPIC}; // states private static final int TEX = 0; private static final int COMMENT = 1; private static final int MATH = 2; private static final int VERBATIM = 3; private static final int ARGS = 4; private static final int OPT_ARGS = 5; private static final int TIKZPIC = 6; private static final String BEGIN = "begin"; private static final String END = "end"; private static final String VERB = "verb"; private static final String LSTINLINE = "lstinline"; private static final String TIKZCOMMAND = "tikz"; private static final String TIKZPICTURE_BEGIN = "tikzpicture"; private static final String TIKZPICTURE_END = "endtikzpicture"; private static final String[] MATHRULES = {"equation", "eqnarray", "align", "alignat", "flalign", "multline", "gather"}; private static final String[] MATHRULESSTAR = {"equation*", "eqnarray*", "align*", "alignat*", "flalign*", "multline*", "gather*"}; private static final String[] MATHRULESNOSTAR = {"math", "displaymath"}; private static final String[] COMMENTRULES = {"comment"}; private static final String[] VERBATIMRULES = {"verbatim", "Verbatim", "lstlisting"}; private static final String[] TIKZRULES = {"tikzpicture"}; /** The scanner. */ private final BufferedDocumentScanner fScanner= new BufferedDocumentScanner(1000); // faster implementation private final IToken[] fTokens= new IToken[] { new Token(null), new Token(TEX_COMMENT), new Token(TEX_MATH), new Token(TEX_VERBATIM), new Token(TEX_CURLY_BRACKETS), new Token(TEX_SQUARE_BRACKETS), new Token(TEX_TIKZPIC) }; private int fTokenOffset; private int fTokenLength; private String currContentType; public void setPartialRange(IDocument document, int offset, int length, String contentType, int partitionOffset) { //Set start of range to partitionOffset if contentType is not default if(!IDocument.DEFAULT_CONTENT_TYPE.equals(contentType) && partitionOffset != -1 && partitionOffset < offset){ fScanner.setRange(document, partitionOffset, length+(offset-partitionOffset)); fTokenOffset= partitionOffset; fTokenLength= 0; currContentType = null; }else{ fScanner.setRange(document, offset, length); fTokenOffset= offset; fTokenLength= 0; currContentType = contentType; } } public void setRange(IDocument document, int offset, int length) { currContentType = null; fScanner.setRange(document, offset, length); fTokenOffset= offset; fTokenLength= 0; } public int getTokenOffset() { return fTokenOffset; } public int getTokenLength() { return fTokenLength; } /* * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken() */ public IToken nextToken() { fTokenOffset += fTokenLength; fTokenLength= 0; int ch= fScanner.read(); if(ch == ICharacterScanner.EOF){ fTokenLength++; return Token.EOF; } if(currContentType != null){ //Ignore this case } // characters switch (ch) { case '\\': int c1 = fScanner.read(); if (c1 != 'b' && c1 != '[' && c1 != '(' && c1 != 'v' && c1 != 'l' && c1 != 't') { fTokenLength+=2; return fTokens[TEX]; } if (c1 == '(' || c1 == '[') { int offsetEnd = fTokenOffset; ch = fScanner.read(); offsetEnd+=2; while (true){ ch = fScanner.read(); offsetEnd++; if (ch == '\\'){ int ch2 = fScanner.read(); offsetEnd++; if ((ch2 == ']' && c1 == '[') || (ch2 == ')' && c1 == '(')) { offsetEnd++; fTokenLength = offsetEnd-fTokenOffset; return fTokens[MATH]; } } else if (ch == '%') { offsetEnd += ignoreComment(); } else if (ch == '$') { //Something wrong in the code //Tag everything except $ as MATH and stop fScanner.unread(); fTokenLength = offsetEnd-fTokenOffset; return fTokens[MATH]; } else if (ch == ICharacterScanner.EOF){ fTokenLength = offsetEnd-fTokenOffset-1; return fTokens[MATH]; } } } else if (c1 == 'b') { return checkForEnv(); } else if (c1 == 'v' || c1 == 'l') { return checkForVerb(); } else if (c1 == 't') { return checkForTikz(); } else { fTokenLength+=2; return fTokens[TEX]; } case '$': int offsetEnd = fTokenOffset; c1 = fScanner.read(); offsetEnd+=2; while (true) { ch = fScanner.read(); offsetEnd++; if (ch == '\\'){ ch = fScanner.read(); offsetEnd++; if (ch == '[' || ch == ']' || ch == '(' || ch == ')'){ //Something is wrong here //Tag everything except \( as MATH and stop fScanner.unread(); fScanner.unread(); offsetEnd -= 2; fTokenLength = offsetEnd-fTokenOffset; return fTokens[MATH]; } } else if (ch == '%') { offsetEnd += ignoreComment(); } else if (ch == '$' && c1 != '$') { fTokenLength = offsetEnd-fTokenOffset; return fTokens[MATH]; } else if (ch == '$') { c1 = ' '; } else if (ch == ICharacterScanner.EOF) { fTokenLength = offsetEnd-fTokenOffset-1; return fTokens[MATH]; } } case '%': offsetEnd = fTokenOffset; offsetEnd++; while (true) { ch = fScanner.read(); offsetEnd++; if (ch == '\r' || ch == '\n') { fScanner.unread(); offsetEnd--; fTokenLength = offsetEnd-fTokenOffset; return fTokens[COMMENT]; } else if (ch == ICharacterScanner.EOF) { fTokenLength = offsetEnd-fTokenOffset-1; return fTokens[COMMENT]; } } case '{': return scanBracket('{', '}', ARGS, fTokenOffset + 1); case '[': return scanBracket('[', ']', OPT_ARGS, fTokenOffset + 1); default: offsetEnd = fTokenOffset+1; while (ch != '$' && ch != '\\' && ch != '%' && ch != '{' && ch != '[' && ch != ICharacterScanner.EOF) { ch = fScanner.read(); offsetEnd++; } if (ch != ICharacterScanner.EOF) fScanner.unread(); offsetEnd--; fTokenLength=offsetEnd-fTokenOffset; return fTokens[TEX]; } } private int ignoreComment() { int ch = fScanner.read(); int r=1; while (ch != '\r' && ch != '\n' && ch != ICharacterScanner.EOF) { ch = fScanner.read(); r++; } return r; } private int checkForCommand(String command, int start) { for (int i=start; i<command.length(); i++) { int ch = fScanner.read(); if (command.charAt(i) != ch) { unReadScanner(i - start + 1); return 0; } } return command.length() - start; } private IToken checkForVerb() { int o = checkForCommand(VERB, 1); if (o == 0) { o = checkForCommand(LSTINLINE, 1); if (o == 0) { fTokenLength += 2; return fTokens[TEX]; } } int offsetEnd = fTokenOffset; offsetEnd += o + 2; //verbch is the termination character int verbch = fScanner.read(); offsetEnd++; if (Character.isLetter(verbch)) { fTokenLength = offsetEnd-fTokenOffset; return fTokens[TEX]; } int ch = fScanner.read(); offsetEnd++; while (ch != verbch && ch != ICharacterScanner.EOF && ch != '\r' && ch != '\n') { ch = fScanner.read(); offsetEnd++; } if (ch != verbch) offsetEnd--; fTokenLength = offsetEnd-fTokenOffset; return fTokens[VERBATIM]; } private IToken checkForTikz() { boolean single = true; int offsetEnd = fTokenOffset + 2; int o = checkForCommand(TIKZCOMMAND, 1); if (o > 0) { offsetEnd += o; int ob = checkForCommand(TIKZPICTURE_BEGIN, 4); if (ob > 0) { single = false; offsetEnd += ob; } } else { fTokenLength += 2; return fTokens[TEX]; } int ch = fScanner.read(); offsetEnd++; // Skip optional arguments at beginning of environment if (ch == '[') { boolean skip = true; while (skip) { ch = fScanner.read(); offsetEnd++; if (ch == '\\') { ch = fScanner.read(); offsetEnd++; } else if (ch == ']') { ch = fScanner.read(); offsetEnd++; skip = false; } else if (ch == '%') { offsetEnd += ignoreComment(); } else if (ch == ICharacterScanner.EOF) { // Something got screwed up when setting optional arguments // for the environment - mark everything as plain latex fTokenLength = offsetEnd - fTokenOffset - 1; return fTokens[TEX]; } } } while (true) { ch = fScanner.read(); offsetEnd++; if (single && ch == ';') { fTokenLength = offsetEnd - fTokenOffset; return fTokens[TIKZPIC]; } else if (ch == '%') { offsetEnd += ignoreComment(); } else if (ch == '\\') { ch = fScanner.read(); offsetEnd++; if (!single && ch == 'e') { o = checkForCommand(TIKZPICTURE_END, 1); if (o > 0) { fTokenLength = offsetEnd + o - fTokenOffset; return fTokens[TIKZPIC]; } } } else if (ch == ICharacterScanner.EOF) { fTokenLength = offsetEnd - fTokenOffset - 1; return fTokens[TIKZPIC]; } } } private IToken checkForEnv() { int o = checkForCommand(BEGIN, 1); if (o == 0) { fTokenLength += 2; return fTokens[TEX]; } int offsetEnd = fTokenOffset; offsetEnd += 6; int ch = fScanner.read(); offsetEnd++; while (Character.isWhitespace(ch)) { ch = fScanner.read(); offsetEnd++; } if (ch != '{'){ unReadScanner(offsetEnd - fTokenOffset - 2); fTokenLength += 2; return fTokens[TEX]; } final StringBuilder b = new StringBuilder(); ch = fScanner.read(); offsetEnd++; while (ch != '}' && ch != ICharacterScanner.EOF && ch != '{' && ch != '\\'){ b.append((char)ch); ch = fScanner.read(); offsetEnd++; } String envName = b.toString(); if (getEnvIndex(envName) != TEX) { return checkForEndEnv(envName, offsetEnd); } else { unReadScanner(offsetEnd - fTokenOffset - 2); fTokenLength += 2; return fTokens[TEX]; } } private IToken checkForEndEnv(String name, int offsetEnd) { while (true) { int ch = fScanner.read(); offsetEnd++; if (ch == '%') { offsetEnd += ignoreComment(); ch = fScanner.read(); offsetEnd++; } if (ch == '\\') { boolean isEnv = true; for (int i=0; i<END.length(); i++) { ch = fScanner.read(); offsetEnd++; if (END.charAt(i) != ch) { isEnv = false; break; } } if (!isEnv) continue; ch = fScanner.read(); offsetEnd++; while (Character.isWhitespace(ch)) { ch = fScanner.read(); offsetEnd++; } if (ch != '{') continue; StringBuilder b = new StringBuilder(); ch = fScanner.read(); offsetEnd++; while (ch != '}' && ch != ICharacterScanner.EOF && ch != '{' && ch != '\\'){ b.append((char)ch); ch = fScanner.read(); offsetEnd++; } String envName = b.toString(); if (envName.equals(name)) { fTokenLength = offsetEnd-fTokenOffset; return fTokens[getEnvIndex(envName)]; } } else if (ch == ICharacterScanner.EOF) { fTokenLength = offsetEnd-fTokenOffset-1; return fTokens[getEnvIndex(name)]; } } } private IToken scanBracket(int openChar, int closeChar, int type, int currentOffset) { int ch; int offsetEnd = currentOffset; int stack = 0; while (true) { ch = fScanner.read(); offsetEnd++; if (ch == closeChar) { stack--; if (stack < 0) { fTokenLength = offsetEnd - fTokenOffset; return fTokens[type]; } } else if (ch == openChar) { stack++; } else if (ch == '%') { offsetEnd += ignoreComment(); } else if (ch == '\\') { ch = fScanner.read(); offsetEnd++; } else if (ch == ICharacterScanner.EOF) { fTokenLength = offsetEnd - fTokenOffset - 1; return fTokens[type]; } } } private boolean unReadScanner(int readChar) { for (int j = 0; j < readChar; j++) fScanner.unread(); return false; } private static final boolean matchesAny(final String envName, final String[] rules) { for (String st : rules) { if (st.equals(envName)) return true; } return false; } private static final int getEnvIndex(final String envName) { if (isMathEnv(envName)) return MATH; else if (isVerbatimEnv(envName)) return VERBATIM; else if (isCommentEnv(envName)) return COMMENT; else if (isTikzEnv(envName)) return TIKZPIC; else return TEX; } /** * * @param envName Name of the environment * @return true, if the given name denotes a math environment */ public static final boolean isMathEnv(String envName) { return matchesAny(envName, MATHRULES) || matchesAny(envName, MATHRULESSTAR) || matchesAny(envName, MATHRULESNOSTAR); } public static final boolean isVerbatimEnv(String envName) { return matchesAny(envName, VERBATIMRULES); } public static final boolean isCommentEnv(String envName) { return matchesAny(envName, COMMENTRULES); } public static final boolean isTikzEnv(String envName) { return matchesAny(envName, TIKZRULES); } }