/* * TokenMarker.java - Tokenizes lines of text * :tabSize=4:indentSize=4:noTabs=false: * :folding=explicit:collapseFolds=1: * * Copyright (C) 1998, 2003 Slava Pestov * Copyright (C) 1999, 2000 mike dillon * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ package org.gjt.sp.jedit.syntax; //{{{ Imports import javax.swing.text.Segment; import java.lang.ref.WeakReference; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.gjt.sp.jedit.TextUtilities; import org.gjt.sp.util.SegmentCharSequence; //}}} /** * A token marker splits lines of text into tokens. Each token carries * a length field and an identification tag that can be mapped to a color * or font style for painting that token. * * @author Slava Pestov, mike dillon * @version $Id$ * * @see org.gjt.sp.jedit.syntax.Token * @see org.gjt.sp.jedit.syntax.TokenHandler */ public class TokenMarker { //{{{ TokenMarker constructor public TokenMarker() {} //}}} //{{{ addRuleSet() method public void addRuleSet(ParserRuleSet rules) { ruleSets.put(rules.getSetName(), rules); if ("MAIN".equals(rules.getSetName())) mainRuleSet = rules; } //}}} //{{{ getMainRuleSet() method public ParserRuleSet getMainRuleSet() { return mainRuleSet; } //}}} //{{{ getRuleSet() method public ParserRuleSet getRuleSet(String setName) { return ruleSets.get(setName); } //}}} //{{{ getRuleSets() method /** * @since jEdit 4.2pre3 */ public ParserRuleSet[] getRuleSets() { return ruleSets.values().toArray(new ParserRuleSet[ruleSets.size()]); } //}}} //{{{ markTokens() method /** * Do not call this method directly; call Buffer.markTokens() instead. * * @param prevContext the context of the previous line, it can be null * @param tokenHandler the token handler * @param line a segment containing the content of the line */ public synchronized LineContext markTokens(LineContext prevContext, TokenHandler tokenHandler, Segment line) { //{{{ Set up some instance variables // this is to avoid having to pass around lots and lots of // parameters. this.tokenHandler = tokenHandler; this.line = line; lastOffset = line.offset; lineLength = line.count + line.offset; context = new LineContext(); if(prevContext == null) { context.rules = getMainRuleSet(); context.escapeRule = context.rules.getEscapeRule(); } else { context.parent = prevContext.parent; context.setInRule(prevContext.inRule); context.rules = prevContext.rules; context.spanEndSubst = prevContext.spanEndSubst; context.spanEndSubstRegex = prevContext.spanEndSubstRegex; } keywords = context.rules.getKeywords(); seenWhitespaceEnd = false; whitespaceEnd = line.offset; //}}} //{{{ Main parser loop int terminateChar = context.rules.getTerminateChar(); boolean terminated = false; main_loop: for(pos = line.offset; pos < lineLength; pos++) { //{{{ check if we have to stop parsing (happens if the terminateChar has been exceeded) if(terminateChar >= 0 && pos - line.offset >= terminateChar && !terminated) { terminated = true; context = new LineContext(ParserRuleSet .getStandardRuleSet(context.rules .getDefault()),context); keywords = context.rules.getKeywords(); } //}}} //{{{ Check for the escape rule before anything else. if (context.escapeRule != null && handleRuleStart(context.escapeRule)) { continue main_loop; } //}}} //{{{ check for end of delegate if (context.parent != null && context.parent.inRule != null && checkDelegateEnd(context.parent.inRule)) { seenWhitespaceEnd = true; continue main_loop; } //}}} //{{{ check every rule Character ch = Character.valueOf(line.array[pos]); List<ParserRule> rules = context.rules.getRules(ch); for (ParserRule rule : rules) { // stop checking rules if there was a match if (handleRuleStart(rule)) { seenWhitespaceEnd = true; continue main_loop; } } //}}} //{{{ check if current character is a word separator if(Character.isWhitespace(ch)) { if(!seenWhitespaceEnd) whitespaceEnd = pos + 1; if(context.inRule != null) handleRuleEnd(context.inRule); handleNoWordBreak(); markKeyword(false); if(lastOffset != pos) { tokenHandler.handleToken(line, context.rules.getDefault(), lastOffset - line.offset, pos - lastOffset, context); } tokenHandler.handleToken(line, context.rules.getDefault(), pos - line.offset,1,context); lastOffset = pos + 1; } else { if(keywords != null || context.rules.getRuleCount() != 0) { String noWordSep = context.rules.getNoWordSep(); if(!Character.isLetterOrDigit(ch) && noWordSep.indexOf(ch) == -1) { if(context.inRule != null) handleRuleEnd(context.inRule); handleNoWordBreak(); markKeyword(true); tokenHandler.handleToken(line, context.rules.getDefault(), lastOffset - line.offset,1, context); lastOffset = pos + 1; } } seenWhitespaceEnd = true; } //}}} } //}}} //{{{ Mark all remaining characters pos = lineLength; if(context.inRule != null) handleRuleEnd(context.inRule); handleNoWordBreak(); markKeyword(true); //}}} //{{{ Unwind any NO_LINE_BREAK parent delegates unwind: while(context.parent != null) { ParserRule rule = context.parent.inRule; if((rule != null && (rule.action & ParserRule.NO_LINE_BREAK) == ParserRule.NO_LINE_BREAK) || terminated) { context = context.parent; keywords = context.rules.getKeywords(); context.setInRule(null); } else break unwind; } //}}} tokenHandler.handleToken(line,Token.END, pos - line.offset,0,context); context = context.intern(); tokenHandler.setLineContext(context); /* for GC. */ this.tokenHandler = null; this.line = null; return context; } //}}} //{{{ Private members //{{{ Instance variables private final Map<String, ParserRuleSet> ruleSets = new Hashtable<String, ParserRuleSet>(64); private ParserRuleSet mainRuleSet; // Instead of passing these around to each method, we just store them // as instance variables. Note that this is not thread-safe. private TokenHandler tokenHandler; /** The line from which we will mark the tokens. */ private Segment line; /** The context of the current line. */ private LineContext context; private KeywordMap keywords; private final Segment pattern = new Segment(); private int lastOffset; private int lineLength; private int pos; private int whitespaceEnd; private boolean seenWhitespaceEnd; //}}} //{{{ checkDelegateEnd() method private boolean checkDelegateEnd(ParserRule rule) { if(rule.end == null && rule.endRegexp == null) return false; LineContext tempContext = context; context = context.parent; keywords = context.rules.getKeywords(); boolean handled = handleRuleEnd(rule); context = tempContext; keywords = context.rules.getKeywords(); if (handled) { if(context.inRule != null) handleRuleEnd(context.inRule); markKeyword(true); context = (LineContext)context.parent.clone(); tokenHandler.handleToken(line, matchToken(context.inRule, context.inRule, context), pos - line.offset,pattern.count,context); keywords = context.rules.getKeywords(); context.setInRule(null); lastOffset = pos + pattern.count; // move pos to last character of match sequence pos += pattern.count - 1; return true; } return false; } //}}} //{{{ offsetMatches /** * Checks if the offset matches given position-match-hint of * ParserRule. */ private boolean offsetMatches(int offset, int posMatch) { if((posMatch & ParserRule.AT_LINE_START) == ParserRule.AT_LINE_START) { if(offset != line.offset) { return false; } } else if((posMatch & ParserRule.AT_WHITESPACE_END) == ParserRule.AT_WHITESPACE_END) { if(offset != whitespaceEnd) { return false; } } else if((posMatch & ParserRule.AT_WORD_START) == ParserRule.AT_WORD_START) { if(offset != lastOffset) { return false; } } return true; } //}}} //{{{ handleRuleStart() method /** * Checks if the rule matches the line at the current position * as its start and handles the rule if it does match */ private boolean handleRuleStart(ParserRule checkRule) { // Some rules can only match in certain locations if (null == checkRule.upHashChars) { if (checkRule.upHashChar != null && (pos + checkRule.upHashChar.length < line.array.length) && !checkHashString(checkRule)) { return false; } } else { if (-1 == Arrays.binarySearch( checkRule.upHashChars, Character.toUpperCase(line.array[pos]))) { return false; } } int offset = (checkRule.action & ParserRule.MARK_PREVIOUS) != 0 ? lastOffset : pos; if(!offsetMatches(offset, checkRule.startPosMatch)) { return false; } int matchedChars; Matcher match = null; // See if the rule's start sequence matches here if((checkRule.action & ParserRule.REGEXP) == 0) { pattern.array = checkRule.start; pattern.offset = 0; pattern.count = pattern.array.length; matchedChars = pattern.count; if(!SyntaxUtilities.regionMatches(context.rules .getIgnoreCase(),line,pos,pattern.array)) { return false; } } else { // note that all regexps start with \A so they only // match the start of the string //int matchStart = pos - line.offset; CharSequence charSeq = new SegmentCharSequence(line, pos - line.offset, line.count - (pos - line.offset)); match = checkRule.startRegexp.matcher(charSeq); if(!match.lookingAt()) { return false; } else if(match.start() != 0) { throw new InternalError("Can't happen"); } else { matchedChars = match.end(); /* workaround for hang if match was * zero-width. not sure if there is * a better way to handle this */ if(matchedChars == 0) matchedChars = 1; } } if((checkRule.action & ParserRule.IS_ESCAPE) == ParserRule.IS_ESCAPE) { pos += pattern.count; } else { if(context.inRule != null) handleRuleEnd(context.inRule); markKeyword((checkRule.action & ParserRule.MARK_PREVIOUS) != ParserRule.MARK_PREVIOUS); switch(checkRule.action & ParserRule.MAJOR_ACTIONS) { //{{{ SEQ case ParserRule.SEQ: context.spanEndSubst = null; context.spanEndSubstRegex = null; if((checkRule.action & ParserRule.REGEXP) != 0) { handleTokenWithSpaces(tokenHandler, checkRule.token, pos - line.offset, matchedChars, context); } else { tokenHandler.handleToken(line, checkRule.token, pos - line.offset, matchedChars,context); } // a DELEGATE attribute on a SEQ changes the // ruleset from the end of the SEQ onwards if(checkRule.delegate != null) { context = new LineContext( checkRule.delegate, context.parent); keywords = context.rules.getKeywords(); } break; //}}} //{{{ SPAN, EOL_SPAN case ParserRule.SPAN: case ParserRule.EOL_SPAN: context.setInRule(checkRule); byte tokenType = matchToken(checkRule, context.inRule, context); if((checkRule.action & ParserRule.REGEXP) != 0) { handleTokenWithSpaces(tokenHandler, tokenType, pos - line.offset, matchedChars, context); } else { tokenHandler.handleToken(line,tokenType, pos - line.offset, matchedChars,context); } char[] spanEndSubst = null; Pattern spanEndSubstRegex = null; /* substitute result of matching the rule start * into the end string. * * eg, in shell script mode, <<\s*(\w+) is * matched into \<$1\> to construct rules for * highlighting read-ins like this <<EOF * ... * EOF */ if(match != null && match.groupCount() > 0) { if (checkRule.end != null) { spanEndSubst = substitute(match, checkRule.end, false); } else if (checkRule.endRegexp != null) { char[] pattern = checkRule.endRegexp.pattern().toCharArray(); pattern = substitute(match, pattern, true); spanEndSubstRegex = Pattern.compile(new String(pattern)); } } context.spanEndSubst = spanEndSubst; context.spanEndSubstRegex = spanEndSubstRegex; context = new LineContext( checkRule.delegate, context); keywords = context.rules.getKeywords(); break; //}}} //{{{ MARK_FOLLOWING case ParserRule.MARK_FOLLOWING: tokenHandler.handleToken(line, matchToken(checkRule, checkRule, context), pos - line.offset, pattern.count,context); context.spanEndSubst = null; context.spanEndSubstRegex = null; context.setInRule(checkRule); break; //}}} //{{{ MARK_PREVIOUS case ParserRule.MARK_PREVIOUS: context.spanEndSubst = null; context.spanEndSubstRegex = null; if(pos != lastOffset) { tokenHandler.handleToken(line, checkRule.token, lastOffset - line.offset, pos - lastOffset, context); } tokenHandler.handleToken(line, matchToken(checkRule, checkRule, context), pos - line.offset,pattern.count, context); break; //}}} default: throw new InternalError("Unhandled major action"); } // move pos to last character of match sequence pos += matchedChars - 1; lastOffset = pos + 1; // break out of inner for loop to check next char } return true; } //}}} //{{{ handleRuleEnd() method /** * Checks if the rule matches the line at the current position * as its end and handles the rule if it does match */ private boolean handleRuleEnd(ParserRule checkRule) { // Some rules can only match in certain locations int offset = (checkRule.action & ParserRule.MARK_PREVIOUS) != 0 ? lastOffset : pos; if (!offsetMatches(offset, checkRule.endPosMatch)) { return false; } // See if the rule's end sequence matches here if((checkRule.action & ParserRule.MARK_FOLLOWING) == 0) { if ((checkRule.action & ParserRule.END_REGEXP) == 0) { if(context.spanEndSubst != null) pattern.array = context.spanEndSubst; else pattern.array = checkRule.end; pattern.offset = 0; pattern.count = pattern.array.length; if(!SyntaxUtilities.regionMatches(context.rules .getIgnoreCase(),line,pos,pattern.array)) { return false; } } else { CharSequence charSeq = new SegmentCharSequence(line, pos - line.offset, line.count - (pos - line.offset)); Pattern regex; if (context.spanEndSubstRegex != null) regex = context.spanEndSubstRegex; else regex = checkRule.endRegexp; Matcher match = regex.matcher(charSeq); if (!match.lookingAt()) { return false; } else { // This is used in checkDelegateEnd pattern.count = match.end(); } } } // Escape rules are handled in handleRuleStart() assert (checkRule.action & ParserRule.IS_ESCAPE) == 0; // Handle end of MARK_FOLLOWING if((context.inRule.action & ParserRule.MARK_FOLLOWING) != 0) { if(pos != lastOffset) { tokenHandler.handleToken(line, context.inRule.token, lastOffset - line.offset, pos - lastOffset,context); } lastOffset = pos; context.setInRule(null); } return true; } //}}} //{{{ handleNoWordBreak() method private void handleNoWordBreak() { if(context.parent != null) { ParserRule rule = context.parent.inRule; if(rule != null && (context.parent.inRule.action & ParserRule.NO_WORD_BREAK) != 0) { if(pos != lastOffset) { tokenHandler.handleToken(line, rule.token, lastOffset - line.offset, pos - lastOffset,context); } lastOffset = pos; context = context.parent; keywords = context.rules.getKeywords(); context.setInRule(null); } } } //}}} //{{{ handleTokenWithSpaces() method private void handleTokenWithSpaces(TokenHandler tokenHandler, byte tokenType, int start, int len, LineContext context) { int last = start; int end = start + len; for(int i = start; i < end; i++) { if(Character.isWhitespace(line.array[i + line.offset])) { if(last != i) { tokenHandler.handleToken(line, tokenType,last,i - last,context); } tokenHandler.handleToken(line,tokenType,i,1,context); last = i + 1; } } if(last != end) { tokenHandler.handleToken(line,tokenType,last, end - last,context); } } //}}} //{{{ markKeyword() method private void markKeyword(boolean addRemaining) { int len = pos - lastOffset; if(len == 0) return; //{{{ Do digits if(context.rules.getHighlightDigits()) { boolean digit = false; boolean mixed = false; for(int i = lastOffset; i < pos; i++) { char ch = line.array[i]; if(Character.isDigit(ch)) digit = true; else mixed = true; } if(mixed) { Pattern digitRE = context.rules.getDigitRegexp(); // only match against regexp if its not all // digits; if all digits, no point matching if(digit) { if(digitRE == null) { // mixed digit/alpha keyword, // and no regexp... don't // highlight as DIGIT digit = false; } else { int oldCount = line.count; int oldOffset = line.offset; line.offset = lastOffset; line.count = len; CharSequence seq = new SegmentCharSequence(line); digit = digitRE.matcher(seq).matches(); line.offset = oldOffset; line.count = oldCount; } } } if(digit) { tokenHandler.handleToken(line,Token.DIGIT, lastOffset - line.offset, len,context); lastOffset = pos; return; } } //}}} //{{{ Do keywords if(keywords != null) { byte id = keywords.lookup(line, lastOffset, len); if(id != Token.NULL) { tokenHandler.handleToken(line,id, lastOffset - line.offset, len,context); lastOffset = pos; return; } } //}}} //{{{ Handle any remaining crud if(addRemaining) { tokenHandler.handleToken(line,context.rules.getDefault(), lastOffset - line.offset,len,context); lastOffset = pos; } //}}} } //}}} //{{{ substitute() method /** * Perform substitution references in <code>end</code> to the matched groups in * <code>match</code>. In particular, "$1" is replaced with the first match group in * <code>match</code>, $2 is replaced with the second, all the way up to "$9". Moreover, * if group <code>i</code> matched a single bracket character, then "~i" is replaced with * the complementary bracket. * @param match the <code>Matcher</code> produced by matching the "start" regex * @param end the pattern to substitute in to * @param escape if true, then escape matched text before inserting into <code>end</code>, so that * the result can be interpreted as a regex pattern. * @return the substituted pattern */ private static char[] substitute(Matcher match, char[] end, boolean escape) { StringBuilder buf = new StringBuilder(); for(int i = 0; i < end.length; i++) { char ch = end[i]; if(ch == '$' || ch == '~') { if(i == end.length - 1) buf.append(ch); else { char digit = end[i + 1]; if(!Character.isDigit(digit)) buf.append(ch); else if (ch == '$') { String text = match.group(digit - '0'); if (escape) text = TextUtilities.escapeText(text); buf.append(text); i++; } else { String s = match.group(digit - '0'); if (s.length() == 1) { char b = TextUtilities.getComplementaryBracket(s.charAt(0), null); if (b == '\0') b = s.charAt(0); buf.append(b); } else buf.append(ch); i++; } } } else buf.append(ch); } char[] returnValue = new char[buf.length()]; buf.getChars(0,buf.length(),returnValue,0); return returnValue; } //}}} //{{{ matchToken() method private byte matchToken(ParserRule rule, ParserRule base, LineContext ctx) { switch (rule.matchType) { case ParserRule.MATCH_TYPE_RULE: return base.token; case ParserRule.MATCH_TYPE_CONTEXT: return context.rules.getDefault(); default: return rule.matchType; } } //}}} //{{{ checkHashString() method private boolean checkHashString(ParserRule rule) { for (int i = 0; i < rule.upHashChar.length; i++) { if (Character.toUpperCase(line.array[pos+i]) != rule.upHashChar[i]) { return false; } } return true; } //}}} //}}} //{{{ LineContext class /** * Stores persistent per-line syntax parser state. */ public static class LineContext { private static final WeakHashMap<LineContext, WeakReference<LineContext>> intern = new WeakHashMap<LineContext, WeakReference<LineContext>>(); public LineContext parent; public ParserRule inRule; public ParserRuleSet rules; // used for SPAN_REGEXP rules; otherwise null public char[] spanEndSubst; public Pattern spanEndSubstRegex; public ParserRule escapeRule; //{{{ LineContext constructor public LineContext(ParserRuleSet rs, LineContext lc) { rules = rs; parent = (lc == null ? null : (LineContext)lc.clone()); /* * SPANs with no delegate need to propagate the * escape rule to the child context, so this is * needed. */ if (rs.getModeName() != null) escapeRule = rules.getEscapeRule(); else escapeRule = lc.escapeRule; } //}}} //{{{ LineContext constructor public LineContext() { } //}}} //{{{ intern() method public LineContext intern() { WeakReference<LineContext> ref = intern.get(this); if(ref != null) { LineContext obj = ref.get(); if(obj != null) { return obj; } } intern.put(this, new WeakReference<LineContext>(this)); return this; } //}}} //{{{ hashCode() method public int hashCode() { int code = 0; code += (parent != null) ? parent.hashCode() : 0; code += (inRule != null) ? inRule.hashCode() : 0; code += (rules != null) ? rules.hashCode() : 0; code += (spanEndSubst != null) ? spanEndSubst.hashCode() : 0; code += (spanEndSubstRegex != null) ? spanEndSubstRegex.hashCode() : 0; return code; } //}}} //{{{ equals() method public boolean equals(Object obj) { if(obj instanceof LineContext) { LineContext lc = (LineContext)obj; return lc.inRule == inRule && lc.rules == rules && Objects.equals(parent,lc.parent) && charArraysEqual(spanEndSubst,lc.spanEndSubst) && Objects.equals(spanEndSubstRegex, lc.spanEndSubstRegex); } else return false; } //}}} //{{{ clone() method @Override public Object clone() { LineContext lc = new LineContext(); lc.inRule = inRule; lc.rules = rules; lc.parent = (parent == null) ? null : (LineContext) parent.clone(); lc.spanEndSubst = spanEndSubst; lc.spanEndSubstRegex = spanEndSubstRegex; lc.escapeRule = escapeRule; return lc; } //}}} //{{{ charArraysEqual() method private static boolean charArraysEqual(char[] c1, char[] c2) { if(c1 == null) return c2 == null; // c1 is not null if(c2 == null) return false; if(c1.length != c2.length) return false; for(int i = 0; i < c1.length; i++) { if(c1[i] != c2[i]) return false; } return true; } //}}} //{{{ setInRule() method /** * Sets the current rule being processed and adjusts the * escape rule for the context based on the rule. */ public void setInRule(ParserRule rule) { inRule = rule; if (rule != null && rule.escapeRule != null) escapeRule = rule.escapeRule; else if (rules != null && rules.getModeName() != null) escapeRule = rules.getEscapeRule(); else if (parent != null) escapeRule = parent.escapeRule; else escapeRule = null; } //}}} } //}}} }