package org.rubypeople.rdt.internal.ui.text.ruby; import java.io.IOException; import java.io.StringReader; import org.eclipse.core.runtime.Preferences; import org.eclipse.jface.text.BadLocationException; import org.eclipse.jface.text.IDocument; import org.eclipse.jface.text.rules.IToken; import org.eclipse.jface.text.rules.ITokenScanner; import org.eclipse.jface.text.rules.Token; import org.jruby.CompatVersion; import org.jruby.common.NullWarnings; import org.jruby.lexer.yacc.LexerSource; import org.jruby.lexer.yacc.RubyYaccLexer; import org.jruby.lexer.yacc.SyntaxException; import org.jruby.lexer.yacc.RubyYaccLexer.LexState; import org.jruby.parser.ParserConfiguration; import org.jruby.parser.ParserSupport; import org.jruby.parser.RubyParserResult; import org.jruby.parser.Tokens; import org.jruby.util.KCode; import org.rubypeople.rdt.internal.ui.RubyPlugin; import org.rubypeople.rdt.ui.PreferenceConstants; /** * A token scanner which returns integers for ruby tokens. These can later be mapped to colors. * Does some smoothing on the tokens to add additional token types that the JRuby parser ignores. * * @author Chris Williams * */ public class RubyTokenScanner implements ITokenScanner { private static final int COMMA = 44; private static final int COLON = 58; private static final int NEWLINE = 10; public static final int CHARACTER = 128; static final int MIN_KEYWORD = 257; static final int MAX_KEYWORD = 303; private RubyYaccLexer lexer; private LexerSource lexerSource; private ParserSupport parserSupport; private int fTokenLength; private int fOffset; private boolean isInSymbol; private boolean inAlias; private RubyParserResult result; private int origOffset; private int origLength; private String fContents; public RubyTokenScanner() { lexer = new RubyYaccLexer(); parserSupport = new ParserSupport(); ParserConfiguration config = new ParserConfiguration(KCode.NIL, 0, true, false, CompatVersion.RUBY1_8); parserSupport.setConfiguration(config); result = new RubyParserResult(); parserSupport.setResult(result); lexer.setParserSupport(parserSupport); lexer.setWarnings(new NullWarnings()); lexer.setEncoding(config.getKCode().getEncoding()); } public int getTokenLength() { return fTokenLength; } public int getTokenOffset() { return fOffset; } public IToken nextToken() { fOffset = getOffset(); fTokenLength = 0; IToken returnValue = new Token(Tokens.tIDENTIFIER); boolean isEOF = false; try { isEOF = !lexer.advance(); // FIXME if we're assigning a string to a variable we may get a NumberFormatException here! if (isEOF) { returnValue = Token.EOF; } else { fTokenLength = getOffset() - fOffset; returnValue = token(lexer.token()); } } catch (SyntaxException se) { if (lexerSource.getOffset() - origLength == 0) return Token.EOF; // return eof if we hit a problem found at // end of parsing fTokenLength = getOffset() - fOffset; return token(Tokens.yyErrorCode); // FIXME This should return a special error token! } catch (NumberFormatException nfe) { fTokenLength = getOffset() - fOffset; return returnValue; } catch (IOException e) { RubyPlugin.log(e); } return returnValue; } private int getOffset() { return lexerSource.getOffset() + origOffset; } private IToken token(int i) { if (isInSymbol) { if (isSymbolTerminator(i)) { isInSymbol = false; // we're at the end of the symbol if (shouldReturnDefault(i)) return new Token(new Integer(i)); } return new Token(new Integer(Tokens.tSYMBEG)); } // The next two conditionals work around a JRuby parsing bug // JRuby returns the number for ':' on second symbol's beginning in alias calls if (i == Tokens.kALIAS) { inAlias = true; } if (i == COLON && inAlias) { isInSymbol = true; inAlias = false; return new Token(new Integer(Tokens.tSYMBEG)); } // end JRuby parsing hack for alias if (isKeyword(i)) return new Token(new Integer(Tokens.k__FILE__)); // FIXME Set up a token for user defined keywords switch (i) { case Tokens.tSYMBEG: if (looksLikeTertiaryConditionalWithNoSpaces()) { return new Token(new Integer(Tokens.tCOLON2)); } isInSymbol = true; // FIXME Set up a token for symbols return new Token(new Integer(Tokens.tSYMBEG)); case Tokens.tGVAR: case Tokens.tBACK_REF: return new Token(new Integer(Tokens.tGVAR)); case Tokens.tFLOAT: case Tokens.tINTEGER: // A character is marked as an integer, lets check for that special case... if ((((fOffset - origOffset) + 1) < fContents.length()) && (fContents.charAt((fOffset - origOffset) + 1) == '?')) return new Token(new Integer(CHARACTER)); return new Token(new Integer(i)); default: return new Token(new Integer(i)); } } private boolean looksLikeTertiaryConditionalWithNoSpaces() { if (fTokenLength > 1) return false; int index = (fOffset - origOffset) - 1; if (index < 0) return false; try { char c = fContents.charAt(index); return !Character.isWhitespace(c) && Character.isUnicodeIdentifierPart(c); } catch (RuntimeException e) { return false; } } private boolean shouldReturnDefault(int i) { switch (i) { case NEWLINE: case COMMA: case Tokens.tASSOC: case Tokens.tRPAREN: return true; default: return false; } } private boolean isSymbolTerminator(int i) { if (isRealKeyword(i)) return true; switch (i) { case Tokens.tAREF: case Tokens.tCVAR: case Tokens.tMINUS: case Tokens.tPLUS: case Tokens.tPIPE: case Tokens.tCARET: case Tokens.tLT: case Tokens.tGT: case Tokens.tAMPER: case Tokens.tSTAR2: case Tokens.tDIVIDE: case Tokens.tPERCENT: case Tokens.tBACK_REF2: case Tokens.tTILDE: case Tokens.tCONSTANT: case Tokens.tFID: case Tokens.tASET: case Tokens.tIDENTIFIER: case Tokens.tIVAR: case Tokens.tGVAR: case Tokens.tASSOC: case Tokens.tLSHFT: case Tokens.tRPAREN: case COMMA: case NEWLINE: return true; default: return false; } } private boolean isRealKeyword(int i) { if (i >= MIN_KEYWORD && i <= MAX_KEYWORD) return true; return false; } private boolean isKeyword(int i) { if (i != Tokens.tIDENTIFIER) return false; String src; try { src = fContents.substring((fOffset - origOffset), (fOffset - origOffset) + fTokenLength); } catch (RuntimeException e) { RubyPlugin.log(e); return false; } if (src == null || src.trim().length() == 0) return false; Preferences prefs = RubyPlugin.getDefault().getPluginPreferences(); if (prefs == null) return false; String rawKeywords = prefs.getString(PreferenceConstants.EDITOR_USER_KEYWORDS); if (rawKeywords == null || rawKeywords.length() == 0) { return false; } String[] keywords = rawKeywords.split(","); if (keywords == null || keywords.length == 0) { return false; } for (int j = 0; j < keywords.length; j++) { if (keywords[j] == null) continue; if (keywords[j].equals(src.trim())) return true; } return false; } public void setRange(IDocument document, int offset, int length) { lexer.reset(); lexer.setState(LexState.EXPR_BEG); parserSupport.initTopLocalVariables(); isInSymbol = false; ParserConfiguration config = new ParserConfiguration(KCode.NIL, 0, true, false, CompatVersion.RUBY1_8); try { fContents = document.get(offset, length); lexerSource = LexerSource.getSource("filename", new StringReader(fContents), null, config); lexer.setSource(lexerSource); } catch (BadLocationException e) { lexerSource = LexerSource.getSource("filename", new StringReader(""), null, config); lexer.setSource(lexerSource); } origOffset = offset; origLength = length; } }