/**
* Aptana Studio
* Copyright (c) 2005-2011 by Appcelerator, Inc. All Rights Reserved.
* Licensed under the terms of the GNU Public License (GPL) v3 (with exceptions).
* Please see the license.html included with this distribution for details.
* Any modifications to this file must keep this entire header intact.
*/
package com.aptana.editor.ruby;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import org.eclipse.jface.text.BadLocationException;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.ITypedRegion;
import org.eclipse.jface.text.rules.IToken;
import org.eclipse.jface.text.rules.ITokenScanner;
import org.eclipse.jface.text.rules.Token;
import org.jrubyparser.CompatVersion;
import org.jrubyparser.Parser.NullWarnings;
import org.jrubyparser.lexer.Lexer;
import org.jrubyparser.lexer.Lexer.LexState;
import org.jrubyparser.lexer.LexerSource;
import org.jrubyparser.lexer.SyntaxException;
import org.jrubyparser.parser.ParserConfiguration;
import org.jrubyparser.parser.ParserResult;
import org.jrubyparser.parser.ParserSupport;
import org.jrubyparser.parser.Tokens;
import com.aptana.core.logging.IdeLog;
import com.aptana.core.util.StringUtil;
/**
* A token scanner which returns integers for ruby tokens. These can later be mapped to colors. Does some smoothing on
* the tokens to add additional token types that the JRuby parser ignores.
*
* @author Chris Williams
*/
public class RubyTokenScanner implements ITokenScanner
{
public static final int COMMA = 44;
public static final int COLON = 58;
public static final int ASSIGNMENT = 61;
public static final int QUESTION = 63;
public static final int NEWLINE = 10;
public static final int CHARACTER = 128;
static final int MIN_KEYWORD = 257;
static final int MAX_KEYWORD = 305;
public static final int SPACE = 32;
private static final int LBRACK = 91;
public static final int SEMICOLON = 59;
private Lexer lexer;
private LexerSource lexerSource;
private ParserSupport parserSupport;
private int fTokenLength;
private int fOffset;
private boolean isInSymbol;
private boolean inAlias;
private ParserResult result;
private int origOffset;
private int origLength;
private String fContents;
private BufferedReader reader;
public RubyTokenScanner()
{
lexer = new Lexer();
parserSupport = new ParserSupport();
ParserConfiguration config = new ParserConfiguration(0, CompatVersion.RUBY1_8);
parserSupport.setConfiguration(config);
result = new ParserResult();
parserSupport.setResult(result);
lexer.setParserSupport(parserSupport);
lexer.setWarnings(new NullWarnings());
}
public int getTokenLength()
{
return fTokenLength;
}
public int getTokenOffset()
{
return fOffset;
}
public IToken nextToken()
{
fOffset = getOffset();
fTokenLength = 0;
IToken returnValue = new Token(Tokens.tIDENTIFIER);
boolean isEOF = false;
try
{
isEOF = !lexer.advance(); // FIXME if we're assigning a string to a
// variable we may get a
// NumberFormatException here!
if (isEOF)
{
returnValue = Token.EOF;
// TODO Close the lexer's reader
}
else
{
fTokenLength = getOffset() - fOffset;
returnValue = token(lexer.token());
}
}
catch (SyntaxException se)
{
if (lexerSource.getOffset() - origLength == 0)
{
return Token.EOF; // return eof if we hit a problem found at end of parsing
}
fTokenLength = getOffset() - fOffset;
return token(Tokens.yyErrorCode); // FIXME This should return a special error token!
}
catch (NumberFormatException nfe)
{
fTokenLength = getOffset() - fOffset;
return returnValue;
}
catch (Exception e)
{
IdeLog.logError(RubyEditorPlugin.getDefault(), e);
}
return returnValue;
}
private int getOffset()
{
return lexerSource.getOffset() + origOffset;
}
private IToken token(int i)
{
if (isInSymbol)
{
if (isSymbolTerminator(i))
{
isInSymbol = false; // we're at the end of the symbol
if (shouldReturnDefault(i))
{
return new Token(i);
}
}
return new Token(Tokens.tSYMBEG);
}
// The next two conditionals work around a JRuby parsing bug
// JRuby returns the number for ':' on second symbol's beginning in
// alias calls
if (i == Tokens.kALIAS)
{
inAlias = true;
}
if (i == COLON && inAlias)
{
isInSymbol = true;
inAlias = false;
return new Token(Tokens.tSYMBEG);
} // end JRuby parsing hack for alias
switch (i)
{
case LBRACK:
return new Token(Tokens.tLBRACK);
case Tokens.tSYMBEG:
if (looksLikeTertiaryConditionalWithNoSpaces())
{
return new Token(Tokens.tCOLON2);
}
isInSymbol = true;
// FIXME Set up a token for symbols
return new Token(Tokens.tSYMBEG);
case Tokens.tGVAR:
case Tokens.tBACK_REF:
return new Token(Tokens.tGVAR);
case Tokens.tFLOAT:
case Tokens.tINTEGER:
// A character is marked as an integer, lets check for that special
// case...
if ((((fOffset - origOffset) + 1) < fContents.length())
&& (fContents.charAt((fOffset - origOffset) + 1) == '?'))
{
return new Token(CHARACTER);
}
return new Token(i);
default:
return new Token(i);
}
}
private boolean looksLikeTertiaryConditionalWithNoSpaces()
{
if (fTokenLength > 1)
{
return false;
}
int index = (fOffset - origOffset) - 1;
if (index < 0)
{
return false;
}
try
{
char c = fContents.charAt(index);
return !Character.isWhitespace(c) && Character.isUnicodeIdentifierPart(c);
}
catch (RuntimeException e)
{
return false;
}
}
private boolean shouldReturnDefault(int i)
{
switch (i)
{
case NEWLINE:
case COMMA:
case Tokens.tASSOC:
case Tokens.tRPAREN:
case Tokens.tWHITESPACE:
return true;
default:
return false;
}
}
private boolean isSymbolTerminator(int i)
{
if (isRealKeyword(i))
{
return true;
}
switch (i)
{
case Tokens.tAREF:
case Tokens.tCVAR:
case Tokens.tMINUS:
case Tokens.tPLUS:
case Tokens.tPIPE:
case Tokens.tCARET:
case Tokens.tLT:
case Tokens.tGT:
case Tokens.tAMPER:
case Tokens.tSTAR2:
case Tokens.tDIVIDE:
case Tokens.tPERCENT:
case Tokens.tBACK_REF2:
case Tokens.tTILDE:
case Tokens.tCONSTANT:
case Tokens.tFID:
case Tokens.tASET:
case Tokens.tIDENTIFIER:
case Tokens.tIVAR:
case Tokens.tGVAR:
case Tokens.tASSOC:
case Tokens.tLSHFT:
case Tokens.tRPAREN:
case Tokens.tWHITESPACE:
case COMMA:
case NEWLINE:
return true;
default:
return false;
}
}
private boolean isRealKeyword(int i)
{
if (i >= MIN_KEYWORD && i <= MAX_KEYWORD)
{
return true;
}
return false;
}
public void setRange(IDocument document, int offset, int length)
{
reset();
ParserConfiguration config = new ParserConfiguration(0, CompatVersion.BOTH);
try
{
fContents = document.get(offset, length);
}
catch (BadLocationException e)
{
fContents = StringUtil.EMPTY;
}
reader = new BufferedReader(new StringReader(fContents)); // $codepro.audit.disable closeWhereCreated
lexerSource = LexerSource.getSource("filename", reader, config); //$NON-NLS-1$
lexer.setSource(lexerSource);
// FIXME If we're resuming after a string/regexp/command, set up lex state to be expression end.
if (offset > 0)
{
try
{
ITypedRegion region = document.getPartition(offset - 1);
if (RubySourceConfiguration.STRING_DOUBLE.equals(region.getType())
|| RubySourceConfiguration.STRING_SINGLE.equals(region.getType())
|| RubySourceConfiguration.REGULAR_EXPRESSION.equals(region.getType())
|| RubySourceConfiguration.COMMAND.equals(region.getType()))
{
lexer.setLexState(LexState.EXPR_END);
}
}
catch (BadLocationException e)
{
IdeLog.logError(RubyEditorPlugin.getDefault(), "Unable to get previous partition at offset: " + offset, //$NON-NLS-1$
e);
}
}
origOffset = offset;
origLength = length;
}
protected void reset()
{
if (reader != null)
{
try
{
reader.close(); // $codepro.audit.disable closeInFinally
}
catch (IOException e) // $codepro.audit.disable emptyCatchClause
{
// ignore
}
}
lexer.reset();
lexer.setState(LexState.EXPR_BEG);
lexer.setPreserveSpaces(true);
parserSupport.initTopLocalVariables();
isInSymbol = false;
inAlias = false;
}
String getSource(int offset, int length)
{
if (fContents == null || offset < 0 || (offset + length) > fContents.length())
{
return null;
}
return new String(fContents.substring(offset, offset + length));
}
}