package org.rubypeople.rdt.internal.ui.text;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import org.eclipse.core.runtime.Assert;
import org.eclipse.jface.text.BadLocationException;
import org.eclipse.jface.text.Document;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.rules.IPartitionTokenScanner;
import org.eclipse.jface.text.rules.IToken;
import org.eclipse.jface.text.rules.Token;
import org.jruby.CompatVersion;
import org.jruby.ast.CommentNode;
import org.jruby.ast.Node;
import org.jruby.common.NullWarnings;
import org.jruby.lexer.yacc.LexerSource;
import org.jruby.lexer.yacc.RubyYaccLexer;
import org.jruby.lexer.yacc.SyntaxException;
import org.jruby.lexer.yacc.RubyYaccLexer.LexState;
import org.jruby.parser.ParserConfiguration;
import org.jruby.parser.ParserSupport;
import org.jruby.parser.RubyParserResult;
import org.jruby.parser.Tokens;
import org.jruby.util.KCode;
import org.rubypeople.rdt.internal.core.util.ASTUtil;
import org.rubypeople.rdt.internal.ui.RubyPlugin;
public class RubyPartitionScanner implements IPartitionTokenScanner
{
/** Source prefix used by {@code isCommentMultiLine} to recognise a multi-line comment block. */
private static final String BEGIN = "=begin";
/**
 * Immutable pairing of a partition token with the document region (offset and length) it covers. Instances are
 * stored in the scanner's queue so tokens discovered ahead of time can be handed out later, in order.
 */
private static class QueuedToken
{
    private final IToken token;
    private final int offset;
    private final int length;

    /**
     * @param token
     *            the partition token
     * @param offset
     *            offset of the region the token covers
     * @param length
     *            length of the region the token covers
     */
    QueuedToken(IToken token, int offset, int length)
    {
        this.token = token;
        this.offset = offset;
        this.length = length;
    }

    /** @return the partition token */
    public IToken getToken()
    {
        return token;
    }

    /** @return offset of the covered region */
    public int getOffset()
    {
        return offset;
    }

    /** @return length of the covered region */
    public int getLength()
    {
        return length;
    }

    /** Renders the token data together with its offset and length; intended for debugging. */
    @Override
    public String toString()
    {
        StringBuilder text = new StringBuilder();
        text.append(token.getData());
        text.append(": offset: ").append(offset);
        text.append(", length: ").append(length);
        return text.toString();
    }
}
// JRuby lexer that produces the raw tokens this scanner maps to partitions.
private RubyYaccLexer lexer;
// Parser support object handed to the lexer; configured once in the constructor.
private ParserSupport parserSupport;
// Parser result attached to parserSupport; the scanner reads the comment nodes collected in it.
private RubyParserResult result;
// Text of the region currently being scanned (read from the document in setPartialRange).
private String fContents;
// Lexer source wrapping fContents; its offset is relative to the start of fContents.
private LexerSource lexerSource;
// Document offset at which fContents starts.
private int origOffset;
// Length of the region requested in setPartialRange.
private int origLength;
// Length of the token most recently returned by nextToken().
private int fLength;
// Document offset of the token most recently returned by nextToken().
private int fOffset;
// Tokens that were discovered ahead of time and are waiting to be returned by nextToken().
private List<QueuedToken> fQueue = new ArrayList<QueuedToken>();
// Content type of the construct currently being scanned (string, regex, command, ...).
private String fContentType = RUBY_DEFAULT;
// True while inside a string opened with ' or %q; dynamic (#...) sections are not expanded there.
private boolean inSingleQuote;
// Opening delimiter of the current string-like construct; for heredocs, the marker followed by a newline.
private String fOpeningString;
// Partition content types, re-exported from IRubyPartitions.
public final static String RUBY_MULTI_LINE_COMMENT = IRubyPartitions.RUBY_MULTI_LINE_COMMENT;
public final static String RUBY_SINGLE_LINE_COMMENT = IRubyPartitions.RUBY_SINGLE_LINE_COMMENT;
public final static String RUBY_STRING = IRubyPartitions.RUBY_STRING;
public final static String RUBY_REGULAR_EXPRESSION = IRubyPartitions.RUBY_REGULAR_EXPRESSION;
public static final String RUBY_DEFAULT = IRubyPartitions.RUBY_DEFAULT;
public static final String RUBY_COMMAND = IRubyPartitions.RUBY_COMMAND;
// All content types this scanner may assign to a token.
public static final String[] LEGAL_CONTENT_TYPES = { RUBY_DEFAULT, RUBY_MULTI_LINE_COMMENT,
RUBY_SINGLE_LINE_COMMENT, RUBY_REGULAR_EXPRESSION, RUBY_STRING, RUBY_COMMAND };
/**
 * Creates a scanner with its own JRuby lexer, parser support and parser result. The lexer is wired to the
 * parser support, uses {@link NullWarnings} and takes its encoding from the parser configuration.
 */
public RubyPartitionScanner()
{
    ParserConfiguration parserConfig = new ParserConfiguration(KCode.NIL, 0, false, CompatVersion.RUBY1_8);
    parserConfig.setExtraPositionInformation(true);

    result = new RubyParserResult();
    parserSupport = new ParserSupport();
    parserSupport.setConfiguration(parserConfig);
    parserSupport.setResult(result);

    lexer = new RubyYaccLexer();
    lexer.setParserSupport(parserSupport);
    lexer.setWarnings(new NullWarnings());
    lexer.setEncoding(parserConfig.getKCode().getEncoding());
}
/**
 * Configures the scanner to partition a region of the given document.
 * <p>
 * When a content type is supplied, scanning is moved back to {@code partitionOffset} (and the length is grown
 * by the same amount) so the lexer starts at the beginning of the partition. An offset of {@code -1} is
 * treated as {@code 0}.
 * <p>
 * If the requested region is not valid for the document, the scanner is set up with empty contents so that
 * {@code fContents}, the lexer source and the recorded length all agree; previously {@code fContents} kept
 * its stale (or {@code null}) value in that case.
 *
 * @param document
 *            the document to scan
 * @param offset
 *            offset of the region to scan
 * @param length
 *            length of the region to scan
 * @param contentType
 *            content type of the partition containing {@code offset}, or {@code null} to keep the current
 *            content type and scan from {@code offset} as-is
 * @param partitionOffset
 *            offset of the partition containing {@code offset}; only used when {@code contentType} is given
 */
public void setPartialRange(IDocument document, int offset, int length, String contentType, int partitionOffset)
{
    reset();
    int myOffset = offset;
    if (contentType != null)
    {
        // backtrack to beginning of partition so we don't get in weird state
        int diff = offset - partitionOffset;
        myOffset = partitionOffset;
        length += diff;
        this.fContentType = contentType;
    }
    if (myOffset == -1)
    {
        myOffset = 0;
    }
    ParserConfiguration config = new ParserConfiguration(KCode.NIL, 0, true, false, CompatVersion.RUBY1_8);
    String contents;
    try
    {
        contents = document.get(myOffset, length);
    }
    catch (BadLocationException e)
    {
        // Invalid region: scan nothing, and keep the cached contents in sync with the (empty) lexer source.
        contents = "";
        length = 0;
    }
    fContents = contents;
    lexerSource = LexerSource.getSource("filename", new StringReader(fContents), null, config);
    lexer.setSource(lexerSource);
    origOffset = myOffset;
    origLength = length;
}
/**
 * Returns the scanner to its initial scanning state: empties the token queue, clears the single-quote flag,
 * resets the lexer to {@code EXPR_BEG} and re-initialises the parser support's top-level local variables.
 */
private void reset()
{
    fQueue.clear();
    inSingleQuote = false;

    lexer.reset();
    lexer.setState(LexState.EXPR_BEG);
    parserSupport.initTopLocalVariables();
}
/**
 * @return the length of the token most recently returned by {@link #nextToken()}
 */
public int getTokenLength()
{
    return this.fLength;
}
/**
 * @return the document offset of the token most recently returned by {@link #nextToken()}
 */
public int getTokenOffset()
{
    return this.fOffset;
}
/**
 * Returns the next partition token.
 * <p>
 * Queued tokens are returned first. Otherwise the lexer is advanced by one token and the lexer token is mapped
 * to a partition token. Dynamic sections inside strings ({@code #var}, {@code #{...}}) and heredoc openings
 * are expanded into several queued tokens, as are any comments the lexer collected while advancing.
 * <p>
 * When the lexer reports an unterminated embedded document or string, the remainder of the contents is
 * returned as a single partition followed by EOF. Any other syntax error yields a default-partition token (or
 * EOF when the error was hit at the very end of the region).
 *
 * @return the next token; {@link Token#EOF} once the region is exhausted
 */
public IToken nextToken()
{
    if (!fQueue.isEmpty())
    {
        return popTokenOffQueue();
    }
    fOffset = getOffset();
    fLength = 0;
    IToken returnValue = new Token(RUBY_DEFAULT);
    boolean isEOF = false;
    try
    {
        isEOF = !lexer.advance();
        if (isEOF)
        {
            returnValue = Token.EOF;
        }
        else
        {
            int lexerToken = lexer.token();
            if (!inSingleQuote && lexerToken == Tokens.tSTRING_DVAR)
            { // we hit a single dynamic variable
                addPoundToken();
                scanDynamicVariable();
                setLexerPastDynamicSectionOfString();
                return popTokenOffQueue();
            }
            else if (!inSingleQuote && lexerToken == Tokens.tSTRING_DBEG)
            { // if we hit dynamic code inside a string
                addPoundBraceToken();
                scanTokensInsideDynamicPortion();
                addClosingBraceToken();
                setLexerPastDynamicSectionOfString();
                return popTokenOffQueue();
            }
            else if (lexerToken == Tokens.tSTRING_BEG)
            {
                String opening = getUntrimmedOpeningString();
                int endOfMarker = indexOf(opening.trim(), ", +)");
                if (opening.trim().startsWith("<<") && endOfMarker != -1)
                {
                    adjustOffset(opening);
                    addHereDocStartToken(endOfMarker);
                    addCommaToken(endOfMarker);
                    scanRestOfLineAfterHeredocBegins(opening.trim(), endOfMarker);
                    setLexerPastHeredocBeginning(opening.trim());
                    return popTokenOffQueue();
                }
            }
            returnValue = getToken(lexerToken);
        }
        List<CommentNode> comments = result.getCommentNodes();
        if (comments != null && !comments.isEmpty())
        {
            parseOutComments(comments);
            addQueuedToken(returnValue, isEOF); // Queue the normal token we just ate up
            comments.clear();
            return popTokenOffQueue();
        }
    }
    catch (SyntaxException se)
    {
        // Compare constant-first so an exception without a message cannot cause a NullPointerException.
        String message = se.getMessage();
        if ("embedded document meets end of file".equals(message))
        {
            return recoverFromUnterminatedConstruct(se, RUBY_MULTI_LINE_COMMENT);
        }
        if ("unterminated string meets end of file".equals(message))
        {
            return recoverFromUnterminatedConstruct(se, fContentType);
        }
        if (lexerSource.getOffset() - origLength == 0)
        {
            // return eof if we hit a problem found at end of parsing
            return Token.EOF;
        }
        fLength = getOffset() - fOffset;
        Assert.isTrue(fLength >= 0);
        return new Token(RUBY_DEFAULT);
    }
    catch (IOException e)
    {
        RubyPlugin.log(e);
    }
    if (!isEOF)
    {
        fLength = getOffset() - fOffset;
        Assert.isTrue(fLength >= 0);
    }
    return returnValue;
}

/**
 * Recovers from a construct (embedded document or string) that runs into the end of the contents: queues the
 * tokens of whatever precedes the construct (only if nothing has been returned from this region yet), then one
 * token of {@code contentType} covering the rest of the contents, then EOF.
 *
 * @param se
 *            the syntax error; its start offset marks where the unterminated construct begins in the contents
 * @param contentType
 *            content type to assign to the unterminated remainder
 * @return the first queued token
 */
private IToken recoverFromUnterminatedConstruct(SyntaxException se, String contentType)
{
    // TODO recover somehow by removing this chunk out of the fContents?
    int start = se.getPosition().getStartOffset();
    int length = fContents.length() - start;
    if (fOffset == origOffset)
    { // If we never got to read in beginning contents
        RubyPartitionScanner scanner = new RubyPartitionScanner();
        String possible = fContents.substring(0, start);
        IDocument document = new Document(possible);
        // The temporary document starts at 0, so scan it from 0 and shift the resulting offsets instead.
        scanner.setRange(document, 0, possible.length());
        IToken token;
        while (!(token = scanner.nextToken()).isEOF())
        {
            push(new QueuedToken(token, scanner.getTokenOffset() + origOffset, scanner.getTokenLength()));
        }
    }
    push(new QueuedToken(new Token(contentType), start + origOffset, length));
    push(new QueuedToken(Token.EOF, start + origOffset + length, 0));
    return popTokenOffQueue();
}
/**
 * Restarts the lexer on a synthetic document so that it is positioned just past a heredoc opening: the
 * document is blank padding, then the heredoc opener ({@code <<} or {@code <<-} plus the trimmed marker), then
 * the not-yet-scanned remainder of the current contents. The pending token queue survives the restart, and the
 * lexer is advanced once afterwards.
 *
 * @param rawBeginning
 *            the trimmed text of the heredoc opening as found in the source
 * @throws IOException
 *             if advancing the lexer fails
 */
private void setLexerPastHeredocBeginning(String rawBeginning) throws IOException
{
    final boolean indented = rawBeginning.startsWith("<<-");
    final int extra = indented ? 2 : 1;
    final int start = fOffset - (fOpeningString.length() + extra);

    StringBuilder synthetic = new StringBuilder();
    int padded = 0;
    while (padded < start)
    {
        synthetic.append(" ");
        padded++;
    }
    synthetic.append(indented ? "<<-" : "<<");
    synthetic.append(fOpeningString.trim());

    final int consumed = fOffset - origOffset;
    if (consumed < origLength)
    {
        synthetic.append(fContents.substring(consumed));
    }

    // setPartialRange() resets the scanner (and clears the queue), so save the queue and restore it afterwards.
    List<QueuedToken> pending = new ArrayList<QueuedToken>(fQueue);
    IDocument document = new Document(synthetic.toString());
    setPartialRange(document, start, synthetic.length() - start, null, start);
    fQueue = pending;
    lexer.advance();
}
/**
 * Moves the current token offset forward to where {@code <<} starts inside the given opening text; does
 * nothing when the text already starts with {@code <<} or does not contain it.
 *
 * @param opening
 *            the untrimmed heredoc opening text
 */
private void adjustOffset(String opening)
{
    int markerIndex = opening.indexOf("<<");
    if (markerIndex <= 0)
    {
        return;
    }
    setOffset(fOffset + markerIndex);
}
/**
 * Finds the earliest position in {@code opening} at which any character of {@code string} occurs. The search
 * runs on the trimmed text; the number of leading whitespace characters is added back to the result.
 *
 * @param opening
 *            the text to search
 * @param string
 *            the set of characters to look for
 * @return the lowest matching index, or {@code -1} if none of the characters occurs
 */
private int indexOf(String opening, String string)
{
    final String body = opening.trim();
    // Number of leading whitespace characters removed by trim().
    final int leading = (body.length() == 0) ? opening.length() : opening.indexOf(body.charAt(0));

    int earliest = -1;
    for (char candidate : string.toCharArray())
    {
        int position = body.indexOf(candidate);
        if (position == -1)
        {
            continue;
        }
        position += leading;
        if (earliest == -1 || position < earliest)
        {
            earliest = position;
        }
    }
    return earliest;
}
/**
 * Partitions the text that follows the heredoc marker (after the delimiter at {@code index}) with a nested
 * scanner and queues the resulting tokens, shifted to document offsets. Afterwards the current offset is moved
 * past that text, the heredoc marker is recorded as the opening string and the content type becomes string.
 * If the nested scanner itself ended up inside a heredoc, its marker is used instead.
 *
 * @param opening
 *            the trimmed heredoc opening text
 * @param index
 *            index in {@code opening} of the delimiter that ends the heredoc marker
 */
private void scanRestOfLineAfterHeredocBegins(String opening, int index)
{
    final int restStart = index + 1;
    final String rest = opening.substring(restStart);
    final int base = fOffset + restStart;

    RubyPartitionScanner nested = new RubyPartitionScanner();
    nested.setRange(new Document(rest), 0, rest.length());
    for (IToken token = nested.nextToken(); !token.isEOF(); token = nested.nextToken())
    {
        push(new QueuedToken(token, nested.getTokenOffset() + base, nested.getTokenLength()));
    }
    setOffset(base + rest.length());

    final String nestedOpening = nested.fOpeningString;
    if (nestedOpening != null && nestedOpening.endsWith("\n"))
    {
        fOpeningString = nestedOpening;
    }
    else
    {
        fOpeningString = generateHeredocMarker(opening.substring(0, index).trim());
    }
    fContentType = RUBY_STRING;
}
/**
 * Queues a one-character default-partition token for the delimiter found {@code index} characters after the
 * current offset.
 */
private void addCommaToken(int index)
{
    final int delimiterOffset = fOffset + index;
    push(new QueuedToken(new Token(RUBY_DEFAULT), delimiterOffset, 1));
}
/**
 * Queues a string-partition token that starts at the current offset and is {@code index} characters long (the
 * heredoc opening up to its delimiter).
 */
private void addHereDocStartToken(int index)
{
    IToken heredocStart = new Token(RUBY_STRING);
    push(new QueuedToken(heredocStart, fOffset, index));
}
/**
 * Sets the current token offset.
 *
 * @param offset
 *            the new document offset
 */
private void setOffset(int offset)
{
    this.fOffset = offset;
}
/** Queues a one-character token, in the current content type, for the {@code #} that starts a dynamic variable. */
private void addPoundToken()
{
    final int poundLength = 1;
    addStringToken(poundLength);
}
/**
 * Partitions a dynamic variable inside a string (the text following {@code #}) with a nested scanner and
 * queues the resulting tokens. The variable is taken to run from the current offset up to the next space or
 * double quote, whichever comes first; if neither is found it runs to the end of the contents (previously this
 * case threw a {@code StringIndexOutOfBoundsException}). The current offset is then moved past the variable.
 */
private void scanDynamicVariable()
{
    final int begin = fOffset - origOffset;
    final int whitespace = fContents.indexOf(' ', begin); // read until whitespace or '"'
    final int doubleQuote = fContents.indexOf('"', begin);

    // Pick the nearest delimiter that was actually found; -1 means neither exists.
    int end;
    if (whitespace == -1)
    {
        end = doubleQuote;
    }
    else if (doubleQuote == -1)
    {
        end = whitespace;
    }
    else
    {
        end = Math.min(whitespace, doubleQuote);
    }

    String possible;
    if (end == -1)
    {
        possible = fContents.substring(begin);
    }
    else
    {
        possible = fContents.substring(begin, end);
    }

    RubyPartitionScanner scanner = new RubyPartitionScanner();
    IDocument document = new Document(possible);
    scanner.setRange(document, 0, possible.length());
    IToken token;
    while (!(token = scanner.nextToken()).isEOF())
    {
        push(new QueuedToken(token, scanner.getTokenOffset() + fOffset, scanner.getTokenLength()));
    }
    setOffset(fOffset + possible.length());
}
/**
 * Partitions the code inside a {@code #{...}} section with a nested scanner and queues the resulting tokens,
 * shifted to document offsets. The code runs from the current offset up to the closing brace located by
 * {@link #findEnd(String)}, or to the end of the contents when no closing brace is found. The current offset
 * is then moved past the code.
 */
private void scanTokensInsideDynamicPortion()
{
    String code = fContents.substring(fOffset - origOffset);
    final int closingBrace = findEnd(code);
    if (closingBrace != -1)
    {
        code = code.substring(0, closingBrace);
    }

    RubyPartitionScanner nested = new RubyPartitionScanner();
    nested.setRange(new Document(code), 0, code.length());
    for (IToken token = nested.nextToken(); !token.isEOF(); token = nested.nextToken())
    {
        push(new QueuedToken(token, nested.getTokenOffset() + fOffset, nested.getTokenLength()));
    }
    setOffset(fOffset + code.length());
}
/**
 * Locates the brace that closes the dynamic section at the start of the given text.
 *
 * @param possible
 *            the text following {@code #{}
 * @return index of the closing brace as reported by {@link EndBraceFinder#find()}, or {@code -1}
 */
private int findEnd(String possible)
{
    EndBraceFinder finder = new EndBraceFinder(possible);
    return finder.find();
}
/** Queues a two-character token, in the current content type, for the <code>#{</code> that opens a dynamic section. */
private void addPoundBraceToken()
{
    final int poundBraceLength = 2;
    addStringToken(poundBraceLength);
}
/**
 * Queues a token of the current content type at the current offset and then moves the offset past it.
 *
 * @param length
 *            length of the token
 */
private void addStringToken(int length)
{
    final int tokenStart = fOffset;
    push(new QueuedToken(new Token(fContentType), tokenStart, length));
    setOffset(tokenStart + length);
}
/** Queues a one-character token, in the current content type, for the brace that closes a dynamic section. */
private void addClosingBraceToken()
{
    final int braceLength = 1;
    addStringToken(braceLength);
}
/**
 * Restarts the lexer on a synthetic document so that it is positioned just past a dynamic section of a string:
 * the document is blank padding, then the string's opening delimiter, then the not-yet-scanned remainder of
 * the current contents. For heredocs (opening string ending in a newline) the opener is rebuilt as
 * {@code <<marker} or {@code <<-marker}, whichever form occurs last at or before the current position in the
 * contents. The pending token queue survives the restart, and the lexer is advanced once afterwards.
 *
 * @throws IOException
 *             if advancing the lexer fails
 */
private void setLexerPastDynamicSectionOfString() throws IOException
{
    StringBuilder fakeContents = new StringBuilder();
    String opening = fOpeningString;
    // fContents is indexed relative to origOffset; fOffset is a document offset.
    final int relativeOffset = fOffset - origOffset;
    if (opening.endsWith("\n"))
    {
        // Search backwards from the current position for <<-opening or <<opening and take whichever we find
        // last. If we fail to find either, assume <<
        String heredocStart = "<<";
        int lastIndent = fContents.lastIndexOf("<<-" + opening, relativeOffset);
        if (lastIndent != -1 && lastIndent > fContents.lastIndexOf("<<" + opening, relativeOffset))
        {
            heredocStart = "<<-";
        }
        opening = heredocStart + opening;
    }
    int start = fOffset - opening.length();
    for (int i = 0; i < start; i++)
    {
        fakeContents.append(" ");
    }
    fakeContents.append(opening);
    if (relativeOffset < origLength)
    {
        fakeContents.append(fContents.substring(relativeOffset));
    }
    IDocument document = new Document(fakeContents.toString());
    // setPartialRange() resets the scanner (and clears the queue), so save the queue and restore it afterwards.
    List<QueuedToken> queueCopy = new ArrayList<QueuedToken>(fQueue);
    setPartialRange(document, start, fakeContents.length() - start, null, start);
    fQueue = queueCopy;
    lexer.advance();
}
/**
 * Queues one comment token per comment node. Single-line comments use the length of the comment content;
 * multi-line comments span from their corrected start offset to the node's end offset, plus one character
 * when the content does not start with {@code =}.
 *
 * @param comments
 *            the comment nodes collected by the lexer
 */
private void parseOutComments(List<CommentNode> comments)
{
    for (CommentNode comment : comments)
    {
        final int offset = correctOffset(comment);
        final boolean multiLine = isCommentMultiLine(comment);
        final String content = comment.getContent();

        int length = content.length();
        if (multiLine)
        {
            length = (origOffset + comment.getPosition().getEndOffset()) - offset;
            if (content.charAt(0) != '=')
            {
                length += 1;
            }
        }
        String contentType = multiLine ? RUBY_MULTI_LINE_COMMENT : RUBY_SINGLE_LINE_COMMENT;
        push(new QueuedToken(new Token(contentType), offset, length));
    }
}
/**
 * Removes the oldest queued token, makes its region the current token offset/length and returns its token.
 *
 * @return the token at the head of the queue
 */
private IToken popTokenOffQueue()
{
    final QueuedToken head = fQueue.remove(0);
    final int headLength = head.getLength();
    setOffset(head.getOffset());
    Assert.isTrue(headLength >= 0);
    fLength = headLength;
    return head.getToken();
}
/**
 * Maps a lexer token to a partition token and updates the scanner's notion of the current content type and
 * opening string accordingly.
 *
 * @param i
 *            the lexer token id
 * @return the partition token for the lexer token
 */
private IToken getToken(int i)
{
    // A 32 (space) inside a qword is reported as the current content type (not default), like string content.
    // FIXME IF we're in qwords, we should inspect the contents because it may be a variable
    if (i == 32 || i == Tokens.tSTRING_CONTENT)
    {
        return new Token(fContentType);
    }
    switch (i)
    {
        case Tokens.tSTRING_BEG:
        {
            IToken stringStart = beginPartition(RUBY_STRING);
            if (fOpeningString.equals("'") || fOpeningString.startsWith("%q"))
            {
                inSingleQuote = true;
            }
            else if (fOpeningString.startsWith("<<"))
            { // here-doc
                fOpeningString = generateHeredocMarker(fOpeningString);
            }
            return stringStart;
        }
        case Tokens.tXSTRING_BEG:
            return beginPartition(RUBY_COMMAND);
        case Tokens.tQWORDS_BEG:
        case Tokens.tWORDS_BEG:
            return beginPartition(RUBY_STRING);
        case Tokens.tREGEXP_BEG:
            return beginPartition(RUBY_REGULAR_EXPRESSION);
        case Tokens.tSTRING_END:
        {
            // The closing delimiter still belongs to the construct being closed.
            IToken closing = new Token(fContentType);
            fContentType = RUBY_DEFAULT;
            inSingleQuote = false;
            return closing;
        }
        case Tokens.tREGEXP_END:
            fContentType = RUBY_DEFAULT;
            return new Token(RUBY_REGULAR_EXPRESSION);
        case Tokens.tSYMBEG:
        {
            // Sometimes we need to add 1, sometimes two. Depends on if there's a space preceding the ':'
            int quoteOffset = fOffset + 1;
            int index = quoteOffset - origOffset;
            if (index >= fContents.length())
            {
                return new Token(RUBY_DEFAULT);
            }
            char following = fContents.charAt(index);
            if (following == ':')
            {
                index++;
                if (index >= fContents.length())
                {
                    return new Token(RUBY_DEFAULT);
                }
                quoteOffset++;
                following = fContents.charAt(index);
            }
            if (following == '"')
            {
                // Symbol written as :"..." - queue the opening quote as the start of a string partition.
                fOpeningString = "\"";
                push(new QueuedToken(new Token(RUBY_STRING), quoteOffset, 1));
                fContentType = RUBY_STRING;
            }
            return new Token(RUBY_DEFAULT);
        }
        default:
            return new Token(RUBY_DEFAULT);
    }
}

/**
 * Records the opening string of a construct that starts at the current token, switches the current content
 * type to {@code contentType} and returns a token of that type.
 */
private IToken beginPartition(String contentType)
{
    fOpeningString = getOpeningString();
    fContentType = contentType;
    return new Token(contentType);
}
/**
 * Builds the terminator text for a heredoc: the marker with a leading {@code <<} and a following {@code -}
 * removed (each only if present), followed by a newline.
 *
 * @param marker
 *            the heredoc marker, with or without its {@code <<} / {@code <<-} prefix
 * @return the bare marker followed by {@code "\n"}
 */
private String generateHeredocMarker(String marker)
{
    int skip = marker.startsWith("<<") ? 2 : 0;
    if (marker.startsWith("-", skip))
    {
        skip++;
    }
    return marker.substring(skip) + "\n";
}
/**
 * @return the text of the token the lexer just consumed, with surrounding whitespace removed
 */
private String getOpeningString()
{
    String untrimmed = getUntrimmedOpeningString();
    return untrimmed.trim();
}
/**
 * Returns the raw contents between the start of the current token and the lexer's current position. When the
 * lexer has collected comments, the text starts at the end of the last comment instead.
 *
 * @return the untrimmed text of the token the lexer just consumed
 */
private String getUntrimmedOpeningString()
{
    final List<CommentNode> comments = result.getCommentNodes();
    final boolean hasComments = comments != null && !comments.isEmpty();

    int start;
    if (hasComments)
    {
        CommentNode last = comments.get(comments.size() - 1);
        start = last.getPosition().getEndOffset();
    }
    else
    {
        start = fOffset - origOffset;
    }
    return fContents.substring(start, lexerSource.getOffset());
}
/**
 * Translates a comment's start offset, which is relative to the scanned contents, into a document offset.
 * (Historically needed because a whitespace-only line before a comment produced wrong positions.)
 *
 * @param comment
 *            the comment node
 * @return the document offset at which the comment starts
 */
private int correctOffset(CommentNode comment)
{
    final int relativeStart = comment.getPosition().getStartOffset();
    return origOffset + relativeStart;
}
/**
 * @param comment
 *            the comment node
 * @return {@code true} if the comment's source text is available and starts with {@code =begin}
 */
private boolean isCommentMultiLine(CommentNode comment)
{
    final String source = ASTUtil.getSource(fContents, comment);
    return source != null && source.startsWith(BEGIN);
}
/**
 * @param comment
 *            the comment node
 * @return the multi-line comment content type for {@code =begin} comments, otherwise the single-line one
 */
private String getContentType(CommentNode comment)
{
    return isCommentMultiLine(comment) ? RUBY_MULTI_LINE_COMMENT : RUBY_SINGLE_LINE_COMMENT;
}
/**
 * Queues the regular token that was consumed together with the comments: it starts where the last queued
 * token ends and extends to the lexer's current position (never with a negative length).
 *
 * @param returnValue
 *            the token to queue
 * @param isEOF
 *            whether the lexer reached the end of input (currently unused)
 */
private void addQueuedToken(IToken returnValue, boolean isEOF)
{
    // The last thing in the queue is the last comment; continue right after it.
    final QueuedToken last = peek();
    setOffset(last.getOffset() + last.getLength());
    final int length = Math.max(0, getOffset() - fOffset);
    push(new QueuedToken(returnValue, fOffset, length));
}
/**
 * @return the most recently queued token, without removing it
 */
private QueuedToken peek()
{
    final int lastIndex = fQueue.size() - 1;
    return fQueue.get(lastIndex);
}
/**
 * Appends a token to the end of the queue, asserting that its length is not negative.
 *
 * @param token
 *            the token to queue
 */
private void push(QueuedToken token)
{
    final int length = token.getLength();
    Assert.isTrue(length >= 0);
    fQueue.add(token);
}
/**
 * @return the lexer's current position, translated into a document offset
 */
private int getOffset()
{
    return origOffset + lexerSource.getOffset();
}
/**
 * Configures the scanner for the given region, treating it as default content in a partition that starts at
 * document offset 0.
 *
 * @param document
 *            the document to scan
 * @param offset
 *            offset of the region to scan
 * @param length
 *            length of the region to scan
 */
public void setRange(IDocument document, int offset, int length)
{
    final String contentType = RUBY_DEFAULT;
    final int partitionOffset = 0;
    setPartialRange(document, offset, length, contentType, partitionOffset);
}
/**
 * Finds the brace that closes a <code>#{...}</code> section, given the text that follows the opening
 * <code>#{</code>. A small stack of open constructs (quotes, regex slashes, braces, nested interpolations) is
 * kept so that braces belonging to nested constructs are not mistaken for the closing one.
 * <p>
 * This is a heuristic character scan, not a Ruby parser.
 */
public static class EndBraceFinder
{
    private final String input;
    private final List<String> stack;

    /**
     * @param possible
     *            the text following the opening <code>#{</code>
     */
    public EndBraceFinder(String possible)
    {
        this.input = possible;
        stack = new ArrayList<String>();
    }

    /**
     * Scans the input for the first closing brace seen while no nested construct is open.
     *
     * @return index of the closing brace, or {@code -1} if there is none
     */
    public int find()
    {
        final int length = input.length();
        for (int i = 0; i < length; i++)
        {
            char c = input.charAt(i);
            switch (c)
            {
                case '\\':
                case '$':
                    // skip next character
                    i++;
                    break;
                case '"':
                    if (topEquals("\""))
                    {
                        pop();
                    }
                    else if (!topEquals("'"))
                    {
                        push("\"");
                    }
                    break;
                case '/':
                    if (topEquals("/"))
                    {
                        pop();
                    }
                    else if (!topEquals("\"") && !topEquals("'"))
                    {
                        // A slash inside a string literal is plain text, not the start of a regex.
                        push("/");
                    }
                    break;
                case '\'':
                    if (topEquals("'"))
                    {
                        pop();
                    }
                    else if (!topEquals("\"") && !topEquals("/")) // not inside a double quoted string or a regex
                    {
                        push("'");
                    }
                    break;
                case '{':
                    // Only if we're not inside a string
                    if (!topEquals("'") && !topEquals("\""))
                    {
                        push("{");
                    }
                    break;
                case '#':
                    // Only counts inside a double quoted string, and only when a brace actually follows.
                    if (topEquals("\"") && i + 1 < length && input.charAt(i + 1) == '{')
                    {
                        push("#{");
                        // Consume the brace too, so it is not pushed a second time as a plain "{".
                        i++;
                    }
                    break;
                case '}':
                    if (stack.isEmpty())
                    { // if not in open state
                        return i;
                    }
                    if (topEquals("#{") || topEquals("{"))
                    {
                        pop();
                    }
                    break;
                default:
                    break;
            }
        }
        return -1;
    }

    /** @return {@code true} if the innermost open construct is {@code string} */
    private boolean topEquals(String string)
    {
        String open = peek();
        return open != null && open.equals(string);
    }

    /** Records {@code string} as the new innermost open construct. */
    private boolean push(String string)
    {
        return stack.add(string);
    }

    /** Closes the innermost open construct. */
    private String pop()
    {
        return stack.remove(stack.size() - 1);
    }

    /** @return the innermost open construct, or {@code null} if nothing is open */
    private String peek()
    {
        if (stack.isEmpty())
        {
            return null;
        }
        return stack.get(stack.size() - 1);
    }
}
}