/**
 * Aptana Studio
 * Copyright (c) 2005-2011 by Appcelerator, Inc. All Rights Reserved.
 * Licensed under the terms of the GNU Public License (GPL) v3 (with exceptions).
 * Please see the license.html included with this distribution for details.
 * Any modifications to this file must keep this entire header intact.
 */
package com.aptana.editor.ruby;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.eclipse.core.runtime.Assert;
import org.eclipse.jface.text.BadLocationException;
import org.eclipse.jface.text.Document;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.ITypedRegion;
import org.eclipse.jface.text.rules.IPartitionTokenScanner;
import org.eclipse.jface.text.rules.IToken;
import org.eclipse.jface.text.rules.Token;
import org.jrubyparser.CompatVersion;
import org.jrubyparser.Parser.NullWarnings;
import org.jrubyparser.SourcePosition;
import org.jrubyparser.ast.CommentNode;
import org.jrubyparser.ast.Node;
import org.jrubyparser.lexer.HeredocTerm;
import org.jrubyparser.lexer.Lexer;
import org.jrubyparser.lexer.Lexer.LexState;
import org.jrubyparser.lexer.LexerSource;
import org.jrubyparser.lexer.StrTerm;
import org.jrubyparser.lexer.SyntaxException;
import org.jrubyparser.lexer.SyntaxException.PID;
import org.jrubyparser.parser.ParserConfiguration;
import org.jrubyparser.parser.ParserResult;
import org.jrubyparser.parser.ParserSupport;
import org.jrubyparser.parser.Tokens;

import com.aptana.core.logging.IdeLog;
import com.aptana.core.util.StringUtil;
import com.aptana.editor.common.CommonUtil;

public class RubySourcePartitionScanner implements IPartitionTokenScanner
{

    private static final String INDENTED_HEREDOC_MARKER_PREFIX = "<<-"; //$NON-NLS-1$
    private static final String HEREDOC_MARKER_PREFIX = "<<"; //$NON-NLS-1$
    private static final String DEFAULT_FILENAME = "filename"; //$NON-NLS-1$
    private static final String BEGIN = "=begin"; //$NON-NLS-1$

    private Lexer lexer;
    private ParserSupport parserSupport;
    private ParserResult result;
    private String fContents;
    private LexerSource lexerSource;
    private Reader reader;
    private int origOffset;
    private int origLength;
    private int fLength;
    private int fOffset;

    private List<QueuedToken> fQueue = new ArrayList<QueuedToken>();
    private String fContentType = RubySourceConfiguration.DEFAULT;
    private boolean inSingleQuote;
    private String fOpeningString;

    public RubySourcePartitionScanner()
    {
        lexer = new Lexer();
        parserSupport = new ParserSupport();
        ParserConfiguration config = new ParserConfiguration(0, CompatVersion.BOTH);
        parserSupport.setConfiguration(config);
        result = new ParserResult();
        parserSupport.setResult(result);
        lexer.setParserSupport(parserSupport);
        lexer.setWarnings(new NullWarnings());
    }

    public void setPartialRange(IDocument document, int offset, int length, String contentType, int partitionOffset)
    {
        reset();
        int myOffset = offset;
        if (contentType != null)
        {
            int diff = offset - partitionOffset;
            // backtrack to beginning of partition so we don't get in weird state
            myOffset = partitionOffset;
            length += diff; // $codepro.audit.disable questionableAssignment
            this.fContentType = contentType;
            if (this.fContentType.equals(RubySourceConfiguration.SINGLE_LINE_COMMENT)
                    || this.fContentType.equals(IDocument.DEFAULT_CONTENT_TYPE))
            {
                this.fContentType = RubySourceConfiguration.DEFAULT;
            }
            // FIXME What if a heredoc with dynamic code inside is broken? contents will start with "}" rather than
            // expected
        }
        if (myOffset == -1)
        {
            myOffset = 0;
        }
        ParserConfiguration config = new ParserConfiguration(0, CompatVersion.BOTH);
        try
        {
            fContents = document.get(myOffset, length);
        }
        catch (BadLocationException e)
        {
            fContents = StringUtil.EMPTY;
        }
        reader = new BufferedReader(new StringReader(fContents)); // $codepro.audit.disable closeWhereCreated
        lexerSource = LexerSource.getSource(DEFAULT_FILENAME, reader, config);
        lexer.setSource(lexerSource);
        // FIXME If we're resuming after a string/regexp/command, set up lex state to be expression end.
        if (partitionOffset > 0)
        {
            try
            {
                ITypedRegion region = document.getPartition(partitionOffset - 1);
                if (RubySourceConfiguration.STRING_DOUBLE.equals(region.getType())
                        || RubySourceConfiguration.STRING_SINGLE.equals(region.getType())
                        || RubySourceConfiguration.REGULAR_EXPRESSION.equals(region.getType())
                        || RubySourceConfiguration.COMMAND.equals(region.getType()))
                {
                    lexer.setLexState(LexState.EXPR_END);
                }
            }
            catch (BadLocationException e)
            {
                IdeLog.logError(RubyEditorPlugin.getDefault(), "Unable to get previous partition at offset: " + offset, //$NON-NLS-1$
                        e);
            }
        }
        origOffset = myOffset;
        origLength = length;
    }

    public int getTokenLength()
    {
        return fLength;
    }

    public int getTokenOffset()
    {
        return fOffset;
    }

    public IToken nextToken()
    {
        if (!fQueue.isEmpty())
        {
            return popTokenOffQueue();
        }
        setOffset(getAdjustedOffset());
        setLength(0);
        IToken returnValue = createToken(fContentType);
        boolean isEOF = false;
        try
        {
            isEOF = !lexer.advance();
            if (isEOF)
            {
                returnValue = Token.EOF;
                // TODO Close the lexer's reader?
            }
            else
            {
                int lexerToken = lexer.token();
                if (isSingleVariableStringInterpolation(lexerToken))
                {
                    return handleSingleVariableStringInterpolation();
                }
                else if (isStringInterpolation(lexerToken))
                {
                    return handleStringInterpolation();
                }
                // Set up lexer to process embedded code in strings!
                else if (lexerToken == Tokens.tSTRING_BEG || lexerToken == Tokens.tREGEXP_BEG
                        || lexerToken == Tokens.tXSTRING_BEG || lexerToken == Tokens.tQWORDS_BEG
                        || lexerToken == Tokens.tWORDS_BEG || lexerToken == Tokens.tSYMBEG)
                {
                    StrTerm strTerm = lexer.getStrTerm();
                    if (strTerm != null)
                    {
                        strTerm.splitEmbeddedTokens();
                    }
                }
                returnValue = getToken(lexerToken);
            }
            // TODO Are there ever comment nodes anymore? Do we need this code?!
            List<CommentNode> comments = result.getCommentNodes();
            if (comments != null && !comments.isEmpty())
            {
                parseOutComments(comments);
                // Queue the normal token we just ate up
                addQueuedToken(returnValue);
                comments.clear();
                return popTokenOffQueue();
            }
        }
        catch (SyntaxException se)
        {
            if ("embedded document meets end of file".equals(se.getMessage())) //$NON-NLS-1$
            {
                return handleUnterminedMultilineComment(se);
            }
            else if (se.getPid().equals(PID.STRING_MARKER_MISSING) || se.getPid().equals(PID.STRING_HITS_EOF))
            {
                return handleUnterminatedString(se);
            }
            if (lexerSource.getOffset() - origLength == 0)
            {
                // return eof if we hit a problem found at end of parsing
                return Token.EOF;
            }
            setLength(getAdjustedOffset() - fOffset);
            return createToken(RubySourceConfiguration.DEFAULT);
        }
        catch (IOException e)
        {
            IdeLog.logError(RubyEditorPlugin.getDefault(), e);
        }
        if (!isEOF)
        {
            setLength(getAdjustedOffset() - fOffset);
            // HACK End of heredocs are returning a zero length token for end of string that hoses us
            if (fLength == 0
                    && (returnValue.getData().equals(RubySourceConfiguration.STRING_DOUBLE) || returnValue.getData()
                            .equals(RubySourceConfiguration.STRING_SINGLE)))
            {
                return nextToken();
            }
        }
        return returnValue;
    }

    private boolean isSingleVariableStringInterpolation(int lexerToken)
    {
        return !inSingleQuote && lexerToken == Tokens.tSTRING_DVAR;
    }

    private boolean isStringInterpolation(int lexerToken)
    {
        return !inSingleQuote && lexerToken == Tokens.tSTRING_DBEG;
    }

    private void setLength(int newLength)
    {
        fLength = newLength;
        Assert.isTrue(fLength >= 0);
    }

    private IToken handleUnterminedMultilineComment(SyntaxException se)
    {
        return handleUnterminatedPartition(se.getPosition().getStartOffset(), RubySourceConfiguration.MULTI_LINE_COMMENT);
    }

    private IToken handleUnterminatedString(SyntaxException se)
    {
        return handleUnterminatedPartition(se.getPosition().getStartOffset(), fContentType);
    }

    private IToken handleUnterminatedPartition(int start, String contentType)
    {
        // Add to the queue (at end), then try to just do the rest of the file...
        // TODO recover somehow by removing this chunk out of the fContents?
        int length = fContents.length() - start;
        QueuedToken qtoken = new QueuedToken(createToken(contentType), start + origOffset, length);
        if (fOffset == origOffset)
        {
            // If we never got to read in beginning contents
            RubySourcePartitionScanner scanner = new RubySourcePartitionScanner();
            String possible = new String(fContents.substring(0, start));
            IDocument document = new Document(possible);
            scanner.setRange(document, origOffset, possible.length());
            IToken token;
            while (!(token = scanner.nextToken()).isEOF()) // $codepro.audit.disable assignmentInCondition
            {
                push(new QueuedToken(token, scanner.getTokenOffset() + fOffset, scanner.getTokenLength()));
            }
        }
        push(qtoken);
        push(new QueuedToken(Token.EOF, start + origOffset + length, 0));
        return popTokenOffQueue();
    }

    private IToken handleSingleVariableStringInterpolation() throws IOException
    {
        addPoundToken();
        // let lexer scan the dynamic variable...
        int start = lexerSource.getOffset();
        lexer.nextToken();
        int end = lexerSource.getOffset();
        String content = fContents.substring(start, end);
        // push the dynamic var onto the queue
        push(new QueuedToken(createToken(RubySourceConfiguration.DEFAULT), fOffset, content.length()));
        setOffset(fOffset + content.length()); // move past dynamic var after we're done with queue
        return popTokenOffQueue();
    }

    private IToken handleStringInterpolation() throws IOException
    {
        // Can we just treat the arg token normally somehow?
        addPoundBraceToken();
        // We need to record the offset here, and the offset after asking for next token. Then grab code in between to
        // recurse on!
        int start = lexerSource.getOffset();
        // Seems like next token returned is considered string content and contains the interpolated code. We need to
        // dive into it specially.
        // FIXME JRuby parser lexer StringTerm doesn't properly handle nested strings inside DExpr. It just stops at
        // first '}'.
        lexer.nextToken();
        int end = lexerSource.getOffset();
        String content = fContents.substring(start, end);
        scanTokensInsideDynamicPortion(content);
        // Then lexer will resume by returning the "}" token as string content too
        return popTokenOffQueue();
    }

    public void setRange(IDocument document, int offset, int length)
    {
        setPartialRange(document, offset, length, RubySourceConfiguration.DEFAULT, 0);
    }

    private void reset()
    {
        // Close the lexer's reader?
        if (reader != null)
        {
            try
            {
                reader.close(); // $codepro.audit.disable closeInFinally
            }
            catch (IOException e) // $codepro.audit.disable emptyCatchClause
            {
                // ignore
            }
            reader = null;
        }
        lexer.reset();
        lexer.setState(LexState.EXPR_BEG);
        lexer.setPreserveSpaces(true);
        parserSupport.initTopLocalVariables();
        fQueue.clear();
        inSingleQuote = false;
        fContentType = RubySourceConfiguration.DEFAULT;
    }

    private void setOffset(int offset)
    {
        fOffset = offset;
    }

    private void addPoundToken()
    {
        addStringToken(1); // add token for the #
    }

    private void scanTokensInsideDynamicPortion(String content)
    {
        RubySourcePartitionScanner scanner = new RubySourcePartitionScanner();
        IDocument document = new Document(content);
        scanner.setRange(document, 0, content.length());
        IToken token;
        while (!(token = scanner.nextToken()).isEOF()) // $codepro.audit.disable assignmentInCondition
        {
            push(new QueuedToken(token, scanner.getTokenOffset() + fOffset, scanner.getTokenLength()));
        }
        setOffset(fOffset + content.length());
    }

    private void addPoundBraceToken()
    {
        addStringToken(2); // add token for the #{
    }

    private void addStringToken(int length)
    {
        String contentType = getStringType();
        if (RubySourceConfiguration.DEFAULT.equals(contentType))
        {
            contentType = RubySourceConfiguration.STRING_DOUBLE;
        }
        push(new QueuedToken(createToken(contentType), fOffset, length));
        setOffset(fOffset + length); // move past token
    }

    private void parseOutComments(List<CommentNode> comments)
    {
        for (CommentNode comment : comments)
        {
            int offset = correctOffset(comment);
            int length = comment.getContent().length();
            if (isCommentMultiLine(comment))
            {
                length = (origOffset + comment.getPosition().getEndOffset()) - offset;
                if (comment.getContent().charAt(0) != '=')
                {
                    length++;
                }
            }
            IToken token = createToken(getContentType(comment));
            push(new QueuedToken(token, offset, length));
        }
    }

    private IToken popTokenOffQueue()
    {
        QueuedToken token = fQueue.remove(0);
        setOffset(token.getOffset());
        setLength(token.getLength());
        return token.getToken();
    }

    private IToken getToken(int i)
    {
        // We have an unresolved heredoc
        if (fContentType.equals(RubySourceConfiguration.STRING_DOUBLE) && insideHeredoc())
        {
            if (reachedEndOfHeredoc())
            {
                fContentType = RubySourceConfiguration.DEFAULT;
                inSingleQuote = false;
                return createToken(RubySourceConfiguration.STRING_DOUBLE);
            }
        }
        if (fContentType.equals(RubySourceConfiguration.MULTI_LINE_COMMENT) && i != Tokens.tWHITESPACE)
        {
            fContentType = RubySourceConfiguration.DEFAULT;
        }
        switch (i)
        {
            case RubyTokenScanner.SPACE:
            case Tokens.tWHITESPACE:
                return createToken(getStringType());
            case Tokens.tCOMMENT:
                return createToken(RubySourceConfiguration.SINGLE_LINE_COMMENT);
            case Tokens.tDOCUMENTATION:
                return createToken(fContentType = RubySourceConfiguration.MULTI_LINE_COMMENT);
            case Tokens.tSTRING_CONTENT:
                return createToken(fContentType = getStringType());
            case Tokens.tSTRING_BEG:
                String opening = getOpeningString();
                if ("%".equals(opening)) // space after percent sign, it's an operator //$NON-NLS-1$
                {
                    return createToken(fContentType);
                }
                fOpeningString = opening;
                if (fOpeningString.equals("'") || fOpeningString.startsWith("%q")) //$NON-NLS-1$//$NON-NLS-2$
                {
                    inSingleQuote = true;
                    fContentType = RubySourceConfiguration.STRING_SINGLE;
                }
                else if (fOpeningString.startsWith(HEREDOC_MARKER_PREFIX)) // here-doc
                {
                    // FIXME If it's a heredoc mid-line, don't change the content type!
                    fOpeningString = generateOpeningStringForHeredocMarker(fOpeningString);
                    if (fOpeningString.length() > 0 && fOpeningString.charAt(0) == '\'')
                    {
                        return createToken(RubySourceConfiguration.STRING_SINGLE);
                    }
                    return createToken(RubySourceConfiguration.STRING_DOUBLE);
                }
                else
                {
                    fContentType = RubySourceConfiguration.STRING_DOUBLE;
                }
                return createToken(fContentType);
            case Tokens.tXSTRING_BEG:
                fOpeningString = getOpeningString();
                return createToken(fContentType = RubySourceConfiguration.COMMAND);
            case Tokens.tQWORDS_BEG:
            case Tokens.tWORDS_BEG:
                fOpeningString = getOpeningString();
                fContentType = RubySourceConfiguration.STRING_SINGLE;
                if (fOpeningString.length() > 1 && fOpeningString.charAt(0) == '%'
                        && Character.isUpperCase(fOpeningString.charAt(1)))
                {
                    fContentType = RubySourceConfiguration.STRING_DOUBLE;
                }
                return createToken(fContentType);
            case Tokens.tSTRING_END:
                String oldContentType = fContentType;
                // FIXME What if this is a nested heredoc?
                // FIXME What if the old content type was default? make it a string of some sort in string content...
                fContentType = RubySourceConfiguration.DEFAULT;
                // at end of string, the strterm is wiped, how can we tell what string type it was?
                return createToken(oldContentType);
            case Tokens.tREGEXP_BEG:
                fOpeningString = getOpeningString();
                return createToken(fContentType = RubySourceConfiguration.REGULAR_EXPRESSION);
            case Tokens.tREGEXP_END:
                fContentType = RubySourceConfiguration.DEFAULT;
                return createToken(RubySourceConfiguration.REGULAR_EXPRESSION);
            case Tokens.tSYMBEG:
                // Sometimes we need to add 1, sometimes two. Depends on if there's a space preceding the ':'
                int charAt = fOffset - origOffset;
                char c = fContents.charAt(charAt);
                int nextCharOffset = (fOffset + 1);
                while (c == ' ') // skip past space if it's there
                {
                    nextCharOffset++;
                    c = fContents.charAt(++charAt);
                }
                if (fContents.length() <= charAt + 1)
                {
                    return createToken(RubySourceConfiguration.DEFAULT);
                }
                if (c == '%') // %s syntax
                {
                    fOpeningString = getOpeningString();
                    fContentType = RubySourceConfiguration.STRING_SINGLE;
                }
                else if (c == ':') // normal syntax (i.e. ":symbol")
                {
                    if (fContents.length() <= charAt + 1)
                    {
                        return createToken(RubySourceConfiguration.DEFAULT);
                    }
                    nextCharOffset++;
                    c = fContents.charAt(++charAt);
                    if (c == '"') // Check for :"symbol" syntax
                    {
                        fOpeningString = "\""; //$NON-NLS-1$
                        push(new QueuedToken(createToken(RubySourceConfiguration.STRING_DOUBLE), nextCharOffset - 1, 1));
                        fContentType = RubySourceConfiguration.STRING_DOUBLE;
                    }
                }
                return createToken(RubySourceConfiguration.DEFAULT);
            default:
                return createToken(fContentType);
        }
    }

    /**
     * Wrap generating tokens so we can re-use the same object for the same data.
     * 
     * @param tokenName
     * @return
     */
    protected IToken createToken(String tokenName)
    {
        return CommonUtil.getToken(tokenName);
    }

    private String getStringType()
    {
        StrTerm strTerm = lexer.getStrTerm();
        if (strTerm != null)
        {
            if (strTerm instanceof HeredocTerm)
            {
                strTerm.splitEmbeddedTokens();
            }
            if (strTerm.isSubstituting())
            {
                if (RubySourceConfiguration.REGULAR_EXPRESSION.equals(fContentType)
                        || RubySourceConfiguration.COMMAND.equals(fContentType))
                {
                    return fContentType;
                }
                return RubySourceConfiguration.STRING_DOUBLE;
            }
            inSingleQuote = true;
            return RubySourceConfiguration.STRING_SINGLE;
        }
        return fContentType;
    }

    private boolean insideHeredoc()
    {
        return fOpeningString != null && fOpeningString.endsWith("\n"); //$NON-NLS-1$ // $codepro.audit.disable platformSpecificLineSeparator
    }

    private boolean reachedEndOfHeredoc()
    {
        return fContents.startsWith(fOpeningString.trim(), (fOffset - origOffset));
    }

    private String generateOpeningStringForHeredocMarker(String marker)
    {
        if (marker.startsWith(INDENTED_HEREDOC_MARKER_PREFIX))
        {
            marker = marker.substring(3); // $codepro.audit.disable questionableAssignment
        }
        else if (marker.startsWith(HEREDOC_MARKER_PREFIX))
        {
            marker = marker.substring(2); // $codepro.audit.disable questionableAssignment
        }
        return marker + "\n"; //$NON-NLS-1$ // $codepro.audit.disable platformSpecificLineSeparator
    }

    private String getOpeningString()
    {
        return getUntrimmedOpeningString().trim();
    }

    private String getUntrimmedOpeningString()
    {
        int start = fOffset - origOffset;
        List<CommentNode> comments = result.getCommentNodes();
        if (comments != null && !comments.isEmpty())
        {
            Node comment = comments.get(comments.size() - 1);
            int end = comment.getPosition().getEndOffset();
            start = end;
        }
        return new String(fContents.substring(start, lexerSource.getOffset()));
    }

    /**
     * correct start offset, since when a line with nothing but spaces on it appears before comment, we get messed up
     * positions
     */
    private int correctOffset(CommentNode comment)
    {
        return origOffset + comment.getPosition().getStartOffset();
    }

    private boolean isCommentMultiLine(CommentNode comment)
    {
        String src = getSource(fContents, comment);
        return src != null && src.startsWith(BEGIN);
    }

    private String getContentType(CommentNode comment)
    {
        if (isCommentMultiLine(comment))
        {
            return RubySourceConfiguration.MULTI_LINE_COMMENT;
        }
        return RubySourceConfiguration.SINGLE_LINE_COMMENT;
    }

    private void addQueuedToken(IToken returnValue)
    {
        // grab end of last comment (last thing in queue)
        QueuedToken token = peek();
        setOffset(token.getOffset() + token.getLength());
        int length = getAdjustedOffset() - fOffset;
        if (length < 0)
        {
            length = 0;
        }
        push(new QueuedToken(returnValue, fOffset, length));
    }

    private QueuedToken peek()
    {
        return fQueue.get(fQueue.size() - 1);
    }

    private void push(QueuedToken token)
    {
        Assert.isTrue(token.getLength() >= 0);
        fQueue.add(token);
    }

    private int getAdjustedOffset()
    {
        return lexerSource.getOffset() + origOffset;
    }

    private static String getSource(String contents, Node node)
    {
        if (node == null || contents == null)
        {
            return null;
        }
        SourcePosition pos = node.getPosition();
        if (pos == null)
        {
            return null;
        }
        if (pos.getStartOffset() >= contents.length())
        {
            return null; // position is past end of our source
        }
        if (pos.getEndOffset() > contents.length())
        {
            return null; // end is past end of source
        }
        return new String(contents.substring(pos.getStartOffset(), pos.getEndOffset()));
    }
}
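
/*
 * Illustrative usage sketch (not part of the original source): a partition scanner like this one is typically
 * driven by handing it a document range and pulling tokens until EOF. The Ruby snippet in the Document below is
 * a made-up example; only the public API declared above (setRange, nextToken, getTokenOffset, getTokenLength)
 * is assumed.
 *
 * IDocument doc = new Document("puts \"hello #{name}\" # trailing comment");
 * RubySourcePartitionScanner scanner = new RubySourcePartitionScanner();
 * scanner.setRange(doc, 0, doc.getLength());
 * for (IToken tok = scanner.nextToken(); !tok.isEOF(); tok = scanner.nextToken())
 * {
 *     System.out.println(tok.getData() + " at " + scanner.getTokenOffset() + ", length " + scanner.getTokenLength());
 * }
 */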