/*
* Copyright 2009-2017 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.codehaus.groovy.eclipse.refactoring.formatter;
import java.util.ArrayList;
import java.util.List;
import groovyjarjarantlr.Token;
import groovyjarjarantlr.TokenStreamException;
import org.codehaus.greclipse.GroovyTokenTypeBridge;
import org.codehaus.groovy.antlr.GroovySourceToken;
import org.codehaus.groovy.eclipse.core.GroovyCore;
import org.eclipse.core.runtime.Assert;
import org.eclipse.jdt.groovy.core.util.GroovyScanner;
import org.eclipse.jdt.internal.core.util.Util;
import org.eclipse.jface.text.BadLocationException;
import org.eclipse.jface.text.DocumentEvent;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.IDocumentListener;
import org.eclipse.jface.text.ITextSelection;
/**
* Provides methods to retrieve tokens for a given IDocument presumed to
* contain Groovy source code.
* <p>
* A sophisticated implementation could try to work incrementally and invalidate
* only some tokens when the document is changed. This implementation is rather
* naive and clears the whole cache on every document change.
*/
public class GroovyDocumentScanner implements IDocumentListener {

    /** When true, {@link #getOffset(Token)} sanity-checks the positions it computes. */
    private static final boolean TOKEN_POSITION_ASSERTS = true;

    /**
     * This is the document that we are chopping into tokens. This may not be
     * null, except when the GroovyDocumentScanner has been disposed (in which
     * case it should no longer be used).
     */
    private IDocument document;

    /**
     * This caches the list of tokens we have gotten from the document so far.
     * This may be null before we have started reading tokens, and is reset to
     * null whenever the document changes.
     */
    protected List<Token> tokens;

    /** Scanner used to produce {@link #tokens}; created lazily by {@link #ensureScanned(int)}. */
    private GroovyScanner tokenScanner;

    /** At most this number of scanner errors will be reported */
    private static int logLimit = 4;

    /** Used as index for tokens that could not be found */
    private static final int NOT_FOUND = -1;

    /**
     * Creates a scanner for the given document and registers it as a listener
     * on that document, so that the token cache is invalidated on every change.
     *
     * @param document the document to tokenize
     */
    public GroovyDocumentScanner(IDocument document) {
        this.document = document;
        this.document.addDocumentListener(this);
        reset();
    }

    /** Invalidates the token cache: any change to the document makes it stale. */
    public void documentChanged(DocumentEvent event) {
        reset();
    }

    /** Nothing to do before a change; the cache is dropped in {@link #documentChanged(DocumentEvent)}. */
    public void documentAboutToBeChanged(DocumentEvent event) {}

    /**
     * This method must be called internally before operating on the list of
     * scanned tokens, to ensure that we have scanned the file at least up to
     * the position that we are interested in.
     * <p>
     * Current implementation is very naive and just scans the whole file at
     * once. A smarter implementation could stop scanning when the position of
     * interest is reached, and keep enough state to be able to scan onward
     * later if a request for tokens requires it.
     *
     * @param end position up to which tokens are needed (currently unused,
     *            because the whole document is always scanned)
     */
    protected void ensureScanned(int end) {
        if (tokens == null) {
            // We haven't started scanning yet. Initialise the scanner and token list.
            tokenScanner = new GroovyScanner(document.get());
            tokens = getTokensIncludingEOF();
        }
    }

    /**
     * Reads tokens from the scanner until the EOF token (inclusive) has been
     * read, or until scanning fails.
     *
     * @return the tokens read so far; if scanning failed this list is
     *         incomplete and may even be empty
     */
    private List<Token> getTokensIncludingEOF() {
        List<Token> result = new ArrayList<Token>();
        Token token;
        try {
            do {
                token = nextToken();
                result.add(token);
            } while (token.getType() != GroovyTokenTypeBridge.EOF);
        } catch (BadLocationException bad) {
            // document may be unreconciled
        } catch (Exception e) {
            // Only decrement while positive so that the counter cannot keep
            // running downwards after the limit has been reached.
            if (logLimit > 0) {
                logLimit--;
                Util.log(e);
            }
        }
        return result;
    }

    /**
     * Fetches the next token from the scanner, attempting a single recovery
     * if the scanner reports an error.
     *
     * @throws TokenStreamException if scanning fails again after recovery
     * @throws BadLocationException declared by the recovery step
     */
    private Token nextToken() throws TokenStreamException, BadLocationException {
        Token token;
        try {
            token = tokenScanner.nextToken();
        } catch (TokenStreamException e) {
            // Try to recover
            tokenScanner.recover(document);
            // If it fails again we give up.
            token = tokenScanner.nextToken();
        }
        return token;
    }

    /**
     * Called upon initialisation and upon any change to the document to
     * invalidate the list of cached tokens.
     */
    private void reset() {
        tokens = null;
    }

    /**
     * Translate Antlr line/column positions of a token into Eclipse document offset.
     *
     * @param token the token to locate
     * @return offset of the start of the token in the document.
     * @throws BadLocationException if the token's position is not valid in the document
     */
    public int getOffset(Token token) throws BadLocationException {
        int offset = GroovyScanner.getOffset(document, token.getLine(), token.getColumn());
        if (TOKEN_POSITION_ASSERTS) {
            // These asserts should give some confidence we compute
            // positions correctly.
            if (token.getType() == GroovyTokenTypeBridge.EOF) {
                // EOF token is an exception, it is not actually in the
                // document so its position info doesn't seem to obey these
                // assumptions.
            } else {
                // Antlr positions are 1-based, document lines/columns are 0-based.
                int col = token.getColumn() - 1;
                int line = token.getLine() - 1;
                Assert.isTrue(col >= 0);
                Assert.isTrue(col < document.getLineLength(line), "Token: " + token);
                Assert.isTrue(offset < document.getLength());
                if (token.getType() == GroovyTokenTypeBridge.IDENT) {
                    // Don't check this for other tokens, because the Antlr token's
                    // "getText()" method doesn't always return the actual text from
                    // the document (e.g. it returns "<newline>" for newline tokens).
                    String antlrText = token.getText();
                    String eclipseText = document.get(offset, antlrText.length());
                    Assert.isTrue(eclipseText.equals(antlrText));
                }
            }
        }
        return offset;
    }

    /**
     * Translate antlr line/column position of the end of the token into
     * Eclipse document offset.
     *
     * @param token the token to locate; must be a {@link GroovySourceToken}
     * @return offset of the end of the token in the document
     * @throws BadLocationException if the token's end position is not valid in the document
     */
    public int getEnd(Token token) throws BadLocationException {
        GroovySourceToken gToken = (GroovySourceToken) token;
        return GroovyScanner.getOffset(document, gToken.getLineLast(), gToken.getColumnLast());
    }

    /**
     * Call this method when you don't need the scanner anymore, to release
     * resources it may be holding on to.
     * <p>
     * Disposing an already disposed object is tolerated.
     */
    public void dispose() {
        if (this.document != null) {
            document.removeDocumentListener(this);
            this.document = null;
        }
        // A disposed scanner must not be used anymore, so there is no point
        // in keeping the cached tokens or the underlying scanner alive.
        reset();
        tokenScanner = null;
    }

    /** Safety net that disposes the scanner if the client forgot to do so. */
    @Override
    protected void finalize() throws Throwable {
        try {
            this.dispose();
        } finally {
            // Always give the superclass its turn, even if dispose() failed.
            super.finalize();
        }
    }

    /**
     * @return The document that this scanner is operating on, or null once
     *         the scanner has been disposed.
     */
    public IDocument getDocument() {
        return document;
    }

    /**
     * Retrieve the tokens covered by a text selection. A null or empty
     * selection is treated as "the whole document".
     *
     * @param selection the selection, may be null
     * @return the tokens whose starting position lies within the selection
     */
    // TODO: add support for IBlockTextSelection
    public List<Token> getTokens(ITextSelection selection) {
        int start;
        int end;
        if (selection == null || selection.getLength() == 0) {
            start = 0;
            end = document.getLength();
        } else {
            start = selection.getOffset();
            end = start + selection.getLength();
        }
        return getTokens(start, end);
    }

    /**
     * Retrieve a list of tokens for a range of text in the document.
     * Any token whose starting position is in the range [start, end)
     * (end is exclusive) will be included in the list.
     *
     * @param start offset of the start of the range (inclusive)
     * @param end offset of the end of the range (exclusive)
     * @return the matching tokens (possibly empty, never null); a non-empty
     *         result is a view onto the cached token list
     */
    public List<Token> getTokens(int start, int end) {
        if (start >= end) {
            return new ArrayList<Token>();
        }
        try {
            int startTokenIndex = findTokenFrom(start);
            if (startTokenIndex == NOT_FOUND) {
                return new ArrayList<Token>();
            }
            if (getOffset(tokens.get(startTokenIndex)) >= end) {
                return new ArrayList<Token>();
            }
            int endTokenIndex = findTokenFrom(end);
            if (endTokenIndex == NOT_FOUND) {
                // Take the last token in the file as end token:
                // since startToken is between start and end, and since there
                // are no tokens after end, all tokens from startToken onward
                // should be returned!
                endTokenIndex = tokens.size() - 1;
            } else {
                // Actually the token before is the one we want!
                // The one we found is >= end.
                endTokenIndex = endTokenIndex - 1;
            }
            Assert.isTrue(startTokenIndex <= endTokenIndex);
            return tokens.subList(startTokenIndex, endTokenIndex + 1);
        } catch (BadLocationException e) {
            throw new Error(e);
        }
    }

    /**
     * Get the tokens up to a given offset, from the start of the line that this
     * offset is in.
     *
     * @param offset offset marking the (exclusive) end of the range
     * @return List of tokens from start of line to given offset; empty if the
     *         offset is not a valid position in the document.
     */
    public List<Token> getLineTokensUpto(int offset) {
        try {
            int start = document.getLineOffset(document.getLineOfOffset(offset));
            return getTokens(start, offset);
        } catch (BadLocationException e) {
            // The offset does not denote a line of the document: no tokens.
            return new ArrayList<Token>();
        }
    }

    /**
     * Get the tokens starting from a given offset, up to the end of the line
     * that this offset is in.
     *
     * @param offset offset marking the (inclusive) start of the range
     * @return List of tokens from the given offset to the end of its line;
     *         empty if the offset is not a valid position in the document.
     */
    public List<Token> getLineTokensFrom(int offset) {
        try {
            int line = document.getLineOfOffset(offset);
            int lineEnd = document.getLineOffset(line) + document.getLineLength(line);
            return getTokens(offset, lineEnd);
        } catch (BadLocationException e) {
            GroovyCore.logException("Recoverable internal error", e);
            return new ArrayList<Token>();
        }
    }

    /**
     * Find the index of the first token that has an offset
     * greater or equal to a given offset.
     * If such a token is not found, then NOT_FOUND is returned.
     *
     * @param offset non-negative document offset
     * @return index into {@link #tokens}, or NOT_FOUND
     */
    protected int findTokenFrom(int offset) {
        Assert.isLegal(offset >= 0);
        ensureScanned(offset);
        try {
            int start = 0;
            int end = tokens.size() - 1;
            // The "candidates" are all indexes in range [start..end]
            // We will binary search, until we either exhaust the range,
            // or find a start that matches the condition.
            while (start <= end) {
                if (getOffset(tokens.get(start)) >= offset) {
                    // start token is good: done
                    return start;
                } else {
                    // start token was bad. No need to consider it anymore
                    start++;
                    if (start > end) {
                        return NOT_FOUND;
                    }
                }
                // Overflow-safe midpoint of [start..end].
                int mid = start + (end - start) / 2;
                int midOfs = getOffset(tokens.get(mid));
                if (midOfs >= offset) {
                    // Mid token is already good (matches condition)
                    end = mid;
                } else {
                    // Mid token is still bad (does not match condition)
                    start = mid + 1;
                }
            }
            return NOT_FOUND;
        } catch (BadLocationException e) {
            throw new Error(e); // If this code works as it should exceptions
                                // should not happen!
        }
    }

    /**
     * Find the last token whose starting position occurs before
     * a given offset.
     *
     * @return A token, or null if no such token exists.
     */
    public Token getLastTokenBefore(int offset) {
        int index = findTokenFrom(offset);
        if (index == NOT_FOUND) {
            // No token starts at or after offset, so the last token of the
            // document (if there is one) is the one we are looking for.
            return getLastToken();
        }
        if (index > 0) {
            return tokens.get(index - 1);
        }
        return null;
    }

    /**
     * Get the last token whose offset is before this token's offset.
     *
     * @return A token, or null if no such token exists.
     * @throws BadLocationException if the position of the given token is not valid
     */
    public Token getLastTokenBefore(Token token) throws BadLocationException {
        return getLastTokenBefore(getOffset(token));
    }

    /**
     * @return The last token in the document, or null if no token at all
     *         could be scanned.
     */
    public Token getLastToken() {
        ensureScanned(Integer.MAX_VALUE);
        // Scanning may have failed before producing any token, in which case
        // the cache is empty and there is no last token.
        if (tokens.isEmpty()) {
            return null;
        }
        return tokens.get(tokens.size() - 1);
    }

    /**
     * Get tokens on a given line, this includes tokens corresponding to
     * newlines. However, newline tokens are only returned for non empty lines,
     * since the GroovyScanner only returns a single newline token for a
     * sequence of newlines.
     *
     * @param line line number as understood by the document
     * @throws BadLocationException if the line does not exist in the document
     */
    public List<Token> getLineTokens(int line) throws BadLocationException {
        int lineOffset = document.getLineOffset(line);
        return getTokens(lineOffset, lineOffset + document.getLineLength(line));
    }

    /**
     * Get first token with position >= offset.
     *
     * @return The token or null, if such a token doesn't exist.
     */
    public Token getTokenFrom(int offset) {
        int index = findTokenFrom(offset);
        if (index == NOT_FOUND) {
            return null;
        }
        return tokens.get(index);
    }

    /**
     * Get the first token that starts after the start of the given token.
     *
     * @return The token or null, if such a token doesn't exist.
     * @throws BadLocationException if the position of the given token is not valid
     */
    public Token getNextToken(Token token) throws BadLocationException {
        return getTokenFrom(getOffset(token) + 1);
    }

    /**
     * Find the last token before the given offset that is neither whitespace
     * nor a newline token.
     *
     * @return The token or null, if such a token doesn't exist.
     * @throws BadLocationException if the position of a visited token is not valid
     */
    public Token getLastNonWhitespaceTokenBefore(int offset) throws BadLocationException {
        Token result = getLastTokenBefore(offset);
        while (result != null && isWhitespace(result)) {
            result = getLastTokenBefore(result);
        }
        return result;
    }

    /** @return true if the token is of type WS or NLS. */
    private boolean isWhitespace(Token result) {
        int type = result.getType();
        return type == GroovyTokenTypeBridge.WS || type == GroovyTokenTypeBridge.NLS;
    }
}