/*
 * Copyright 2009-2017 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.codehaus.groovy.eclipse.refactoring.formatter;

import java.util.ArrayList;
import java.util.List;

import groovyjarjarantlr.Token;
import groovyjarjarantlr.TokenStreamException;
import org.codehaus.greclipse.GroovyTokenTypeBridge;
import org.codehaus.groovy.antlr.GroovySourceToken;
import org.codehaus.groovy.eclipse.core.GroovyCore;
import org.eclipse.core.runtime.Assert;
import org.eclipse.jdt.groovy.core.util.GroovyScanner;
import org.eclipse.jdt.internal.core.util.Util;
import org.eclipse.jface.text.BadLocationException;
import org.eclipse.jface.text.DocumentEvent;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.IDocumentListener;
import org.eclipse.jface.text.ITextSelection;

/**
 * Provides methods to retrieve tokens for a given IDocument presumed to
 * contain Groovy source code.
 * <p>
 * A sophisticated implementation could try to work incrementally and invalidate
 * only some tokens when the document is changed. This implementation is rather
 * naive and clears the whole cache on every document change.
 */
public class GroovyDocumentScanner implements IDocumentListener {

    /** When true, {@link #getOffset(Token)} sanity-checks every position it computes. */
    private static final boolean TOKEN_POSITION_ASSERTS = true;

    /**
     * This is the document that we are chopping into tokens. This may not be
     * null, except when the GroovyDocumentScanner has been disposed (in which
     * case it should no longer be used).
     */
    private IDocument document;

    /**
     * This caches the list of tokens we have gotten from the document so far.
     * This may be null before we have started reading tokens.
     */
    protected List<Token> tokens;

    /** The scanner that produces the tokens; (re)created whenever the cache is rebuilt. */
    private GroovyScanner tokenScanner;

    /** At most this number of scanner errors will be reported */
    private static int logLimit = 4;

    /** Used as index for tokens that could not be found */
    private static final int NOT_FOUND = -1;

    /**
     * Creates a scanner for the given document and registers it as a listener
     * on that document, so that cached tokens are dropped on every change.
     *
     * @param document the document to tokenize; must not be null
     */
    public GroovyDocumentScanner(IDocument document) {
        this.document = document;
        this.document.addDocumentListener(this);
        reset();
    }

    /** Drops the cached tokens; they are rescanned lazily on the next request. */
    public void documentChanged(DocumentEvent event) {
        reset();
    }

    /** Nothing to do before a change; the cache is invalidated afterwards. */
    public void documentAboutToBeChanged(DocumentEvent event) {}

    /**
     * This method must be called internally before operating on the list of
     * scanned tokens, to ensure that we have scanned the file at least up to the
     * position that we are interested in.
     * <p>
     * Current implementation is very naive and just scans the whole file at
     * once. A smarter implementation could stop scanning when the position of
     * interest is reached, and keep enough state to be able to scan onward
     * later if a request for tokens requires it.
     *
     * @param end the offset up to which tokens are needed (currently ignored,
     *            since the whole document is always scanned)
     */
    protected void ensureScanned(int end) {
        if (tokens == null) {
            // We haven't started scanning yet. Initialise the scanner and token list.
            tokenScanner = new GroovyScanner(document.get());
            tokens = getTokensIncludingEOF();
        }
    }

    /**
     * Reads tokens from the scanner until the EOF token has been read.
     * <p>
     * If scanning fails part-way, the tokens read so far are returned; the
     * result may therefore lack the EOF token or even be empty.
     *
     * @return the scanned tokens, never null
     */
    private List<Token> getTokensIncludingEOF() {
        List<Token> result = new ArrayList<Token>();
        Token token;
        try {
            do {
                token = nextToken();
                result.add(token);
            } while (token.getType() != GroovyTokenTypeBridge.EOF);
        } catch (BadLocationException bad) {
            // document may be unreconciled
        } catch (Exception e) {
            // Only report the first few failures to avoid flooding the log.
            if (logLimit-- > 0) {
                Util.log(e);
            }
        }
        return result;
    }

    /**
     * Fetches the next token from the scanner, attempting a single recovery
     * if the scanner reports an error.
     *
     * @throws TokenStreamException if the scanner fails again after recovery
     * @throws BadLocationException if recovery fails with a bad document location
     */
    private Token nextToken() throws TokenStreamException, BadLocationException {
        Token token;
        try {
            token = tokenScanner.nextToken();
        } catch (TokenStreamException e) {
            // Try to recover
            tokenScanner.recover(document);
            // If it fails again we give up.
            token = tokenScanner.nextToken();
        }
        return token;
    }

    /**
     * Called upon initialisation and upon any change to the document to
     * invalidate the list of cached tokens.
     */
    private void reset() {
        tokens = null;
    }

    /**
     * Translate Antlr line/column positions of a token into Eclipse document offset.
     *
     * @param token the token to locate
     * @return offset of the start of the token in the document.
     * @throws BadLocationException if the token's position cannot be resolved in the document
     */
    public int getOffset(Token token) throws BadLocationException {
        int offset = GroovyScanner.getOffset(document, token.getLine(), token.getColumn());
        // These asserts should give some confidence we compute positions correctly.
        // The EOF token is an exception: it is not actually in the document, so its
        // position info doesn't seem to obey these assumptions.
        if (TOKEN_POSITION_ASSERTS && token.getType() != GroovyTokenTypeBridge.EOF) {
            int col = token.getColumn() - 1;
            int line = token.getLine() - 1;
            Assert.isTrue(col >= 0);
            Assert.isTrue(col < document.getLineLength(line), "Token: " + token);
            Assert.isTrue(offset < document.getLength());
            if (token.getType() == GroovyTokenTypeBridge.IDENT) {
                // Don't check this for other tokens, because the Antlr token's
                // "getText()" method doesn't always return the actual text from
                // the document (e.g. it returns "<newline>" for newline tokens).
                String antlrText = token.getText();
                String eclipseText = document.get(offset, antlrText.length());
                Assert.isTrue(eclipseText.equals(antlrText));
            }
        }
        return offset;
    }

    /**
     * Translate antlr line/column position of the end of the token into
     * Eclipse document offset.
     *
     * @param token the token to locate; must be a {@link GroovySourceToken}
     * @return offset of the end of the token in the document
     * @throws BadLocationException if the token's end position cannot be resolved in the document
     */
    public int getEnd(Token token) throws BadLocationException {
        GroovySourceToken gToken = (GroovySourceToken) token;
        return GroovyScanner.getOffset(document, gToken.getLineLast(), gToken.getColumnLast());
    }

    /**
     * Call this method when you don't need the scanner anymore, to release
     * resources it may be holding on to.
     * <p>
     * Disposing an already disposed object is tolerated.
     */
    public void dispose() {
        if (this.document != null) {
            document.removeDocumentListener(this);
            this.document = null;
        }
    }

    @Override
    protected void finalize() throws Throwable {
        this.dispose();
        super.finalize();
    }

    /**
     * @return The document that this scanner is operating on.
     */
    public IDocument getDocument() {
        return document;
    }

    // TODO: add support for IBlockTextSelection
    /**
     * Retrieve the tokens covered by a text selection. A null or empty
     * selection stands for the whole document.
     *
     * @param selection the selection, may be null
     * @return the tokens whose starting position lies inside the selection
     */
    public List<Token> getTokens(ITextSelection selection) {
        int start;
        int end;
        if (selection == null || selection.getLength() == 0) {
            start = 0;
            end = document.getLength();
        } else {
            start = selection.getOffset();
            end = start + selection.getLength();
        }
        return getTokens(start, end);
    }

    /**
     * Retrieve a list of tokens for a range of text in the document.
     * Any token whose starting position is in the range [start..end)
     * (end is exclusive) will be included in the list.
     *
     * @param start first offset of the range (inclusive)
     * @param end last offset of the range (exclusive)
     * @return the matching tokens, possibly empty; a non-empty result is a
     *         view on the cached token list
     */
    public List<Token> getTokens(int start, int end) {
        if (start >= end) {
            return new ArrayList<Token>();
        }
        try {
            int startTokenIndex = findTokenFrom(start);
            if (startTokenIndex == NOT_FOUND) {
                return new ArrayList<Token>();
            }
            if (getOffset(tokens.get(startTokenIndex)) >= end) {
                return new ArrayList<Token>();
            }

            int endTokenIndex = findTokenFrom(end);
            if (endTokenIndex == NOT_FOUND) {
                // Take the last token in the file as end token:
                // Since startToken is between start and end, and since there
                // are no tokens after end, all tokens from startToken onward
                // should be returned!
                endTokenIndex = tokens.size() - 1;
            } else {
                // Actually the token before is the one we want!
                // The one we found is >= end.
                endTokenIndex = endTokenIndex - 1;
            }
            Assert.isTrue(startTokenIndex <= endTokenIndex);
            return tokens.subList(startTokenIndex, endTokenIndex + 1);
        } catch (BadLocationException e) {
            throw new Error(e);
        }
    }

    /**
     * Get the tokens up to a given offset, from the start of the line that this
     * offset is in.
     *
     * @param offset the offset at which to stop (exclusive)
     * @return List of tokens from start of line to given offset.
     */
    public List<Token> getLineTokensUpto(int offset) {
        try {
            int start = document.getLineOffset(document.getLineOfOffset(offset));
            return getTokens(start, offset);
        } catch (BadLocationException e) {
            // The offset could not be resolved to a line: report no tokens.
            return new ArrayList<Token>();
        }
    }

    /**
     * Get the tokens starting from a given offset, up to the end of the line
     * that offset is in.
     *
     * @param offset the offset at which to start (inclusive)
     * @return List of tokens from the given offset to the end of its line.
     */
    public List<Token> getLineTokensFrom(int offset) {
        try {
            int line = document.getLineOfOffset(offset);
            int lineEnd = document.getLineOffset(line) + document.getLineLength(line);
            return getTokens(offset, lineEnd);
        } catch (BadLocationException e) {
            GroovyCore.logException("Recoverable internal error", e);
            return new ArrayList<Token>();
        }
    }

    /**
     * Find the index of the first token that has an offset
     * greater or equal to a given offset.
     * If such a token is not found, then NOT_FOUND is returned.
     *
     * @param offset the offset to search from; must not be negative
     * @return index into {@link #tokens}, or NOT_FOUND
     */
    protected int findTokenFrom(int offset) {
        Assert.isLegal(offset >= 0);
        ensureScanned(offset);
        try {
            int start = 0;
            int end = tokens.size() - 1;
            // The "candidates" are all indexes in range [start..end].
            // Each round first tests the lowest candidate, then halves the
            // remaining range, until we either exhaust the range or find a
            // start that matches the condition.
            while (start <= end) {
                if (getOffset(tokens.get(start)) >= offset) {
                    // start token is good: done
                    return start;
                }
                // start token was bad. No need to consider it anymore
                start++;
                if (start > end) {
                    return NOT_FOUND;
                }

                // Overflow-safe midpoint of [start..end].
                int mid = start + (end - start) / 2;
                int midOfs = getOffset(tokens.get(mid));
                if (midOfs >= offset) {
                    // Mid token is already good (matches condition)
                    end = mid;
                } else {
                    // Mid token is still bad (does not match condition)
                    start = mid + 1;
                }
            }
            return NOT_FOUND;
        } catch (BadLocationException e) {
            // If this code works as it should exceptions should not happen!
            throw new Error(e);
        }
    }

    /**
     * Find the last token whose starting position occurs before
     * a given offset.
     *
     * @param offset the offset to search backwards from; must not be negative
     * @return A token, or null if no such token exists.
     */
    public Token getLastTokenBefore(int offset) {
        int index = findTokenFrom(offset);
        if (index == NOT_FOUND) {
            // Every token starts before the offset, so the last one is the answer.
            return getLastToken();
        }
        if (index > 0) {
            return tokens.get(index - 1);
        }
        return null;
    }

    /**
     * Get the last token whose offset is before this token's offset.
     *
     * @param token the reference token
     * @return the preceding token, or null if there is none
     * @throws BadLocationException if the reference token's position cannot be resolved
     */
    public Token getLastTokenBefore(Token token) throws BadLocationException {
        return getLastTokenBefore(getOffset(token));
    }

    /**
     * @return The last token in the document, or null if scanning produced
     *         no tokens at all (e.g. the scanner failed on the first token).
     */
    public Token getLastToken() {
        ensureScanned(Integer.MAX_VALUE);
        if (tokens.isEmpty()) {
            // A failed scan can leave the list empty; avoid tokens.get(-1).
            return null;
        }
        return tokens.get(tokens.size() - 1);
    }

    /**
     * Get tokens on a given line, this includes tokens corresponding to
     * newlines. However, newline tokens are only returned for non empty lines,
     * since the GroovyScanner only returns a single newline token for a
     * sequence of newlines.
     *
     * @param line the line number, as understood by the document
     * @return the tokens starting on that line
     * @throws BadLocationException if the line does not exist in the document
     */
    public List<Token> getLineTokens(int line) throws BadLocationException {
        int lineOffset = document.getLineOffset(line);
        return getTokens(lineOffset, lineOffset + document.getLineLength(line));
    }

    /**
     * Get first token with position >= offset.
     *
     * @param offset the offset to search from; must not be negative
     * @return The token or null, if such a token doesn't exist.
     */
    public Token getTokenFrom(int offset) {
        int index = findTokenFrom(offset);
        if (index == NOT_FOUND) {
            return null;
        }
        return tokens.get(index);
    }

    /**
     * Get the first token that starts after the start of the given token.
     *
     * @param token the reference token
     * @return the following token, or null if there is none
     * @throws BadLocationException if the reference token's position cannot be resolved
     */
    public Token getNextToken(Token token) throws BadLocationException {
        return getTokenFrom(getOffset(token) + 1);
    }

    /**
     * Find the last token before the given offset that is neither a
     * whitespace nor a newline token.
     *
     * @param offset the offset to search backwards from; must not be negative
     * @return the token, or null if only whitespace (or nothing) precedes the offset
     * @throws BadLocationException if a token's position cannot be resolved
     */
    public Token getLastNonWhitespaceTokenBefore(int offset) throws BadLocationException {
        Token result = getLastTokenBefore(offset);
        while (result != null && isWhitespace(result)) {
            result = getLastTokenBefore(result);
        }
        return result;
    }

    /** Tells whether the token is of type WS or NLS. */
    private boolean isWhitespace(Token result) {
        int type = result.getType();
        return type == GroovyTokenTypeBridge.WS || type == GroovyTokenTypeBridge.NLS;
    }
}