/*
* Copyright (c) 2012, the Dart project authors.
*
* Licensed under the Eclipse Public License v1.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.eclipse.org/legal/epl-v10.html
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.dart.tools.ui.internal.text.functions;
import com.google.dart.tools.ui.DartUI;
import com.google.dart.tools.ui.text.DartPartitions;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.rules.ICharacterScanner;
import org.eclipse.jface.text.rules.IPartitionTokenScanner;
import org.eclipse.jface.text.rules.IToken;
import org.eclipse.jface.text.rules.Token;
/**
* This scanner recognizes doc comments, multi-line comments, single-line comments, strings, and
* multi-line strings, in addition to the default.
*/
public class FastDartPartitionScanner implements IPartitionTokenScanner, DartPartitions {
/**
* Values of the enumeration <code>ScannerState</code> represent the states that the scanner can
* be in. The scanner is essentially a state machine with these states.
*/
private enum ScannerState {
//
// Final states corresponding to partitions.
//
CODE(CODE_TOKEN), //
SINGLE_LINE_COMMENT(SINGLE_LINE_COMMENT_TOKEN), //
SINGLE_LINE_DOC_COMMENT(SINGLE_LINE_DOC_COMMENT_TOKEN), //
MULTI_LINE_COMMENT(MULTI_LINE_COMMENT_TOKEN), //
DOC_COMMENT(DOC_COMMENT_TOKEN), //
STRING(STRING_TOKEN), //
MULTI_LINE_STRING(MULTI_LINE_STRING_TOKEN),
//
// Non-final states. The token type associated with these states is the type that will be
// returned if we are in the state at the end of the file.
//
SINGLE_LINE_COMMENT_PREFIX(SINGLE_LINE_COMMENT_TOKEN), //
SINGLE_LINE_DOC_COMMENT_PREFIX(SINGLE_LINE_DOC_COMMENT_TOKEN), //
MULTI_LINE_COMMENT_PREFIX(MULTI_LINE_COMMENT_TOKEN), //
DOC_COMMENT_PREFIX(DOC_COMMENT_TOKEN), //
RAW_STRING_PREFIX(STRING_TOKEN), //
STRING_PREFIX(STRING_TOKEN), //
RAW_MULTI_LINE_STRING_PREFIX(MULTI_LINE_STRING_TOKEN), //
MULTI_LINE_STRING_PREFIX(MULTI_LINE_STRING_TOKEN), //
SIMPLE_INTERPOLATION_PREFIX(CODE_TOKEN), //
SIMPLE_INTERPOLATION(CODE_TOKEN), //
BLOCK_INTERPOLATION_PREFIX(CODE_TOKEN), //
BLOCK_INTERPOLATION(CODE_TOKEN);
/**
* The token that will be returned to represent the state as a partition.
*/
private IToken token;
/**
* Initialize a newly created state to have the given token.
*
* @param token the token that will be returned to represent the state as a partition
*/
private ScannerState(IToken token) {
this.token = token;
}
}
/**
* Instances of the class <code>StringState</code> represent the information about a string that
* is needed when we return to scanning that string, such as after completing an interpolation
* within a string.
*/
private static class StringState {
/**
* The state that was current before this state.
*/
private StringState previous;
/**
* A flag indicating whether this string is a raw string.
*/
private boolean raw;
/**
* The quote character used to start this string.
*/
private int quote;
/**
* The number of quote characters (1 or 3) used to start this string.
*/
private int quoteCount;
/**
* The number of unclosed braces that have been encountered in the current string interpolation.
*/
private int braceCount;
/**
* Initialize a newly created string state to supersede the previous state.
*
* @param previous the state that was current before this state
* @param raw a flag indicating whether this string is a raw string
* @param quote the quote character used to start this string
* @param quoteCount the number of quote characters (1 or 3) used to start this string
*/
public StringState(StringState previous, boolean raw, int quote, int quoteCount) {
this.previous = previous;
this.raw = raw;
this.quote = quote;
this.quoteCount = quoteCount;
this.braceCount = 0;
}
}
/**
* Instances of the class <code>TokenData</code> represent a single token that was scanned. The
* scanner scans all of the tokens, creating a linked list of tokens to be returned by
* {@link FastDartPartitionScanner#nextToken()}.
*/
private static class TokenData {
/**
* Create a new token that comes after the given token in the linked list with the given
* information.
*
* @param previous the token before the new token in the linked list
* @param token the token being added to the list
* @param tokenOffset the offset of the token in the source
* @param tokenLength the length of the token
* @return the token that was created
*/
public static TokenData following(TokenData previous, IToken token, int tokenOffset,
int tokenLength) {
TokenData data = new TokenData(token, tokenOffset, tokenLength);
previous.next = data;
return data;
}
/**
* The token value being represented.
*/
private IToken token;
/**
* The offset of the token in the source.
*/
private int tokenOffset;
/**
* The length of the token.
*/
private int tokenLength;
/**
* The data for the token following this token.
*/
private TokenData next;
/**
* Initialize a newly created node in the linked list of token data to store the information
* associated with the given token.
*
* @param token the token being represented by this node
* @param tokenOffset the offset of the token in the source
* @param tokenLength the length of the token
*/
public TokenData(IToken token, int tokenOffset, int tokenLength) {
this.token = token;
this.tokenOffset = tokenOffset;
this.tokenLength = tokenLength;
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
printOn(builder);
return builder.toString();
}
/**
* Append a textual representation of this token to the given builder.
*
* @param builder the builder to which the textual representation is to be added
*/
private void printOn(StringBuilder builder) {
builder.append(token.getData());
builder.append(" (");
builder.append(tokenOffset);
builder.append(" - ");
builder.append(tokenOffset + tokenLength - 1);
builder.append(")");
if (next != null && next != this) {
builder.append(", ");
next.printOn(builder);
}
}
}
private static IToken CODE_TOKEN = new Token(null);
private static IToken SINGLE_LINE_COMMENT_TOKEN = new Token(DART_SINGLE_LINE_COMMENT);
private static IToken SINGLE_LINE_DOC_COMMENT_TOKEN = new Token(DART_SINGLE_LINE_DOC);
private static IToken MULTI_LINE_COMMENT_TOKEN = new Token(DART_MULTI_LINE_COMMENT);
private static IToken DOC_COMMENT_TOKEN = new Token(DART_DOC);
private static IToken STRING_TOKEN = new Token(DART_STRING);
private static IToken MULTI_LINE_STRING_TOKEN = new Token(DART_MULTI_LINE_STRING);
/**
* Return the scanner state corresponding to the given partition type.
*
* @param contentType the partition type being converted to a scanner state
* @return the scanner state corresponding to the given partition type
*/
// private static ScannerState getState(String contentType) {
// if (contentType == null) {
// return ScannerState.CODE;
// } else if (contentType.equals(DART_SINGLE_LINE_COMMENT)) {
// return ScannerState.SINGLE_LINE_COMMENT;
// } else if (contentType.equals(DART_MULTI_LINE_COMMENT)) {
// return ScannerState.MULTI_LINE_COMMENT;
// } else if (contentType.equals(DART_DOC)) {
// return ScannerState.DOC_COMMENT;
// } else if (contentType.equals(DART_STRING)) {
// return ScannerState.STRING;
// } else if (contentType.equals(DART_MULTI_LINE_STRING)) {
// return ScannerState.MULTI_LINE_STRING;
// } else {
// return ScannerState.CODE;
// }
// }
/**
* The scanner used to read characters from the document.
*/
private final BufferedDocumentScanner scanner = new BufferedDocumentScanner(1000); // faster implementation
/**
* The offset of the last returned token.
*/
private int tokenOffset;
/**
* The length of the last returned token.
*/
private int tokenLength;
/**
* At the beginning of a scan, the number of characters between the beginning of the partition and
* the beginning of the range being scanned. At other times, zero (0).
*/
private int prefixLength;
/**
* The state of the scanner.
*/
private ScannerState scannerState;
/**
* The state of the string that we are currently parsing, or <code>null</code> if we are not
* inside a string.
*/
private StringState stringState = null;
/**
* The current nesting depth for block comments.
*/
private int commentDepth = 0;
/**
* The head of the linked list, which always points to the data for the token that was last
* returned.
*/
private TokenData currentToken;
/**
* A flag used to determine whether debugging output should be produced.
*/
private static final boolean DEBUG = false;
/**
* Initialize a newly created scanner.
*/
public FastDartPartitionScanner() {
super();
}
@Override
public int getTokenLength() {
return currentToken.tokenLength;
}
@Override
public int getTokenOffset() {
return currentToken.tokenOffset;
}
@Override
public IToken nextToken() {
currentToken = currentToken.next;
if (DEBUG) {
System.out.println(" " + currentToken.tokenOffset + " - "
+ (currentToken.tokenOffset + currentToken.tokenLength - 1) + " ("
+ currentToken.tokenLength + ") : " + currentToken.token.getData());
}
return currentToken.token;
}
@Override
public void setPartialRange(IDocument document, int offset, int length, String contentType,
int partitionOffset) {
if (DartUI.isTooComplexDartDocument(document)) {
return;
}
if (DEBUG) {
System.out.println("setPartialRange(?, " + offset + ", " + length + ", " + contentType + ", "
+ partitionOffset + ")");
}
// Scan a multi-line string from the beginning, so that the active string delimiter gets set.
if (contentType != null && contentType.equals(DART_MULTI_LINE_STRING)) {
length += offset - partitionOffset;
offset = partitionOffset;
}
setRange(document, offset, length);
}
@Override
public void setRange(IDocument document, int offset, int length) {
commentDepth = 0;
scanner.setRange(document, 0, document.getLength());
tokenOffset = 0;
tokenLength = 0;
prefixLength = 0;
scannerState = ScannerState.CODE;
stringState = null;
currentToken = buildData();
trimTokenData(offset, length);
}
/**
* Advance to the next character in the input.
*/
private void advance() {
tokenLength++;
scanner.read();
}
/**
* Build the linked list of tokens representing the content of the entire document.
*
* @return a fake token that is logically the last token returned before any tokens have actually
* been returned
*/
private TokenData buildData() {
if (DEBUG) {
System.out.println(" buildData()");
}
//
// Create a fake token so that the first invocation of nextToken() will return the real first
// token.
//
TokenData head = new TokenData(Token.UNDEFINED, 0, 0);
TokenData current = head;
while (current.token != Token.EOF) {
current = TokenData.following(current, parseToken(), tokenOffset, tokenLength);
}
current.next = current;
return head;
}
/**
* Return the code-like scanner state to which the scanner should return at the end of the current
* state. This can either be {@link ScannerState#CODE} or {@link ScannerState#BLOCK_INTERPOLATION}
* , depending on whether the scanner is currently within a multi-line string.
*
* @return the code-like scanner state to which the scanner should return
*/
private ScannerState getCodeLikeState() {
if (stringState == null) {
return ScannerState.CODE;
} else {
return ScannerState.BLOCK_INTERPOLATION;
}
}
/**
* Return <code>true</code> if the given character is an end-of-line character.
*
* @param character the character being tested
* @return <code>true</code> if the given character is an end-of-line character
*/
private boolean isEol(int character) {
return character == '\r' || character == '\n' || character == '\u2028' || character == '\u2029';
}
/**
* Return <code>true</code> if the given character is a valid character within an identifier.
*
* @param character the character being tested
* @return <code>true</code> if the given character is a valid character within an identifier
*/
private boolean isIdentifierChar(int character) {
return (character >= 'a' && character <= 'z') || (character >= 'A' && character <= 'Z')
|| (character >= '0' && character <= '9') || character == '_';
}
/**
* Parse a single token from the input.
*
* @return the token that was parsed
*/
private IToken parseToken() {
IToken result = parseToken_internal();
if (DEBUG) {
System.out.println(" " + tokenOffset + " - " + (tokenOffset + tokenLength - 1) + " ("
+ tokenLength + ") : " + result.getData());
}
return result;
}
/**
* Parse a single token from the input. This helper method exists so that debugging output can be
* produced in a single location.
*
* @return the token that was parsed
*/
private IToken parseToken_internal() {
tokenOffset += tokenLength;
tokenLength = prefixLength;
prefixLength = 0;
int currentChar = scanner.peek(0);
while (currentChar != ICharacterScanner.EOF) {
switch (scannerState) {
case SINGLE_LINE_COMMENT_PREFIX:
advance();
advance();
scannerState = ScannerState.SINGLE_LINE_COMMENT;
break;
case SINGLE_LINE_DOC_COMMENT_PREFIX:
advance();
advance();
advance();
scannerState = ScannerState.SINGLE_LINE_DOC_COMMENT;
break;
case SINGLE_LINE_COMMENT:
if (isEol(currentChar)) {
advance();
scannerState = getCodeLikeState();
return ScannerState.SINGLE_LINE_COMMENT.token;
}
advance();
break;
case SINGLE_LINE_DOC_COMMENT:
if (isEol(currentChar)) {
advance();
scannerState = getCodeLikeState();
return ScannerState.SINGLE_LINE_DOC_COMMENT.token;
}
advance();
break;
case MULTI_LINE_COMMENT_PREFIX:
advance();
advance();
scannerState = ScannerState.MULTI_LINE_COMMENT;
commentDepth++;
break;
case MULTI_LINE_COMMENT:
if (currentChar == '*') {
advance();
if (scanner.peek(0) == '/') {
advance();
commentDepth--;
if (commentDepth == 0) {
scannerState = getCodeLikeState();
return ScannerState.MULTI_LINE_COMMENT.token;
}
}
} else if (currentChar == '/') {
advance();
if (scanner.peek(0) == '*') {
advance();
commentDepth++;
}
} else {
advance();
}
break;
case DOC_COMMENT_PREFIX:
advance();
advance();
advance();
scannerState = ScannerState.DOC_COMMENT;
commentDepth++;
break;
case DOC_COMMENT:
if (currentChar == '*') {
advance();
if (scanner.peek(0) == '/') {
advance();
commentDepth--;
if (commentDepth == 0) {
scannerState = getCodeLikeState();
return ScannerState.DOC_COMMENT.token;
}
}
} else if (currentChar == '/') {
advance();
if (scanner.peek(0) == '*') {
advance();
commentDepth++;
}
} else {
advance();
}
break;
case RAW_STRING_PREFIX:
advance();
case STRING_PREFIX:
advance();
scannerState = ScannerState.STRING;
break;
case STRING:
if (isEol(currentChar)) {
stringState = stringState.previous;
scannerState = getCodeLikeState();
return ScannerState.STRING.token;
} else if (currentChar == stringState.quote) {
advance();
stringState = stringState.previous;
scannerState = getCodeLikeState();
return ScannerState.STRING.token;
} else if (!stringState.raw && currentChar == '\\') {
advance();
advance();
} else if (!stringState.raw && currentChar == '$') {
if (scanner.peek(1) == '{') {
scannerState = ScannerState.BLOCK_INTERPOLATION_PREFIX;
} else {
scannerState = ScannerState.SIMPLE_INTERPOLATION_PREFIX;
}
return ScannerState.STRING.token;
} else {
advance();
}
break;
case RAW_MULTI_LINE_STRING_PREFIX:
advance();
case MULTI_LINE_STRING_PREFIX:
advance();
advance();
advance();
scannerState = ScannerState.MULTI_LINE_STRING;
break;
case MULTI_LINE_STRING:
if (currentChar == stringState.quote) {
advance();
if (scanner.peek(0) == stringState.quote) {
advance();
if (scanner.peek(0) == stringState.quote) {
advance();
stringState = stringState.previous;
scannerState = getCodeLikeState();
return ScannerState.MULTI_LINE_STRING.token;
}
}
} else if (currentChar == '\\') {
advance();
advance();
} else if (!stringState.raw && currentChar == '$') {
if (scanner.peek(1) == '{') {
scannerState = ScannerState.BLOCK_INTERPOLATION_PREFIX;
} else {
scannerState = ScannerState.SIMPLE_INTERPOLATION_PREFIX;
}
return ScannerState.MULTI_LINE_STRING.token;
} else {
advance();
}
break;
case SIMPLE_INTERPOLATION_PREFIX:
advance();
scannerState = ScannerState.SIMPLE_INTERPOLATION;
break;
case SIMPLE_INTERPOLATION:
if (currentChar == '$') {
if (scanner.peek(1) == '{') {
scannerState = ScannerState.BLOCK_INTERPOLATION_PREFIX;
} else {
scannerState = ScannerState.SIMPLE_INTERPOLATION_PREFIX;
}
} else if (!isIdentifierChar(currentChar)) {
if (stringState.quoteCount == 1) {
scannerState = ScannerState.STRING;
} else {
scannerState = ScannerState.MULTI_LINE_STRING;
}
return ScannerState.CODE.token;
}
advance();
break;
case BLOCK_INTERPOLATION_PREFIX:
advance();
advance();
scannerState = ScannerState.BLOCK_INTERPOLATION;
break;
case BLOCK_INTERPOLATION:
if (currentChar == '}') {
if (stringState.braceCount == 0) {
advance();
if (scanner.peek(0) == '$') {
if (scanner.peek(1) == '{') {
scannerState = ScannerState.BLOCK_INTERPOLATION_PREFIX;
} else {
scannerState = ScannerState.SIMPLE_INTERPOLATION_PREFIX;
}
} else {
if (stringState.quoteCount == 1) {
scannerState = ScannerState.STRING;
} else {
scannerState = ScannerState.MULTI_LINE_STRING;
}
return ScannerState.CODE.token;
}
} else {
stringState.braceCount--;
}
} else if (currentChar == '{') {
stringState.braceCount++;
}
// Intentional fall-through
case CODE:
if (currentChar == '/') {
int nextChar = scanner.peek(1);
if (nextChar == '*') {
scannerState = ScannerState.MULTI_LINE_COMMENT_PREFIX;
if (scanner.peek(2) == '*' && scanner.peek(3) != '/') {
scannerState = ScannerState.DOC_COMMENT_PREFIX;
}
return ScannerState.CODE.token;
} else if (nextChar == '/') {
if (scanner.peek(2) == '/') {
if (scanner.peek(3) == '/' || scanner.peek(3) == '*') {
scannerState = ScannerState.SINGLE_LINE_COMMENT_PREFIX;
} else {
scannerState = ScannerState.SINGLE_LINE_DOC_COMMENT_PREFIX;
}
} else {
scannerState = ScannerState.SINGLE_LINE_COMMENT_PREFIX;
}
return ScannerState.CODE.token;
} else {
advance();
}
} else if (currentChar == 'r') {
int secondChar = scanner.peek(1);
if (secondChar == '\'' || secondChar == '"') {
int thirdChar = scanner.peek(2);
int fourthChar = scanner.peek(3);
if (thirdChar == secondChar && fourthChar == secondChar) {
stringState = new StringState(stringState, true, secondChar, 3);
scannerState = ScannerState.RAW_MULTI_LINE_STRING_PREFIX;
} else {
stringState = new StringState(stringState, true, secondChar, 1);
scannerState = ScannerState.RAW_STRING_PREFIX;
}
return ScannerState.CODE.token;
} else {
advance();
}
} else if (currentChar == '\'' || currentChar == '"') {
int secondChar = scanner.peek(1);
int thirdChar = scanner.peek(2);
if (secondChar == currentChar && thirdChar == currentChar) {
stringState = new StringState(stringState, false, currentChar, 3);
scannerState = ScannerState.MULTI_LINE_STRING_PREFIX;
} else {
stringState = new StringState(stringState, false, currentChar, 1);
scannerState = ScannerState.STRING_PREFIX;
}
return ScannerState.CODE.token;
} else {
advance();
}
break;
}
currentChar = scanner.peek(0);
}
if (tokenLength > 0) {
return scannerState.token;
}
return Token.EOF;
}
/**
* Adjust the linked list of tokens so that only those that encompass the given range of
* characters will be returned.
*
* @param offset the offset of the first character to be included in a token
* @param length the number of characters to be included in tokens
*/
private void trimTokenData(int offset, int length) {
//
// Skip over any tokens that should not be returned. currentToken is assumed to be the fake
// token created before the first real token.
//
TokenData nextToken = currentToken.next;
while (nextToken != nextToken.next && nextToken.next.tokenOffset <= offset) {
nextToken = nextToken.next;
}
currentToken.next = nextToken;
//
// Fix the token offset of the first token to match the requested offset.
//
// TokenData firstToken = currentToken.next;
// if (firstToken.tokenOffset < offset) {
// firstToken.tokenLength = firstToken.tokenLength - (offset - firstToken.tokenOffset);
// firstToken.tokenOffset = offset;
// }
//
// Trim the tail of the list to cover only the requested length.
//
int totalLength = nextToken.tokenLength - (offset - nextToken.tokenOffset);
while (nextToken != nextToken.next && totalLength < length) {
nextToken = nextToken.next;
totalLength += nextToken.tokenLength;
}
if (totalLength > length) {
// nextToken.tokenLength = nextToken.tokenLength - (tokenLength - length);
TokenData lastToken = nextToken.next;
while (lastToken != lastToken.next) {
lastToken = lastToken.next;
}
nextToken.next = lastToken;
}
}
}