/**
* Aptana Studio
* Copyright (c) 2005-2011 by Appcelerator, Inc. All Rights Reserved.
* Licensed under the terms of the GNU Public License (GPL) v3 (with exceptions).
* Please see the license.html included with this distribution for details.
* Any modifications to this file must keep this entire header intact.
*/
package com.aptana.editor.php.internal.text.rules;
import org.eclipse.core.runtime.Assert;
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.rules.ICharacterScanner;
import org.eclipse.jface.text.rules.IToken;
import org.eclipse.jface.text.rules.Token;
import com.aptana.editor.common.text.rules.QueuedTokenScanner;
import com.aptana.editor.epl.BufferedDocumentScanner;
import com.aptana.editor.php.internal.parser.PHPTokenType;
import com.aptana.editor.php.internal.ui.editor.scanner.tokenMap.PHPTokenMapperFactory;
/**
 * Token scanner for the text of a double-quoted PHP string.
 * <p>
 * The scanned range is split into the opening and closing quote, escape sequences, interpolated
 * variables (<code>$name</code>, <code>${name}</code>, <code>{$name ...}</code>, including the
 * <code>-&gt;</code>, <code>::</code>, <code>[...]</code> and <code>(...)</code> operators that may
 * follow a variable) and plain text, which is reported with the default token handed to the
 * constructor.
 * <p>
 * Tokens are produced lazily: each call to {@link #nextToken()} first drains the token queue of the
 * superclass and only scans further when that queue is exhausted. All helper methods rely on the
 * character scanner not advancing when it reports {@link ICharacterScanner#EOF}, which is why
 * every <code>unread()</code> is guarded by an EOF check.
 *
 * @author Max Stepanov
 */
public class FastPHPStringTokenScanner extends QueuedTokenScanner {

	// Delimiters of the string itself.
	private static final IToken TOKEN_BEGIN_QUOTE = getToken(PHPTokenType.PUNCTUATION_STRING_BEGIN);
	private static final IToken TOKEN_END_QUOTE = getToken(PHPTokenType.PUNCTUATION_STRING_END);

	// Tokens for constructs found inside the string; all of them are scoped below META_STRING_CONTENTS_DOUBLE.
	private static final IToken TOKEN_ESCAPE_CHARACTER = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.CHARACTER_ESCAPE);
	private static final IToken TOKEN_VARIABLE_OTHER = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.VARIABLE_OTHER);
	private static final IToken TOKEN_VARIABLE_GLOBAL = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.VARIABLE_OTHER_GLOBAL);
	private static final IToken TOKEN_VARIABLE_OTHER_PUNCTUATION = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.VARIABLE_OTHER, PHPTokenType.VARIABLE_PUNCTUATION);
	private static final IToken TOKEN_VARIABLE_GLOBAL_PUNCTUATION = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.VARIABLE_OTHER_GLOBAL, PHPTokenType.VARIABLE_PUNCTUATION);
	private static final IToken TOKEN_NUMERIC = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.NUMERIC);
	private static final IToken TOKEN_CLASS_OPERATOR = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.CLASS_OPERATOR);
	private static final IToken TOKEN_ARRAY_BEGIN = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.PUNCTUATION_LBRACKET);
	private static final IToken TOKEN_ARRAY_END = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.PUNCTUATION_RBRACKET);
	private static final IToken TOKEN_VARIABLE_PUNCTUATION = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.VARIABLE_PUNCTUATION);
	private static final IToken TOKEN_FUNCTION_PUNCTUATION = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.FUNCTION_PUNCTUATION);
	private static final IToken TOKEN_STATIC_PUNCTUATION = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.STATIC_PUNCTUATION);
	private static final IToken TOKEN_SINGLE_QUOTED = getToken(PHPTokenType.STRING_SINGLE);

	// Tokens for a double-quoted string used as an array key inside an interpolated variable.
	private static final IToken TOKEN_BEGIN_QUOTE_INNER = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.VARIABLE_OTHER, PHPTokenType.STRING_DOUBLE, PHPTokenType.PUNCTUATION_STRING_BEGIN);
	private static final IToken TOKEN_END_QUOTE_INNER = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.VARIABLE_OTHER, PHPTokenType.STRING_DOUBLE, PHPTokenType.PUNCTUATION_STRING_END);
	private static final IToken TOKEN_DOUBLE_QUOTED_INNER = getToken(PHPTokenType.META_STRING_CONTENTS_DOUBLE, PHPTokenType.VARIABLE_OTHER, PHPTokenType.STRING_DOUBLE, PHPTokenType.META_STRING_CONTENTS_DOUBLE);

	/** Token reported for plain string text that is neither an escape nor a variable. */
	private final IToken fDefaultToken;

	/** Character source for the range being scanned (100 is presumably the buffer size - confirm against BufferedDocumentScanner). */
	private final BufferedDocumentScanner fScanner = new BufferedDocumentScanner(100);

	/**
	 * Creates a scanner for double-quoted PHP string content.
	 *
	 * @param defaultToken
	 *            the token to report for plain string text
	 */
	public FastPHPStringTokenScanner(IToken defaultToken) {
		fDefaultToken = defaultToken;
	}

	/*
	 * (non-Javadoc)
	 * @see org.eclipse.jface.text.rules.ITokenScanner#setRange(org.eclipse.jface.text.IDocument, int, int)
	 */
	public void setRange(IDocument document, int offset, int length) {
		super.setRange(document, offset, length);
		fScanner.setRange(document, offset, length);
		// A leading double quote is reported right away as the string's opening delimiter.
		int ch = fScanner.read();
		if (ch == '"') {
			queueToken(TOKEN_BEGIN_QUOTE, fScanner.getOffset() - 1, 1);
		} else if (ch != ICharacterScanner.EOF) {
			// Only push back a character that was really consumed; an empty range yields EOF at once.
			fScanner.unread();
		}
	}

	/*
	 * (non-Javadoc)
	 * @see org.eclipse.jface.text.rules.ITokenScanner#nextToken()
	 */
	public IToken nextToken() {
		// Serve previously queued tokens first.
		IToken token = super.nextToken();
		if (!token.isEOF()) {
			return token;
		}
		// Queue is drained: scan the next construct, then report whatever it queued.
		// If nothing was queued the superclass result (presumably EOF) is returned.
		resumeTokenScan();
		return super.nextToken();
	}

	/**
	 * Scans the next construct at the current scanner position and queues the resulting token(s).
	 * Nothing is queued when the scanner is already at the end of the range.
	 */
	private void resumeTokenScan() {
		int startOffset = fScanner.getOffset();
		int ch = fScanner.read();
		switch (ch) {
			case '\\':
				readEscape(startOffset);
				break;
			case '$':
				// readSimpleVariable expects to read the '$' itself.
				fScanner.unread();
				readSimpleVariable(startOffset);
				break;
			case '{':
				ch = fScanner.read();
				if (ch == '$') {
					fScanner.unread();
					readComplexVariable(startOffset);
				} else {
					// A '{' not followed by '$' is plain text. This includes a '{' that is the very last
					// character of the range: it must still be covered by a token instead of being dropped.
					if (ch != ICharacterScanner.EOF) {
						fScanner.unread();
					}
					readDefault(startOffset);
				}
				break;
			case '"':
				// Only a quote that ends the range is the closing delimiter; any other quote is text.
				if (fScanner.read() == ICharacterScanner.EOF) {
					queueToken(TOKEN_END_QUOTE, startOffset, 1);
				} else {
					fScanner.unread();
					readDefault(startOffset);
				}
				break;
			case ICharacterScanner.EOF:
				break;
			default:
				readDefault(startOffset);
				break;
		}
	}

	/**
	 * Consumes plain text up to (not including) the next <code>\</code>, <code>$</code>,
	 * <code>{</code>, <code>"</code> or the end of the range and queues it as the default token.
	 *
	 * @param offset
	 *            offset at which the default token starts; may lie before the current scanner position
	 */
	private void readDefault(int offset) {
		int ch = fScanner.read();
		while (ch != '\\' && ch != '$' && ch != '{' && ch != '"' && ch != ICharacterScanner.EOF) {
			ch = fScanner.read();
		}
		if (ch != ICharacterScanner.EOF) {
			fScanner.unread();
		}
		queueToken(fDefaultToken, offset, fScanner.getOffset() - offset);
	}

	/**
	 * Reads the remainder of an escape sequence whose backslash has already been consumed and queues
	 * it as an escape token. Recognized are the single-character escapes listed below, octal escapes
	 * of up to three digits and <code>\x</code> followed by up to two hexadecimal digits. For any
	 * other character the token covers the backslash only.
	 *
	 * @param offset
	 *            offset of the backslash
	 */
	private void readEscape(int offset) {
		int ch = fScanner.read();
		// count = number of additional digits that may follow the first one
		int count = 2, radix = 8;
		switch (ch) {
			case 'f':
			case 'v':
			case 't':
			case 'r':
			case 'n':
			case '\\':
			case '"':
			case '$':
				break;
			case 'x':
				ch = fScanner.read();
				count = 1;
				radix = 16;
				// intentional fall-through: hex digits are consumed by the same loop as octal digits
			default: // $codepro.audit.disable nonTerminatedCaseClause
				while (Character.digit(ch, radix) >= 0 && count > 0) {
					ch = fScanner.read();
					--count;
				}
				// The last character read belongs to the escape only if it is a digit.
				if (Character.digit(ch, radix) < 0 && ch != ICharacterScanner.EOF) {
					fScanner.unread();
				}
				break;
		}
		queueToken(TOKEN_ESCAPE_CHARACTER, offset, fScanner.getOffset() - offset);
	}

	/**
	 * Reads a simple variable reference (<code>$name</code> or <code>${name}</code>) followed by any
	 * variable operators. The scanner may be positioned on the <code>$</code> or directly on the
	 * first name character (as is the case after <code>-&gt;</code>, <code>::</code> or when the
	 * caller has already consumed the <code>$</code>). When no variable follows, the text is queued
	 * as default text instead.
	 *
	 * @param offset
	 *            offset at which the first queued token starts
	 */
	private void readSimpleVariable(int offset) {
		int ch = fScanner.read();
		// number of look-ahead characters to push back if this turns out not to be a variable
		int unread = 0;
		if (ch == '$') {
			ch = fScanner.read();
			if (ch != ICharacterScanner.EOF) {
				++unread;
			}
		}
		if (Character.isLetter(ch) || ch == '_') {
			StringBuilder name = new StringBuilder();
			name.append('$').append((char)ch);
			ch = fScanner.read();
			while (Character.isLetterOrDigit(ch) || ch == '_') {
				name.append((char)ch);
				ch = fScanner.read();
			}
			if (ch != ICharacterScanner.EOF) {
				fScanner.unread();
			}
			IToken token = PHPTokenMapperFactory.GLOBALS.contains(name.toString()) ? TOKEN_VARIABLE_GLOBAL : TOKEN_VARIABLE_OTHER;
			if (unread != 0) {
				// The '$' was read here, so it gets its own punctuation token.
				queueToken(token == TOKEN_VARIABLE_GLOBAL ? TOKEN_VARIABLE_GLOBAL_PUNCTUATION : TOKEN_VARIABLE_OTHER_PUNCTUATION, offset, 1);
				++offset;
			}
			queueToken(token, offset, fScanner.getOffset() - offset);
			readVariableOperator(fScanner.getOffset());
			return;
		} else if (ch == '{') { // we have ${
			queueToken(TOKEN_VARIABLE_PUNCTUATION, offset, fScanner.getOffset() - offset);
			readLiteral(fScanner.getOffset());
			readDefaultUntil('}', TOKEN_VARIABLE_PUNCTUATION, fScanner.getOffset());
			return;
		}
		// Not a variable: give back the look-ahead and treat the text as plain string content.
		while (unread-- > 0) {
			fScanner.unread();
		}
		readDefault(offset);
	}

	/**
	 * Reads a complex (curly) variable expression <code>{$...}</code>. The opening brace has been
	 * consumed by the caller and the scanner is positioned on the <code>$</code>.
	 *
	 * @param offset
	 *            offset of the opening brace
	 */
	private void readComplexVariable(int offset) {
		queueToken(TOKEN_VARIABLE_PUNCTUATION, offset, fScanner.getOffset() - offset);
		offset = fScanner.getOffset();
		// The read() inside the assertion is what consumes the '$'.
		Assert.isTrue(fScanner.read() == '$');
		int ch = fScanner.read();
		if (Character.isLetter(ch) || ch == '_') {
			StringBuilder name = new StringBuilder();
			name.append('$').append((char)ch);
			ch = fScanner.read();
			while (Character.isLetterOrDigit(ch) || ch == '_') {
				name.append((char)ch);
				ch = fScanner.read();
			}
			if (ch != ICharacterScanner.EOF) {
				fScanner.unread();
			}
			// FIXME We really need to delegate to PHPCodeScanner to properly get the correct tokens here...
			IToken token = PHPTokenMapperFactory.GLOBALS.contains(name.toString()) ? TOKEN_VARIABLE_GLOBAL : TOKEN_VARIABLE_OTHER;
			queueToken(token == TOKEN_VARIABLE_GLOBAL ? TOKEN_VARIABLE_GLOBAL_PUNCTUATION : TOKEN_VARIABLE_OTHER_PUNCTUATION, offset, 1);
			queueToken(token, offset + 1, fScanner.getOffset() - offset - 1);
			readVariableOperator(fScanner.getOffset());
		} else if (ch == '{') { // we have ${
			queueToken(TOKEN_VARIABLE_PUNCTUATION, offset, fScanner.getOffset() - offset);
			readLiteral(fScanner.getOffset());
			readDefaultUntil('}', TOKEN_VARIABLE_PUNCTUATION, fScanner.getOffset());
		}
		// Closing brace of the outer {...} expression.
		readDefaultUntil('}', TOKEN_VARIABLE_PUNCTUATION, fScanner.getOffset());
	}

	/**
	 * Reads an operator that may follow a variable: <code>-&gt;</code>, <code>[...]</code>,
	 * <code>(</code>/<code>)</code> or <code>::</code>, together with its operand, and then keeps
	 * looking for further operators. When no operator is found, every character consumed while
	 * looking is pushed back.
	 *
	 * @param offset
	 *            offset at which the operator would start (the current scanner position)
	 */
	private void readVariableOperator(int offset) {
		// number of read() calls made while looking ahead; set to 0 once an operator has been accepted
		int unread = 1;
		int ch = fScanner.read();
		if (ch == '-') {
			ch = fScanner.read();
			++unread;
			if (ch == '>') {
				queueToken(TOKEN_CLASS_OPERATOR, offset, fScanner.getOffset() - offset);
				readSimpleVariable(fScanner.getOffset());
				readVariableOperator(fScanner.getOffset());
				unread = 0;
			}
		} else if (ch == '[') {
			queueToken(TOKEN_ARRAY_BEGIN, offset, fScanner.getOffset() - offset);
			offset = fScanner.getOffset();
			// The first character of the key decides how the key is tokenized.
			ch = fScanner.read();
			if (ch == '$') {
				readSimpleVariable(offset);
			} else if (Character.isDigit(ch)) {
				readNumeric(offset);
			} else if (Character.isLetter(ch)) {
				readLiteral(offset);
			} else if (ch == '\'') {
				readSingleQuotedString(offset);
			} else if (ch == '"') {
				readDoubleQuotedString(offset);
			}
			offset = fScanner.getOffset();
			ch = fScanner.read();
			if (ch == ']') {
				queueToken(TOKEN_ARRAY_END, offset, fScanner.getOffset() - offset);
				readVariableOperator(fScanner.getOffset());
				unread = 0;
			}
		} else if (ch == '(' || ch == ')') {
			queueToken(TOKEN_FUNCTION_PUNCTUATION, offset, fScanner.getOffset() - offset);
			readVariableOperator(fScanner.getOffset());
			readDefaultUntil(')', TOKEN_FUNCTION_PUNCTUATION, fScanner.getOffset());
			unread = 0;
		} else if (ch == ':') {
			ch = fScanner.read();
			++unread;
			if (ch == ':') {
				queueToken(TOKEN_STATIC_PUNCTUATION, offset, fScanner.getOffset() - offset);
				readSimpleVariable(fScanner.getOffset());
				unread = 0;
			}
		}
		if (unread == 0) {
			// An operator was consumed; another one may follow.
			readVariableOperator(fScanner.getOffset());
			return;
		}
		// No operator: rewind the look-ahead. A read that returned EOF did not advance the scanner,
		// so only that read is discounted; a '-' or ':' consumed just before EOF is still pushed
		// back so that it ends up in a token.
		if (ch == ICharacterScanner.EOF) {
			--unread;
		}
		while (unread-- > 0) {
			fScanner.unread();
		}
	}

	/**
	 * Scans forward for <code>target</code>. When found, the text before it (if any) is queued as the
	 * default token and the target character itself is queued as <code>token</code>. When the end of
	 * the range is reached first, the scanner is rewound to where it started and nothing is queued.
	 *
	 * @param target
	 *            the character to look for
	 * @param token
	 *            the token to queue for the target character
	 * @param offset
	 *            offset at which the preceding default text starts
	 */
	private void readDefaultUntil(char target, IToken token, int offset) {
		int ch = fScanner.read();
		int unread = 1;
		while (ch != target && ch != ICharacterScanner.EOF) {
			ch = fScanner.read();
			++unread;
		}
		if (ch == target) {
			int length = fScanner.getOffset() - offset - 1;
			if (length > 0) {
				queueToken(fDefaultToken, offset, length);
			}
			offset = fScanner.getOffset() - 1;
			queueToken(token, offset, fScanner.getOffset() - offset);
		} else {
			// The final read returned EOF and did not advance the scanner.
			if (ch == ICharacterScanner.EOF) {
				--unread;
			}
			while (unread-- > 0) {
				fScanner.unread();
			}
		}
	}

	/**
	 * Consumes the remaining digits of a number and queues it as a numeric token.
	 *
	 * @param offset
	 *            offset of the first digit (already consumed by the caller)
	 */
	private void readNumeric(int offset) {
		int ch = fScanner.read();
		while (Character.isDigit(ch)) {
			ch = fScanner.read();
		}
		if (ch != ICharacterScanner.EOF) {
			fScanner.unread();
		}
		queueToken(TOKEN_NUMERIC, offset, fScanner.getOffset() - offset);
	}

	/**
	 * Consumes a run of letters, digits and underscores. If the run is followed by <code>$</code>
	 * the text from <code>offset</code> on is handed to {@link #readSimpleVariable(int)}; otherwise
	 * it is queued as a variable token and any following variable operators are read.
	 *
	 * @param offset
	 *            offset at which the literal starts
	 */
	private void readLiteral(int offset) {
		int ch = fScanner.read();
		while (Character.isLetterOrDigit(ch) || ch == '_') {
			ch = fScanner.read();
		}
		if (ch != ICharacterScanner.EOF) {
			fScanner.unread();
		}
		if (ch == '$') {
			readSimpleVariable(offset);
		} else {
			queueToken(TOKEN_VARIABLE_OTHER, offset, fScanner.getOffset() - offset);
			readVariableOperator(fScanner.getOffset());
		}
	}

	/**
	 * Consumes characters through the closing single quote (or to the end of the range when there
	 * is none) and queues everything from <code>offset</code> as a single-quoted string token.
	 *
	 * @param offset
	 *            offset of the opening quote (already consumed by the caller)
	 */
	private void readSingleQuotedString(int offset) {
		int ch = fScanner.read();
		while (ch != '\'' && ch != ICharacterScanner.EOF) {
			ch = fScanner.read();
		}
		queueToken(TOKEN_SINGLE_QUOTED, offset, fScanner.getOffset() - offset);
	}

	/**
	 * Reads a nested double-quoted string and queues separate tokens for its opening quote, its
	 * content and its closing quote. When the end of the range is reached before a closing quote,
	 * nothing is queued and the consumed characters are not pushed back.
	 *
	 * @param offset
	 *            offset of the opening quote (already consumed by the caller)
	 */
	private void readDoubleQuotedString(int offset) {
		int firstOffset = offset;
		int firstLength = fScanner.getOffset() - offset;
		offset = fScanner.getOffset();
		int ch = fScanner.read();
		while (ch != '"' && ch != ICharacterScanner.EOF) {
			ch = fScanner.read();
		}
		if (ch == ICharacterScanner.EOF) {
			return;
		}
		queueToken(TOKEN_BEGIN_QUOTE_INNER, firstOffset, firstLength);
		queueToken(TOKEN_DOUBLE_QUOTED_INNER, offset, fScanner.getOffset() - 1 - offset);
		queueToken(TOKEN_END_QUOTE_INNER, fScanner.getOffset() - 1, 1);
	}

	/**
	 * Creates a token whose data is the space-separated concatenation of the string forms of the
	 * given token types, in the order given.
	 *
	 * @param type
	 *            the token types that make up the token data
	 * @return a new token carrying the combined string as its data
	 */
	private static IToken getToken(PHPTokenType... type) {
		StringBuilder sb = new StringBuilder();
		for (PHPTokenType i : type) {
			sb.append(i.toString()).append(' ');
		}
		return new Token(sb.toString().trim());
	}
}