package org.jetbrains.plugins.cucumber.psi;
import com.intellij.lexer.LexerBase;
import com.intellij.psi.TokenType;
import com.intellij.psi.tree.IElementType;
import com.intellij.util.ArrayUtil;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Hand-written lexer for Gherkin (Cucumber feature) files.
 * <p>
 * The lexer walks {@link #myBuffer} from {@link #myPosition} and, on every {@link #advance()},
 * produces one token whose type comes from {@code GherkinTokenTypes} (or
 * {@code TokenType.WHITE_SPACE}). Keywords are taken from the supplied
 * {@code GherkinKeywordProvider} for the current language; the language starts as {@code "en"}
 * and is switched whenever a comment of the form {@code # language: xx} is lexed.
 *
 * @author yole
 */
public class GherkinLexer extends LexerBase {
  protected CharSequence myBuffer = ArrayUtil.EMPTY_CHAR_SEQUENCE;
  protected int myStartOffset = 0;
  protected int myEndOffset = 0;

  /** Offset of the first character that has not been consumed yet; also the end of the current token. */
  private int myPosition;
  private IElementType myCurrentToken;
  private int myCurrentTokenStart;
  /** Keywords of the current language, longest first, so that the longest keyword wins when several match. */
  private List<String> myKeywords;
  private int myState;

  /** Keywords are only recognised in this state; every line break and every "to end of line" token resets to it. */
  private static final int STATE_DEFAULT = 0;
  /** Set after a keyword that is neither a step nor a scenario outline keyword. */
  private static final int STATE_AFTER_KEYWORD = 1;
  /** Set after a pipe; the text up to the next pipe or line break is a table cell. */
  private static final int STATE_TABLE = 2;
  /** Set after a step or scenario outline keyword; {@code <name>} parameters are recognised in the following text. */
  private static final int STATE_AFTER_KEYWORD_WITH_PARAMETER = 3;
  /** Between an opening and a closing pystring marker. */
  private static final int STATE_INSIDE_PYSTRING = 5;
  /** After the opening brace of a parameter found inside a pystring. */
  private static final int STATE_PARAMETER_INSIDE_PYSTRING = 6;
  /** After the opening brace of a parameter found inside step text. */
  private static final int STATE_PARAMETER_INSIDE_STEP = 7;

  private static final String PYSTRING_MARKER = "\"\"\"";
  private static final String LANGUAGE_PREFIX = "language:";

  private final GherkinKeywordProvider myKeywordProvider;
  private String myCurLanguage;

  /**
   * Creates a lexer that uses the English keyword set until a language comment is encountered.
   *
   * @param provider source of keywords and their token types
   */
  public GherkinLexer(GherkinKeywordProvider provider) {
    myKeywordProvider = provider;
    updateLanguage("en");
  }

  /** Switches the current language and reloads its keywords, sorted by descending length. */
  private void updateLanguage(String language) {
    myCurLanguage = language;
    myKeywords = new ArrayList<>(myKeywordProvider.getAllKeywords(language));
    // Integer.compare instead of subtraction: same ordering, no reliance on the operands' sign.
    Collections.sort(myKeywords, (first, second) -> Integer.compare(second.length(), first.length()));
  }

  /**
   * Resets the lexer to the given buffer range and state and immediately lexes the first token.
   * The current language is not reset by this method.
   */
  @Override
  public void start(@NotNull CharSequence buffer, int startOffset, int endOffset, int initialState) {
    myBuffer = buffer;
    myStartOffset = startOffset;
    myEndOffset = endOffset;
    myPosition = startOffset;
    myState = initialState;
    advance();
  }

  public int getState() {
    return myState;
  }

  /** @return type of the current token, or {@code null} once the end of the range has been reached */
  public IElementType getTokenType() {
    return myCurrentToken;
  }

  public int getTokenStart() {
    return myCurrentTokenStart;
  }

  public int getTokenEnd() {
    return myPosition;
  }

  /**
   * Checks whether a {@code <...>} parameter starts at the current position, i.e. the current
   * character is {@code '<'} and a {@code '>'} follows before a line break, the terminator or
   * the end of the range. Must only be called while {@code myPosition < myEndOffset}.
   *
   * @param currentElementTerminator text that ends the enclosing element (a line break or the pystring marker)
   */
  private boolean isStepParameter(@NotNull final String currentElementTerminator) {
    if (myBuffer.charAt(myPosition) != '<') {
      return false;
    }
    for (int pos = myPosition; pos < myEndOffset; pos++) {
      final char ch = myBuffer.charAt(pos);
      if (ch == '>') {
        return true;
      }
      if (ch == '\n' || isStringAtPosition(currentElementTerminator, pos)) {
        return false;
      }
    }
    return false;
  }

  /**
   * Lexes the next token starting at the current position. Sets the token type to {@code null}
   * when the end of the range has been reached.
   */
  public void advance() {
    if (myPosition >= myEndOffset) {
      myCurrentToken = null;
      return;
    }
    myCurrentTokenStart = myPosition;
    final char c = myBuffer.charAt(myPosition);
    final boolean insidePystring = myState == STATE_INSIDE_PYSTRING;

    // The order of the checks below is significant: whitespace and pipes take precedence over
    // every state except "inside pystring", and state-driven tokens precede character-driven ones.
    if (!insidePystring && Character.isWhitespace(c)) {
      lexWhitespace();
    }
    else if (c == '|' && !insidePystring) {
      myCurrentToken = GherkinTokenTypes.PIPE;
      myPosition++;
      myState = STATE_TABLE;
    }
    else if (myState == STATE_PARAMETER_INSIDE_PYSTRING) {
      lexParameterBody(c, STATE_INSIDE_PYSTRING, PYSTRING_MARKER);
    }
    else if (insidePystring) {
      lexPystringContent();
    }
    else if (myState == STATE_TABLE) {
      lexTableCell();
    }
    else if (c == '#') {
      lexComment();
    }
    else if (c == ':') {
      myCurrentToken = GherkinTokenTypes.COLON;
      myPosition++;
    }
    else if (c == '@') {
      lexTag();
    }
    else if (isStringAtPosition(PYSTRING_MARKER)) {
      myCurrentToken = GherkinTokenTypes.PYSTRING;
      myState = STATE_INSIDE_PYSTRING;
      myPosition += PYSTRING_MARKER.length();
    }
    else {
      lexKeywordOrText(c);
    }
  }

  /** Consumes a run of whitespace as a single token; a line break inside it resets the state. */
  private void lexWhitespace() {
    myCurrentToken = TokenType.WHITE_SPACE;
    do {
      advanceOverWhitespace();
    }
    while (myPosition < myEndOffset && Character.isWhitespace(myBuffer.charAt(myPosition)));
  }

  /**
   * Lexes what follows the opening brace of a parameter: either the closing brace (returning to
   * {@code stateAfterParameter}) or the parameter text.
   */
  private void lexParameterBody(char c, int stateAfterParameter, String terminator) {
    if (c == '>') {
      myState = stateAfterParameter;
      myPosition++;
      myCurrentToken = GherkinTokenTypes.STEP_PARAMETER_BRACE;
    }
    else {
      advanceToParameterEnd(terminator);
      myCurrentToken = GherkinTokenTypes.STEP_PARAMETER_TEXT;
    }
  }

  /** Lexes one token inside a pystring: the closing marker, a parameter's opening brace, or plain text. */
  private void lexPystringContent() {
    if (isStringAtPosition(PYSTRING_MARKER)) {
      myPosition += PYSTRING_MARKER.length();
      myCurrentToken = GherkinTokenTypes.PYSTRING;
      myState = STATE_DEFAULT;
      return;
    }
    if (myBuffer.charAt(myPosition) == '<') {
      if (isStepParameter(PYSTRING_MARKER)) {
        myPosition++;
        myState = STATE_PARAMETER_INSIDE_PYSTRING;
        myCurrentToken = GherkinTokenTypes.STEP_PARAMETER_BRACE;
        return;
      }
      // A '<' that does not open a parameter is ordinary text; step over it so the scan below makes progress.
      myPosition++;
    }
    advanceToParameterOrSymbol(PYSTRING_MARKER, STATE_INSIDE_PYSTRING, false);
    myCurrentToken = GherkinTokenTypes.PYSTRING_TEXT;
  }

  /** Consumes a table cell up to the next unescaped pipe or line break, excluding trailing whitespace. */
  private void lexTableCell() {
    myCurrentToken = GherkinTokenTypes.TABLE_CELL;
    while (myPosition < myEndOffset) {
      final char ch = myBuffer.charAt(myPosition);
      // Cucumber: 0.7.3 Table cells can now contain escaped bars - \| and escaped backslashes - \\
      if (ch == '\\') {
        final int nextPos = myPosition + 1;
        if (nextPos < myEndOffset) {
          final char nextChar = myBuffer.charAt(nextPos);
          if (nextChar == '|' || nextChar == '\\') {
            myPosition += 2;
            continue;
          }
          // else - common case: a lone backslash is an ordinary character
        }
      }
      else if (ch == '|' || ch == '\n') {
        break;
      }
      myPosition++;
    }
    // The cell's first character is never whitespace (whitespace is lexed before the table state
    // is examined), so trimming can never move before the token start.
    returnWhitespace(myCurrentTokenStart);
  }

  /** Consumes a comment up to the end of the line and applies a {@code language:} directive if it contains one. */
  private void lexComment() {
    myCurrentToken = GherkinTokenTypes.COMMENT;
    advanceToEOL();
    final String commentText = myBuffer.subSequence(myCurrentTokenStart + 1, myPosition).toString().trim();
    final String language = fetchLocationLanguage(commentText);
    if (language != null) {
      updateLanguage(language);
    }
  }

  /** Consumes {@code '@'} and all following characters that are valid in a tag. */
  private void lexTag() {
    myCurrentToken = GherkinTokenTypes.TAG;
    myPosition++;
    while (myPosition < myEndOffset && isValidTagChar(myBuffer.charAt(myPosition))) {
      myPosition++;
    }
  }

  /**
   * Handles everything that is not recognised by its first character: a keyword (only in the
   * default state), parameter pieces inside a step, or plain text.
   */
  private void lexKeywordOrText(char c) {
    if (myState == STATE_DEFAULT && lexKeyword()) {
      return;
    }
    if (myState == STATE_PARAMETER_INSIDE_STEP) {
      lexParameterBody(c, STATE_AFTER_KEYWORD_WITH_PARAMETER, "\n");
      return;
    }
    if (myState == STATE_AFTER_KEYWORD_WITH_PARAMETER) {
      if (isStepParameter("\n")) {
        myState = STATE_PARAMETER_INSIDE_STEP;
        myPosition++;
        myCurrentToken = GherkinTokenTypes.STEP_PARAMETER_BRACE;
      }
      else {
        myCurrentToken = GherkinTokenTypes.TEXT;
        advanceToParameterOrSymbol("\n", STATE_AFTER_KEYWORD_WITH_PARAMETER, true);
      }
      return;
    }
    myCurrentToken = GherkinTokenTypes.TEXT;
    advanceToEOL();
  }

  /**
   * Tries to consume a keyword of the current language at the current position.
   *
   * @return {@code true} if a keyword token was produced
   */
  private boolean lexKeyword() {
    for (String keyword : myKeywords) {
      if (!isStringAtPosition(keyword)) {
        continue;
      }
      final int length = keyword.length();
      // A keyword that requires a following space does not match when it is merely the prefix of a longer word.
      if (myKeywordProvider.isSpaceAfterKeyword(myCurLanguage, keyword) &&
          myEndOffset - myPosition > length &&
          Character.isLetterOrDigit(myBuffer.charAt(myPosition + length))) {
        continue;
      }
      myCurrentToken = myKeywordProvider.getTokenType(myCurLanguage, keyword);
      myPosition += length;
      final boolean allowsParameters = myCurrentToken == GherkinTokenTypes.STEP_KEYWORD ||
                                       myCurrentToken == GherkinTokenTypes.SCENARIO_OUTLINE_KEYWORD;
      myState = allowsParameters ? STATE_AFTER_KEYWORD_WITH_PARAMETER : STATE_AFTER_KEYWORD;
      return true;
    }
    return false;
  }

  /**
   * Extracts the language code from the text of a comment.
   *
   * @param commentText comment text without the leading {@code '#'}, already trimmed
   * @return the trimmed text following {@code "language:"}, or {@code null} if the comment does not start with it
   */
  @Nullable
  public static String fetchLocationLanguage(final @NotNull String commentText) {
    if (commentText.startsWith(LANGUAGE_PREFIX)) {
      return commentText.substring(LANGUAGE_PREFIX.length()).trim();
    }
    return null;
  }

  /** Steps over one whitespace character; a line break resets the state to the default one. */
  private void advanceOverWhitespace() {
    if (myBuffer.charAt(myPosition) == '\n') {
      myState = STATE_DEFAULT;
    }
    myPosition++;
  }

  /** @return {@code true} if {@code keyword} occurs in the buffer at the current position */
  private boolean isStringAtPosition(String keyword) {
    return isStringAtPosition(keyword, myPosition);
  }

  /**
   * @return {@code true} if {@code keyword} occurs in the buffer at {@code position} and fits
   * completely before {@link #myEndOffset}
   */
  private boolean isStringAtPosition(String keyword, int position) {
    final int length = keyword.length();
    if (myEndOffset - position < length) {
      return false;
    }
    // Compare in place: this is called once per scanned character, so avoid allocating substrings.
    for (int i = 0; i < length; i++) {
      if (myBuffer.charAt(position + i) != keyword.charAt(i)) {
        return false;
      }
    }
    return true;
  }

  private static boolean isValidTagChar(char c) {
    return !Character.isWhitespace(c) && c != '@';
  }

  /**
   * Consumes the current character and the rest of the line, leaving trailing whitespace
   * unconsumed, and resets the state to the default one.
   */
  private void advanceToEOL() {
    myPosition++;
    final int mark = myPosition;
    while (myPosition < myEndOffset && myBuffer.charAt(myPosition) != '\n') {
      myPosition++;
    }
    returnWhitespace(mark);
    myState = STATE_DEFAULT;
  }

  /** Moves the position back over trailing whitespace, but never before {@code mark}. */
  private void returnWhitespace(int mark) {
    while (myPosition > mark && Character.isWhitespace(myBuffer.charAt(myPosition - 1))) {
      myPosition--;
    }
  }

  /**
   * Advances until {@code s}, the start of a parameter, or the end of the range.
   *
   * @param s                       text that terminates the scan
   * @param parameterState          state to switch to when the scan stopped inside the range at something other than {@code s}
   * @param shouldReturnWhitespace  if {@code true}, the state is updated and trailing whitespace is left unconsumed;
   *                                if {@code false}, only the position changes
   */
  private void advanceToParameterOrSymbol(String s, int parameterState, boolean shouldReturnWhitespace) {
    final int mark = myPosition;
    while (myPosition < myEndOffset && !isStringAtPosition(s) && !isStepParameter(s)) {
      myPosition++;
    }
    if (shouldReturnWhitespace) {
      final boolean stoppedBeforeTerminator = myPosition < myEndOffset && !isStringAtPosition(s);
      myState = stoppedBeforeTerminator ? parameterState : STATE_DEFAULT;
      returnWhitespace(mark);
    }
  }

  /**
   * Consumes the current character and everything up to {@code '>'}, {@code endSymbol} or the end
   * of the range, leaving trailing whitespace unconsumed. Stopping at {@code endSymbol} resets the
   * state to the default one.
   */
  private void advanceToParameterEnd(String endSymbol) {
    myPosition++;
    final int mark = myPosition;
    while (myPosition < myEndOffset && !isStringAtPosition(endSymbol) && myBuffer.charAt(myPosition) != '>') {
      myPosition++;
    }
    if (myPosition < myEndOffset && isStringAtPosition(endSymbol)) {
      myState = STATE_DEFAULT;
    }
    returnWhitespace(mark);
  }

  @NotNull
  public CharSequence getBufferSequence() {
    return myBuffer;
  }

  public int getBufferEnd() {
    return myEndOffset;
  }
}