package org.activityinfo.model.expr; import com.google.common.collect.Lists; import com.google.common.collect.UnmodifiableIterator; import org.activityinfo.model.expr.diagnostic.ExprSyntaxException; import java.util.List; /** * Splits an expression string into a sequence of tokens */ public class ExprLexer extends UnmodifiableIterator<Token> { public static final char DOUBLE_QUOTE = '"'; public static final char SINGLE_QUOTE = '\''; private String string; private int currentCharIndex; private int currentTokenStart = 0; private static final String OPERATOR_CHARS = "+-/*&|=!"; public ExprLexer(String string) { assert string != null : "expr cannot be null"; this.string = string; } /** * @return the current character within the string being processed */ private char peekChar() { return string.charAt(currentCharIndex); } private char nextChar() { return string.charAt(currentCharIndex++); } /** * Adds the current char to the current token */ private void consumeChar() { currentCharIndex++; } private Token finishToken(TokenType type) { return finishToken(type, string.substring(currentTokenStart, currentCharIndex)); } private Token finishToken(TokenType type, String text) { Token token = new Token(type, currentTokenStart, text); currentTokenStart = currentCharIndex; return token; } public List<Token> readAll() { List<Token> tokens = Lists.newArrayList(); while (!isEndOfInput()) { tokens.add(next()); } return tokens; } public boolean isEndOfInput() { return currentCharIndex >= string.length(); } @Override public boolean hasNext() { return !isEndOfInput(); } @Override public Token next() { char c = nextChar(); if (c == '(') { return finishToken(TokenType.PAREN_START); } else if (c == ')') { return finishToken(TokenType.PAREN_END); } else if (c == '{') { return readQuotedToken(TokenType.SYMBOL, '}'); } else if (c == '[') { return readQuotedToken(TokenType.SYMBOL, ']'); } else if (c == ',') { return finishToken(TokenType.COMMA); } else if (c == '.') { return finishToken(TokenType.DOT); } else if (c == DOUBLE_QUOTE) { return readQuotedToken(TokenType.STRING_LITERAL, DOUBLE_QUOTE); } else if (c == SINGLE_QUOTE) { return readQuotedToken(TokenType.STRING_LITERAL, SINGLE_QUOTE); } else if (StringUtil.isWhitespace(c)) { return readWhitespace(); } else if (isNumberPart(c)) { return readNumber(); } else if (isOperator(c)) { return readOperator(c); } else if (isBooleanLiteral(c)) { return readBooleanLiteral(c); } else if (isSymbolStart(c)) { return readSymbol(TokenType.SYMBOL); } else { throw new RuntimeException("Symbol '" + c + "' is not supported"); } } private boolean isOperator(char c) { return OPERATOR_CHARS.indexOf(c) != -1; } private Token readOperator(char c) { while (!isEndOfInput() && isOperator(peekChar())) { consumeChar(); } return finishToken(TokenType.OPERATOR); } private boolean isSymbolStart(char c) { return c == '_' || Character.isLetter(c); } private boolean isSymbolChar(char c) { return c == '_' || StringUtil.isAlphabetic(c) || Character.isDigit(c); } private boolean isNumberPart(char c) { return Character.isDigit(c) || c == '.'; } private boolean isBooleanLiteral(char c) { final int currentIndex = currentCharIndex - 1; if (c == 't' || c == 'T') { String trueLiteral = Boolean.TRUE.toString(); String literal = string.substring(currentIndex, currentIndex + trueLiteral.length()); return trueLiteral.equalsIgnoreCase(literal); } else if (c == 'f' || c == 'F') { String falseLiteral = Boolean.FALSE.toString(); String literal = string.substring(currentIndex, currentIndex + falseLiteral.length()); return falseLiteral.equalsIgnoreCase(literal); } return false; } private Token readWhitespace() { while (!isEndOfInput() && StringUtil.isWhitespace(peekChar())) { consumeChar(); } return finishToken(TokenType.WHITESPACE); } private Token readNumber() { while (!isEndOfInput() && isNumberPart(peekChar())) { consumeChar(); } return finishToken(TokenType.NUMBER); } private Token readSymbol(TokenType tokenType) { while (!isEndOfInput() && isSymbolChar(peekChar())) { consumeChar(); } return finishToken(tokenType); } private Token readQuotedToken(TokenType type, char closingQuote) throws ExprSyntaxException { while(true) { if (isEndOfInput()) { throw new ExprSyntaxException("End of input reached while looking for closing '" + closingQuote + ""); } if (nextChar() == closingQuote) { return finishToken(type, string.substring(currentTokenStart+1, currentCharIndex-1)); } } } private Token readBooleanLiteral(char c) { currentCharIndex--; if (c == 't' || c == 'T') { String trueLiteral = Boolean.TRUE.toString(); String literal = string.substring(currentCharIndex, currentCharIndex + trueLiteral.length()); if (trueLiteral.equalsIgnoreCase(literal)) { currentCharIndex += trueLiteral.length(); return finishToken(TokenType.BOOLEAN_LITERAL); } } else if (c == 'f' || c == 'F') { String falseLiteral = Boolean.FALSE.toString(); String literal = string.substring(currentCharIndex, currentCharIndex + falseLiteral.length()); if (falseLiteral.equalsIgnoreCase(literal)) { currentCharIndex += falseLiteral.length(); return finishToken(TokenType.BOOLEAN_LITERAL); } } throw new RuntimeException("Bug in isBooleanLiteral() ?"); } private Token readBooleanOperator(char c) { if (c == '!') { if (string.charAt(currentCharIndex) == '=') { // check whether it's NOT (!) or NOT_EQUAL operator (!=) currentCharIndex++; } return finishToken(TokenType.OPERATOR); } else if (c == '&') { // if next char is also & then its && operator currentCharIndex++; return finishToken(TokenType.OPERATOR); } else if (c == '|') { currentCharIndex++; // if next char is also | then its || operator return finishToken(TokenType.OPERATOR); } else if (c == '=') { currentCharIndex++; // if next char is also = then its == operator return finishToken(TokenType.OPERATOR); } throw new RuntimeException("Invalid boolean operator."); } }