/**
* Copyright (C) 2010 dennis zhuang (killme2008@gmail.com)
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
**/
package com.googlecode.aviator.lexer;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.MathContext;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.Stack;
import com.googlecode.aviator.AviatorEvaluator;
import com.googlecode.aviator.Options;
import com.googlecode.aviator.exception.CompileExpressionErrorException;
import com.googlecode.aviator.lexer.token.CharToken;
import com.googlecode.aviator.lexer.token.NumberToken;
import com.googlecode.aviator.lexer.token.StringToken;
import com.googlecode.aviator.lexer.token.Token;
import com.googlecode.aviator.lexer.token.Variable;
/**
 * Expression lexer: scans tokens, one at a time, from an expression string.
 * <p>
 * The lexer keeps a one-character lookahead ({@code peek}) over a
 * {@link CharacterIterator} and a stack of pushed-back tokens that are
 * returned before any new input is consumed.
 *
 * @author dennis
 *
 */
public class ExpressionLexer {
    // current char (one-character lookahead)
    private char peek;
    // Char iterator for string
    private final CharacterIterator iterator;
    // symbol table
    private final SymbolTable symbolTable;
    // Tokens buffer (pushed-back tokens, returned in LIFO order)
    private final Stack<Token<?>> tokenBuffer = new Stack<Token<?>>();


    /**
     * Creates a lexer positioned at the first character of the expression.
     *
     * @param expression
     *            the expression text to tokenize
     */
    public ExpressionLexer(String expression) {
        this.iterator = new StringCharacterIterator(expression);
        this.symbolTable = new SymbolTable();
        this.peek = this.iterator.current();
    }


    /**
     * Push back token; it will be returned by the next call to {@code scan}.
     *
     * @param token
     *            the token to push back
     */
    public void pushback(Token<?> token) {
        this.tokenBuffer.push(token);
    }


    /**
     * Scans the next token in analysing mode, see {@link #scan(boolean)}.
     *
     * @return the next token, or {@code null} when the input is exhausted
     */
    public Token<?> scan() {
        return this.scan(true);
    }


    /** Advances the lookahead to the next character of the input. */
    public void nextChar() {
        this.peek = this.iterator.next();
    }


    /** Moves the lookahead back to the previous character of the input. */
    public void prevChar() {
        this.peek = this.iterator.previous();
    }

    static final char[] VALID_HEX_CHAR = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'a', 'B', 'b', 'C',
                                          'c', 'D', 'd', 'E', 'e', 'F', 'f' };


    /**
     * Tests whether the character is an ASCII hexadecimal digit.
     *
     * @param ch
     *            the character to test
     * @return {@code true} if {@code ch} is one of {@code 0-9}, {@code a-f}
     *         or {@code A-F}
     */
    public boolean isValidHexChar(char ch) {
        for (char c : VALID_HEX_CHAR) {
            if (c == ch) {
                return true;
            }
        }
        return false;
    }


    /**
     * @return the current index of the underlying character iterator
     */
    public int getCurrentIndex() {
        return this.iterator.getIndex();
    }


    /**
     * Scans the next token.
     *
     * @param analyse
     *            when {@code true}, whitespace is skipped and numbers,
     *            variables, operators and strings are recognized; when
     *            {@code false}, the current character is returned verbatim as
     *            a {@link CharToken} without any interpretation
     * @return a pushed-back token if there is one, otherwise the next token
     *         from the input, or {@code null} when the input is exhausted
     * @throws CompileExpressionErrorException
     *             on a line feed in analysing mode, a malformed number or an
     *             unterminated string
     */
    public Token<?> scan(boolean analyse) {
        // If buffer is not empty,return
        if (!this.tokenBuffer.isEmpty()) {
            return this.tokenBuffer.pop();
        }
        // Skip white space or line
        for (;; this.nextChar()) {
            if (this.peek == CharacterIterator.DONE) {
                return null;
            }
            if (!analyse) {
                // Raw mode: hand back the character as-is, whitespace included.
                char ch = this.peek;
                int index = this.iterator.getIndex();
                this.nextChar();
                return new CharToken(ch, index);
            }
            if (this.peek == ' ' || this.peek == '\t' || this.peek == '\r') {
                continue;
            }
            if (this.peek == '\n') {
                throw new CompileExpressionErrorException("Aviator doesn't support multi-lines expression at "
                        + this.iterator.getIndex());
            }
            break;
        }

        // A leading '0' may start a hex literal; look one character ahead
        // and step back if it is not followed by 'x'/'X'.
        if (this.peek == '0') {
            this.nextChar();
            if (this.peek == 'x' || this.peek == 'X') {
                this.nextChar();
                return this.scanHexNumber();
            }
            this.prevChar();
        }

        // If it is a digit
        if (Character.isDigit(this.peek) || this.peek == '.') {
            return this.scanNumber();
        }

        // It is a variable
        if (Character.isJavaIdentifierStart(this.peek)) {
            return this.scanVariable();
        }

        if (isBinaryOP(this.peek)) {
            CharToken opToken = new CharToken(this.peek, this.iterator.getIndex());
            this.nextChar();
            return opToken;
        }

        // String
        if (this.peek == '"' || this.peek == '\'') {
            return this.scanString();
        }

        // Anything else is returned as a single-character token.
        Token<Character> token = new CharToken(this.peek, this.iterator.getIndex());
        this.nextChar();
        return token;
    }


    /**
     * Scans the digits of a hex literal. On entry the lookahead is the
     * character right after the {@code 0x}/{@code 0X} prefix.
     */
    private Token<?> scanHexNumber() {
        // The prefix "0x" is two characters behind the current position.
        int startIndex = this.iterator.getIndex() - 2;
        // At least one hex digit is required; otherwise Character.digit would
        // return -1 and a garbage value/lexeme would be produced.
        if (!this.isValidHexChar(this.peek)) {
            throw new CompileExpressionErrorException("Illegal hex number at " + startIndex);
        }
        StringBuilder sb = new StringBuilder();
        long value = 0L;
        do {
            sb.append(this.peek);
            value = 16 * value + Character.digit(this.peek, 16);
            this.nextChar();
        } while (this.isValidHexChar(this.peek));
        return new NumberToken(value, sb.toString(), startIndex);
    }


    /**
     * Scans a decimal number: integer, floating point, scientific notation,
     * big integer (suffix {@code N}) or big decimal (suffix {@code M}). On
     * entry the lookahead is a digit or {@code '.'}.
     */
    private Token<?> scanNumber() {
        StringBuilder sb = new StringBuilder();
        int startIndex = this.iterator.getIndex();
        long lval = 0L;
        double dval = 0d;
        boolean hasDot = false;
        // Divisor for the next fractional digit.
        double d = 10.0;
        boolean isBigInt = false;
        boolean isBigDecimal = false;
        boolean scientificNotation = false;
        boolean negExp = false;
        // Set once the integer literal no longer fits into a long.
        boolean longOverflow = false;
        do {
            sb.append(this.peek);
            if (this.peek == '.') {
                if (scientificNotation) {
                    throw new CompileExpressionErrorException("Illegal number " + sb + " at "
                            + this.iterator.getIndex());
                }
                if (hasDot) {
                    throw new CompileExpressionErrorException("Illegal Number " + sb + " at "
                            + this.iterator.getIndex());
                }
                hasDot = true;
                this.nextChar();
            }
            else if (this.peek == 'N') {
                // big integer
                if (hasDot) {
                    throw new CompileExpressionErrorException("Illegal number " + sb + " at "
                            + this.iterator.getIndex());
                }
                isBigInt = true;
                this.nextChar();
                break;
            }
            else if (this.peek == 'M') {
                // big decimal
                isBigDecimal = true;
                this.nextChar();
                break;
            }
            else if (this.peek == 'e' || this.peek == 'E') {
                if (scientificNotation) {
                    throw new CompileExpressionErrorException("Illegal number " + sb + " at "
                            + this.iterator.getIndex());
                }
                scientificNotation = true;
                this.nextChar();
                // NOTE(review): only a '-' sign is recognized here; an exponent
                // written with '+' or with no digits ends the number early.
                if (this.peek == '-') {
                    negExp = true;
                    sb.append(this.peek);
                    this.nextChar();
                }
            }
            else {
                int digit = Character.digit(this.peek, 10);
                if (scientificNotation) {
                    // Consume the whole exponent at once.
                    int n = digit;
                    this.nextChar();
                    while (Character.isDigit(this.peek)) {
                        // Keep the lexeme complete: it is what BigDecimal
                        // values are built from.
                        sb.append(this.peek);
                        n = 10 * n + Character.digit(this.peek, 10);
                        this.nextChar();
                    }
                    while (n-- > 0) {
                        if (negExp) {
                            dval = dval / 10;
                        }
                        else {
                            dval = 10 * dval;
                        }
                    }
                    // A number with an exponent is always floating point.
                    hasDot = true;
                }
                else if (hasDot) {
                    dval = dval + digit / d;
                    d = d * 10;
                    this.nextChar();
                }
                else {
                    if (!longOverflow) {
                        // Detect overflow before it happens; checking the
                        // sign afterwards misses values that wrap around to
                        // a non-negative long.
                        if (lval > (Long.MAX_VALUE - digit) / 10) {
                            longOverflow = true;
                        }
                        else {
                            lval = 10 * lval + digit;
                        }
                    }
                    dval = 10 * dval + digit;
                    this.nextChar();
                }
            }
        } while (Character.isDigit(this.peek) || this.peek == '.' || this.peek == 'E' || this.peek == 'e'
                || this.peek == 'M' || this.peek == 'N');

        String lexeme = sb.toString();
        if (isBigDecimal || isBigInt) {
            // Drop the trailing 'M' / 'N' suffix.
            lexeme = lexeme.substring(0, lexeme.length() - 1);
        }

        Number value;
        if (isBigDecimal) {
            value = new BigDecimal(lexeme, (MathContext) AviatorEvaluator.getOption(Options.MATH_CONTEXT));
        }
        else if (isBigInt) {
            value = new BigInteger(lexeme);
        }
        else if (hasDot) {
            boolean alwaysUseDecimalAsDouble = AviatorEvaluator.getOption(Options.ALWAYS_USE_DOUBLE_AS_DECIMAL);
            if (alwaysUseDecimalAsDouble) {
                value = new BigDecimal(lexeme, (MathContext) AviatorEvaluator.getOption(Options.MATH_CONTEXT));
            }
            else {
                value = dval;
            }
        }
        else {
            // An integer literal that does not fit into a long becomes a big
            // integer.
            if (longOverflow) {
                value = new BigInteger(lexeme);
            }
            else {
                value = lval;
            }
        }
        return new NumberToken(value, lexeme, startIndex);
    }


    /**
     * Scans an identifier, which may contain {@code '.'} after its first
     * character. On entry the lookahead is a Java identifier start character.
     */
    private Token<?> scanVariable() {
        int startIndex = this.iterator.getIndex();
        StringBuilder sb = new StringBuilder();
        do {
            sb.append(this.peek);
            this.nextChar();
        } while (Character.isJavaIdentifierPart(this.peek) || this.peek == '.');
        String lexeme = sb.toString();
        Variable variable = new Variable(lexeme, startIndex);
        // If the name is already in the symbol table (e.g. a reserved word),
        // return the registered instance; otherwise register this one.
        if (this.symbolTable.contains(lexeme)) {
            return this.symbolTable.getVariable(lexeme);
        }
        this.symbolTable.reserve(lexeme, variable);
        return variable;
    }


    /**
     * Scans a string literal. On entry the lookahead is the opening quote;
     * the literal ends at the next occurrence of the same quote character.
     */
    private Token<?> scanString() {
        char left = this.peek;
        int startIndex = this.iterator.getIndex();
        StringBuilder sb = new StringBuilder();
        this.nextChar();
        while (this.peek != left) {
            if (this.peek == CharacterIterator.DONE) {
                throw new CompileExpressionErrorException("Illegal String " + sb + " at " + startIndex);
            }
            sb.append(this.peek);
            this.nextChar();
        }
        // Skip the closing quote.
        this.nextChar();
        return new StringToken(sb.toString(), startIndex);
    }

    static final char[] OPS = { '=', '>', '<', '+', '-', '*', '/', '%', '!', '&', '|' };


    /**
     * Tests whether the character is one of the operator characters in
     * {@link #OPS}.
     *
     * @param ch
     *            the character to test
     * @return {@code true} if {@code ch} is an operator character
     */
    public static boolean isBinaryOP(char ch) {
        for (char tmp : OPS) {
            if (tmp == ch) {
                return true;
            }
        }
        return false;
    }
}