/*
* Copyright (c) 2010 StockPlay development team
* All rights reserved.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package com.kapti.filter.parsing;
import com.kapti.exceptions.FilterException;
import com.kapti.exceptions.StockPlayException;
import com.kapti.filter.Convertable;
import com.kapti.filter.Filter;
import com.kapti.filter.condition.*;
import com.kapti.filter.condition.ConditionGreater;
import com.kapti.filter.data.Data;
import com.kapti.filter.data.DataDate;
import com.kapti.filter.data.DataFloat;
import com.kapti.filter.data.DataInt;
import com.kapti.filter.data.DataKey;
import com.kapti.filter.data.DataRegex;
import com.kapti.filter.data.DataString;
import com.kapti.filter.relation.RelationAnd;
import com.kapti.filter.relation.RelationOr;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
/**
*
* @author tim
*/
public class Parser {
//
// Member data
//
static Logger mLogger = Logger.getLogger(Parser.class);
public static enum TokenType {
INT, FLOAT,
WORD, QUOTE,
LEFT_PARENTHESIS, RIGHT_PARENTHESIS, COMMA,
OPERATOR_EQUALS, OPERATOR_NOTEQUALS, OPERATOR_LESS, OPERATOR_GREATER, OPERATOR_STRICTLESS, OPERATOR_STRICTGREATER, OPERATOR_LIKE, OPERATOR_NOTLIKE, OPERATOR_AND, OPERATOR_OR,
WHITESPACE
}
private List<Rule<TokenType>> mTokenRules;
private Map<TokenType, Class<? extends Condition>> mOperatorMap;
//private Map<TokenType, Class> mFunctionMap;
private static Parser instance = new Parser();
//
// Construction
//
private Parser() {
mLogger.info("instantiating Parser");
// Create token ruleset (lower items have higher priority)
mTokenRules = new ArrayList<Rule<TokenType>>();
mTokenRules.add(new Rule<TokenType>(TokenType.WHITESPACE, "\\s+"));
mTokenRules.add(new Rule<TokenType>(TokenType.LEFT_PARENTHESIS, "\\("));
mTokenRules.add(new Rule<TokenType>(TokenType.RIGHT_PARENTHESIS, "\\)"));
mTokenRules.add(new Rule<TokenType>(TokenType.COMMA, ","));
mTokenRules.add(new Rule<TokenType>(TokenType.WORD, "[A-Za-z_]+"));
mTokenRules.add(new Rule<TokenType>(TokenType.QUOTE, "'([^\']*+)'([rksdifk]*)"));
mTokenRules.add(new Rule<TokenType>(TokenType.FLOAT, "-?[0-9.]+"));
mTokenRules.add(new Rule<TokenType>(TokenType.INT, "-?[0-9]+"));
mTokenRules.add(new Rule<TokenType>(TokenType.OPERATOR_EQUALS, "(==|EQUALS)"));
mTokenRules.add(new Rule<TokenType>(TokenType.OPERATOR_NOTEQUALS, "(!=|NOT)"));
mTokenRules.add(new Rule<TokenType>(TokenType.OPERATOR_LESS, "(<=|LESSTHANOREQUAL)"));
mTokenRules.add(new Rule<TokenType>(TokenType.OPERATOR_GREATER, "(>=|GREATERTHANOREQUAL)"));
mTokenRules.add(new Rule<TokenType>(TokenType.OPERATOR_STRICTLESS, "(<|LESSTHAN)"));
mTokenRules.add(new Rule<TokenType>(TokenType.OPERATOR_STRICTGREATER, "(>|GREATERTHAN)"));
mTokenRules.add(new Rule<TokenType>(TokenType.OPERATOR_LIKE, "(=~|LIKE)"));
mTokenRules.add(new Rule<TokenType>(TokenType.OPERATOR_NOTLIKE, "(!~|NOTLIKE)"));
mTokenRules.add(new Rule<TokenType>(TokenType.OPERATOR_AND, "(&&|AND)"));
mTokenRules.add(new Rule<TokenType>(TokenType.OPERATOR_OR, "(\\|\\||OR)"));
// Create operator translation map
mOperatorMap = new HashMap<TokenType, Class<? extends Condition>>();
mOperatorMap.put(TokenType.OPERATOR_EQUALS, ConditionEquals.class);
mOperatorMap.put(TokenType.OPERATOR_NOTEQUALS, ConditionNotEquals.class);
mOperatorMap.put(TokenType.OPERATOR_STRICTLESS, ConditionStrictLess.class);
mOperatorMap.put(TokenType.OPERATOR_LESS, ConditionLess.class);
mOperatorMap.put(TokenType.OPERATOR_STRICTGREATER, ConditionStrictGreater.class);
mOperatorMap.put(TokenType.OPERATOR_GREATER, ConditionGreater.class);
mOperatorMap.put(TokenType.OPERATOR_LIKE, ConditionLike.class);
mOperatorMap.put(TokenType.OPERATOR_NOTLIKE, ConditionNotLike.class);
mOperatorMap.put(TokenType.OPERATOR_AND, RelationAnd.class); // TODO: relation in operator ruleset?
mOperatorMap.put(TokenType.OPERATOR_OR, RelationOr.class);
}
public static Parser getInstance() {
return instance;
}
//
// Methods
//
// Main method
public Filter parse(String iSource) throws StockPlayException {
mLogger.debug("parsing string '" + iSource + "'");
// Lexical analysis
List<Token> tInfix = tokenize(iSource);
Queue<Token> tPostfix = infix_to_postfix(tInfix);
// Syntactic analysis
Filter oFilter = interprete(tPostfix);
return oFilter;
}
// Lexical analysis: the tokenizer
public List<Token> tokenize(String iSource) throws StockPlayException {
// Setup
int tPosition = 0;
final int tEnd = iSource.length();
List<Token> oTokens = new ArrayList<Token>();
// Create a new matcher container
Matcher tMatcher = Pattern.compile("dummy").matcher(iSource);
tMatcher.useTransparentBounds(true).useAnchoringBounds(false);
// Walk the string
while (tPosition < tEnd) {
tMatcher.region(tPosition, tEnd);
// Check all rules
List<Token> tMatches = new ArrayList<Token>();
for (Rule<TokenType> tRule : mTokenRules) {
if (tMatcher.usePattern(tRule.getPattern()).lookingAt()) {
Token tToken;
// Fetch the relevant content
int tContentGroup = 0; // Group 0 equals the entire string
int tGroups = tMatcher.groupCount();
if (tGroups > 0) {
tContentGroup = 1;
}
String tContent = iSource.substring(tMatcher.start(tContentGroup), tMatcher.end(tContentGroup));
// Construct and save the token (minding eventual extra groups)
List<String> tExtra = new ArrayList<String>();
for (int i = 2; i <= tGroups; i++) {
String tExtraString = iSource.substring(tMatcher.start(i), tMatcher.end(i));
if (tExtraString.length() > 0) // because groups are always present...
tExtra.add(tExtraString);
}
if (tExtra.size() == 0)
tExtra = null;
tToken = new Token(tRule.getType(), tMatcher.start(), tMatcher.end(), tContent, tExtra);
tMatches.add(tToken);
}
}
// Pick the longest match
Token tTokenLongest = null;
for (Token tToken : tMatches) {
if (tTokenLongest == null || tToken.getLength() >= tTokenLongest.getLength()) {
tTokenLongest = tToken;
}
}
if (tTokenLongest != null) {
oTokens.add(tTokenLongest);
tPosition = tTokenLongest.getEnd();
} else
throw new FilterException(FilterException.Type.FILTER_FAILURE, "unknown character '" + iSource.substring(tPosition, tPosition+1) + "'");
}
return oTokens;
}
// Lexical analysis: the infix to postfix convertor (the shunting-yard algorithm)
// TODO: support functions with variable amount of parameters
// http://www.kallisti.net.nz/blog/2008/02/extension-to-the-shunting-yard-algorithm-to-allow-variable-numbers-of-arguments-to-functions/
public Queue<Token> infix_to_postfix(List<Token> iInfix) throws StockPlayException {
Queue<Token> tQueue = new LinkedList<Token>();
Stack<Token> tStack = new Stack<Token>();
Iterator<Token> tIterator = iInfix.iterator();
while (tIterator.hasNext()) {
Token tToken = tIterator.next();
switch (tToken.getType()) {
case INT:
case FLOAT:
case QUOTE:
tQueue.add(tToken);
break;
case COMMA:
if (tStack.isEmpty())
throw new FilterException(FilterException.Type.FILTER_FAILURE, "misplaced comma or mismatched parenthesis");
while (tStack.peek().getType() != TokenType.LEFT_PARENTHESIS) {
tQueue.add(tStack.pop());
if (tStack.isEmpty())
throw new FilterException(FilterException.Type.FILTER_FAILURE, "misplaced comma or mismatched parenthesis");
}
break;
case OPERATOR_EQUALS:
case OPERATOR_NOTEQUALS:
case OPERATOR_LESS:
case OPERATOR_GREATER:
case OPERATOR_STRICTLESS:
case OPERATOR_STRICTGREATER:
case OPERATOR_LIKE:
case OPERATOR_NOTLIKE:
case OPERATOR_AND:
case OPERATOR_OR:
while (!tStack.isEmpty() && isOperator(tStack.peek())) {
Token tToken2 = tStack.peek();
// Precedence of Relation < precedence of Condition
if (isRelation(tToken) && isCondition(tToken2)) {
tQueue.add(tStack.pop());
}
else if (isCondition(tToken) && isRelation(tToken2)) {
break;
}
// Assume left-precedence for equal conditions
else if (isRelation(tToken) && isRelation(tToken2) && tToken.getType() == tToken2.getType()) {
break;
}
else
throw new FilterException(FilterException.Type.FILTER_FAILURE, "I cannot make up the operator-precedence here, please use brackets");
}
tStack.push(tToken);
break;
case WORD:
// Token is a key
tQueue.add(tToken);
break;
case LEFT_PARENTHESIS:
tStack.push(tToken);
break;
case RIGHT_PARENTHESIS:
if (tStack.isEmpty())
throw new FilterException(FilterException.Type.FILTER_FAILURE, "mismatched parenthesis");
while (!tStack.isEmpty() && tStack.peek().getType() != TokenType.LEFT_PARENTHESIS) {
tQueue.add(tStack.pop());
if (tStack.isEmpty())
throw new FilterException(FilterException.Type.FILTER_FAILURE, "mismatched parenthesis");
}
tStack.pop();
//if (!tStack.isEmpty() && isFunction(tStack.peek()) {
// tQueue.add(tStack.pop());
//}
case WHITESPACE:
break;
default: {
throw new FilterException(FilterException.Type.FILTER_FAILURE, "unknown token " + tToken);
}
}
}
while (!tStack.isEmpty()) {
Token tToken = tStack.pop();
switch (tToken.getType()) {
case LEFT_PARENTHESIS:
case RIGHT_PARENTHESIS:
throw new FilterException(FilterException.Type.FILTER_FAILURE, "mismatched parenthesis");
case WORD:
case OPERATOR_EQUALS:
case OPERATOR_NOTEQUALS:
case OPERATOR_LESS:
case OPERATOR_GREATER:
case OPERATOR_STRICTLESS:
case OPERATOR_STRICTGREATER:
case OPERATOR_LIKE:
case OPERATOR_NOTLIKE:
case OPERATOR_AND:
case OPERATOR_OR:
tQueue.add(tToken);
break;
default:
throw new FilterException(FilterException.Type.FILTER_FAILURE, "mismatched token " + tToken);
}
}
return tQueue;
}
// Syntactic analysis: the interpreter
public Filter interprete(Queue<Token> iTokens) throws StockPlayException {
Filter oFilter = new Filter();
Stack<Convertable> tStack = new Stack<Convertable>();
if (iTokens.size() == 0)
return oFilter;
Iterator<Token> tIterator = iTokens.iterator();
while (tIterator.hasNext()) {
Token tToken = tIterator.next();
switch (tToken.getType()) {
case INT:
tStack.push(new DataInt(Integer.parseInt(tToken.getContent())));
break;
case FLOAT:
tStack.push(new DataFloat(Double.parseDouble(tToken.getContent())));
break;
case QUOTE:
if (tToken.getExtra() == null) {
tStack.push(new DataString(tToken.getContent()));
} else if (tToken.getExtra().size() != 1) {
// TODO: getExtra no array, only return 1.
throw new FilterException(FilterException.Type.FILTER_FAILURE, "incorrect amount of extra data for quote construction");
} else {
String tModifiers = tToken.getExtra().get(0);
// Process the type modifier
Data tData = null;
String tType = tModifiers.substring(0, 1);
if (tType.equals("s")) {
tData = new DataString(tToken.getContent());
} else if (tType.equals("i")) {
tData = new DataInt(Integer.parseInt(tToken.getContent()));
} else if (tType.equals("f")) {
tData = new DataFloat(Double.parseDouble(tToken.getContent()));
} else if (tType.equals("d")) {
tData = new DataDate(DataDate.parseDate(tToken.getContent()));
} else if (tType.equals("d")) {
tData = new DataKey(tToken.getContent());
} else if (tType.equals("r")) {
tData = new DataRegex(tToken.getContent());
} else {
throw new FilterException(FilterException.Type.FILTER_FAILURE, "unknown type modifier '" + tType + "'");
}
// Process extra modifiers
if (tModifiers.length() > 1) {
String tExtraModifiers = tModifiers.substring(1);
if (tData instanceof DataRegex) {
((DataRegex)tData).setModifiers(tExtraModifiers.toCharArray());
} else {
throw new FilterException(FilterException.Type.FILTER_FAILURE, "datatype doesn't accept extra modifiers");
}
}
tStack.push(tData);
}
break;
case OPERATOR_EQUALS:
case OPERATOR_NOTEQUALS:
case OPERATOR_LESS:
case OPERATOR_GREATER:
case OPERATOR_STRICTLESS:
case OPERATOR_STRICTGREATER:
case OPERATOR_LIKE:
case OPERATOR_NOTLIKE:
case OPERATOR_AND:
case OPERATOR_OR:
// Pick the class
Class<? extends Condition> tClass = mOperatorMap.get(tToken.getType());
// Fetch parameter signature
Method[] tMethods = tClass.getMethods();
Method tSignatureMethod = null;
for (Method tMethod : tMethods)
if (Modifier.isStatic(tMethod.getModifiers()) && tMethod.getName().compareTo("getSignature") == 0)
tSignatureMethod = tMethod;
if (tSignatureMethod == null)
throw new FilterException(FilterException.Type.FILTER_FAILURE, "could not get parameter signature of class due to missing definition");
Class[] tParameterSignature; // TODO: ? extends Data
try {
Object tReturn = tSignatureMethod.invoke(null);
tParameterSignature = (Class[]) tReturn;
}
catch (Exception e) {
throw new FilterException(FilterException.Type.FILTER_FAILURE, "could not get parameter signature of class", e.getCause());
}
// Handle parameters
int tParameterCount = tParameterSignature.length;
if (tStack.size() < tParameterCount)
throw new FilterException(FilterException.Type.FILTER_FAILURE, "parameter mismatch, I expected " + tParameterCount + " of them but only got " + tStack.size());
Vector<Convertable> tParameters = new Vector<Convertable>();
tParameters.setSize(tParameterCount);
for (int i = tParameterCount-1; i >= 0; i--) { // mind the reversion of the argument order
Convertable tParameter = tStack.pop();
Class tExpected = tParameterSignature[i];
if (!(tExpected.isInstance(tParameter)))
throw new FilterException(FilterException.Type.FILTER_FAILURE, "parameter mismatch, I expected a " + tExpected + " but got a " + tParameter.getClass());
tParameters.set(i, tParameter);
}
// Instantiate the object
Condition tCondition = null;
Object tObject = null;
try {
Constructor tConstructor = tClass.getConstructor(List.class);
tObject = tConstructor.newInstance(tParameters);
}
catch (Exception e) {
throw new FilterException(FilterException.Type.FILTER_FAILURE, "error instantiating operator", e.getCause());
}
if (!(tObject instanceof Condition))
throw new FilterException(FilterException.Type.FILTER_FAILURE, "attempt to instantiate non condition-typed operator (check the ruleset)");
tCondition = (Condition) tObject;
// Push the result
tStack.push(tCondition);
break;
case WORD:
tStack.push(new DataKey(tToken.getContent()));
break;
default:
throw new FilterException(FilterException.Type.FILTER_FAILURE, "unknown token " + tToken);
}
}
if (tStack.size() != 1) {
throw new FilterException(FilterException.Type.FILTER_FAILURE, "filter evaluation failed: result count mismatch");
}
Convertable tRoot = tStack.pop();
if (!(tRoot instanceof Condition))
throw new FilterException(FilterException.Type.FILTER_FAILURE, "root node should be a condition");
oFilter.setRoot((Condition)tRoot);
return oFilter;
}
//
// Auxiliary
//
private boolean isOperator(Token iToken) {
switch (iToken.getType()) {
case OPERATOR_EQUALS:
case OPERATOR_NOTEQUALS:
case OPERATOR_LESS:
case OPERATOR_GREATER:
case OPERATOR_STRICTLESS:
case OPERATOR_STRICTGREATER:
case OPERATOR_LIKE:
case OPERATOR_NOTLIKE:
case OPERATOR_AND:
case OPERATOR_OR:
return true;
default:
return false;
}
}
private boolean isCondition(Token iToken) {
switch (iToken.getType()) {
case OPERATOR_EQUALS:
case OPERATOR_NOTEQUALS:
case OPERATOR_LESS:
case OPERATOR_GREATER:
case OPERATOR_STRICTLESS:
case OPERATOR_STRICTGREATER:
case OPERATOR_LIKE:
case OPERATOR_NOTLIKE:
return true;
default:
return false;
}
}
private boolean isRelation(Token iToken) {
switch (iToken.getType()) {
case OPERATOR_AND:
case OPERATOR_OR:
return true;
default:
return false;
}
}
}