/*******************************************************************************
* Copyright (c) 2014 Open Door Logistics (www.opendoorlogistics.com)
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the GNU Lesser Public License v3
* which accompanies this distribution, and is available at http://www.gnu.org/licenses/lgpl.txt
******************************************************************************/
package com.opendoorlogistics.core.formulae;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import com.opendoorlogistics.core.formulae.Functions.FmConst;
import com.opendoorlogistics.core.formulae.Functions.FmNegate;
import com.opendoorlogistics.core.formulae.StringTokeniser.StringToken;
import com.opendoorlogistics.core.formulae.definitions.FunctionDefinition;
import com.opendoorlogistics.core.formulae.definitions.FunctionDefinition.FunctionType;
import com.opendoorlogistics.core.formulae.definitions.FunctionDefinitionLibrary;
import com.opendoorlogistics.core.scripts.elements.UserFormula;
import com.opendoorlogistics.core.utils.strings.StandardisedStringTreeMap;
import com.opendoorlogistics.core.utils.strings.Strings;
import com.opendoorlogistics.core.utils.strings.Strings.ToString;
public final class FormulaParser {
/** How the parser reacts to names it cannot identify; throws by default. */
private UnidentifiedPolicy unidentifiedPolicy = UnidentifiedPolicy.THROW_EXCEPTION;

/**
 * @return the policy currently applied to unidentified variables and functions
 */
public UnidentifiedPolicy getUnidentifiedPolicy() {
    return this.unidentifiedPolicy;
}

/**
 * Replaces the policy applied to unidentified variables and functions.
 *
 * @param unidentifiedPolicy the policy to use for subsequent parsing
 */
public void setUnidentifiedPolicy(UnidentifiedPolicy unidentifiedPolicy) {
    this.unidentifiedPolicy = unidentifiedPolicy;
}
/**
 * Strategy applied when a variable or function name cannot be identified
 * while parsing.
 */
public enum UnidentifiedPolicy {
    /** Fail with a RuntimeException naming the unidentified variable or function. */
    THROW_EXCEPTION,

    /** Substitute an {@link FmUnidentified} placeholder instead of failing. */
    CREATE_UNIDENTIFIED_PLACEHOLDER_FUNCTION
}
/**
 * Node of the intermediate token tree built from the tokenised formula
 * before it is compiled into {@link Function} objects.
 */
private class ReadFunctionToken {
    /** Token text; stays null for nodes created without a source token. */
    String token;
    /** Child nodes (function arguments or the members of a nested expression). */
    ArrayList<ReadFunctionToken> children = new ArrayList<>();
    /** Factory resolved for operators and functions; null otherwise. */
    FunctionFactory identified;
    TokenType tokenType;
    /** Operator precedence; -1 unless set from an operator definition. */
    int precendence = -1;

    @Override
    public String toString() {
        return toString(false);
    }

    /**
     * Renders this node and its subtree as text.
     *
     * @param bracketsNeed whether a non-function node with children should
     *            be wrapped in brackets
     */
    private String toString(boolean bracketsNeed) {
        final boolean isFunction = tokenType == TokenType.function;
        final boolean wrap = isFunction || (bracketsNeed && !children.isEmpty());

        StringBuilder text = new StringBuilder();
        if (token != null) {
            text.append(token);
        }
        if (wrap) {
            text.append("(");
        }

        boolean first = true;
        for (ReadFunctionToken child : children) {
            // only function arguments are comma separated
            if (isFunction && !first) {
                text.append(",");
            }
            text.append(child.toString(!isFunction));
            first = false;
        }

        if (wrap) {
            text.append(")");
        }
        return text.toString();
    }
}
/**
 * Pairs a token-tree node with the function compiled from it; the
 * function stays null until the node has been compiled.
 */
private class SourceFormula {
    private ReadFunctionToken source;
    private Function formula;

    private SourceFormula(ReadFunctionToken source) {
        this.source = source;
    }

    /** Describes this entry by the type of its source token. */
    @Override
    public String toString() {
        TokenType type = source.tokenType;
        return type.toString();
    }
}
/**
 * Dummy placeholder used when processing user formulae. It marks a
 * function token as a call to a user formula and carries the candidate
 * overloads; it is replaced in the token tree before compilation and can
 * therefore never create a function itself.
 * @author Phil
 *
 */
private class UserFormulaPlaceholder implements FunctionFactory{
    /** All user formulae registered under the called name (one per parameter count). */
    private final List<UserFormulaInternal> ufs;

    UserFormulaPlaceholder(List<UserFormulaInternal> ufs) {
        this.ufs = ufs;
    }

    /**
     * Always fails: reaching this method means the placeholder was not
     * expanded into its user formula definition.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public Function createFunction(Function... children) {
        String name = (ufs != null && !ufs.isEmpty()) ? ufs.get(0).name : "<unknown>";
        throw new UnsupportedOperationException(
                "User formula placeholder for \"" + name + "\" was not expanded before compilation.");
    }
}
/**
 * Parsed form of a single user formula definition of the shape
 * <code>name(param1, param2, ...) = definition</code>.
 */
private class UserFormulaInternal{
    /** Lower-cased formula name. */
    private final String name;
    /** Lower-cased parameter names in declaration order. */
    private final List<String> parameters = new ArrayList<String>();
    /** Tokens following the equals sign, i.e. the formula body. */
    private final List<StringToken> definition;
    /** Maps each parameter name to its index in {@link #parameters}. */
    private StandardisedStringTreeMap<Integer> parameterNumbers = new StandardisedStringTreeMap<Integer>(false);

    /**
     * Tokenises and validates a user formula definition.
     *
     * Parsing stops at the first problem found, so the reported message
     * always describes the earliest error rather than a later one that is
     * merely a consequence of it.
     *
     * @param formula the full definition text, e.g. <code>add2(a,b) = a + b</code>
     * @throws RuntimeException if the definition is malformed
     */
    UserFormulaInternal(String formula){
        List<StringToken> tokens = StringTokeniser.tokenise(formula);
        int nt = tokens.size();
        int iToken = 0;

        // function name
        if (iToken >= nt) {
            throw parseError(formula, "Missing function name");
        }
        name = tokens.get(iToken++).getLowerCase();
        if (!isOkVariableOrMethodName(name)) {
            throw parseError(formula, "Invalid function name: " + name);
        }

        // opening bracket
        if (iToken >= nt || !tokens.get(iToken++).getLowerCase().equals("(")) {
            throw parseError(formula, "Missing left bracket ( at start of user formula");
        }

        // get parameters; names and commas must alternate until the closing bracket
        boolean nextIsParam = true;
        while (true) {
            // check we still have a token
            if (iToken >= nt) {
                throw parseError(formula, "Incorrect function format.");
            }

            String sToken = tokens.get(iToken++).getLowerCase();

            // check for end of parameters
            if (sToken.equals(")")) {
                break;
            }

            if (nextIsParam) {
                if (!isOkVariableOrMethodName(sToken)) {
                    throw parseError(formula, "Invalid parameter name: " + sToken);
                }

                // check parameter name is unique
                if (parameterNumbers.get(sToken) != null) {
                    throw parseError(formula, "Parameter name used twice or more times: " + sToken);
                }

                parameterNumbers.put(sToken, parameters.size());
                parameters.add(sToken);
                nextIsParam = false;
            } else {
                if (!sToken.equals(",")) {
                    throw parseError(formula, "Comma is missing from list of parameters.");
                }
                nextIsParam = true;
            }
        }

        // check for equals
        if (iToken >= nt || !tokens.get(iToken++).getLowerCase().equals("=")) {
            throw parseError(formula, "Equals sign = missing after parameter names.");
        }

        // check for non-empty
        if (iToken >= nt) {
            throw parseError(formula, "Empty user function definition.");
        }

        definition = tokens.subList(iToken, nt);
    }

    /** Builds the exception reported for a malformed user formula. */
    private RuntimeException parseError(String formula, String errorMessage) {
        return new RuntimeException("Error parsing user formula: " + formula + System.lineSeparator() + errorMessage);
    }
}
/** Classification assigned to each node of the intermediate token tree. */
private enum TokenType {
    operator,
    function,
    number,
    comma,
    nestedExpression,
    variable,
    string
}
/** Source of user variables; may be null, in which case no variable lookup is attempted. */
private final UserVariableProvider userVariableProvider;

/** Anchored expression matching a token that is entirely a double or an integer number. */
private static final String anyNumberExactString = "^(" + StringTokeniser.doubleNumber + "|" + StringTokeniser.intNumber + ")$";
private static final Pattern anyNumberExactStringMatcher = Pattern.compile(anyNumberExactString);

/** Anchored, case-insensitive expression for valid variable, parameter and function names. */
private static final Pattern okVariableOrMethodName = Pattern.compile("^" + StringTokeniser.VARIABLE + "$", Pattern.CASE_INSENSITIVE);

/** Library used to identify operators, constants and built-in functions. */
private final FunctionDefinitionLibrary library;

/** User formulae keyed by name; each list holds the overloads with differing parameter counts. */
private final StandardisedStringTreeMap<ArrayList<UserFormulaInternal>> userFormulae = new StandardisedStringTreeMap<ArrayList<UserFormulaInternal>>(false);
// public FormulaParser( UserVariableProvider userVariableProvider, FunctionDefinitionLibrary lib) {
// this(userVariableProvider, lib, new ArrayList<String>());
// }
/**
 * Creates a parser and registers the supplied user formulae.
 *
 * @param userVariableProvider provider consulted for variables; may be null
 * @param lib library used to identify operators, constants and functions
 * @param userformulaeDfns user formula definitions to register; may be null
 * @throws RuntimeException if a definition is malformed, or if two
 *             definitions share both name and number of parameters
 */
public FormulaParser( UserVariableProvider userVariableProvider, FunctionDefinitionLibrary lib,List<UserFormula> userformulaeDfns ) {
    this.userVariableProvider = userVariableProvider;
    this.library = lib;

    if (userformulaeDfns == null) {
        return;
    }

    for (UserFormula dfn : userformulaeDfns) {
        UserFormulaInternal parsed = new UserFormulaInternal(dfn.getValue());

        ArrayList<UserFormulaInternal> overloads = userFormulae.get(parsed.name);
        if (overloads == null) {
            overloads = new ArrayList<UserFormulaInternal>();
            userFormulae.put(parsed.name, overloads);
        }

        // overloads are told apart purely by their parameter count
        int nbParams = parsed.parameters.size();
        for (UserFormulaInternal existing : overloads) {
            if (existing.parameters.size() == nbParams) {
                throw new RuntimeException("Found two or more user formulae with the same name and number of parameters: " + parsed.name);
            }
        }

        overloads.add(parsed);
    }
}
/**
 * Tests a candidate name against the anchored variable-name pattern.
 *
 * @param name the candidate variable, parameter or function name
 * @return true if the pattern finds a match in the name
 */
private static boolean isOkVariableOrMethodName(String name){
    boolean matched = okVariableOrMethodName.matcher(name).find();
    return matched;
}
// public FormulaParser(final UserVariableProvider userVariableProvider) {
// this.userVariableProvider = userVariableProvider;
// this.library = new FunctionDefinitionLibrary();
// library.build();
// }
/**
 * Compiles the children of a nested-expression node into a single function.
 *
 * Works in two phases. First, plus and minus operators acting as signs
 * (at the start of the expression, or directly after another operator)
 * are resolved: a sign plus is dropped and a sign minus wraps the
 * following operand in {@link FmNegate}. Second, the remaining entries
 * are compiled in ascending order of their precedence value, each operator
 * combining the entries immediately before and after it, until a single
 * compiled formula remains.
 *
 * @param token the nested-expression node whose children are compiled
 * @return the function for the whole expression
 * @throws RuntimeException if the expression is malformed, e.g. it is
 *             empty, starts or ends with a binary operator, or contains
 *             an invalid run of operators
 */
private Function compileNestedExpression( ReadFunctionToken token) {
    ArrayList<SourceFormula> tmp = new ArrayList<>();
    for (ReadFunctionToken child : token.children) {
        tmp.add(new SourceFormula(child));
    }

    // Convert and remove + and - which are indicating sign as needed.
    // Each pass resolves at most one sign and then rescans from the start.
    int removePlusIndx = 0;
    int compileMinusIndx = 0;
    while ((removePlusIndx != -1 || compileMinusIndx != -1) && tmp.size() > 0) {
        removePlusIndx = -1;
        compileMinusIndx = -1;

        // If we have an operator at the beginning or two in a row
        // these could be a + or minus indicating sign instead...
        ReadFunctionToken rft = tmp.get(0).source;
        if (StringTokeniser.isMinus(rft.token)) {
            compileMinusIndx = 0;
        } else if (isPlus(rft.token)) {
            removePlusIndx = 0;
        }

        // Check for any operator twice in a row
        int n = tmp.size();
        for (int i = 1; i < n && removePlusIndx == -1 && compileMinusIndx == -1; i++) {
            SourceFormula last = tmp.get(i - 1);
            SourceFormula current = tmp.get(i);
            if (last.source.tokenType == TokenType.operator && current.source.tokenType == TokenType.operator) {
                // a sign must be followed by an operand
                if (i + 1 >= tmp.size()) {
                    throwException(token.token);
                }

                // cannot have 3 operators in a row
                SourceFormula next = tmp.get(i + 1);
                if (next.source.tokenType == TokenType.operator) {
                    throwException(token.token);
                }

                // can only have two in a row if second is + or -
                if (isPlus(current.source.token)) {
                    removePlusIndx = i;
                    break;
                } else if (StringTokeniser.isMinus(current.source.token)) {
                    compileMinusIndx = i;
                    break;
                } else {
                    throwException(token.token);
                }
            }
        }

        if (compileMinusIndx != -1) {
            if (compileMinusIndx + 1 >= tmp.size()) {
                throwException(token.token);
            }
            // compile the operand now, wrapped in a negation, and drop the sign
            SourceFormula next = tmp.get(compileMinusIndx + 1);
            Function formula = generateFormula( next.source);
            next.formula = new FmNegate(formula);
            tmp.remove(compileMinusIndx);
            continue;
        }

        if (removePlusIndx != -1) {
            tmp.remove(removePlusIndx);
            continue;
        }
    }

    // parse in order of precedence (lowest precedence value first)
    int chosenIndx;
    int precendence;
    do {
        chosenIndx = -1;
        precendence = Integer.MAX_VALUE;
        int n = tmp.size();
        for (int i = 0; i < n; i++) {
            SourceFormula sf = tmp.get(i);
            if (sf.formula == null) {
                if (sf.source.precendence < precendence) {
                    chosenIndx = i;
                    precendence = sf.source.precendence;
                }
            }
        }

        if (chosenIndx != -1) {
            // compile this
            SourceFormula current = tmp.get(chosenIndx);
            if (current.source.tokenType == TokenType.operator) {
                // compile operator which combines preceding and next tokens;
                // it therefore needs an entry on both sides. The upper bound
                // must exclude the last index, otherwise a trailing operator
                // would fail with an IndexOutOfBoundsException below.
                if (chosenIndx < 1 || chosenIndx >= tmp.size() - 1) {
                    throwException(current.source.token);
                }

                // get previous and next
                SourceFormula previous = tmp.get(chosenIndx - 1);
                SourceFormula next = tmp.get(chosenIndx + 1);

                // compile previous and next if needed
                // Also check for unconverted operators (would indicate corrupt syntax with 2 operators in a row)
                if (previous.formula == null) {
                    if (previous.source.tokenType == TokenType.operator) {
                        throwException(current.source.token);
                    }
                    previous.formula = generateFormula( previous.source);
                }
                if (next.formula == null) {
                    if (next.source.tokenType == TokenType.operator) {
                        throwException(current.source.token);
                    }
                    next.formula = generateFormula( next.source);
                }

                // compile current
                current.formula = current.source.identified.createFunction( previous.formula, next.formula);

                // remove previous and next (higher index first so the lower one stays valid)
                tmp.remove(chosenIndx + 1);
                tmp.remove(chosenIndx - 1);
            } else {
                current.formula = generateFormula( current.source);
                if(current.formula==null){
                    throw new RuntimeException("Failed to compile formula " + current.source);
                }
            }
        }
    } while (chosenIndx != -1);

    // We should now only have one formula
    if (tmp.size() != 1) {
        throw new RuntimeException("Error parsing expression \"" + token + "\": expected a single formula but found " + tmp.size());
    }
    return tmp.get(0).formula;
}
/**
 * Compiles a single token-tree node into a function, dispatching on the
 * node's token type.
 *
 * @param token the node to compile
 * @return the compiled function
 * @throws RuntimeException if the token type cannot be compiled directly,
 *             a number token has children, or a variable cannot be
 *             identified while the policy is THROW_EXCEPTION
 */
private Function generateFormula( ReadFunctionToken token) {
    switch (token.tokenType) {
    case function: {
        // Each argument converts independently of the others
        int nbArgs = token.children.size();
        Function[] args = new Function[nbArgs];
        for (int k = 0; k < nbArgs; k++) {
            args[k] = generateFormula(token.children.get(k));
        }
        return token.identified.createFunction(args);
    }

    case number: {
        if (!token.children.isEmpty()) {
            throwException(token.token);
        }
        if (StringTokeniser.isIntegerNumber(token.token)) {
            return new FmConst(Long.parseLong(token.token));
        }
        return new FmConst(Double.parseDouble(token.token));
    }

    case string: {
        // strip the surrounding speech marks
        String quoted = token.token;
        String unquoted = quoted.substring(1, quoted.length() - 1);

        // a quoted name may really be a variable; otherwise it is a constant
        Function fromProvider = null;
        if (userVariableProvider != null) {
            fromProvider = userVariableProvider.getVariable(unquoted);
        }
        if (fromProvider != null) {
            return fromProvider;
        }
        return new FmConst(unquoted);
    }

    case variable: {
        // library constants take priority over user variables
        Function resolved = null;
        FunctionFactory constantFactory = library.identify(token.token, FunctionType.CONSTANT);
        if (constantFactory != null) {
            resolved = constantFactory.createFunction();
        }
        if (resolved == null && userVariableProvider != null) {
            resolved = userVariableProvider.getVariable(token.token);
        }
        if (resolved != null) {
            return resolved;
        }

        if (unidentifiedPolicy == UnidentifiedPolicy.THROW_EXCEPTION) {
            throw new RuntimeException("Unidentified variable: " + token.token);
        }
        return FmUnidentified.FACTORY.createFunction();
    }

    case nestedExpression:
        return compileNestedExpression(token);

    default:
        // operators and commas cannot be compiled on their own
        throwException(token.token);
        return null;
    }
}
/**
 * @param token token text, may be null
 * @return true only if the token is exactly the plus sign
 */
private boolean isPlus(String token) {
    return "+".equals(token);
}
/**
 * Tokenises the formula text and compiles it into a function.
 *
 * @param formula the formula text
 * @return the compiled function
 */
public Function parse(String formula){
    return parseTokens(StringTokeniser.tokenise(formula));
}
/**
 * Compiles a token list in three steps: build the token tree, expand
 * calls to user formulae, then generate the function.
 *
 * @param tokens the tokenised formula
 * @return the compiled function
 */
private Function parseTokens(List<StringToken> tokens) {
    // First pass builds the top-level tree
    ReadFunctionToken[] root = new ReadFunctionToken[1];
    int consumed = readFunctionTokenTree(tokens, 0, null, root);
    assert consumed == tokens.size();

    // Second pass swaps user formula calls for their definitions
    ReadFunctionToken expanded = recurseProcessUserFormulae(root[0]);

    return generateFormula(expanded);
}
/**
 * Recursively replaces every call to a user formula in the subtree with
 * the token tree of that formula's definition.
 *
 * @param rft root of the subtree to process
 * @return the (possibly replaced) root of the processed subtree
 * @throws RuntimeException if no overload takes the number of arguments used
 */
private ReadFunctionToken recurseProcessUserFormulae(ReadFunctionToken rft){
    if (rft.identified instanceof UserFormulaPlaceholder) {
        // The argument count selects the overload
        int argCount = rft.children != null ? rft.children.size() : 0;
        List<UserFormulaInternal> overloads = ((UserFormulaPlaceholder) rft.identified).ufs;

        UserFormulaInternal match = null;
        for (UserFormulaInternal overload : overloads) {
            if (overload.parameters.size() == argCount) {
                match = overload;
            }
        }
        if (match == null) {
            throw new RuntimeException("Cannot find a definition of user formula " + overloads.get(0).name + " taking " + argCount + " parameters.");
        }

        // Bind each parameter name to the token tree of the matching argument
        StandardisedStringTreeMap<ReadFunctionToken> bindings = new StandardisedStringTreeMap<FormulaParser.ReadFunctionToken>(false);
        for (int p = 0; p < argCount; p++) {
            bindings.put(match.parameters.get(p), rft.children.get(p));
        }

        // Re-read the definition with the bindings in place; the result
        // takes over this node's position in the tree
        ReadFunctionToken[] expanded = new ReadFunctionToken[1];
        readFunctionTokenTree(match.definition, 0, bindings, expanded);
        rft = expanded[0];
    }

    // Descend into the children, storing whatever each one was replaced by
    int childCount = rft.children != null ? rft.children.size() : 0;
    for (int idx = 0; idx < childCount; idx++) {
        rft.children.set(idx, recurseProcessUserFormulae(rft.children.get(idx)));
    }
    return rft;
}
/**
 * Reads tokens into a tree of ReadFunctionToken nodes rooted at a
 * nested-expression node, recursing for every opening bracket.
 *
 * Reading stops at the end of the token list, at a closing bracket (which
 * is consumed), or - at level 0 only - at a comma (which is also consumed).
 *
 * @param tokens the tokens to read, starting at index 0
 * @param level bracket nesting depth; 0 for the outermost call
 * @param userFormulaeParameters when reading the definition of a user
 *            formula, maps each parameter name to the token tree of the
 *            argument to substitute for it; null otherwise
 * @param out single-element array receiving the root node in out[0]
 * @return the number of tokens consumed from the start of the list
 * @throws RuntimeException on invalid attribute syntax, an invalid name,
 *             or an unknown function while the policy is THROW_EXCEPTION
 */
private int readFunctionTokenTree(List<StringToken> tokens, int level,StandardisedStringTreeMap<ReadFunctionToken> userFormulaeParameters, ReadFunctionToken[] out) {
ReadFunctionToken ret = new ReadFunctionToken();
ret.tokenType = TokenType.nestedExpression;
out[0] = ret;
int n = tokens.size();
int i = 0;
while (i < n) {
// classification is done on the lower-cased token text
String token = tokens.get(i).getLowerCase();
if (token.equals(")")) {
// stop reading function
i++;
break;
} else if (token.equals("(")) {
// check if we're inside a function, i.e. the bracket directly follows a function token
ReadFunctionToken function = null;
if (ret.children.size() > 0) {
ReadFunctionToken last = ret.children.get(ret.children.size() - 1);
if (last.tokenType == TokenType.function) {
function = last;
}
}
// recurse over the tokens after the bracket; the return value is relative
// to the sublist, so it is added onto the current index
ReadFunctionToken[] tmp = new ReadFunctionToken[1];
i++;
i += readFunctionTokenTree(tokens.subList(i, tokens.size()), level + 1,userFormulaeParameters, tmp);
if (function != null) {
// bracket content is the function's argument list: split it on commas
List<List<ReadFunctionToken>> parameters = split(tmp[0].children, ",");
for (List<ReadFunctionToken> parameter : parameters) {
if (parameter.size() == 1) {
function.children.add(parameter.get(0));
} else {
// a multi-token argument becomes its own nested expression
ReadFunctionToken node = new ReadFunctionToken();
node.tokenType = TokenType.nestedExpression;
node.children.addAll(parameter);
function.children.add(node);
}
}
} else {
// plain bracketed sub-expression
ret.children.add(tmp[0]);
}
} else if (token.equals(".")) {
// Must be an attribute
if (ret.children.size() == 0 || i >= n - 1) {
throwException(tokens);
}
// Get attribute name
i++;
token = tokens.get(i).getOriginal();
// Check valid name
if (okVariableOrMethodName.matcher(token).find() == false) {
throwException(tokens);
}
// Check we have a previous token we can add an attribute name to
ReadFunctionToken last = ret.children.get(ret.children.size() - 1);
if (last.token == null) {
throwException(tokens);
}
// Previous token must be a variable or function
if (last.tokenType != TokenType.function && last.tokenType != TokenType.variable
&& last.tokenType != TokenType.nestedExpression) {
throwException(tokens);
}
// Go to one after attribute name and check not brackets
// NOTE(review): the attribute name is validated and skipped here but is
// not stored on any node - confirm whether attributes are meant to be supported.
i++;
if (i < n && tokens.get(i).getLowerCase().equals("(")) {
throwException(tokens);
}
} else {
// create token and identify type; the checks below run in priority order
// and each one only applies while the type is still undetermined
ReadFunctionToken rft = new ReadFunctionToken();
rft.token = token;
boolean okVariableName = okVariableOrMethodName.matcher(token).find();
FunctionDefinition opDfn = library.identifyOperator(token);
if(opDfn!=null){
rft.identified = opDfn.getFactory();
rft.tokenType = TokenType.operator;
rft.precendence = opDfn.getOperatorPrecendence();
}
// try identifying number
if (rft.tokenType == null && anyNumberExactStringMatcher.matcher(token).find()) {
rft.tokenType = TokenType.number;
}
// try identifying comma
if (rft.tokenType == null && token.equals(",")) {
rft.tokenType = TokenType.comma;
}
// check if its a function (a name immediately followed by an opening bracket)
if (rft.tokenType == null && i < n - 1 && tokens.get(i + 1).getLowerCase().equals("(")) {
if (!okVariableName) {
throwException(token);
}
// try userformula first so scripts are never broken if a new formula is introduced with same name
List<UserFormulaInternal> ufs = userFormulae.get(rft.token);
if(ufs!=null){
rft.identified = new UserFormulaPlaceholder(ufs);
}
// then try built in-function
if(rft.identified == null){
rft.identified = library.identify(rft.token, FunctionType.FUNCTION);
}
if (rft.identified == null) {
if(unidentifiedPolicy == UnidentifiedPolicy.THROW_EXCEPTION){
throw new RuntimeException("Unknown function \"" + rft.token + "\"");
}else{
rft.identified = FmUnidentified.FACTORY;
}
}
rft.tokenType = TokenType.function;
}
// check for string (including empty strings). save original case
if(rft.tokenType==null && token.length()>=2 && token.charAt(0)=='"' && token.charAt(token.length()-1)=='"'){
rft.tokenType = TokenType.string;
rft.token = tokens.get(i).getOriginal();
}
// check for parameter before checking for variables
if (rft.tokenType == null && userFormulaeParameters!=null && userFormulaeParameters.containsKey(rft.token)) {
if (!okVariableName) {
throwException(token);
}
// replace with the parameter (the argument's already-built token tree)
rft = userFormulaeParameters.get(rft.token);
}
// must be a variable
if (rft.tokenType == null) {
if (!okVariableName) {
throwException(token);
}
rft.tokenType = TokenType.variable;
}
i++;
// break if we're reading a comma in the top-level
if (rft.tokenType == TokenType.comma && level == 0) {
break;
} else {
// otherwise save the token
ret.children.add(rft);
}
}
}
return i;
}
/**
 * Splits a token list into groups at every token whose text equals the
 * delimiter. Delimiter tokens are dropped. Empty groups before a delimiter
 * are kept, but an empty group at the very end is not.
 *
 * @param tokens the tokens to split
 * @param deliminator the token text acting as separator
 * @return the groups in their original order
 */
private List<List<ReadFunctionToken>> split(List<ReadFunctionToken> tokens, String deliminator) {
    List<List<ReadFunctionToken>> groups = new ArrayList<>();
    ArrayList<ReadFunctionToken> group = new ArrayList<>();

    for (ReadFunctionToken candidate : tokens) {
        boolean isDelimiter = candidate.token != null && candidate.token.equals(deliminator);
        if (!isDelimiter) {
            group.add(candidate);
            continue;
        }
        // close the current group and start a fresh one
        groups.add(group);
        group = new ArrayList<>();
    }

    if (!group.isEmpty()) {
        groups.add(group);
    }
    return groups;
}
/**
 * Reports a parse failure for a whole token list. The message shows the
 * original text of the tokens separated by spaces, with the same kind of
 * "Error parsing" prefix as the single-token overload so the failure is
 * recognisable as a parse error.
 *
 * @param tokens the tokens being parsed when the error was found
 * @throws RuntimeException always
 */
private static void throwException(List<StringToken> tokens) {
    throw new RuntimeException("Error parsing tokens \"" + Strings.toString(new ToString<StringToken>() {
        @Override
        public String toString(StringToken o) {
            return o.getOriginal();
        }
    }, " ", tokens) + "\"");
}
// private static void throwException(List<String> tokens) {
// throw new RuntimeException("Error parsing tokens \"" + tokens + "\"");
// }
/**
 * Reports a parse failure for a single token.
 *
 * @param token the offending token text
 * @throws RuntimeException always
 */
private static void throwException(String token) {
    String message = "Error parsing token \"" + token + "\"";
    throw new RuntimeException(message);
}
/**
 * Manual smoke test: parses a quoted string formula against the standard
 * function library and prints the result of executing it.
 */
public static void main(String[] args) throws Exception {
    FunctionDefinitionLibrary stdLibrary = new FunctionDefinitionLibrary();
    stdLibrary.buildStd();

    // User formulae can be registered here for experiments, e.g.
    // definitions.add(new UserFormula("add2(a,b) = a + b"));
    // definitions.add(new UserFormula("ten() = 10"));
    List<UserFormula> definitions = new ArrayList<UserFormula>();

    FormulaParser parser = new FormulaParser(null, stdLibrary, definitions);
    Function parsed = parser.parse("\"hello \"world\"\"");
    Object result = parsed.execute(null);
    System.out.println(result);
}
/** Describes this parser by the string form of its function library. */
@Override
public String toString(){
    String description = library.toString();
    return description;
}
/**
 * Placeholder formula to mark anything that couldn't be identified.
 * Executing it always yields {@link Functions#EXECUTION_ERROR}.
 * @author Phil
 *
 */
public static class FmUnidentified extends FunctionImpl{
    public FmUnidentified(Function ... children) {
        super(children);
    }

    /** Always returns the execution error marker. */
    @Override
    public Object execute(FunctionParameters parameters) {
        return Functions.EXECUTION_ERROR;
    }

    /**
     * Creates an independent copy of this placeholder, deep-copying each
     * child, instead of returning null.
     *
     * @return a new placeholder holding deep copies of the children
     */
    @Override
    public Function deepCopy() {
        int n = nbChildren();
        Function[] copies = new Function[n];
        for (int i = 0; i < n; i++) {
            Function original = child(i);
            copies[i] = original != null ? original.deepCopy() : null;
        }
        return new FmUnidentified(copies);
    }

    /** Factory producing placeholder functions for unidentified names. */
    public static final FunctionFactory FACTORY = new FunctionFactory() {
        @Override
        public Function createFunction(Function... children) {
            return new FmUnidentified(children);
        }
    };

    /**
     * Tests whether a function tree contains an unidentified placeholder.
     *
     * @param f root of the tree to inspect; may be null
     * @return true if f or any of its descendants is an FmUnidentified
     */
    public static boolean containsUnidentified(Function f){
        if(f!=null ){
            if(FmUnidentified.class.isInstance(f)){
                return true;
            }

            int n = f.nbChildren();
            for(int i =0 ; i < n ; i++){
                if(containsUnidentified(f.child(i))){
                    return true;
                }
            }
        }
        return false;
    }
}
}