/*
* Copyright 2013 Future Systems
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.araqne.logdb.query.parser;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.text.Normalizer;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.StringTokenizer;
import org.araqne.logdb.FunctionRegistry;
import org.araqne.logdb.QueryContext;
import org.araqne.logdb.QueryParseException;
import org.araqne.logdb.Strings;
import org.araqne.logdb.query.expr.Expression;
public class ExpressionParser {
/**
 * Tells whether an option value is written in context-reference form,
 * i.e. wrapped as {@code $("...")}.
 *
 * @param optionValue
 *            raw option value, may be {@code null}
 * @return {@code true} only for a non-null value that starts with
 *         {@code $("} and ends with {@code ")}
 * @since 1.7.5
 */
public static boolean isContextReference(String optionValue) {
    if (optionValue == null) {
        return false;
    }
    boolean opensReference = optionValue.startsWith("$(\"");
    boolean closesReference = optionValue.endsWith("\")");
    return opensReference && closesReference;
}
/**
 * Resolves an option value that may be a context reference.
 * <p>
 * A value that is not in {@code $("...")} form is returned untouched.
 * Otherwise it is parsed as an expression and evaluated with a
 * {@code null} argument; the result is rendered as a string.
 *
 * @param context
 *            query context handed to the expression parser
 * @param s
 *            option value, possibly a context reference
 * @param functionRegistry
 *            registry handed to the expression parser
 * @return {@code s} itself when it is not a context reference; otherwise
 *         an empty string for a {@code null} result, a
 *         {@code yyyyMMddHHmmss} timestamp for a {@link Date} result, or
 *         {@code toString()} of any other result
 * @since 1.7.5
 */
public static String evalContextReference(QueryContext context, String s, FunctionRegistry functionRegistry) {
    if (!ExpressionParser.isContextReference(s)) {
        return s;
    }

    Object value = ExpressionParser.parse(context, s, functionRegistry).eval(null);
    if (value == null) {
        return "";
    }
    if (value instanceof Date) {
        // a fresh formatter per call: SimpleDateFormat must not be shared between threads
        return new SimpleDateFormat("yyyyMMddHHmmss").format(value);
    }
    return value.toString();
}
// Lookup table from a UTF-16 code unit to its ASCII replacement string; a null
// slot means "no replacement". The entries follow the NLM Lexical Tools symbol table:
// http://lexsrv3.nlm.nih.gov/LexSysGroup/Projects/lvg/current/docs/designDoc/UDF/unicode/DefaultTables/symbolTable.html
private static final String[] UniToAsciiMap = new String[65536];
static {
    // One row per line: the literal text "\\uXXXX", one separator character,
    // then the replacement. A row with nothing after the separator maps to a
    // single space.
    String map = "\\u00AB \"\n" +
            "\\u00AD -\n" +
            "\\u00B4 '\n" +
            "\\u00BB \"\n" +
            "\\u00F7 /\n" +
            "\\u01C0 |\n" +
            "\\u01C3 !\n" +
            "\\u02B9 '\n" +
            "\\u02BA \"\n" +
            "\\u02BC '\n" +
            "\\u02C4 ^\n" +
            "\\u02C6 ^\n" +
            "\\u02C8 '\n" +
            "\\u02CB `\n" +
            "\\u02CD _\n" +
            "\\u02DC ~\n" +
            "\\u0300 `\n" +
            "\\u0301 '\n" +
            "\\u0302 ^\n" +
            "\\u0303 ~\n" +
            "\\u030B \"\n" +
            "\\u030E \"\n" +
            "\\u0331 _\n" +
            "\\u0332 _\n" +
            "\\u0338 /\n" +
            "\\u0589 :\n" +
            "\\u05C0 |\n" +
            "\\u05C3 :\n" +
            "\\u066A %\n" +
            "\\u066D *\n" +
            "\\u200B \n" +
            "\\u2010 -\n" +
            "\\u2011 -\n" +
            "\\u2012 -\n" +
            "\\u2013 -\n" +
            "\\u2014 -\n" +
            "\\u2015 --\n" +
            "\\u2016 ||\n" +
            "\\u2017 _\n" +
            "\\u2018 '\n" +
            "\\u2019 '\n" +
            "\\u201A ,\n" +
            "\\u201B '\n" +
            "\\u201C \"\n" +
            "\\u201D \"\n" +
            "\\u201E \"\n" +
            "\\u201F \"\n" +
            "\\u2032 '\n" +
            "\\u2033 \"\n" +
            "\\u2034 '''\n" +
            "\\u2035 `\n" +
            "\\u2036 \"\n" +
            "\\u2037 '''\n" +
            "\\u2038 ^\n" +
            "\\u2039 <\n" +
            "\\u203A >\n" +
            "\\u203D ?\n" +
            "\\u2044 /\n" +
            "\\u204E *\n" +
            "\\u2052 %\n" +
            "\\u2053 ~\n" +
            "\\u2060 \n" +
            "\\u20E5 \\\n" +
            "\\u2212 -\n" +
            "\\u2215 /\n" +
            "\\u2216 \\\n" +
            "\\u2217 *\n" +
            "\\u2223 |\n" +
            "\\u2236 :\n" +
            "\\u223C ~\n" +
            "\\u2264 <=\n" +
            "\\u2265 >=\n" +
            "\\u2266 <=\n" +
            "\\u2267 >=\n" +
            "\\u2303 ^\n" +
            "\\u2329 <\n" +
            "\\u232A >\n" +
            "\\u266F #\n" +
            "\\u2731 *\n" +
            "\\u2758 |\n" +
            "\\u2762 !\n" +
            "\\u27E6 [\n" +
            "\\u27E8 <\n" +
            "\\u27E9 >\n" +
            "\\u2983 {\n" +
            "\\u2984 }\n" +
            "\\u3003 \"\n" +
            "\\u3008 <\n" +
            "\\u3009 >\n" +
            "\\u301B ]\n" +
            "\\u301C ~\n" +
            "\\u301D \"\n" +
            "\\u301E \"\n" +
            "\\uFEFF \n";

    for (String row : map.split("\n")) {
        if (row.length() < 6) {
            // not even a full "\\uXXXX" code; nothing to register
            continue;
        }
        // Parse by fixed position instead of splitting on a specific separator:
        // characters 2..5 are the hex digits, character 6 is the separator
        // (tab or space), and everything after it is the replacement.
        char code = (char) Integer.parseInt(row.substring(2, 6), 16);
        String replacement = row.length() > 7 ? row.substring(7) : " ";
        UniToAsciiMap[code] = replacement;
    }
}
/**
 * Replaces Unicode punctuation look-alikes (curly quotes, dashes,
 * zero-width characters, ...) with their ASCII replacements from
 * {@code UniToAsciiMap}.
 * <p>
 * Only plain ASCII (below {@code 0x80}) bypasses the table lookup. The
 * table holds entries in the Latin-1 range (U+00AB, U+00AD, U+00B4, U+00BB,
 * U+00F7), so a higher bypass threshold would leave those entries
 * unreachable. Characters without a table entry are copied unchanged.
 *
 * @param s
 *            query string, must not be {@code null}
 * @return the string with every mapped character replaced; the result may
 *         be longer than the input because some replacements are
 *         multi-character (e.g. {@code --})
 */
private static String normalizeQueryStr(String s) {
    StringBuilder ret = new StringBuilder(s.length());
    for (int i = 0; i < s.length(); ++i) {
        char c = s.charAt(i);
        // every char is a valid index: the table has one slot per UTF-16 code unit
        String replacement = c < 0x80 ? null : UniToAsciiMap[c];
        if (replacement == null)
            ret.append(c);
        else
            ret.append(replacement);
    }
    return ret.toString();
}
/**
 * Parses an expression string into an {@link Expression} tree using the
 * supplied parsing rule.
 * <p>
 * The input is NFC-normalized, Unicode punctuation look-alikes are mapped
 * to ASCII, and tab / line-feed / carriage-return characters become
 * spaces. The text is then tokenized, converted to postfix order and each
 * postfix term is handed to the rule's emitter factories, which build the
 * result on an expression stack.
 *
 * @param context
 *            query context passed to the function emitter
 * @param s
 *            expression text
 * @param r
 *            rule supplying the operator set and the emitter factories
 * @return the expression left on top of the stack
 * @throws IllegalArgumentException
 *             if {@code s} is {@code null}
 * @throws QueryParseException
 *             code 90200 for a term of an unexpected kind, 90201 when more
 *             than one expression remains; every parse error is tagged
 *             with the normalized input under the {@code "value"} key
 */
public static Expression parse(QueryContext context, String s, ParsingRule r) {
    try {
        if (s == null)
            throw new IllegalArgumentException("expression string should not be null");

        s = normalizeQueryStr(Normalizer.normalize(s, Normalizer.Form.NFC));
        // the tokenizer only understands the space character as blank
        s = s.replace('\t', ' ').replace('\n', ' ').replace('\r', ' ');

        List<Term> postfix = convertToPostfix(tokenize(s, r), r);

        Stack<Expression> stack = new Stack<Expression>();
        OpEmitterFactory opEmitter = r.getOpEmmiterFactory();
        TermEmitterFactory termEmitter = r.getTermEmitterFactory();
        FuncEmitterFactory funcEmitter = r.getFuncEmitterFactory();

        for (Term term : postfix) {
            if (r.getOpTerm().isInstance(term)) {
                opEmitter.emit(stack, term);
                continue;
            }
            if (term instanceof TokenTerm) {
                // variable or numeric constant
                TokenTerm tokenTerm = (TokenTerm) term;
                termEmitter.emit(stack, tokenTerm);
                continue;
            }
            if (term instanceof FuncTerm) {
                // function call
                FuncTerm funcTerm = (FuncTerm) term;
                funcEmitter.emit(context, stack, funcTerm);
                continue;
            }

            // unexpected term
            Map<String, String> unexpected = new HashMap<String, String>();
            unexpected.put("term", term.toString());
            unexpected.put("value", s);
            throw new QueryParseException("90200", -1, -1, unexpected);
        }

        if (stack.size() > 1) {
            // terms remain after all operators were applied
            Map<String, String> leftover = new HashMap<String, String>();
            leftover.put("value", s);
            throw new QueryParseException("90201", -1, -1, leftover);
        }

        return stack.pop();
    } catch (QueryParseException e) {
        e.getParams().put("value", s);
        throw e;
    }
}
/**
 * Parses an expression with the evaluation rule set: {@code EvalOpTerm}
 * operators plus the eval operator, function and term emitter factories.
 *
 * @param context
 *            query context passed on to the rule-based parser
 * @param s
 *            expression text
 * @param functionRegistry
 *            registry given to the function emitter factory
 * @return the parsed expression
 * @throws QueryParseException
 *             on a parse error; the original (un-normalized) text is
 *             stored under the {@code "value"} key before rethrowing
 * @since 1.7.3
 */
public static Expression parse(QueryContext context, String s, FunctionRegistry functionRegistry) {
    EvalOpEmitterFactory opEmitters = new EvalOpEmitterFactory();
    EvalFuncEmitterFactory funcEmitters = new EvalFuncEmitterFactory(functionRegistry);
    EvalTermEmitterFactory termEmitters = new EvalTermEmitterFactory();
    ParsingRule evalRule = new ParsingRule(EvalOpTerm.NOP, opEmitters, funcEmitters, termEmitters);

    try {
        return parse(context, s, evalRule);
    } catch (QueryParseException e) {
        e.getParams().put("value", s);
        throw e;
    }
}
/**
 * Reorders infix terms into postfix order using an operator stack.
 * <p>
 * Non-delimiter terms go straight to the output. Operators and function
 * terms with arguments wait on the stack until precedence rules (see
 * {@code needPop}) or a closing parenthesis release them.
 *
 * @param tokens
 *            terms in infix order
 * @param rule
 *            rule supplying the operator prototype
 * @return the same terms in postfix order, without parentheses
 * @throws QueryParseException
 *             code 90202 when a closing parenthesis has no matching
 *             opening one
 */
private static List<Term> convertToPostfix(List<Term> tokens, ParsingRule rule) {
    Stack<Term> pending = new Stack<Term>();
    List<Term> postfix = new ArrayList<Term>();
    OpTerm opTerm = rule.getOpTerm();

    for (Term token : tokens) {
        if (!isDelimiter(token, rule)) {
            postfix.add(token);
            continue;
        }

        // release stacked operators that must be applied before this one
        while (needPop(token, pending, postfix, rule)) {
            postfix.add(pending.pop());
        }

        if (opTerm.isInstance(token) || token instanceof FuncTerm) {
            pending.push(token);
            continue;
        }

        String text = ((TokenTerm) token).getText();
        if (text.equals("(")) {
            pending.push(token);
            continue;
        }
        if (!text.equals(")")) {
            continue;
        }

        // closing parenthesis: unwind down to the matching opening one
        boolean matched = false;
        while (!pending.isEmpty()) {
            Term top = pending.pop();
            if (top instanceof TokenTerm && ((TokenTerm) top).getText().equals("(")) {
                matched = true;
                break;
            }
            postfix.add(top);
        }
        if (!matched) {
            // parens mismatch
            throw new QueryParseException("90202", -1, -1, null);
        }

        // a function term right below the parenthesis owns the group just closed
        if (!pending.empty() && pending.peek() instanceof FuncTerm) {
            postfix.add(pending.pop());
        }

        // being closed by a parenthesis means a comma list has ended, so
        // give the most recent operator the chance to post-process itself
        if (!postfix.isEmpty()) {
            int lastIndex = postfix.size() - 1;
            Term recent = postfix.get(lastIndex);
            if (recent instanceof OpTerm) {
                postfix.set(lastIndex, ((OpTerm) recent).postProcessCloseParen());
            }
        }
    }

    // flush whatever operators are still waiting
    while (!pending.isEmpty()) {
        postfix.add(pending.pop());
    }

    return postfix;
}
/**
 * Decides whether the operator on top of the stack has to be moved to the
 * output before {@code token} is pushed.
 *
 * @param token
 *            incoming term
 * @param opStack
 *            operator stack, only peeked
 * @param output
 *            postfix output built so far (not consulted here)
 * @param rule
 *            rule supplying the operator prototype
 * @return {@code true} when {@code token} is an operator, the stack top is
 *         an operator too, and the incoming precedence is lower than the
 *         top's (lower or equal for a left-associative incoming operator)
 */
private static boolean needPop(Term token, Stack<Term> opStack, List<Term> output, ParsingRule rule) {
    if (!rule.getOpTerm().isInstance(token)) {
        return false;
    }

    OpTerm incoming = (OpTerm) token;
    int incomingPrecedence = incoming.getPrecedence();
    boolean leftAssoc = incoming.isLeftAssoc();

    if (opStack.isEmpty()) {
        return false;
    }
    Term top = opStack.peek();
    if (!(top instanceof OpTerm)) {
        // parenthesis or function term: acts as a barrier
        return false;
    }

    int topPrecedence = ((OpTerm) top).getPrecedence();
    return leftAssoc ? incomingPrecedence <= topPrecedence : incomingPrecedence < topPrecedence;
}
/**
 * Tells whether a raw token is a parenthesis or an operator symbol known
 * to the rule's operator set. The token is trimmed before the check.
 *
 * @param token
 *            raw token text, may be {@code null}
 * @param rule
 *            rule supplying the operator prototype
 * @return {@code false} for {@code null}; otherwise whether the trimmed
 *         token is {@code "("}, {@code ")"} or parses as an operator
 */
private static boolean isOperator(String token, ParsingRule rule) {
    if (token == null) {
        return false;
    }
    String symbol = token.trim();
    return symbol.equals("(") || symbol.equals(")") || rule.getOpTerm().parse(symbol) != null;
}
/**
 * Splits a whole expression string into terms.
 *
 * @param s
 *            expression text, must not be {@code null}
 * @param rule
 *            rule supplying the operator set
 * @return terms in the order they appear in {@code s}
 */
public static List<Term> tokenize(String s, ParsingRule rule) {
    // the range-based overload takes an inclusive end index
    int lastIndex = s.length() - 1;
    return tokenize(s, 0, lastIndex, rule);
}
/**
 * Splits {@code s[begin..end]} (both inclusive) into terms.
 * <p>
 * Besides plain splitting this pass
 * <ul>
 * <li>turns a name directly followed by {@code "("} into a
 * {@link FuncTerm},</li>
 * <li>marks a function as argument-less when {@code ")"} immediately
 * follows its {@code "("} and drops that parenthesis pair,</li>
 * <li>switches an operator to its alternative form (unary vs. binary)
 * depending on what precedes it.</li>
 * </ul>
 *
 * @param s
 *            expression text
 * @param begin
 *            first index to scan
 * @param end
 *            last index to scan (inclusive)
 * @param rule
 *            rule supplying the operator set
 * @return terms in source order
 */
private static List<Term> tokenize(String s, int begin, int end, ParsingRule rule) {
    List<Term> tokens = new ArrayList<Term>();
    String lastToken = null;
    int next = begin;
    while (true) {
        ParseResult r = nextToken(s, next, end, rule);
        if (r == null)
            break;

        // Advance before anything can skip the rest of the iteration;
        // otherwise an empty token would make the loop re-scan the same
        // position forever.
        next = r.next;

        String token = (String) r.value;
        if (token.isEmpty())
            continue;

        // read function call (including nested one)
        if (token.equals("(") && lastToken != null && !isOperator(lastToken, rule)) {
            // remove last term and add function term instead
            tokens.remove(tokens.size() - 1);
            tokens.add(new FuncTerm(lastToken.trim()));
        }

        OpTerm op = rule.getOpTerm().parse(token);

        // handle an operator which can be both unary and binary
        if (op != null && op.hasAltOp()) {
            Term lastTerm = null;
            if (!tokens.isEmpty()) {
                lastTerm = tokens.get(tokens.size() - 1);
            }

            if (!op.isUnary()) {
                // parsed as binary, but nothing usable as a left operand precedes it
                if (lastToken == null || lastToken.equals("(") || rule.getOpTerm().isInstance(lastTerm)) {
                    op = op.getAltOp();
                }
            } else {
                // parsed as unary, but an operand precedes it
                if (lastToken != null && !lastToken.equals("(") && !rule.getOpTerm().isInstance(lastTerm)) {
                    op = op.getAltOp();
                }
            }
        }

        if (tokens.size() >= 2 && token.equals(")")) {
            int size = tokens.size();
            if (tokens.get(size - 1).toString().equals("(") && tokens.get(size - 2) instanceof FuncTerm) {
                // function has no argument: drop "(" and do not add ")"
                tokens.remove(size - 1);
                FuncTerm func = (FuncTerm) tokens.get(size - 2);
                func.setHasArgument(false);
            } else {
                tokens.add(new TokenTerm(token));
            }
        } else if (op != null) {
            tokens.add(op);
        } else {
            tokens.add(new TokenTerm(token));
        }

        lastToken = token;
    }

    return tokens;
}
// adapted from org.apache.tools.ant.types.Commandline
// (apache license)
/**
 * Splits a command line into arguments the way a simple shell would.
 * <p>
 * Arguments are separated by the space character only. Single and double
 * quotes group text (including spaces and the other kind of quote) into
 * one argument; the quote characters themselves are dropped. There is no
 * escape character. An empty quoted section still yields an (empty)
 * argument.
 *
 * @param cmdline
 *            command line, may be {@code null}
 * @return the arguments in order; an empty array for {@code null} or an
 *         empty string
 * @throws IllegalArgumentException
 *             if a quote is opened but never closed
 */
public static String[] translateCommandline(String cmdline) {
    if (cmdline == null || cmdline.length() == 0) {
        return new String[0];
    }

    List<String> args = new ArrayList<String>();
    StringBuilder word = new StringBuilder();
    // 0 while outside quotes, otherwise the quote character that opened the section
    char openQuote = 0;
    // true right after a quoted section ends, so that '' or "" produces an empty argument
    boolean justClosedQuote = false;

    for (int i = 0; i < cmdline.length(); i++) {
        char ch = cmdline.charAt(i);

        if (openQuote != 0) {
            if (ch == openQuote) {
                justClosedQuote = true;
                openQuote = 0;
            } else {
                word.append(ch);
            }
            continue;
        }

        if (ch == '\'' || ch == '"') {
            openQuote = ch;
        } else if (ch == ' ') {
            if (justClosedQuote || word.length() != 0) {
                args.add(word.toString());
                word.setLength(0);
            }
        } else {
            word.append(ch);
        }
        justClosedQuote = false;
    }

    if (justClosedQuote || word.length() != 0) {
        args.add(word.toString());
    }
    if (openQuote != 0) {
        throw new IllegalArgumentException("unbalanced quotes in [" + cmdline + "]");
    }
    return args.toArray(new String[0]);
}
/**
 * Reads the next raw token from {@code s[begin..end]}.
 * <p>
 * The nearest delimiter decides what is returned:
 * <ul>
 * <li>no delimiter at all: the trimmed rest of the range;</li>
 * <li>non-blank text before the delimiter: that text, trimmed (the
 * delimiter is left for the next call);</li>
 * <li>a double quote: the whole quoted string passed through
 * {@code Strings.unescape};</li>
 * <li>an opening square bracket: the bracketed text up to its matching
 * closing bracket, brackets included;</li>
 * <li>a space: skipped, scanning continues after the blanks;</li>
 * <li>anything else: the delimiter symbol itself.</li>
 * </ul>
 *
 * @param s
 *            expression text
 * @param begin
 *            first index to scan
 * @param end
 *            last index to scan (inclusive)
 * @param rule
 *            rule supplying the operator delimiters
 * @return the token text paired with the index where scanning should
 *         resume, or {@code null} when {@code begin} is past {@code end}
 * @throws QueryParseException
 *             code 90203 for an unterminated quote, 90204 for an
 *             unterminated square bracket
 */
private static ParseResult nextToken(String s, int begin, int end, ParsingRule rule) {
    if (begin > end)
        return null;

    ParseResult delim = findNextDelimiter(s, begin, end, rule);
    int at = delim.next;

    if (at < begin) {
        // no symbol operator and no white space: the rest is one token
        return new ParseResult(s.substring(begin, end + 1).trim(), end + 1);
    }

    if (!isAllWhitespaces(s, begin, at - 1)) {
        // a term sits before the delimiter
        return new ParseResult(s.substring(begin, at).trim(), at);
    }

    if (delim.value.equals("\"")) {
        int close = findClosingQuote(s, at + 1);
        if (close < 0) {
            // quote mismatch
            throw new QueryParseException("90203", -1, -1, null);
        }
        return new ParseResult(Strings.unescape(s.substring(at, close + 1)), close + 1);
    }

    if (delim.value.equals("[")) {
        int close = findClosingSquareBracket(s, at + 1);
        if (close == at) {
            // the search reports "not found" as (start - 1): square bracket mismatch
            throw new QueryParseException("90204", -1, -1, null);
        }
        return new ParseResult(s.substring(at, close + 1), close + 1);
    }

    String symbol = (String) delim.value;
    if (symbol.trim().isEmpty()) {
        // blank delimiter: move past the white space and try again
        return nextToken(s, skipWhitespaces(s, begin), end, rule);
    }

    // operator or parenthesis
    return new ParseResult(symbol, at + symbol.length());
}
/**
 * Finds the closing square bracket that matches an opening bracket located
 * just before {@code start}, skipping over nested bracket pairs.
 *
 * @param s
 *            text to scan
 * @param start
 *            index of the first character after the opening bracket
 * @return index of the matching {@code ']'}, or {@code start - 1} when
 *         there is none
 */
private static int findClosingSquareBracket(String s, int start) {
    int depth = 0; // nested '[' seen since start and not yet closed
    int length = s.length();
    for (int pos = start; pos < length; pos++) {
        switch (s.charAt(pos)) {
        case '[':
            depth++;
            break;
        case ']':
            if (depth == 0) {
                return pos;
            }
            depth--;
            break;
        default:
            break;
        }
    }
    return start - 1;
}
/**
 * Finds the first unescaped double quote at or after {@code offset}.
 * <p>
 * A backslash escapes the character after it. Only {@code \\},
 * {@code \"}, {@code \n}, {@code \t} and {@code \r} are accepted.
 *
 * @param s
 *            text to scan
 * @param offset
 *            index to start scanning from
 * @return index of the closing quote, or {@code -1} when the text ends
 *         first
 * @throws QueryParseException
 *             code 90205 for any other escaped character; the offending
 *             sequence is reported under the {@code "escape"} key
 */
static int findClosingQuote(String s, int offset) {
    int length = s.length();
    int pos = offset;
    while (pos < length) {
        char c = s.charAt(pos);
        if (c == '"') {
            return pos;
        }
        if (c != '\\') {
            pos++;
            continue;
        }

        // backslash: validate the escaped character (if any) and step over both
        if (pos + 1 < length) {
            char escaped = s.charAt(pos + 1);
            boolean known = escaped == '\\' || escaped == '"' || escaped == 'n' || escaped == 't' || escaped == 'r';
            if (!known) {
                // invalid escape sequence
                Map<String, String> params = new HashMap<String, String>();
                params.put("escape", "\\" + escaped);
                throw new QueryParseException("90205", -1, -1, params);
            }
        }
        pos += 2;
    }
    return -1;
}
/**
 * Checks that {@code s[begin..end]} (both inclusive) holds nothing but
 * white space as defined by {@link Character#isWhitespace(char)}.
 *
 * @param s
 *            text to inspect
 * @param begin
 *            first index of the range
 * @param end
 *            last index of the range (inclusive)
 * @return {@code true} for an empty range ({@code end < begin}) or when
 *         every character in the range is white space
 */
private static boolean isAllWhitespaces(String s, int begin, int end) {
    int index = begin;
    while (index <= end) {
        if (!Character.isWhitespace(s.charAt(index))) {
            return false;
        }
        index++;
    }
    // reached for an empty range as well
    return true;
}
/**
 * Locates the delimiter closest to {@code begin} within
 * {@code s[begin..end]}.
 * <p>
 * Candidates are the double quote, parentheses, square brackets, every
 * delimiter symbol of the rule's operator set and finally the space
 * character. Tabs are turned into spaces by {@code parse}, so only the
 * space is checked as a blank.
 *
 * @param s
 *            text to scan
 * @param begin
 *            index to start searching from
 * @param end
 *            last acceptable index (inclusive)
 * @param rule
 *            rule supplying the operator delimiters
 * @return a result whose {@code value} is the delimiter symbol and whose
 *         {@code next} is its position; {@code value == null} and
 *         {@code next == -1} when nothing was found
 */
private static ParseResult findNextDelimiter(String s, int begin, int end, ParsingRule rule) {
    ParseResult nearest = new ParseResult(null, -1);

    // quote, parens and brackets, in the order they must be offered to min()
    String[] structural = { "\"", "(", ")", "[", "]" };
    for (String symbol : structural) {
        min(nearest, symbol, s.indexOf(symbol, begin), end);
    }

    for (OpTerm op : rule.getOpTerm().delimiters()) {
        String symbol = op.getSymbol();
        min(nearest, symbol, s.indexOf(symbol, begin), end);
    }

    // white space comes last
    min(nearest, " ", s.indexOf(' ', begin), end);

    return nearest;
}
/**
 * Updates {@code r} when {@code symbol}, found at position {@code p}, is a
 * better "nearest delimiter" than the one recorded so far.
 * <p>
 * A candidate wins when nothing is recorded yet, when it starts earlier,
 * or when it starts at the same position and is longer than the recorded
 * symbol. Candidates that were not found ({@code p < 0}) or that lie
 * beyond {@code end} are ignored.
 *
 * @param r
 *            current best match, modified in place
 * @param symbol
 *            candidate delimiter text
 * @param p
 *            position of the candidate, negative when absent
 * @param end
 *            last acceptable position (inclusive)
 */
private static void min(ParseResult r, String symbol, int p, int end) {
    if (p < 0 || p > end) {
        return;
    }

    boolean nothingYet = r.next == -1;
    boolean earlier = p < r.next;
    boolean longerAtSamePosition = p == r.next && r.value instanceof String
            && symbol.length() > ((String) r.value).length();

    if (nothingYet || earlier || longerAtSamePosition) {
        r.value = symbol;
        r.next = p;
    }
}
/**
 * Tells whether a term takes part in operator-stack handling during
 * postfix conversion.
 *
 * @param t
 *            term to classify
 * @param rule
 *            rule supplying the operator prototype
 * @return {@code true} for an operator of the rule, a function term that
 *         has arguments, or a parenthesis token; {@code false} otherwise
 */
private static boolean isDelimiter(Term t, ParsingRule rule) {
    if (rule.getOpTerm().isInstance(t)) {
        return true;
    }
    if (t instanceof FuncTerm && ((FuncTerm) t).hasArgument()) {
        return true;
    }
    if (!(t instanceof TokenTerm)) {
        return false;
    }
    String text = ((TokenTerm) t).getText();
    return text.equals("(") || text.equals(")");
}
/**
 * A term that carries plain token text. The parser creates it for
 * variables, numeric constants and parentheses.
 */
public static class TokenTerm implements Term {
    /** Token text exactly as produced by the tokenizer. */
    private final String text;

    /**
     * @param text
     *            token text
     */
    public TokenTerm(String text) {
        this.text = text;
    }

    /**
     * @return the token text
     */
    public String getText() {
        return text;
    }

    /**
     * @return the same value as {@link #getText()}
     */
    @Override
    public String toString() {
        return getText();
    }
}
/**
 * A term that stands for a function call. A new instance is assumed to
 * have arguments until the tokenizer sees an empty argument list and
 * clears the flag.
 */
public static class FuncTerm implements Term {
    /** Function name as written in the expression. */
    private final String name;

    /** Whether the call has at least one argument. */
    private boolean argument = true;

    /**
     * @param name
     *            function name
     */
    public FuncTerm(String name) {
        this.name = name;
    }

    /**
     * @return the function name
     */
    public String getName() {
        return name;
    }

    /**
     * @return {@code true} unless the call was marked as argument-less
     */
    public boolean hasArgument() {
        return argument;
    }

    /**
     * @param argument
     *            {@code false} to mark the call as argument-less
     */
    public void setHasArgument(boolean argument) {
        this.argument = argument;
    }

    @Override
    public String toString() {
        return "func " + name + "()";
    }
}
/**
 * Returns the index of the first non-white-space character at or after
 * {@code position}, using {@link Character#isWhitespace(char)}.
 *
 * @param text
 *            text to scan
 * @param position
 *            index to start from
 * @return index of the first non-blank character; the text length when
 *         only blanks follow; {@code position} itself when it is already
 *         at or past the end of the text
 */
public static int skipWhitespaces(String text, int position) {
    int length = text.length();
    for (int index = position; index < length; index++) {
        if (!Character.isWhitespace(text.charAt(index))) {
            return index;
        }
    }
    // nothing but blanks (or nothing at all) from position on
    return Math.max(position, length);
}
}