// ExpressionParser.java example
//
// Explorer
// logdb-master
/*
 * Copyright 2013 Future Systems
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.araqne.logdb.query.parser;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
import java.text.Normalizer;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.StringTokenizer;

import org.araqne.logdb.FunctionRegistry;
import org.araqne.logdb.QueryContext;
import org.araqne.logdb.QueryParseException;
import org.araqne.logdb.Strings;
import org.araqne.logdb.query.expr.Expression;

public class ExpressionParser {

	/**
	 * Tells whether an option value is written as a context reference, that
	 * is, it begins with <code>$("</code> and ends with <code>")</code>.
	 * 
	 * @param optionValue
	 *            raw option value, may be null
	 * @return true only for a non-null value carrying both markers
	 * @since 1.7.5
	 */
	public static boolean isContextReference(String optionValue) {
		if (optionValue == null)
			return false;

		boolean opens = optionValue.startsWith("$(\"");
		boolean closes = optionValue.endsWith("\")");
		return opens && closes;
	}

	/**
	 * Resolves an option value that may be a context reference.
	 * 
	 * A value that is not a context reference is returned untouched. Otherwise
	 * it is parsed as an expression and evaluated with a null argument; a null
	 * result becomes the empty string, a {@link Date} is rendered as
	 * yyyyMMddHHmmss, and anything else is converted with toString().
	 * 
	 * @param context
	 *            query context handed to the expression parser
	 * @param s
	 *            option value to resolve
	 * @param functionRegistry
	 *            function registry handed to the expression parser
	 * @return the resolved text, or {@code s} itself when it is not a reference
	 * @since 1.7.5
	 */
	public static String evalContextReference(QueryContext context, String s, FunctionRegistry functionRegistry) {
		if (!isContextReference(s))
			return s;

		Object value = parse(context, s, functionRegistry).eval(null);
		if (value == null)
			return "";

		if (value instanceof Date)
			return new SimpleDateFormat("yyyyMMddHHmmss").format((Date) value);

		return value.toString();
	}
	
	// Lookup table from a UTF-16 code unit to its ASCII replacement text; null
	// means "no replacement". Entries follow the symbol table published at
	// http://lexsrv3.nlm.nih.gov/LexSysGroup/Projects/lvg/current/docs/designDoc/UDF/unicode/DefaultTables/symbolTable.html
	private static String[] UniToAsciiMap;
	static {
		// one row per symbol: escaped code point (backslash, 'u', four hex
		// digits), a tab, then the replacement text
		String[] rows = {
				"\\u00AB\t\"",
				"\\u00AD\t-",
				"\\u00B4\t'",
				"\\u00BB\t\"",
				"\\u00F7\t/",
				"\\u01C0\t|",
				"\\u01C3\t!",
				"\\u02B9\t'",
				"\\u02BA\t\"",
				"\\u02BC\t'",
				"\\u02C4\t^",
				"\\u02C6\t^",
				"\\u02C8\t'",
				"\\u02CB\t`",
				"\\u02CD\t_",
				"\\u02DC\t~",
				"\\u0300\t`",
				"\\u0301\t'",
				"\\u0302\t^",
				"\\u0303\t~",
				"\\u030B\t\"",
				"\\u030E\t\"",
				"\\u0331\t_",
				"\\u0332\t_",
				"\\u0338\t/",
				"\\u0589\t:",
				"\\u05C0\t|",
				"\\u05C3\t:",
				"\\u066A\t%",
				"\\u066D\t*",
				"\\u200B\t ",
				"\\u2010\t-",
				"\\u2011\t-",
				"\\u2012\t-",
				"\\u2013\t-",
				"\\u2014\t-",
				"\\u2015\t--",
				"\\u2016\t||",
				"\\u2017\t_",
				"\\u2018\t'",
				"\\u2019\t'",
				"\\u201A\t,",
				"\\u201B\t'",
				"\\u201C\t\"",
				"\\u201D\t\"",
				"\\u201E\t\"",
				"\\u201F\t\"",
				"\\u2032\t'",
				"\\u2033\t\"",
				"\\u2034\t'''",
				"\\u2035\t`",
				"\\u2036\t\"",
				"\\u2037\t'''",
				"\\u2038\t^",
				"\\u2039\t<",
				"\\u203A\t>",
				"\\u203D\t?",
				"\\u2044\t/",
				"\\u204E\t*",
				"\\u2052\t%",
				"\\u2053\t~",
				"\\u2060\t ",
				"\\u20E5\t\\",
				"\\u2212\t-",
				"\\u2215\t/",
				"\\u2216\t\\",
				"\\u2217\t*",
				"\\u2223\t|",
				"\\u2236\t:",
				"\\u223C\t~",
				"\\u2264\t<=",
				"\\u2265\t>=",
				"\\u2266\t<=",
				"\\u2267\t>=",
				"\\u2303\t^",
				"\\u2329\t<",
				"\\u232A\t>",
				"\\u266F\t#",
				"\\u2731\t*",
				"\\u2758\t|",
				"\\u2762\t!",
				"\\u27E6\t[",
				"\\u27E8\t<",
				"\\u27E9\t>",
				"\\u2983\t{",
				"\\u2984\t}",
				"\\u3003\t\"",
				"\\u3008\t<",
				"\\u3009\t>",
				"\\u301B\t]",
				"\\u301C\t~",
				"\\u301D\t\"",
				"\\u301E\t\"",
				"\\uFEFF\t "
		};

		String[] table = new String[65536];
		for (String row : rows) {
			String[] columns = row.split("\t");
			// drop the two-character prefix and read the remaining hex digits
			char codeUnit = (char) Integer.parseInt(columns[0].substring(2), 16);
			// a row without a replacement column maps to a single space
			table[codeUnit] = columns.length > 1 ? columns[1] : " ";
		}
		UniToAsciiMap = table;
	}
	
	/**
	 * Replaces look-alike Unicode symbols with their ASCII counterparts.
	 * 
	 * Every UTF-16 code unit that has an entry in UniToAsciiMap is substituted
	 * by that entry (which may be longer than one character); all other
	 * characters are copied unchanged.
	 * 
	 * @param s
	 *            text to normalize, must not be null
	 * @return the text with mapped symbols replaced
	 */
	private static String normalizeQueryStr(String s) {
		StringBuilder ret = new StringBuilder(s.length());
		for (int i = 0; i < s.length(); ++i) {
			char c = s.charAt(i);

			// Only 7-bit ASCII is skipped. The previous bound was 0x128 (296
			// decimal), which also skipped U+0080..U+0127 and therefore left the
			// table entries for U+00AB, U+00AD, U+00B4, U+00BB and U+00F7 unused.
			// A char never exceeds 0xFFFF, the last index of the 65536-slot
			// table, so no upper bound check is required.
			String replacement = c < 0x80 ? null : UniToAsciiMap[c];
			if (replacement == null)
				ret.append(c);
			else
				ret.append(replacement);
		}
		return ret.toString();
	}
	

	/**
	 * Parses an expression string into an {@link Expression} using the given
	 * parsing rule.
	 * 
	 * The text is first canonicalized (Unicode NFC, look-alike symbols mapped
	 * to ASCII, each tab turned into four spaces, line breaks into single
	 * spaces), then tokenized, reordered to postfix and finally folded into an
	 * expression through the rule's emitter factories.
	 * 
	 * @param context
	 *            query context passed to the function emitter
	 * @param s
	 *            expression text
	 * @param r
	 *            rule providing the operator type and the emitter factories
	 * @return the expression left on the evaluation stack
	 * @throws IllegalArgumentException
	 *             if {@code s} is null
	 * @throws QueryParseException
	 *             90200 for a term of unknown kind, 90201 when more than one
	 *             expression remains; the canonicalized text is stored under
	 *             the "value" parameter of any parse exception passing through
	 */
	public static Expression parse(QueryContext context, String s, ParsingRule r) {
		if (s == null)
			throw new IllegalArgumentException("expression string should not be null");

		String normalized = normalizeQueryStr(Normalizer.normalize(s, Normalizer.Form.NFC));
		normalized = normalized.replace("\t", "    ").replace('\n', ' ').replace('\r', ' ');

		try {
			List<Term> postfix = convertToPostfix(tokenize(normalized, r), r);

			Stack<Expression> operands = new Stack<Expression>();
			OpEmitterFactory opEmitter = r.getOpEmmiterFactory();
			TermEmitterFactory termEmitter = r.getTermEmitterFactory();
			FuncEmitterFactory funcEmitter = r.getFuncEmitterFactory();

			for (Term term : postfix) {
				if (r.getOpTerm().isInstance(term)) {
					opEmitter.emit(operands, term);
					continue;
				}

				if (term instanceof TokenTerm) {
					// variable or constant
					termEmitter.emit(operands, (TokenTerm) term);
					continue;
				}

				if (term instanceof FuncTerm) {
					funcEmitter.emit(context, operands, (FuncTerm) term);
					continue;
				}

				// 90200 (formerly "unexpected-term")
				Map<String, String> params = new HashMap<String, String>();
				params.put("term", term.toString());
				params.put("value", normalized);
				throw new QueryParseException("90200", -1, -1, params);
			}

			if (operands.size() > 1) {
				// 90201 (formerly "remain-terms")
				Map<String, String> params = new HashMap<String, String>();
				params.put("value", normalized);
				throw new QueryParseException("90201", -1, -1, params);
			}

			return operands.pop();
		} catch (QueryParseException e) {
			e.getParams().put("value", normalized);
			throw e;
		}
	}

	/**
	 * Parses an expression string with the evaluation rule set: eval
	 * operators, eval term emitter and a function emitter backed by the given
	 * registry.
	 * 
	 * @param context
	 *            query context passed through to the rule based parser
	 * @param s
	 *            expression text
	 * @param functionRegistry
	 *            registry handed to the function emitter factory
	 * @return the parsed expression
	 * @throws QueryParseException
	 *             rethrown after its "value" parameter is set to the original
	 *             {@code s}
	 * @since 1.7.3
	 */
	public static Expression parse(QueryContext context, String s, FunctionRegistry functionRegistry) {
		EvalOpEmitterFactory opEmitter = new EvalOpEmitterFactory();
		EvalFuncEmitterFactory funcEmitter = new EvalFuncEmitterFactory(functionRegistry);
		EvalTermEmitterFactory termEmitter = new EvalTermEmitterFactory();
		ParsingRule evalRule = new ParsingRule(EvalOpTerm.NOP, opEmitter, funcEmitter, termEmitter);

		Expression parsed;
		try {
			parsed = parse(context, s, evalRule);
		} catch (QueryParseException e) {
			// report the text exactly as the caller supplied it
			e.getParams().put("value", s);
			throw e;
		}
		return parsed;
	}

	/**
	 * Reorders infix terms into postfix order with an operator stack.
	 * 
	 * Operands are copied straight to the output. Operators, functions that
	 * take arguments and parentheses go through the stack; a closing
	 * parenthesis unwinds the stack down to its opening partner and also
	 * emits the function term waiting directly below it, if any.
	 * 
	 * @param tokens
	 *            terms in source order
	 * @param rule
	 *            rule identifying which terms are operators
	 * @return the terms in postfix order
	 * @throws QueryParseException
	 *             90202 when a closing parenthesis has no opening partner
	 */
	private static List<Term> convertToPostfix(List<Term> tokens, ParsingRule rule) {
		List<Term> postfix = new ArrayList<Term>();
		Stack<Term> pending = new Stack<Term>();
		OpTerm opType = rule.getOpTerm();

		for (Term token : tokens) {
			if (!isDelimiter(token, rule)) {
				postfix.add(token);
				continue;
			}

			// move stacked operators to the output for as long as needPop says so
			while (needPop(token, pending, postfix, rule))
				postfix.add(pending.pop());

			if (opType.isInstance(token) || token instanceof FuncTerm) {
				pending.push(token);
				continue;
			}

			// any other delimiter is a parenthesis token
			String paren = ((TokenTerm) token).getText();
			if (paren.equals("(")) {
				pending.push(token);
				continue;
			}

			if (!paren.equals(")"))
				continue;

			// unwind to the matching "("
			boolean matched = false;
			while (!pending.isEmpty()) {
				Term popped = pending.pop();
				if (popped instanceof TokenTerm && ((TokenTerm) popped).getText().equals("(")) {
					matched = true;
					break;
				}
				postfix.add(popped);
			}

			// 90202 (formerly "parens-mismatch")
			if (!matched)
				throw new QueryParseException("90202", -1, -1, null);

			// a function term right below the "(" owns this group
			if (!pending.empty() && pending.peek() instanceof FuncTerm)
				postfix.add(pending.pop());

			// the closing parenthesis ends any comma list, so let the most
			// recent operator post-process itself
			if (!postfix.isEmpty()) {
				int tail = postfix.size() - 1;
				Term recent = postfix.get(tail);
				if (recent instanceof OpTerm)
					postfix.set(tail, ((OpTerm) recent).postProcessCloseParen());
			}
		}

		// whatever is still stacked follows the operands
		while (!pending.isEmpty())
			postfix.add(pending.pop());

		return postfix;
	}

	/**
	 * Decides whether the operator on top of the stack has to be moved to the
	 * output before {@code token} is pushed.
	 * 
	 * That is the case when {@code token} is an operator, the stack top is an
	 * operator as well, and the incoming precedence is lower than the top's,
	 * or equal to it while the incoming operator is left associative.
	 * 
	 * @param token
	 *            incoming term
	 * @param opStack
	 *            operator stack; only its top element is inspected
	 * @param output
	 *            not used by this method
	 * @param rule
	 *            rule identifying which terms are operators
	 * @return true if the stack top should be popped first
	 */
	private static boolean needPop(Term token, Stack<Term> opStack, List<Term> output, ParsingRule rule) {
		if (!rule.getOpTerm().isInstance(token))
			return false;

		OpTerm incoming = (OpTerm) token;
		int incomingPrecedence = incoming.getPrecedence();
		boolean incomingLeftAssoc = incoming.isLeftAssoc();

		if (opStack.isEmpty())
			return false;

		Term top = opStack.peek();
		if (!(top instanceof OpTerm))
			return false;

		int topPrecedence = ((OpTerm) top).getPrecedence();
		return incomingLeftAssoc ? incomingPrecedence <= topPrecedence : incomingPrecedence < topPrecedence;
	}

	/**
	 * Checks whether the trimmed token is a parenthesis or a symbol the
	 * rule's operator type can parse.
	 * 
	 * @param token
	 *            token text, may be null
	 * @param rule
	 *            rule supplying the operator parser
	 * @return false for null, otherwise whether the token is "(", ")" or an
	 *         operator
	 */
	private static boolean isOperator(String token, ParsingRule rule) {
		if (token == null)
			return false;

		String symbol = token.trim();
		return symbol.equals("(") || symbol.equals(")") || rule.getOpTerm().parse(symbol) != null;
	}

	/**
	 * Splits an expression string into terms: operators, plain tokens
	 * (including parentheses) and function names.
	 * 
	 * @param s
	 *            expression text, must not be null
	 * @param rule
	 *            rule supplying the operator parser
	 * @return the terms in source order
	 */
	public static List<Term> tokenize(String s, ParsingRule rule) {
		return tokenize(s, 0, s.length() - 1, rule);
	}

	/**
	 * Tokenizes the inclusive character range {@code [begin, end]} of
	 * {@code s}.
	 * 
	 * A token directly followed by "(" becomes a {@link FuncTerm}; a function
	 * followed by "()" is flagged as having no argument and its "(" is
	 * dropped. Operators that have an alternative form are switched between
	 * unary and binary depending on what precedes them.
	 */
	private static List<Term> tokenize(String s, int begin, int end, ParsingRule rule) {
		List<Term> tokens = new ArrayList<Term>();

		String lastToken = null;
		int next = begin;
		while (true) {
			ParseResult r = nextToken(s, next, end, rule);
			if (r == null)
				break;

			String token = (String) r.value;
			if (token.isEmpty()) {
				// An empty token still consumed input (for instance a lone form
				// feed or control character, which trim() strips although it
				// is not a delimiter). The cursor must advance; otherwise
				// nextToken is asked about the same position forever.
				next = r.next;
				continue;
			}

			// read function call (including nested one)
			if (token.equals("(") && lastToken != null && !isOperator(lastToken, rule)) {
				// remove last term and add function term instead
				tokens.remove(tokens.size() - 1);
				tokens.add(new FuncTerm(lastToken.trim()));
			}

			OpTerm op = rule.getOpTerm().parse(token);

			// handling operator which can be both unary and binary
			if (op != null && op.hasAltOp()) {
				Term lastTerm = null;
				if (!tokens.isEmpty()) {
					lastTerm = tokens.get(tokens.size() - 1);
				}

				if (!op.isUnary()) {
					// parsed as non-unary, but nothing, "(" or another operator
					// precedes it: take the alternative form
					if (lastToken == null || lastToken.equals("(") || rule.getOpTerm().isInstance(lastTerm)) {
						op = op.getAltOp();
					}
				} else {
					// parsed as unary, but something other than "(" or an
					// operator precedes it: take the alternative form
					if (lastToken != null && !lastToken.equals("(") && !rule.getOpTerm().isInstance(lastTerm)) {
						op = op.getAltOp();
					}
				}
			}

			if (tokens.size() >= 2 && token.equals(")")) {
				int size = tokens.size();
				if (tokens.get(size - 1).toString().equals("(") && tokens.get(size - 2) instanceof FuncTerm) {
					// function has no argument: drop the "(" and mark the
					// function instead of emitting the ")"
					tokens.remove(size - 1);
					FuncTerm func = (FuncTerm) tokens.get(size - 2);
					func.setHasArgument(false);
				} else {
					tokens.add(new TokenTerm(token));
				}
			} else if (op != null) {
				tokens.add(op);
			} else {
				tokens.add(new TokenTerm(token));
			}

			next = r.next;
			lastToken = token;
		}

		return tokens;
	}
	
	// from org.apache.tools.ant.types.Commandline
	// (apache license)
	/**
	 * Splits a command line into arguments.
	 * 
	 * Arguments are separated by spaces. Text inside single or double quotes
	 * is taken literally, spaces and the other quote character included, and
	 * the quotes themselves are dropped. A quoted empty string still yields an
	 * (empty) argument.
	 * 
	 * @param cmdline
	 *            command line, may be null
	 * @return the arguments; an empty array for a null or empty command line
	 * @throws IllegalArgumentException
	 *             if a quote is left open
	 */
	public static String[] translateCommandline(String cmdline) {
		if (cmdline == null || cmdline.isEmpty())
			return new String[0];

		List<String> args = new ArrayList<String>();
		StringBuilder word = new StringBuilder();
		// quote character currently open, or 0 while outside quotes
		char openQuote = 0;
		// true right after a closing quote, so that '' and "" count as arguments
		boolean justClosedQuote = false;

		for (int i = 0; i < cmdline.length(); i++) {
			char ch = cmdline.charAt(i);

			if (openQuote != 0) {
				if (ch == openQuote) {
					justClosedQuote = true;
					openQuote = 0;
				} else {
					word.append(ch);
				}
				continue;
			}

			if (ch == '\'' || ch == '"') {
				openQuote = ch;
			} else if (ch == ' ') {
				if (justClosedQuote || word.length() != 0) {
					args.add(word.toString());
					word.setLength(0);
				}
			} else {
				word.append(ch);
			}
			justClosedQuote = false;
		}

		if (justClosedQuote || word.length() != 0)
			args.add(word.toString());

		if (openQuote != 0)
			throw new IllegalArgumentException("unbalanced quotes in [" + cmdline + "]");

		return args.toArray(new String[0]);
	}


	/**
	 * Reads the next raw token from {@code s}, starting at {@code begin} and
	 * not looking past {@code end} (inclusive) for delimiters.
	 * 
	 * @return the token text together with the position to continue from, or
	 *         null once {@code begin} has passed {@code end}
	 * @throws QueryParseException
	 *             90203 for an unterminated quoted string, 90204 for an
	 *             unterminated square bracket
	 */
	private static ParseResult nextToken(String s, int begin, int end, ParsingRule rule) {
		if (begin > end)
			return null;

		// delimiter.next is the position of the delimiter itself, not the
		// position after it
		ParseResult delimiter = findNextDelimiter(s, begin, end, rule);
		int at = delimiter.next;

		// no delimiter left in range: the remainder is one token
		if (at < begin)
			return new ParseResult(s.substring(begin, end + 1).trim(), end + 1);

		// text precedes the delimiter: return that text and stop in front of
		// the delimiter
		if (!isAllWhitespaces(s, begin, at - 1))
			return new ParseResult(s.substring(begin, at).trim(), at);

		// quoted string
		if (delimiter.value.equals("\"")) {
			int closing = findClosingQuote(s, at + 1);
			// 90203 (formerly "quote-mismatch")
			if (closing < 0)
				throw new QueryParseException("90203", -1, -1, null);

			// the text handed to unescape includes both quotes
			String quoted = Strings.unescape(s.substring(at, closing + 1));
			return new ParseResult(quoted, closing + 1);
		}

		// bracketed text, returned verbatim with its brackets
		if (delimiter.value.equals("[")) {
			int closing = findClosingSquareBracket(s, at + 1);
			// "not found" is reported as (start - 1), i.e. the "[" itself;
			// 90204 (formerly "sqbracket-mismatch")
			if (closing == at)
				throw new QueryParseException("90204", -1, -1, null);

			return new ParseResult(s.substring(at, closing + 1), closing + 1);
		}

		// a blank delimiter is skipped; anything else is returned as operator
		String symbol = (String) delimiter.value;
		if (symbol.trim().isEmpty())
			return nextToken(s, skipWhitespaces(s, begin), end, rule);

		return new ParseResult(symbol, at + symbol.length());
	}

	/**
	 * Finds the "]" that closes a bracket opened just before {@code start},
	 * skipping over nested bracket pairs.
	 * 
	 * @param s
	 *            text to scan
	 * @param start
	 *            first position to inspect
	 * @return index of the closing bracket, or {@code start - 1} if there is
	 *         none
	 */
	private static int findClosingSquareBracket(String s, int start) {
		// number of nested "[" seen and not yet closed
		int depth = 0;
		for (int p = start; p < s.length(); ++p) {
			switch (s.charAt(p)) {
			case '[':
				depth++;
				break;
			case ']':
				if (depth == 0)
					return p;
				depth--;
				break;
			default:
				break;
			}
		}

		return start - 1;
	}

	/**
	 * Finds the first unescaped double quote at or after {@code offset}.
	 * 
	 * A backslash escapes the following character, which has to be one of
	 * backslash, double quote, 'n', 't' or 'r'.
	 * 
	 * @param s
	 *            text to scan
	 * @param offset
	 *            first position to inspect
	 * @return index of the closing quote, or -1 if the text ends first
	 * @throws QueryParseException
	 *             90205 with the offending sequence under "escape" when a
	 *             backslash is followed by any other character
	 */
	static int findClosingQuote(String s, int offset) {
		final int length = s.length();
		int i = offset;
		while (i < length) {
			char c = s.charAt(i);
			if (c == '"')
				return i;

			if (c != '\\') {
				i++;
				continue;
			}

			// trailing backslash: nothing left that could close the string
			if (i + 1 >= length)
				break;

			char escaped = s.charAt(i + 1);
			if ("\\\"ntr".indexOf(escaped) < 0) {
				// 90205 (formerly "invalid-escape-sequence")
				Map<String, String> params = new HashMap<String, String>();
				params.put("escape", "\\" + escaped);
				throw new QueryParseException("90205", -1, -1, params);
			}

			// step over the backslash and the character it escapes
			i += 2;
		}

		return -1;
	}

	/**
	 * Checks that every character in the inclusive range {@code [begin, end]}
	 * is whitespace according to {@link Character#isWhitespace(char)}.
	 * 
	 * @return true for an all-whitespace range and for an empty range
	 *         ({@code end < begin})
	 */
	private static boolean isAllWhitespaces(String s, int begin, int end) {
		int i = begin;
		while (i <= end && Character.isWhitespace(s.charAt(i)))
			i++;

		// the scan ran past the end only if nothing stopped it
		return i > end;
	}

	/**
	 * Locates the delimiter closest to {@code begin} that does not lie beyond
	 * {@code end}: a double quote, a parenthesis, a square bracket, one of
	 * the rule's operator delimiters, or a space.
	 * 
	 * @return a result whose value is the delimiter text and whose next is
	 *         its position; value null and next -1 when nothing was found
	 */
	private static ParseResult findNextDelimiter(String s, int begin, int end, ParsingRule rule) {
		ParseResult nearest = new ParseResult(null, -1);

		// quotes, parentheses and square brackets
		String[] structural = { "\"", "(", ")", "[", "]" };
		for (String symbol : structural)
			min(nearest, symbol, s.indexOf(symbol, begin), end);

		// operators of the active rule
		for (OpTerm op : rule.getOpTerm().delimiters()) {
			String symbol = op.getSymbol();
			min(nearest, symbol, s.indexOf(symbol, begin), end);
		}

		// white space: tabs are removed by ExpressionParser.parse, so only
		// the space character is considered here
		min(nearest, " ", s.indexOf(' ', begin), end);
		return nearest;
	}

	/**
	 * Records {@code symbol} at position {@code p} in {@code r} if it beats
	 * the candidate already stored there.
	 * 
	 * A candidate is ignored when {@code p} is negative or beyond
	 * {@code end}. It wins when nothing is stored yet, when it starts
	 * earlier, or when it starts at the same position with a longer symbol.
	 */
	private static void min(ParseResult r, String symbol, int p, int end) {
		if (p < 0 || p > end)
			return;

		boolean nothingYet = r.next == -1;
		boolean earlier = p < r.next;
		boolean longerAtSamePosition = p == r.next && r.value instanceof String
				&& symbol.length() > ((String) r.value).length();

		if (nothingYet || earlier || longerAtSamePosition) {
			r.value = symbol;
			r.next = p;
		}
	}

	/**
	 * Tells whether a term has to pass through the operator stack during
	 * postfix conversion: operators, functions that take arguments, and
	 * parenthesis tokens.
	 */
	private static boolean isDelimiter(Term t, ParsingRule rule) {
		if (rule.getOpTerm().isInstance(t))
			return true;

		if (t instanceof FuncTerm)
			return ((FuncTerm) t).hasArgument();

		if (!(t instanceof TokenTerm))
			return false;

		String text = ((TokenTerm) t).getText();
		return text.equals("(") || text.equals(")");
	}

	/**
	 * Term holding a piece of expression text as it was tokenized. The
	 * parser also uses it for the parenthesis tokens "(" and ")".
	 */
	public static class TokenTerm implements Term {
		private final String text;

		public TokenTerm(String text) {
			this.text = text;
		}

		/**
		 * @return the token text given at construction
		 */
		public String getText() {
			return text;
		}

		/**
		 * @return the same value as {@link #getText()}
		 */
		@Override
		public String toString() {
			return getText();
		}
	}

	/**
	 * Term naming a function call. A new instance is assumed to take
	 * arguments until {@link #setHasArgument(boolean)} says otherwise.
	 */
	public static class FuncTerm implements Term {
		private final String name;
		private boolean argument = true;

		public FuncTerm(String name) {
			this.name = name;
		}

		/**
		 * @return the function name given at construction
		 */
		public String getName() {
			return name;
		}

		/**
		 * @return whether the call is flagged as taking arguments
		 */
		public boolean hasArgument() {
			return argument;
		}

		public void setHasArgument(boolean argument) {
			this.argument = argument;
		}

		@Override
		public String toString() {
			return "func " + name + "()";
		}
	}

	/**
	 * Advances from {@code position} past consecutive whitespace characters
	 * (as defined by {@link Character#isWhitespace(char)}).
	 * 
	 * @param text
	 *            text to scan
	 * @param position
	 *            index to start from
	 * @return index of the first non-whitespace character at or after
	 *         {@code position}, the text length if only whitespace follows,
	 *         or {@code position} itself when it is already at or past the
	 *         end
	 */
	public static int skipWhitespaces(String text, int position) {
		int cursor = position;
		for (; cursor < text.length(); cursor++) {
			if (!Character.isWhitespace(text.charAt(cursor)))
				break;
		}
		return cursor;
	}
}