/*
* Copyright (c) 2013-2017 Cinchapi Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.cinchapi.concourse.lang;
import java.text.MessageFormat;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.Set;
import com.google.common.base.Preconditions;
import com.google.common.collect.Multimap;
import org.apache.commons.lang.StringUtils;
import com.cinchapi.concourse.lang.ConjunctionSymbol;
import com.cinchapi.concourse.lang.KeySymbol;
import com.cinchapi.concourse.lang.OperatorSymbol;
import com.cinchapi.concourse.lang.ParenthesisSymbol;
import com.cinchapi.concourse.lang.PostfixNotationSymbol;
import com.cinchapi.concourse.lang.Symbol;
import com.cinchapi.concourse.lang.TimestampSymbol;
import com.cinchapi.concourse.lang.ValueSymbol;
import com.cinchapi.concourse.lang.ast.AST;
import com.cinchapi.concourse.lang.ast.AndTree;
import com.cinchapi.concourse.lang.ast.ExpressionTree;
import com.cinchapi.concourse.lang.ast.OrTree;
import com.cinchapi.concourse.thrift.Operator;
import com.cinchapi.concourse.util.QuoteAwareStringSplitter;
import com.cinchapi.concourse.util.SplitOption;
import com.cinchapi.concourse.util.StringSplitter;
import com.cinchapi.concourse.util.Strings;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
/**
* The {@link Parser} is a tool that operates on various aspects of the
* language.
*
* @author Jeff Nelson
*/
public final class Parser {
/**
* Convert a valid and well-formed list of {@link Symbol} objects into a
* an {@link AST}.
* <p>
* NOTE: This method will group non-conjunctive symbols into
* {@link Expression} objects.
* </p>
*
* @param symbols
* @return the symbols in an AST
*/
public static AST toAbstractSyntaxTree(List<Symbol> symbols) {
Deque<Symbol> operatorStack = new ArrayDeque<Symbol>();
Deque<AST> operandStack = new ArrayDeque<AST>();
symbols = groupExpressions(symbols);
main: for (Symbol symbol : symbols) {
if(symbol == ParenthesisSymbol.LEFT) {
operatorStack.push(symbol);
}
else if(symbol == ParenthesisSymbol.RIGHT) {
while (!operatorStack.isEmpty()) {
Symbol popped = operatorStack.pop();
if(popped == ParenthesisSymbol.LEFT) {
continue main;
}
else {
addASTNode(operandStack, popped);
}
}
throw new SyntaxException(MessageFormat.format(
"Syntax error in {0}: Mismatched parenthesis", symbols));
}
else if(symbol instanceof Expression) {
operandStack.add(ExpressionTree.create((Expression) symbol));
}
else {
operatorStack.push(symbol);
}
}
while (!operatorStack.isEmpty()) {
addASTNode(operandStack, operatorStack.pop());
}
return operandStack.pop();
}
/**
* Convert a valid and well-formed list of {@link Symbol} objects into a
* Queue in postfix notation.
* <p>
* NOTE: This method will group non-conjunctive symbols into
* {@link Expression} objects.
* </p>
*
* @param symbols
* @return the symbols in postfix notation
*/
public static Queue<PostfixNotationSymbol> toPostfixNotation(
List<Symbol> symbols) {
Preconditions
.checkState(
symbols.size() >= 3,
"The parsed query %s does not have"
+ "enough symbols to process. It should have at least 3 symbols but "
+ "only has %s", symbols, symbols.size());
Deque<Symbol> stack = new ArrayDeque<Symbol>();
Queue<PostfixNotationSymbol> queue = new LinkedList<PostfixNotationSymbol>();
symbols = groupExpressions(symbols);
for (Symbol symbol : symbols) {
if(symbol instanceof ConjunctionSymbol) {
while (!stack.isEmpty()) {
Symbol top = stack.peek();
if(symbol == ConjunctionSymbol.OR
&& (top == ConjunctionSymbol.OR || top == ConjunctionSymbol.AND)) {
queue.add((PostfixNotationSymbol) stack.pop());
}
else {
break;
}
}
stack.push(symbol);
}
else if(symbol == ParenthesisSymbol.LEFT) {
stack.push(symbol);
}
else if(symbol == ParenthesisSymbol.RIGHT) {
boolean foundLeftParen = false;
while (!stack.isEmpty()) {
Symbol top = stack.peek();
if(top == ParenthesisSymbol.LEFT) {
foundLeftParen = true;
break;
}
else {
queue.add((PostfixNotationSymbol) stack.pop());
}
}
if(!foundLeftParen) {
throw new SyntaxException(MessageFormat.format(
"Syntax error in {0}: Mismatched parenthesis",
symbols));
}
else {
stack.pop();
}
}
else {
queue.add((PostfixNotationSymbol) symbol);
}
}
while (!stack.isEmpty()) {
Symbol top = stack.peek();
if(top instanceof ParenthesisSymbol) {
throw new SyntaxException(MessageFormat.format(
"Syntax error in {0}: Mismatched parenthesis", symbols));
}
else {
queue.add((PostfixNotationSymbol) stack.pop());
}
}
return queue;
}
/**
* Convert a valid and well-formed CCL string into aQueue in postfix
* notation.
* <p>
* NOTE: This method will group non-conjunctive symbols into
* {@link Expression} objects.
* </p>
*
* @param ccl the string to parse into postfix notation
* @return the queue in postfix notation
*/
public static Queue<PostfixNotationSymbol> toPostfixNotation(String ccl) {
return toPostfixNotation(ccl, null);
}
/**
* Convert a valid and well-formed CCL string into a {@link Queue} in
* postfix notation. This function will also resolve local references from
* the CCL string into a {@link Multimap} passed in.
* <p>
* NOTE: This method will group non-conjunctive symbols into
* {@link Expression} objects.
* </p>
*
* @param ccl the CCL string to convert
* @param data the data to use for local references
* @return the queue in postfix notation
*/
public static Queue<PostfixNotationSymbol> toPostfixNotation(String ccl,
Multimap<String, Object> data) {
// This method uses a value buffer to correct cases when a string value
// is specified without quotes (because its a common mistake to make).
// If an operator other than BETWEEN is specified, we use logic that
// will buffer all the subsequent tokens until we reach a (parenthesis),
// (conjunction) or (at) and assume that the tokens belong to the same
// value.
data = data == null ? EMPTY_MULTIMAP : data;
StringSplitter toks = new QuoteAwareStringSplitter(ccl, ' ',
SplitOption.TOKENIZE_PARENTHESIS);
List<Symbol> symbols = Lists.newArrayList();
GuessState guess = GuessState.KEY;
StringBuilder buffer = null;
StringBuilder timeBuffer = null;
while (toks.hasNext()) {
String tok = toks.next();
if(tok.equals("(") || tok.equals(")")) {
addBufferedValue(buffer, symbols);
addBufferedTime(timeBuffer, symbols);
symbols.add(ParenthesisSymbol.parse(tok));
}
else if(tok.equalsIgnoreCase("&&") || tok.equalsIgnoreCase("&")
|| tok.equalsIgnoreCase("and")) {
addBufferedValue(buffer, symbols);
addBufferedTime(timeBuffer, symbols);
symbols.add(ConjunctionSymbol.AND);
guess = GuessState.KEY;
}
else if(tok.equalsIgnoreCase("||") || tok.equalsIgnoreCase("or")) {
addBufferedValue(buffer, symbols);
addBufferedTime(timeBuffer, symbols);
symbols.add(ConjunctionSymbol.OR);
guess = GuessState.KEY;
}
else if(TIMESTAMP_PIVOT_TOKENS.contains(tok.toLowerCase())) {
addBufferedValue(buffer, symbols);
guess = GuessState.TIMESTAMP;
timeBuffer = new StringBuilder();
}
else if(tok.equalsIgnoreCase("where")) {
continue;
}
else if(StringUtils.isBlank(tok)) {
continue;
}
else if(guess == GuessState.KEY) {
symbols.add(KeySymbol.parse(tok));
guess = GuessState.OPERATOR;
}
else if(guess == GuessState.OPERATOR) {
OperatorSymbol symbol = OperatorSymbol.parse(tok);
symbols.add(symbol);
if(symbol.getOperator() != Operator.BETWEEN) {
buffer = new StringBuilder();
}
guess = GuessState.VALUE;
}
else if(guess == GuessState.VALUE) {
// CON-321: Perform local resolution for variable
if(tok.charAt(0) == '$') {
String var = tok.substring(1);
try {
tok = Iterables.getOnlyElement(data.get(var))
.toString();
}
catch (IllegalArgumentException e) {
String err = "Unable to resolve variable {} because multiple values exist locally: {}";
throw new IllegalStateException(Strings.format(err,
tok, data.get(var)));
}
catch (NoSuchElementException e) {
String err = "Unable to resolve variable {} because no values exist locally";
throw new IllegalStateException(
Strings.format(err, tok));
}
}
else if(tok.length() > 2 && tok.charAt(0) == '\\'
&& tok.charAt(1) == '$') {
tok = tok.substring(1);
}
if(buffer != null) {
buffer.append(tok).append(" ");
}
else {
symbols.add(ValueSymbol.parse(tok));
}
}
else if(guess == GuessState.TIMESTAMP) {
timeBuffer.append(tok).append(" ");
}
else {
throw new IllegalStateException("Cannot properly parse " + tok);
}
}
addBufferedValue(buffer, symbols);
addBufferedTime(timeBuffer, symbols);
return toPostfixNotation(symbols);
}
/**
* Go through a list of symbols and group the expressions together in a
* {@link Expression} object.
*
* @param symbols
* @return the expression
*/
protected static List<Symbol> groupExpressions(List<Symbol> symbols) { // visible
// for
// testing
try {
List<Symbol> grouped = Lists.newArrayList();
ListIterator<Symbol> it = symbols.listIterator();
while (it.hasNext()) {
Symbol symbol = it.next();
if(symbol instanceof KeySymbol) {
// NOTE: We are assuming that the list of symbols is well
// formed, and, as such, the next elements will be an
// operator and one or more symbols. If this is not the
// case, this method will throw a ClassCastException
OperatorSymbol operator = (OperatorSymbol) it.next();
ValueSymbol value = (ValueSymbol) it.next();
Expression expression;
if(operator.getOperator() == Operator.BETWEEN) {
ValueSymbol value2 = (ValueSymbol) it.next();
expression = Expression.create((KeySymbol) symbol,
operator, value, value2);
}
else {
expression = Expression.create((KeySymbol) symbol,
operator, value);
}
grouped.add(expression);
}
else if(symbol instanceof TimestampSymbol) { // Add the
// timestamp to the
// previously
// generated
// Expression
((Expression) Iterables.getLast(grouped))
.setTimestamp((TimestampSymbol) symbol);
}
else {
grouped.add(symbol);
}
}
return grouped;
}
catch (ClassCastException e) {
throw new SyntaxException(e.getMessage());
}
}
/**
* An the appropriate {@link AST} node to the {@code stack} based on
* {@code operator}.
*
* @param stack
* @param operator
*/
private static void addASTNode(Deque<AST> stack, Symbol operator) {
AST right = stack.pop();
AST left = stack.pop();
if(operator == ConjunctionSymbol.AND) {
stack.push(AndTree.create(left, right));
}
else {
stack.push(OrTree.create(left, right));
}
}
/**
* This is a helper method for {@link #toPostfixNotation(String)} that
* contains the logic to create a ValueSymbol from a buffered value.
*
* @param buffer
* @param symbols
*/
private static void addBufferedValue(StringBuilder buffer,
List<Symbol> symbols) {
if(buffer != null && buffer.length() > 0) {
buffer.delete(buffer.length() - 1, buffer.length());
symbols.add(ValueSymbol.parse(buffer.toString()));
buffer.delete(0, buffer.length());
}
}
private static void addBufferedTime(StringBuilder buffer,
List<Symbol> symbols) {
if(buffer != null && buffer.length() > 0) {
buffer.delete(buffer.length() - 1, buffer.length());
long ts = NaturalLanguage.parseMicros(buffer.toString());
symbols.add(TimestampSymbol.create(ts));
buffer.delete(0, buffer.length());
}
}
/**
* A collection of tokens that indicate the parser should pivot to expecting
* a timestamp token.
*/
private final static Set<String> TIMESTAMP_PIVOT_TOKENS = Sets.newHashSet(
"at", "on", "during", "in");
/**
* An empty multimap to use in {@link #toPostfixNotation(String, Multimap)}
* when there is no local data provided against which to resolve.
*/
private final static Multimap<String, Object> EMPTY_MULTIMAP = HashMultimap
.create();
private Parser() {/* noop */}
/**
* An enum that tracks what the parser guesses the next token to be in the
* {@link #toPostfixNotation(String)} method.
*
* @author Jeff Nelson
*/
private enum GuessState {
KEY, OPERATOR, TIMESTAMP, VALUE
}
}