package org.apache.lucene.queryparser.flexible.aqp.processors;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.antlr.runtime.CharStream;
import org.apache.lucene.queryparser.flexible.aqp.nodes.AqpANTLRNode;
import org.apache.lucene.queryparser.flexible.aqp.processors.AqpQProcessor;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
/**
*
* Looks at the nodes below the DEFOP query node and marks the nodes
* that can be concatenated during analysis, e.g. "weak lensing"
* can be used as one token
*
* <pre>
* DEFOP
* |
* / | \
* MODIFIER MOD.. CLAUSE
* / | \
* TMODIFIER TMODIFIER MODIFIER
* / | \
* FIELD FIELD .....
* / |
* QNORMAL QNORMAL
* / |
* weak lensing
* </pre>
*
* <p>
* Care is taken not to join when the fields are different and
* when there is an operator/clause/modifier in between
*
* @author rca
*
*/
public class AqpDEFOPMarkPlainNodes extends AqpQProcessor {

  // NOTE: the three public names below are left non-final so that existing
  // code which might reassign them keeps compiling.

  /** Name of the tag set on the terminal node of every member of a run of plain tokens. */
  public static String PLAIN_TOKEN = "PLAIN_TOKEN";

  /** Separator inserted between the token inputs when a run is concatenated. */
  public static String PLAIN_TOKEN_SEPARATOR = " ";

  /** Name of the tag that carries the concatenated text (set only when the tree is modified). */
  public static String PLAIN_TOKEN_CONCATENATED = "PLAIN_TOKEN_CONCATENATED";

  // Label paths (as produced by harvestLabels) of the sub-trees that count as "bare".
  private static final String PATH_NORMAL = "/MODIFIER/TMODIFIER/FIELD/QNORMAL";
  private static final String PATH_TRUNCATED = "/MODIFIER/TMODIFIER/FIELD/QTRUNCATED";
  private static final String PATH_DELIMITER = "/MODIFIER/TMODIFIER/FIELD/QDELIMITER";

  /** When true, each run is collapsed into one child; when false, nodes are only tagged. */
  private final boolean modifyTree;

  /** Leaf values accepted in the modifier position of the first member of a run. */
  private final List<String> firstChildAllowedModifiers;

  /** Leaf values accepted in the field position of the first member of a run. */
  private final List<String> firstChildAllowedFields;

  /**
   * Creates a processor that only tags nodes (the tree is not modified), accepts
   * the modifiers "+" and "-" on the first member of a run and accepts only the
   * empty string as its field value.
   */
  public AqpDEFOPMarkPlainNodes() {
    this.modifyTree = false;
    this.firstChildAllowedModifiers = Arrays.asList("+", "-");
    this.firstChildAllowedFields = Arrays.asList("");
  }

  /**
   * @param modifyTree
   *          if true, every run of plain nodes is replaced by a single child of
   *          the DEFOP node; if false the nodes are only tagged
   * @param firstChildAllowedModifiers
   *          values tolerated in the modifier position of the first member of a run
   * @param firstChildAllowedFields
   *          values tolerated in the field position of the first member of a run
   */
  public AqpDEFOPMarkPlainNodes(boolean modifyTree,
      List<String> firstChildAllowedModifiers,
      List<String> firstChildAllowedFields) {
    this.modifyTree = modifyTree;
    this.firstChildAllowedModifiers = firstChildAllowedModifiers;
    this.firstChildAllowedFields = firstChildAllowedFields;
  }

  /**
   * Accepts DEFOP nodes only. A DEFOP node whose grandparent has more than one
   * child is refused unless the token input of the grandparent's first child is
   * one of the allowed modifiers; this is what rejects input such as
   * '=(this that token)'.
   *
   * @param node the candidate node
   * @return true if {@link #createQNode(AqpANTLRNode)} should process the node
   */
  public boolean nodeIsWanted(AqpANTLRNode node) {
    if (!"DEFOP".equals(node.getTokenLabel())) {
      return false;
    }

    QueryNode parent = node.getParent();
    QueryNode grandParent = (parent == null) ? null : parent.getParent();
    if (grandParent == null) {
      return true;
    }

    // refuse processing: '=(this that token)'
    // but this is not the ideal place for it (it is kind of arbitrary)
    List<QueryNode> siblings = grandParent.getChildren();
    if (siblings != null && siblings.size() > 1) {
      String leading = ((AqpANTLRNode) siblings.get(0)).getTokenInput();
      return firstChildAllowedModifiers.contains(leading);
    }
    return true;
  }

  /**
   * Walks the children of the DEFOP node, collects runs of adjacent bare nodes
   * and hands every run to {@link #tagPlainNodes(List, List)}. Any child that is
   * not bare terminates the current run, regardless of {@code modifyTree}.
   *
   * @param node the DEFOP node
   * @return the same node; its children are replaced only when {@code modifyTree} is set
   * @throws QueryNodeException declared by the processor contract
   */
  public QueryNode createQNode(AqpANTLRNode node) throws QueryNodeException {
    List<QueryNode> children = node.getChildren();

    // nothing to join when there are fewer than two children
    if (children == null || children.size() <= 1) {
      return node;
    }

    List<QueryNode> run = new ArrayList<QueryNode>();
    List<QueryNode> newChildren = new ArrayList<QueryNode>();

    for (QueryNode child : children) {
      if (isBareNode(child, run.isEmpty())) {
        run.add(child);
        continue;
      }
      // A non-bare child always closes the run. Previously this happened only
      // when modifyTree was set, so in tagging-only mode the first bare node
      // after a gap was judged as a non-first member and then dropped.
      if (!run.isEmpty()) {
        tagPlainNodes(run, newChildren);
      }
      if (modifyTree) {
        newChildren.add(child);
      }
    }

    if (!run.isEmpty()) {
      tagPlainNodes(run, newChildren);
    }
    if (modifyTree) {
      node.set(newChildren);
    }
    return node;
  }

  /**
   * Processes one run of adjacent bare nodes and empties the list afterwards.
   *
   * <p>A run of two or more nodes gets the {@link #PLAIN_TOKEN} tag on each
   * terminal node. When {@code modifyTree} is set, one representative node (see
   * {@link #getQNode(List)}) is appended to {@code newChildren}; for a run of two
   * or more its terminal is widened to the input span of the whole run and
   * tagged with {@link #PLAIN_TOKEN_CONCATENATED}.
   *
   * @param forMarking the run; cleared before returning
   * @param newChildren receives the representative node when {@code modifyTree} is set
   */
  private void tagPlainNodes(List<QueryNode> forMarking, List<QueryNode> newChildren) {
    if (forMarking.isEmpty()) {
      return; // getQNode() below would fail on an empty run
    }

    boolean isGroup = forMarking.size() > 1;
    int startPos = -1;
    int endPos = -1;
    StringBuilder concatenated = new StringBuilder();

    if (isGroup) {
      // StringBuilder does not override hashCode(), so a fresh instance yields
      // an identity-based number that serves as the id of this run
      int tag = concatenated.hashCode();
      for (int i = 0; i < forMarking.size(); i++) {
        QueryNode member = forMarking.get(i);
        if (i > 0) {
          concatenated.append(PLAIN_TOKEN_SEPARATOR);
        }
        concatenated.append(markChild(tag, member));
        if (modifyTree) {
          AqpANTLRNode terminal = (AqpANTLRNode) getTerminalNode(member);
          if (startPos == -1) {
            startPos = terminal.getInputTokenStart();
          }
          endPos = terminal.getInputTokenEnd();
        }
      }
    }

    if (modifyTree) {
      // keep one node from the group (a truncated one is preferred)
      QueryNode kept = getQNode(forMarking);
      if (isGroup) {
        AqpANTLRNode terminal = (AqpANTLRNode) getTerminalNode(kept);
        CharStream input = AqpQProcessor.getInputStream(terminal);
        String val = input.substring(startPos, endPos);
        terminal.setInputTokenStart(startPos);
        terminal.setInputTokenEnd(endPos);
        terminal.setTokenInput(val);
        terminal.setTag(PLAIN_TOKEN_CONCATENATED, concatenated.toString());
      }
      newChildren.add(kept);
    }

    forMarking.clear();
  }

  /**
   * Picks the node that represents a run: the first member whose terminal sits
   * under a QTRUNCATED node, otherwise the first one under a QNORMAL node,
   * otherwise simply the first member.
   *
   * @param forMarking a non-empty run
   * @return the representative node
   */
  private QueryNode getQNode(List<QueryNode> forMarking) {
    // TODO: we need to pick the first field, but if there is
    // truncated query, we need to pick that one
    QueryNode truncated = firstWithTerminalParent(forMarking, "QTRUNCATED");
    if (truncated != null) {
      return truncated;
    }
    QueryNode normal = firstWithTerminalParent(forMarking, "QNORMAL");
    if (normal != null) {
      return normal;
    }
    return forMarking.get(0);
  }

  /**
   * Returns the first candidate whose terminal node has a parent carrying the
   * given label, or null when there is none.
   */
  private QueryNode firstWithTerminalParent(List<QueryNode> candidates, String label) {
    for (QueryNode candidate : candidates) {
      AqpANTLRNode terminal = (AqpANTLRNode) getTerminalNode(candidate);
      if (((AqpANTLRNode) terminal.getParent()).getTokenLabel().equals(label)) {
        return candidate;
      }
    }
    return null;
  }

  /**
   * Decides whether a child of DEFOP may take part in a run.
   *
   * <p>The label path of the sub-tree must be MODIFIER/TMODIFIER/FIELD followed
   * by QNORMAL, QTRUNCATED, or QDELIMITER with the token input ",". The values
   * collected by {@link #harvestValues(QueryNode, List, int)} must then be
   * empty, or - only for a node that would start a run - consist of one allowed
   * modifier or field, or of an allowed modifier followed by an allowed field.
   *
   * @param node the child to inspect
   * @param isPotentiallyFirst true when the node would be the first member of a run
   * @return true if the node may be joined
   */
  private boolean isBareNode(QueryNode node, boolean isPotentiallyFirst) {
    StringBuilder labels = new StringBuilder();
    harvestLabels(node, labels, 5);
    String path = labels.toString();

    boolean plainShape = path.equals(PATH_NORMAL)
        || path.equals(PATH_TRUNCATED)
        || (path.equals(PATH_DELIMITER)
            && ",".equals(((AqpANTLRNode) getTerminalNode(node)).getTokenInput()));
    if (!plainShape) {
      return false;
    }

    List<String> vals = new ArrayList<String>();
    harvestValues(node, vals, 4);

    switch (vals.size()) {
      case 0:
        return true;
      case 1:
        // we allow modifiers for the first child, or certain fields
        return isPotentiallyFirst
            && (firstChildAllowedModifiers.contains(vals.get(0))
                || firstChildAllowedFields.contains(vals.get(0)));
      case 2:
        // we allow modifiers for the first child in certain fields
        return isPotentiallyFirst
            && firstChildAllowedModifiers.contains(vals.get(0))
            && firstChildAllowedFields.contains(vals.get(1));
      default:
        return false;
    }
  }

  /**
   * Appends "/" plus the token label of every non-leaf AqpANTLRNode met during
   * a depth-first walk limited to {@code maxDepth} levels. Leaf nodes are
   * skipped; any node of another type contributes "/?".
   */
  private void harvestLabels(QueryNode node, StringBuilder data, int maxDepth) {
    if (maxDepth == 0) {
      return;
    }
    if (!(node instanceof AqpANTLRNode)) {
      data.append("/?");
      return;
    }
    if (node.isLeaf()) {
      return; // avoid the terminal node
    }
    data.append("/");
    data.append(((AqpANTLRNode) node).getTokenLabel());
    for (QueryNode child : node.getChildren()) {
      harvestLabels(child, data, maxDepth - 1);
    }
  }

  /**
   * Collects the token label of every leaf AqpANTLRNode met during a
   * depth-first walk limited to {@code maxDepth} levels; any node of another
   * type contributes "?".
   */
  private void harvestValues(QueryNode node, List<String> data, int maxDepth) {
    if (maxDepth == 0) {
      return;
    }
    if (!(node instanceof AqpANTLRNode)) {
      data.add("?");
      return;
    }
    if (node.isLeaf()) {
      data.add(((AqpANTLRNode) node).getTokenLabel());
      return;
    }
    for (QueryNode child : node.getChildren()) {
      harvestValues(child, data, maxDepth - 1);
    }
  }

  /**
   * Tags the terminal node below {@code node} with {@link #PLAIN_TOKEN}.
   *
   * @param tag id shared by all members of one run
   * @param node member of the run
   * @return the token input of the terminal node
   */
  private String markChild(int tag, QueryNode node) {
    AqpANTLRNode terminal = (AqpANTLRNode) getTerminalNode(node);
    terminal.setTag(PLAIN_TOKEN, tag);
    return terminal.getTokenInput();
  }
}