package org.apache.lucene.queryparser.flexible.aqp.processors;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessor;
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryparser.flexible.standard.nodes.PrefixWildcardQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode;
import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode;
import org.apache.lucene.queryparser.flexible.aqp.nodes.AqpAdsabsRegexQueryNode;
import org.apache.lucene.queryparser.flexible.aqp.nodes.AqpNonAnalyzedQueryNode;
/**
 * Inspects {@link FieldQueryNode}s and, when their text contains unescaped
 * regular-expression markers, replaces them with a more specific node:
 * <ul>
 *   <li>text whose only regex constructs are {@code .*} and {@code .?} is
 *       translated to the wildcard syntax ({@code *} / {@code ?}) and returned
 *       as a {@link PrefixWildcardQueryNode} or {@link WildcardQueryNode};</li>
 *   <li>any other text that compiles as a {@link Pattern} is returned as an
 *       {@link AqpAdsabsRegexQueryNode};</li>
 *   <li>text that fails to compile is left untouched.</li>
 * </ul>
 * {@link AqpNonAnalyzedQueryNode}s and nodes whose parent is a
 * {@link TermRangeQueryNode} are never rewritten.
 */
public class AqpAdsabsRegexNodeProcessor extends QueryNodeProcessorImpl implements
    QueryNodeProcessor {

  /*
   * Very simple way to look for regular expression class characters inside the
   * string; we ignore java 'characters' (i.e. we do not translate the string
   * into a string literal and look for the value). So if somebody searches for
   * \n we don't consider that to be a regular expression (if the parser is
   * configured correctly, such characters are expanded before they get here -
   * so there should be no \n). But it depends... anyway, searching for
   * \n, \r, \f etc. is very strange with the ADS search engine.
   *
   * A marker only counts when it is not preceded by a backslash. An earlier
   * revision also listed '&' among the marker characters; it is no longer
   * included.
   *
   * NOTE(review): both fields are left as package-visible instance fields so
   * that any same-package code reading them keeps compiling.
   */
  String regexMarker =
      "(?<!\\\\)" + // negative lookbehind
      "(" +
      "(\\[|\\^|\\||\\{)" + // characters whose presence means regex
      "|" + "(\\\\b|\\\\B|\\\\A|\\\\G|\\\\Z|\\\\z)" +
      "|" + "(\\\\d|\\\\D|\\\\s|\\\\S|\\\\w|\\\\W)" + // predefined char classes
      "|" + "\\\\p\\{" + // character classes
      "|" + "\\.(\\?|\\*|\\{)" +
      ")";

  Pattern regexTest = Pattern.compile(regexMarker);

  /**
   * No post-processing is performed.
   *
   * @param node the node being visited
   * @return {@code node}, unchanged
   */
  @Override
  protected QueryNode postProcessNode(QueryNode node)
      throws QueryNodeException {
    return node;
  }

  /**
   * Replaces a regex-looking {@link FieldQueryNode} with a wildcard, prefix
   * wildcard or regex node.
   *
   * @param node the node being visited
   * @return a replacement node when the text of {@code node} contains regex
   *         markers and can be translated; otherwise {@code node} itself
   */
  @Override
  protected QueryNode preProcessNode(QueryNode node)
      throws QueryNodeException {
    if (!(node instanceof FieldQueryNode)
        || node instanceof AqpNonAnalyzedQueryNode
        || node.getParent() instanceof TermRangeQueryNode) {
      return node;
    }

    FieldQueryNode n = (FieldQueryNode) node;
    String input = n.getTextAsString();
    if (input == null || !isRegex(input)) {
      return node;
    }

    // If nothing regex-like remains once ".?" and ".*" are stripped, the
    // expression is a plain wildcard pattern written in regex syntax.
    if (!isRegex(input.replace(".?", "").replace(".*", ""))) {
      return toWildcardNode(n, input);
    }

    try {
      // Compile only to validate the syntax; the compiled pattern is not kept.
      Pattern.compile(input);
      return new AqpAdsabsRegexQueryNode(n.getFieldAsString(), input, n.getBegin(), n.getEnd());
    } catch (PatternSyntaxException ignored) {
      // Deliberate best effort: text that is not a valid regex is left as an
      // ordinary field query.
      return node;
    }
  }

  /**
   * Children are kept in their existing order.
   *
   * @param children the children of the node being visited
   * @return {@code children}, unchanged
   */
  @Override
  protected List<QueryNode> setChildrenOrder(List<QueryNode> children)
      throws QueryNodeException {
    return children;
  }

  /**
   * Translates {@code .*} / {@code .?} into {@code *} / {@code ?} and wraps the
   * result in the matching wildcard node type.
   *
   * A prefix node is produced only when the translated text contains no
   * {@code ?} and its single {@code *} is the last character. Text with an
   * additional {@code *} (for example {@code foo*bar*}) is a general wildcard
   * pattern; emitting it as a prefix node would carry the inner {@code *}
   * along as part of the prefix text.
   */
  private QueryNode toWildcardNode(FieldQueryNode n, String input) {
    String wildcard = input.replace(".*", "*").replace(".?", "?");
    boolean onlyTrailingStar =
        wildcard.endsWith("*") && wildcard.indexOf('*') == wildcard.length() - 1;
    if (onlyTrailingStar && !wildcard.contains("?")) {
      return new PrefixWildcardQueryNode(n.getFieldAsString(), wildcard, n.getBegin(), n.getEnd());
    }
    return new WildcardQueryNode(n.getFieldAsString(), wildcard, n.getBegin(), n.getEnd());
  }

  /**
   * @param input the raw query text
   * @return {@code true} when {@code input} contains at least one unescaped
   *         regex marker as defined by {@link #regexMarker}
   */
  private boolean isRegex(String input) {
    return regexTest.matcher(input).find();
  }
}