package org.apache.lucene.queryparser.flexible.aqp.processors;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.queryparser.flexible.aqp.config.AqpAdsabsQueryConfigHandler;
import org.apache.lucene.queryparser.flexible.aqp.nodes.AqpAdsabsRegexQueryNode;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler;
import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode;
import org.apache.solr.request.SolrQueryRequest;
/**
* This analyzer is applied only to certain nodes in order to clean
* them up. It is using solr analyzer chains for a fields, by convention
* these fields are of the following types
*
* _wildcard - to be used on the wildcard searches
* _regex
* _fuzzy
*
*/
public class AqpAdsabsCarefulAnalyzerProcessor extends QueryNodeProcessorImpl {
private CharTermAttribute termAtt;
public AqpAdsabsCarefulAnalyzerProcessor() {
// empty
}
@Override
public QueryNode process(QueryNode queryTree) throws QueryNodeException {
QueryConfigHandler config = this.getQueryConfigHandler();
if (config.has(AqpAdsabsQueryConfigHandler.ConfigurationKeys.SOLR_REQUEST)
&& config.get(AqpAdsabsQueryConfigHandler.ConfigurationKeys.SOLR_REQUEST)
.getRequest() != null) {
return super.process(queryTree);
}
return queryTree;
}
@Override
protected QueryNode postProcessNode(QueryNode node)
throws QueryNodeException {
String field = null;
String value =null;
String[] tokens;
if (node instanceof WildcardQueryNode) {
field = ((WildcardQueryNode) node).getFieldAsString();
value = ((WildcardQueryNode) node).getTextAsString();
int asteriskPosition = -1;
int qmarkPosition = -1;
int origLen = value.length();
if (value.indexOf('*') > -1) {
asteriskPosition = value.indexOf('*');
}
if (value.indexOf('?') > -1) {
qmarkPosition = value.indexOf('?');
}
if (asteriskPosition > 0 && asteriskPosition+1 < value.length()
|| qmarkPosition > 0 && qmarkPosition+1 < value.length()
|| asteriskPosition > -1 && qmarkPosition > -1)
return node;
for (String suffix: new String[]{"_wildcard", ""}) {
if (hasAnalyzer(field + suffix)) {
tokens = analyze(field + suffix, value);
if (tokens.length != 1)
return node; // break, let the analyzer decide the fate
String newToken = tokens[0];
if (newToken.length() < origLen) {
if (qmarkPosition > -1) {
if (qmarkPosition == 0) {
newToken = '?' + tokens[0];
}
else {
newToken = tokens[0] + '?';
}
}
else {
if (asteriskPosition == 0) {
newToken = '*' + tokens[0];
}
else {
newToken = tokens[0] + '*';
}
}
}
if (!newToken.equals(value)) {
return new WildcardQueryNode(field,
newToken, ((WildcardQueryNode)node).getBegin(),
((WildcardQueryNode)node).getEnd());
}
}
}
}
else if(node instanceof FuzzyQueryNode) {
field = ((FuzzyQueryNode) node).getFieldAsString();
value = ((FuzzyQueryNode) node).getTextAsString();
for (String suffix: new String[]{"_fuzzy", ""}) {
if (hasAnalyzer(field+suffix)) {
tokens = analyze(field + suffix, value);
if (tokens.length > 1)
return node; // break, let the analyzer decide the fate
if (!tokens[0].equals(value)) {
return new FuzzyQueryNode(field,
tokens[0],
((FuzzyQueryNode)node).getSimilarity(),
((FuzzyQueryNode)node).getBegin(),
((FuzzyQueryNode)node).getEnd());
}
}
}
}
else if(node instanceof AqpAdsabsRegexQueryNode) {
field = ((FieldQueryNode) node).getFieldAsString();
value = ((FieldQueryNode) node).getText().toString();
for (String suffix: new String[]{"_regex", ""}) {
if (hasAnalyzer(field + suffix)) {
tokens = analyze(field + suffix, value);
if (tokens.length > 1)
return node; // break, let the analyzer decide the fate
if (!tokens[0].equals(value)) {
return new AqpAdsabsRegexQueryNode(field,
tokens[0], ((FieldQueryNode)node).getBegin(),
((FieldQueryNode)node).getEnd());
}
}
}
}
return node;
}
private boolean hasAnalyzer(String fieldName) {
SolrQueryRequest req = this.getQueryConfigHandler()
.get(AqpAdsabsQueryConfigHandler.ConfigurationKeys.SOLR_REQUEST)
.getRequest();
if (req.getSchema().hasExplicitField(fieldName)) {
return true;
}
return false;
}
private String[] analyze(CharSequence field, String value) throws QueryNodeException {
QueryConfigHandler config = this.getQueryConfigHandler();
Locale locale = getQueryConfigHandler().get(ConfigurationKeys.LOCALE);
if (locale == null) {
locale = Locale.getDefault();
}
Analyzer analyzer = config.get(StandardQueryConfigHandler.ConfigurationKeys.ANALYZER);
ArrayList<String> out = new ArrayList<String>();
TokenStream source = null;
try {
source = analyzer.tokenStream(field.toString(),
new StringReader(value));
source.reset();
} catch (IOException e1) {
if (source != null)
try {
source.close();
} catch (IOException e) {
// ignore
}
return new String[0];
}
try {
while (source.incrementToken()) {
termAtt = source.getAttribute(CharTermAttribute.class);
out.add(termAtt.toString());
}
source.close();
} catch (IOException e) {
// pass
}
return out.toArray(new String[out.size()]);
}
@Override
protected QueryNode preProcessNode(QueryNode node)
throws QueryNodeException {
return node;
}
@Override
protected List<QueryNode> setChildrenOrder(List<QueryNode> children)
throws QueryNodeException {
return children;
}
}