package fr.openwide.core.spring.util.lucene.search;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.queryparser.simple.SimpleQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import com.google.common.base.CharMatcher;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import fr.openwide.core.spring.util.StringUtils;
/**
 * Static helpers that build Lucene query strings from user-entered search patterns
 * (autocomplete, similarity/fuzzy, plain queries, numeric ranges) and that serialize
 * {@link Query} objects back into a query string.
 *
 * <p>This class is not instantiable.</p>
 */
public final class LuceneUtils {

    /** Minimum length of the cleaned search pattern used by the overloads that do not take an explicit threshold. */
    private static final int DEFAULT_ENABLE_WILDCARD_MIN_CHARS = 2;

    /** Name of the "lowercaseOperators" parameter (not referenced inside this class). */
    public static final String LOWERCASE_OPERATORS_PARAM = "lowercaseOperators";

    /** Prefix placed before a boost value in a query string. */
    public static final String BOOST_PARAMETER_PREFIX = "^";

    /** Suffix appended to a term to turn it into a prefix (wildcard) search. */
    public static final String WILDCARD_SUFFIX = "*";

    /** Suffix placed between a term and its maximum edit distance in a fuzzy search. */
    public static final String FUZZY_PARAMETER_SUFFIX = "~";

    /** Term query on the {@code id} field with a placeholder value, intended to match no document. */
    public static final TermQuery NO_RESULT_QUERY = new TermQuery(new Term("id", "__NEVER_MATCHING_ID__"));

    /**
     * Builds an autocomplete query on a single field.
     *
     * @see #getAutocompleteQuery(Iterable, Analyzer, String, int)
     */
    public static Query getAutocompleteQuery(String fieldName, Analyzer analyzer,
            String searchPattern, int enableWildcardMinChars) {
        return getAutocompleteQuery(ImmutableList.of(fieldName), analyzer, searchPattern, enableWildcardMinChars);
    }

    /**
     * Builds an autocomplete query on a single field, using the default minimum pattern length.
     *
     * @see #getAutocompleteQuery(Iterable, Analyzer, String, int)
     */
    public static Query getAutocompleteQuery(String fieldName, Analyzer analyzer, String searchPattern) {
        return getAutocompleteQuery(fieldName, analyzer, searchPattern, DEFAULT_ENABLE_WILDCARD_MIN_CHARS);
    }

    /**
     * Builds an autocomplete query on several fields.
     *
     * <p>Every field gets the same weight ({@code 1.0f}); the pattern produced by
     * {@link #getAutocompleteQuery(String, int)} is parsed by a {@link SimpleQueryParser} whose default
     * operator is {@link BooleanClause.Occur#MUST}.</p>
     *
     * @param fieldNames the fields to search on
     * @param analyzer the analyzer handed to the parser
     * @param searchPattern the raw, user-entered pattern
     * @param enableWildcardMinChars minimum length of the cleaned pattern for the wildcard to be appended
     * @return the parsed query
     */
    public static Query getAutocompleteQuery(Iterable<String> fieldNames, Analyzer analyzer,
            String searchPattern, int enableWildcardMinChars) {
        SimpleQueryParser queryParser = new SimpleQueryParser(analyzer, toFieldWeights(fieldNames));
        queryParser.setDefaultOperator(BooleanClause.Occur.MUST);
        return queryParser.parse(getAutocompleteQuery(searchPattern, enableWildcardMinChars));
    }

    /**
     * Builds an autocomplete query on several fields, using the default minimum pattern length.
     *
     * @see #getAutocompleteQuery(Iterable, Analyzer, String, int)
     */
    public static Query getAutocompleteQuery(Iterable<String> fieldNames, Analyzer analyzer, String searchPattern) {
        return getAutocompleteQuery(fieldNames, analyzer, searchPattern, DEFAULT_ENABLE_WILDCARD_MIN_CHARS);
    }

    /**
     * Builds an autocomplete query string with the default minimum pattern length and no explicit operator.
     *
     * @see #getAutocompleteQuery(String, int, Operator)
     */
    public static String getAutocompleteQuery(String searchPattern) {
        return getAutocompleteQuery(searchPattern, DEFAULT_ENABLE_WILDCARD_MIN_CHARS, null);
    }

    /**
     * Builds an autocomplete query string with the default minimum pattern length.
     *
     * @see #getAutocompleteQuery(String, int, Operator)
     */
    public static String getAutocompleteQuery(String searchPattern, Operator operator) {
        return getAutocompleteQuery(searchPattern, DEFAULT_ENABLE_WILDCARD_MIN_CHARS, operator);
    }

    /**
     * Builds an autocomplete query string with no explicit operator between the words.
     *
     * @see #getAutocompleteQuery(String, int, Operator)
     */
    public static String getAutocompleteQuery(String searchPattern, int enableWildcardMinChars) {
        return getAutocompleteQuery(searchPattern, enableWildcardMinChars, null);
    }

    /**
     * Builds an autocomplete query string: the pattern is cleaned, split into fragments, and the last
     * fragment gets the {@link #WILDCARD_SUFFIX}.
     *
     * @param searchPattern the raw, user-entered pattern
     * @param enableWildcardMinChars minimum length of the cleaned pattern for the rewrite to happen
     * @param operator the operator written between fragments, or {@code null} to separate them with a
     *        single space only
     * @return the rewritten query string; the cleaned pattern as is when it has no text or is shorter
     *         than {@code enableWildcardMinChars}
     */
    public static String getAutocompleteQuery(String searchPattern, int enableWildcardMinChars, Operator operator) {
        String cleanSearchPattern = StringUtils.clean(searchPattern);
        if (!StringUtils.hasText(cleanSearchPattern) || cleanSearchPattern.length() < enableWildcardMinChars) {
            return cleanSearchPattern;
        }

        StringBuilder autocompleteQuery = new StringBuilder();
        Iterator<String> fragmentsIt = getSearchPatternFragments(cleanSearchPattern).iterator();
        while (fragmentsIt.hasNext()) {
            // Honour the operator requested by the caller (it used to be hard-coded to AND).
            appendSeparator(autocompleteQuery, operator);
            autocompleteQuery.append(fragmentsIt.next());
            if (!fragmentsIt.hasNext()) {
                // Only the last fragment is still being typed, so only that one becomes a prefix search.
                autocompleteQuery.append(WILDCARD_SUFFIX);
            }
        }
        return autocompleteQuery.toString().trim();
    }

    /**
     * Builds a similarity (fuzzy) query on several fields.
     *
     * <p>Every field gets the same weight ({@code 1.0f}); the pattern produced by
     * {@link #getSimilarityQuery(String, Integer)} is parsed by a {@link SimpleQueryParser} whose default
     * operator is {@link BooleanClause.Occur#MUST}.</p>
     *
     * @param fieldNames the fields to search on
     * @param analyzer the analyzer handed to the parser
     * @param searchPattern the raw, user-entered pattern
     * @param maxEditDistance the edit distance written after each fragment
     * @return the parsed query
     * @throws IllegalArgumentException if {@code maxEditDistance} is {@code null} or the cleaned pattern has no text
     */
    public static Query getSimilarityQuery(Iterable<String> fieldNames, Analyzer analyzer, String searchPattern,
            Integer maxEditDistance) {
        // Fail fast, consistently with the single-field overload.
        if (maxEditDistance == null) {
            throw new IllegalArgumentException("maxEditDistance may not be null");
        }
        SimpleQueryParser queryParser = new SimpleQueryParser(analyzer, toFieldWeights(fieldNames));
        queryParser.setDefaultOperator(BooleanClause.Occur.MUST);
        return queryParser.parse(getSimilarityQuery(searchPattern, maxEditDistance));
    }

    /**
     * Builds a similarity (fuzzy) query on a single field.
     *
     * @param fieldName the field to search on
     * @param analyzer the analyzer handed to the parser
     * @param searchPattern the raw, user-entered pattern
     * @param maxEditDistance the edit distance written after each fragment
     * @return the parsed query
     * @throws IllegalArgumentException if {@code maxEditDistance} is {@code null} or the cleaned pattern has no text
     */
    public static Query getSimilarityQuery(String fieldName, Analyzer analyzer,
            String searchPattern, Integer maxEditDistance) {
        if (maxEditDistance == null) {
            throw new IllegalArgumentException("maxEditDistance may not be null");
        }
        SimpleQueryParser queryParser = new SimpleQueryParser(analyzer, fieldName);
        queryParser.setDefaultOperator(BooleanClause.Occur.MUST);
        return queryParser.parse(getSimilarityQuery(searchPattern, maxEditDistance));
    }

    /**
     * Builds a similarity query string with no explicit operator between the words.
     *
     * @see #getSimilarityQuery(String, Integer, Operator)
     */
    public static String getSimilarityQuery(String searchPattern, Integer maxEditDistance) {
        return getSimilarityQuery(searchPattern, maxEditDistance, null);
    }

    /**
     * Builds a similarity query string: the pattern is cleaned, split into fragments, and each fragment
     * is followed by {@link #FUZZY_PARAMETER_SUFFIX} and {@code maxEditDistance}.
     *
     * @param searchPattern the raw, user-entered pattern
     * @param maxEditDistance the edit distance written after each fragment
     * @param operator the operator written between fragments, or {@code null} to separate them with a
     *        single space only
     * @return the similarity query string
     * @throws IllegalArgumentException if {@code maxEditDistance} is {@code null} or the cleaned pattern has no text
     */
    public static String getSimilarityQuery(String searchPattern, Integer maxEditDistance, Operator operator) {
        if (maxEditDistance == null) {
            throw new IllegalArgumentException("maxEditDistance may not be null");
        }
        String cleanSearchPattern = StringUtils.clean(searchPattern);
        if (!StringUtils.hasText(cleanSearchPattern)) {
            throw new IllegalArgumentException("cleanSearchPattern may not be empty");
        }

        StringBuilder similarityQuery = new StringBuilder();
        for (String fragment : getSearchPatternFragments(cleanSearchPattern)) {
            appendSeparator(similarityQuery, operator);
            similarityQuery.append(fragment).append(FUZZY_PARAMETER_SUFFIX).append(maxEditDistance.toString());
        }
        return similarityQuery.toString().trim();
    }

    /**
     * Cleans the search pattern and allows wildcard searches.
     *
     * <p>Note that if stemming (or any similar analysis trick) is involved, the search must still be run
     * on both the stemmed field and a non-stemmed field, otherwise the wildcard may return no result.</p>
     *
     * <p>Does not put an explicit operator between the words.</p>
     *
     * @see #getQuery(String, Operator)
     */
    public static String getQuery(String searchPattern) {
        return getQuery(searchPattern, null);
    }

    /**
     * Cleans the search pattern and allows wildcard searches.
     *
     * <p>Note that if stemming (or any similar analysis trick) is involved, the search must still be run
     * on both the stemmed field and a non-stemmed field, otherwise the wildcard may return no result.</p>
     *
     * @param searchPattern the raw, user-entered pattern
     * @param operator the operator written between fragments, or {@code null} to separate them with a
     *        single space only
     * @return the rewritten query string, or the cleaned pattern as is when it has no text
     */
    public static String getQuery(String searchPattern, Operator operator) {
        String cleanSearchPattern = StringUtils.cleanForQuery(searchPattern);
        if (!StringUtils.hasText(cleanSearchPattern)) {
            return cleanSearchPattern;
        }

        StringBuilder query = new StringBuilder();
        for (String fragment : getSearchPatternFragments(cleanSearchPattern)) {
            if (WILDCARD_SUFFIX.equals(fragment)) {
                // A lone "*" carries no usable information: skip it.
                continue;
            }
            appendSeparator(query, operator);
            query.append(fragment);
        }
        return query.toString().trim();
    }

    /**
     * Splits a pattern on whitespace and on {@code '-'}, trimming the fragments and dropping empty ones.
     *
     * @return the fragments, or an empty list when the pattern has no text
     */
    private static List<String> getSearchPatternFragments(String searchPattern) {
        if (!StringUtils.hasText(searchPattern)) {
            return Lists.newArrayList();
        }
        return Splitter.on(CharMatcher.WHITESPACE.or(CharMatcher.is('-')))
                .trimResults()
                .omitEmptyStrings()
                .splitToList(searchPattern);
    }

    /**
     * Builds a range query with both bounds inclusive.
     *
     * @see #toFilterRangeQuery(String, Number, Number, boolean, boolean)
     */
    public static RawLuceneQuery toFilterRangeQuery(String field, Number min, Number max) {
        return toFilterRangeQuery(field, min, max, true, true);
    }

    /**
     * Builds a range query of the form {@code field:[min TO max]}.
     *
     * <p>A {@code null} bound is written as {@code *}; an exclusive bound uses a curly brace instead of
     * a square bracket. The {@code field:} prefix is omitted when {@code field} has no text.</p>
     *
     * @param field the field the range applies to
     * @param min the lower bound, or {@code null} for an open lower bound
     * @param max the upper bound, or {@code null} for an open upper bound
     * @param minInclusive whether the lower bound is inclusive
     * @param maxInclusive whether the upper bound is inclusive
     * @return the range query, or {@code null} when both bounds are {@code null}
     */
    public static RawLuceneQuery toFilterRangeQuery(String field, Number min, Number max, boolean minInclusive, boolean maxInclusive) {
        if (min == null && max == null) {
            return null;
        }
        return new RawLuceneQuery(formatRange(field, min, max, minInclusive, maxInclusive));
    }

    /**
     * Serializes a query into a query string, appending the boost when it differs from {@code 1.0}.
     *
     * @param luceneQuery the query to serialize
     * @return the query string; may be empty when the query yields no text
     * @throws IllegalStateException if the query type is not supported
     */
    @SuppressWarnings("unchecked")
    public static String queryToString(Query luceneQuery) {
        StringBuilder sb = new StringBuilder();
        if (luceneQuery instanceof BooleanQuery) {
            sb.append(formatBooleanQuery((BooleanQuery) luceneQuery));
        } else if (luceneQuery instanceof TermQuery) {
            sb.append(formatTermQuery((TermQuery) luceneQuery));
        } else if (luceneQuery instanceof RawLuceneQuery) {
            sb.append(formatRawLuceneQuery((RawLuceneQuery) luceneQuery));
        } else if (luceneQuery instanceof FuzzyQuery) {
            sb.append(formatFuzzyQuery((FuzzyQuery) luceneQuery));
        } else if (luceneQuery instanceof PrefixQuery) {
            sb.append(formatPrefixQuery((PrefixQuery) luceneQuery));
        } else if (luceneQuery instanceof WildcardQuery) {
            sb.append(formatWildcardQuery((WildcardQuery) luceneQuery));
        } else if (luceneQuery instanceof NumericRangeQuery) {
            sb.append(formatNumericRangeQuery((NumericRangeQuery<? extends Number>) luceneQuery));
        } else if (luceneQuery instanceof IToQueryStringAwareLuceneQuery) {
            sb.append(((IToQueryStringAwareLuceneQuery) luceneQuery).toQueryString());
        } else {
            throw new IllegalStateException(String.format("Query of type %1$s not supported",
                    luceneQuery.getClass().getName()));
        }

        // A boost is only meaningful when there is something to boost.
        if (StringUtils.hasText(sb) && Float.compare(1.0f, luceneQuery.getBoost()) != 0) {
            sb.append(BOOST_PARAMETER_PREFIX);
            sb.append(luceneQuery.getBoost());
        }
        return sb.toString();
    }

    /**
     * Serializes a boolean query. Clauses without a query, or whose query yields no text, are skipped.
     * The result is wrapped in parentheses when the query has several clauses, when the output starts
     * with {@code +} or {@code -}, or when the only clause is a {@link RawLuceneQuery}.
     */
    private static String formatBooleanQuery(BooleanQuery booleanQuery) {
        List<BooleanClause> clauses = booleanQuery.clauses();
        StringBuilder clausesSb = new StringBuilder();
        for (BooleanClause clause : clauses) {
            if (clause.getQuery() == null) {
                continue;
            }
            String query = queryToString(clause.getQuery());
            if (!StringUtils.hasText(query)) {
                continue;
            }
            if (Occur.SHOULD.equals(clause.getOccur())) {
                // In Solr the implicit operator may be configured to AND, so the OR has to be explicit.
                if (clausesSb.length() > 0) {
                    clausesSb.append("OR ");
                }
            } else {
                clausesSb.append(clause.getOccur().toString());
            }
            clausesSb.append(query);
            clausesSb.append(" ");
        }

        if (clausesSb.length() == 0) {
            return "";
        }

        boolean needsParentheses = clauses.size() > 1
                || clausesSb.charAt(0) == '-' || clausesSb.charAt(0) == '+'
                || clauses.get(0).getQuery() instanceof RawLuceneQuery;
        // Always trim: a trailing space would otherwise end up between the query and its boost.
        String formatted = clausesSb.toString().trim();
        return needsParentheses ? "(" + formatted + ")" : formatted;
    }

    /** Serializes a term query as {@code field:"escaped text"}. */
    private static String formatTermQuery(TermQuery termQuery) {
        Term term = termQuery.getTerm();
        StringBuilder sb = new StringBuilder();
        appendFieldPrefix(sb, term.field());
        sb.append("\"")
                .append(QueryParser.escape(term.text()))
                .append("\"");
        return sb.toString();
    }

    /** Serializes a raw query by wrapping its text in parentheses; yields an empty string when it has no text. */
    private static String formatRawLuceneQuery(RawLuceneQuery simpleQuery) {
        if (!StringUtils.hasText(simpleQuery.getQuery())) {
            return "";
        }
        return new StringBuilder()
                .append("(")
                .append(simpleQuery.getQuery())
                .append(")")
                .toString();
    }

    /** Serializes a fuzzy query as {@code field:escapedText~maxEdits}. */
    private static String formatFuzzyQuery(FuzzyQuery fuzzyQuery) {
        Term term = fuzzyQuery.getTerm();
        StringBuilder sb = new StringBuilder();
        appendFieldPrefix(sb, term.field());
        sb.append(QueryParser.escape(term.text()))
                .append(FUZZY_PARAMETER_SUFFIX)
                .append(fuzzyQuery.getMaxEdits());
        return sb.toString();
    }

    /** Serializes a prefix query as {@code field:escapedPrefix*}. */
    private static String formatPrefixQuery(PrefixQuery prefixQuery) {
        Term prefix = prefixQuery.getPrefix();
        StringBuilder sb = new StringBuilder();
        appendFieldPrefix(sb, prefix.field());
        sb.append(QueryParser.escape(prefix.text()));
        sb.append("*");
        return sb.toString();
    }

    /** Serializes a wildcard query as {@code field:text}; the text is written without escaping. */
    private static String formatWildcardQuery(WildcardQuery wildcardQuery) {
        Term term = wildcardQuery.getTerm();
        StringBuilder sb = new StringBuilder();
        appendFieldPrefix(sb, term.field());
        sb.append(term.text());
        return sb.toString();
    }

    /**
     * Serializes a numeric range query. A query with both bounds open is written as
     * {@code field:[* TO *]} (brackets or braces depending on inclusiveness).
     */
    private static String formatNumericRangeQuery(NumericRangeQuery<? extends Number> numericRangeQuery) {
        Number min = numericRangeQuery.getMin();
        Number max = numericRangeQuery.getMax();
        if (min == null && max == null) {
            // toFilterRangeQuery() returns null in this case: format directly instead of dereferencing null.
            return formatRange(numericRangeQuery.getField(), null, null,
                    numericRangeQuery.includesMin(), numericRangeQuery.includesMax());
        }
        return toFilterRangeQuery(numericRangeQuery.getField(), min, max,
                numericRangeQuery.includesMin(), numericRangeQuery.includesMax()).getQuery();
    }

    /** Builds the map handed to the parser, giving every field the same weight of {@code 1.0f}. */
    private static Map<String, Float> toFieldWeights(Iterable<String> fieldNames) {
        Map<String, Float> fields = Maps.newHashMap();
        for (String fieldName : fieldNames) {
            fields.put(fieldName, 1.0f);
        }
        return fields;
    }

    /**
     * Appends what separates two fragments: nothing when the buffer is still empty, otherwise a space
     * followed, when an operator is given, by that operator and another space.
     */
    private static void appendSeparator(StringBuilder sb, Operator operator) {
        if (sb.length() == 0) {
            return;
        }
        sb.append(" ");
        if (operator != null) {
            sb.append(operator).append(" ");
        }
    }

    /** Appends {@code field:} when the field has text, nothing otherwise. */
    private static void appendFieldPrefix(StringBuilder sb, String field) {
        if (StringUtils.hasText(field)) {
            sb.append(field);
            sb.append(":");
        }
    }

    /** Formats a range expression; a {@code null} bound is written as {@code *}. */
    private static String formatRange(String field, Number min, Number max, boolean minInclusive, boolean maxInclusive) {
        StringBuilder sb = new StringBuilder();
        appendFieldPrefix(sb, field);
        sb.append(minInclusive ? "[" : "{")
                .append((min == null) ? "*" : min.toString())
                .append(" TO ")
                .append((max == null) ? "*" : max.toString())
                .append(maxInclusive ? "]" : "}");
        return sb.toString();
    }

    private LuceneUtils() {
    }
}