package pl.edu.icm.saos.search.search.service;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.joda.time.LocalDate;
import org.springframework.stereotype.Service;
import pl.edu.icm.saos.search.config.model.IndexField;
import pl.edu.icm.saos.search.util.SearchDateTimeUtils;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
/**
* Transforms criterion into fragment of Solr query
* @author madryk
* @param <F> types of fields that can be transformed
*/
@Service
public class SolrCriterionTransformer<F extends IndexField> {
/**
* Operator that can be assigned when creating Solr query with multiple
* values for single field.
*/
public enum Operator {
/**
* All values must be present in specified field
*/
AND,
/**
* Only one of passed values must be present in specified field
*/
OR
}
private final static List<String> SOLR_OPERATORS = ImmutableList.of("AND", "OR", "NOT");
//------------------------ LOGIC --------------------------
/**
* Transforms criterion into fragment of Solr query.
* Criterion value will be parsed. This allows to change final query
* that will be produced. Parsing supports:
* <ul>
* <li>
* change method of joining words - When multiple words are passed then
* created query will search for document which contains all of this words.
* To search for documents which contains only one of this words we can
* place operator <code>OR</code> between them (e.g. <code>word1 OR word2</code>)
* </li>
* <li>
* phrases - To search for documents that contains exact phrase we can use
* quotes (e.g. <code>"word1 word2"</code>)
* </li>
* <li>
* excluding - To search for documents that doesn't contain specific word
* we can insert minus sign before that word (e.g. <code>-word</code>)
* </li>
* </ul>
* All of above parsing features can be mixed.
*
* @param field
* @param value
* @return
*/
public String transformToEqualsCriterionWithParsing(F field, String value) {
if (StringUtils.isBlank(value)) {
return StringUtils.EMPTY;
}
List<String> splittedByPhrases = splitByPhrases(value);
List<String> splittedByWords = splitByWords(splittedByPhrases);
List<List<String>> groupedByOrOperator = groupByOrOperator(splittedByWords);
List<String> queryParts = new LinkedList<String>();
for (List<String> orValuesGroup : groupedByOrOperator) {
String queryPart = null;
if (orValuesGroup.size() == 1) {
queryPart = buildParsedEqualsCriterion(field.getFieldName(), orValuesGroup.get(0), "+");
} else {
List<String> orQueryParts = Lists.newLinkedList();
for (String orValue : orValuesGroup) {
if (isExclusion(orValue)) {
orQueryParts.add("(*:* " + buildParsedEqualsCriterion(field.getFieldName(), orValue, "") + ")");
} else {
orQueryParts.add(buildParsedEqualsCriterion(field.getFieldName(), orValue, ""));
}
}
queryPart = "+(" + orQueryParts.stream().collect(Collectors.joining(" ")) + ")";
}
queryParts.add(queryPart);
}
return queryParts.stream().collect(Collectors.joining(" "));
}
/**
* Invokes {@link #transformToEqualsCriterion(IndexField, String, Operator)} with the
* operator argument set to {@link Operator#AND}
*/
public String transformToEqualsCriterion(F field, String value) {
if (StringUtils.isBlank(value)) {
return StringUtils.EMPTY;
}
return buildEqualsCriterion(field.getFieldName(), value, Operator.AND);
}
/**
* Transforms the passed field and {@literal String} value into a single Solr equals criterion.<br/><br/>
* In case of the operator being equal to {@link Operator#AND} the returned criterion will be marked as required (<code>+</code> sign). So if this
* criterion is joined with others, it will work like an 'and' operator.<br/>
* In case of the operator being equal to {@link Operator#OR} the returned criterion will be marked as not required. So if this
* criterion is joined with others, it will work like an 'or' operator.<br/>
*
* For example: when we pass 'value' for the 'all' field with an {@link Operator#AND} this method will return <code>+all:value</code>
*
* @param field
* @param value
* @return equals criterion {@literal String}
*/
public String transformToEqualsCriterion(F field, String value, Operator operator) {
if (StringUtils.isBlank(value)) {
return StringUtils.EMPTY;
}
return buildEqualsCriterion(field.getFieldName(), value, operator);
}
/**
* Transforms field and {@literal Integer} value into single Solr equals criterion.<br/>
* Internally invokes {@link #transformToEqualsCriterion(IndexField, String)} with
* {@literal Integer} converted to {@literal String}
*
* @param field
* @param value
* @return equals criterion {@literal String}
*/
public String transformToEqualsCriterion(F field, Integer value) {
return (value == null) ? StringUtils.EMPTY : transformToEqualsCriterion(field, String.valueOf(value));
}
/**
* Transforms field and {@literal Long} value into single Solr equals criterion.<br/>
* Internally invokes {@link #transformToEqualsCriterion(IndexField, String)} with
* {@literal Long} converted to {@literal String}
*
* @param field
* @param value
* @return equals criterion {@literal String}
*/
public String transformToEqualsCriterion(F field, Long value) {
return (value == null) ? StringUtils.EMPTY : transformToEqualsCriterion(field, String.valueOf(value));
}
/**
* Transforms field and {@literal Enum} value into single Solr equals criterion.<br/>
* Internally invokes {@link #transformToEqualsCriterion(IndexField, String)} with
* {@literal Enum} converted to {@literal String} using {@link Enum#name()}.
*
* @param field
* @param value
* @return equals criterion {@literal String}
*/
public String transformToEqualsCriterion(F field, Enum<?> enumValue) {
return (enumValue == null) ? StringUtils.EMPTY : transformToEqualsCriterion(field, enumValue.name());
}
/**
* Transforms field and {@literal String} values into multiple Solr equals criteria
* on single field.<br/>
* Returned criteria will be joined according to passed {@link Operator}<br/>
*
*
* For example:<br/>
*
* 1) when we pass 'value1' and 'value2' on field 'someField' with {@link Operator#AND} this method will return
* <code>+someField:value1 +someField:value2</code><br/>
*
* 2) when we pass 'value1' and 'value2' on field 'someField' with {@link Operator#OR} this method will return
* <code>+(someField:value1 someField:value2)</code><br/>
*
*
* @param field
* @param values
* @param operator
* @return equals criteria {@literal String}
*/
public String transformToEqualsCriteria(F field, List<String> values, Operator operator) {
List<String> notEmptyValues = values.stream().filter(x -> StringUtils.isNotBlank(x)).collect(Collectors.toList());
if (notEmptyValues.isEmpty()) {
return StringUtils.EMPTY;
}
List<String> singleValueCriterionList = Lists.newLinkedList();
notEmptyValues.stream()
.map(x -> buildEqualsCriterion(field.getFieldName(), x, operator))
.forEach(singleValueCriterionList::add);
if (singleValueCriterionList.size() == 1 && operator == Operator.OR) {
return "+" + singleValueCriterionList.get(0);
}
String multivaluedCriterion = singleValueCriterionList.stream().collect(Collectors.joining(" "));
return (operator == Operator.OR) ? ("+(" + multivaluedCriterion + ")") : multivaluedCriterion;
}
/**
* Transforms field and {@literal Enum} values into multiple Solr equals criteria
* on single field.<br/>
* Internally invokes {@link #transformToEqualsCriteria(IndexField, List, Operator)}
* with {@literal Enum}s converted to {@literal String}s using {@link Enum#name()}.
*
* @param field
* @param values
* @param operator
* @return equals criteria {@literal String}
*/
public String transformToEqualsEnumCriteria(F field, List<? extends Enum<?>> values, Operator operator) {
return transformToEqualsCriteria(field, values.stream().map(x -> x.name()).collect(Collectors.toList()), operator);
}
/**
* Transforms field and two dates defining range into single Solr range criterion.<br/>
* Internally invokes {@link #transformToRangeCriterion(IndexField, String, String)}
* with dates converted to {@literal String}s in format 'yyyy-MM-dd'T'00:00:00'Z' for
* lower limit and yyyy-MM-dd'T'23:59:59'Z' for upper limit.
*
* @param field
* @param from - lower limit of date range (use {@literal null} for no limit)
* @param to - upper limit of date range (use {@literal null} for no limit)
* @return range criterion {@literal String}
*/
public String transformToDateRangeCriterion(F field, LocalDate from, LocalDate to) {
if (from == null && to == null) {
return StringUtils.EMPTY;
}
String fromString = null;
String toString = null;
if (from != null) {
fromString = SearchDateTimeUtils.convertDateToISOString(from);
}
if (to != null) {
toString = SearchDateTimeUtils.convertDateToISOStringAtEndOfDay(to);
}
return transformToRangeCriterion(field, fromString, toString);
}
/**
* Transforms field and two {@literal String}s defining range into single Solr range criterion.<br/>
* Returned criterion will be marked as required (<code>+</code> sign). So if this
* criterion is joined with others it will work like 'and' operator.<br/>
*
* For example: when we pass 'a' and 'z' on field 'someField'
* this method will return <code>+someField:[a TO z]</code>
*
* @param field
* @param from - lower limit of range (use {@literal null} for no limit)
* @param to - upper limit of range (use {@literal null} for no limit)
* @return range criterion {@literal String}
*/
public String transformToRangeCriterion(F field, String from, String to) {
if (StringUtils.isBlank(from) && StringUtils.isBlank(to)) {
return StringUtils.EMPTY;
}
String parsedFrom = (StringUtils.isBlank(from)) ? "*" : StringUtils.trim(from);
String parsedTo = (StringUtils.isBlank(to)) ? "*" : StringUtils.trim(to);
return "+" + field.getFieldName() + ":[" + parsedFrom + " TO " + parsedTo + "]";
}
/**
* Marks the given criterion as required. If the criterion is joined
* with others, then it will work as 'and'. <br/> <br/>
*
* all:value judge:smith -> +(all:value judge:smith)
*
*/
public String and(String criterion) {
if (StringUtils.isEmpty(criterion)) {
return StringUtils.EMPTY;
}
return "+(" + criterion + ")";
}
/**
* Joins the given criteria into one. <br/> <br/>
*
* +all:value, judge:smith -> +all:value judge:smith
*/
public String join(List<String> criteria) {
if (CollectionUtils.isEmpty(criteria)) {
return StringUtils.EMPTY;
}
return criteria.stream().collect(Collectors.joining(" "));
}
//------------------------ PRIVATE --------------------------
private String buildEqualsCriterion(String fieldName, String value, Operator operator) {
String preparedValue = value.trim();
preparedValue = ClientUtils.escapeQueryChars(preparedValue);
preparedValue = escapeOperators(preparedValue);
return ((operator == Operator.AND) ? "+" : "") + fieldName + ":" + preparedValue;
}
private String buildParsedEqualsCriterion(String fieldName, String value, String defaultOperator) {
String operator = (isExclusion(value)) ? "-" : defaultOperator;
String preparedValue = value;
if (isExclusion(value)) {
preparedValue = preparedValue.substring(1);
}
if (!isPhrase(value)) {
preparedValue = preparedValue.trim();
preparedValue = ClientUtils.escapeQueryChars(preparedValue);
preparedValue = escapeOperators(preparedValue);
} else {
preparedValue = preparedValue.replace("\\", "\\\\"); // escape backslashes inside phrase
}
return operator + fieldName + ":" + preparedValue;
}
private List<String> splitByPhrases(String value) {
List<String> splittedByPhrases = new LinkedList<String>();
int currentPosition = 0;
while (value.substring(currentPosition).contains("\"")) {
int beginPhrase = value.indexOf("\"", currentPosition);
int endPhrase = value.indexOf("\"", beginPhrase + 1);
if (beginPhrase == -1 || endPhrase == -1) {
break;
} else {
if (beginPhrase >= currentPosition+1 && value.charAt(beginPhrase-1) == '-') {
beginPhrase -=1;
}
String beforePhrase = value.substring(currentPosition, beginPhrase);
String phrase = value.substring(beginPhrase, endPhrase + 1);
if (StringUtils.isNotBlank(beforePhrase)) {
splittedByPhrases.add(StringUtils.trim(beforePhrase));
}
splittedByPhrases.add(phrase);
currentPosition = endPhrase + 1;
}
}
if (StringUtils.isNotBlank(value.substring(currentPosition))) {
splittedByPhrases.add(StringUtils.trim(value.substring(currentPosition)));
}
return splittedByPhrases;
}
private List<String> splitByWords(List<String> values) {
List<String> splittedByWords = Lists.newLinkedList();
for (String value : values) {
if (isPhrase(value)) {
splittedByWords.add(value);
} else {
splittedByWords.addAll(Arrays.asList(value.split("\\s+")));
}
}
return splittedByWords;
}
private List<List<String>> groupByOrOperator(List<String> values) {
List<List<String>> groupedByOrOperator = Lists.newLinkedList();
for (int i=0; i<values.size(); ++i) {
List<String> withOrOperator = Lists.newLinkedList();
withOrOperator.add(values.get(i));
int j=1;
while (i+j+1< values.size() && StringUtils.equalsIgnoreCase(values.get(i+j), "OR")) {
withOrOperator.add(values.get(i+j+1));
j += 2;
}
i += j-1;
groupedByOrOperator.add(withOrOperator);
}
return groupedByOrOperator;
}
private boolean isPhrase(String value) {
return value.length() > 1 && (value.startsWith("\"") || value.startsWith("-\"")) && value.endsWith("\"");
}
private boolean isExclusion(String value) {
return value.length() > 1 && value.startsWith("-");
}
private String escapeOperators(String value) {
if (SOLR_OPERATORS.contains(value)) {
return "\\" + value;
}
return value;
}
}