/*
* Copyright 2014, Stratio.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.cassandra.index.query;
import com.google.common.base.Objects;
import com.stratio.cassandra.index.schema.mapping.ColumnMapperSingle;
import com.stratio.cassandra.index.schema.Schema;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
import org.codehaus.jackson.annotate.JsonCreator;
import org.codehaus.jackson.annotate.JsonProperty;
/**
 * A {@link Condition} that implements the fuzzy search query. The similarity measurement is based on the
 * Damerau-Levenshtein (optimal string alignment) algorithm, though you can explicitly choose classic Levenshtein by
 * passing {@code false} to the {@code transpositions} parameter.
 *
 * <p>Optional arguments that are not provided fall back to the {@code DEFAULT_*} constants declared in this class.
 * Arguments are validated when {@link #query(Schema)} is invoked, not at construction time.
 *
 * @author Andres de la Pena <adelapena@stratio.com>
 */
public class FuzzyCondition extends SingleFieldCondition {

    /** The default Damerau-Levenshtein max distance. */
    public static final int DEFAULT_MAX_EDITS = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE;

    /** The default length of common (non-fuzzy) prefix. */
    public static final int DEFAULT_PREFIX_LENGTH = 0;

    /** The default maximum number of terms to match. */
    public static final int DEFAULT_MAX_EXPANSIONS = 50;

    /** If transpositions should be treated as a primitive edit operation by default. */
    public static final boolean DEFAULT_TRANSPOSITIONS = true;

    /** The name of the field to be matched. */
    @JsonProperty("field")
    private final String field;

    /** The fuzzy expression to be matched. */
    @JsonProperty("value")
    private final String value;

    /** The Damerau-Levenshtein max distance. Never {@code null} once constructed. */
    @JsonProperty("max_edits")
    private final Integer maxEdits;

    /** The length of common (non-fuzzy) prefix. Never {@code null} once constructed. */
    @JsonProperty("prefix_length")
    private final Integer prefixLength;

    /** The maximum number of terms to match. Never {@code null} once constructed. */
    @JsonProperty("max_expansions")
    private final Integer maxExpansions;

    /** If transpositions should be treated as a primitive edit operation. Never {@code null} once constructed. */
    @JsonProperty("transpositions")
    private final Boolean transpositions;

    /**
     * Returns a new {@link FuzzyCondition}.
     *
     * @param boost          The boost for this query clause. Documents matching this clause will (in addition to the
     *                       normal weightings) have their score multiplied by {@code boost}. If {@code null}, then
     *                       {@link #DEFAULT_BOOST} is used as default.
     * @param field          The field name.
     * @param value          The field fuzzy value.
     * @param maxEdits       Must be {@code >= 0} and {@code <=} {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}.
     *                       If {@code null}, then {@link #DEFAULT_MAX_EDITS} is used.
     * @param prefixLength   Length of common (non-fuzzy) prefix. If {@code null}, then
     *                       {@link #DEFAULT_PREFIX_LENGTH} is used.
     * @param maxExpansions  The maximum number of terms to match. If this number is greater than {@link
     *                       BooleanQuery#getMaxClauseCount} when the query is rewritten, then the maxClauseCount will
     *                       be used instead. If {@code null}, then {@link #DEFAULT_MAX_EXPANSIONS} is used.
     * @param transpositions True if transpositions should be treated as a primitive edit operation. If this is false,
     *                       comparisons will implement the classic Levenshtein algorithm. If {@code null}, then
     *                       {@link #DEFAULT_TRANSPOSITIONS} is used.
     */
    @JsonCreator
    public FuzzyCondition(@JsonProperty("boost") Float boost,
                          @JsonProperty("field") String field,
                          @JsonProperty("value") String value,
                          @JsonProperty("max_edits") Integer maxEdits,
                          @JsonProperty("prefix_length") Integer prefixLength,
                          @JsonProperty("max_expansions") Integer maxExpansions,
                          @JsonProperty("transpositions") Boolean transpositions) {
        super(boost);
        this.field = field;
        this.value = value;
        // Optional arguments are resolved to their defaults here, so the fields below are never null afterwards.
        this.maxEdits = maxEdits == null ? DEFAULT_MAX_EDITS : maxEdits;
        this.prefixLength = prefixLength == null ? DEFAULT_PREFIX_LENGTH : prefixLength;
        this.maxExpansions = maxExpansions == null ? DEFAULT_MAX_EXPANSIONS : maxExpansions;
        this.transpositions = transpositions == null ? DEFAULT_TRANSPOSITIONS : transpositions;
    }

    /**
     * Builds the Lucene {@link FuzzyQuery} represented by this condition.
     *
     * @param schema The schema used to resolve the column mapper of the field and to analyze the value.
     * @return A boosted {@link FuzzyQuery} over the analyzed value.
     * @throws IllegalArgumentException      If the field name or the value is {@code null} or blank, if
     *                                       {@code max_edits} is outside
     *                                       {@code [0, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE]}, or if
     *                                       {@code prefix_length} or {@code max_expansions} is negative.
     * @throws UnsupportedOperationException If the base class of the field's mapper is not {@link String}.
     */
    @Override
    public Query query(Schema schema) {

        if (field == null || field.trim().isEmpty()) {
            throw new IllegalArgumentException("Field name required");
        }
        if (value == null || value.trim().isEmpty()) {
            throw new IllegalArgumentException("Field value required");
        }
        // The upper bound comes from Lucene rather than from a literal, so that the check, the default value and
        // the documented contract cannot drift apart.
        if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
            throw new IllegalArgumentException(
                    "max_edits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
        }
        // Zero is accepted by both checks below, hence "not negative" rather than "positive".
        if (prefixLength < 0) {
            throw new IllegalArgumentException("prefix_length must not be negative.");
        }
        if (maxExpansions < 0) {
            throw new IllegalArgumentException("max_expansions must not be negative.");
        }

        ColumnMapperSingle<?> columnMapper = getMapper(schema, field);
        Class<?> clazz = columnMapper.baseClass();
        if (clazz == String.class) {
            // The value goes through the inherited analyze helper before being wrapped in the term.
            String analyzedValue = analyze(field, value, schema);
            Term term = new Term(field, analyzedValue);
            Query query = new FuzzyQuery(term, maxEdits, prefixLength, maxExpansions, transpositions);
            query.setBoost(boost);
            return query;
        } else {
            String message = String.format("Fuzzy queries are not supported by %s mapper", clazz.getSimpleName());
            throw new UnsupportedOperationException(message);
        }
    }

    /**
     * Returns a string listing the field, value, max edits, prefix length, max expansions and transpositions of
     * this condition.
     *
     * @return The string representation of this condition.
     */
    @Override
    public String toString() {
        return Objects.toStringHelper(this)
                      .add("field", field)
                      .add("value", value)
                      .add("maxEdits", maxEdits)
                      .add("prefixLength", prefixLength)
                      .add("maxExpansions", maxExpansions)
                      .add("transpositions", transpositions)
                      .toString();
    }
}