package org.apache.lucene.sandbox.queries; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import org.apache.lucene.index.SingleTermsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.BooleanQuery; // javadocs import org.apache.lucene.search.FuzzyQuery; // javadocs import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.automaton.LevenshteinAutomata; /** Implements the classic fuzzy search query. The similarity measurement * is based on the Levenshtein (edit distance) algorithm. * <p> * Note that, unlike {@link FuzzyQuery}, this query will silently allow * for a (possibly huge) number of edit distances in comparisons, and may * be extremely slow (comparing every term in the index). * * @deprecated Use {@link FuzzyQuery} instead. */ @Deprecated public class SlowFuzzyQuery extends MultiTermQuery { public final static float defaultMinSimilarity = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE; public final static int defaultPrefixLength = 0; public final static int defaultMaxExpansions = 50; private float minimumSimilarity; private int prefixLength; private boolean termLongEnough = false; protected Term term; /** * Create a new SlowFuzzyQuery that will match terms with a similarity * of at least <code>minimumSimilarity</code> to <code>term</code>. * If a <code>prefixLength</code> > 0 is specified, a common prefix * of that length is also required. * * @param term the term to search for * @param minimumSimilarity a value between 0 and 1 to set the required similarity * between the query term and the matching terms. For example, for a * <code>minimumSimilarity</code> of <code>0.5</code> a term of the same length * as the query term is considered similar to the query term if the edit distance * between both terms is less than <code>length(term)*0.5</code> * <p> * Alternatively, if <code>minimumSimilarity</code> is >= 1f, it is interpreted * as a pure Levenshtein edit distance. For example, a value of <code>2f</code> * will match all terms within an edit distance of <code>2</code> from the * query term. Edit distances specified in this way may not be fractional. * * @param prefixLength length of common (non-fuzzy) prefix * @param maxExpansions the maximum number of terms to match. If this number is * greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten, * then the maxClauseCount will be used instead. * @throws IllegalArgumentException if minimumSimilarity is >= 1 or < 0 * or if prefixLength < 0 */ public SlowFuzzyQuery(Term term, float minimumSimilarity, int prefixLength, int maxExpansions) { super(term.field()); this.term = term; if (minimumSimilarity >= 1.0f && minimumSimilarity != (int)minimumSimilarity) throw new IllegalArgumentException("fractional edit distances are not allowed"); if (minimumSimilarity < 0.0f) throw new IllegalArgumentException("minimumSimilarity < 0"); if (prefixLength < 0) throw new IllegalArgumentException("prefixLength < 0"); if (maxExpansions < 0) throw new IllegalArgumentException("maxExpansions < 0"); setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions)); String text = term.text(); int len = text.codePointCount(0, text.length()); if (len > 0 && (minimumSimilarity >= 1f || len > 1.0f / (1.0f - minimumSimilarity))) { this.termLongEnough = true; } this.minimumSimilarity = minimumSimilarity; this.prefixLength = prefixLength; } /** * Calls {@link #SlowFuzzyQuery(Term, float) SlowFuzzyQuery(term, minimumSimilarity, prefixLength, defaultMaxExpansions)}. */ public SlowFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { this(term, minimumSimilarity, prefixLength, defaultMaxExpansions); } /** * Calls {@link #SlowFuzzyQuery(Term, float) SlowFuzzyQuery(term, minimumSimilarity, 0, defaultMaxExpansions)}. */ public SlowFuzzyQuery(Term term, float minimumSimilarity) { this(term, minimumSimilarity, defaultPrefixLength, defaultMaxExpansions); } /** * Calls {@link #SlowFuzzyQuery(Term, float) SlowFuzzyQuery(term, defaultMinSimilarity, 0, defaultMaxExpansions)}. */ public SlowFuzzyQuery(Term term) { this(term, defaultMinSimilarity, defaultPrefixLength, defaultMaxExpansions); } /** * Returns the minimum similarity that is required for this query to match. * @return float value between 0.0 and 1.0 */ public float getMinSimilarity() { return minimumSimilarity; } /** * Returns the non-fuzzy prefix length. This is the number of characters at the start * of a term that must be identical (not fuzzy) to the query term if the query * is to match that term. */ public int getPrefixLength() { return prefixLength; } @Override protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException { if (!termLongEnough) { // can only match if it's exact return new SingleTermsEnum(terms.iterator(null), term.bytes()); } return new SlowFuzzyTermsEnum(terms, atts, getTerm(), minimumSimilarity, prefixLength); } /** * Returns the pattern term. */ public Term getTerm() { return term; } @Override public String toString(String field) { final StringBuilder buffer = new StringBuilder(); if (!term.field().equals(field)) { buffer.append(term.field()); buffer.append(":"); } buffer.append(term.text()); buffer.append('~'); buffer.append(Float.toString(minimumSimilarity)); buffer.append(ToStringUtils.boost(getBoost())); return buffer.toString(); } @Override public int hashCode() { final int prime = 31; int result = super.hashCode(); result = prime * result + Float.floatToIntBits(minimumSimilarity); result = prime * result + prefixLength; result = prime * result + ((term == null) ? 0 : term.hashCode()); return result; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (!super.equals(obj)) return false; if (getClass() != obj.getClass()) return false; SlowFuzzyQuery other = (SlowFuzzyQuery) obj; if (Float.floatToIntBits(minimumSimilarity) != Float .floatToIntBits(other.minimumSimilarity)) return false; if (prefixLength != other.prefixLength) return false; if (term == null) { if (other.term != null) return false; } else if (!term.equals(other.term)) return false; return true; } }