package org.apache.lucene.search.spans;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Vector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.limit.TermLimitException;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.WildcardTermEnum;
/** Matches spans containing a wildcard term.
 *
 * <p>The query cannot produce spans by itself: {@link #rewrite(IndexReader)}
 * expands the wildcard into a {@link SpanOrQuery} over one
 * {@link SpanTermQuery} per matching index term, and only that rewritten
 * query may be asked for spans.
 */
public class SpanWildcardQuery extends SpanTermQuery
{
  /** Limit on the total number of terms matched */
  private final int termLimit;

  /** Limit on the number of terms to report on an error */
  private static final int TERMS_TO_REPORT = 50;

  /** Construct a SpanWildcardQuery matching expanded terms, using
   * <code>Integer.MAX_VALUE</code> as the term limit.
   *
   * @param term the wildcard term to expand
   */
  public SpanWildcardQuery(Term term) {
    this(term, Integer.MAX_VALUE);
  }

  /** Construct a SpanWildcardQuery matching expanded terms, but
   * limiting the total number of terms matched.
   *
   * @param term      the wildcard term to expand
   * @param termLimit maximum number of matching terms tolerated by
   *                  {@link #rewrite(IndexReader)}. The limit is checked by
   *                  comparing it for equality against a counter that starts
   *                  at zero, so a negative value never triggers the check.
   */
  public SpanWildcardQuery(Term term, int termLimit) {
    super(term);
    this.termLimit = termLimit;
  }

  /** Retrieve the term limit this was constructed with */
  public int getTermLimit() {
    return termLimit;
  }

  /**
   * This method is actually the workhorse of the class. Rewrites the
   * wildcard query as a large span OR query on all of the matching terms.
   *
   * @param reader index whose terms are enumerated against the wildcard
   * @return a {@link SpanOrQuery} holding one {@link SpanTermQuery} per
   *         matching term that was not skipped
   * @throws IOException if enumerating the index terms fails
   * @throws TermLimitException if more than {@link #getTermLimit()} terms
   *         match; the message lists the first matches (at most
   *         {@link #TERMS_TO_REPORT} of them)
   */
  public Query rewrite(IndexReader reader)
    throws IOException
  {
    StringBuffer termReport = new StringBuffer(100);

    // Enumerate all the matching terms, and make a SpanTermQuery for each one.
    WildcardTermEnum enumerator = new WildcardTermEnum(reader, getTerm());
    Vector termQueries = new Vector();
    try
    {
      int nTerms = 0;     // matching terms accepted so far
      int nReported = 0;  // how many of them were written to termReport
      do
      {
        Term t = enumerator.term();

        // Ignore a null term, and enable derived classes to skip certain
        // words (bi-grams, etc.) In a do/while loop 'continue' jumps to the
        // loop condition, so the enumerator is still advanced.
        if (t == null || shouldSkipTerm(t))
          continue;

        // Found a match
        SpanTermQuery tq = new SpanTermQuery(t);
        tq.setBoost(getBoost() * enumerator.difference()); // set the boost
        termQueries.add(tq);

        if (nReported < TERMS_TO_REPORT) {
          termReport.append(t.text());
          termReport.append(" ");
          nReported++;
        }

        // If too many terms, throw an exception that contains a clue
        // so the user can make a query that fixes the problem. The test
        // fires on match number (termLimit + 1), i.e. as soon as more than
        // termLimit terms have matched. The message states how many terms
        // are really listed, which is fewer than TERMS_TO_REPORT when the
        // limit is small.
        //
        if (nTerms++ == termLimit)
          throw new TermLimitException(
            "Wildcard query on '" + getTerm().field() +
            "' matched too many terms (more than " + termLimit + "). First " +
            nReported + " matches: " + termReport.toString());
      } while (enumerator.next());
    }
    finally {
      enumerator.close();
    }

    // Now build a big OR query for all the terms.
    SpanOrQuery orQuery = new SpanOrQuery(
      (SpanQuery[])termQueries.toArray(new SpanQuery[0]));
    orQuery.setSpanRecording(getSpanRecording());
    return orQuery;
  }

  /** Enables derived classes to skip certain terms in the index (e.g. stop
   * words, bi-grams, etc.) Default implementation doesn't skip any terms.
   *
   * @param t a non-null term produced by the wildcard enumeration
   * @return true to leave the term out of the rewritten query
   */
  protected boolean shouldSkipTerm(Term t) {
    return false;
  }

  /** Should never be called on the wildcard query itself, only on the
   * result of {@link SpanWildcardQuery#rewrite(IndexReader)}.
   *
   * @throws UnsupportedOperationException always
   */
  public Spans getSpans(final IndexReader reader, final Searcher searcher)
    throws IOException
  {
    throw new UnsupportedOperationException(
      "SpanWildcardQuery cannot produce spans directly; " +
      "call rewrite(IndexReader) and use the resulting query");
  }

  /** Formats the query as the superclass's string form wrapped in
   * <code>wild(...)</code>.
   */
  public String toString(String field) {
    return "wild(" + super.toString(field) + ")";
  }
}