package org.apache.lucene.search.spans;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.Vector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.limit.TermLimitException;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
/**
 * Matches spans containing terms within a specified range.
 *
 * <p>This query cannot produce spans by itself; it must first be expanded by
 * {@link #rewrite(IndexReader)} into a {@link SpanOrQuery} holding one
 * {@link SpanTermQuery} per matching index term. Calling
 * {@link #getSpans(IndexReader, Searcher)} or {@link #getTerms()} directly on
 * an instance of this class throws {@link UnsupportedOperationException}.
 */
public class SpanRangeQuery extends SpanQuery
{
  /** Limit on the number of terms to report on an error */
  private static final int TERMS_TO_REPORT = 50;

  /** Maximum number of terms the range may expand to (non-positive: no limit). */
  private final int termLimit;

  /** Lower bound; never null (an empty-text term stands in for "no bound"). */
  private final Term lowerTerm;

  /** Upper bound, or null if the range is unbounded above. */
  private final Term upperTerm;

  /** Whether the bounds themselves are part of the range. */
  private final boolean inclusive;

  /** Constructs a span query selecting all terms greater than
   * <code>lowerTerm</code> but less than <code>upperTerm</code> (the bounds
   * themselves are included when <code>inclusive</code> is true).
   * There must be at least one term and either term may be null,
   * in which case there is no bound on that side, but if there are
   * two terms, both terms <b>must</b> be for the same field. No limit is
   * applied to the number of terms matched.
   *
   * @param lowerTerm lower bound of the range, or null for none
   * @param upperTerm upper bound of the range, or null for none
   * @param inclusive true to include the bounds in the range
   * @throws IllegalArgumentException if both terms are null, or if they are
   *         for different fields
   */
  public SpanRangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive) {
    this(lowerTerm, upperTerm, inclusive, Integer.MAX_VALUE);
  }

  /** Constructs a span query selecting all terms greater than
   * <code>lowerTerm</code> but less than <code>upperTerm</code> (the bounds
   * themselves are included when <code>inclusive</code> is true).
   * There must be at least one term and either term may be null,
   * in which case there is no bound on that side, but if there are
   * two terms, both terms <b>must</b> be for the same field. Applies
   * a limit on the total number of terms matched.
   *
   * @param lowerTerm lower bound of the range, or null for none
   * @param upperTerm upper bound of the range, or null for none
   * @param inclusive true to include the bounds in the range
   * @param termLimit maximum number of terms the range may match before
   *        {@link #rewrite(IndexReader)} fails; a non-positive value
   *        disables the check
   * @throws IllegalArgumentException if both terms are null, or if they are
   *         for different fields
   */
  public SpanRangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive,
                        int termLimit)
  {
    if (lowerTerm == null && upperTerm == null) {
      throw new IllegalArgumentException("At least one term must be non-null");
    }

    // Compare field names by value so the check does not depend on the
    // strings being the same object.
    if (lowerTerm != null &&
        upperTerm != null &&
        !lowerTerm.field().equals(upperTerm.field()))
    {
      throw new IllegalArgumentException(
        "Both terms must be for the same field");
    }

    // if we have a lowerTerm, start there. otherwise, start at beginning
    if (lowerTerm != null) {
      this.lowerTerm = lowerTerm;
    }
    else {
      this.lowerTerm = new Term(upperTerm.field(), "");
    }

    this.upperTerm = upperTerm;
    this.inclusive = inclusive;
    this.termLimit = termLimit;
  }

  /**
   * This method is actually the workhorse of the class. Rewrites the range
   * query as a large span OR query on all of the matching terms.
   *
   * <p>Terms are enumerated starting at the lower bound and the scan stops at
   * the first term that belongs to another field or lies beyond the upper
   * bound. Each accepted term becomes a {@link SpanTermQuery} carrying this
   * query's boost.
   *
   * @param reader index whose terms are enumerated
   * @return a {@link SpanOrQuery} over every matching term
   * @throws TermLimitException if the range matches more than the configured
   *         term limit
   * @throws IOException if reading the term enumeration fails
   */
  public Query rewrite(IndexReader reader)
    throws IOException
  {
    Vector clauses = new Vector();
    StringBuffer termReport = new StringBuffer(100);
    int nTerms = 0;

    TermEnum enumerator = reader.terms(lowerTerm);
    try
    {
      // For an exclusive range, terms up to and including the lower bound
      // must be stepped over before matching starts.
      boolean checkLower = !inclusive;
      String testField = getField();

      // Find each matching term, and make a SpanTermQuery for each one.
      // Note: 'continue' in a do/while proceeds to the loop condition, so
      // the enumerator is still advanced for skipped terms.
      do
      {
        Term term = enumerator.term();

        // Ran off the end of the index or of this field: nothing more to match.
        if (term == null || !testField.equals(term.field())) {
          break;
        }

        if (checkLower) {
          if (term.text().compareTo(lowerTerm.text()) <= 0) {
            continue;
          }
          checkLower = false;
        }

        if (upperTerm != null)
        {
          int compare = upperTerm.text().compareTo(term.text());

          /* if beyond the upper term, or is exclusive and
           * this is equal to the upper term, break out */
          if ((compare < 0) || (!inclusive && compare == 0)) {
            break;
          }
        }

        // Enable skipping terms (stop-words, bi-grams, etc.) in derived
        // classes.
        //
        if (shouldSkipTerm(term)) {
          continue;
        }

        if (nTerms < TERMS_TO_REPORT) {
          termReport.append(term.text());
          termReport.append(" ");
        }

        // If too many terms, throw an exception that contains a clue
        // so the user can make a query that fixes the problem. The check is
        // "more than the limit" so that a range matching exactly termLimit
        // terms is still accepted, in line with the error message.
        //
        nTerms++;
        if (termLimit > 0 && nTerms > termLimit) {
          throw new TermLimitException(tooManyTermsMessage(termReport));
        }

        SpanTermQuery tq = new SpanTermQuery(term); // found a match
        tq.setBoost(getBoost()); // set the boost
        clauses.add(tq); // add to query
      } while (enumerator.next());
    }
    finally {
      enumerator.close();
    }

    // Now form the final query
    SpanQuery[] clauseArray = new SpanQuery[clauses.size()];
    clauses.toArray(clauseArray);
    SpanOrQuery orQuery = new SpanOrQuery(clauseArray);
    orQuery.setSpanRecording(getSpanRecording());
    return orQuery;
  }

  /** Builds the error text used when the range matches too many terms. */
  private String tooManyTermsMessage(StringBuffer termReport) {
    return "Range query on '" + lowerTerm.field() +
           "' matched too many terms (more than " + termLimit + "). " +
           "First " + TERMS_TO_REPORT + " matches: " +
           termReport.toString();
  }

  /** Enables derived classes to skip certain terms in the index (e.g. stop
   * words, bi-grams, etc.) Default implementation doesn't skip any terms.
   *
   * @param t candidate term that lies within the range
   * @return true to leave the term out of the rewritten query
   */
  protected boolean shouldSkipTerm(Term t) {
    return false;
  }

  /** Combines several queries by delegating to
   * {@link Query#mergeBooleanQueries(Query[])}.
   */
  public Query combine(Query[] queries) {
    return Query.mergeBooleanQueries(queries);
  }

  /** Returns the field name for this query */
  public String getField() {
    // lowerTerm is never null: the constructor substitutes an empty-text
    // term on the upper term's field when no lower bound is given.
    return lowerTerm.field();
  }

  /** Returns the lower term of this range query. When the query was
   * constructed without a lower bound this is a term with empty text.
   */
  public Term getLowerTerm() {
    return lowerTerm;
  }

  /** Returns the upper term of this range query, or null if unbounded */
  public Term getUpperTerm() {
    return upperTerm;
  }

  /** Returns <code>true</code> if the range query is inclusive */
  public boolean isInclusive() {
    return inclusive;
  }

  /** Returns the term limit, if any, for this range query */
  public int getTermLimit() {
    return termLimit;
  }

  /** Formats the query as <code>[lower TO upper]</code> (inclusive) or
   * <code>{lower TO upper}</code> (exclusive), prefixed with the field name
   * when it differs from <code>field</code> and suffixed with the boost when
   * it is not 1.0.
   *
   * @param field field name that may be omitted from the output
   */
  public String toString(String field)
  {
    StringBuffer buffer = new StringBuffer();
    if (!getField().equals(field)) {
      buffer.append(getField());
      buffer.append(":");
    }
    buffer.append(inclusive ? "[" : "{");
    buffer.append(lowerTerm.text());
    buffer.append(" TO ");
    buffer.append(upperTerm != null ? upperTerm.text() : "null");
    buffer.append(inclusive ? "]" : "}");
    if (getBoost() != 1.0f) {
      buffer.append("^");
      buffer.append(Float.toString(getBoost()));
    }
    return buffer.toString();
  }

  /** Should never be called on this query itself, only on the result of
   * {@link SpanRangeQuery#rewrite(IndexReader)}.
   *
   * @throws UnsupportedOperationException always
   */
  public Spans getSpans(IndexReader reader, Searcher searcher)
    throws IOException
  {
    throw new UnsupportedOperationException(
      "SpanRangeQuery must be rewritten before getSpans() is called");
  }

  /** Should never be called on this query itself, only on the result of
   * {@link SpanRangeQuery#rewrite(IndexReader)}.
   *
   * @throws UnsupportedOperationException always
   */
  public Collection getTerms() {
    throw new UnsupportedOperationException(
      "SpanRangeQuery must be rewritten before getTerms() is called");
  }
}