package org.apache.lucene.chunk;
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;
/**
 * Removes matches which overlap with another SpanQuery, taking into account
 * overlap between adjacent chunks in a chunked index.
 *
 * <p>Spans of the two clauses are compared on a common position scale: the
 * position of a span is its own start/end offset plus the difference between
 * its document number and the lower of the two current document numbers,
 * multiplied by <code>chunkBump</code>. An include span is reported only if
 * no exclude span lies within <code>slop</code> positions of it on that
 * scale.</p>
 */
public class SpanChunkedNotQuery extends SpanQuery
{
  /** Query whose spans are reported (after filtering). */
  private SpanQuery include;

  /** Query whose spans suppress nearby spans of {@link #include}. */
  private SpanQuery exclude;

  /** Distance that must separate a reported span from every excluded span. */
  private int slop;

  /**
   * Multiplier applied to a document-number difference when converting it
   * to a position offset. Presumably the number of positions by which
   * consecutive chunks are offset in the index -- confirm against the
   * indexer.
   */
  private int chunkBump = 1;

  /**
   * Construct a SpanChunkedNotQuery matching spans from <code>include</code>
   * which have no overlap (within <code>slop</code>) with spans from
   * <code>exclude</code>.
   *
   * @param include query whose matches are filtered
   * @param exclude query whose matches must not overlap those returned
   * @param slop    distance that must separate matches from excluded spans
   * @throws IllegalArgumentException if the two clauses are not on the
   *         same field
   */
  public SpanChunkedNotQuery(SpanQuery include, SpanQuery exclude, int slop)
  {
    // Validate before storing anything; the outcome for callers is the same
    // as before (the exception propagates and the object is discarded).
    if (!include.getField().equals(exclude.getField()))
      throw new IllegalArgumentException("Clauses must have same field.");

    this.include = include;
    this.exclude = exclude;
    this.slop = slop;
  }

  /** Return the SpanQuery whose matches are filtered. */
  public SpanQuery getInclude() {
    return include;
  }

  /** Return the SpanQuery whose matches must not overlap those returned. */
  public SpanQuery getExclude() {
    return exclude;
  }

  /**
   * Set the distance that must separate matches from excluded spans, and
   * the per-document position multiplier used when comparing spans that
   * lie in different documents.
   *
   * @param slop      distance that must separate matches from excluded spans
   * @param chunkBump multiplier applied to the document-number difference
   *                  between two spans when computing their relative
   *                  positions (defaults to 1 if this method is never called)
   */
  public void setSlop(int slop, int chunkBump) {
    this.slop = slop;
    this.chunkBump = chunkBump;
  }

  /** Return the distance that must separate matches from excluded spans. */
  public int getSlop() {
    return slop;
  }

  /** Return the field of the include clause (both clauses share it). */
  public String getField() {
    return include.getField();
  }

  /** Return the terms of the include clause only. */
  public Collection getTerms() {
    return include.getTerms();
  }

  /** Return the two clauses: include at index 0, exclude at index 1. */
  public Query[] getSubQueries() {
    return new Query[] { include, exclude };
  }

  /**
   * Rewrite both clauses. Returns this query unchanged if neither clause
   * was rewritten; otherwise returns a clone holding the rewritten clauses.
   */
  public Query rewrite(IndexReader reader)
    throws IOException
  {
    SpanQuery rewrittenInclude = (SpanQuery)include.rewrite(reader);
    SpanQuery rewrittenExclude = (SpanQuery)exclude.rewrite(reader);
    if (rewrittenInclude == include && rewrittenExclude == exclude)
      return this;

    SpanChunkedNotQuery clone = (SpanChunkedNotQuery)this.clone();
    clone.include = rewrittenInclude;
    clone.exclude = rewrittenExclude;
    return clone;
  }

  /** Return a string of the form <code>spanChunkedNot(include, exclude)</code>. */
  public String toString(String field) {
    StringBuffer buffer = new StringBuffer();
    buffer.append("spanChunkedNot(");
    buffer.append(include.toString(field));
    buffer.append(", ");
    buffer.append(exclude.toString(field));
    buffer.append(")");
    return buffer.toString();
  }

  /**
   * Return an enumeration of the include clause's spans, omitting those
   * that come within <code>slop</code> positions of a span of the exclude
   * clause.
   */
  public Spans getSpans(final IndexReader reader, final Searcher searcher)
    throws IOException
  {
    return new Spans()
    {
      private Spans includeSpans = include.getSpans(reader, searcher);
      private boolean moreInclude = true;

      private Spans excludeSpans = exclude.getSpans(reader, searcher);
      private boolean moreExclude = true;

      /** True until the exclude spans have been positioned on their first entry. */
      private boolean firstTime = true;

      /** Advance to the next include span that is not excluded. */
      public boolean next()
        throws IOException
      {
        if (moreInclude)                        // move to next include
          moreInclude = includeSpans.next();
        return advanceToMatch();
      }

      /**
       * Skip the include spans to <code>target</code>, then advance to the
       * first include span from there that is not excluded. Uses exactly
       * the same exclusion test as {@link #next()}.
       */
      public boolean skipTo(int target)
        throws IOException
      {
        if (moreInclude)                        // skip include
          moreInclude = includeSpans.skipTo(target);
        return advanceToMatch();
      }

      /**
       * Starting from the current include span, move forward until an
       * include span is found that no exclude span comes within
       * <code>slop</code> of, or until the include spans are exhausted.
       *
       * @return true if positioned on a match, false if include is exhausted
       */
      private boolean advanceToMatch()
        throws IOException
      {
        // Position exclude on its first span exactly once, no matter which
        // of next()/skipTo() is called first. Otherwise doc() would be read
        // from an unpositioned enumeration, and a later call would advance
        // exclude a second time and lose its first span.
        if (firstTime) {
          moreExclude = excludeSpans.next();
          firstTime = false;
        }

        while (moreInclude && moreExclude)
        {
          // When the include span starts fewer than 'slop' positions into
          // its document, aim the exclude skip one document earlier so that
          // exclude spans in the preceding document are still examined.
          int includeDoc = includeSpans.doc();
          if (includeSpans.start() < slop)
            includeDoc--;

          if (includeDoc > excludeSpans.doc())  // skip exclude
            moreExclude = excludeSpans.skipTo(includeDoc);

          while (moreExclude                    // while exclude is before
                 &&
                 (endPos(excludeSpans) + slop) <= startPos(includeSpans))
          {
            moreExclude = excludeSpans.next();  // increment exclude
          }

          if (!moreExclude                      // if no intersection
              ||
              endPos(includeSpans) <= (startPos(excludeSpans) - slop))
            break;                              // we found a match

          moreInclude = includeSpans.next();    // intersected: keep scanning
        }
        return moreInclude;
      }

      /**
       * Document number used as the origin of the common position scale:
       * the lower of the two current document numbers, or whichever side
       * is still active if the other is exhausted (0 if both are).
       */
      private int baseDoc() {
        if (!moreInclude)
          return moreExclude ? excludeSpans.doc() : 0;
        if (!moreExclude)
          return includeSpans.doc();
        return Math.min(includeSpans.doc(), excludeSpans.doc());
      }

      /**
       * Start of the given span on the common position scale. Computed in
       * long arithmetic so that a large document gap multiplied by
       * chunkBump cannot overflow and corrupt the comparison.
       */
      private long startPos(Spans spans) {
        return docOffset(spans) + spans.start();
      }

      /** End of the given span on the common position scale (see startPos). */
      private long endPos(Spans spans) {
        return docOffset(spans) + spans.end();
      }

      /** Position offset contributed by the span's distance from baseDoc(). */
      private long docOffset(Spans spans) {
        return ((long)(spans.doc() - baseDoc())) * chunkBump;
      }

      /** Document of the current (include) span. */
      public int doc() {
        return includeSpans.doc();
      }

      /** Start of the current (include) span. */
      public int start() {
        return includeSpans.start();
      }

      /** End of the current (include) span. */
      public int end() {
        return includeSpans.end();
      }

      /** Score of the current include span multiplied by this query's boost. */
      public float score() {
        return includeSpans.score() * getBoost();
      }

      public String toString() {
        return "spans(" + SpanChunkedNotQuery.this.toString() + ")";
      }

      /**
       * Explain the score of the current span. With a boost of exactly 1
       * the include clause's explanation is returned directly; otherwise
       * it is wrapped in a product with the boost.
       */
      public Explanation explain()
        throws IOException
      {
        if (getBoost() == 1.0f)
          return includeSpans.explain();

        Explanation result = new Explanation(0,
                                             "weight(" + toString() +
                                             "), product of:");

        Explanation boostExpl = new Explanation(getBoost(), "boost");
        result.addDetail(boostExpl);

        Explanation inclExpl = includeSpans.explain();
        result.addDetail(inclExpl);

        result.setValue(boostExpl.getValue() * inclExpl.getValue());
        return result;
      }
    };
  }
}