package org.apache.lucene.search.spans;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Acknowledgements:
*
* A significant amount of new and/or modified code in this module
* was made possible by a grant from the Andrew W. Mellon Foundation,
* as part of the Melvyl Recommender Project.
*/
import java.io.IOException;
import java.util.Set;
import java.util.List;
import java.util.ArrayList;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Similarity;
/**
 * Calculates spans that match several queries "near" each other. In-order
 * matches score higher than out-of-order matches.
 *
 * <p>Every clause of the query is wrapped in a {@link SpansCell}. The cells
 * are held twice: in query order (the <code>ordered</code> list) and in a
 * doubly-linked list (<code>firstCell</code> .. <code>lastCell</code>) that is
 * kept sorted by current position, see {@link SpansCell#lessThan(SpansCell)}.
 * A candidate match runs from the start of the first cell in the linked list
 * to the end of the last one.
 */
class NearSpans implements Spans
{
  private final SpanNearQuery query;
  private final Similarity similarity;

  private final List<SpansCell> ordered = new ArrayList<SpansCell>(); // spans in query order

  private final int slop;        // from query
  private final boolean inOrder; // from query

  private SpansCell firstCell; // linked list of spans
  private SpansCell lastCell;  // fully sorted
  private int nCellsInList;    // number of cells currently linked (used by checkList only)

  private int totalLength;  // sum of current lengths
  private float totalScore; // sum of current scores
  private int totalSlop;    // sloppiness of current match; negative means "not cached"

  private boolean more = true;      // true iff not done
  private boolean firstTime = true; // true before first next()

  /** Wraps a Spans, and can be used to form a linked list. */
  private class SpansCell implements Spans
  {
    private final Spans spans;
    private SpansCell prevCell;
    private SpansCell nextCell;
    private int length = -1; // -1 until the cell has been positioned once
    private float score;
    private final int index; // position of the clause within the query

    /**
     * @param spans the clause spans to wrap
     * @param index position of the clause within the query
     */
    public SpansCell(Spans spans, int index) {
      this.spans = spans;
      this.index = index;
    }

    /**
     * Advances the wrapped spans to their next position and, if there is
     * one, updates the running totals and re-sorts this cell in the list.
     *
     * @return true if the wrapped spans have a further position
     */
    public boolean next()
      throws IOException
    {
      preChange();
      boolean hasMore = spans.next(); // move to next
      if (hasMore) {
        postChange();
      }
      return hasMore;
    }

    /**
     * Skips the wrapped spans to <code>target</code> and, if that succeeds,
     * updates the running totals and re-sorts this cell in the list.
     *
     * @param target value passed through to the wrapped spans' skipTo
     * @return true if the wrapped spans have a position after the skip
     */
    public boolean skipTo(int target)
      throws IOException
    {
      preChange();
      boolean hasMore = spans.skipTo(target); // skip
      if (hasMore) {
        postChange();
      }
      return hasMore;
    }

    /** Called just before advancing the cell */
    private void preChange()
    {
      if (length != -1) { // subtract old length and score from the totals
        totalLength -= length;
        totalScore -= score;
      }
    }

    /** Called just after advancing the cell */
    private void postChange() {
      length = end() - start(); // compute new length
      totalLength += length;    // add new length to total
      score = spans.score();
      totalScore += score;
      adjustPosition();
    }

    /** If cell needs to move toward end of list, move it. */
    public void adjustPosition()
    {
      // Already at the end? Can't move forward.
      if (this == lastCell) {
        return;
      }

      // Optimization for common case: jump to end
      SpansCell putBefore;
      if (!lessThan(lastCell)) {
        putBefore = null;
      }
      else
      {
        // Find where to put it: in front of the first following cell
        // that this cell sorts before.
        putBefore = nextCell;
        while (putBefore != null && !lessThan(putBefore)) {
          putBefore = putBefore.nextCell;
        }
      }

      // If changing position, unlink and relink
      if (putBefore != nextCell) {
        unlink();
        linkBefore(putBefore);
      }
    }

    /** Remove the current cell from the linked list */
    private void unlink()
    {
      if (prevCell == null) {
        firstCell = nextCell;
      }
      else {
        prevCell.nextCell = nextCell;
      }

      // Removing the tail: the predecessor becomes the new tail. (Pointing
      // lastCell at the removed cell would leave the list referring to a
      // cell that is no longer linked.)
      if (nextCell == null) {
        lastCell = prevCell;
      }
      else {
        nextCell.prevCell = prevCell;
      }

      nextCell = prevCell = null;
      --nCellsInList;
    }

    /** Link the cell into the list just before 'other', or at the tail if null */
    private void linkBefore(SpansCell other) {
      nextCell = other;
      prevCell = (other == null) ? lastCell : other.prevCell;
      ++nCellsInList;
      fixLinks();
    }

    /** Link the cell into the list just after 'other', or at the head if null */
    private void linkAfter(SpansCell other) {
      prevCell = other;
      nextCell = (other == null) ? firstCell : other.nextCell;
      ++nCellsInList;
      fixLinks();
    }

    /**
     * Helper function for linkAfter and linkBefore: with this cell's own
     * prev/next already set, points the neighbours (or the list head/tail)
     * back at this cell.
     */
    private void fixLinks()
    {
      if (nextCell == null) {
        lastCell = this;
      }
      else {
        nextCell.prevCell = this;
      }

      if (prevCell == null) {
        firstCell = this;
      }
      else {
        prevCell.nextCell = this;
      }
    }

    /** Debugging only: check that the links are all correct */
    @SuppressWarnings("unused")
    private void checkList()
    {
      System.out.println("DEBUGGING ONLY");
      int nCells = 0;
      for (SpansCell cell = firstCell; cell != null; cell = cell.nextCell)
      {
        if (cell == firstCell) {
          assert cell.prevCell == null;
        }
        if (cell == lastCell) {
          assert cell.nextCell == null;
        }

        if (cell.prevCell == null) {
          assert firstCell == cell;
        }
        else {
          assert cell.prevCell.nextCell == cell;
        }

        if (cell.nextCell == null) {
          assert lastCell == cell;
        }
        else {
          assert cell.nextCell.prevCell == cell;
        }

        ++nCells;
        assert nCells <= nCellsInList; // infinite loop?
      }
      assert nCells == nCellsInList;
    }

    /**
     * Ordering function for cells in the list: compares by doc, then start,
     * then end. On a complete tie the cell with the larger query index
     * sorts first; the original author notes this must not be flipped
     * because the out-of-order check depends on it.
     *
     * @return true if this cell sorts before <code>otherCell</code>
     */
    private final boolean lessThan(SpansCell otherCell)
    {
      if (doc() != otherCell.doc()) {
        return doc() < otherCell.doc();
      }
      if (start() != otherCell.start()) {
        return start() < otherCell.start();
      }
      if (end() != otherCell.end()) {
        return end() < otherCell.end();
      }
      return index > otherCell.index; // do not flip: needed for out-of-order check
    }

    /** @return the wrapped spans' current doc */
    public int doc() {
      return spans.doc();
    }

    /** @return the wrapped spans' current start */
    public int start() {
      return spans.start();
    }

    /** @return the wrapped spans' current end */
    public int end() {
      return spans.end();
    }

    /**
     * Not supported on a cell. The enclosing class reads the cached
     * <code>score</code> field directly instead.
     *
     * @throws UnsupportedOperationException always
     */
    public float score() {
      throw new UnsupportedOperationException();
    }

    /** @return the explanation produced by the wrapped spans */
    public Explanation explain()
      throws IOException
    {
      return spans.explain();
    }

    /** Empty implementation; the passed set is left untouched. */
    public void collectTerms(Set terms) {
    }

    /** @return the wrapped spans' string form followed by "#" and the clause index */
    public String toString() {
      return spans.toString() + "#" + index;
    }
  }

  /**
   * Builds one cell per clause of <code>query</code>, in clause order. The
   * cells are not linked or positioned until the first call to
   * {@link #next()} or {@link #skipTo(int)}.
   *
   * @param query    the near query; supplies slop, ordering flag, clauses and boost
   * @param reader   passed to each clause's getSpans
   * @param searcher passed to each clause's getSpans; also supplies the Similarity
   * @throws IOException if obtaining a clause's spans fails
   */
  public NearSpans(SpanNearQuery query, IndexReader reader, Searcher searcher)
    throws IOException
  {
    this.query = query;
    this.slop = query.getSlop();
    this.inOrder = query.isInOrder();

    SpanQuery[] clauses = query.getClauses(); // initialize spans & list
    for (int i = 0; i < clauses.length; i++) {
      SpansCell cell = // construct clause spans
        new SpansCell(clauses[i].getSpans(reader, searcher), i);
      ordered.add(cell); // add to ordered
    }

    similarity = searcher.getSimilarity();
  }

  /**
   * Moves to the next match.
   *
   * @return true if positioned on a match, false once the spans are exhausted
   */
  public boolean next()
    throws IOException
  {
    if (firstTime) {
      initList(-1);
      firstTime = false;
    }
    else if (more) {
      more = advanceOneCell(); // trigger further scanning
    }

    while (more)
    {
      // Get rid of cached slop value.
      totalSlop = -1;

      // skip to doc w/ all clauses
      while (more && firstCell.doc() < lastCell.doc()) {
        more = firstCell.skipTo(lastCell.doc()); // skip first upto last
      }
      if (!more) {
        return false;
      }

      // found doc w/ all clauses - is there a match?
      if (atMatch()) {
        return true;
      }

      // Trigger further scanning.
      more = advanceOneCell();
    }

    return false; // no more matches
  }

  /**
   * Advances a single cell to look for the next candidate match.
   *
   * @return the result of the advanced cell's next()
   */
  private boolean advanceOneCell()
    throws IOException
  {
    // Is it even possible to adjust the order and get a better match?
    int matchLength = lastCell.end() - firstCell.start();
    if (matchLength - totalLength > slop)
    {
      // Nope... just advance the first cell.
      return firstCell.next();
    }

    // If things are out of order, but the endpoints are within the
    // specified slop, we might be able to get a better match
    // by advancing one of the out-of-order spans, rather than
    // the first span. This can happen, for instance, if there are
    // repeated terms in a phrase query.
    //
    int index = 0;
    for (SpansCell cell = firstCell; cell != null; cell = cell.nextCell) {
      if (cell.index != index++) {
        return cell.next();
      }
    }

    // No out-of-order cell found... just advance the first cell.
    return firstCell.next();
  }

  /**
   * Skips forward until the first cell is at or beyond <code>target</code>,
   * then returns the match found there or the next one after it.
   *
   * @param target document to skip to
   * @return true if positioned on a match, false once the spans are exhausted
   */
  public boolean skipTo(int target)
    throws IOException
  {
    if (firstTime) { // initialize
      initList(target);
      firstTime = false;
    }
    else { // normal case
      // The first cell changes as cells are re-sorted, so keep going
      // until whichever cell is first has reached the target.
      while (more && firstCell.doc() < target) { // skip as needed
        more = firstCell.skipTo(target);
      }
    }

    // Get rid of cached slop value.
    totalSlop = -1;

    if (!more) {
      return false;
    }
    if (atMatch()) { // at a match?
      return true;
    }
    return next(); // no, scan
  }

  /** @return the doc of the first cell in the sorted list */
  public int doc() {
    return firstCell.doc();
  }

  /** @return the start of the first cell in the sorted list */
  public int start() {
    return firstCell.start();
  }

  /** @return the end of the last cell in the sorted list */
  public int end() {
    return lastCell.end();
  }

  /**
   * @return the sum of the cells' current scores, times the query boost,
   *         times the similarity's sloppyFreq for the current total slop
   */
  public float score() {
    return totalScore * query.getBoost() * similarity.sloppyFreq(totalSlop());
  }

  /**
   * @return "spans(query)@" followed by "START" before the first advance,
   *         "doc:start-end" while positioned, or "END" when exhausted
   */
  public String toString() {
    return "spans(" + query.toString() + ")@" +
           (firstTime ? "START"
            : (more ? (doc() + ":" + start() + "-" + end()) : "END"));
  }

  /**
   * Links every cell into the sorted list and positions it, stopping early
   * as soon as any cell has no position.
   *
   * @param target if negative each cell is advanced with next(); otherwise
   *               each cell is skipped to this target
   */
  private void initList(int target)
    throws IOException
  {
    for (int i = 0; more && i < ordered.size(); i++) {
      SpansCell cell = ordered.get(i);
      cell.linkAfter(null); // link as first to start with
      if (target < 0) {
        more = cell.next(); // move to first entry
      }
      else {
        more = cell.skipTo(target);
      }
    }
  }

  /**
   * @return true if all cells are in the same doc, the slop limit is met,
   *         and (for in-order queries) the cells are in query order
   */
  private boolean atMatch() {
    return (firstCell.doc() == lastCell.doc()) && checkSlop() &&
           (!inOrder || matchIsOrdered());
  }

  /** @return true if the current positions satisfy the query's slop */
  private boolean checkSlop()
  {
    int matchLength = lastCell.end() - firstCell.start();

    // Is a match even possible?
    if (matchLength - totalLength > slop) {
      return false;
    }

    // Do a more thorough slop calculation.
    return totalSlop() <= slop;
  }

  /** @return true if the cells' starts strictly increase in query order */
  private boolean matchIsOrdered()
  {
    int lastStart = -1;
    for (int i = 0; i < ordered.size(); i++) {
      int start = ordered.get(i).start();
      if (!(start > lastStart)) {
        return false;
      }
      lastStart = start;
    }
    return true;
  }

  /**
   * Computes (and caches) the slop of the current positions by walking the
   * cells in query order and summing the gap between each cell and the
   * one before it.
   *
   * @return the total slop of the current positions
   */
  private int totalSlop()
  {
    // If cached value is still valid, just return it.
    if (totalSlop >= 0) {
      return totalSlop;
    }

    // Need to recalculate.
    int matchSlop = 0;
    int lastStart = -1;
    int lastEnd = -1;
    for (int i = 0; i < ordered.size(); i++)
    {
      SpansCell cell = ordered.get(i);
      int start = cell.start();
      int end = cell.end();

      // First cell, just record the start and end. Subsequent cells,
      // calculate the slop.
      //
      if (i > 0)
      {
        if (end <= lastStart) {
          // The new cell is before the old: penalize it for being
          // out-of-order (the extra +1).
          matchSlop += (lastStart - end) + 1;
        }
        else if (start >= lastEnd) {
          // It is after: the gap is the slop.
          matchSlop += (start - lastEnd);
        }
        // Otherwise overlapping... zero slop
      }

      lastStart = start;
      lastEnd = end;
    }

    return totalSlop = matchSlop;
  }

  /**
   * Builds an explanation of the current score: the sum of the clause
   * scores, the query boost (listed only when it is not 1.0), and the
   * sloppyFreq factor.
   *
   * @return the explanation; its value is the product of the three factors
   */
  public Explanation explain()
    throws IOException
  {
    Explanation result = new Explanation(0,
                                         "weight(" + toString() +
                                         "), product of:");

    Explanation sumExpl = new Explanation(0, "totalMatchScore, sum of:");

    // Explain the sum of the matches
    float scoreSum = 0.0f;
    for (int i = 0; i < ordered.size(); i++) {
      SpansCell cell = ordered.get(i);
      scoreSum += cell.score;
      sumExpl.addDetail(cell.spans.explain());
    }
    sumExpl.setValue(scoreSum);
    result.addDetail(sumExpl);

    // Explain the boost, if any. The boost still takes part in the
    // product below even when it is not listed as a detail.
    Explanation boostExpl = new Explanation(query.getBoost(), "boost");
    if (query.getBoost() != 1.0f) {
      result.addDetail(boostExpl);
    }

    // And explain the slop adjustment.
    int currentSlop = totalSlop();
    Explanation slopExpl = new Explanation(similarity.sloppyFreq(currentSlop),
                                           "sloppyFreq(slop=" + currentSlop +
                                           ")");
    result.addDetail(slopExpl);

    result.setValue(
      sumExpl.getValue() * boostExpl.getValue() * slopExpl.getValue());
    return result;
  }
}