/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
* https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sindice.siren.search.node;
import java.io.IOException;
import java.util.Collection;
import java.util.Comparator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IntsRef;
import org.sindice.siren.index.DocsAndNodesIterator;
import org.sindice.siren.util.NodeUtils;
/**
 * A {@link NodeScorer} for conjunctions of a set of {@link NodeScorer}s within
 * a node. All the {@link NodeScorer}s are required.
 *
 * <p>
 *
 * A node is considered matching if all the queries match in the same node.
 * The {@link #nextCandidateDocument()} method iterates over candidate documents
 * that match all the queries. The {@link #nextNode()} method iterates
 * over the matching nodes within a document.
 *
 * <p>
 *
 * Code taken from Lucene's {@code ConjunctionScorer} and adapted for the SIREn
 * use case.
 **/
class NodeConjunctionScorer extends NodeScorer {

  /**
   * The required sub-scorers. The array is reordered in place by
   * {@link #init()}; the last element is always the first one to be advanced.
   */
  private final NodeScorer[] scorers;

  /** Coordination factor multiplied into {@link #scoreInNode()}. */
  private final float coord;

  /** Node returned by {@link #node()}; starts on a single-element {@code -1} sentinel. */
  protected IntsRef lastNode = new IntsRef(new int[] { -1 }, 0, 1);

  /**
   * Document returned by {@link #doc()}: {@code -1} until the first call to
   * {@link #nextCandidateDocument()} or {@link #skipToCandidate(int)}, and
   * {@link DocsAndNodesIterator#NO_MORE_DOC} once exhausted.
   */
  protected int lastDocument = -1;

  /**
   * Creates a conjunction over a collection of sub-scorers.
   *
   * @param weight the weight handed to the {@link NodeScorer} super constructor
   * @param coord the coordination factor applied in {@link #scoreInNode()}
   * @param scorers the required sub-scorers; must not be empty
   * @throws IOException if a sub-scorer fails while being positioned
   * @throws IllegalArgumentException if {@code scorers} is empty
   */
  public NodeConjunctionScorer(final Weight weight, final float coord,
                               final Collection<NodeScorer> scorers)
  throws IOException {
    this(weight, coord, scorers.toArray(new NodeScorer[scorers.size()]));
  }

  /**
   * Creates a conjunction over the given sub-scorers and positions them on
   * their first common candidate document (if any).
   *
   * @param weight the weight handed to the {@link NodeScorer} super constructor
   * @param coord the coordination factor applied in {@link #scoreInNode()}
   * @param scorers the required sub-scorers; must not be empty. The array is
   *        kept by reference and reordered in place.
   * @throws IOException if a sub-scorer fails while being positioned
   * @throws IllegalArgumentException if {@code scorers} is empty
   */
  public NodeConjunctionScorer(final Weight weight, final float coord,
                               final NodeScorer ... scorers)
  throws IOException {
    super(weight);
    if (scorers.length == 0) {
      // Fail fast with a clear message: every method below indexes
      // scorers[scorers.length - 1] and would otherwise fail with an
      // ArrayIndexOutOfBoundsException.
      throw new IllegalArgumentException(
        "NodeConjunctionScorer requires at least one sub-scorer");
    }
    this.scorers = scorers;
    this.coord = coord;
    this.init();
  }

  /**
   * Advances every sub-scorer to its first candidate document, aligns them on
   * the first document they all agree on, and orders the array so that the
   * presumably sparsest scorers are skipped on first.
   */
  private void init() throws IOException {
    for (final NodeScorer scorer : scorers) {
      if (!scorer.nextCandidateDocument()) {
        // If even one of the sub-scorers does not have any documents, this
        // scorer should not attempt to do any more work.
        lastDocument = DocsAndNodesIterator.NO_MORE_DOC;
        lastNode = DocsAndNodesIterator.NO_MORE_NOD;
        return;
      }
    }

    // Sort the array the first time...
    // We don't need to sort the array in any future calls because we know
    // it will already start off sorted (all scorers on same candidate doc).
    // Note that this comparator is not consistent with equals!
    // Also we use mergeSort here to be stable (so order of Scorers that
    // match on first document keeps preserved):
    ArrayUtil.mergeSort(scorers, new Comparator<NodeScorer>() { // sort the array
      @Override
      public int compare(final NodeScorer o1, final NodeScorer o2) {
        // Explicit three-way comparison instead of subtraction, so the result
        // can never be corrupted by integer overflow.
        final int d1 = o1.doc();
        final int d2 = o2.doc();
        return d1 < d2 ? -1 : (d1 == d2 ? 0 : 1);
      }
    });

    // NOTE: doNext() must be called before the re-sorting of the array later on.
    // The reason is this: assume there are 5 scorers, whose first docs are 1,
    // 2, 3, 5, 5 respectively. Sorting (above) leaves the array as is. Calling
    // doNext() here advances all the first scorers to 5 (or a larger doc ID
    // they all agree on).
    // However, if we re-sort before doNext() is called, the order will be 5, 3,
    // 2, 1, 5 and then doNext() will stop immediately, since the first scorer's
    // docs equals the last one. So the invariant that after calling doNext()
    // all scorers are on the same doc ID is broken.
    if (!this.doNext()) {
      // The scorers did not agree on any document.
      lastDocument = DocsAndNodesIterator.NO_MORE_DOC;
      lastNode = DocsAndNodesIterator.NO_MORE_NOD;
      return;
    }

    // If first-time skip distance is any predictor of
    // scorer sparseness, then we should always try to skip first on
    // those scorers.
    // Keep last scorer in its last place (it will be the first
    // to be skipped on), but reverse all of the others so that
    // they will be skipped on in order of original high skip.
    final int end = (scorers.length - 1);
    for (int i = 0; i < (end >> 1); i++) {
      final NodeScorer tmp = scorers[i];
      scorers[i] = scorers[end - i - 1];
      scorers[end - i - 1] = tmp;
    }
  }

  /**
   * Perform a next without initial increment: cycles through the sub-scorers,
   * skipping each one that is behind the current target document, until the
   * scorer under inspection is no longer behind.
   *
   * @return the result of the last {@code skipToCandidate} call issued by the
   *         loop, or {@code true} if no scorer had to be moved. Callers must
   *         therefore combine this with the result of any advance they
   *         performed themselves before calling this method.
   */
  private boolean doNext() throws IOException {
    int first = 0;
    int doc = scorers[scorers.length - 1].doc();
    NodeScorer firstScorer = scorers[first];
    boolean more = true;

    while (firstScorer.doc() < doc) {
      more = firstScorer.skipToCandidate(doc);
      doc = firstScorer.doc();
      // round-robin over the scorers
      first = first == scorers.length - 1 ? 0 : first + 1;
      firstScorer = scorers[first];
    }
    return more;
  }

  /**
   * Skips to the first candidate document, at or beyond {@code target}, on
   * which all the sub-scorers agree.
   *
   * @param target the minimum document to position on
   * @return {@code false} if this scorer is exhausted, {@code true} otherwise
   */
  @Override
  public boolean skipToCandidate(final int target) throws IOException {
    if (lastDocument == DocsAndNodesIterator.NO_MORE_DOC) {
      return false;
    }

    final NodeScorer last = scorers[scorers.length - 1];
    // Remember whether the last scorer could be advanced: doNext() alone
    // cannot report its exhaustion when it has no other scorer to move
    // (e.g., a conjunction of a single scorer).
    boolean advanced = true;
    if (last.doc() < target) {
      advanced = last.skipToCandidate(target);
    }
    final boolean aligned = this.doNext();

    lastDocument = scorers[scorers.length - 1].doc();
    lastNode = scorers[scorers.length - 1].node();
    return advanced && aligned;
  }

  /** Returns the current candidate document of this scorer. */
  @Override
  public int doc() {
    return lastDocument;
  }

  /** Returns the current node of this scorer. */
  @Override
  public IntsRef node() {
    return lastNode;
  }

  /**
   * Moves to the next candidate document on which all the sub-scorers agree.
   * The first call only exposes the position computed during construction.
   *
   * @return {@code false} if this scorer is exhausted, {@code true} otherwise
   */
  @Override
  public boolean nextCandidateDocument() throws IOException {
    if (lastDocument == DocsAndNodesIterator.NO_MORE_DOC) {
      return false;
    }
    else if (lastDocument == -1) { // first time called
      lastDocument = scorers[scorers.length - 1].doc();
      lastNode = scorers[scorers.length - 1].node();
      return true;
    }

    // advance the last scorer to the next candidate document, keeping its
    // result: doNext() alone cannot report its exhaustion when it has no
    // other scorer to move (e.g., a conjunction of a single scorer).
    final boolean advanced = scorers[scorers.length - 1].nextCandidateDocument();
    final boolean aligned = this.doNext();

    lastDocument = scorers[scorers.length - 1].doc();
    lastNode = scorers[scorers.length - 1].node();
    return advanced && aligned;
  }

  /**
   * Moves to the next node on which all the sub-scorers are positioned.
   *
   * @return {@code true} if such a node was found; {@code false} otherwise,
   *         in which case {@link #node()} returns
   *         {@link DocsAndNodesIterator#NO_MORE_NOD}
   */
  @Override
  public boolean nextNode() throws IOException {
    int first = 0;
    NodeScorer lastScorer = scorers[scorers.length - 1];
    NodeScorer firstScorer = scorers[first];

    // scan forward in last
    // (identity comparison: NO_MORE_NOD is used as a sentinel instance)
    if (lastNode == DocsAndNodesIterator.NO_MORE_NOD || !lastScorer.nextNode()) {
      lastNode = DocsAndNodesIterator.NO_MORE_NOD;
      return false;
    }

    // Leapfrog: while the scorer under inspection is behind the most advanced
    // one, move it forward; it then becomes the reference for the next scorer.
    while (NodeUtils.compare(firstScorer.node(), lastScorer.node()) < 0) {
      do {
        if (!firstScorer.nextNode()) { // scan forward in first
          lastNode = DocsAndNodesIterator.NO_MORE_NOD;
          return false;
        }
      } while (NodeUtils.compare(firstScorer.node(), lastScorer.node()) < 0);
      lastScorer = firstScorer;
      first = (first == (scorers.length - 1)) ? 0 : first + 1;
      firstScorer = scorers[first];
    }

    // all equal: a match
    lastNode = firstScorer.node();
    return true;
  }

  /** Returns the number of required sub-scorers, all of which match in the node. */
  @Override
  public float freqInNode() throws IOException {
    // return the number of required matchers in the node
    return this.scorers.length;
  }

  /**
   * Returns the sum of the sub-scorers' scores in the current node,
   * multiplied by the coordination factor.
   */
  @Override
  public float scoreInNode() throws IOException {
    float curNodeScore = 0;
    for (final NodeScorer scorer : scorers) {
      curNodeScore += scorer.scoreInNode();
    }
    // TODO: why is there a coord here ?
    return curNodeScore * coord;
  }

  @Override
  public String toString() {
    return "NodeConjunctionScorer(" + weight + "," + lastDocument + "," + lastNode + ")";
  }

}