/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
* https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sindice.siren.search.node;
import java.io.IOException;
import org.apache.lucene.util.IntsRef;
import org.sindice.siren.index.DocsAndNodesIterator;
import org.sindice.siren.index.DocsNodesAndPositionsEnum;
import org.sindice.siren.util.NodeUtils;
/**
* A NodeDisjunctionScorerQueue maintains a partial ordering of its
* {@link NodeScorer}s such that the least scorer can always be found in
* constant time. This allows to retrieve in order all matching documents and
* nodes from multiple scorers, as if they were one stream.
* <p>
* <b>NOTE:</b> After initialisation, the NodeDisjunctionScorerQueue is
* positioned on the first candidate document.
* <p>
* Based on a binary heap data structure. The root (top) of the heap contains
* the least scorer.
* <p>
* {@link #put(NodeScorer)} requires log(size) time.
* <p>
* The ordering is based on {@link NodeScorer#doc()} and
* {@link NodeScorer#node()}.
* <p>
* Code taken from {@code ScorerDocQueue} and adapted for the Siren use case.
*/
class NodeDisjunctionScorerQueue {

  /**
   * Binary heap storage. Slot 0 is never used for an entry; the least entry
   * lives in slot 1 and the children of slot {@code i} are in slots
   * {@code 2i} and {@code 2i + 1}.
   */
  private final HeapedScorerNode[] heap;

  /** Number of scorers currently stored in the heap. */
  private int size;

  /**
   * Cached number of scorers positioned on the current (top) document and
   * node. A negative value means that the count has not been computed yet
   * for the current position.
   */
  private int nrMatchersInNode = -1;

  /** Cached sum of the scores computed together with {@link #nrMatchersInNode}. */
  private float scoreInNode = 0;

  /**
   * Heap entry caching the document and node of a scorer at the time of the
   * last call to {@link #adjust()}.
   */
  private static final class HeapedScorerNode {

    NodeScorer scorer;
    int doc;
    IntsRef node;

    HeapedScorerNode(final NodeScorer s) {
      this.scorer = s;
      this.adjust();
    }

    /** Refresh the cached document and node from the underlying scorer. */
    void adjust() {
      doc = scorer.doc();
      node = scorer.node();
    }

    @Override
    public String toString() {
      return "[" + doc + "," + "[" + node + "]]";
    }

  }

  /** Same as heap[1], only for speed; {@code null} when the queue is empty. */
  private HeapedScorerNode topHSN;

  /**
   * Create a NodeDisjunctionScorerQueue with a given capacity.
   *
   * @param capacity the maximum number of scorers the queue can hold
   */
  public NodeDisjunctionScorerQueue(final int capacity) {
    size = 0;
    heap = new HeapedScorerNode[capacity + 1];
    // Do not read heap[1] here: with a capacity of 0 the array has a single
    // slot and the read would throw ArrayIndexOutOfBoundsException.
    topHSN = null;
  }

  /**
   * Adds a {@link NodeScorer} to the queue in log(size) time. If one tries to
   * add more scorers than the capacity, a RuntimeException
   * (ArrayIndexOutOfBound) is thrown.
   * <p>
   * The scorer is advanced to its next candidate document to initiate heap
   * ordering. A scorer that has no candidate document is not added.
   *
   * @param scorer the scorer to add
   * @throws IOException if the scorer fails to advance
   */
  public final void put(final NodeScorer scorer) throws IOException {
    if (scorer.nextCandidateDocument()) { // if scorer exhausted, no need to add it
      // store first, then publish the new size: if the store overflows the
      // array, the queue is left untouched
      final int slot = size + 1;
      heap[slot] = new HeapedScorerNode(scorer);
      size = slot;
      this.upHeap();
      // the new scorer may be positioned on the current node: any previous
      // count is no longer reliable
      nrMatchersInNode = -1;
    }
  }

  /**
   * For test purpose only
   * <p>
   * Returns the least Scorer of the NodeDisjunctionScorerQueue in constant time.
   * Should not be used when the queue is empty.
   */
  protected NodeScorer top() {
    return topHSN.scorer;
  }

  /**
   * Return the current document, or
   * {@link DocsAndNodesIterator#NO_MORE_DOC} if the queue is empty.
   */
  public int doc() {
    return size == 0 ? DocsAndNodesIterator.NO_MORE_DOC : topHSN.doc;
  }

  /**
   * Return the current node, or {@link DocsAndNodesIterator#NO_MORE_NOD} if
   * the queue is empty.
   */
  public IntsRef node() {
    return size == 0 ? DocsAndNodesIterator.NO_MORE_NOD : topHSN.node;
  }

  /**
   * Count the number of subscorers that provide the match in the current node,
   * and sum their score.
   * <p>
   * Counting the number of matchers within a node during node iteration is not
   * possible as it will require to cache the latest top node. This would have
   * required a copy of the array node since array node are reused in the
   * {@link DocsNodesAndPositionsEnum}.
   * <p>
   * Iterating over the elements of the queue enables us to save such a array
   * copy.
   * <p>
   * The result is cached until the queue is moved; calling this method again
   * before that is a no-op. Must not be called when the queue is empty.
   *
   * @see #nrMatchersInNode()
   * @see #scoreInNode()
   */
  protected void countAndSumMatchers() throws IOException {
    if (nrMatchersInNode < 0) { // count and sum not done
      nrMatchersInNode = 1; // init counter at 1 to include the top
      scoreInNode = topHSN.scorer.scoreInNode();
      // perform recursive traversal of the heap
      this.computeSumRecursive(1);
    }
  }

  /**
   * Perform a traversal of the heap binary tree using recursion. Given a node,
   * visit its children and check if their subscorer is equivalent to the least
   * subscorer. If the subscorer is equivalent, it increments the number of
   * matchers, sum its score with the current score, and recursively visit its
   * two children.
   * <p>
   * A child that is not on the top document and node is not descended into.
   *
   * @param root the heap index whose children are inspected
   */
  private final void computeSumRecursive(final int root) throws IOException {
    final int i1 = (root << 1); // index of first child node
    final int i2 = (root << 1) + 1; // index of second child node

    if (i1 <= size) {
      final HeapedScorerNode child1 = heap[i1];
      if (topHSN.doc == child1.doc && topHSN.node.intsEquals(child1.node)) {
        nrMatchersInNode++;
        scoreInNode += child1.scorer.scoreInNode();
        this.computeSumRecursive(i1);
      }
    }

    if (i2 <= size) {
      final HeapedScorerNode child2 = heap[i2];
      if (topHSN.doc == child2.doc && topHSN.node.intsEquals(child2.node)) {
        nrMatchersInNode++;
        scoreInNode += child2.scorer.scoreInNode();
        this.computeSumRecursive(i2);
      }
    }
  }

  /**
   * Return the number of subscorers that provide the match in the current node.
   * <p>
   * <b>NOTE:</b> {@link #countAndSumMatchers()} must be called first; otherwise
   * a negative value may be returned.
   */
  public int nrMatchersInNode() {
    return nrMatchersInNode;
  }

  /**
   * Return the sum of the score of the subscorers that provide the match in
   * the current node.
   * <p>
   * <b>NOTE:</b> {@link #countAndSumMatchers()} must be called first.
   */
  public float scoreInNode() {
    return scoreInNode;
  }

  /**
   * Move all the scorers (including the top scorer) that have document
   * equals to the top document. If one of the subscorer is exhausted,
   * removes the scorer.
   *
   * @return false if the queue is empty after the move, true otherwise.
   */
  public final boolean nextCandidateDocumentAndAdjustElsePop() throws IOException {
    if (topHSN == null) { // empty queue
      return false;
    }

    final int currentDocument = topHSN.doc;
    // size is tested first: topHSN becomes null once the last scorer is popped
    while (size > 0 && topHSN.doc == currentDocument) {
      this.checkAdjustElsePop(topHSN.scorer.nextCandidateDocument());
    }
    // reset nrMatchersInNode
    nrMatchersInNode = -1;
    // no more doc when queue empty
    return (size > 0);
  }

  /**
   * Move all the scorers that have document and node equal to the top
   * document and node to their next node, and adjust the heap.
   *
   * @return false if the queue is empty or if the top node after the move is
   *         {@link DocsAndNodesIterator#NO_MORE_NOD}, true otherwise.
   */
  public final boolean nextNodeAndAdjust() throws IOException {
    // Guard against an empty queue: topHSN is null in that case, and a
    // matcher count left over from a previous position would otherwise make
    // the loop below dereference it.
    if (size == 0) {
      nrMatchersInNode = -1;
      return false;
    }

    // count number of scorers having the same document and node
    // counting the number of scorers and then performing the iterations of
    // all the scorers allows to avoid a node array copy (i.e., current node cache)
    if (nrMatchersInNode < 0) {
      this.countAndSumMatchers();
    }

    // Move the scorers to the next node
    for (int i = 0; i < nrMatchersInNode; i++) {
      topHSN.scorer.nextNode();
      this.adjustTop();
    }

    // reset nrMatchersInNode
    nrMatchersInNode = -1;
    // if top node has sentinel value, it means that there is no more nodes
    return this.node() != DocsAndNodesIterator.NO_MORE_NOD;
  }

  /**
   * Move all the scorers whose document is less than <i>target</i> to a
   * candidate document whose number is greater than or equal to
   * <i>target</i>. If one of the subscorer is exhausted, removes the scorer.
   * <p>
   * <b>NOTE:</b> when <code> target &le; current</code> no scorer is moved.
   *
   * @param target the minimum document to move to
   * @return false if the queue is empty after the move, true otherwise.
   */
  public final boolean skipToCandidateAndAdjustElsePop(final int target)
  throws IOException {
    if (topHSN == null) { // empty queue
      return false;
    }

    while (size > 0 && topHSN.doc < target) {
      this.checkAdjustElsePop(topHSN.scorer.skipToCandidate(target));
    }
    // The queue may now be on another document or be empty: a matcher count
    // computed before the skip must not be reused.
    nrMatchersInNode = -1;
    // no more doc when queue empty
    return (size > 0);
  }

  /**
   * If the condition is true, refresh the top entry from its scorer;
   * otherwise pop the top. In both cases the heap order is then restored.
   *
   * @param cond true if the top scorer moved successfully
   * @return the given condition
   */
  private boolean checkAdjustElsePop(final boolean cond) {
    if (cond) {
      topHSN.adjust();
    }
    else { // pop
      heap[1] = heap[size]; // move last to first
      heap[size] = null;
      size--;
    }
    this.downHeap();
    return cond;
  }

  /**
   * Should be called when the scorer at top changes of values.
   */
  public final void adjustTop() {
    topHSN.adjust();
    this.downHeap();
  }

  /**
   * Compares the two given scorers for order, first on their document and
   * then on their node. Returns a negative integer, zero, or a positive
   * integer as the first scorer is less than, equal to, or greater than the
   * second one.
   */
  private int compareTo(final NodeScorer scorer, final NodeScorer other) {
    // compare docs
    final int doc = scorer.doc();
    final int otherDoc = other.doc();
    if (doc != otherDoc) {
      // explicit comparison instead of a subtraction, which can overflow
      return doc < otherDoc ? -1 : 1;
    }
    // compare nodes
    return NodeUtils.compare(scorer.node(), other.node());
  }

  /**
   * Returns the number of scorers currently stored in the
   * NodeDisjunctionScorerQueue.
   **/
  public final int size() {
    return size;
  }

  /** Removes all entries from the NodeDisjunctionScorerQueue. */
  public final void clear() {
    for (int i = 0; i <= size; i++) {
      heap[i] = null;
    }
    size = 0;
    // keep the "topHSN == heap[1]" invariant and drop the cached count, so
    // that no removed scorer can be reached after the queue is cleared
    topHSN = null;
    nrMatchersInNode = -1;
  }

  /**
   * Move the last entry of the heap up until its parent is not greater than
   * it, then refresh the cached top.
   */
  private final void upHeap() {
    int i = size;
    final HeapedScorerNode node = heap[i]; // save bottom node
    int j = i >>> 1;
    while ((j > 0) && (this.compareTo(node.scorer, heap[j].scorer) < 0)) {
      heap[i] = heap[j]; // shift parents down
      i = j;
      j = j >>> 1;
    }
    heap[i] = node; // install saved node
    topHSN = heap[1];
  }

  /**
   * Move the first entry of the heap down until none of its children is less
   * than it, then refresh the cached top. When the heap is empty, the cached
   * top becomes {@code null}.
   */
  private final void downHeap() {
    int i = 1;
    final HeapedScorerNode node = heap[i]; // save top node
    int j = i << 1; // find smaller child
    int k = j + 1;
    if ((k <= size) && (this.compareTo(heap[k].scorer, heap[j].scorer) < 0)) {
      j = k;
    }
    while ((j <= size) && (this.compareTo(heap[j].scorer, node.scorer) < 0)) {
      heap[i] = heap[j]; // shift up child
      i = j;
      j = i << 1;
      k = j + 1;
      if (k <= size && (this.compareTo(heap[k].scorer, heap[j].scorer) < 0)) {
        j = k;
      }
    }
    heap[i] = node; // install saved node
    topHSN = heap[1];
  }

}