package experiments.collective.entdoccentric.LTR; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.util.List; import java.io.IOException; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; /** * A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>. * This Scorer implements {@link Scorer#advance(int)} and uses advance() on the * given Scorers. */ class DisjunctionSumScorer extends DisjunctionScorer { /** The minimum number of scorers that should match. */ private final int minimumNrMatchers; /** The document number of the current match. */ private int doc = -1; /** The number of subscorers that provide the current match. */ protected int nrMatchers = -1; private double score = Float.NaN; private final int docBase; private final LearnToRankClause[] clauses; /** * Construct a <code>DisjunctionScorer</code>. * * @param weight * The weight to be used. * @param subScorers * A collection of at least two subscorers. * @param minimumNrMatchers * The positive minimum number of subscorers that should match to * match this query. <br> * When <code>minimumNrMatchers</code> is bigger than the number * of <code>subScorers</code>, no matches will be produced. <br> * When minimumNrMatchers equals the number of subScorers, it * more efficient to use <code>ConjunctionScorer</code>. */ public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers, int minimumNrMatchers, LearnToRankClause[] learnToRankWeights, int docBase) throws IOException { super(weight, subScorers.toArray(new Scorer[subScorers.size()]), learnToRankWeights, subScorers.size()); if (minimumNrMatchers <= 0) { throw new IllegalArgumentException( "Minimum nr of matchers must be positive"); } if (numScorers <= 1) { throw new IllegalArgumentException( "There must be at least 2 subScorers"); } this.clauses = learnToRankWeights; this.minimumNrMatchers = minimumNrMatchers; this.docBase = docBase; } /** * Construct a <code>DisjunctionScorer</code>, using one as the minimum * number of matching subscorers. */ public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers, LearnToRankClause[] learnToRankWeights, int docBase) throws IOException { this(weight, subScorers, 1, learnToRankWeights, docBase); } @Override public int nextDoc() throws IOException { assert doc != NO_MORE_DOCS; while (true) { while (subScorers[0].docID() == doc) { if (subScorers[0].nextDoc() != NO_MORE_DOCS) { heapAdjust(0); } else { heapRemoveRoot(); if (numScorers < minimumNrMatchers) { return doc = NO_MORE_DOCS; } } } afterNext(); if (nrMatchers >= minimumNrMatchers) { break; } } return doc; } private void afterNext() throws IOException { final Scorer sub = subScorers[0]; doc = sub.docID(); if (doc == NO_MORE_DOCS) { nrMatchers = Integer.MAX_VALUE; // stop looping } else { score = sub.score() * clauses[0].getWeight(); clauses[0].addFeatureValue(docBase, doc, (float) score); nrMatchers = 1; countMatches(1); countMatches(2); } } // TODO: this currently scores, but so did the previous impl // TODO: remove recursion. // TODO: if we separate scoring, out of here, modify this // and afterNext() to terminate when nrMatchers == minimumNrMatchers // then also change freq() to just always compute it from scratch private void countMatches(int root) throws IOException { if (root < numScorers && subScorers[root].docID() == doc) { nrMatchers++; float val = subScorers[root].score() * clauses[root].getWeight(); // if(root == 5) { // System.out.println("first"+doc); // System.out.println(subScorers[root].toString()+ " " +subScorers[root].score()); // } score += val; clauses[root].addFeatureValue(docBase, doc, val); countMatches((root << 1) + 1); countMatches((root << 1) + 2); } } /** * Returns the score of the current document matching the query. Initially * invalid, until {@link #nextDoc()} is called the first time. */ @Override public float score() throws IOException { return (float) score; } @Override public int docID() { return doc; } @Override public int freq() throws IOException { return nrMatchers; } /** * Advances to the first match beyond the current whose document number is * greater than or equal to a given target. <br> * The implementation uses the advance() method on the subscorers. * * @param target * The target document number. * @return the document whose number is greater than or equal to the given * target, or -1 if none exist. */ @Override public int advance(int target) throws IOException { if (numScorers == 0) return doc = NO_MORE_DOCS; while (subScorers[0].docID() < target) { if (subScorers[0].advance(target) != NO_MORE_DOCS) { heapAdjust(0); } else { heapRemoveRoot(); if (numScorers == 0) { return doc = NO_MORE_DOCS; } } } afterNext(); if (nrMatchers >= minimumNrMatchers) { return doc; } else { return nextDoc(); } } }