package org.apache.lucene.search; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.List; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.search.BooleanQuery.BooleanWeight; /* Description from Doug Cutting (excerpted from * LUCENE-1483): * * BooleanScorer uses an array to score windows of * 2K docs. So it scores docs 0-2K first, then docs 2K-4K, * etc. For each window it iterates through all query terms * and accumulates a score in table[doc%2K]. It also stores * in the table a bitmask representing which terms * contributed to the score. Non-zero scores are chained in * a linked list. At the end of scoring each window it then * iterates through the linked list and, if the bitmask * matches the boolean constraints, collects a hit. For * boolean queries with lots of frequent terms this can be * much faster, since it does not need to update a priority * queue for each posting, instead performing constant-time * operations per posting. The only downside is that it * results in hits being delivered out-of-order within the * window, which means it cannot be nested within other * scorers. But it works well as a top-level scorer. * * The new BooleanScorer2 implementation instead works by * merging priority queues of postings, albeit with some * clever tricks. For example, a pure conjunction (all terms * required) does not require a priority queue. Instead it * sorts the posting streams at the start, then repeatedly * skips the first to to the last. If the first ever equals * the last, then there's a hit. When some terms are * required and some terms are optional, the conjunction can * be evaluated first, then the optional terms can all skip * to the match and be added to the score. Thus the * conjunction can reduce the number of priority queue * updates for the optional terms. */ final class BooleanScorer extends BulkScorer { private static final class BooleanScorerCollector extends Collector { private BucketTable bucketTable; private int mask; private Scorer scorer; public BooleanScorerCollector(int mask, BucketTable bucketTable) { this.mask = mask; this.bucketTable = bucketTable; } @Override public void collect(final int doc) throws IOException { final BucketTable table = bucketTable; final int i = doc & BucketTable.MASK; final Bucket bucket = table.buckets[i]; if (bucket.doc != doc) { // invalid bucket bucket.doc = doc; // set doc bucket.score = scorer.score(); // initialize score bucket.bits = mask; // initialize mask bucket.coord = 1; // initialize coord bucket.next = table.first; // push onto valid list table.first = bucket; } else { // valid bucket bucket.score += scorer.score(); // increment score bucket.bits |= mask; // add bits in mask bucket.coord++; // increment coord } } @Override public void setNextReader(AtomicReaderContext context) { // not needed by this implementation } @Override public void setScorer(Scorer scorer) { this.scorer = scorer; } @Override public boolean acceptsDocsOutOfOrder() { return true; } } static final class Bucket { int doc = -1; // tells if bucket is valid double score; // incremental score // TODO: break out bool anyProhibited, int // numRequiredMatched; then we can remove 32 limit on // required clauses int bits; // used for bool constraints int coord; // count of terms in score Bucket next; // next valid bucket } /** A simple hash table of document scores within a range. */ static final class BucketTable { public static final int SIZE = 1 << 11; public static final int MASK = SIZE - 1; final Bucket[] buckets = new Bucket[SIZE]; Bucket first = null; // head of valid list public BucketTable() { // Pre-fill to save the lazy init when collecting // each sub: for(int idx=0;idx<SIZE;idx++) { buckets[idx] = new Bucket(); } } public Collector newCollector(int mask) { return new BooleanScorerCollector(mask, this); } public int size() { return SIZE; } } static final class SubScorer { public BulkScorer scorer; // TODO: re-enable this if BQ ever sends us required clauses //public boolean required = false; public boolean prohibited; public Collector collector; public SubScorer next; public boolean more; public SubScorer(BulkScorer scorer, boolean required, boolean prohibited, Collector collector, SubScorer next) { if (required) { throw new IllegalArgumentException("this scorer cannot handle required=true"); } this.scorer = scorer; this.more = true; // TODO: re-enable this if BQ ever sends us required clauses //this.required = required; this.prohibited = prohibited; this.collector = collector; this.next = next; } } private SubScorer scorers = null; private BucketTable bucketTable = new BucketTable(); private final float[] coordFactors; // TODO: re-enable this if BQ ever sends us required clauses //private int requiredMask = 0; private final int minNrShouldMatch; private int end; private Bucket current; // Any time a prohibited clause matches we set bit 0: private static final int PROHIBITED_MASK = 1; private final Weight weight; BooleanScorer(BooleanWeight weight, boolean disableCoord, int minNrShouldMatch, List<BulkScorer> optionalScorers, List<BulkScorer> prohibitedScorers, int maxCoord) throws IOException { this.minNrShouldMatch = minNrShouldMatch; this.weight = weight; for (BulkScorer scorer : optionalScorers) { scorers = new SubScorer(scorer, false, false, bucketTable.newCollector(0), scorers); } for (BulkScorer scorer : prohibitedScorers) { scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(PROHIBITED_MASK), scorers); } coordFactors = new float[optionalScorers.size() + 1]; for (int i = 0; i < coordFactors.length; i++) { coordFactors[i] = disableCoord ? 1.0f : weight.coord(i, maxCoord); } } @Override public boolean score(Collector collector, int max) throws IOException { boolean more; Bucket tmp; FakeScorer fs = new FakeScorer(); // The internal loop will set the score and doc before calling collect. collector.setScorer(fs); do { bucketTable.first = null; while (current != null) { // more queued // check prohibited & required if ((current.bits & PROHIBITED_MASK) == 0) { // TODO: re-enable this if BQ ever sends us required // clauses //&& (current.bits & requiredMask) == requiredMask) { // NOTE: Lucene always passes max = // Integer.MAX_VALUE today, because we never embed // a BooleanScorer inside another (even though // that should work)... but in theory an outside // app could pass a different max so we must check // it: if (current.doc >= max) { tmp = current; current = current.next; tmp.next = bucketTable.first; bucketTable.first = tmp; continue; } if (current.coord >= minNrShouldMatch) { fs.score = (float) (current.score * coordFactors[current.coord]); fs.doc = current.doc; fs.freq = current.coord; collector.collect(current.doc); } } current = current.next; // pop the queue } if (bucketTable.first != null){ current = bucketTable.first; bucketTable.first = current.next; return true; } // refill the queue more = false; end += BucketTable.SIZE; for (SubScorer sub = scorers; sub != null; sub = sub.next) { if (sub.more) { sub.more = sub.scorer.score(sub.collector, end); more |= sub.more; } } current = bucketTable.first; } while (current != null || more); return false; } @Override public String toString() { StringBuilder buffer = new StringBuilder(); buffer.append("boolean("); for (SubScorer sub = scorers; sub != null; sub = sub.next) { buffer.append(sub.scorer.toString()); buffer.append(" "); } buffer.append(")"); return buffer.toString(); } }