/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.search; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.EnumMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Bits; /** * Expert: the Weight for BooleanQuery, used to * normalize, score and explain these queries. */ final class BooleanWeight extends Weight { /** The Similarity implementation. */ final Similarity similarity; final BooleanQuery query; final ArrayList<Weight> weights; final boolean needsScores; BooleanWeight(BooleanQuery query, IndexSearcher searcher, boolean needsScores, float boost) throws IOException { super(query); this.query = query; this.needsScores = needsScores; this.similarity = searcher.getSimilarity(needsScores); weights = new ArrayList<>(); for (BooleanClause c : query) { Weight w = searcher.createWeight(c.getQuery(), needsScores && c.isScoring(), boost); weights.add(w); } } @Override public void extractTerms(Set<Term> terms) { int i = 0; for (BooleanClause clause : query) { if (clause.isScoring() || (needsScores == false && clause.isProhibited() == false)) { weights.get(i).extractTerms(terms); } i++; } } @Override public Explanation explain(LeafReaderContext context, int doc) throws IOException { final int minShouldMatch = query.getMinimumNumberShouldMatch(); List<Explanation> subs = new ArrayList<>(); float sum = 0.0f; boolean fail = false; int matchCount = 0; int shouldMatchCount = 0; Iterator<BooleanClause> cIter = query.iterator(); for (Iterator<Weight> wIter = weights.iterator(); wIter.hasNext();) { Weight w = wIter.next(); BooleanClause c = cIter.next(); Explanation e = w.explain(context, doc); if (e.isMatch()) { if (c.isScoring()) { subs.add(e); sum += e.getValue(); } else if (c.isRequired()) { subs.add(Explanation.match(0f, "match on required clause, product of:", Explanation.match(0f, Occur.FILTER + " clause"), e)); } else if (c.isProhibited()) { subs.add(Explanation.noMatch("match on prohibited clause (" + c.getQuery().toString() + ")", e)); fail = true; } if (!c.isProhibited()) { matchCount++; } if (c.getOccur() == Occur.SHOULD) { shouldMatchCount++; } } else if (c.isRequired()) { subs.add(Explanation.noMatch("no match on required clause (" + c.getQuery().toString() + ")", e)); fail = true; } } if (fail) { return Explanation.noMatch("Failure to meet condition(s) of required/prohibited clause(s)", subs); } else if (matchCount == 0) { return Explanation.noMatch("No matching clauses", subs); } else if (shouldMatchCount < minShouldMatch) { return Explanation.noMatch("Failure to match minimum number of optional clauses: " + minShouldMatch, subs); } else { // we have a match return Explanation.match(sum, "sum of:", subs); } } static BulkScorer disableScoring(final BulkScorer scorer) { return new BulkScorer() { @Override public int score(final LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException { final LeafCollector noScoreCollector = new LeafCollector() { FakeScorer fake = new FakeScorer(); @Override public void setScorer(Scorer scorer) throws IOException { collector.setScorer(fake); } @Override public void collect(int doc) throws IOException { fake.doc = doc; collector.collect(doc); } }; return scorer.score(noScoreCollector, acceptDocs, min, max); } @Override public long cost() { return scorer.cost(); } }; } // Return a BulkScorer for the optional clauses only, // or null if it is not applicable // pkg-private for forcing use of BooleanScorer in tests BulkScorer optionalBulkScorer(LeafReaderContext context) throws IOException { List<BulkScorer> optional = new ArrayList<BulkScorer>(); Iterator<BooleanClause> cIter = query.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); if (c.getOccur() != Occur.SHOULD) { continue; } BulkScorer subScorer = w.bulkScorer(context); if (subScorer != null) { optional.add(subScorer); } } if (optional.size() == 0) { return null; } if (query.getMinimumNumberShouldMatch() > optional.size()) { return null; } if (optional.size() == 1) { return optional.get(0); } return new BooleanScorer(this, optional, Math.max(1, query.getMinimumNumberShouldMatch()), needsScores); } // Return a BulkScorer for the required clauses only, // or null if it is not applicable private BulkScorer requiredBulkScorer(LeafReaderContext context) throws IOException { BulkScorer scorer = null; Iterator<BooleanClause> cIter = query.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); if (c.isRequired() == false) { continue; } if (scorer != null) { // we don't have a BulkScorer for conjunctions return null; } scorer = w.bulkScorer(context); if (scorer == null) { // no matches return null; } if (c.isScoring() == false && needsScores) { scorer = disableScoring(scorer); } } return scorer; } /** Try to build a boolean scorer for this weight. Returns null if {@link BooleanScorer} * cannot be used. */ BulkScorer booleanScorer(LeafReaderContext context) throws IOException { final int numOptionalClauses = query.getClauses(Occur.SHOULD).size(); final int numRequiredClauses = query.getClauses(Occur.MUST).size() + query.getClauses(Occur.FILTER).size(); BulkScorer positiveScorer; if (numRequiredClauses == 0) { positiveScorer = optionalBulkScorer(context); if (positiveScorer == null) { return null; } // TODO: what is the right heuristic here? final long costThreshold; if (query.getMinimumNumberShouldMatch() <= 1) { // when all clauses are optional, use BooleanScorer aggressively // TODO: is there actually a threshold under which we should rather // use the regular scorer? costThreshold = -1; } else { // when a minimum number of clauses should match, BooleanScorer is // going to score all windows that have at least minNrShouldMatch // matches in the window. But there is no way to know if there is // an intersection (all clauses might match a different doc ID and // there will be no matches in the end) so we should only use // BooleanScorer if matches are very dense costThreshold = context.reader().maxDoc() / 3; } if (positiveScorer.cost() < costThreshold) { return null; } } else if (numRequiredClauses == 1 && numOptionalClauses == 0 && query.getMinimumNumberShouldMatch() == 0) { positiveScorer = requiredBulkScorer(context); } else { // TODO: there are some cases where BooleanScorer // would handle conjunctions faster than // BooleanScorer2... return null; } if (positiveScorer == null) { return null; } List<Scorer> prohibited = new ArrayList<>(); Iterator<BooleanClause> cIter = query.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); if (c.isProhibited()) { Scorer scorer = w.scorer(context); if (scorer != null) { prohibited.add(scorer); } } } if (prohibited.isEmpty()) { return positiveScorer; } else { Scorer prohibitedScorer = prohibited.size() == 1 ? prohibited.get(0) : new DisjunctionSumScorer(this, prohibited, false); if (prohibitedScorer.twoPhaseIterator() != null) { // ReqExclBulkScorer can't deal efficiently with two-phased prohibited clauses return null; } return new ReqExclBulkScorer(positiveScorer, prohibitedScorer.iterator()); } } @Override public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { final BulkScorer bulkScorer = booleanScorer(context); if (bulkScorer != null) { // bulk scoring is applicable, use it return bulkScorer; } else { // use a Scorer-based impl (BS2) return super.bulkScorer(context); } } @Override public Scorer scorer(LeafReaderContext context) throws IOException { ScorerSupplier scorerSupplier = scorerSupplier(context); if (scorerSupplier == null) { return null; } return scorerSupplier.get(false); } @Override public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { int minShouldMatch = query.getMinimumNumberShouldMatch(); final Map<Occur, Collection<ScorerSupplier>> scorers = new EnumMap<>(Occur.class); for (Occur occur : Occur.values()) { scorers.put(occur, new ArrayList<>()); } Iterator<BooleanClause> cIter = query.iterator(); for (Weight w : weights) { BooleanClause c = cIter.next(); ScorerSupplier subScorer = w.scorerSupplier(context); if (subScorer == null) { if (c.isRequired()) { return null; } } else { scorers.get(c.getOccur()).add(subScorer); } } // scorer simplifications: if (scorers.get(Occur.SHOULD).size() == minShouldMatch) { // any optional clauses are in fact required scorers.get(Occur.MUST).addAll(scorers.get(Occur.SHOULD)); scorers.get(Occur.SHOULD).clear(); minShouldMatch = 0; } if (scorers.get(Occur.FILTER).isEmpty() && scorers.get(Occur.MUST).isEmpty() && scorers.get(Occur.SHOULD).isEmpty()) { // no required and optional clauses. return null; } else if (scorers.get(Occur.SHOULD).size() < minShouldMatch) { // either >1 req scorer, or there are 0 req scorers and at least 1 // optional scorer. Therefore if there are not enough optional scorers // no documents will be matched by the query return null; } // we don't need scores, so if we have required clauses, drop optional clauses completely if (!needsScores && minShouldMatch == 0 && scorers.get(Occur.MUST).size() + scorers.get(Occur.FILTER).size() > 0) { scorers.get(Occur.SHOULD).clear(); } return new Boolean2ScorerSupplier(this, scorers, needsScores, minShouldMatch); } }