package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;

/**
 * Rewrite method that collects the terms of a {@link MultiTermQuery} and
 * picks between two constant-score strategies: if collection finishes
 * without reaching the term-count or doc-count cutoff, the terms are wrapped
 * as SHOULD clauses of a {@link BooleanQuery} inside a
 * {@link ConstantScoreQuery}; otherwise the rewrite is delegated to
 * {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}.
 */
class ConstantScoreAutoRewrite extends TermCollectingRewrite<BooleanQuery> {

  // Defaults derived from rough tests with a 20.0 million
  // doc Wikipedia index.  With more than 350 terms in the
  // query, the filter method is fastest:
  public static int DEFAULT_TERM_COUNT_CUTOFF = 350;

  // If the query will hit more than 1 in 1000 of the docs
  // in the index (0.1%), the filter method is fastest:
  public static double DEFAULT_DOC_COUNT_PERCENT = 0.1;

  private int termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF;
  private double docCountPercent = DEFAULT_DOC_COUNT_PERCENT;

  /** If the number of terms in this query is equal to or
   *  larger than this setting then {@link
   *  MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} is used.
   *  The effective limit is additionally capped by
   *  {@link BooleanQuery#getMaxClauseCount()}. */
  public void setTermCountCutoff(int count) {
    termCountCutoff = count;
  }

  /** @see #setTermCountCutoff */
  public int getTermCountCutoff() {
    return termCountCutoff;
  }

  /** If the number of documents to be visited in the
   *  postings reaches or exceeds this specified percentage
   *  of the maxDoc() for the index, then {@link
   *  MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} is used.
   *  @param percent 0.0 to 100.0 */
  public void setDocCountPercent(double percent) {
    docCountPercent = percent;
  }

  /** @see #setDocCountPercent */
  public double getDocCountPercent() {
    return docCountPercent;
  }

  /** Creates the empty top-level boolean query that collected terms are added to. */
  @Override
  protected BooleanQuery getTopLevelQuery() {
    return new BooleanQuery(true);
  }

  /** Adds {@code term} as a SHOULD clause; the boost is ignored. */
  @Override
  protected void addClause(BooleanQuery topLevel, Term term, float boost /*ignored*/) {
    topLevel.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
  }

  /**
   * Rewrites {@code query} against {@code reader}, choosing the strategy
   * based on how many terms and postings were visited during collection.
   *
   * @param reader index used to enumerate terms and look up document frequencies
   * @param query the multi-term query being rewritten
   * @return the filter-based rewrite when a cutoff was reached, otherwise a
   *         boolean query over the collected terms (wrapped in a
   *         {@link ConstantScoreQuery} carrying the query's boost when at
   *         least one term was collected)
   * @throws IOException if reading from the index fails
   */
  @Override
  public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {

    // Get the enum and start visiting terms.  If we
    // exhaust the enum before hitting either of the
    // cutoffs, we use ConstantBooleanQueryRewrite; else,
    // ConstantFilterRewrite:
    final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc());
    final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff);

    final CutOffTermCollector col = new CutOffTermCollector(reader, docCountCutoff, termCountLimit);
    collectTerms(reader, query, col);

    if (col.hasCutOff) {
      return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
    }

    final Query result;
    if (col.pendingTerms.isEmpty()) {
      result = getTopLevelQuery();
    } else {
      BooleanQuery bq = getTopLevelQuery();
      for (Term term : col.pendingTerms) {
        addClause(bq, term, 1.0f);
      }
      // Strip scores
      result = new ConstantScoreQuery(bq);
      result.setBoost(query.getBoost());
    }
    query.incTotalNumberOfTerms(col.pendingTerms.size());
    return result;
  }

  /**
   * Collector that buffers terms and stops (setting {@link #hasCutOff}) as
   * soon as either the term-count limit or the doc-count cutoff is reached.
   */
  private static final class CutOffTermCollector implements TermCollector {
    CutOffTermCollector(IndexReader reader, int docCountCutoff, int termCountLimit) {
      this.reader = reader;
      this.docCountCutoff = docCountCutoff;
      this.termCountLimit = termCountLimit;
    }

    public boolean collect(Term t, float boost) throws IOException {
      pendingTerms.add(t);
      // Loading the TermInfo from the terms dict here
      // should not be costly, because 1) the
      // query/filter will load the TermInfo when it
      // runs, and 2) the terms dict has a cache:
      docVisitCount += reader.docFreq(t);
      if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
        hasCutOff = true;
        return false;
      }
      return true;
    }

    // Kept as a long: summing int document frequencies in an int could wrap
    // negative and silently skip the doc-count cutoff.
    long docVisitCount = 0;
    boolean hasCutOff = false;

    final IndexReader reader;
    final int docCountCutoff, termCountLimit;
    final ArrayList<Term> pendingTerms = new ArrayList<Term>();
  }

  /**
   * Hash over both cutoff settings, consistent with {@link #equals(Object)}.
   */
  @Override
  public int hashCode() {
    final int prime = 1279;
    final long percentBits = Double.doubleToLongBits(docCountPercent);
    // Fold the high word into the low word so that the sign, exponent and
    // upper mantissa bits contribute; a plain narrowing cast would drop them
    // and make values such as 0.5, 1.0 and 2.0 hash identically.
    return prime * termCountCutoff + (int) (percentBits ^ (percentBits >>> 32));
  }

  /**
   * Two instances are equal when they have the same runtime class, the same
   * term-count cutoff and bitwise-identical doc-count percentages.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj)
      return true;
    if (obj == null)
      return false;
    if (getClass() != obj.getClass())
      return false;

    ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj;
    if (other.termCountCutoff != termCountCutoff) {
      return false;
    }

    if (Double.doubleToLongBits(other.docCountPercent) != Double.doubleToLongBits(docCountPercent)) {
      return false;
    }

    return true;
  }
}