/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.search.query; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.queries.MinDocQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Collector; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiCollector; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TimeLimitingCollector; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocsCollector; import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.search.Weight; import org.apache.lucene.search.grouping.CollapsingTopDocsCollector; import org.elasticsearch.action.search.SearchType; import org.elasticsearch.common.lucene.Lucene; import 
org.elasticsearch.common.lucene.MinimumScoreCollector; import org.elasticsearch.common.lucene.search.FilteredCollector; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.search.collapse.CollapseContext; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.SearchPhase; import org.elasticsearch.search.SearchService; import org.elasticsearch.search.aggregations.AggregationPhase; import org.elasticsearch.search.internal.ScrollContext; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.profile.ProfileShardResult; import org.elasticsearch.search.profile.SearchProfileShardResults; import org.elasticsearch.search.profile.query.CollectorResult; import org.elasticsearch.search.profile.query.InternalProfileCollector; import org.elasticsearch.search.rescore.RescorePhase; import org.elasticsearch.search.rescore.RescoreSearchContext; import org.elasticsearch.search.sort.SortAndFormats; import org.elasticsearch.search.suggest.SuggestPhase; import java.util.AbstractList; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.concurrent.Callable; /** * Query phase of a search request, used to run the query and get back from each shard information about the matching documents * (document ids and score or sort criteria) so that matches can be reduced on the coordinating node */ public class QueryPhase implements SearchPhase { private final AggregationPhase aggregationPhase; private final SuggestPhase suggestPhase; private RescorePhase rescorePhase; public QueryPhase(Settings settings) { this.aggregationPhase = new AggregationPhase(); this.suggestPhase = new SuggestPhase(settings); this.rescorePhase = new RescorePhase(settings); } @Override public void preProcess(SearchContext context) { context.preProcess(true); } @Override public void execute(SearchContext searchContext) throws QueryPhaseExecutionException { if (searchContext.hasOnlySuggest()) { 
suggestPhase.execute(searchContext); // TODO: fix this once we can fetch docs for suggestions searchContext.queryResult().topDocs( new TopDocs(0, Lucene.EMPTY_SCORE_DOCS, 0), new DocValueFormat[0]); return; } // Pre-process aggregations as late as possible. In the case of a DFS_Q_T_F // request, preProcess is called on the DFS phase phase, this is why we pre-process them // here to make sure it happens during the QUERY phase aggregationPhase.preProcess(searchContext); boolean rescore = execute(searchContext, searchContext.searcher()); if (rescore) { // only if we do a regular search rescorePhase.execute(searchContext); } suggestPhase.execute(searchContext); aggregationPhase.execute(searchContext); if (searchContext.getProfilers() != null) { ProfileShardResult shardResults = SearchProfileShardResults .buildShardResults(searchContext.getProfilers()); searchContext.queryResult().profileResults(shardResults); } } private static boolean returnsDocsInOrder(Query query, SortAndFormats sf) { if (sf == null || Sort.RELEVANCE.equals(sf.sort)) { // sort by score // queries that return constant scores will return docs in index // order since Lucene tie-breaks on the doc id return query.getClass() == ConstantScoreQuery.class || query.getClass() == MatchAllDocsQuery.class; } else { return Sort.INDEXORDER.equals(sf.sort); } } /** * In a package-private method so that it can be tested without having to * wire everything (mapperService, etc.) 
     *
     * @param searchContext the per-shard search context supplying the query, sort,
     *                      from/size, scroll state, post filter, profilers, etc.
     * @param searcher the searcher used to run the (already rewritten) query
     * @return whether the rescoring phase should be executed
     */
    static boolean execute(SearchContext searchContext, final IndexSearcher searcher) throws QueryPhaseExecutionException {
        QuerySearchResult queryResult = searchContext.queryResult();
        queryResult.searchTimedOut(false);

        final boolean doProfile = searchContext.getProfilers() != null;
        boolean rescore = false;
        try {
            queryResult.from(searchContext.from());
            queryResult.size(searchContext.size());

            Query query = searchContext.query();

            final int totalNumDocs = searcher.getIndexReader().numDocs();
            int numDocs = Math.min(searchContext.from() + searchContext.size(), totalNumDocs);

            Collector collector;
            // deferred so that the count optimization below can swap in a constant-time
            // implementation after the collector chain has been decided
            Callable<TopDocs> topDocsCallable;
            DocValueFormat[] sortValueFormats = new DocValueFormat[0];

            assert query == searcher.rewrite(query); // already rewritten

            if (searchContext.size() == 0) { // no matter what the value of from is
                // size == 0: only the hit count is needed, no top docs collection
                final TotalHitCountCollector totalHitCountCollector = new TotalHitCountCollector();
                collector = totalHitCountCollector;
                if (searchContext.getProfilers() != null) {
                    collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_COUNT, Collections.emptyList());
                }
                topDocsCallable = () -> new TopDocs(totalHitCountCollector.getTotalHits(), Lucene.EMPTY_SCORE_DOCS, 0);
            } else {
                // Perhaps have a dedicated scroll phase?
                final ScrollContext scrollContext = searchContext.scrollContext();
                assert (scrollContext != null) == (searchContext.request().scroll() != null);
                final Collector topDocsCollector;
                ScoreDoc after = null;
                if (searchContext.request().scroll() != null) {
                    // scroll: page relative to the last emitted doc, ignore from()
                    numDocs = Math.min(searchContext.size(), totalNumDocs);
                    after = scrollContext.lastEmittedDoc;
                    if (returnsDocsInOrder(query, searchContext.sort())) {
                        if (scrollContext.totalHits == -1) {
                            // first round
                            assert scrollContext.lastEmittedDoc == null;
                            // there is not much that we can optimize here since we want to collect all
                            // documents in order to get the total number of hits
                        } else {
                            // now this gets interesting: since we sort in index-order, we can directly
                            // skip to the desired doc and stop collecting after ${size} matches
                            if (scrollContext.lastEmittedDoc != null) {
                                BooleanQuery bq = new BooleanQuery.Builder()
                                    .add(query, BooleanClause.Occur.MUST)
                                    .add(new MinDocQuery(after.doc + 1), BooleanClause.Occur.FILTER)
                                    .build();
                                query = bq;
                            }
                            searchContext.terminateAfter(numDocs);
                        }
                    }
                } else {
                    after = searchContext.searchAfter();
                }
                if (totalNumDocs == 0) {
                    // top collectors don't like a size of 0
                    numDocs = 1;
                }
                assert numDocs > 0;
                if (searchContext.collapse() == null) {
                    if (searchContext.sort() != null) {
                        SortAndFormats sf = searchContext.sort();
                        topDocsCollector = TopFieldCollector.create(sf.sort, numDocs,
                            (FieldDoc) after, true, searchContext.trackScores(), searchContext.trackScores());
                        sortValueFormats = sf.formats;
                    } else {
                        rescore = !searchContext.rescore().isEmpty();
                        // widen the collection window so every rescore context has
                        // enough candidates to work with
                        for (RescoreSearchContext rescoreContext : searchContext.rescore()) {
                            numDocs = Math.max(rescoreContext.window(), numDocs);
                        }
                        topDocsCollector = TopScoreDocCollector.create(numDocs, after);
                    }
                } else {
                    // field collapsing: one top hit per distinct collapse key
                    Sort sort = Sort.RELEVANCE;
                    if (searchContext.sort() != null) {
                        sort = searchContext.sort().sort;
                    }
                    CollapseContext collapse = searchContext.collapse();
                    topDocsCollector = collapse.createTopDocs(sort, numDocs, searchContext.trackScores());
                    if (searchContext.sort() == null) {
                        sortValueFormats = new DocValueFormat[] {DocValueFormat.RAW};
                    } else {
                        sortValueFormats = searchContext.sort().formats;
                    }
                }
                collector = topDocsCollector;
                if (doProfile) {
                    collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TOP_HITS, Collections.emptyList());
                }
                topDocsCallable = () -> {
                    final TopDocs topDocs;
                    if (topDocsCollector instanceof TopDocsCollector) {
                        topDocs = ((TopDocsCollector<?>) topDocsCollector).topDocs();
                    } else if (topDocsCollector instanceof CollapsingTopDocsCollector) {
                        topDocs = ((CollapsingTopDocsCollector) topDocsCollector).getTopDocs();
                    } else {
                        throw new IllegalStateException("Unknown top docs collector " + topDocsCollector.getClass().getName());
                    }
                    if (scrollContext != null) {
                        if (scrollContext.totalHits == -1) {
                            // first round
                            scrollContext.totalHits = topDocs.totalHits;
                            scrollContext.maxScore = topDocs.getMaxScore();
                        } else {
                            // subsequent round: the total number of hits and
                            // the maximum score were computed on the first round
                            topDocs.totalHits = scrollContext.totalHits;
                            topDocs.setMaxScore(scrollContext.maxScore);
                        }
                        if (searchContext.request().numberOfShards() == 1) {
                            // if we fetch the document in the same roundtrip, we already know the last emitted doc
                            if (topDocs.scoreDocs.length > 0) {
                                // set the last emitted doc
                                scrollContext.lastEmittedDoc = topDocs.scoreDocs[topDocs.scoreDocs.length - 1];
                            }
                        }
                    }
                    return topDocs;
                };
            }

            // NOTE: wrapping order below matters; each wrapper treats the previous
            // collector as its child (terminate-after -> post filter -> multi -> min-score)
            final boolean terminateAfterSet = searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER;
            if (terminateAfterSet) {
                final Collector child = collector;
                // throws Lucene.EarlyTerminationException when given count is reached
                collector = Lucene.wrapCountBasedEarlyTerminatingCollector(collector, searchContext.terminateAfter());
                if (doProfile) {
                    collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TERMINATE_AFTER_COUNT,
                            Collections.singletonList((InternalProfileCollector) child));
                }
            }

            if (searchContext.parsedPostFilter() != null) {
                final Collector child = collector;
                // this will only get applied to the actual search collector and not
                // to any scoped collectors, also, it will only be applied to the main collector
                // since that is where the filter should only work
                final Weight filterWeight = searcher.createNormalizedWeight(searchContext.parsedPostFilter().query(), false);
                collector = new FilteredCollector(collector, filterWeight);
                if (doProfile) {
                    collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_POST_FILTER,
                            Collections.singletonList((InternalProfileCollector) child));
                }
            }

            // plug in additional collectors, like aggregations
            final List<Collector> subCollectors = new ArrayList<>();
            subCollectors.add(collector);
            subCollectors.addAll(searchContext.queryCollectors().values());
            collector = MultiCollector.wrap(subCollectors);
            if (doProfile && collector instanceof InternalProfileCollector == false) {
                // When there is a single collector to wrap, MultiCollector returns it
                // directly, so only wrap in the case that there are several sub collectors
                final List<InternalProfileCollector> children = new AbstractList<InternalProfileCollector>() {
                    @Override
                    public InternalProfileCollector get(int index) {
                        return (InternalProfileCollector) subCollectors.get(index);
                    }

                    @Override
                    public int size() {
                        return subCollectors.size();
                    }
                };
                collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_MULTI, children);
            }

            // apply the minimum score after multi collector so we filter aggs as well
            if (searchContext.minimumScore() != null) {
                final Collector child = collector;
                collector = new MinimumScoreCollector(collector, searchContext.minimumScore());
                if (doProfile) {
                    collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_MIN_SCORE,
                            Collections.singletonList((InternalProfileCollector) child));
                }
            }

            // collector is still the bare TotalHitCountCollector only when size == 0
            // and none of the wrappers above applied
            if (collector.getClass() == TotalHitCountCollector.class) {
                // Optimize counts in simple cases to return in constant time
                // instead of using a collector
                while (true) {
                    // remove wrappers that don't matter for counts
                    // this is necessary so that we don't only optimize match_all
                    // queries but also match_all queries that are nested in
                    // a constant_score query
                    if (query instanceof ConstantScoreQuery) {
                        query = ((ConstantScoreQuery) query).getQuery();
                    } else {
                        break;
                    }
                }
                if (query.getClass() == MatchAllDocsQuery.class) {
                    // match_all count == live doc count of the reader
                    collector = null;
                    topDocsCallable = new Callable<TopDocs>() {
                        @Override
                        public TopDocs call() throws Exception {
                            int count = searcher.getIndexReader().numDocs();
                            return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
                        }
                    };
                } else if (query.getClass() == TermQuery.class && searcher.getIndexReader().hasDeletions() == false) {
                    // with no deletions, a term query count is the sum of per-segment doc frequencies
                    final Term term = ((TermQuery) query).getTerm();
                    collector = null;
                    topDocsCallable = new Callable<TopDocs>() {
                        @Override
                        public TopDocs call() throws Exception {
                            int count = 0;
                            for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
                                count += context.reader().docFreq(term);
                            }
                            return new TopDocs(count, Lucene.EMPTY_SCORE_DOCS, 0);
                        }
                    };
                }
            }

            final boolean timeoutSet = searchContext.timeout() != null && !searchContext.timeout().equals(SearchService.NO_TIMEOUT);
            if (timeoutSet && collector != null) { // collector might be null if no collection is actually needed
                final Collector child = collector;
                // TODO: change to use our own counter that uses the scheduler in ThreadPool
                // throws TimeLimitingCollector.TimeExceededException when timeout has reached
                collector = Lucene.wrapTimeLimitingCollector(collector, searchContext.timeEstimateCounter(), searchContext.timeout().millis());
                if (doProfile) {
                    collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_TIMEOUT,
                            Collections.singletonList((InternalProfileCollector) child));
                }
            }

            if (collector != null) {
                // outermost wrapper so cancellation is checked before any other collection work
                final Collector child = collector;
                collector = new CancellableCollector(searchContext.getTask()::isCancelled, searchContext.lowLevelCancellation(), collector);
                if (doProfile) {
                    collector = new InternalProfileCollector(collector, CollectorResult.REASON_SEARCH_CANCELLED,
                            Collections.singletonList((InternalProfileCollector) child));
                }
            }

            try {
                if (collector != null) {
                    if (doProfile) {
                        searchContext.getProfilers().getCurrentQueryProfiler().setCollector((InternalProfileCollector) collector);
                    }
                    searcher.search(query, collector);
                }
            } catch (TimeLimitingCollector.TimeExceededException e) {
                assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
                queryResult.searchTimedOut(true);
            } catch (Lucene.EarlyTerminationException e) {
                assert terminateAfterSet : "EarlyTerminationException thrown even though terminateAfter wasn't set";
                queryResult.terminatedEarly(true);
            } finally {
                searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
            }

            // terminate_after was requested but never tripped: record that explicitly
            if (terminateAfterSet && queryResult.terminatedEarly() == null) {
                queryResult.terminatedEarly(false);
            }

            queryResult.topDocs(topDocsCallable.call(), sortValueFormats);

            if (searchContext.getProfilers() != null) {
                ProfileShardResult shardResults = SearchProfileShardResults
                        .buildShardResults(searchContext.getProfilers());
                searchContext.queryResult().profileResults(shardResults);
            }

            return rescore;
        } catch (Exception e) {
            throw new QueryPhaseExecutionException(searchContext, "Failed to execute main query", e);
        }
    }
}