/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.facet; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField; import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.FilterCollector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MultiCollector; import org.apache.lucene.search.MultiCollectorManager; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.util.ThreadInterruptedException; /** * Computes drill down and sideways counts for the provided * {@link DrillDownQuery}. Drill sideways counts include * alternative values/aggregates for the drill-down * dimensions so that a dimension does not disappear after * the user drills down into it. * <p> Use one of the static search * methods to do the search, and then get the hits and facet * results from the returned {@link DrillSidewaysResult}. * <p><b>NOTE</b>: this allocates one {@link * FacetsCollector} for each drill-down, plus one. If your * index has high number of facet labels then this will * multiply your memory usage. * * @lucene.experimental */ public class DrillSideways { /** * {@link IndexSearcher} passed to constructor. */ protected final IndexSearcher searcher; /** * {@link TaxonomyReader} passed to constructor. */ protected final TaxonomyReader taxoReader; /** * {@link SortedSetDocValuesReaderState} passed to * constructor; can be null. */ protected final SortedSetDocValuesReaderState state; /** * {@link FacetsConfig} passed to constructor. */ protected final FacetsConfig config; // These are only used for multi-threaded search private final ExecutorService executor; /** * Create a new {@code DrillSideways} instance. */ public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader) { this(searcher, config, taxoReader, null); } /** * Create a new {@code DrillSideways} instance, assuming the categories were * indexed with {@link SortedSetDocValuesFacetField}. */ public DrillSideways(IndexSearcher searcher, FacetsConfig config, SortedSetDocValuesReaderState state) { this(searcher, config, null, state); } /** * Create a new {@code DrillSideways} instance, where some * dimensions were indexed with {@link * SortedSetDocValuesFacetField} and others were indexed * with {@link FacetField}. */ public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader, SortedSetDocValuesReaderState state) { this(searcher, config, taxoReader, state, null); } /** * Create a new {@code DrillSideways} instance, where some * dimensions were indexed with {@link * SortedSetDocValuesFacetField} and others were indexed * with {@link FacetField}. * <p> * Use this constructor to use the concurrent implementation and/or the CollectorManager */ public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader, SortedSetDocValuesReaderState state, ExecutorService executor) { this.searcher = searcher; this.config = config; this.taxoReader = taxoReader; this.state = state; this.executor = executor; } /** * Subclass can override to customize per-dim Facets * impl. */ protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException { Facets drillDownFacets; Map<String, Facets> drillSidewaysFacets = new HashMap<>(); if (taxoReader != null) { drillDownFacets = new FastTaxonomyFacetCounts(taxoReader, config, drillDowns); if (drillSideways != null) { for (int i = 0; i < drillSideways.length; i++) { drillSidewaysFacets.put(drillSidewaysDims[i], new FastTaxonomyFacetCounts(taxoReader, config, drillSideways[i])); } } } else { drillDownFacets = new SortedSetDocValuesFacetCounts(state, drillDowns); if (drillSideways != null) { for (int i = 0; i < drillSideways.length; i++) { drillSidewaysFacets.put(drillSidewaysDims[i], new SortedSetDocValuesFacetCounts(state, drillSideways[i])); } } } if (drillSidewaysFacets.isEmpty()) { return drillDownFacets; } else { return new MultiFacets(drillSidewaysFacets, drillDownFacets); } } /** * Search, collecting hits with a {@link Collector}, and * computing drill down and sideways counts. */ public DrillSidewaysResult search(DrillDownQuery query, Collector hitCollector) throws IOException { Map<String, Integer> drillDownDims = query.getDims(); FacetsCollector drillDownCollector = new FacetsCollector(); if (drillDownDims.isEmpty()) { // There are no drill-down dims, so there is no // drill-sideways to compute: searcher.search(query, MultiCollector.wrap(hitCollector, drillDownCollector)); return new DrillSidewaysResult(buildFacetsResult(drillDownCollector, null, null), null); } Query baseQuery = query.getBaseQuery(); if (baseQuery == null) { // TODO: we could optimize this pure-browse case by // making a custom scorer instead: baseQuery = new MatchAllDocsQuery(); } Query[] drillDownQueries = query.getDrillDownQueries(); FacetsCollector[] drillSidewaysCollectors = new FacetsCollector[drillDownDims.size()]; for (int i = 0; i < drillSidewaysCollectors.length; i++) { drillSidewaysCollectors[i] = new FacetsCollector(); } DrillSidewaysQuery dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce()); if (hitCollector.needsScores() == false) { // this is a horrible hack in order to make sure IndexSearcher will not // attempt to cache the DrillSidewaysQuery hitCollector = new FilterCollector(hitCollector) { @Override public boolean needsScores() { return true; } }; } searcher.search(dsq, hitCollector); return new DrillSidewaysResult(buildFacetsResult(drillDownCollector, drillSidewaysCollectors, drillDownDims.keySet().toArray(new String[drillDownDims.size()])), null); } /** * Search, sorting by {@link Sort}, and computing * drill down and sideways counts. */ public DrillSidewaysResult search(DrillDownQuery query, Query filter, FieldDoc after, int topN, Sort sort, boolean doDocScores, boolean doMaxScore) throws IOException { if (filter != null) { query = new DrillDownQuery(config, filter, query); } if (sort != null) { int limit = searcher.getIndexReader().maxDoc(); if (limit == 0) { limit = 1; // the collector does not alow numHits = 0 } final int fTopN = Math.min(topN, limit); if (executor != null) { // We have an executor, let use the multi-threaded version final CollectorManager<TopFieldCollector, TopFieldDocs> collectorManager = new CollectorManager<TopFieldCollector, TopFieldDocs>() { @Override public TopFieldCollector newCollector() throws IOException { return TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore); } @Override public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOException { final TopFieldDocs[] topFieldDocs = new TopFieldDocs[collectors.size()]; int pos = 0; for (TopFieldCollector collector : collectors) topFieldDocs[pos++] = collector.topDocs(); return TopDocs.merge(sort, topN, topFieldDocs); } }; ConcurrentDrillSidewaysResult<TopFieldDocs> r = search(query, collectorManager); return new DrillSidewaysResult(r.facets, r.collectorResult); } else { final TopFieldCollector hitCollector = TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore); DrillSidewaysResult r = search(query, hitCollector); return new DrillSidewaysResult(r.facets, hitCollector.topDocs()); } } else { return search(after, query, topN); } } /** * Search, sorting by score, and computing * drill down and sideways counts. */ public DrillSidewaysResult search(DrillDownQuery query, int topN) throws IOException { return search(null, query, topN); } /** * Search, sorting by score, and computing * drill down and sideways counts. */ public DrillSidewaysResult search(ScoreDoc after, DrillDownQuery query, int topN) throws IOException { int limit = searcher.getIndexReader().maxDoc(); if (limit == 0) { limit = 1; // the collector does not alow numHits = 0 } final int fTopN = Math.min(topN, limit); if (executor != null) { // We have an executor, let use the multi-threaded version final CollectorManager<TopScoreDocCollector, TopDocs> collectorManager = new CollectorManager<TopScoreDocCollector, TopDocs>() { @Override public TopScoreDocCollector newCollector() throws IOException { return TopScoreDocCollector.create(fTopN, after); } @Override public TopDocs reduce(Collection<TopScoreDocCollector> collectors) throws IOException { final TopDocs[] topDocs = new TopDocs[collectors.size()]; int pos = 0; for (TopScoreDocCollector collector : collectors) topDocs[pos++] = collector.topDocs(); return TopDocs.merge(topN, topDocs); } }; ConcurrentDrillSidewaysResult<TopDocs> r = search(query, collectorManager); return new DrillSidewaysResult(r.facets, r.collectorResult); } else { TopScoreDocCollector hitCollector = TopScoreDocCollector.create(topN, after); DrillSidewaysResult r = search(query, hitCollector); return new DrillSidewaysResult(r.facets, hitCollector.topDocs()); } } /** * Override this and return true if your collector * (e.g., {@code ToParentBlockJoinCollector}) expects all * sub-scorers to be positioned on the document being * collected. This will cause some performance loss; * default is false. */ protected boolean scoreSubDocsAtOnce() { return false; } /** * Result of a drill sideways search, including the * {@link Facets} and {@link TopDocs}. */ public static class DrillSidewaysResult { /** * Combined drill down and sideways results. */ public final Facets facets; /** * Hits. */ public final TopDocs hits; /** * Sole constructor. */ public DrillSidewaysResult(Facets facets, TopDocs hits) { this.facets = facets; this.hits = hits; } } private static class CallableCollector implements Callable<CallableResult> { private final int pos; private final IndexSearcher searcher; private final Query query; private final CollectorManager<?, ?> collectorManager; private CallableCollector(int pos, IndexSearcher searcher, Query query, CollectorManager<?, ?> collectorManager) { this.pos = pos; this.searcher = searcher; this.query = query; this.collectorManager = collectorManager; } @Override public CallableResult call() throws Exception { return new CallableResult(pos, searcher.search(query, collectorManager)); } } private static class CallableResult { private final int pos; private final Object result; private CallableResult(int pos, Object result) { this.pos = pos; this.result = result; } } private DrillDownQuery getDrillDownQuery(final DrillDownQuery query, Query[] queries, final String excludedDimension) { final DrillDownQuery ddl = new DrillDownQuery(config, query.getBaseQuery()); query.getDims().forEach((dim, pos) -> { if (!dim.equals(excludedDimension)) ddl.add(dim, queries[pos]); }); return ddl.getDims().size() == queries.length ? null : ddl; } /** Runs a search, using a {@link CollectorManager} to gather and merge search results */ public <R> ConcurrentDrillSidewaysResult<R> search(final DrillDownQuery query, final CollectorManager<?, R> hitCollectorManager) throws IOException { final Map<String, Integer> drillDownDims = query.getDims(); final List<CallableCollector> callableCollectors = new ArrayList<>(drillDownDims.size() + 1); // Add the main DrillDownQuery callableCollectors.add(new CallableCollector(-1, searcher, query, new MultiCollectorManager(new FacetsCollectorManager(), hitCollectorManager))); int i = 0; final Query[] filters = query.getDrillDownQueries(); for (String dim : drillDownDims.keySet()) callableCollectors.add(new CallableCollector(i++, searcher, getDrillDownQuery(query, filters, dim), new FacetsCollectorManager())); final FacetsCollector mainFacetsCollector; final FacetsCollector[] facetsCollectors = new FacetsCollector[drillDownDims.size()]; final R collectorResult; try { // Run the query pool final List<Future<CallableResult>> futures = executor.invokeAll(callableCollectors); // Extract the results final Object[] mainResults = (Object[]) futures.get(0).get().result; mainFacetsCollector = (FacetsCollector) mainResults[0]; collectorResult = (R) mainResults[1]; for (i = 1; i < futures.size(); i++) { final CallableResult result = futures.get(i).get(); facetsCollectors[result.pos] = (FacetsCollector) result.result; } // Fill the null results with the mainFacetsCollector for (i = 0; i < facetsCollectors.length; i++) if (facetsCollectors[i] == null) facetsCollectors[i] = mainFacetsCollector; } catch (InterruptedException e) { throw new ThreadInterruptedException(e); } catch (ExecutionException e) { throw new RuntimeException(e); } // build the facets and return the result return new ConcurrentDrillSidewaysResult<>(buildFacetsResult(mainFacetsCollector, facetsCollectors, drillDownDims.keySet().toArray(new String[drillDownDims.size()])), null, collectorResult); } /** * Result of a concurrent drill sideways search, including the * {@link Facets} and {@link TopDocs}. */ public static class ConcurrentDrillSidewaysResult<R> extends DrillSidewaysResult { /** The merged search results */ public final R collectorResult; /** * Sole constructor. */ ConcurrentDrillSidewaysResult(Facets facets, TopDocs hits, R collectorResult) { super(facets, hits); this.collectorResult = collectorResult; } } }