// DrillSideways.java example
//
// Explorer
// lucene-solr-master
// - lucene
// - solr
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.facet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.FilterCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.MultiCollectorManager;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.util.ThreadInterruptedException;

/**
 * Computes drill down and sideways counts for the provided
 * {@link DrillDownQuery}.  Drill sideways counts include
 * alternative values/aggregates for the drill-down
 * dimensions so that a dimension does not disappear after
 * the user drills down into it.
 * <p> Use one of the {@code search}
 * methods to do the search, and then get the hits and facet
 * results from the returned {@link DrillSidewaysResult}.
 * <p><b>NOTE</b>: this allocates one {@link
 * FacetsCollector} for each drill-down, plus one.  If your
 * index has a high number of facet labels then this will
 * multiply your memory usage.
 *
 * @lucene.experimental
 */
public class DrillSideways {

  /**
   * {@link IndexSearcher} passed to constructor.
   */
  protected final IndexSearcher searcher;

  /**
   * {@link TaxonomyReader} passed to constructor.
   */
  protected final TaxonomyReader taxoReader;

  /**
   * {@link SortedSetDocValuesReaderState} passed to
   * constructor; can be null.
   */
  protected final SortedSetDocValuesReaderState state;

  /**
   * {@link FacetsConfig} passed to constructor.
   */
  protected final FacetsConfig config;

  // Only used for multi-threaded search; null when no executor was supplied
  private final ExecutorService executor;

  /**
   * Create a new {@code DrillSideways} instance.
   * <p>
   * Delegates to the four-argument constructor, passing {@code null} as the
   * {@link SortedSetDocValuesReaderState}.
   *
   * @param searcher   searcher stored in {@link #searcher}
   * @param config     facets configuration stored in {@link #config}
   * @param taxoReader taxonomy reader stored in {@link #taxoReader}
   */
  public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader) {
    this(searcher, config, taxoReader, null);
  }

  /**
   * Create a new {@code DrillSideways} instance, assuming the categories were
   * indexed with {@link SortedSetDocValuesFacetField}.
   * <p>
   * Delegates to the four-argument constructor, passing {@code null} as the
   * {@link TaxonomyReader}.
   *
   * @param searcher searcher stored in {@link #searcher}
   * @param config   facets configuration stored in {@link #config}
   * @param state    sorted-set doc values reader state stored in {@link #state}
   */
  public DrillSideways(IndexSearcher searcher, FacetsConfig config, SortedSetDocValuesReaderState state) {
    this(searcher, config, null, state);
  }

  /**
   * Create a new {@code DrillSideways} instance, where some
   * dimensions were indexed with {@link
   * SortedSetDocValuesFacetField} and others were indexed
   * with {@link FacetField}.
   * <p>
   * Delegates to the five-argument constructor, passing {@code null} as the
   * {@link ExecutorService}.
   *
   * @param searcher   searcher stored in {@link #searcher}
   * @param config     facets configuration stored in {@link #config}
   * @param taxoReader taxonomy reader stored in {@link #taxoReader}
   * @param state      sorted-set doc values reader state stored in {@link #state}
   */
  public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader,
          SortedSetDocValuesReaderState state) {
    this(searcher, config, taxoReader, state, null);
  }

  /**
   * Create a new {@code DrillSideways} instance, where some
   * dimensions were indexed with {@link
   * SortedSetDocValuesFacetField} and others were indexed
   * with {@link FacetField}.
   * <p>
   * Use this constructor to use the concurrent implementation and/or the CollectorManager
   * <p>
   * All arguments are stored as-is; no validation is performed here.
   *
   * @param searcher   searcher stored in {@link #searcher}
   * @param config     facets configuration stored in {@link #config}
   * @param taxoReader taxonomy reader stored in {@link #taxoReader}
   * @param state      sorted-set doc values reader state stored in {@link #state}
   * @param executor   executor used for the multi-threaded search; the other
   *                   constructors pass {@code null}, in which case the top-N
   *                   {@code search} methods take their single-threaded branch
   */
  public DrillSideways(IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader,
          SortedSetDocValuesReaderState state, ExecutorService executor) {
    this.searcher = searcher;
    this.config = config;
    this.taxoReader = taxoReader;
    this.state = state;
    this.executor = executor;
  }

  /**
   * Turns the collected hits into the {@link Facets} returned to the caller.
   * Subclass can override to customize per-dim Facets impl.
   * <p>
   * When a {@link TaxonomyReader} is available the counts are computed with
   * {@link FastTaxonomyFacetCounts}; otherwise
   * {@link SortedSetDocValuesFacetCounts} is used with {@link #state}.
   *
   * @param drillDowns        collector holding the hits of the full drill-down query
   * @param drillSideways     one collector per drill-down dimension, or
   *                          {@code null} when there is nothing to compute sideways
   * @param drillSidewaysDims dimension name for each entry of {@code drillSideways};
   *                          only read when {@code drillSideways} is not {@code null}
   * @return the drill-down facets alone when no sideways facets were built,
   *         otherwise a {@link MultiFacets} combining both
   * @throws IOException if computing the counts fails
   */
  protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways,
          String[] drillSidewaysDims) throws IOException {

    final boolean useTaxonomy = taxoReader != null;

    // The drill-down counts are always computed first.
    final Facets drillDownFacets = useTaxonomy
            ? new FastTaxonomyFacetCounts(taxoReader, config, drillDowns)
            : new SortedSetDocValuesFacetCounts(state, drillDowns);

    if (drillSideways == null) {
      return drillDownFacets;
    }

    // One Facets instance per drill-down dimension, keyed by dimension name.
    final Map<String, Facets> perDimFacets = new HashMap<>();
    for (int d = 0; d < drillSideways.length; d++) {
      final String dim = drillSidewaysDims[d];
      final FacetsCollector sidewaysHits = drillSideways[d];
      final Facets sidewaysCounts = useTaxonomy
              ? new FastTaxonomyFacetCounts(taxoReader, config, sidewaysHits)
              : new SortedSetDocValuesFacetCounts(state, sidewaysHits);
      perDimFacets.put(dim, sidewaysCounts);
    }

    return perDimFacets.isEmpty() ? drillDownFacets : new MultiFacets(perDimFacets, drillDownFacets);
  }

  /**
   * Search, collecting hits with a {@link Collector}, and
   * computing drill down and sideways counts.
   * <p>
   * Hits are delivered to {@code hitCollector} only: the returned
   * {@link DrillSidewaysResult} is built with {@code null} hits.
   *
   * @param query        drill-down query to run
   * @param hitCollector collector receiving the matching documents
   * @return result carrying the facets built by
   *         {@link #buildFacetsResult(FacetsCollector, FacetsCollector[], String[])}
   * @throws IOException if searching or building the facets fails
   */
  public DrillSidewaysResult search(DrillDownQuery query, Collector hitCollector) throws IOException {

    final Map<String, Integer> dims = query.getDims();
    final FacetsCollector drillDownCollector = new FacetsCollector();

    // There are no drill-down dims, so there is no drill-sideways to compute:
    // a plain search feeding both collectors is enough.
    if (dims.isEmpty()) {
      searcher.search(query, MultiCollector.wrap(hitCollector, drillDownCollector));
      final Facets drillDownOnly = buildFacetsResult(drillDownCollector, null, null);
      return new DrillSidewaysResult(drillDownOnly, null);
    }

    // TODO: we could optimize the pure-browse case (no base query) by
    // making a custom scorer instead:
    final Query requestedBase = query.getBaseQuery();
    final Query baseQuery = requestedBase != null ? requestedBase : new MatchAllDocsQuery();
    final Query[] drillDownQueries = query.getDrillDownQueries();

    // One sideways collector per drill-down dimension.
    final FacetsCollector[] sidewaysCollectors = new FacetsCollector[dims.size()];
    for (int d = 0; d < sidewaysCollectors.length; d++) {
      sidewaysCollectors[d] = new FacetsCollector();
    }

    final DrillSidewaysQuery sidewaysQuery = new DrillSidewaysQuery(baseQuery, drillDownCollector,
            sidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce());

    // this is a horrible hack in order to make sure IndexSearcher will not
    // attempt to cache the DrillSidewaysQuery: the collector handed to the
    // searcher always reports that it needs scores
    final Collector scoringCollector;
    if (hitCollector.needsScores()) {
      scoringCollector = hitCollector;
    } else {
      scoringCollector = new FilterCollector(hitCollector) {
        @Override
        public boolean needsScores() {
          return true;
        }
      };
    }
    searcher.search(sidewaysQuery, scoringCollector);

    final String[] dimNames = dims.keySet().toArray(new String[dims.size()]);
    final Facets facets = buildFacetsResult(drillDownCollector, sidewaysCollectors, dimNames);
    return new DrillSidewaysResult(facets, null);
  }

  /**
   * Search, sorting by {@link Sort}, and computing
   * drill down and sideways counts.
   * <p>
   * When {@code sort} is {@code null} this delegates to
   * {@link #search(ScoreDoc, DrillDownQuery, int)}. Otherwise hits are gathered
   * with a {@link TopFieldCollector}, through a {@link CollectorManager} when
   * an executor was supplied to the constructor.
   *
   * @param query       drill-down query to run
   * @param filter      optional filter; when not {@code null} the query is wrapped
   *                    in a new {@link DrillDownQuery} built from it
   * @param after       passed to the hit collector as the "after" document
   * @param topN        number of top hits requested
   * @param sort        sort to apply, or {@code null} to sort by score
   * @param doDocScores passed through to {@link TopFieldCollector}
   * @param doMaxScore  passed through to {@link TopFieldCollector}
   * @return the facets together with the collected top hits
   * @throws IOException if searching or building the facets fails
   */
  public DrillSidewaysResult search(DrillDownQuery query, Query filter, FieldDoc after, int topN, Sort sort,
          boolean doDocScores, boolean doMaxScore) throws IOException {

    final DrillDownQuery effectiveQuery =
            filter == null ? query : new DrillDownQuery(config, filter, query);

    if (sort == null) {
      return search(after, effectiveQuery, topN);
    }

    // the collector does not allow numHits = 0, so an empty index is treated as size 1
    final int maxDoc = searcher.getIndexReader().maxDoc();
    final int fTopN = Math.min(topN, maxDoc == 0 ? 1 : maxDoc);

    if (executor == null) {
      // Single-threaded version: collect directly into one TopFieldCollector
      final TopFieldCollector sortedHits =
              TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore);
      final DrillSidewaysResult facetsOnly = search(effectiveQuery, sortedHits);
      return new DrillSidewaysResult(facetsOnly.facets, sortedHits.topDocs());
    }

    // We have an executor, let's use the multi-threaded version
    final CollectorManager<TopFieldCollector, TopFieldDocs> sortedHitsManager =
            new CollectorManager<TopFieldCollector, TopFieldDocs>() {

              @Override
              public TopFieldCollector newCollector() throws IOException {
                return TopFieldCollector.create(sort, fTopN, after, true, doDocScores, doMaxScore);
              }

              @Override
              public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOException {
                // Gather the top docs of every collector, then merge them by sort order
                final List<TopFieldDocs> perCollector = new ArrayList<>(collectors.size());
                for (TopFieldCollector each : collectors) {
                  perCollector.add(each.topDocs());
                }
                return TopDocs.merge(sort, topN, perCollector.toArray(new TopFieldDocs[0]));
              }

            };
    final ConcurrentDrillSidewaysResult<TopFieldDocs> concurrent = search(effectiveQuery, sortedHitsManager);
    return new DrillSidewaysResult(concurrent.facets, concurrent.collectorResult);
  }

  /**
   * Search, sorting by score, and computing
   * drill down and sideways counts.
   * <p>
   * Delegates to {@link #search(ScoreDoc, DrillDownQuery, int)} with a
   * {@code null} "after" document.
   *
   * @param query drill-down query to run
   * @param topN  number of top hits requested
   * @return the result of the delegated search
   * @throws IOException if the delegated search throws it
   */
  public DrillSidewaysResult search(DrillDownQuery query, int topN) throws IOException {
    return search(null, query, topN);
  }

  /**
   * Search, sorting by score, and computing
   * drill down and sideways counts.
   * <p>
   * The number of hits requested from the collectors is clamped to the size
   * of the index (at least 1), in both the multi-threaded and the
   * single-threaded branch, so that a very large {@code topN} does not size
   * the hit queue beyond what the index can ever return.
   *
   * @param after passed to the hit collector as the "after" document; may be
   *              {@code null}
   * @param query drill-down query to run
   * @param topN  number of top hits requested
   * @return the facets together with the collected top hits
   * @throws IOException if searching or building the facets fails
   */
  public DrillSidewaysResult search(ScoreDoc after, DrillDownQuery query, int topN) throws IOException {
    int limit = searcher.getIndexReader().maxDoc();
    if (limit == 0) {
      limit = 1; // the collector does not allow numHits = 0
    }
    final int fTopN = Math.min(topN, limit);

    if (executor != null) {  // We have an executor, let's use the multi-threaded version

      final CollectorManager<TopScoreDocCollector, TopDocs> collectorManager =
              new CollectorManager<TopScoreDocCollector, TopDocs>() {

                @Override
                public TopScoreDocCollector newCollector() throws IOException {
                  return TopScoreDocCollector.create(fTopN, after);
                }

                @Override
                public TopDocs reduce(Collection<TopScoreDocCollector> collectors) throws IOException {
                  final TopDocs[] topDocs = new TopDocs[collectors.size()];
                  int pos = 0;
                  for (TopScoreDocCollector collector : collectors) {
                    topDocs[pos++] = collector.topDocs();
                  }
                  return TopDocs.merge(topN, topDocs);
                }

              };
      ConcurrentDrillSidewaysResult<TopDocs> r = search(query, collectorManager);
      return new DrillSidewaysResult(r.facets, r.collectorResult);

    } else {

      // Use the clamped value here too: the collector sizes its hit queue from
      // numHits, and an index can never yield more than maxDoc hits, so the
      // returned hits are the same as with the unclamped topN.
      TopScoreDocCollector hitCollector = TopScoreDocCollector.create(fTopN, after);
      DrillSidewaysResult r = search(query, hitCollector);
      return new DrillSidewaysResult(r.facets, hitCollector.topDocs());
    }
  }

  /**
   * Override this and return true if your collector
   * (e.g., {@code ToParentBlockJoinCollector}) expects all
   * sub-scorers to be positioned on the document being
   * collected.  This will cause some performance loss;
   * default is false.
   * <p>
   * The value is handed to the {@code DrillSidewaysQuery} built by
   * {@link #search(DrillDownQuery, Collector)}.
   *
   * @return {@code false} in this base implementation
   */
  protected boolean scoreSubDocsAtOnce() {
    return false;
  }

  /**
   * Result of a drill sideways search, including the
   * {@link Facets} and {@link TopDocs}.
   * <p>
   * Both fields are public and final; the class adds no behavior.
   */
  public static class DrillSidewaysResult {
    /**
     * Combined drill down and sideways results.
     */
    public final Facets facets;

    /**
     * Hits. May be {@code null}: {@link DrillSideways#search(DrillDownQuery, Collector)}
     * builds its result with {@code null} hits, since the hits go to the
     * caller-supplied collector.
     */
    public final TopDocs hits;

    /**
     * Sole constructor.
     *
     * @param facets value for {@link #facets}
     * @param hits   value for {@link #hits}; stored without a null check
     */
    public DrillSidewaysResult(Facets facets, TopDocs hits) {
      this.facets = facets;
      this.hits = hits;
    }
  }

  /**
   * Task that runs one query through {@link IndexSearcher#search(Query, CollectorManager)}
   * and tags the outcome with a caller-chosen position, so results can be
   * matched back to their query.
   */
  private static class CallableCollector implements Callable<CallableResult> {

    // Position tag copied into the CallableResult
    private final int pos;
    private final IndexSearcher searcher;
    private final Query query;
    private final CollectorManager<?, ?> collectorManager;

    /**
     * Stores the arguments; nothing is executed until {@link #call()}.
     */
    private CallableCollector(int pos, IndexSearcher searcher, Query query, CollectorManager<?, ?> collectorManager) {
      this.pos = pos;
      this.searcher = searcher;
      this.query = query;
      this.collectorManager = collectorManager;
    }

    /**
     * Runs the search and wraps whatever the collector manager reduced to,
     * together with {@link #pos}.
     */
    @Override
    public CallableResult call() throws Exception {
      return new CallableResult(pos, searcher.search(query, collectorManager));
    }
  }

  /**
   * Pairs the untyped result of one {@link CallableCollector} with the
   * position tag that collector was created with.
   */
  private static class CallableResult {

    // Position tag of the CallableCollector that produced this result
    private final int pos;
    // Value returned by the search; callers cast it to the type they expect
    private final Object result;

    private CallableResult(int pos, Object result) {
      this.pos = pos;
      this.result = result;
    }
  }

  /**
   * Builds a new {@link DrillDownQuery} on the same base query as
   * {@code query}, carrying every drill-down dimension of {@code query}
   * except {@code excludedDimension}.
   *
   * @param query             query whose base query and dimensions are copied
   * @param queries           drill-down queries, indexed by the positions that
   *                          {@code query.getDims()} maps each dimension to
   * @param excludedDimension dimension to leave out
   * @return the new query, or {@code null} when it ends up with as many
   *         dimensions as there are entries in {@code queries}
   */
  private DrillDownQuery getDrillDownQuery(final DrillDownQuery query, Query[] queries,
          final String excludedDimension) {
    final DrillDownQuery sideways = new DrillDownQuery(config, query.getBaseQuery());
    for (Map.Entry<String, Integer> dimAndPos : query.getDims().entrySet()) {
      final String dim = dimAndPos.getKey();
      if (dim.equals(excludedDimension)) {
        continue;
      }
      sideways.add(dim, queries[dimAndPos.getValue()]);
    }
    if (sideways.getDims().size() == queries.length) {
      // Nothing was excluded
      return null;
    }
    return sideways;
  }

  /**
   * Runs a search, using a {@link CollectorManager} to gather and merge search results.
   * <p>
   * One search is run for the full drill-down query, feeding both a
   * {@link FacetsCollectorManager} and {@code hitCollectorManager}, plus one
   * search per drill-down dimension with that dimension left out, feeding a
   * {@link FacetsCollectorManager}. When an {@link ExecutorService} was passed
   * to the constructor the searches are submitted to it; otherwise the same
   * searches are run one after the other in the calling thread.
   *
   * @param query               drill-down query to run
   * @param hitCollectorManager manager collecting and merging the hits of the full query
   * @param <R>                 type of the merged result of {@code hitCollectorManager}
   * @return the facets together with the merged result of
   *         {@code hitCollectorManager}; the inherited {@code hits} field is {@code null}
   * @throws IOException                if a search run in the calling thread, or
   *                                    building the facets, fails
   * @throws ThreadInterruptedException if the calling thread is interrupted while
   *                                    waiting for the executor
   * @throws RuntimeException           wrapping the {@link ExecutionException} when a
   *                                    search submitted to the executor fails
   */
  public <R> ConcurrentDrillSidewaysResult<R> search(final DrillDownQuery query,
          final CollectorManager<?, R> hitCollectorManager) throws IOException {

    final Map<String, Integer> drillDownDims = query.getDims();
    final List<CallableCollector> callableCollectors = new ArrayList<>(drillDownDims.size() + 1);

    // Add the main DrillDownQuery
    callableCollectors.add(new CallableCollector(-1, searcher, query,
            new MultiCollectorManager(new FacetsCollectorManager(), hitCollectorManager)));
    // Add one query per drill-down dimension, tagged with the dimension's index
    int i = 0;
    final Query[] filters = query.getDrillDownQueries();
    for (String dim : drillDownDims.keySet()) {
      callableCollectors.add(new CallableCollector(i++, searcher, getDrillDownQuery(query, filters, dim),
              new FacetsCollectorManager()));
    }

    // Run the queries; results come back in submission order
    final List<CallableResult> results = runSearches(callableCollectors);

    // Extract the results; the main query's result holds one entry per
    // manager given to MultiCollectorManager, in the order they were given
    final Object[] mainResults = (Object[]) results.get(0).result;
    final FacetsCollector mainFacetsCollector = (FacetsCollector) mainResults[0];
    @SuppressWarnings("unchecked") // entry 1 is what hitCollectorManager reduced to, i.e. an R
    final R collectorResult = (R) mainResults[1];

    final FacetsCollector[] facetsCollectors = new FacetsCollector[drillDownDims.size()];
    for (CallableResult result : results.subList(1, results.size())) {
      facetsCollectors[result.pos] = (FacetsCollector) result.result;
    }
    // Fill the null results with the mainFacetsCollector
    for (i = 0; i < facetsCollectors.length; i++) {
      if (facetsCollectors[i] == null) {
        facetsCollectors[i] = mainFacetsCollector;
      }
    }

    // build the facets and return the result
    return new ConcurrentDrillSidewaysResult<>(buildFacetsResult(mainFacetsCollector, facetsCollectors,
            drillDownDims.keySet().toArray(new String[drillDownDims.size()])), null, collectorResult);
  }

  /**
   * Runs every search and returns the results in the order of {@code searches}:
   * through the executor when there is one, else in the calling thread.
   */
  private List<CallableResult> runSearches(List<CallableCollector> searches) throws IOException {
    final List<CallableResult> results = new ArrayList<>(searches.size());

    if (executor == null) {
      // No executor was supplied: run the same searches sequentially instead
      // of failing with a NullPointerException
      for (CallableCollector search : searches) {
        try {
          results.add(search.call());
        } catch (IOException | RuntimeException e) {
          throw e;
        } catch (Exception e) {
          // call() is declared to throw Exception
          throw new RuntimeException(e);
        }
      }
      return results;
    }

    try {
      // invokeAll returns once every search has completed
      for (Future<CallableResult> future : executor.invokeAll(searches)) {
        results.add(future.get());
      }
    } catch (InterruptedException e) {
      throw new ThreadInterruptedException(e);
    } catch (ExecutionException e) {
      throw new RuntimeException(e);
    }
    return results;
  }

  /**
   * Result of a drill sideways search run with a {@link CollectorManager},
   * including the {@link Facets} and the merged result produced by that
   * manager.
   *
   * @param <R> type of the merged collector result
   */
  public static class ConcurrentDrillSidewaysResult<R> extends DrillSidewaysResult {

    /** The merged search results */
    public final R collectorResult;

    /**
     * Sole constructor.
     *
     * @param facets          passed to the {@link DrillSidewaysResult} constructor
     * @param hits            passed to the {@link DrillSidewaysResult} constructor
     * @param collectorResult value for {@link #collectorResult}
     */
    ConcurrentDrillSidewaysResult(Facets facets, TopDocs hits, R collectorResult) {
      super(facets, hits);
      this.collectorResult = collectorResult;
    }
  }
}