package org.apache.lucene.facet.search.sampling;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.facet.search.FacetResultsHandler;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FloatArrayAllocator;
import org.apache.lucene.facet.search.IntArrayAllocator;
import org.apache.lucene.facet.search.SamplingWrapper;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.StandardFacetsAccumulator;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.search.sampling.Sampler.SampleResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Facets accumulation with sampling.<br>
* <p>
* Note two major differences between this class and {@link SamplingWrapper}:
* <ol>
* <li>Latter can wrap any other {@link FacetsAccumulator} while this class
* directly extends {@link StandardFacetsAccumulator}.</li>
* <li>This class can effectively apply sampling on the complement set of
* matching documents, thereby working efficiently with the complement
* optimization - see {@link FacetsAccumulator#getComplementThreshold()}.</li>
* </ol>
* <p>
* Note: sampling accumulation (accumulation over a sampled set of the results)
* does not guarantee accurate values for
* {@link FacetResult#getNumValidDescendants()} and
* {@link FacetResultNode#getResidue()}.
*
* @see Sampler
* @lucene.experimental
*/
public class SamplingAccumulator extends StandardFacetsAccumulator {

  /**
   * Ratio reported by the sampler for the most recent sample set. Stays at
   * {@code -1} until {@link #actualDocsToAccumulate(ScoredDocIDs)} has stored a
   * value, which is how {@link #getTotalCountsFactor()} detects that no
   * sampling has taken place yet.
   */
  private double samplingRatio = -1d;

  /** Supplies the sample set, the over-sampled params, the fixer and the trimming. */
  private final Sampler sampler;

  /**
   * Creates a sampling accumulator that uses the given array allocators.
   *
   * @param sampler used to sample the documents, over-sample the search
   *        params, fix the sampled results and trim them
   * @param searchParams facet search params, passed to the superclass
   * @param indexReader index reader, passed to the superclass
   * @param taxonomyReader taxonomy reader, passed to the superclass
   * @param intArrayAllocator int array allocator, passed to the superclass
   * @param floatArrayAllocator float array allocator, passed to the superclass
   */
  public SamplingAccumulator(
      Sampler sampler,
      FacetSearchParams searchParams,
      IndexReader indexReader, TaxonomyReader taxonomyReader,
      IntArrayAllocator intArrayAllocator,
      FloatArrayAllocator floatArrayAllocator) {
    super(searchParams, indexReader, taxonomyReader, intArrayAllocator,
        floatArrayAllocator);
    this.sampler = sampler;
  }

  /**
   * Creates a sampling accumulator without explicit array allocators.
   *
   * @param sampler used to sample the documents, over-sample the search
   *        params, fix the sampled results and trim them
   * @param searchParams facet search params, passed to the superclass
   * @param indexReader index reader, passed to the superclass
   * @param taxonomyReader taxonomy reader, passed to the superclass
   */
  public SamplingAccumulator(
      Sampler sampler,
      FacetSearchParams searchParams,
      IndexReader indexReader, TaxonomyReader taxonomyReader) {
    super(searchParams, indexReader, taxonomyReader);
    this.sampler = sampler;
  }

  /**
   * Accumulates facets using the over-sampled search params, then for each
   * result: fixes it with the sampler's sample fixer, lets the request's
   * results handler rearrange it, trims the over-sampled extras and, if
   * labeling is allowed, labels it.
   * <p>
   * While this method runs, labeling is switched off and {@code searchParams}
   * is replaced by the over-sampled params. Both are restored before the
   * method exits, whether it returns normally or throws.
   *
   * @param docids the documents to accumulate facets for
   * @return the fixed, rearranged and trimmed results, in the order returned
   *         by the superclass accumulation
   * @throws IOException if the underlying accumulation or the result fixing fails
   */
  @Override
  public List<FacetResult> accumulate(ScoredDocIDs docids) throws IOException {
    final boolean origAllowLabeling = isAllowLabeling();
    final FacetSearchParams original = searchParams;
    try {
      List<FacetResult> sampleRes;
      // first let delegee accumulate without labeling at all (though
      // currently it doesn't matter because we have to label all returned anyhow)
      setAllowLabeling(false);
      try {
        // Replacing the original searchParams with the over-sampled
        searchParams = sampler.overSampledSearchParams(original);
        sampleRes = super.accumulate(docids);
      } finally {
        // restore even on failure so this instance is not left with labeling disabled
        setAllowLabeling(origAllowLabeling);
      }

      List<FacetResult> fixedRes = new ArrayList<FacetResult>(sampleRes.size());
      for (FacetResult fres : sampleRes) {
        // for sure fres is not null because this is guaranteed by the delegee.
        FacetResultsHandler frh = fres.getFacetRequest().createFacetResultsHandler(
            taxonomyReader);
        // fix the result of current request; note that searchParams still
        // refers to the over-sampled params at this point
        sampler.getSampleFixer(indexReader, taxonomyReader, searchParams)
            .fixResult(docids, fres);
        // let delegee's handler do any arranging it needs to
        fres = frh.rearrangeFacetResult(fres);
        // Using the sampler to trim the extra (over-sampled) results
        fres = sampler.trimResult(fres);
        // final labeling if allowed (because labeling is a costly operation)
        if (isAllowLabeling()) {
          frh.labelResult(fres);
        }
        fixedRes.add(fres); // add to final results
      }
      return fixedRes;
    } finally {
      searchParams = original; // Back to original params, also when an exception escapes
    }
  }

  /**
   * Asks the sampler for a sample set of the given documents, records the
   * sample ratio it reports and returns the sampled documents.
   *
   * @param docids the full set of documents
   * @return the sampled documents, as returned by the sampler
   * @throws IOException if the sampler fails to produce the sample set
   */
  @Override
  protected ScoredDocIDs actualDocsToAccumulate(ScoredDocIDs docids) throws IOException {
    SampleResult sampleRes = sampler.getSampleSet(docids);
    samplingRatio = sampleRes.actualSampleRatio;
    return sampleRes.docids;
  }

  /**
   * Returns the sample ratio recorded by the last call to
   * {@link #actualDocsToAccumulate(ScoredDocIDs)}.
   *
   * @return the recorded sample ratio
   * @throws IllegalStateException if the recorded ratio is negative, which is
   *         the case as long as no sample ratio has been recorded
   */
  @Override
  protected double getTotalCountsFactor() {
    if (samplingRatio<0) {
      throw new IllegalStateException("Total counts ratio unavailable because actualDocsToAccumulate() was not invoked");
    }
    return samplingRatio;
  }
}