package org.apache.lucene.facet.search.sampling;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Verifies that a count facet request with depth 2 still yields second-level
 * results when the facet counts are computed through a {@link SamplingAccumulator}
 * configured with an oversample factor.
 */
public class OversampleWithDepthTest extends LuceneTestCase {

  /**
   * Indexes 100 documents under <code>root/&lt;docnum/10&gt;/&lt;docnum&gt;</code>, runs a
   * match-all search with a sampling accumulator and asserts that every child of
   * <code>root</code> in the facet result has sub-results of its own.
   *
   * @throws Exception if indexing, searching or closing the resources fails
   */
  @Test
  public void testCountWithdepthUsingSamping() throws Exception {
    Directory indexDir = newDirectory();
    Directory taxoDir = newDirectory();
    DirectoryReader r = null;
    TaxonomyReader tr = null;
    try {
      // index 100 docs, each with one category: ["root", docnum/10, docnum]
      // e.g. root/8/87
      index100Docs(indexDir, taxoDir);

      r = DirectoryReader.open(indexDir);
      tr = new DirectoryTaxonomyReader(taxoDir);

      FacetSearchParams fsp = new FacetSearchParams();

      CountFacetRequest facetRequest = new CountFacetRequest(new CategoryPath("root"), 10);
      // Setting the depth to '2' should potentially get all categories
      facetRequest.setDepth(2);
      facetRequest.setResultMode(ResultMode.PER_NODE_IN_TREE);
      fsp.addFacetRequest(facetRequest);

      // Craft sampling params to enforce sampling
      // NOTE(review): presumably the 100 indexed docs exceed the threshold of 60,
      // which is what forces the sampler to kick in -- confirm against SamplingParams.
      final SamplingParams params = new SamplingParams();
      params.setMinSampleSize(2);
      params.setMaxSampleSize(50);
      params.setOversampleFactor(5);
      params.setSampingThreshold(60);
      params.setSampleRatio(0.1);

      FacetResult res = searchWithFacets(r, tr, fsp, params);
      FacetRequest req = res.getFacetRequest();
      assertEquals(facetRequest, req);

      FacetResultNode rootNode = res.getFacetResultNode();

      // Guard against a vacuous pass: without this check an empty root would
      // skip the loop below and the test would succeed without verifying anything.
      assertTrue("root should have had children as the requested depth was '2'",
          rootNode.getNumSubResults() > 0);

      // Each node below root should also have sub-results as the requested depth was '2'
      for (FacetResultNode node : rootNode.getSubResults()) {
        assertTrue("node " + node.getLabel()
            + " should have had children as the requested depth was '2'",
            node.getNumSubResults() > 0);
      }
    } finally {
      // Close even when an assertion or the search fails, so that a test failure
      // does not additionally leave readers and directories open.
      // The readers may still be null here if indexing or opening failed.
      IOUtils.close(r, tr, indexDir, taxoDir);
    }
  }

  /**
   * Adds 100 documents to the index, each tagged with the single category
   * <code>root/&lt;i/10&gt;/&lt;i&gt;</code> for <code>i</code> in <code>[0, 100)</code>,
   * and closes both writers afterwards.
   *
   * @param indexDir directory that receives the search index
   * @param taxoDir directory that receives the taxonomy index
   * @throws IOException if writing or closing either index fails
   */
  private void index100Docs(Directory indexDir, Directory taxoDir)
      throws CorruptIndexException, LockObtainFailedException, IOException {
    IndexWriter w = null;
    TaxonomyWriter tw = null;
    try {
      IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new KeywordAnalyzer());
      w = new IndexWriter(indexDir, iwc);
      tw = new DirectoryTaxonomyWriter(taxoDir);

      CategoryDocumentBuilder cdb = new CategoryDocumentBuilder(tw);
      // The list is reused across iterations; it always holds exactly one path.
      ArrayList<CategoryPath> categoryPaths = new ArrayList<CategoryPath>(1);

      for (int i = 0; i < 100; i++) {
        categoryPaths.clear();
        categoryPaths.add(new CategoryPath("root", Integer.toString(i / 10), Integer.toString(i)));
        cdb.setCategoryPaths(categoryPaths);
        w.addDocument(cdb.build(new Document()));
      }
    } finally {
      // Same close order as before (taxonomy writer first, then index writer),
      // but now also executed when adding a document fails.
      IOUtils.close(tw, w);
    }
  }

  /**
   * Runs a {@link MatchAllDocsQuery} against <code>r</code>, collecting facets with a
   * {@link SamplingAccumulator} backed by a {@link RandomSampler}.
   *
   * @param r reader over the search index
   * @param tr reader over the taxonomy index
   * @param fsp facet search parameters holding the facet request(s)
   * @param params sampling parameters handed to the sampler
   * @return the first facet result produced by the collector
   * @throws IOException if the search or the facet accumulation fails
   */
  private FacetResult searchWithFacets(IndexReader r,
      TaxonomyReader tr, FacetSearchParams fsp, final SamplingParams params)
      throws IOException {
    // a FacetsCollector with a sampling accumulator
    FacetsCollector fcWithSampling = new FacetsCollector(fsp, r, tr) {
      @Override
      protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams,
          IndexReader indexReader, TaxonomyReader taxonomyReader) {
        Sampler sampler = new RandomSampler(params, random());
        return new SamplingAccumulator(sampler, facetSearchParams, indexReader, taxonomyReader);
      }
    };

    IndexSearcher s = new IndexSearcher(r);
    s.search(new MatchAllDocsQuery(), fcWithSampling);

    // there's only one expected result, return just it.
    return fcWithSampling.getFacetResults().get(0);
  }
}