package org.apache.lucene.facet.search.params;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.junit.Test;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetResultsHandler;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.StandardFacetsAccumulator;
import org.apache.lucene.facet.search.TopKFacetResultsHandler;
import org.apache.lucene.facet.search.cache.CategoryListCache;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.util.ScoredDocIdsUtils;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Test faceted search with creation of multiple category list iterators by the
 * same CLP, depending on the provided facet request.
 * <p>
 * All categories are indexed under one {@link CategoryListParams}. Each facet
 * request then wraps the category list iterator it is given so that only
 * ordinals belonging to the request's own dimension are returned, and the
 * test verifies that nothing was counted for the other dimension.
 */
public class MultiIteratorsPerCLParamsTest extends LuceneTestCase {

  /** Categories of the indexed documents: one "author" and one "date" path per document. */
  CategoryPath[][] perDocCategories = new CategoryPath[][] {
      { new CategoryPath("author", "Mark Twain"),
        new CategoryPath("date", "2010") },
      { new CategoryPath("author", "Robert Frost"),
        new CategoryPath("date", "2009") },
      { new CategoryPath("author", "Artur Miller"),
        new CategoryPath("date", "2010") },
      { new CategoryPath("author", "Edgar Allan Poe"),
        new CategoryPath("date", "2009") },
      { new CategoryPath("author", "Henry James"),
        new CategoryPath("date", "2010") } };

  /**
   * Dimension whose counter must stay at zero during the current search, or
   * <code>null</code> when no dimension is forbidden. Read by the results
   * handler created in {@link PerDimCountFacetRequest}.
   */
  String countForbiddenDimension;

  /** Runs the scenario without a category list cache. */
  @Test
  public void testCLParamMultiIteratorsByRequest() throws Exception {
    doTestCLParamMultiIteratorsByRequest(false);
  }

  /** Runs the scenario with the category list loaded into a {@link CategoryListCache}. */
  @Test
  public void testCLParamMultiIteratorsByRequestCacheCLI() throws Exception {
    doTestCLParamMultiIteratorsByRequest(true);
  }

  /**
   * Builds the index and taxonomy, runs the per-dimension searches and
   * releases all resources.
   *
   * @param cacheCLI whether the searches should go through a category list cache
   */
  private void doTestCLParamMultiIteratorsByRequest(boolean cacheCLI) throws Exception {
    // A single CLP is used for indexing; the different CLIs are created at
    // search time by each FacetRequest, according to its dimension.
    CategoryListParams clp = new CategoryListParams();
    FacetIndexingParams iParams = new DefaultFacetIndexingParams(clp);

    // Every resource is closed in a finally block, so that a failing assertion
    // does not leave readers or directories open behind it.
    Directory indexDir = newDirectory();
    try {
      Directory taxoDir = newDirectory();
      try {
        populateIndex(iParams, indexDir, taxoDir);
        TaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
        try {
          IndexReader reader = DirectoryReader.open(indexDir);
          try {
            searchEachDimension(clp, iParams, taxo, reader, cacheCLI);
          } finally {
            reader.close();
          }
        } finally {
          taxo.close();
        }
      } finally {
        taxoDir.close();
      }
    } finally {
      indexDir.close();
    }
  }

  /**
   * Searches for "author" only, for "date" only and for both together,
   * validating the results of each search.
   */
  private void searchEachDimension(CategoryListParams clp, FacetIndexingParams iParams,
      TaxonomyReader taxo, IndexReader reader, boolean cacheCLI) throws Exception {
    CategoryListCache clCache = null;
    if (cacheCLI) {
      // caching the iterator, so:
      // 1: create the cached iterator, using original params
      clCache = new CategoryListCache();
      clCache.loadAndRegister(clp, reader, taxo, iParams);
    }

    ScoredDocIDs allDocs = ScoredDocIdsUtils.createAllDocsScoredDocIDs(reader);

    // Search index with 'author' should filter ONLY ordinals whose parent
    // is 'author'
    countForbiddenDimension = "date";
    validateFacetedSearch(iParams, taxo, reader, clCache, allDocs, "author", 5, 5);

    // Search index with 'date' should filter ONLY ordinals whose parent is
    // 'date'
    countForbiddenDimension = "author";
    validateFacetedSearch(iParams, taxo, reader, clCache, allDocs, "date", 5, 2);

    // Search index with both 'date' and 'author'
    countForbiddenDimension = null;
    validateFacetedSearch(iParams, taxo, reader, clCache, allDocs, new String[] {
        "author", "date" }, new int[] { 5, 5 }, new int[] { 5, 2 });
  }

  /** Single-dimension convenience form of the array-based validation below. */
  private void validateFacetedSearch(FacetIndexingParams iParams,
      TaxonomyReader taxo, IndexReader reader, CategoryListCache clCache,
      ScoredDocIDs allDocs, String dimension, int expectedValue, int expectedNumDescendants) throws IOException {
    validateFacetedSearch(iParams, taxo, reader, clCache, allDocs,
        new String[] { dimension }, new int[] { expectedValue },
        new int[] { expectedNumDescendants });
  }

  /**
   * Accumulates facets over <code>allDocs</code> with one
   * {@link PerDimCountFacetRequest} per dimension and compares each result
   * with the expectations found at the same array index.
   *
   * @param dimension dimensions to request, in the order results are expected
   * @param expectedValue expected value of the top result node, per dimension
   * @param expectedNumDescendants expected number of valid descendants, per dimension
   */
  private void validateFacetedSearch(FacetIndexingParams iParams,
      TaxonomyReader taxo, IndexReader reader, CategoryListCache clCache, ScoredDocIDs allDocs,
      String[] dimension, int[] expectedValue,
      int[] expectedNumDescendants)
      throws IOException {
    FacetSearchParams sParams = new FacetSearchParams(iParams);
    sParams.setClCache(clCache);
    for (String dim : dimension) {
      sParams.addFacetRequest(new PerDimCountFacetRequest(
          new CategoryPath(dim), 10));
    }
    FacetsAccumulator acc = new StandardFacetsAccumulator(sParams, reader, taxo);

    // no use to test this with complement since at that mode all facets are taken
    acc.setComplementThreshold(FacetsAccumulator.DISABLE_COMPLEMENT);

    List<FacetResult> results = acc.accumulate(allDocs);
    assertEquals("Wrong #results", dimension.length, results.size());

    for (int i = 0; i < results.size(); i++) {
      FacetResult res = results.get(i);
      assertEquals("wrong num-descendants for dimension " + dimension[i],
          expectedNumDescendants[i], res.getNumValidDescendants());
      FacetResultNode resNode = res.getFacetResultNode();
      assertEquals("wrong value for dimension " + dimension[i],
          expectedValue[i], (int) resNode.getValue());
    }
  }

  /**
   * Indexes one document per entry of {@link #perDocCategories}, writing the
   * search index to <code>indexDir</code> and the taxonomy to
   * <code>taxoDir</code>. Both writers are closed even when indexing fails.
   */
  private void populateIndex(FacetIndexingParams iParams, Directory indexDir,
      Directory taxoDir) throws Exception {
    RandomIndexWriter writer = new RandomIndexWriter(random(), indexDir,
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false)));
    try {
      TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
      try {
        for (CategoryPath[] categories : perDocCategories) {
          writer.addDocument(new CategoryDocumentBuilder(taxoWriter, iParams)
              .setCategoryPaths(Arrays.asList(categories)).build(
                  new Document()));
        }
        taxoWriter.commit();
        writer.commit();
      } finally {
        taxoWriter.close();
      }
    } finally {
      writer.close();
    }
  }

  /**
   * A count request which restricts category iteration to its own dimension
   * and verifies that the forbidden dimension was not counted.
   */
  private class PerDimCountFacetRequest extends CountFacetRequest {

    public PerDimCountFacetRequest(CategoryPath path, int num) {
      super(path, num);
    }

    /**
     * Wraps the iterator created by the super class in a
     * {@link PerDimensionCLI} bound to this request's category path.
     */
    @Override
    public CategoryListIterator createCategoryListIterator(IndexReader reader,
        TaxonomyReader taxo, FacetSearchParams sParams, int partition) throws IOException {
      // categories of certain dimension only
      return new PerDimensionCLI(taxo, super.createCategoryListIterator(
          reader, taxo, sParams, partition), getCategoryPath());
    }

    /** Override this method just for verifying that only specified facets are iterated. */
    @Override
    public FacetResultsHandler createFacetResultsHandler(
        TaxonomyReader taxonomyReader) {
      return new TopKFacetResultsHandler(taxonomyReader, this) {
        @Override
        public IntermediateFacetResult fetchPartitionResult(
            FacetArrays facetArrays, int offset) throws IOException {
          final IntermediateFacetResult res = super.fetchPartitionResult(facetArrays, offset);
          if (countForbiddenDimension!=null) {
            // the counter of the forbidden dimension must not have been touched
            int ord = taxonomyReader.getOrdinal(new CategoryPath(countForbiddenDimension));
            assertEquals("Should not have accumulated for dimension '"+countForbiddenDimension+"'!",0,facetArrays.getIntArray()[ord]);
          }
          return res;
        }
      };
    }
  }

  /**
   * a CLI which filters another CLI for the dimension of the provided
   * category-path
   */
  private static class PerDimensionCLI implements CategoryListIterator {

    /** Ordinal of the taxonomy root, which is an ancestor of every category. */
    private static final int ROOT_ORDINAL = 0;

    private final CategoryListIterator superCLI;
    private final int[] parentArray;
    private final int parentOrdinal;

    /**
     * @param taxo taxonomy used for resolving the dimension ordinal and the parent array
     * @param superCLI the iterator being filtered
     * @param requestedPath path whose first component names the dimension to
     *        keep; <code>null</code> selects the root, i.e. no filtering
     */
    PerDimensionCLI(TaxonomyReader taxo, CategoryListIterator superCLI,
        CategoryPath requestedPath) throws IOException {
      this.superCLI = superCLI;
      if (requestedPath == null) {
        parentOrdinal = ROOT_ORDINAL;
      } else {
        CategoryPath cp = new CategoryPath(requestedPath.getComponent(0));
        parentOrdinal = taxo.getOrdinal(cp);
      }
      parentArray = taxo.getParentArray();
    }

    @Override
    public boolean init() throws IOException {
      return superCLI.init();
    }

    /**
     * Returns the next value of the wrapped iterator which is either larger
     * than {@link Integer#MAX_VALUE} (passed through unfiltered) or an
     * ordinal belonging to the requested dimension.
     */
    @Override
    public long nextCategory() throws IOException {
      long next;
      while ((next = superCLI.nextCategory()) <= Integer.MAX_VALUE
          && !isInDimension((int) next)) {
        // skip ordinals of other dimensions
      }
      return next;
    }

    /** look for original parent ordinal, meaning same dimension */
    private boolean isInDimension(int ordinal) {
      if (parentOrdinal == ROOT_ORDINAL) {
        // The walk below stops before reaching ordinal 0, so it could never
        // match the root; everything is under the root, hence nothing is
        // filtered out.
        return true;
      }
      while (ordinal > 0) {
        if (ordinal == parentOrdinal) {
          return true;
        }
        ordinal = parentArray[ordinal];
      }
      return false;
    }

    @Override
    public boolean skipTo(int docId) throws IOException {
      return superCLI.skipTo(docId);
    }
  }
}