package org.apache.lucene.facet;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.junit.AfterClass;
import org.junit.BeforeClass;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Base faceted search test. */
public abstract class FacetTestBase extends LuceneTestCase {
/** Holds a search and taxonomy Directories pair. */
private static final class SearchTaxoDirPair {
Directory searchDir, taxoDir;
SearchTaxoDirPair() {}
}
private static HashMap<Integer, SearchTaxoDirPair> dirsPerPartitionSize;
private static File TEST_DIR;
/** Documents text field. */
protected static final String CONTENT_FIELD = "content";
/** taxonomy Reader for the test. */
protected TaxonomyReader taxoReader;
/** Index Reader for the test. */
protected IndexReader indexReader;
/** Searcher for the test. */
protected IndexSearcher searcher;
@BeforeClass
public static void beforeClassFacetTestBase() {
TEST_DIR = _TestUtil.getTempDir("facets");
dirsPerPartitionSize = new HashMap<Integer, FacetTestBase.SearchTaxoDirPair>();
}
@AfterClass
public static void afterClassFacetTestBase() throws Exception {
for (SearchTaxoDirPair pair : dirsPerPartitionSize.values()) {
IOUtils.close(pair.searchDir, pair.taxoDir);
}
}
/** documents text (for the text field). */
private static final String[] DEFAULT_CONTENT = {
"the white car is the one I want.",
"the white dog does not belong to anyone.",
};
/** Facets: facets[D][F] == category-path no. F for document no. D. */
private static final CategoryPath[][] DEFAULT_CATEGORIES = {
{ new CategoryPath("root","a","f1"), new CategoryPath("root","a","f2") },
{ new CategoryPath("root","a","f1"), new CategoryPath("root","a","f3") },
};
/** categories to be added to specified doc */
protected List<CategoryPath> getCategories(int doc) {
return Arrays.asList(DEFAULT_CATEGORIES[doc]);
}
/** Number of documents to index */
protected int numDocsToIndex() {
return DEFAULT_CONTENT.length;
}
/** content to be added to specified doc */
protected String getContent(int doc) {
return DEFAULT_CONTENT[doc];
}
/** Prepare index (in RAM) with single partition */
protected final void initIndex() throws Exception {
initIndex(Integer.MAX_VALUE);
}
/** Prepare index (in RAM) with some documents and some facets */
protected final void initIndex(int partitionSize) throws Exception {
initIndex(partitionSize, false);
}
/** Prepare index (in RAM/Disk) with some documents and some facets */
protected final void initIndex(int partitionSize, boolean forceDisk) throws Exception {
if (VERBOSE) {
System.out.println("Partition Size: " + partitionSize+" forceDisk: "+forceDisk);
}
SearchTaxoDirPair pair = dirsPerPartitionSize.get(Integer.valueOf(partitionSize));
if (pair == null) {
pair = new SearchTaxoDirPair();
if (forceDisk) {
pair.searchDir = newFSDirectory(new File(TEST_DIR, "index"));
pair.taxoDir = newFSDirectory(new File(TEST_DIR, "taxo"));
} else {
pair.searchDir = newDirectory();
pair.taxoDir = newDirectory();
}
RandomIndexWriter iw = new RandomIndexWriter(random(), pair.searchDir, getIndexWriterConfig(getAnalyzer()));
TaxonomyWriter taxo = new DirectoryTaxonomyWriter(pair.taxoDir, OpenMode.CREATE);
populateIndex(iw, taxo, getFacetIndexingParams(partitionSize));
// commit changes (taxonomy prior to search index for consistency)
taxo.commit();
iw.commit();
taxo.close();
iw.close();
dirsPerPartitionSize.put(Integer.valueOf(partitionSize), pair);
}
// prepare for searching
taxoReader = new DirectoryTaxonomyReader(pair.taxoDir);
indexReader = DirectoryReader.open(pair.searchDir);
searcher = newSearcher(indexReader);
}
/** Returns indexing params for the main index */
protected IndexWriterConfig getIndexWriterConfig(Analyzer analyzer) {
return newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
}
/** Returns a default facet indexing params */
protected FacetIndexingParams getFacetIndexingParams(final int partSize) {
return new DefaultFacetIndexingParams() {
@Override
protected int fixedPartitionSize() {
return partSize;
}
};
}
/**
* Faceted Search Params for the test.
* Sub classes should override in order to test with different faceted search params.
*/
protected FacetSearchParams getFacetedSearchParams() {
return getFacetedSearchParams(Integer.MAX_VALUE);
}
/**
* Faceted Search Params with specified partition size.
* @see #getFacetedSearchParams()
*/
protected FacetSearchParams getFacetedSearchParams(int partitionSize) {
FacetSearchParams res = new FacetSearchParams(getFacetIndexingParams(partitionSize));
return res;
}
/**
* Populate the test index+taxonomy for this test.
* <p>Subclasses can override this to test different scenarios
*/
protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo, FacetIndexingParams iParams)
throws IOException {
// add test documents
int numDocsToIndex = numDocsToIndex();
for (int doc=0; doc<numDocsToIndex; doc++) {
indexDoc(iParams, iw, taxo, getContent(doc), getCategories(doc));
}
// also add a document that would be deleted, so that all tests are also working against deletions in the index
String content4del = "ContentOfDocToDelete";
indexDoc(iParams, iw, taxo, content4del, getCategories(0));
iw.commit(); // commit it
iw.deleteDocuments(new Term(CONTENT_FIELD,content4del)); // now delete the committed doc
}
/** Close all indexes */
protected void closeAll() throws Exception {
// close and nullify everything
IOUtils.close(taxoReader, indexReader);
taxoReader = null;
indexReader = null;
searcher = null;
}
/**
* Analyzer to use for the test.
* Sub classes should override in order to test with different analyzer.
*/
protected Analyzer getAnalyzer() {
return new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
}
/** convenience method: convert sub results to an array */
protected static FacetResultNode[] resultNodesAsArray(FacetResultNode parentRes) {
ArrayList<FacetResultNode> a = new ArrayList<FacetResultNode>();
for (FacetResultNode frn : parentRes.getSubResults()) {
a.add(frn);
}
return a.toArray(new FacetResultNode[0]);
}
/** utility Create a dummy document with specified categories and content */
protected final void indexDoc(FacetIndexingParams iParams, RandomIndexWriter iw,
TaxonomyWriter tw, String content, List<CategoryPath> categories) throws IOException {
Document d = new Document();
CategoryDocumentBuilder builder = new CategoryDocumentBuilder(tw, iParams);
builder.setCategoryPaths(categories);
builder.build(d);
d.add(new TextField("content", content, Field.Store.YES));
iw.addDocument(d);
}
/** Build the "truth" with ALL the facets enumerating indexes content. */
protected Map<CategoryPath, Integer> facetCountsTruth() throws IOException {
FacetIndexingParams iParams = getFacetIndexingParams(Integer.MAX_VALUE);
String delim = String.valueOf(iParams.getFacetDelimChar());
Map<CategoryPath, Integer> res = new HashMap<CategoryPath, Integer>();
HashSet<Term> handledTerms = new HashSet<Term>();
for (CategoryListParams clp : iParams.getAllCategoryListParams()) {
Term baseTerm = new Term(clp.getTerm().field());
if (!handledTerms.add(baseTerm)) {
continue; // already handled this term (for another list)
}
Terms terms = MultiFields.getTerms(indexReader, baseTerm.field());
if (terms == null) {
continue;
}
Bits liveDocs = MultiFields.getLiveDocs(indexReader);
TermsEnum te = terms.iterator(null);
DocsEnum de = null;
while (te.next() != null) {
de = _TestUtil.docs(random(), te, liveDocs, de, 0);
int cnt = 0;
while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
cnt++;
}
res.put(new CategoryPath(te.term().utf8ToString().split(delim)), cnt);
}
}
return res;
}
/** Validate counts for returned facets, and that there are not too many results */
protected static void assertCountsAndCardinality(Map<CategoryPath, Integer> facetCountsTruth, List<FacetResult> facetResults) throws Exception {
for (FacetResult fr : facetResults) {
FacetResultNode topResNode = fr.getFacetResultNode();
FacetRequest freq = fr.getFacetRequest();
if (VERBOSE) {
System.out.println(freq.getCategoryPath().toString()+ "\t\t" + topResNode);
}
assertCountsAndCardinality(facetCountsTruth, topResNode, freq.getNumResults());
}
}
/** Validate counts for returned facets, and that there are not too many results */
private static void assertCountsAndCardinality(Map<CategoryPath,Integer> facetCountsTruth, FacetResultNode resNode, int reqNumResults) throws Exception {
int actualNumResults = resNode.getNumSubResults();
if (VERBOSE) {
System.out.println("NumResults: " + actualNumResults);
}
assertTrue("Too many results!", actualNumResults <= reqNumResults);
for (FacetResultNode subRes : resNode.getSubResults()) {
assertEquals("wrong count for: "+subRes, facetCountsTruth.get(subRes.getLabel()).intValue(), (int)subRes.getValue());
assertCountsAndCardinality(facetCountsTruth, subRes, reqNumResults); // recurse into child results
}
}
/** Validate results equality */
protected static void assertSameResults(List<FacetResult> expected,
List<FacetResult> actual) {
String expectedResults = resStringValueOnly(expected);
String actualResults = resStringValueOnly(actual);
if (!expectedResults.equals(actualResults)) {
System.err.println("Results are not the same!");
System.err.println("Expected:\n" + expectedResults);
System.err.println("Actual" + actualResults);
throw new NotSameResultError();
}
}
/** exclude the residue and numDecendants because it is incorrect in sampling */
private static final String resStringValueOnly(List<FacetResult> results) {
StringBuilder sb = new StringBuilder();
for (FacetResult facetRes : results) {
sb.append(facetRes.toString()).append('\n');
}
return sb.toString().replaceAll("Residue:.*.0", "").replaceAll("Num valid Descendants.*", "");
}
/** Special Error class for ability to ignore only this error and retry... */
public static class NotSameResultError extends Error {
public NotSameResultError() {
super("Results are not the same!");
}
}
}