package org.apache.lucene.facet.taxonomy.directory;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
/**
 * A no-op {@link TaxonomyWriterCache} which always discards all given
 * categories: both get() variants return -1, close() and clear() do nothing,
 * isFull() returns true, and both put() variants return true, to indicate
 * some cache entries were cleared.
 * <p>
 * The instance holds no state and is shared by several tests, hence it is
 * declared final so that no test can accidentally replace it.
 */
private static final TaxonomyWriterCache NO_OP_CACHE = new TaxonomyWriterCache() {
  @Override
  public void close() {}
  @Override
  public int get(CategoryPath categoryPath) { return -1; }
  @Override
  public int get(CategoryPath categoryPath, int length) { return -1; }
  @Override
  public boolean put(CategoryPath categoryPath, int ordinal) { return true; }
  @Override
  public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) { return true; }
  @Override
  public boolean isFull() { return true; }
  @Override
  public void clear() {}
};
@Test
public void testCommit() throws Exception {
  // Checks that a category which was added after the last commit() is not
  // visible to a reader opened on the underlying Directory.
  final Directory taxoDir = newDirectory();
  final DirectoryTaxonomyWriter writer =
      new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
  assertFalse(DirectoryReader.indexExists(taxoDir));

  // the first commit makes an index exist, so that a reader can be opened below
  writer.commit();
  // this category is added but deliberately never committed
  writer.addCategory(new CategoryPath("a"));

  final DirectoryReader reader = DirectoryReader.open(taxoDir);
  final int committedDocs = reader.numDocs();
  // exactly one document is expected (presumably the root category), i.e. "a" is absent
  assertEquals("No categories should have been committed to the underlying directory", 1, committedDocs);

  reader.close();
  writer.close();
  taxoDir.close();
}
@Test
public void testCommitUserData() throws Exception {
  // Verifies taxonomy commit data: user supplied entries must be readable
  // from the commit, together with the INDEX_CREATE_TIME entry.
  Directory dir = newDirectory();
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
  taxoWriter.addCategory(new CategoryPath("a"));
  taxoWriter.addCategory(new CategoryPath("b"));
  Map<String, String> userCommitData = new HashMap<String, String>();
  userCommitData.put("testing", "1 2 3");
  taxoWriter.commit(userCommitData);
  taxoWriter.close();

  DirectoryReader r = DirectoryReader.open(dir);
  assertEquals("2 categories plus root should have been committed to the underlying directory", 3, r.numDocs());
  Map<String, String> readUserCommitData = r.getIndexCommit().getUserData();
  // assertEquals (rather than assertTrue on equals()) reports the actual value on failure
  assertEquals("wrong value extracted from commit data", "1 2 3", readUserCommitData.get("testing"));
  assertNotNull("index.create.time not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME));
  r.close();

  // open DirTaxoWriter again and commit, INDEX_CREATE_TIME should still exist
  // in the commit data, otherwise DirTaxoReader.refresh() might not detect
  // that the taxonomy index has been recreated.
  taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
  taxoWriter.addCategory(new CategoryPath("c")); // add a category so that commit will happen
  // a plain map: no anonymous HashMap subclass holding a reference to this test
  Map<String, String> secondCommitData = new HashMap<String, String>();
  secondCommitData.put("just", "data");
  taxoWriter.commit(secondCommitData);
  taxoWriter.close();

  r = DirectoryReader.open(dir);
  readUserCommitData = r.getIndexCommit().getUserData();
  assertNotNull("index.create.time not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME));
  r.close();
  dir.close();
}
@Test
public void testRollback() throws Exception {
  // Checks that once rollback() was called the DTW is closed: any further
  // addCategory() must be rejected with AlreadyClosedException.
  final Directory taxoDir = newDirectory();
  final DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(taxoDir);
  writer.addCategory(new CategoryPath("a"));
  writer.rollback();

  try {
    writer.addCategory(new CategoryPath("a"));
    fail("should not have succeeded to add a category following rollback.");
  } catch (AlreadyClosedException expected) {
    // the writer refused the category, which is what this test requires
  }

  taxoDir.close();
}
@Test
public void testEnsureOpen() throws Exception {
  // Checks that a DTW which was closed rejects addCategory() with
  // AlreadyClosedException.
  final Directory taxoDir = newDirectory();
  final DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(taxoDir);
  writer.close();

  try {
    writer.addCategory(new CategoryPath("a"));
    fail("should not have succeeded to add a category following close.");
  } catch (AlreadyClosedException expected) {
    // the closed writer refused the category, which is what this test requires
  }

  taxoDir.close();
}
/**
 * Adds the given category to the taxonomy and then commits it, passing a
 * small user data map ("just" -> "data") along with the commit.
 *
 * @param taxoWriter the taxonomy writer to add to and commit
 * @param cp the category to add
 * @throws IOException if adding the category or committing fails
 */
private void touchTaxo(DirectoryTaxonomyWriter taxoWriter, CategoryPath cp) throws IOException {
  taxoWriter.addCategory(cp);
  // a plain map: no anonymous HashMap subclass holding a reference to this test
  Map<String, String> commitData = new HashMap<String, String>();
  commitData.put("just", "data");
  taxoWriter.commit(commitData);
}
@Test
public void testRecreateAndRefresh() throws Exception {
  // DirTaxoWriter lost the INDEX_CREATE_TIME property if it was opened in
  // CREATE_OR_APPEND (or commit(userData) called twice), which could lead to
  // DirTaxoReader succeeding to refresh() although the taxonomy was recreated.
  Directory dir = newDirectory();

  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
  touchTaxo(taxoWriter, new CategoryPath("a"));

  DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);

  touchTaxo(taxoWriter, new CategoryPath("b"));

  // this should not fail: the taxonomy was only appended to
  taxoReader.refresh();

  // now recreate the taxonomy, and check that the timestamp is preserved after opening DirTW again.
  taxoWriter.close();
  taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, NO_OP_CACHE);
  touchTaxo(taxoWriter, new CategoryPath("c"));
  taxoWriter.close();

  taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
  touchTaxo(taxoWriter, new CategoryPath("d"));
  taxoWriter.close();

  // this should fail: the reader was opened on the taxonomy that was since recreated
  try {
    taxoReader.refresh();
    fail("InconsistentTaxonomyException should have been thrown");
  } catch (InconsistentTaxonomyException e) {
    // ok, expected
  }

  taxoReader.close();
  dir.close();
}
@Test
public void testUndefinedCreateTime() throws Exception {
  // Checks that everything still works when the taxonomy index carries no
  // INDEX_CREATE_TIME property (supports pre-3.6 indexes).
  final Directory taxoDir = newDirectory();

  // start from an empty index made by a plain IndexWriter, so that
  // DirTaxoWriter initializes createTime to null.
  final IndexWriterConfig plainConfig = new IndexWriterConfig(TEST_VERSION_CURRENT, null);
  final IndexWriter plainWriter = new IndexWriter(taxoDir, plainConfig);
  plainWriter.close();

  final DirectoryTaxonomyWriter writer =
      new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
  // null keys/values cannot be committed; closing here ensures that a null
  // DirTW.createTime does not prevent the commit.
  writer.close();

  final DirectoryTaxonomyReader reader = new DirectoryTaxonomyReader(taxoDir);
  reader.refresh();
  reader.close();

  taxoDir.close();
}
// Adds many categories of the form a/<number> from several threads at once,
// using a randomly chosen TaxonomyWriterCache, and then checks that the
// taxonomy holds exactly the distinct categories that were added.
@Test
public void testConcurrency() throws Exception {
  final int ncats = atLeast(100000); // add many categories
  final int range = ncats * 3; // affects the categories selection
  // shared countdown of the additions that are left; all threads draw from it
  final AtomicInteger numCats = new AtomicInteger(ncats);
  final Directory dir = newDirectory();
  // records every value that was added; duplicates collapse into one key
  final ConcurrentHashMap<Integer,Integer> values = new ConcurrentHashMap<Integer,Integer>();
  final double d = random().nextDouble();
  final TaxonomyWriterCache cache;
  if (d < 0.7) {
    // this is the fastest, yet most memory consuming
    cache = new Cl2oTaxonomyWriterCache(1024, 0.15f, 3);
  } else if (TEST_NIGHTLY && d > 0.98) {
    // this is the slowest, but tests the writer concurrency when no caching is done.
    // only pick it during NIGHTLY tests, and even then, with very low chances.
    cache = NO_OP_CACHE;
  } else {
    // this is slower than CL2O, but less memory consuming, and exercises finding categories on disk too.
    cache = new LruTaxonomyWriterCache(ncats / 10);
  }
  final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache);
  Thread[] addThreads = new Thread[atLeast(4)];
  for (int z = 0; z < addThreads.length; z++) {
    addThreads[z] = new Thread() {
      @Override
      public void run() {
        Random random = random();
        while (numCats.decrementAndGet() > 0) {
          try {
            int value = random.nextInt(range);
            tw.addCategory(new CategoryPath("a", Integer.toString(value)));
            // recorded only after addCategory returned without an exception
            values.put(value, value);
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
        }
      }
    };
  }

  for (Thread t : addThreads) t.start();
  for (Thread t : addThreads) t.join();
  tw.close();

  DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir);
  assertEquals("mismatch number of categories", values.size() + 2, dtr.getSize()); // +2 for root category + "a"
  for (Integer value : values.keySet()) {
    assertTrue("category not found a/" + value, dtr.getOrdinal(new CategoryPath("a", value.toString())) > 0);
  }
  dtr.close();
  dir.close();
}
/**
 * Reads the segments information of the given directory and returns the
 * value stored in its user data under
 * {@link DirectoryTaxonomyWriter#INDEX_CREATE_TIME}.
 *
 * @param taxoDir the directory holding the taxonomy index
 * @return whatever the user data map holds for that key
 * @throws IOException if the segments information cannot be read
 */
private String getCreateTime(Directory taxoDir) throws IOException {
  final SegmentInfos segmentInfos = new SegmentInfos();
  segmentInfos.read(taxoDir);
  final Map<String, String> commitUserData = segmentInfos.getUserData();
  return commitUserData.get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
}
@Test
public void testReplaceTaxonomy() throws Exception {
  // Checks replaceTaxonomy(): after replacing the taxonomy with the content
  // of another one, a category that existed before gets a new ordinal, and
  // the create time stored with the index changes.
  Directory input = newDirectory();
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(input);
  taxoWriter.addCategory(new CategoryPath("a"));
  taxoWriter.close();

  Directory dir = newDirectory();
  taxoWriter = new DirectoryTaxonomyWriter(dir);
  int ordinal = taxoWriter.addCategory(new CategoryPath("b"));
  taxoWriter.addCategory(new CategoryPath("c"));
  taxoWriter.commit();

  String origCreateTime = getCreateTime(dir);

  // replace the taxonomy with the input one
  taxoWriter.replaceTaxonomy(input);

  // add the same category again -- it should not receive the same ordinal !
  int newOrdinal = taxoWriter.addCategory(new CategoryPath("b"));
  // compare the int values: assertNotSame would box them and compare references
  assertTrue("new ordinal cannot be the original ordinal", ordinal != newOrdinal);
  assertEquals("ordinal should have been 2 since only one category was added by replaceTaxonomy", 2, newOrdinal);

  taxoWriter.close();

  String newCreateTime = getCreateTime(dir);
  assertNotNull("create time should exist before replaceTaxonomy", origCreateTime);
  assertNotNull("create time should exist after replaceTaxonomy", newCreateTime);
  // compare the string contents: the two values are read separately, so
  // assertNotSame would be expected to pass even if they were equal
  assertFalse("create time should have been changed after replaceTaxonomy", origCreateTime.equals(newCreateTime));

  dir.close();
  input.close();
}
@Test
public void testReaderFreshness() throws Exception {
  // Checks that the internal index reader is always kept fresh. This simple
  // scenario used to fail if the cache just evicted the category that is
  // being added; NO_OP_CACHE never retains anything, so the second add
  // cannot be answered from the cache.
  final Directory taxoDir = newDirectory();
  final DirectoryTaxonomyWriter writer =
      new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE, NO_OP_CACHE);

  final int firstOrdinal = writer.addCategory(new CategoryPath("a"));
  final int secondOrdinal = writer.addCategory(new CategoryPath("a"));
  final boolean sameOrdinal = firstOrdinal == secondOrdinal;
  assertTrue("ordinal for same category that is added twice should be the same !", sameOrdinal);

  writer.close();
  taxoDir.close();
}
}