package proj.zoie.test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Random; import junit.framework.TestCase; import org.apache.log4j.Logger; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.StringField; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Version; import org.junit.Test; import proj.zoie.api.DataConsumer.DataEvent; import proj.zoie.api.DataDoc; import proj.zoie.api.DefaultDirectoryManager; import proj.zoie.api.DirectoryManager; import proj.zoie.api.UIDDocIdSet; import proj.zoie.api.ZoieException; import proj.zoie.api.ZoieMultiReader; import proj.zoie.api.ZoieSegmentReader; import proj.zoie.api.impl.DocIDMapperImpl; import proj.zoie.api.indexing.IndexingEventListener; import proj.zoie.api.indexing.ZoieIndexable; import proj.zoie.api.indexing.ZoieIndexableInterpreter; import proj.zoie.impl.indexing.AsyncDataConsumer; import proj.zoie.impl.indexing.MemoryStreamDataProvider; import proj.zoie.impl.indexing.ZoieConfig; import proj.zoie.impl.indexing.ZoieSystem; import proj.zoie.test.data.DataForTests; import proj.zoie.test.mock.MockDataLoader; public class ZoieTest extends ZoieTestCaseBase { static Logger log = Logger.getLogger(ZoieTest.class); public ZoieTest() { } private static int countHits(ZoieSystem<IndexReader, String> idxSystem, Query q) throws IOException { IndexSearcher searcher = null; MultiReader reader = null; List<ZoieMultiReader<IndexReader>> readers = null; try { readers = idxSystem.getIndexReaders(); reader = new MultiReader(readers.toArray(new IndexReader[readers.size()]), false); searcher = new IndexSearcher(reader); TopDocs hits = searcher.search(q, 10); return hits.totalHits; } finally { try { if (reader != null) { reader.close(); reader = null; } } finally { idxSystem.returnIndexReaders(readers); } } } @Test public void testIndexWithAnalyzer() throws ZoieException, IOException { File idxDir = getIdxDir(); ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true, 20, new WhitespaceAnalyzer(Version.LUCENE_43), null, ZoieConfig.DEFAULT_VERSION_COMPARATOR, false); idxSystem.start(); MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>( ZoieConfig.DEFAULT_VERSION_COMPARATOR); memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE); memoryProvider.setDataConsumer(idxSystem); memoryProvider.start(); List<DataEvent<String>> list = new ArrayList<DataEvent<String>>(2); list.add(new DataEvent<String>("hao,yan 0", "0")); list.add(new DataEvent<String>("hao,yan 1", "1")); memoryProvider.addEvents(list); memoryProvider.flush(); idxSystem.syncWithVersion(10000, "1"); List<ZoieMultiReader<IndexReader>> readers = null; IndexSearcher searcher = null; MultiReader reader = null; try { readers = idxSystem.getIndexReaders(); reader = new MultiReader(readers.toArray(new IndexReader[readers.size()]), false); searcher = new IndexSearcher(reader); TopDocs hits = searcher.search(new TermQuery(new Term("contents", "hao,yan")), 10); assertEquals(1, hits.totalHits); assertEquals(String.valueOf((Integer.MAX_VALUE * 2L + 1L)), searcher.doc(hits.scoreDocs[0].doc).get("id")); hits = searcher.search(new TermQuery(new Term("contents", "hao")), 10); assertEquals(1, hits.totalHits); assertEquals(String.valueOf((Integer.MAX_VALUE * 2L)), searcher.doc(hits.scoreDocs[0].doc) .get("id")); idxSystem.returnIndexReaders(readers); } finally { try { if (reader != null) { reader.close(); reader = null; } } finally { memoryProvider.stop(); idxSystem.shutdown(); deleteDirectory(idxDir); } } } @Test public void testRealtime2() throws ZoieException { File idxDir = getIdxDir(); ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true, ZoieConfig.DEFAULT_VERSION_COMPARATOR); idxSystem.start(); MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>( ZoieConfig.DEFAULT_VERSION_COMPARATOR); memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE); memoryProvider.setDataConsumer(idxSystem); memoryProvider.start(); try { int count = DataForTests.testdata.length; List<DataEvent<String>> list = new ArrayList<DataEvent<String>>(count); for (int i = 0; i < count; ++i) { list.add(new DataEvent<String>(DataForTests.testdata[i], "" + i)); } memoryProvider.addEvents(list); memoryProvider.flush(); idxSystem.flushEvents(10000); List<ZoieMultiReader<IndexReader>> readers = idxSystem.getIndexReaders(); int numDocs = 0; for (ZoieMultiReader<IndexReader> r : readers) { numDocs += r.numDocs(); } idxSystem.returnIndexReaders(readers); assertEquals(count, numDocs); } catch (IOException ioe) { throw new ZoieException(ioe.getMessage()); } finally { memoryProvider.stop(); idxSystem.shutdown(); deleteDirectory(idxDir); } } private static class EvenUidPurgeFilter extends Filter { @Override public DocIdSet getDocIdSet(final AtomicReaderContext ctx, Bits bits) throws IOException { return new DocIdSet() { IndexReader reader = ctx.reader(); @Override public DocIdSetIterator iterator() throws IOException { return new DocIdSetIterator() { int doc = -1; int maxdoc = reader.maxDoc(); @Override public int advance(int target) throws IOException { doc = target - 1; return nextDoc(); } @Override public int docID() { return doc; } @Override public int nextDoc() throws IOException { while (++doc < maxdoc) { long uid = Long.parseLong(reader.document(doc).get("id")); if (uid % 2 == 0) { return doc; } } return DocIdSetIterator.NO_MORE_DOCS; } @Override public long cost() { // TODO Auto-generated method stub return 0; } }; } }; } } @Test public void testIndexEventListener() throws Exception { File idxDir = getIdxDir(); final int[] flushNum = { 0 }; final String[] flushVersion = { null }; ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true, ZoieConfig.DEFAULT_VERSION_COMPARATOR, true); idxSystem.start(); idxSystem.addIndexingEventListener(new IndexingEventListener() { @Override public void handleUpdatedDiskVersion(String version) { flushVersion[0] = version; } @Override public void handleIndexingEvent(IndexingEvent evt) { flushNum[0]++; } }); MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>( ZoieConfig.DEFAULT_VERSION_COMPARATOR); memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE); memoryProvider.setDataConsumer(idxSystem); memoryProvider.start(); try { int count = DataForTests.testdata.length; List<DataEvent<String>> list = new ArrayList<DataEvent<String>>(count); for (int i = 0; i < count; ++i) { list.add(new DataEvent<String>(DataForTests.testdata[i], "" + i)); } memoryProvider.addEvents(list); memoryProvider.flush(); idxSystem.flushEvents(10000); String diskVersion = null; int wait = 0; while (!"9".equals(diskVersion)) { diskVersion = idxSystem.getCurrentDiskVersion(); Thread.sleep(500); if (++wait >= 10) { break; } } assertTrue(wait < 10); } finally { memoryProvider.stop(); idxSystem.shutdown(); deleteDirectory(idxDir); } assertTrue(flushNum[0] > 0); assertEquals("9", flushVersion[0]); } @SuppressWarnings({ "unchecked", "rawtypes" }) @Test public void testSegmentTermDocs() throws Exception { class DefaultInterpreter implements ZoieIndexableInterpreter<DataDoc> { @Override public ZoieIndexable convertAndInterpret(DataDoc src) { return src; } } File idxDir = getIdxDir(); ZoieConfig zConfig = new ZoieConfig(); ZoieSystem<?, DataDoc> zoie = ZoieSystem.buildDefaultInstance(idxDir, new DefaultInterpreter(), zConfig); zoie.start(); Document d1 = new Document(); StringField f1 = new StringField("num", "abcdef", Store.YES); d1.add(f1); Document d2 = new Document(); StringField f2 = new StringField("num", "abcd", Store.YES); d2.add(f2); Document d3 = new Document(); StringField f3 = new StringField("num", "abcde", Store.YES); d3.add(f3); DataEvent<DataDoc> de1 = new DataEvent<DataDoc>(new DataDoc(1, d1), "1"); DataEvent<DataDoc> de2 = new DataEvent<DataDoc>(new DataDoc(2, d2), "1"); DataEvent<DataDoc> de3 = new DataEvent<DataDoc>(new DataDoc(3, d3), "1"); try { zoie.consume(Arrays.asList(de1, de2, de3)); zoie.flushEvents(10000); List<?> readerList = zoie.getIndexReaders(); // combine the readers MultiReader reader = new MultiReader(readerList.toArray(new IndexReader[readerList.size()]), false); // do search IndexSearcher searcher = new IndexSearcher(reader); QueryParser parser = new QueryParser(Version.LUCENE_43, "num", new StandardAnalyzer( Version.LUCENE_43)); Query q = parser.parse("num:abc*"); TopDocs ret = searcher.search(q, 100); TestCase.assertEquals(3, ret.totalHits); zoie.returnIndexReaders((List) readerList); de1 = new DataEvent<DataDoc>(new DataDoc(1), "2"); de2 = new DataEvent<DataDoc>(new DataDoc(2), "2"); de3 = new DataEvent<DataDoc>(new DataDoc(3), "2"); zoie.consume(Arrays.asList(de1, de2, de3)); zoie.flushEventsToMemoryIndex(10000); readerList = zoie.getIndexReaders(); // combine the readers reader = new MultiReader(readerList.toArray(new IndexReader[readerList.size()]), false); // do search searcher = new IndexSearcher(reader); ret = searcher.search(q, 100); TestCase.assertEquals(0, ret.totalHits); zoie.returnIndexReaders((List) readerList); } catch (IOException ioe) { throw new ZoieException(ioe.getMessage()); } finally { zoie.shutdown(); deleteDirectory(idxDir); } } @Test public void testPurgeFilter() throws Exception { File idxDir = getIdxDir(); ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true, ZoieConfig.DEFAULT_VERSION_COMPARATOR, true); idxSystem.setPurgeFilter(new EvenUidPurgeFilter()); idxSystem.start(); MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>( ZoieConfig.DEFAULT_VERSION_COMPARATOR); memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE); memoryProvider.setDataConsumer(idxSystem); memoryProvider.start(); try { int count = DataForTests.testdata.length; List<DataEvent<String>> list = new ArrayList<DataEvent<String>>(count); for (int i = 0; i < count; ++i) { list.add(new DataEvent<String>(DataForTests.testdata[i], "" + i)); } memoryProvider.addEvents(list); memoryProvider.flush(); idxSystem.flushEvents(10000); List<ZoieMultiReader<IndexReader>> readers = idxSystem.getIndexReaders(); MultiReader multiReader = new MultiReader(readers.toArray(new IndexReader[0]), false); IndexSearcher searcher = new IndexSearcher(multiReader); int numDocs = searcher.search(new MatchAllDocsQuery(), 10).totalHits; log.info("numdocs: " + numDocs); TestCase.assertEquals(10, numDocs); idxSystem.returnIndexReaders(readers); idxSystem.getAdminMBean().flushToDiskIndex(); idxSystem.refreshDiskReader(); readers = idxSystem.getIndexReaders(); multiReader = new MultiReader(readers.toArray(new IndexReader[0]), false); searcher = new IndexSearcher(multiReader); numDocs = searcher.search(new MatchAllDocsQuery(), 10).totalHits; numDocs = multiReader.numDocs(); log.info("new numdocs: " + numDocs); TestCase.assertTrue("numdDocs should be 5, but it is " + numDocs, numDocs == 5); idxSystem.returnIndexReaders(readers); } catch (IOException ioe) { throw new ZoieException(ioe.getMessage()); } finally { memoryProvider.stop(); idxSystem.shutdown(); deleteDirectory(idxDir); } } @Test public void testStore() throws ZoieException { File idxDir = getIdxDir(); ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true, ZoieConfig.DEFAULT_VERSION_COMPARATOR); idxSystem.start(); MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>( ZoieConfig.DEFAULT_VERSION_COMPARATOR); memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE); memoryProvider.setDataConsumer(idxSystem); memoryProvider.start(); try { int count = DataForTests.testdata.length; List<DataEvent<String>> list = new ArrayList<DataEvent<String>>(count); for (int i = 0; i < count; ++i) { list.add(new DataEvent<String>(DataForTests.testdata[i], "" + i)); } memoryProvider.addEvents(list); memoryProvider.flush(); idxSystem.flushEvents(5000); List<ZoieMultiReader<IndexReader>> readers = idxSystem.getIndexReaders(); BytesRef data = null; for (ZoieMultiReader<IndexReader> r : readers) { data = r.getStoredValue(((Integer.MAX_VALUE) * 2L)); if (data != null) break; } TestCase.assertNotNull(data); String val = data.utf8ToString(); String[] parts = val.split(" "); long id = Long.parseLong(parts[parts.length - 1]); TestCase.assertEquals(0L, id); data = null; for (ZoieMultiReader<IndexReader> r : readers) { data = r.getStoredValue(((Integer.MAX_VALUE) * 2L) + 1L); if (data != null) break; } TestCase.assertNull(data); idxSystem.returnIndexReaders(readers); } catch (IOException ioe) { throw new ZoieException(ioe.getMessage()); } finally { memoryProvider.stop(); idxSystem.shutdown(); deleteDirectory(idxDir); } } // hao: test for new zoieVersion @Test public void testRealtime() throws ZoieException { File idxDir = getIdxDir(); ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true, ZoieConfig.DEFAULT_VERSION_COMPARATOR); idxSystem.start(); String query = "zoie"; QueryParser parser = new QueryParser(Version.LUCENE_43, "contents", idxSystem.getAnalyzer()); Query q = null; try { q = parser.parse(query); } catch (Exception e) { throw new ZoieException(e.getMessage(), e); } MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>( ZoieConfig.DEFAULT_VERSION_COMPARATOR); memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE); memoryProvider.setDataConsumer(idxSystem); memoryProvider.start(); try { int count = DataForTests.testdata.length; List<DataEvent<String>> list = new ArrayList<DataEvent<String>>(count); for (int i = 0; i < count; ++i) { list.add(new DataEvent<String>(DataForTests.testdata[i], "" + i)); } memoryProvider.addEvents(list); idxSystem.syncWithVersion(10000, "" + (count - 1)); int repeat = 20; int idx = 0; int[] results = new int[repeat]; int[] expected = new int[repeat]; Arrays.fill(expected, count); // should be consumed by the idxing system IndexSearcher searcher = null; MultiReader reader = null; List<ZoieMultiReader<IndexReader>> readers = null; for (int i = 0; i < repeat; ++i) { try { readers = idxSystem.getIndexReaders(); reader = new MultiReader(readers.toArray(new IndexReader[readers.size()]), false); searcher = new IndexSearcher(reader); TopDocs hits = searcher.search(q, 10); results[idx++] = hits.totalHits; } finally { try { if (searcher != null) { searcher = null; reader.close(); reader = null; } } finally { idxSystem.returnIndexReaders(readers); } } try { Thread.sleep(30); } catch (InterruptedException e) { e.printStackTrace(); } } assertEquals("maybe race condition in disk flush", Arrays.toString(expected), Arrays.toString(results)); } catch (IOException ioe) { throw new ZoieException(ioe.getMessage()); } finally { memoryProvider.stop(); idxSystem.shutdown(); deleteDirectory(idxDir); } } @Test public void testStreamDataProvider() throws ZoieException { MockDataLoader<Integer> consumer = new MockDataLoader<Integer>(); MemoryStreamDataProvider<Integer> memoryProvider = new MemoryStreamDataProvider<Integer>( ZoieConfig.DEFAULT_VERSION_COMPARATOR); memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE); memoryProvider.setDataConsumer(consumer); memoryProvider.start(); try { int count = 10; List<DataEvent<Integer>> list = new ArrayList<DataEvent<Integer>>(count); for (int i = 0; i < count; ++i) { list.add(new DataEvent<Integer>(i, "" + i)); } memoryProvider.addEvents(list); memoryProvider.syncWithVersion(10000, "" + (count - 1)); int num = consumer.getCount(); assertEquals(num, count); } finally { memoryProvider.stop(); } } @Test public void testStreamDataProviderFlush() throws ZoieException { MockDataLoader<Integer> consumer = new MockDataLoader<Integer>(); MemoryStreamDataProvider<Integer> memoryProvider = new MemoryStreamDataProvider<Integer>( ZoieConfig.DEFAULT_VERSION_COMPARATOR); memoryProvider.setBatchSize(100); memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE); memoryProvider.setDataConsumer(consumer); memoryProvider.start(); try { int count = 10; List<DataEvent<Integer>> list = new ArrayList<DataEvent<Integer>>(count); for (int i = 0; i < count; ++i) { list.add(new DataEvent<Integer>(i, "" + i)); } memoryProvider.addEvents(list); memoryProvider.flush(); int num = consumer.getCount(); assertEquals(num, count); } finally { memoryProvider.stop(); } } @Test public void testAsyncDataConsumer() throws ZoieException { final long[] delays = { 0L, 10L, 100L, 1000L }; // final long[] delays = {100L, 1000L }; final int[] batchSizes = { 1, 10, 100, 1000, 1000 }; final int count = 1000; // final int count=TestData.testdata.length; final long timeout = 10000L; for (long delay : delays) { for (int batchSize : batchSizes) { if (delay * (count / batchSize + 1) > timeout) { continue; // skip this combination. it will take too long. } MockDataLoader<Integer> mockLoader = new MockDataLoader<Integer>(); mockLoader.setDelay(delay); AsyncDataConsumer<Integer> asyncConsumer = new AsyncDataConsumer<Integer>( ZoieConfig.DEFAULT_VERSION_COMPARATOR); asyncConsumer.setDataConsumer(mockLoader); asyncConsumer.setBatchSize(batchSize); asyncConsumer.start(); MemoryStreamDataProvider<Integer> memoryProvider = new MemoryStreamDataProvider<Integer>( ZoieConfig.DEFAULT_VERSION_COMPARATOR); memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE); memoryProvider.setDataConsumer(asyncConsumer); memoryProvider.start(); memoryProvider.setBatchSize(batchSize); try { List<DataEvent<Integer>> list = new ArrayList<DataEvent<Integer>>(count); for (int i = 0; i < count; ++i) { list.add(new DataEvent<Integer>(i, "" + i)); } memoryProvider.addEvents(list); boolean done = false; long start = System.currentTimeMillis(); while (!done) { try { asyncConsumer.syncWithVersion(timeout, "" + (count - 1)); done = true; } catch (ZoieException e) { if (!e.getMessage().contains("sync timed out")) throw e; else System.out .println("sync time out could be legit for slow systems. Elapsed time: " + (System.currentTimeMillis() - start) + "ms"); if (System.currentTimeMillis() - start > 600000L) throw e; } } int num = mockLoader.getCount(); assertEquals("batchSize=" + batchSize, num, count); assertTrue("batch not working", (mockLoader.getMaxBatch() > 1 || mockLoader.getMaxBatch() == batchSize)); } finally { memoryProvider.stop(); asyncConsumer.stop(); } } } } @Test public void testDelSet() throws ZoieException { for (int i = 0; i < 2; i++) { testDelSetImpl(); } } private void testDelSetImpl() throws ZoieException { File idxDir = getIdxDir(); final ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true, 100, ZoieConfig.DEFAULT_VERSION_COMPARATOR); idxSystem.getAdminMBean().setFreshness(50); idxSystem.start(); int numThreads = 5; QueryThread[] queryThreads = new QueryThread[numThreads]; for (int i = 0; i < queryThreads.length; i++) { queryThreads[i] = new QueryThread() { @Override public void run() { final String query = "zoie"; QueryParser parser = new QueryParser(Version.LUCENE_43, "contents", idxSystem.getAnalyzer()); Query q; try { q = parser.parse(query); } catch (Exception e) { exception = e; return; } int expected = DataForTests.testdata.length; while (!stop) { IndexSearcher searcher = null; List<ZoieMultiReader<IndexReader>> readers = null; MultiReader reader = null; try { readers = idxSystem.getIndexReaders(); IndexReader[] subReaders = new IndexReader[readers.size()]; for (int i = 0; i < subReaders.length; ++i) { subReaders[i] = readers.get(i); } reader = new MultiReader(subReaders, false); searcher = new IndexSearcher(reader); TopDocs hits = searcher.search(q, 10); int count = hits.totalHits; if (count != expected) { mismatch = true; message = "hit count: " + count + " / expected: " + expected; stop = true; StringBuffer sb = new StringBuffer(); sb.append(message + "\n"); sb.append("each\n"); sb.append(groupDump(readers, q)); sb.append("main\n"); sb.append(dump(reader, hits)); System.out.println(sb.toString()); log.info(sb.toString()); } Thread.sleep(2); } catch (Exception ex) { ex.printStackTrace(); exception = ex; stop = true; } finally { try { if (searcher != null) { reader.close(); reader = null; searcher = null; } } catch (IOException ioe) { log.error(ioe.getMessage(), ioe); } finally { idxSystem.returnIndexReaders(readers); } } } } private String groupDump(List<ZoieMultiReader<IndexReader>> readers, Query q) throws IOException { StringBuffer sb = new StringBuffer(); for (ZoieMultiReader<IndexReader> reader : readers) { sb.append(reader).append("\n"); IndexSearcher searcher = new IndexSearcher(reader); TopDocs hits = searcher.search(q, 20); sb.append(dump(reader, hits)); searcher = null; } return sb.toString(); } private String dump(IndexReader reader, TopDocs hits) throws CorruptIndexException, IOException { StringBuffer sb = new StringBuffer(); ScoreDoc[] sd = hits.scoreDocs; long[] uids = new long[sd.length]; for (int i = 0; i < sd.length; i++) { Document doc = reader.document(sd[i].doc); uids[i] = Long.parseLong(doc.get("id")); } sb.append(Thread.currentThread() + Arrays.toString(uids)).append("\n"); int max = reader.maxDoc(); uids = new long[max]; for (int i = 0; i < max; i++) { Document doc = reader.document(i); uids[i] = Long.parseLong(doc.get("id")); } sb.append("uids: " + Arrays.toString(uids)).append("\n"); return sb.toString(); } }; queryThreads[i].setDaemon(true); } MemoryStreamDataProvider<String> memoryProvider = new MemoryStreamDataProvider<String>( ZoieConfig.DEFAULT_VERSION_COMPARATOR); memoryProvider.setMaxEventsPerMinute(Long.MAX_VALUE); memoryProvider.setDataConsumer(idxSystem); memoryProvider.start(); try { idxSystem.setBatchSize(10); final int count = DataForTests.testdata.length; List<DataEvent<String>> list = new ArrayList<DataEvent<String>>(count); for (int i = 0; i < count; i++) { list.add(new DataEvent<String>(DataForTests.testdata[i], "" + i)); } memoryProvider.addEvents(list); idxSystem.syncWithVersion(100000, "" + (count - 1)); for (QueryThread queryThread : queryThreads) queryThread.start(); for (int n = 1; n <= 3; n++) { for (int i = 0; i < count; i++) { long version = n * count + i; list = new ArrayList<DataEvent<String>>(1); list.add(new DataEvent<String>(DataForTests.testdata[i], "" + version)); memoryProvider.addEvents(list); idxSystem.syncWithVersion(100000, "" + version); } boolean stopNow = false; for (QueryThread queryThread : queryThreads) stopNow |= queryThread.stop; if (stopNow) break; } for (QueryThread queryThread : queryThreads) queryThread.stop = true; // stop all query threads for (QueryThread queryThread : queryThreads) { queryThread.join(); assertTrue("count mismatch[" + queryThread.message + "]", !queryThread.mismatch); } } catch (Exception e) { for (QueryThread queryThread : queryThreads) { if (queryThread.exception == null) throw new ZoieException(e); } } finally { memoryProvider.stop(); idxSystem.shutdown(); deleteDirectory(idxDir); } System.out.println(" done round"); log.info(" done round"); for (QueryThread queryThread : queryThreads) { if (queryThread.exception != null) throw new ZoieException(queryThread.exception); } } @Test public void testDocIDMapper() { final long[] uidList = new long[500000]; long[] qryList = new long[100000]; int intersection = 10000; int del = 5; int[] ansList1 = new int[qryList.length]; int[] ansList2 = new int[qryList.length]; java.util.Random rand = new java.util.Random(System.currentTimeMillis()); DocIDMapperImpl mapper = null; for (int k = 0; k < 10; k++) { java.util.HashSet<Long> uidset = new java.util.HashSet<Long>(); java.util.HashSet<Long> qryset = new java.util.HashSet<Long>(); long id; for (int i = 0; i < intersection; i++) { do { id = rand.nextInt() + (Integer.MAX_VALUE) * 2L; } while (id == ZoieSegmentReader.DELETED_UID || uidset.contains(id)); uidset.add(id); uidList[i] = (i % del) > 0 ? id : ZoieSegmentReader.DELETED_UID; qryList[i] = id; ansList1[i] = (i % del) > 0 ? i : -1; } for (int i = intersection; i < uidList.length; i++) { do { id = rand.nextInt() + (Integer.MAX_VALUE) * 2L; } while (id == ZoieSegmentReader.DELETED_UID || uidset.contains(id)); uidset.add(id); uidList[i] = (i % del) > 0 ? id : ZoieSegmentReader.DELETED_UID; } for (int i = intersection; i < qryList.length; i++) { do { id = rand.nextInt() + (Integer.MAX_VALUE) * 2L; } while (id == ZoieSegmentReader.DELETED_UID || uidset.contains(id) || qryset.contains(id)); qryset.add(id); qryList[i] = id; ansList1[i] = -1; } mapper = new DocIDMapperImpl(uidList); for (int i = 0; i < qryList.length; i++) { ansList2[i] = mapper.getDocID(qryList[i]); } assertTrue("wrong result", Arrays.equals(ansList1, ansList2)); } } @Test public void testExportImport() throws ZoieException, IOException { File idxDir = getIdxDir(); final ZoieSystem<IndexReader, String> idxSystem = createZoie(idxDir, true, ZoieConfig.DEFAULT_VERSION_COMPARATOR); idxSystem.start(); DirectoryManager dirMgr = new DefaultDirectoryManager(idxDir); String query = "zoie"; QueryParser parser = new QueryParser(Version.LUCENE_43, "contents", idxSystem.getAnalyzer()); Query q = null; try { q = parser.parse(query); } catch (Exception e) { throw new ZoieException(e.getMessage(), e); } try { List<DataEvent<String>> list; list = new ArrayList<DataEvent<String>>(DataForTests.testdata.length); for (int i = 0; i < DataForTests.testdata.length; ++i) { list.add(new DataEvent<String>(DataForTests.testdata[i], "" + i)); } idxSystem.consume(list); idxSystem.flushEvents(100000); assertEquals("index version mismatch after first flush", DataForTests.testdata.length - 1, DataForTests.testdata.length - 1); int hits = countHits(idxSystem, q); RandomAccessFile exportFile; FileChannel channel; exportFile = new RandomAccessFile(new File(getTmpDir(), "zoie_export.dat"), "rw"); channel = exportFile.getChannel(); idxSystem.exportSnapshot(channel); channel.close(); exportFile.close(); exportFile = null; channel = null; list = new ArrayList<DataEvent<String>>(DataForTests.testdata2.length); for (int i = 0; i < DataForTests.testdata2.length; ++i) { list.add(new DataEvent<String>(DataForTests.testdata.length + DataForTests.testdata2[i], "" + (DataForTests.testdata.length + i))); } idxSystem.consume(list); idxSystem.flushEvents(100000); String zvt = dirMgr.getVersion(); assertEquals("index version mismatch after second flush", DataForTests.testdata.length + DataForTests.testdata2.length - 1, (long) Long.valueOf(zvt)); assertEquals("should have no hits", 0, countHits(idxSystem, q)); exportFile = new RandomAccessFile(new File(getTmpDir(), "zoie_export.dat"), "r"); channel = exportFile.getChannel(); idxSystem.importSnapshot(channel); idxSystem.flushEvents(10000); channel.close(); exportFile.close(); assertEquals("count is wrong", hits, countHits(idxSystem, q)); } catch (ZoieException e) { throw e; } finally { idxSystem.shutdown(); deleteDirectory(idxDir); } } @Test public void testUIDDocIdSet() throws IOException { LongOpenHashSet uidset = new LongOpenHashSet(); int count = 100; Random rand = new Random(); int id; for (int i = 0; i < count; ++i) { do { id = rand.nextInt(); } while (id == ZoieSegmentReader.DELETED_UID || uidset.contains(id)); uidset.add(id); } long[] uidArray = uidset.toLongArray(); final long[] even = new long[uidArray.length / 2]; int[] ans = new int[even.length]; for (int i = 0; i < even.length; ++i) { even[i] = uidArray[i * 2]; ans[i] = i; } DocIDMapperImpl mapper = new DocIDMapperImpl(even); UIDDocIdSet uidSet = new UIDDocIdSet(even, mapper); DocIdSetIterator docidIter = uidSet.iterator(); IntArrayList intList = new IntArrayList(); int docid; while ((docid = docidIter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { intList.add(docid); } assertTrue("wrong result from iter", Arrays.equals(ans, intList.toIntArray())); long[] newidArray = new long[count]; for (int i = 0; i < count; ++i) { newidArray[i] = i; } mapper = new DocIDMapperImpl(newidArray); uidSet = new UIDDocIdSet(newidArray, mapper); docidIter = uidSet.iterator(); intList = new IntArrayList(); for (int i = 0; i < newidArray.length; ++i) { docid = docidIter.advance(i * 10); if (docid == DocIdSetIterator.NO_MORE_DOCS) break; intList.add(docid); docid = docidIter.nextDoc(); if (docid == DocIdSetIterator.NO_MORE_DOCS) break; intList.add(docid); } int[] answer = new int[] { 0, 1, 10, 11, 20, 21, 30, 31, 40, 41, 50, 51, 60, 61, 70, 71, 80, 81, 90, 91 }; assertTrue("wrong result from mix of next and skip", Arrays.equals(answer, intList.toIntArray())); } }