package org.apache.lucene.facet.taxonomy.directory; import java.io.File; import org.apache.lucene.store.Directory; import org.junit.Test; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ public class TestAddTaxonomies extends LuceneTestCase { @Test public void test1() throws Exception { Directory dir1 = newDirectory(); DirectoryTaxonomyWriter tw1 = new DirectoryTaxonomyWriter(dir1); tw1.addCategory(new CategoryPath("Author", "Mark Twain")); tw1.addCategory(new CategoryPath("Animals", "Dog")); Directory dir2 = newDirectory(); DirectoryTaxonomyWriter tw2 = new DirectoryTaxonomyWriter(dir2); tw2.addCategory(new CategoryPath("Author", "Rob Pike")); tw2.addCategory(new CategoryPath("Aardvarks", "Bob")); tw2.close(); Directory dir3 = newDirectory(); DirectoryTaxonomyWriter tw3 = new DirectoryTaxonomyWriter(dir3); tw3.addCategory(new CategoryPath("Author", "Zebra Smith")); tw3.addCategory(new CategoryPath("Aardvarks", "Bob")); tw3.addCategory(new CategoryPath("Aardvarks", "Aaron")); tw3.close(); MemoryOrdinalMap[] maps = new MemoryOrdinalMap[2]; maps[0] = new MemoryOrdinalMap(); maps[1] = new MemoryOrdinalMap(); tw1.addTaxonomies(new Directory[] { dir2, dir3 }, maps); tw1.close(); TaxonomyReader tr = new DirectoryTaxonomyReader(dir1); // Test that the merged taxonomy now contains what we expect: // First all the categories of the original taxonomy, in their original order: assertEquals(tr.getPath(0).toString(), ""); assertEquals(tr.getPath(1).toString(), "Author"); assertEquals(tr.getPath(2).toString(), "Author/Mark Twain"); assertEquals(tr.getPath(3).toString(), "Animals"); assertEquals(tr.getPath(4).toString(), "Animals/Dog"); // Then the categories new in the new taxonomy, in alphabetical order: assertEquals(tr.getPath(5).toString(), "Aardvarks"); assertEquals(tr.getPath(6).toString(), "Aardvarks/Aaron"); assertEquals(tr.getPath(7).toString(), "Aardvarks/Bob"); assertEquals(tr.getPath(8).toString(), "Author/Rob Pike"); assertEquals(tr.getPath(9).toString(), "Author/Zebra Smith"); assertEquals(tr.getSize(), 10); // Test that the maps contain what we expect int[] map0 = maps[0].getMap(); assertEquals(5, map0.length); assertEquals(0, map0[0]); assertEquals(1, map0[1]); assertEquals(8, map0[2]); assertEquals(5, map0[3]); assertEquals(7, map0[4]); int[] map1 = maps[1].getMap(); assertEquals(6, map1.length); assertEquals(0, map1[0]); assertEquals(1, map1[1]); assertEquals(9, map1[2]); assertEquals(5, map1[3]); assertEquals(7, map1[4]); assertEquals(6, map1[5]); tr.close(); dir1.close(); dir2.close(); dir3.close(); } // a reasonable random test public void testmedium() throws Exception { int numTests = atLeast(3); for (int i = 0; i < numTests; i++) { dotest(_TestUtil.nextInt(random, 1, 10), _TestUtil.nextInt(random, 1, 100), _TestUtil.nextInt(random, 100, 1000), random.nextBoolean()); } } // A more comprehensive and big random test. @Test @Nightly public void testbig() throws Exception { dotest(2, 1000, 5000, false); dotest(10, 10000, 100, false); dotest(50, 20, 100, false); dotest(10, 1000, 10000, false); dotest(50, 20, 10000, false); dotest(1, 20, 10000, false); dotest(10, 1, 10000, false); dotest(10, 1000, 20000, true); } private void dotest(int ntaxonomies, int ncats, int range, boolean disk) throws Exception { Directory dirs[] = new Directory[ntaxonomies]; Directory copydirs[] = new Directory[ntaxonomies]; for (int i=0; i<ntaxonomies; i++) { dirs[i] = newDirectory(); copydirs[i] = newDirectory(); DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[i]); DirectoryTaxonomyWriter copytw = new DirectoryTaxonomyWriter(copydirs[i]); for (int j=0; j<ncats; j++) { String cat = Integer.toString(random.nextInt(range)); tw.addCategory(new CategoryPath("a",cat)); copytw.addCategory(new CategoryPath("a",cat)); } // System.err.println("Taxonomy "+i+": "+tw.getSize()); tw.close(); copytw.close(); } DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0]); Directory otherdirs[] = new Directory[ntaxonomies-1]; System.arraycopy(dirs, 1, otherdirs, 0, ntaxonomies-1); OrdinalMap[] maps = new OrdinalMap[ntaxonomies-1]; if (ntaxonomies>1) { for (int i=0; i<ntaxonomies-1; i++) { if (disk) { // TODO: use a LTC tempfile maps[i] = new DiskOrdinalMap(new File(System.getProperty("java.io.tmpdir"), "tmpmap"+i)); } else { maps[i] = new MemoryOrdinalMap(); } } } tw.addTaxonomies(otherdirs, maps); // System.err.println("Merged axonomy: "+tw.getSize()); tw.close(); // Check that all original categories in the main taxonomy remain in // unchanged, and the rest of the taxonomies are completely unchanged. for (int i=0; i<ntaxonomies; i++) { TaxonomyReader tr = new DirectoryTaxonomyReader(dirs[i]); TaxonomyReader copytr = new DirectoryTaxonomyReader(copydirs[i]); if (i==0) { assertTrue(tr.getSize() >= copytr.getSize()); } else { assertEquals(copytr.getSize(), tr.getSize()); } for (int j=0; j<copytr.getSize(); j++) { String expected = copytr.getPath(j).toString(); String got = tr.getPath(j).toString(); assertTrue("Comparing category "+j+" of taxonomy "+i+": expected "+expected+", got "+got, expected.equals(got)); } tr.close(); copytr.close(); } // Check that all the new categories in the main taxonomy are in // lexicographic order. This isn't a requirement of our API, but happens // this way in our current implementation. TaxonomyReader tr = new DirectoryTaxonomyReader(dirs[0]); TaxonomyReader copytr = new DirectoryTaxonomyReader(copydirs[0]); if (tr.getSize() > copytr.getSize()) { String prev = tr.getPath(copytr.getSize()).toString(); for (int j=copytr.getSize()+1; j<tr.getSize(); j++) { String n = tr.getPath(j).toString(); assertTrue(prev.compareTo(n)<0); prev=n; } } int oldsize = copytr.getSize(); // remember for later tr.close(); copytr.close(); // Check that all the categories from other taxonomies exist in the new // taxonomy. TaxonomyReader main = new DirectoryTaxonomyReader(dirs[0]); for (int i=1; i<ntaxonomies; i++) { TaxonomyReader other = new DirectoryTaxonomyReader(dirs[i]); for (int j=0; j<other.getSize(); j++) { int otherord = main.getOrdinal(other.getPath(j)); assertTrue(otherord != TaxonomyReader.INVALID_ORDINAL); } other.close(); } // Check that all the new categories in the merged taxonomy exist in // one of the added taxonomies. TaxonomyReader[] others = new TaxonomyReader[ntaxonomies-1]; for (int i=1; i<ntaxonomies; i++) { others[i-1] = new DirectoryTaxonomyReader(dirs[i]); } for (int j=oldsize; j<main.getSize(); j++) { boolean found=false; CategoryPath path = main.getPath(j); for (int i=1; i<ntaxonomies; i++) { if (others[i-1].getOrdinal(path) != TaxonomyReader.INVALID_ORDINAL) { found=true; break; } } if (!found) { fail("Found category "+j+" ("+path+") in merged taxonomy not in any of the separate ones"); } } // Check that all the maps are correct for (int i=0; i<ntaxonomies-1; i++) { int[] map = maps[i].getMap(); for (int j=0; j<map.length; j++) { assertEquals(map[j], main.getOrdinal(others[i].getPath(j))); } } for (int i=1; i<ntaxonomies; i++) { others[i-1].close(); } main.close(); IOUtils.close(dirs); IOUtils.close(copydirs); } }