package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.store.Directory;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;

/**
 * Tests {@link MultiSearcher} class.
 */
public class TestMultiSearcher extends LuceneTestCase {

  // Randomness source for the test framework's randomized directories/configs.
  private Random random;

  @Override
  public void setUp() throws Exception {
    super.setUp();
    random = newRandom();
  }

  public TestMultiSearcher(String name) {
    super(name);
  }

  /**
   * Returns a new instance of the concrete MultiSearcher class
   * used in this test. Subclasses override this to run the same
   * tests against a different MultiSearcher implementation.
   */
  protected MultiSearcher getMultiSearcherInstance(Searcher[] searchers) throws IOException {
    return new MultiSearcher(searchers);
  }

  /**
   * Searches across a populated index and an (initially) empty index,
   * then exercises the same MultiSearcher after documents are added to
   * and deleted from the second index.
   */
  public void testEmptyIndex() throws Exception {
    // creating two directories for indices
    Directory indexStoreA = newDirectory(random);
    Directory indexStoreB = newDirectory(random);

    // creating a document to store
    Document lDoc = new Document();
    lDoc.add(new Field("fulltext", "Once upon a time.....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc.add(new Field("id", "doc1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc.add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

    // creating a document to store
    Document lDoc2 = new Document();
    lDoc2.add(new Field("fulltext", "in a galaxy far far away.....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc2.add(new Field("id", "doc2", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc2.add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

    // creating a document to store
    Document lDoc3 = new Document();
    lDoc3.add(new Field("fulltext", "a bizarre bug manifested itself....", Field.Store.YES, Field.Index.ANALYZED));
    lDoc3.add(new Field("id", "doc3", Field.Store.YES, Field.Index.NOT_ANALYZED));
    lDoc3.add(new Field("handle", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));

    // creating an index writer for the first index
    IndexWriter writerA = new IndexWriter(indexStoreA,
        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer()));
    // creating an index writer for the second index, but writing nothing
    IndexWriter writerB = new IndexWriter(indexStoreB,
        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer()));

    //--------------------------------------------------------------------
    // scenario 1: second index is completely empty
    //--------------------------------------------------------------------

    // writing the documents to the first index
    writerA.addDocument(lDoc);
    writerA.addDocument(lDoc2);
    writerA.addDocument(lDoc3);
    writerA.optimize();
    writerA.close();

    // closing the second index
    writerB.close();

    // creating the query
    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, "fulltext", new MockAnalyzer());
    Query query = parser.parse("handle:1");

    // building the searchables
    Searcher[] searchers = new Searcher[2];
    // VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index
    searchers[0] = new IndexSearcher(indexStoreB, true);
    searchers[1] = new IndexSearcher(indexStoreA, true);
    // creating the multiSearcher
    Searcher mSearcher = getMultiSearcherInstance(searchers);
    // performing the search
    ScoreDoc[] hits = mSearcher.search(query, null, 1000).scoreDocs;

    assertEquals(3, hits.length);

    // iterating over the hit documents
    for (int i = 0; i < hits.length; i++) {
      mSearcher.doc(hits[i].doc);
    }
    mSearcher.close();

    //--------------------------------------------------------------------
    // scenario 2: one document added to the previously empty index
    //--------------------------------------------------------------------

    // adding one document to the empty index
    writerB = new IndexWriter(indexStoreB,
        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer())
            .setOpenMode(OpenMode.APPEND));
    writerB.addDocument(lDoc);
    writerB.optimize();
    writerB.close();

    // building the searchables
    Searcher[] searchers2 = new Searcher[2];
    // VITAL STEP: adding the searcher for the empty index first, before the searcher for the populated index
    searchers2[0] = new IndexSearcher(indexStoreB, true);
    searchers2[1] = new IndexSearcher(indexStoreA, true);
    // creating the multiSearcher
    MultiSearcher mSearcher2 = getMultiSearcherInstance(searchers2);
    // performing the same search
    ScoreDoc[] hits2 = mSearcher2.search(query, null, 1000).scoreDocs;

    assertEquals(4, hits2.length);

    // iterating over the hit documents
    for (int i = 0; i < hits2.length; i++) {
      // no exception should happen at this point
      mSearcher2.doc(hits2[i].doc);
    }

    // test the subSearcher() method:
    Query subSearcherQuery = parser.parse("id:doc1");
    hits2 = mSearcher2.search(subSearcherQuery, null, 1000).scoreDocs;
    assertEquals(2, hits2.length);
    assertEquals(0, mSearcher2.subSearcher(hits2[0].doc)); // hit from searchers2[0]
    assertEquals(1, mSearcher2.subSearcher(hits2[1].doc)); // hit from searchers2[1]
    subSearcherQuery = parser.parse("id:doc2");
    hits2 = mSearcher2.search(subSearcherQuery, null, 1000).scoreDocs;
    assertEquals(1, hits2.length);
    assertEquals(1, mSearcher2.subSearcher(hits2[0].doc)); // hit from searchers2[1]
    mSearcher2.close();

    //--------------------------------------------------------------------
    // scenario 3: the added document is deleted again
    //--------------------------------------------------------------------

    // deleting the document just added, this will cause a different exception to take place
    Term term = new Term("id", "doc1");
    IndexReader readerB = IndexReader.open(indexStoreB, false);
    readerB.deleteDocuments(term);
    readerB.close();

    // optimizing the index with the writer
    writerB = new IndexWriter(indexStoreB,
        newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer())
            .setOpenMode(OpenMode.APPEND));
    writerB.optimize();
    writerB.close();

    // building the searchables
    Searcher[] searchers3 = new Searcher[2];
    searchers3[0] = new IndexSearcher(indexStoreB, true);
    searchers3[1] = new IndexSearcher(indexStoreA, true);
    // creating the multiSearcher
    Searcher mSearcher3 = getMultiSearcherInstance(searchers3);
    // performing the same search
    ScoreDoc[] hits3 = mSearcher3.search(query, null, 1000).scoreDocs;

    assertEquals(3, hits3.length);

    // iterating over the hit documents
    for (int i = 0; i < hits3.length; i++) {
      mSearcher3.doc(hits3[i].doc);
    }
    mSearcher3.close();
    indexStoreA.close();
    indexStoreB.close();
  }

  /**
   * Creates a document with a "contents" field (and a second "contents"
   * value when contents2 is non-null) plus a constant "other" field.
   */
  private static Document createDocument(String contents1, String contents2) {
    Document document = new Document();

    document.add(new Field("contents", contents1, Field.Store.YES, Field.Index.NOT_ANALYZED));
    document.add(new Field("other", "other contents", Field.Store.YES, Field.Index.NOT_ANALYZED));
    if (contents2 != null) {
      document.add(new Field("contents", contents2, Field.Store.YES, Field.Index.NOT_ANALYZED));
    }

    return document;
  }

  /**
   * Adds nDocs documents ("doc0".."docN", optionally with a second
   * "contents" token) to the given directory, creating or appending
   * according to the create flag.
   */
  private static void initIndex(Random random, Directory directory, int nDocs, boolean create,
      String contents2) throws IOException {
    IndexWriter indexWriter = null;

    try {
      indexWriter = new IndexWriter(directory,
          newIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer())
              .setOpenMode(create ? OpenMode.CREATE : OpenMode.APPEND));

      for (int i = 0; i < nDocs; i++) {
        indexWriter.addDocument(createDocument("doc" + i, contents2));
      }
    } finally {
      // always release the writer, even if addDocument throws
      if (indexWriter != null) {
        indexWriter.close();
      }
    }
  }

  /**
   * Verifies that a FieldSelector passed to MultiSearcher.doc() restricts
   * the loaded fields on documents from either sub-searcher.
   */
  public void testFieldSelector() throws Exception {
    Directory ramDirectory1, ramDirectory2;
    IndexSearcher indexSearcher1, indexSearcher2;

    ramDirectory1 = newDirectory(random);
    ramDirectory2 = newDirectory(random);
    Query query = new TermQuery(new Term("contents", "doc0"));

    // Now put the documents in a different index
    initIndex(random, ramDirectory1, 10, true, null); // documents with a single token "doc0", "doc1", etc...
    initIndex(random, ramDirectory2, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...

    indexSearcher1 = new IndexSearcher(ramDirectory1, true);
    indexSearcher2 = new IndexSearcher(ramDirectory2, true);

    MultiSearcher searcher = getMultiSearcherInstance(new Searcher[]{indexSearcher1, indexSearcher2});
    assertTrue("searcher is null and it shouldn't be", searcher != null);
    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue(hits.length + " does not equal: " + 2, hits.length == 2);
    Document document = searcher.doc(hits[0].doc);
    assertTrue("document is null and it shouldn't be", document != null);
    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2,
        document.getFields().size() == 2);
    //Should be one document from each directory
    //they both have two fields, contents and other
    Set<String> ftl = new HashSet<String>();
    ftl.add("other");
    SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Collections.<String>emptySet());
    document = searcher.doc(hits[0].doc, fs);
    assertTrue("document is null and it shouldn't be", document != null);
    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1,
        document.getFields().size() == 1);
    String value = document.get("contents");
    assertTrue("value is not null and it should be", value == null);
    value = document.get("other");
    assertTrue("value is null and it shouldn't be", value != null);
    ftl.clear();
    ftl.add("contents");
    fs = new SetBasedFieldSelector(ftl, Collections.<String>emptySet());
    document = searcher.doc(hits[1].doc, fs);
    value = document.get("contents");
    assertTrue("value is null and it shouldn't be", value != null);
    value = document.get("other");
    assertTrue("value is not null and it should be", value == null);
    indexSearcher1.close();
    indexSearcher2.close();
    ramDirectory1.close();
    ramDirectory2.close();
  }

  /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
 public void testNormalization1() throws IOException {
    testNormalization(1, "Using 1 document per index:");
 }
  */

  public void testNormalization10() throws IOException {
    testNormalization(10, "Using 10 documents per index:");
  }

  /**
   * Checks that hit scores from a MultiSearcher over two indexes match the
   * scores produced by a single IndexSearcher over the combined documents.
   */
  private void testNormalization(int nDocs, String message) throws IOException {
    Query query = new TermQuery(new Term("contents", "doc0"));

    Directory ramDirectory1;
    IndexSearcher indexSearcher1;
    ScoreDoc[] hits;

    ramDirectory1 = newDirectory(random);

    // First put the documents in the same index
    initIndex(random, ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
    initIndex(random, ramDirectory1, nDocs, false, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...

    indexSearcher1 = new IndexSearcher(ramDirectory1, true);
    indexSearcher1.setDefaultFieldSortScoring(true, true);

    hits = indexSearcher1.search(query, null, 1000).scoreDocs;

    assertEquals(message, 2, hits.length);

    // Store the scores for use later
    float[] scores = { hits[0].score, hits[1].score };

    assertTrue(message, scores[0] > scores[1]);

    indexSearcher1.close();
    ramDirectory1.close();
    hits = null;

    Directory ramDirectory2;
    IndexSearcher indexSearcher2;

    ramDirectory1 = newDirectory(random);
    ramDirectory2 = newDirectory(random);

    // Now put the documents in a different index
    initIndex(random, ramDirectory1, nDocs, true, null); // documents with a single token "doc0", "doc1", etc...
    initIndex(random, ramDirectory2, nDocs, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...

    indexSearcher1 = new IndexSearcher(ramDirectory1, true);
    indexSearcher1.setDefaultFieldSortScoring(true, true);
    indexSearcher2 = new IndexSearcher(ramDirectory2, true);
    indexSearcher2.setDefaultFieldSortScoring(true, true);

    Searcher searcher = getMultiSearcherInstance(new Searcher[] { indexSearcher1, indexSearcher2 });

    hits = searcher.search(query, null, 1000).scoreDocs;

    assertEquals(message, 2, hits.length);

    // The scores should be the same (within reason)
    assertEquals(message, scores[0], hits[0].score, 1e-6); // This will be a document from ramDirectory1
    assertEquals(message, scores[1], hits[1].score, 1e-6); // This will be a document from ramDirectory2

    // Adding a Sort.RELEVANCE object should not change anything
    hits = searcher.search(query, null, 1000, Sort.RELEVANCE).scoreDocs;

    assertEquals(message, 2, hits.length);

    assertEquals(message, scores[0], hits[0].score, 1e-6); // This will be a document from ramDirectory1
    assertEquals(message, scores[1], hits[1].score, 1e-6); // This will be a document from ramDirectory2

    searcher.close();

    ramDirectory1.close();
    ramDirectory2.close();
  }

  /**
   * test that custom similarity is in effect when using MultiSearcher (LUCENE-789).
   * @throws IOException
   */
  public void testCustomSimilarity() throws IOException {
    Directory dir = newDirectory(random);
    initIndex(random, dir, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
    IndexSearcher srchr = new IndexSearcher(dir, true);
    MultiSearcher msrchr = getMultiSearcherInstance(new Searcher[]{srchr});

    Similarity customSimilarity = new DefaultSimilarity() {
      // override all scoring factors with constants so the only variable is the searcher used
      @Override
      public float idf(int docFreq, int numDocs) { return 100.0f; }
      @Override
      public float coord(int overlap, int maxOverlap) { return 1.0f; }
      @Override
      public float lengthNorm(String fieldName, int numTokens) { return 1.0f; }
      @Override
      public float queryNorm(float sumOfSquaredWeights) { return 1.0f; }
      @Override
      public float sloppyFreq(int distance) { return 1.0f; }
      @Override
      public float tf(float freq) { return 1.0f; }
    };

    srchr.setSimilarity(customSimilarity);
    msrchr.setSimilarity(customSimilarity);

    Query query = new TermQuery(new Term("contents", "doc0"));

    // Get a score from IndexSearcher
    TopDocs topDocs = srchr.search(query, null, 1);
    float score1 = topDocs.getMaxScore();

    // Get the score from MultiSearcher
    topDocs = msrchr.search(query, null, 1);
    float scoreN = topDocs.getMaxScore();

    // The scores from the IndexSearcher and Multisearcher should be the same
    // if the same similarity is used.
    assertEquals("MultiSearcher score must be equal to single searcher score!", score1, scoreN, 1e-6);
    msrchr.close();
    srchr.close();
    dir.close();
  }

  /**
   * docFreq() of a MultiSearcher must be the sum of the sub-searchers' docFreqs.
   */
  public void testDocFreq() throws IOException {
    Directory dir1 = newDirectory(random);
    Directory dir2 = newDirectory(random);

    initIndex(random, dir1, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
    initIndex(random, dir2, 5, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
    IndexSearcher searcher1 = new IndexSearcher(dir1, true);
    IndexSearcher searcher2 = new IndexSearcher(dir2, true);

    MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2});
    assertEquals(15, multiSearcher.docFreq(new Term("contents", "x")));
    multiSearcher.close();
    searcher1.close();
    searcher2.close();
    dir1.close();
    dir2.close();
  }

  /**
   * createDocFrequencyMap() must report, per term, the summed docFreq
   * across all sub-searchers.
   */
  public void testCreateDocFrequencyMap() throws IOException {
    Directory dir1 = newDirectory(random);
    Directory dir2 = newDirectory(random);
    Term template = new Term("contents");
    String[] contents = {"a", "b", "c"};
    HashSet<Term> termsSet = new HashSet<Term>();
    for (int i = 0; i < contents.length; i++) {
      // dir1 gets i+10 docs for contents[i], dir2 gets i+5 → expected sum is (i*2)+15
      initIndex(random, dir1, i + 10, i == 0, contents[i]);
      initIndex(random, dir2, i + 5, i == 0, contents[i]);
      termsSet.add(template.createTerm(contents[i]));
    }
    IndexSearcher searcher1 = new IndexSearcher(dir1, true);
    IndexSearcher searcher2 = new IndexSearcher(dir2, true);
    MultiSearcher multiSearcher = getMultiSearcherInstance(new Searcher[]{searcher1, searcher2});
    Map<Term, Integer> docFrequencyMap = multiSearcher.createDocFrequencyMap(termsSet);
    assertEquals(3, docFrequencyMap.size());
    for (int i = 0; i < contents.length; i++) {
      assertEquals(Integer.valueOf((i * 2) + 15), docFrequencyMap.get(template.createTerm(contents[i])));
    }
    multiSearcher.close();
    searcher1.close();
    searcher2.close();
    dir1.close();
    dir2.close();
  }
}