package org.apache.lucene.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Scorer.ChildScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

// TODO: refactor to a base class, that collects freqs from the scorer tree
// and test all queries with it
/**
 * Indexes three small two-field documents and checks, for several
 * {@link BooleanQuery} shapes, the per-document sum of {@code freq()} over the
 * {@link TermQuery} scorers found by walking the scorer tree handed to a
 * {@link Collector}.
 */
public class TestBooleanQueryVisitSubscorers extends LuceneTestCase {
  Analyzer analyzer;
  IndexReader reader;
  IndexSearcher searcher;
  Directory dir;

  static final String F1 = "title";
  static final String F2 = "body";

  /** Builds the three-document index and opens a searcher over it. */
  @Override
  public void setUp() throws Exception {
    super.setUp();
    analyzer = new MockAnalyzer(random());
    dir = newDirectory();
    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    config.setMergePolicy(newLogMergePolicy()); // we will use docids to validate
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

    // { title, body } pairs, added in this order; the tests address them as docs 0, 1 and 2.
    final String[][] fixtures = {
      {"lucene", "lucene is a very popular search engine library"},
      {"solr", "solr is a very popular search server and is using lucene"},
      {"nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"},
    };
    for (String[] fixture : fixtures) {
      writer.addDocument(doc(fixture[0], fixture[1]));
    }

    reader = writer.getReader();
    writer.close();
    searcher = newSearcher(reader);
  }

  /** Closes the reader, then the directory, then delegates to the superclass. */
  @Override
  public void tearDown() throws Exception {
    reader.close();
    dir.close();
    super.tearDown();
  }

  /** Three optional term clauses in one flat boolean query. */
  public void testDisjunctions() throws IOException {
    BooleanQuery query = new BooleanQuery();
    addTerm(query, F1, "lucene", BooleanClause.Occur.SHOULD);
    addTerm(query, F2, "lucene", BooleanClause.Occur.SHOULD);
    addTerm(query, F2, "search", BooleanClause.Occur.SHOULD);

    assertDocCounts(getDocCounts(searcher, query),
        3,  // f1:lucene + f2:lucene + f2:search
        2,  // f2:search + f2:lucene
        2); // f2:search + f2:lucene
  }

  /** Same terms as {@link #testDisjunctions()}, with the body terms in a nested boolean query. */
  public void testNestedDisjunctions() throws IOException {
    BooleanQuery outer = new BooleanQuery();
    addTerm(outer, F1, "lucene", BooleanClause.Occur.SHOULD);

    BooleanQuery inner = new BooleanQuery();
    addTerm(inner, F2, "lucene", BooleanClause.Occur.SHOULD);
    addTerm(inner, F2, "search", BooleanClause.Occur.SHOULD);
    outer.add(inner, BooleanClause.Occur.SHOULD);

    assertDocCounts(getDocCounts(searcher, outer),
        3,  // f1:lucene + f2:lucene + f2:search
        2,  // f2:search + f2:lucene
        2); // f2:search + f2:lucene
  }

  /** Two required term clauses on the body field. */
  public void testConjunctions() throws IOException {
    BooleanQuery query = new BooleanQuery();
    addTerm(query, F2, "lucene", BooleanClause.Occur.MUST);
    addTerm(query, F2, "is", BooleanClause.Occur.MUST);

    assertDocCounts(getDocCounts(searcher, query),
        2,  // f2:lucene + f2:is
        3,  // f2:is + f2:is + f2:lucene
        3); // f2:is + f2:is + f2:lucene
  }

  /** Adds a {@link TermQuery} on {@code field:text} to {@code target} with the given occurrence. */
  private static void addTerm(BooleanQuery target, String field, String text,
                              BooleanClause.Occur occur) {
    target.add(new TermQuery(new Term(field, text)), occur);
  }

  /**
   * Asserts that {@code counts} holds exactly {@code expected.length} entries and that
   * the entry keyed by each index {@code i} equals {@code expected[i]}.
   */
  private static void assertDocCounts(Map<Integer,Integer> counts, int... expected) {
    assertEquals(expected.length, counts.size()); // one entry per expected document
    for (int docId = 0; docId < expected.length; docId++) {
      assertEquals(expected[docId], counts.get(docId).intValue());
    }
  }

  /** Creates a document with {@code v1} in the title field and {@code v2} in the body field, both stored. */
  static Document doc(String v1, String v2) {
    Document result = new Document();
    result.add(new TextField(F1, v1, Store.YES));
    result.add(new TextField(F2, v2, Store.YES));
    return result;
  }

  /** Runs {@code query} through a fresh {@link MyCollector} and returns its doc-to-frequency map. */
  static Map<Integer,Integer> getDocCounts(IndexSearcher searcher, Query query) throws IOException {
    MyCollector freqCollector = new MyCollector();
    searcher.search(query, freqCollector);
    return freqCollector.docCounts;
  }

  /**
   * Collector that forwards every call to a top-10 score collector and, for each
   * collected document, records the summed {@code freq()} of the term-query scorers
   * currently positioned on that document.
   */
  static class MyCollector extends Collector {
    private final TopDocsCollector<ScoreDoc> collector;
    private int docBase;

    /** Summed leaf frequency per collected document, keyed by {@code docBase + doc}. */
    public final Map<Integer,Integer> docCounts = new HashMap<>();

    /** Term-query scorers gathered from the scorer most recently passed to {@link #setScorer}. */
    private final Set<Scorer> tqsSet = new HashSet<>();

    MyCollector() {
      collector = TopScoreDocCollector.create(10, true);
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
      return false;
    }

    @Override
    public void collect(int doc) throws IOException {
      int total = 0;
      for (Scorer leaf : tqsSet) {
        if (leaf.docID() != doc) {
          continue; // this leaf is not positioned on the collected document
        }
        total += leaf.freq();
      }
      docCounts.put(docBase + doc, total);
      collector.collect(doc);
    }

    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {
      this.docBase = context.docBase;
      collector.setNextReader(context);
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {
      collector.setScorer(scorer);
      // Rebuild the leaf set from the scorer tree just handed in.
      tqsSet.clear();
      fillLeaves(scorer, tqsSet);
    }

    /**
     * Adds {@code scorer} to {@code set} when its weight's query is a {@link TermQuery};
     * otherwise recurses into each of its child scorers.
     */
    private void fillLeaves(Scorer scorer, Set<Scorer> set) {
      final boolean isTermLeaf = scorer.getWeight().getQuery() instanceof TermQuery;
      if (isTermLeaf) {
        set.add(scorer);
        return;
      }
      for (ChildScorer link : scorer.getChildren()) {
        fillLeaves(link.child, set);
      }
    }

    /** Returns the top docs gathered by the wrapped score collector. */
    public TopDocs topDocs() {
      return collector.topDocs();
    }

    /** Returns the frequency recorded in {@link #docCounts} under key {@code doc}. */
    public int freq(int doc) throws IOException {
      return docCounts.get(doc).intValue();
    }
  }
}