package org.apache.lucene.queries;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

/**
 * Tests for {@link TermsFilter}: equality and hashing, the doc id sets it produces against
 * small indexes, rejection of empty term lists, and its string rendering.
 */
public class TermsFilterTest extends LuceneTestCase {

  /**
   * Checks that filters built from the same set of terms are interchangeable as hash-set keys,
   * regardless of term order or duplicated terms, and that an extra term breaks that equality.
   */
  public void testCachability() throws Exception {
    TermsFilter a = termsFilter(random().nextBoolean(),
        new Term("field1", "a"), new Term("field1", "b"));
    Set<Filter> cachedFilters = new HashSet<>();
    cachedFilters.add(a);

    // same terms, different order
    TermsFilter b = termsFilter(random().nextBoolean(),
        new Term("field1", "b"), new Term("field1", "a"));
    assertTrue("Must be cached", cachedFilters.contains(b));

    // duplicate term
    assertTrue("Must be cached", cachedFilters.contains(
        termsFilter(true,
            new Term("field1", "a"), new Term("field1", "a"), new Term("field1", "b"))));

    // one additional term: must no longer be equal to the cached filter
    assertFalse("Must not be cached", cachedFilters.contains(
        termsFilter(random().nextBoolean(),
            new Term("field1", "a"), new Term("field1", "a"),
            new Term("field1", "b"), new Term("field1", "v"))));
  }

  /**
   * Indexes 100 documents whose single field holds the values 0, 10, 20, ... 990 and checks
   * that terms which are not in the index ("19", "00") contribute no matches, while indexed
   * terms ("20", "10") contribute one match each.
   */
  public void testMissingTerms() throws Exception {
    String fieldName = "field1";
    Directory rd = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), rd);
    for (int i = 0; i < 100; i++) {
      Document doc = new Document();
      int term = i * 10; // terms are units of 10
      doc.add(newStringField(fieldName, "" + term, Field.Store.YES));
      w.addDocument(doc);
    }
    IndexReader reader = SlowCompositeReaderWrapper.wrap(w.getReader());
    assertTrue(reader.getContext() instanceof AtomicReaderContext);
    AtomicReaderContext context = (AtomicReaderContext) reader.getContext();
    w.close();

    List<Term> terms = new ArrayList<>();

    // "19" is not indexed: the test expects a null doc id set
    terms.add(new Term(fieldName, "19"));
    FixedBitSet bits = (FixedBitSet) termsFilter(random().nextBoolean(), terms)
        .getDocIdSet(context, context.reader().getLiveDocs());
    assertNull("Must match nothing", bits);

    terms.add(new Term(fieldName, "20"));
    bits = (FixedBitSet) termsFilter(random().nextBoolean(), terms)
        .getDocIdSet(context, context.reader().getLiveDocs());
    assertEquals("Must match 1", 1, bits.cardinality());

    terms.add(new Term(fieldName, "10"));
    bits = (FixedBitSet) termsFilter(random().nextBoolean(), terms)
        .getDocIdSet(context, context.reader().getLiveDocs());
    assertEquals("Must match 2", 2, bits.cardinality());

    // "00" is not indexed (document 0 was indexed as "0"), so the count stays at 2
    terms.add(new Term(fieldName, "00"));
    bits = (FixedBitSet) termsFilter(random().nextBoolean(), terms)
        .getDocIdSet(context, context.reader().getLiveDocs());
    assertEquals("Must match 2", 2, bits.cardinality());

    reader.close();
    rd.close();
  }

  /**
   * Runs one filter over the leaves of a {@link MultiReader} built from two single-document
   * indexes that use different field names, and checks that a leaf with no postings for the
   * filter's term yields a null doc id set.
   */
  public void testMissingField() throws Exception {
    String fieldName = "field1";
    Directory rd1 = newDirectory();
    RandomIndexWriter w1 = new RandomIndexWriter(random(), rd1);
    Document doc = new Document();
    doc.add(newStringField(fieldName, "content1", Field.Store.YES));
    w1.addDocument(doc);
    IndexReader reader1 = w1.getReader();
    w1.close();

    fieldName = "field2";
    Directory rd2 = newDirectory();
    RandomIndexWriter w2 = new RandomIndexWriter(random(), rd2);
    doc = new Document();
    doc.add(newStringField(fieldName, "content2", Field.Store.YES));
    w2.addDocument(doc);
    IndexReader reader2 = w2.getReader();
    w2.close();

    // NOTE(review): fieldName is "field2" here, so the filter asks for field2:content1, a term
    // that neither index above contains. As written the else branch below appears to be
    // unreachable - confirm whether field1:content1 was intended.
    TermsFilter tf = new TermsFilter(new Term(fieldName, "content1"));
    MultiReader multi = new MultiReader(reader1, reader2);
    for (AtomicReaderContext context : multi.leaves()) {
      DocIdSet docIdSet = tf.getDocIdSet(context, context.reader().getLiveDocs());
      if (context.reader().docFreq(new Term(fieldName, "content1")) == 0) {
        assertNull(docIdSet);
      } else {
        FixedBitSet bits = (FixedBitSet) docIdSet;
        assertTrue("Must be >= 0", bits.cardinality() >= 0);
      }
    }
    multi.close();
    reader1.close();
    reader2.close();
    rd1.close();
    rd2.close();
  }

  /**
   * Builds a filter over one term per field "field0".."field(num-1)" but indexes a document for
   * every field except one randomly chosen field, then checks that exactly {@code num - 1}
   * documents match.
   */
  public void testFieldNotPresent() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    int num = atLeast(3);
    int skip = random().nextInt(num);
    List<Term> terms = new ArrayList<>();
    for (int i = 0; i < num; i++) {
      terms.add(new Term("field" + i, "content1"));
      if (skip == i) {
        // the term stays in the filter, but no document carries this field
        continue;
      }
      Document doc = new Document();
      doc.add(newStringField("field" + i, "content1", Field.Store.YES));
      w.addDocument(doc);
    }

    w.forceMerge(1);
    IndexReader reader = w.getReader();
    w.close();
    assertEquals(1, reader.leaves().size());

    AtomicReaderContext context = reader.leaves().get(0);
    TermsFilter tf = new TermsFilter(terms);
    FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(context, context.reader().getLiveDocs());
    assertEquals("Must be num fields - 1 since we skip only one field", num - 1, bits.cardinality());
    reader.close();
    dir.close();
  }

  /**
   * Indexes documents into randomly named fields, adds terms for further randomly named fields
   * that were not used for any document, and checks that the filter still matches every
   * document in the index.
   */
  public void testSkipField() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    int num = atLeast(10);
    Set<Term> terms = new HashSet<>();
    for (int i = 0; i < num; i++) {
      String field = "field" + random().nextInt(100);
      terms.add(new Term(field, "content1"));
      Document doc = new Document();
      doc.add(newStringField(field, "content1", Field.Store.YES));
      w.addDocument(doc);
    }

    // add terms for fields that no document uses; retry until an unused term is drawn
    int randomFields = random().nextInt(10);
    for (int i = 0; i < randomFields; i++) {
      Term t;
      do {
        t = new Term("field" + random().nextInt(100), "content1");
      } while (!terms.add(t));
    }

    w.forceMerge(1);
    IndexReader reader = w.getReader();
    w.close();
    assertEquals(1, reader.leaves().size());

    AtomicReaderContext context = reader.leaves().get(0);
    TermsFilter tf = new TermsFilter(new ArrayList<>(terms));
    FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(context, context.reader().getLiveDocs());
    assertEquals(context.reader().numDocs(), bits.cardinality());
    reader.close();
    dir.close();
  }

  /**
   * Compares the filter against a reference: for random subsets of the indexed terms, a
   * match-all query restricted by the filter must return the same hit count and the same doc
   * ids, in the same order, as a constant-score {@link BooleanQuery} of SHOULD term clauses.
   */
  public void testRandom() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
    int num = atLeast(100);
    final boolean singleField = random().nextBoolean();
    List<Term> terms = new ArrayList<>();
    for (int i = 0; i < num; i++) {
      String field = "field" + (singleField ? "1" : random().nextInt(100));
      String string = TestUtil.randomRealisticUnicodeString(random());
      terms.add(new Term(field, string));
      Document doc = new Document();
      doc.add(newStringField(field, string, Field.Store.YES));
      w.addDocument(doc);
    }
    IndexReader reader = w.getReader();
    w.close();

    IndexSearcher searcher = newSearcher(reader);

    int numQueries = atLeast(10);
    for (int i = 0; i < numQueries; i++) {
      Collections.shuffle(terms, random());
      // never build more clauses than BooleanQuery allows
      int numTerms = 1 + random().nextInt(
          Math.min(BooleanQuery.getMaxClauseCount(), terms.size()));

      BooleanQuery bq = new BooleanQuery();
      for (int j = 0; j < numTerms; j++) {
        bq.add(new BooleanClause(new TermQuery(terms.get(j)), Occur.SHOULD));
      }
      TopDocs queryResult = searcher.search(new ConstantScoreQuery(bq), reader.maxDoc());

      MatchAllDocsQuery matchAll = new MatchAllDocsQuery();
      final TermsFilter filter = termsFilter(singleField, terms.subList(0, numTerms));
      TopDocs filterResult = searcher.search(matchAll, filter, reader.maxDoc());

      // the boolean query is the reference, so it supplies the expected values
      assertEquals(queryResult.totalHits, filterResult.totalHits);
      ScoreDoc[] scoreDocs = filterResult.scoreDocs;
      for (int j = 0; j < scoreDocs.length; j++) {
        assertEquals(queryResult.scoreDocs[j].doc, scoreDocs[j].doc);
      }
    }

    reader.close();
    dir.close();
  }

  /**
   * Varargs convenience overload of {@link #termsFilter(boolean, Collection)}.
   *
   * @param singleField whether to use the single-field constructor
   * @param terms the terms to filter on
   * @return the filter built from {@code terms}
   */
  private TermsFilter termsFilter(boolean singleField, Term... terms) {
    return termsFilter(singleField, Arrays.asList(terms));
  }

  /**
   * Builds a {@link TermsFilter} through one of two constructors.
   *
   * @param singleField if {@code false}, the filter is built from a copy of the term list; if
   *        {@code true}, it is built from the shared field name plus the terms' bytes, and the
   *        method asserts that all terms use the same field and that at least one term is given
   * @param termList the terms to filter on
   * @return the filter built from {@code termList}
   */
  private TermsFilter termsFilter(boolean singleField, Collection<Term> termList) {
    if (!singleField) {
      return new TermsFilter(new ArrayList<>(termList));
    }
    List<BytesRef> bytes = new ArrayList<>();
    String field = null;
    for (Term term : termList) {
      bytes.add(term.bytes());
      if (field != null) {
        // every term must share the field of the term before it
        assertEquals(field, term.field());
      }
      field = term.field();
    }
    assertNotNull(field);
    return new TermsFilter(field, bytes);
  }

  /**
   * Grows a term list (which may contain duplicates) and the corresponding set of unique terms
   * step by step, checking at each step that filters built from both are equal with equal hash
   * codes, and that a filter missing one of the unique terms is not equal to either.
   */
  public void testHashCodeAndEquals() {
    int num = atLeast(100);
    final boolean singleField = random().nextBoolean();
    List<Term> terms = new ArrayList<>();
    Set<Term> uniqueTerms = new HashSet<>();
    for (int i = 0; i < num; i++) {
      String field = "field" + (singleField ? "1" : random().nextInt(100));
      String string = TestUtil.randomRealisticUnicodeString(random());
      terms.add(new Term(field, string));
      uniqueTerms.add(new Term(field, string));

      // the single-field constructor is only eligible when all terms share one field
      TermsFilter left = termsFilter(singleField && random().nextBoolean(), uniqueTerms);
      Collections.shuffle(terms, random());
      TermsFilter right = termsFilter(singleField && random().nextBoolean(), terms);
      assertEquals(right, left);
      assertEquals(right.hashCode(), left.hashCode());

      if (uniqueTerms.size() > 1) {
        List<Term> asList = new ArrayList<>(uniqueTerms);
        asList.remove(0);
        TermsFilter notEqual = termsFilter(singleField && random().nextBoolean(), asList);
        assertFalse(left.equals(notEqual));
        assertFalse(right.equals(notEqual));
      }
    }
  }

  /**
   * Checks that two single-field filters are not equal when they differ only in a term whose
   * text has the same {@link String#hashCode()} as its counterpart.
   */
  public void testSingleFieldEquals() {
    // Two terms with the same hash code
    assertEquals("AaAaBB".hashCode(), "BBBBBB".hashCode());
    TermsFilter left = termsFilter(true, new Term("id", "AaAaAa"), new Term("id", "AaAaBB"));
    TermsFilter right = termsFilter(true, new Term("id", "AaAaAa"), new Term("id", "BBBBBB"));
    assertFalse(left.equals(right));
  }

  /**
   * Checks that each of the four constructor forms exercised here throws
   * {@link IllegalArgumentException} when given no terms.
   */
  public void testNoTerms() {
    List<Term> emptyTerms = Collections.emptyList();
    List<BytesRef> emptyBytesRef = Collections.emptyList();

    try {
      new TermsFilter(emptyTerms);
      fail("must fail - no terms!");
    } catch (IllegalArgumentException expected) {
      // expected: empty term list
    }

    try {
      new TermsFilter(emptyTerms.toArray(new Term[0]));
      fail("must fail - no terms!");
    } catch (IllegalArgumentException expected) {
      // expected: empty term array
    }

    try {
      new TermsFilter(null, emptyBytesRef.toArray(new BytesRef[0]));
      fail("must fail - no terms!");
    } catch (IllegalArgumentException expected) {
      // expected: empty bytes array
    }

    try {
      new TermsFilter(null, emptyBytesRef);
      fail("must fail - no terms!");
    } catch (IllegalArgumentException expected) {
      // expected: empty bytes list
    }
  }

  /**
   * Checks that {@code toString()} renders the terms as space-separated {@code field:text}
   * pairs.
   */
  public void testToString() {
    TermsFilter termsFilter = new TermsFilter(
        new Term("field1", "a"),
        new Term("field1", "b"),
        new Term("field1", "c"));
    assertEquals("field1:a field1:b field1:c", termsFilter.toString());
  }
}