/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.FixedBitSet;
/**
 * Randomized tests for the DocSet implementations used by Solr (SortedIntDocSet,
 * HashDocSet, BitDocSet, DocSlice and the native variants): construction from bit
 * sets, iteration, intersection/union/andNot operations, and conversion to Lucene Filters.
 */
public class TestDocSet extends LuceneTestCase {
Random rand;
float loadfactor;
boolean intersect = true;
boolean union = true;
boolean andNot = true;
boolean intersectSz = true;
boolean unionSz = true;
boolean andNotSz = true;
@Override
public void setUp() throws Exception {
super.setUp();
rand = random();
}
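// Build a FixedBitSet of size sz with up to bitsToSet random bits set (random
// positions may collide, so the resulting cardinality can be lower than bitsToSet).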
public FixedBitSet getRandomSet(int sz, int bitsToSet) {
FixedBitSet bs = new FixedBitSet(sz);
if (sz==0) return bs;
for (int i=0; i<bitsToSet; i++) {
bs.set(rand.nextInt(sz));
}
return bs;
}
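// Helpers that convert a FixedBitSet into each DocSet implementation under test.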
public DocSet getHashDocSet(FixedBitSet bs) {
int[] docs = new int[(int)bs.cardinality()];
FixedBitSet.FixedBitSetIterator iter = new FixedBitSet.FixedBitSetIterator(bs);
for (int i=0; i<docs.length; i++) {
docs[i] = iter.nextDoc();
}
return new HashDocSet(docs,0,docs.length);
}
public DocSet getIntDocSet(FixedBitSet bs) {
int[] docs = new int[(int)bs.cardinality()];
FixedBitSet.FixedBitSetIterator iter = new FixedBitSet.FixedBitSetIterator(bs);
for (int i=0; i<docs.length; i++) {
docs[i] = iter.nextDoc();
}
return new SortedIntDocSet(docs);
}
public DocSet getIntDocSetNative(FixedBitSet bs) {
int[] docs = new int[(int)bs.cardinality()];
FixedBitSet.FixedBitSetIterator iter = new FixedBitSet.FixedBitSetIterator(bs);
for (int i=0; i<docs.length; i++) {
docs[i] = iter.nextDoc();
}
return new SortedIntDocSetNative(docs);
}
public DocSet getBitDocSetNative(FixedBitSet bs) {
BitDocSetNative set = new BitDocSetNative((int)bs.length());
FixedBitSet.FixedBitSetIterator iter = new FixedBitSet.FixedBitSetIterator(bs);
for (;;) {
int doc = iter.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) break;
set.fastSet(doc);
}
return set;
}
public DocSet getSmallSet(FixedBitSet obs) {
return getIntDocSet(obs);
}
public DocSet getBigSet(FixedBitSet obs) {
return getBitDocSet(obs);
}
public DocSet getBitDocSet(FixedBitSet bs) {
return new BitDocSet(bs);
}
public DocSet getDocSlice(FixedBitSet bs) {
int len = (int)bs.cardinality();
int[] arr = new int[len+5];
arr[0]=10; arr[1]=20; arr[2]=30; arr[arr.length-1]=1; arr[arr.length-2]=2;
int offset = 3;
int end = offset + len;
FixedBitSet.FixedBitSetIterator iter = new FixedBitSet.FixedBitSetIterator(bs);
// put in opposite order... DocLists are not ordered.
for (int i=end-1; i>=offset; i--) {
arr[i] = iter.nextDoc();
}
return new DocSlice(offset, len, arr, null, len*2, 100.0f);
}
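// Randomly pick one of the DocSet implementations for the given bits, weighted
// toward the small (sorted int) and big (bit set) variants.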
public DocSet getDocSet(FixedBitSet bs) {
switch(rand.nextInt(10)) {
case 0: return getHashDocSet(bs);
case 1: return getBigSet(bs);
case 2: return getBigSet(bs);
case 3: return getBigSet(bs);
case 4: return getSmallSet(bs);
case 5: return getSmallSet(bs);
case 6: return getSmallSet(bs);
case 7: return getSmallSet(bs);
case 8: return getSmallSet(bs);
case 9: return getDocSlice(bs);
}
return null;
}
// types of docsets that can be a filter
public DocSet getFilterDocSet(FixedBitSet bs) {
switch(rand.nextInt(10)) {
case 0: return getBigSet(bs);
case 1: return getBigSet(bs);
case 2: return getBigSet(bs);
case 3: return getBigSet(bs);
case 4: return getSmallSet(bs);
case 5: return getSmallSet(bs);
case 6: return getSmallSet(bs);
case 7: return getSmallSet(bs);
case 8: return getSmallSet(bs);
case 9: return getSmallSet(bs);
}
return null;
}
public void checkEqual(FixedBitSet bs, DocSet set) {
// check membership for every doc id in the bit set's range, not just the first size() ids
for (int i=0; i<bs.length(); i++) {
assertEquals(bs.get(i), set.exists(i));
}
assertEquals(bs.cardinality(), set.size());
}
public void iter(DocSet d1, DocSet d2) {
// HashDocSet and DocList don't iterate in order.
if (d1 instanceof HashDocSet || d2 instanceof HashDocSet || d1 instanceof DocList || d2 instanceof DocList) return;
DocIterator i1 = d1.iterator();
DocIterator i2 = d2.iterator();
assertEquals(i1.hasNext(), i2.hasNext());
for(;;) {
boolean b1 = i1.hasNext();
boolean b2 = i2.hasNext();
assertEquals(b1,b2);
if (!b1) break;
assertEquals(i1.nextDoc(), i2.nextDoc());
}
}
protected void doSingle(int maxSize) {
int sz = rand.nextInt(maxSize+1);
int sz2 = rand.nextInt(maxSize);
FixedBitSet bs1 = getRandomSet(sz, rand.nextInt(sz2+1));
FixedBitSet bs2 = getRandomSet(sz, rand.nextInt(sz2+1));
DocSet test1 = getDocSet(bs1);
DocSet test2 = getDocSet(bs2);
doSingle(bs1, bs2, test1, test2);
test1.decref();
test2.decref();
}
protected void doSingle(FixedBitSet bs1, FixedBitSet bs2, DocSet test1, DocSet test2) {
DocSet a1 = new BitDocSet(bs1);
DocSet a2 = new BitDocSet(bs2);
checkEqual(bs1,test1);
checkEqual(bs2,test2);
iter(a1,test1);
iter(a2,test2);
FixedBitSet a_and = (FixedBitSet) bs1.clone(); a_and.and(bs2);
FixedBitSet a_or = (FixedBitSet) bs1.clone(); a_or.or(bs2);
// FixedBitSet a_xor = (FixedBitSet)bs1.clone(); a_xor.xor(bs2);
FixedBitSet a_andn = (FixedBitSet) bs1.clone(); a_andn.andNot(bs2);
if (intersect) {
DocSet result1 = test1.intersection(test2);
checkEqual(a_and, result1);
result1.decref();
}
if (union) {
DocSet result2 = test1.union(test2);
checkEqual(a_or, result2);
result2.decref();
}
if (andNot) {
DocSet result3 = test1.andNot(test2);
checkEqual(a_andn, result3);
result3.decref();
}
if (intersectSz) assertEquals(a_and.cardinality(), test1.intersectionSize(test2));
if (unionSz) assertEquals(a_or.cardinality(), test1.unionSize(test2));
if (andNotSz) assertEquals(a_andn.cardinality(), test1.andNotSize(test2));
}
public void doMany(int maxSz, int iter) {
for (int i=0; i<iter; i++) {
doSingle(maxSz);
}
}
public void testRandomDocSets() {
// Make the size big enough to go over certain limits (such as one set
// being 8 times the size of another in the int set, or going over 2 times
// 64 bits for the bit doc set). Smaller sets can hit more boundary conditions, though.
doMany(130, 10000);
// doMany(130, 1000000);
}
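// A small deterministic complement to the randomized test above: exercises the
// empty-set and single-doc edge cases explicitly, using only the helpers defined
// in this class (getDocSet, doSingle). Illustrative sketch; the randomized test
// already covers these cases probabilistically.
public void testEmptyAndSingletonDocSets() {
FixedBitSet empty = new FixedBitSet(10);
FixedBitSet single = new FixedBitSet(10);
single.set(5);
DocSet emptySet = getDocSet(empty);
DocSet singleSet = getDocSet(single);
// empty vs. singleton, in both argument orders
doSingle(empty, single, emptySet, singleSet);
doSingle(single, empty, singleSet, emptySet);
emptySet.decref();
singleSet.decref();
}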
public DocSet getRandomDocSet(int n, int maxDoc) {
FixedBitSet obs = new FixedBitSet(maxDoc);
int[] a = new int[n];
for (int i=0; i<n; i++) {
for(;;) {
int idx = rand.nextInt(maxDoc);
if (obs.getAndSet(idx)) continue;
a[i]=idx;
break;
}
}
if (n <= smallSetCuttoff) {
if (smallSetType ==0) {
Arrays.sort(a);
return new SortedIntDocSet(a);
} else if (smallSetType ==1) {
Arrays.sort(a);
return loadfactor!=0 ? new HashDocSet(a,0,n,1/loadfactor) : new HashDocSet(a,0,n);
}
}
return new BitDocSet(obs, n);
}
public DocSet[] getRandomSets(int nSets, int minSetSize, int maxSetSize, int maxDoc) {
DocSet[] sets = new DocSet[nSets];
for (int i=0; i<nSets; i++) {
int sz;
sz = rand.nextInt(maxSetSize-minSetSize+1)+minSetSize;
// different distribution
// sz = (maxSetSize+1)/(rand.nextInt(maxSetSize)+1) + minSetSize;
sets[i] = getRandomDocSet(sz,maxDoc);
}
return sets;
}
/**** needs code insertion into HashDocSet
public void testCollisions() {
loadfactor=.75f;
rand=new Random(12345); // make deterministic
int maxSetsize=4000;
int nSets=256;
int iter=1;
int[] maxDocs=new int[] {100000,500000,1000000,5000000,10000000};
int ret=0;
long start=System.currentTimeMillis();
for (int maxDoc : maxDocs) {
int cstart = HashDocSet.collisions;
DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
for (DocSet s1 : sets) {
for (DocSet s2 : sets) {
if (s1!=s2) ret += s1.intersectionSize(s2);
}
}
int cend = HashDocSet.collisions;
System.out.println("maxDoc="+maxDoc+"\tcollisions="+(cend-cstart));
}
long end=System.currentTimeMillis();
System.out.println("testIntersectionSizePerformance="+(end-start)+" ms");
if (ret==-1)System.out.println("wow!");
System.out.println("collisions="+HashDocSet.collisions);
}
***/
public static int smallSetType = 0; // 0==sortedint, 1==hash, 2==FixedBitSet
public static int smallSetCuttoff=3000;
/***
public void testIntersectionSizePerformance() {
loadfactor=.75f; // for HashDocSet
rand=new Random(1); // make deterministic
int minBigSetSize=1,maxBigSetSize=30000;
int minSmallSetSize=1,maxSmallSetSize=30000;
int nSets=1024;
int iter=1;
int maxDoc=1000000;
smallSetCuttoff = maxDoc>>6; // break even for SortedIntSet is /32... but /64 is better for performance
// smallSetCuttoff = maxDoc;
DocSet[] bigsets = getRandomSets(nSets, minBigSetSize, maxBigSetSize, maxDoc);
DocSet[] smallsets = getRandomSets(nSets, minSmallSetSize, maxSmallSetSize, maxDoc);
int ret=0;
long start=System.currentTimeMillis();
for (int i=0; i<iter; i++) {
for (DocSet s1 : bigsets) {
for (DocSet s2 : smallsets) {
ret += s1.intersectionSize(s2);
}
}
}
long end=System.currentTimeMillis();
System.out.println("intersectionSizePerformance="+(end-start)+" ms");
System.out.println("ret="+ret);
}
***/
/****
public void testExistsPerformance() {
loadfactor=.75f;
rand=new Random(12345); // make deterministic
int maxSetsize=4000;
int nSets=512;
int iter=1;
int maxDoc=1000000;
DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
int ret=0;
long start=System.currentTimeMillis();
for (int i=0; i<iter; i++) {
for (DocSet s1 : sets) {
for (int j=0; j<maxDoc; j++) {
ret += s1.exists(j) ? 1 :0;
}
}
}
long end=System.currentTimeMillis();
System.out.println("testExistsSizePerformance="+(end-start)+" ms");
if (ret==-1)System.out.println("wow!");
}
***/
/**** needs code insertion into HashDocSet
public void testExistsCollisions() {
loadfactor=.75f;
rand=new Random(12345); // make deterministic
int maxSetsize=4000;
int nSets=512;
int[] maxDocs=new int[] {100000,500000,1000000,5000000,10000000};
int ret=0;
for (int maxDoc : maxDocs) {
int mask = (BitUtil.nextHighestPowerOfTwo(maxDoc)>>1)-1;
DocSet[] sets = getRandomHashSets(nSets,maxSetsize, maxDoc);
int cstart = HashDocSet.collisions;
for (DocSet s1 : sets) {
for (int j=0; j<maxDocs[0]; j++) {
int idx = rand.nextInt()&mask;
ret += s1.exists(idx) ? 1 :0;
}
}
int cend = HashDocSet.collisions;
System.out.println("maxDoc="+maxDoc+"\tcollisions="+(cend-cstart));
}
if (ret==-1)System.out.println("wow!");
System.out.println("collisions="+HashDocSet.collisions);
}
***/
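// A stub AtomicReader that only reports maxDoc/numDocs; just enough of a reader
// to exercise DocSet.getTopFilter() against real reader contexts.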
public AtomicReader dummyIndexReader(final int maxDoc) {
return new AtomicReader() {
@Override
public int maxDoc() {
return maxDoc;
}
@Override
public int numDocs() {
return maxDoc;
}
@Override
public FieldInfos getFieldInfos() {
return new FieldInfos(new FieldInfo[0]);
}
@Override
public Bits getLiveDocs() {
return null;
}
@Override
public Fields fields() {
return null;
}
@Override
public Fields getTermVectors(int doc) {
return null;
}
@Override
public NumericDocValues getNumericDocValues(String field) {
return null;
}
@Override
public BinaryDocValues getBinaryDocValues(String field) {
return null;
}
@Override
public SortedDocValues getSortedDocValues(String field) {
return null;
}
@Override
public SortedSetDocValues getSortedSetDocValues(String field) {
return null;
}
@Override
public Bits getDocsWithField(String field) throws IOException {
return null;
}
@Override
public NumericDocValues getNormValues(String field) {
return null;
}
@Override
protected void doClose() {
}
@Override
public void document(int doc, StoredFieldVisitor visitor) {
}
};
}
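// Wrap a random number of stub segments in a MultiReader (or, when a single segment
// is requested, sometimes a bare atomic reader) so the filter tests see both
// single- and multi-segment contexts.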
public IndexReader dummyMultiReader(int nSeg, int maxDoc) throws IOException {
if (nSeg==1 && rand.nextBoolean()) return dummyIndexReader(rand.nextInt(maxDoc));
IndexReader[] subs = new IndexReader[rand.nextInt(nSeg)+1];
for (int i=0; i<subs.length; i++) {
subs[i] = dummyIndexReader(rand.nextInt(maxDoc));
}
MultiReader mr = new MultiReader(subs);
return mr;
}
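// Assert that two DocIdSets iterate identically, both via sequential nextDoc()
// and via randomly mixed nextDoc()/advance() calls.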
public void doTestIteratorEqual(DocIdSet a, DocIdSet b) throws IOException {
DocIdSetIterator ia = a.iterator();
DocIdSetIterator ib = b.iterator();
// test for next() equivalence
for(;;) {
int da = ia.nextDoc();
int db = ib.nextDoc();
assertEquals(da, db);
assertEquals(ia.docID(), ib.docID());
if (da==DocIdSetIterator.NO_MORE_DOCS) break;
}
for (int i=0; i<10; i++) {
// test random skipTo() and next()
ia = a.iterator();
ib = b.iterator();
int doc = -1;
for (;;) {
int da,db;
if (rand.nextBoolean()) {
da = ia.nextDoc();
db = ib.nextDoc();
} else {
int target = doc + rand.nextInt(10) + 1; // keep in mind future edge cases like probing (increase if necessary)
da = ia.advance(target);
db = ib.advance(target);
}
assertEquals(da, db);
assertEquals(ia.docID(), ib.docID());
if (da==DocIdSetIterator.NO_MORE_DOCS) break;
doc = da;
}
}
}
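// Verify that two equivalent DocSets produce identical per-segment DocIdSets when
// converted to top-level Filters, including when leaf contexts are visited out of order.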
public void doFilterTest(IndexReader reader, DocSet a, DocSet b) throws IOException {
IndexReaderContext topLevelContext = reader.getContext();
Filter fa = a.getTopFilter();
Filter fb = b.getTopFilter();
/*** top level filters are no longer supported
// test top-level
DocIdSet da = fa.getDocIdSet(topLevelContext);
DocIdSet db = fb.getDocIdSet(topLevelContext);
doTestIteratorEqual(da, db);
***/
DocIdSet da;
DocIdSet db;
List<AtomicReaderContext> leaves = topLevelContext.leaves();
// first test in-sequence sub readers
for (AtomicReaderContext readerContext : leaves) {
da = fa.getDocIdSet(readerContext, null);
db = fb.getDocIdSet(readerContext, null);
doTestIteratorEqual(da, db);
}
int nReaders = leaves.size();
// now test out-of-sequence sub readers
for (int i=0; i<nReaders; i++) {
AtomicReaderContext readerContext = leaves.get(rand.nextInt(nReaders));
da = fa.getDocIdSet(readerContext, null);
db = fb.getDocIdSet(readerContext, null);
doTestIteratorEqual(da, db);
}
}
public void doFilterTest(IndexReader reader) throws IOException {
IndexReaderContext topLevelContext = reader.getContext();
FixedBitSet bs = getRandomSet(reader.maxDoc(), rand.nextInt(reader.maxDoc()+1));
DocSet a = new BitDocSet(bs);
DocSet b = getFilterDocSet(bs);
Filter fa = a.getTopFilter();
Filter fb = b.getTopFilter();
/*** top level filters are no longer supported
// test top-level
DocIdSet da = fa.getDocIdSet(topLevelContext);
DocIdSet db = fb.getDocIdSet(topLevelContext);
doTestIteratorEqual(da, db);
***/
DocIdSet da;
DocIdSet db;
List<AtomicReaderContext> leaves = topLevelContext.leaves();
// first test in-sequence sub readers
for (AtomicReaderContext readerContext : leaves) {
da = fa.getDocIdSet(readerContext, null);
db = fb.getDocIdSet(readerContext, null);
doTestIteratorEqual(da, db);
}
int nReaders = leaves.size();
// now test out-of-sequence sub readers
for (int i=0; i<nReaders; i++) {
AtomicReaderContext readerContext = leaves.get(rand.nextInt(nReaders));
da = fa.getDocIdSet(readerContext, null);
db = fb.getDocIdSet(readerContext, null);
doTestIteratorEqual(da, db);
}
a.decref();
b.decref();
}
public void testFilter() throws IOException {
// keeping these numbers smaller help hit more edge cases
int maxSeg=4;
int maxDoc=5; // increase if future changes add more edge cases (like probing a certain distance in the bin search)
for (int i=0; i<5000; i++) {
IndexReader r = dummyMultiReader(maxSeg, maxDoc);
doFilterTest(r);
}
}
}