/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.OpenBitSet;
import org.apache.solr.core.HS;
import org.apache.solr.core.RefCount;
public class SortedIntDocSetNative extends DocSetBaseNative implements RefCount {
protected final long array;
protected final int len;
/** Releases the off-heap int array backing this set; called when the refcount reaches zero. */
@Override
protected void free() {
  HS.freeArray(array);
}
/**
 * Builds a native-backed doc set by copying the entire array off-heap.
 * @param docs Sorted list of ids
 */
public SortedIntDocSetNative(int[] docs) {
  this(docs, docs.length);
}
/**
 * Builds a native-backed doc set by copying the first {@code len} ids of
 * {@code docs} into a newly allocated native array.
 * @param docs Sorted list of ids
 * @param len Number of ids in the list
 */
public SortedIntDocSetNative(int[] docs, int len) {
  assert len>=0 && len <= docs.length;
  this.len = len;
  array = HS.allocArray(len, 4, false);  // 4 bytes per id; no zeroing needed, we overwrite everything
  HS.copyInts(docs, 0, array, 0, len);
}
/**
 * Wraps an existing native int array WITHOUT copying.  This set takes ownership:
 * the array will be freed by {@link #free} when the refcount drops to zero.
 * @param nativeIntArray pointer to a native array allocated via HS
 * @param len number of ids in the array (must fit within the allocation)
 */
public SortedIntDocSetNative(long nativeIntArray, int len) {
  assert len>=0 && len <= (HS.arraySizeBytes(nativeIntArray)>>2);  // >>2 converts bytes to int slots
  this.array = nativeIntArray;
  this.len = len;
}
/** Returns the raw pointer to the native sorted int array backing this set. */
public long getIntArrayPointer() {
  return array;
}
/** Number of doc ids in this set. */
@Override
public int size() {
  return len;
}
/** Memory footprint: native array bytes plus 8 bytes for the stored pointer. */
@Override
public long memSize() {
  return HS.arraySizeBytes(array)+8;
}
/**
 * Counts the ids common to two sorted native int lists.
 * For each id of the smaller list, a short linear probe followed by a binary
 * search locates it in the bigger list; the search window restarts at the
 * previous low point since targets arrive in ascending order.
 *
 * @param smallerSortedList pointer to the smaller sorted native int array
 * @param a_size number of ids in the smaller list (must be &gt; 0 to avoid divide-by-zero)
 * @param biggerSortedList pointer to the bigger sorted native int array
 * @param b_size number of ids in the bigger list
 * @return the number of ids present in both lists
 */
public static int intersectionSize(long smallerSortedList, int a_size, long biggerSortedList, int b_size) {
  final long a = smallerSortedList;
  final long b = biggerSortedList;

  // The next doc we are looking for will be much closer to the last position we tried
  // than it will be to the midpoint between last and high... so probe ahead using
  // a function of the ratio of the sizes of the sets.
  int step = (b_size/a_size)+1;

  // Since the majority of probes should be misses, we'll already be above the last probe
  // and shouldn't need to move larger than the step size on average to step over our target (and thus lower
  // the high upper bound a lot.)... but if we don't go over our target, it's a big miss... so double it.
  step = step + step;

  // FUTURE: come up with a density such that target * density == likely position?
  // then check step on one side or the other?
  // (density could be cached in the DocSet)... length/maxDoc

  // FUTURE: try partitioning like a sort algorithm.  Pick the midpoint of the big
  // array, find where that should be in the small array, and then recurse with
  // the top and bottom half of both arrays until they are small enough to use
  // a fallback intersection method.
  // NOTE: I tried this and it worked, but it was actually slower than this current
  // highly optimized approach.

  int icount = 0;
  int low = 0;
  int max = b_size-1;

  for (int i=0; i<a_size; i++) {
    int doca = HS.getInt(a, i);

    int high = max;

    int probe = low + step;  // 40% improvement!

    // short linear probe to see if we can drop the high pointer in one big jump.
    if (probe<high) {
      if (HS.getInt(b,probe)>=doca) {
        // success! we cut down the upper bound by a lot in one step!
        high=probe;
      } else {
        // relative failure... we get to move the low pointer, but not by much
        low=probe+1;

        // reprobe worth it? it appears so!
        probe = low + step;
        if (probe<high) {
          if (HS.getInt(b,probe)>=doca) {
            high=probe;
          } else {
            low=probe+1;
          }
        }
      }
    }

    // binary search the rest of the way
    while (low <= high) {
      int mid = (low+high) >>> 1;
      int docb = HS.getInt(b,mid);

      if (docb < doca) {
        low = mid+1;
      }
      else if (docb > doca) {
        high = mid-1;
      }
      else {
        icount++;
        low = mid+1;  // found it, so start at next element
        break;
      }
    }
    // Didn't find it... low is now positioned on the insertion point,
    // which is higher than what we were looking for, so continue using
    // the same low point.
  }

  return icount;
}
/**
 * Returns true if the two sorted native int lists share at least one id.
 * Same probe + binary search scheme as {@link #intersectionSize(long,int,long,int)};
 * see that method for in-depth comments on the algorithm.
 *
 * @param smallerSortedList pointer to the smaller sorted native int array
 * @param a_size number of ids in the smaller list (must be &gt; 0)
 * @param biggerSortedList pointer to the bigger sorted native int array
 * @param b_size number of ids in the bigger list
 * @return true if any id appears in both lists
 */
public static boolean intersects(long smallerSortedList, int a_size, long biggerSortedList, int b_size) {
  final long a = smallerSortedList;
  final long b = biggerSortedList;

  // probe-ahead step derived from the size ratio, doubled since most probes miss low.
  int step = (b_size/a_size)+1;
  step = step + step;

  int low = 0;
  int max = b_size-1;

  for (int i=0; i<a_size; i++) {
    int doca = HS.getInt(a, i);
    int high = max;

    int probe = low + step;
    // short linear probe to see if we can drop the high pointer in one big jump.
    if (probe<high) {
      if (HS.getInt(b, probe) >= doca) {
        high=probe;
      } else {
        low=probe+1;

        // Reprobe.  BUGFIX: guard the probe *index* against 'high' BEFORE
        // dereferencing it.  The previous code read HS.getInt(b, probe)
        // unconditionally (a potential out-of-bounds native read, since probe
        // can exceed b_size-1) and then compared that doc *value* against the
        // *index* 'high'.  This now mirrors intersectionSize().
        probe = low + step;
        if (probe<high) {
          if (HS.getInt(b, probe) >= doca) {
            high=probe;
          } else {
            low=probe+1;
          }
        }
      }
    }

    // binary search the remaining window
    while (low <= high) {
      int mid = (low+high) >>> 1;
      int docb = HS.getInt(b, mid);

      if (docb < doca) {
        low = mid+1;
      }
      else if (docb > doca) {
        high = mid-1;
      }
      else {
        return true;  // common id found
      }
    }
    // not found: low sits on the insertion point, reuse it for the next (larger) target
  }
  return false;
}
/**
 * Counts the ids common to this set and {@code other}.
 * Strategy: per-id existence probes for foreign DocSet implementations,
 * galloping binary search when one native set is at least 8x bigger,
 * and a linear merge walk when the sizes are close.
 */
@Override
public int intersectionSize(DocSet other) {
  if (!(other instanceof SortedIntDocSetNative)) {
    // assume other implementations are better at random access than we are,
    // true of BitDocSet and HashDocSet.
    int icount = 0;
    for (int i=0; i<len; i++) {
      if (other.exists( HS.getInt(array,i) )) icount++;
    }
    return icount;
  }

  // make "a" the smaller set.
  SortedIntDocSetNative otherNative = ((SortedIntDocSetNative)other);
  int a_size = size();
  int b_size = otherNative.size();
  long a,b;
  if (a_size <= b_size) {
    a = array;
    b = otherNative.array;
  } else {
    a = otherNative.array;
    b = array;
    a_size = b_size;
    b_size = size();
  }

  if (a_size==0) return 0;

  // if b is 8 times bigger than a, use the modified binary search.
  if ((b_size>>3) >= a_size) {
    return intersectionSize(a,a_size, b,b_size);
  }

  // if they are close in size, just do a linear walk of both.
  int icount=0;
  int i=0,j=0;
  int doca = HS.getInt(a, i);
  int docb = HS.getInt(b, j);
  for(;;) {
    // switch on the sign bit somehow?  Hopefully JVM is smart enough to just test once.

    // Since set a is less dense than set b, doca is likely to be greater than docb so
    // check that case first.  This resulted in a 13% speedup.
    if (doca > docb) {
      if (++j >= b_size) break;
      docb=HS.getInt(b,j);
    } else if (doca < docb) {
      if (++i >= a_size) break;
      doca=HS.getInt(a,i);
    } else {
      // match: count it and advance both sides.
      icount++;
      if (++i >= a_size) break;
      doca=HS.getInt(a,i);
      if (++j >= b_size) break;
      docb=HS.getInt(b,j);
    }
  }
  return icount;
}
/**
 * Returns true if this set shares at least one id with {@code other}.
 * Mirrors {@link #intersectionSize(DocSet)} but short-circuits on the first match.
 */
@Override
public boolean intersects(DocSet other) {
  if (!(other instanceof SortedIntDocSetNative)) {
    // assume other implementations are better at random access than we are,
    // true of BitDocSet and HashDocSet.
    for (int i=0; i<len; i++) {
      if (other.exists( HS.getInt(array,i) )) return true;
    }
    return false;
  }

  // make "a" the smaller set.
  SortedIntDocSetNative otherNative = ((SortedIntDocSetNative)other);
  int a_size = size();
  int b_size = otherNative.size();
  long a,b;
  if (a_size <= b_size) {
    a = array;
    b = otherNative.array;
  } else {
    a = otherNative.array;
    a_size = b_size;
    b = array;
    b_size = size();
  }

  if (a_size==0) return false;

  // if b is 8 times bigger than a, use the modified binary search.
  if ((b_size>>3) >= a_size) {
    return intersects(a, a_size, b, b_size);
  }

  // if they are close in size, just do a linear walk of both.
  int i=0,j=0;
  int doca=HS.getInt(a,i), docb=HS.getInt(b,j);
  for(;;) {
    // switch on the sign bit somehow?  Hopefully JVM is smart enough to just test once.

    // Since set a is less dense than set b, doca is likely to be greater than docb so
    // check that case first.  This resulted in a 13% speedup.
    if (doca > docb) {
      if (++j >= b_size) break;
      docb=HS.getInt(b,j);
    } else if (doca < docb) {
      if (++i >= a_size) break;
      doca=HS.getInt(a,i);
    } else {
      return true;  // common id found
    }
  }
  return false;
}
/**
 * Puts the intersection of a and b into the target array and returns the size.
 * The arguments are swapped internally if needed so the smaller list drives the search.
 *
 * @param a pointer to the first sorted native int array
 * @param lena number of ids in a
 * @param b pointer to the second sorted native int array
 * @param lenb number of ids in b
 * @param target destination for the common ids; must hold at least min(lena, lenb) ints
 * @return number of ids written to target
 */
public static int intersection(long a, int lena, long b, int lenb, int[] target) {
  // make "a" the smaller list.
  if (lena > lenb) {
    int ti=lena; lena=lenb; lenb=ti;
    long ta=a; a=b; b=ta;
  }

  if (lena==0) return 0;

  // if b is 8 times bigger than a, use the modified binary search.
  if ((lenb>>3) >= lena) {
    return intersectionBinarySearch(a, lena, b, lenb, target);
  }

  // close in size: linear merge walk of both lists.
  int icount=0;
  int i=0,j=0;
  int doca=HS.getInt(a,i), docb=HS.getInt(b,j);
  for(;;) {
    if (doca > docb) {
      if (++j >= lenb) break;
      docb=HS.getInt(b,j);
    } else if (doca < docb) {
      if (++i >= lena) break;
      doca=HS.getInt(a,i);
    } else {
      // match: record the id and advance both sides.
      target[icount++] = doca;
      if (++i >= lena) break;
      doca=HS.getInt(a,i);
      if (++j >= lenb) break;
      docb=HS.getInt(b,j);
    }
  }
  return icount;
}
/**
 * Puts the intersection of a and b into the target array and returns the size.
 * lena should be smaller than lenb.  Uses the same probe + binary search scheme
 * as {@link #intersectionSize(long,int,long,int)}; see there for algorithm notes.
 */
protected static int intersectionBinarySearch(long a, int lena, long b, int lenb, int[] target) {
  // probe-ahead step derived from the size ratio, doubled since most probes miss low.
  int step = (lenb/lena)+1;
  step = step + step;

  int icount = 0;
  int low = 0;
  int max = lenb-1;

  for (int i=0; i<lena; i++) {
    int doca = HS.getInt(a,i);

    int high = max;

    int probe = low + step;  // 40% improvement!

    // short linear probe to see if we can drop the high pointer in one big jump.
    if (probe<high) {
      if (HS.getInt(b,probe)>=doca) {
        // success! we cut down the upper bound by a lot in one step!
        high=probe;
      } else {
        // relative failure... we get to move the low pointer, but not by much
        low=probe+1;

        // reprobe worth it? it appears so!
        probe = low + step;
        if (probe<high) {
          if (HS.getInt(b,probe)>=doca) {
            high=probe;
          } else {
            low=probe+1;
          }
        }
      }
    }

    // binary search
    while (low <= high) {
      int mid = (low+high) >>> 1;
      int docb = HS.getInt(b,mid);

      if (docb < doca) {
        low = mid+1;
      }
      else if (docb > doca) {
        high = mid-1;
      }
      else {
        target[icount++]=doca;
        low = mid+1;  // found it, so start at next element
        break;
      }
    }
    // Didn't find it... low is now positioned on the insertion point,
    // which is higher than what we were looking for, so continue using
    // the same low point.
  }

  return icount;
}
/**
 * Returns a new set containing the ids present in both this set and {@code other}.
 * Foreign DocSet implementations are probed per-id; native sets go through the
 * optimized static intersection.
 */
@Override
public DocSet intersection(DocSet other) {
  if (!(other instanceof SortedIntDocSetNative)) {
    // Random-access path: test each of our ids against the other set.
    int[] matches = new int[len];
    int nMatches = 0;
    for (int i = 0; i < len; i++) {
      int doc = HS.getInt(array, i);
      if (other.exists(doc)) {
        matches[nMatches++] = doc;
      }
    }
    return new SortedIntDocSetNative(matches, nMatches);
  }

  SortedIntDocSetNative that = (SortedIntDocSetNative) other;
  // result can never exceed the smaller of the two sets
  int[] result = new int[Math.min(len, that.len)];
  int resultSize = intersection(array, len, that.array, that.len, result);
  return new SortedIntDocSetNative(result, resultSize);
}
/**
 * Puts the ids of a that are NOT present in b into the target array and
 * returns the size.  lena should be smaller than lenb.  Same probe + binary
 * search scheme as {@link #intersectionBinarySearch}, but ids are emitted on
 * a search MISS instead of a hit.
 */
protected static int andNotBinarySearch(long a, int lena, long b, int lenb, int[] target) {
  // probe-ahead step derived from the size ratio, doubled since most probes miss low.
  int step = (lenb/lena)+1;
  step = step + step;

  int count = 0;
  int low = 0;
  int max = lenb-1;

  outer:
  for (int i=0; i<lena; i++) {
    int doca = HS.getInt(a,i);

    int high = max;

    int probe = low + step;  // 40% improvement!

    // short linear probe to see if we can drop the high pointer in one big jump.
    if (probe<high) {
      if (HS.getInt(b,probe)>=doca) {
        // success! we cut down the upper bound by a lot in one step!
        high=probe;
      } else {
        // relative failure... we get to move the low pointer, but not by much
        low=probe+1;

        // reprobe worth it? it appears so!
        probe = low + step;
        if (probe<high) {
          if (HS.getInt(b,probe)>=doca) {
            high=probe;
          } else {
            low=probe+1;
          }
        }
      }
    }

    // binary search
    while (low <= high) {
      int mid = (low+high) >>> 1;
      int docb = HS.getInt(b,mid);

      if (docb < doca) {
        low = mid+1;
      }
      else if (docb > doca) {
        high = mid-1;
      }
      else {
        // found in b: skip this id entirely.
        low = mid+1;  // found it, so start at next element
        continue outer;
      }
    }
    // Didn't find it... low is now positioned on the insertion point,
    // which is higher than what we were looking for, so continue using
    // the same low point.
    target[count++] = doca;
  }

  return count;
}
/**
 * Puts the intersection of a and not b (i.e. a \ b) into the target array and
 * returns the size.
 *
 * @param a pointer to the first sorted native int array (the set to keep from)
 * @param lena number of ids in a
 * @param b pointer to the second sorted native int array (the set to subtract)
 * @param lenb number of ids in b
 * @param target destination; must hold at least lena ints
 * @return number of ids written to target
 */
public static int andNot(long a, int lena, long b, int lenb, int[] target) {
  if (lena==0) return 0;
  if (lenb==0) {
    // nothing to subtract: the result is all of "a".
    HS.copyInts(a, 0, target, 0, lena);
    return lena;
  }

  // if b is 8 times bigger than a, use the modified binary search.
  if ((lenb>>3) >= lena) {
    return andNotBinarySearch(a, lena, b, lenb, target);
  }

  // close in size: linear merge walk, keeping ids of "a" not matched in "b".
  int count=0;
  int i=0,j=0;
  int doca=HS.getInt(a,i),docb=HS.getInt(b,j);
  for(;;) {
    if (doca > docb) {
      if (++j >= lenb) break;
      docb=HS.getInt(b,j);
    } else if (doca < docb) {
      // doca can no longer be matched: keep it.
      target[count++] = doca;
      if (++i >= lena) break;
      doca=HS.getInt(a,i);
    } else {
      // present in both: drop it and advance both sides.
      if (++i >= lena) break;
      doca=HS.getInt(a,i);
      if (++j >= lenb) break;
      docb=HS.getInt(b,j);
    }
  }

  // "b" was exhausted first: the remaining tail of "a" is all kept.
  int leftover=lena - i;
  if (leftover > 0) {
    HS.copyInts(a, i, target, count, leftover);
    count += leftover;
  }
  return count;
}
/**
 * Returns a new set containing the ids of this set that are NOT in {@code other}.
 * When {@code other} is empty this same instance is returned (with an extra reference).
 */
@Override
public DocSet andNot(DocSet other) {
  if (other.size()==0) {
    // Nothing to subtract; hand the caller a new reference to this instance.
    this.incref();
    return this;
  }
  if (!(other instanceof SortedIntDocSetNative)) {
    // Random-access path: keep each of our ids the other set does not contain.
    int[] kept = new int[len];
    int nKept = 0;
    for (int i = 0; i < len; i++) {
      int doc = HS.getInt(array, i);
      if (!other.exists(doc)) {
        kept[nKept++] = doc;
      }
    }
    return new SortedIntDocSetNative(kept, nKept);
  }

  SortedIntDocSetNative that = (SortedIntDocSetNative) other;
  int[] result = new int[len];  // result can never exceed this set's size
  int resultSize = andNot(array, len, that.array, that.len, result);
  return new SortedIntDocSetNative(result, resultSize);
}
/** Sets a bit in {@code target} for every doc id contained in this set. */
@Override
public void setBitsOn(FixedBitSet target) {
  int i = 0;
  while (i < len) {
    target.set(HS.getInt(array, i));
    i++;
  }
}
/**
 * Returns true if {@code doc} is in this set, via binary search over the
 * sorted native array.  Could be made faster by estimating the likely
 * position, but exists() is meant to be phased out anyway.
 */
@Override
public boolean exists(int doc) {
  int lo = 0;
  int hi = len - 1;
  while (lo <= hi) {
    int mid = (lo + hi) >>> 1;  // >>> avoids overflow on large indexes
    int cur = HS.getInt(array, mid);
    if (cur == doc) {
      return true;
    }
    if (cur < doc) {
      lo = mid + 1;
    } else {
      hi = mid - 1;
    }
  }
  return false;
}
/** Iterates the doc ids in ascending order.  Scores are not available (always 0). */
@Override
public DocIterator iterator() {
  return new DocIterator() {
    private int cursor = 0;  // index of the next id to return

    @Override
    public boolean hasNext() {
      return cursor < len;
    }

    @Override
    public int nextDoc() {
      return HS.getInt(array, cursor++);
    }

    @Override
    public Integer next() {
      return nextDoc();
    }

    @Override
    public float score() {
      return 0.0f;
    }

    /** The remove operation is not supported by this Iterator. */
    @Override
    public void remove() {
      throw new UnsupportedOperationException("The remove operation is not supported by this Iterator.");
    }
  };
}
/**
 * Materializes this set as a heap FixedBitSet.  TODO: change to native?
 * WARNING: the bitset is sized to the largest id actually present (plus one),
 * NOT to the index maxDoc — callers can't assume a fixed size.
 */
@Override
public FixedBitSet getBits() {
  int maxDoc = size() > 0 ? HS.getInt(array,len-1) : 0;  // last element is the max since the list is sorted
  FixedBitSet bs = new FixedBitSet(maxDoc+1);
  setBitsOn(bs);
  return bs;
}
/**
 * Binary search for {@code value} within arr[low..high] (both bounds inclusive).
 * Returns the index of the value if present, otherwise the insertion point
 * (index of the first element greater than {@code value}).
 */
private static int findIndex(long arr, int value, int low, int high) {
  while (low <= high) {
    int mid = (low + high) >>> 1;
    int cur = HS.getInt(arr, mid);
    if (cur == value) {
      return mid;
    } else if (cur < value) {
      low = mid + 1;
    } else {
      high = mid - 1;
    }
  }
  return low;  // not found: low is the insertion point
}
/**
 * Returns a top-level Lucene Filter view of this set.  Doc ids stored here are
 * index-wide; for each segment the filter binary-searches the sub-range of ids
 * belonging to that segment and exposes them rebased to segment-local ids.
 */
@Override
public Filter getTopFilter() {
  return new Filter() {
    // End index of the previous segment's range.  Used as a starting hint when
    // segments are visited in order; re-validated (and reset) on every call, so
    // a stale value only costs performance, not correctness.
    int lastEndIdx = 0;

    @Override
    public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) {
      AtomicReader reader = context.reader();

      // all Solr DocSets that are used as filters already only include live docs
      final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);

      final int base = context.docBase;
      final int maxDoc = reader.maxDoc();
      final int max = base + maxDoc;  // one past the max doc in this segment.

      int sidx = Math.max(0,lastEndIdx);

      if (sidx > 0 && HS.getInt(array,sidx-1) >= base) {
        // oops, the lastEndIdx isn't correct... we must have been used
        // in a multi-threaded context, or the indexreaders are being
        // used out-of-order.  start at 0.
        sidx = 0;
      }

      if (sidx < len && HS.getInt(array,sidx) < base) {
        // if docs[sidx] is < base, we need to seek to find the real start.
        sidx = findIndex(array, base, sidx, len-1);
      }

      final int startIdx = sidx;

      // Largest possible end index is limited to the start index
      // plus the number of docs contained in the segment.  Subtract 1 since
      // the end index is inclusive.
      int eidx = Math.min(len, startIdx + maxDoc) - 1;

      // find the real end
      eidx = findIndex(array, max, startIdx, eidx) - 1;

      final int endIdx = eidx;
      lastEndIdx = endIdx;  // remember for the next (hopefully in-order) segment

      return BitsFilteredDocIdSet.wrap(new DocIdSet() {
        @Override
        public DocIdSetIterator iterator() {
          return new DocIdSetIterator() {
            int idx = startIdx;     // next position to read in the global id array
            int adjustedDoc = -1;   // current segment-local doc id

            @Override
            public int docID() {
              return adjustedDoc;
            }

            @Override
            public int nextDoc() {
              // rebase the global id to this segment by subtracting docBase
              return adjustedDoc = (idx > endIdx) ? NO_MORE_DOCS : (HS.getInt(array,idx++) - base);
            }

            @Override
            public int advance(int target) {
              if (idx > endIdx || target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
              target += base;  // convert the segment-local target to a global id

              // probe next
              int rawDoc = HS.getInt(array,idx++);
              if (rawDoc >= target) return adjustedDoc=rawDoc-base;

              int high = endIdx;

              // TODO: probe more before resorting to binary search?

              // binary search
              while (idx <= high) {
                int mid = (idx+high) >>> 1;
                rawDoc = HS.getInt(array,mid);

                if (rawDoc < target) {
                  idx = mid+1;
                }
                else if (rawDoc > target) {
                  high = mid-1;
                }
                else {
                  idx=mid+1;
                  return adjustedDoc=rawDoc - base;
                }
              }

              // low is on the insertion point...
              if (idx <= endIdx) {
                return adjustedDoc = HS.getInt(array,idx++) - base;
              } else {
                return adjustedDoc=NO_MORE_DOCS;
              }
            }

            @Override
            public long cost() {
              return len;
            }
          };
        }

        @Override
        public boolean isCacheable() {
          return true;
        }

        @Override
        public Bits bits() {
          // random access is expensive for this set
          return null;
        }

      }, acceptDocs2);
    }
  };
}
/**
 * Deep copy: allocates a fresh native array, copies the ids into it, and
 * returns a new set owning that array (refcounted independently).
 */
@Override
public SortedIntDocSetNative clone() {
  long newArr = HS.allocArray(len, 4, false);
  HS.copyInts(array, 0, newArr, 0, len);
  return new SortedIntDocSetNative(newArr, len);
}
}