/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.search; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.BitsFilteredDocIdSet; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.OpenBitSet; import org.apache.solr.core.HS; import org.apache.solr.core.RefCount; public class SortedIntDocSetNative extends DocSetBaseNative implements RefCount { protected final long array; protected final int len; @Override protected void free() { HS.freeArray(array); } /** * @param docs Sorted list of ids */ public SortedIntDocSetNative(int[] docs) { this(docs, docs.length); } /** * @param docs Sorted list of ids * @param len Number of ids in the list */ public SortedIntDocSetNative(int[] docs, int len) { assert len>=0 && len <= docs.length; this.len = len; array = HS.allocArray(len, 4, false); HS.copyInts(docs, 0, array, 0, len); } public SortedIntDocSetNative(long nativeIntArray, int len) { assert len>=0 && len <= (HS.arraySizeBytes(nativeIntArray)>>2); this.array = nativeIntArray; this.len = len; } public long getIntArrayPointer() { return array; } @Override public int size() { return len; } @Override public long memSize() { return HS.arraySizeBytes(array)+8; } public static int intersectionSize(long smallerSortedList, int a_size, long biggerSortedList, int b_size) { final long a = smallerSortedList; final long b = biggerSortedList; // The next doc we are looking for will be much closer to the last position we tried // than it will be to the midpoint between last and high... so probe ahead using // a function of the ratio of the sizes of the sets. int step = (b_size/a_size)+1; // Since the majority of probes should be misses, we'll already be above the last probe // and shouldn't need to move larger than the step size on average to step over our target (and thus lower // the high upper bound a lot.)... but if we don't go over our target, it's a big miss... so double it. step = step + step; // FUTURE: come up with a density such that target * density == likely position? // then check step on one side or the other? // (density could be cached in the DocSet)... length/maxDoc // FUTURE: try partitioning like a sort algorithm. Pick the midpoint of the big // array, find where that should be in the small array, and then recurse with // the top and bottom half of both arrays until they are small enough to use // a fallback insersection method. // NOTE: I tried this and it worked, but it was actually slower than this current // highly optimized approach. int icount = 0; int low = 0; int max = b_size-1; for (int i=0; i<a_size; i++) { int doca = HS.getInt(a, i); int high = max; int probe = low + step; // 40% improvement! // short linear probe to see if we can drop the high pointer in one big jump. if (probe<high) { if (HS.getInt(b,probe)>=doca) { // success! we cut down the upper bound by a lot in one step! high=probe; } else { // relative failure... we get to move the low pointer, but not my much low=probe+1; // reprobe worth it? it appears so! probe = low + step; if (probe<high) { if (HS.getInt(b,probe)>=doca) { high=probe; } else { low=probe+1; } } } } // binary search the rest of the way while (low <= high) { int mid = (low+high) >>> 1; int docb = HS.getInt(b,mid); if (docb < doca) { low = mid+1; } else if (docb > doca) { high = mid-1; } else { icount++; low = mid+1; // found it, so start at next element break; } } // Didn't find it... low is now positioned on the insertion point, // which is higher than what we were looking for, so continue using // the same low point. } return icount; } public static boolean intersects(long smallerSortedList, int a_size, long biggerSortedList, int b_size) { // see intersectionSize for more in-depth comments of this algorithm final long a = smallerSortedList; final long b = biggerSortedList; int step = (b_size/a_size)+1; step = step + step; int low = 0; int max = b_size-1; for (int i=0; i<a_size; i++) { int doca = HS.getInt(a, i); int high = max; int probe = low + step; if (probe<high) { if (HS.getInt(b, probe) >= doca) { high=probe; } else { low=probe+1; probe = low + step; int probeVal = HS.getInt(b, probe); if (probeVal < high) { if (probeVal >= doca) { high=probe; } else { low=probe+1; } } } } while (low <= high) { int mid = (low+high) >>> 1; int docb = HS.getInt(b, mid); if (docb < doca) { low = mid+1; } else if (docb > doca) { high = mid-1; } else { return true; } } } return false; } @Override public int intersectionSize(DocSet other) { if (!(other instanceof SortedIntDocSetNative)) { // assume other implementations are better at random access than we are, // true of BitDocSet and HashDocSet. int icount = 0; for (int i=0; i<len; i++) { if (other.exists( HS.getInt(array,i) )) icount++; } return icount; } // make "a" the smaller set. SortedIntDocSetNative otherNative = ((SortedIntDocSetNative)other); int a_size = size(); int b_size = otherNative.size(); long a,b; if (a_size <= b_size) { a = array; b = otherNative.array; } else { a = otherNative.array; b = array; a_size = b_size; b_size = size(); } if (a_size==0) return 0; // if b is 8 times bigger than a, use the modified binary search. if ((b_size>>3) >= a_size) { return intersectionSize(a,a_size, b,b_size); } // if they are close in size, just do a linear walk of both. int icount=0; int i=0,j=0; int doca = HS.getInt(a, i); int docb = HS.getInt(b, j); for(;;) { // switch on the sign bit somehow? Hopefully JVM is smart enough to just test once. // Since set a is less dense then set b, doca is likely to be greater than docb so // check that case first. This resulted in a 13% speedup. if (doca > docb) { if (++j >= b_size) break; docb=HS.getInt(b,j); } else if (doca < docb) { if (++i >= a_size) break; doca=HS.getInt(a,i); } else { icount++; if (++i >= a_size) break; doca=HS.getInt(a,i); if (++j >= b_size) break; docb=HS.getInt(b,j); } } return icount; } @Override public boolean intersects(DocSet other) { if (!(other instanceof SortedIntDocSetNative)) { // assume other implementations are better at random access than we are, // true of BitDocSet and HashDocSet. for (int i=0; i<len; i++) { if (other.exists( HS.getInt(array,i) )) return true; } return false; } // make "a" the smaller set. SortedIntDocSetNative otherNative = ((SortedIntDocSetNative)other); int a_size = size(); int b_size = otherNative.size(); long a,b; if (a_size <= b_size) { a = array; b = otherNative.array; } else { a = otherNative.array; a_size = b_size; b = array; b_size = size(); } if (a_size==0) return false; // if b is 8 times bigger than a, use the modified binary search. if ((b_size>>3) >= a_size) { return intersects(a, a_size, b, b_size); } // if they are close in size, just do a linear walk of both. int i=0,j=0; int doca=HS.getInt(a,i), docb=HS.getInt(b,j); for(;;) { // switch on the sign bit somehow? Hopefull JVM is smart enough to just test once. // Since set a is less dense then set b, doca is likely to be greater than docb so // check that case first. This resulted in a 13% speedup. if (doca > docb) { if (++j >= b_size) break; docb=HS.getInt(b,j); } else if (doca < docb) { if (++i >= a_size) break; doca=HS.getInt(a,i); } else { return true; } } return false; } /** puts the intersection of a and b into the target array and returns the size */ public static int intersection(long a, int lena, long b, int lenb, int[] target) { if (lena > lenb) { int ti=lena; lena=lenb; lenb=ti; long ta=a; a=b; b=ta; } if (lena==0) return 0; // if b is 8 times bigger than a, use the modified binary search. if ((lenb>>3) >= lena) { return intersectionBinarySearch(a, lena, b, lenb, target); } int icount=0; int i=0,j=0; int doca=HS.getInt(a,i), docb=HS.getInt(b,j); for(;;) { if (doca > docb) { if (++j >= lenb) break; docb=HS.getInt(b,j); } else if (doca < docb) { if (++i >= lena) break; doca=HS.getInt(a,i); } else { target[icount++] = doca; if (++i >= lena) break; doca=HS.getInt(a,i); if (++j >= lenb) break; docb=HS.getInt(b,j); } } return icount; } /** Puts the intersection of a and b into the target array and returns the size. * lena should be smaller than lenb */ protected static int intersectionBinarySearch(long a, int lena, long b, int lenb, int[] target) { int step = (lenb/lena)+1; step = step + step; int icount = 0; int low = 0; int max = lenb-1; for (int i=0; i<lena; i++) { int doca = HS.getInt(a,i); int high = max; int probe = low + step; // 40% improvement! // short linear probe to see if we can drop the high pointer in one big jump. if (probe<high) { if (HS.getInt(b,probe)>=doca) { // success! we cut down the upper bound by a lot in one step! high=probe; } else { // relative failure... we get to move the low pointer, but not my much low=probe+1; // reprobe worth it? it appears so! probe = low + step; if (probe<high) { if (HS.getInt(b,probe)>=doca) { high=probe; } else { low=probe+1; } } } } // binary search while (low <= high) { int mid = (low+high) >>> 1; int docb = HS.getInt(b,mid); if (docb < doca) { low = mid+1; } else if (docb > doca) { high = mid-1; } else { target[icount++]=doca; // HS.setInt(target, icount++, doca); low = mid+1; // found it, so start at next element break; } } // Didn't find it... low is now positioned on the insertion point, // which is higher than what we were looking for, so continue using // the same low point. } return icount; } @Override public DocSet intersection(DocSet other) { if (!(other instanceof SortedIntDocSetNative)) { int icount = 0; int arr[] = new int[len]; for (int i=0; i<len; i++) { int doc = HS.getInt(array, i); if (other.exists(doc)) arr[icount++] = doc; } return new SortedIntDocSetNative(arr,icount); } long otherDocs = ((SortedIntDocSetNative)other).array; int maxsz = Math.min(len, ((SortedIntDocSetNative)other).len); int[] arr = new int[maxsz]; int sz = intersection(array, len, otherDocs, ((SortedIntDocSetNative)other).len, arr); return new SortedIntDocSetNative(arr,sz); } protected static int andNotBinarySearch(long a, int lena, long b, int lenb, int[] target) { int step = (lenb/lena)+1; step = step + step; int count = 0; int low = 0; int max = lenb-1; outer: for (int i=0; i<lena; i++) { int doca = HS.getInt(a,i); int high = max; int probe = low + step; // 40% improvement! // short linear probe to see if we can drop the high pointer in one big jump. if (probe<high) { if (HS.getInt(b,probe)>=doca) { // success! we cut down the upper bound by a lot in one step! high=probe; } else { // relative failure... we get to move the low pointer, but not my much low=probe+1; // reprobe worth it? it appears so! probe = low + step; if (probe<high) { if (HS.getInt(b,probe)>=doca) { high=probe; } else { low=probe+1; } } } } // binary search while (low <= high) { int mid = (low+high) >>> 1; int docb = HS.getInt(b,mid); if (docb < doca) { low = mid+1; } else if (docb > doca) { high = mid-1; } else { low = mid+1; // found it, so start at next element continue outer; } } // Didn't find it... low is now positioned on the insertion point, // which is higher than what we were looking for, so continue using // the same low point. target[count++] = doca; } return count; } /** puts the intersection of a and not b into the target array and returns the size */ public static int andNot(long a, int lena, long b, int lenb, int[] target) { if (lena==0) return 0; if (lenb==0) { HS.copyInts(a, 0, target, 0, lena); return lena; } // if b is 8 times bigger than a, use the modified binary search. if ((lenb>>3) >= lena) { return andNotBinarySearch(a, lena, b, lenb, target); } int count=0; int i=0,j=0; int doca=HS.getInt(a,i),docb=HS.getInt(b,j); for(;;) { if (doca > docb) { if (++j >= lenb) break; docb=HS.getInt(b,j); } else if (doca < docb) { target[count++] = doca; if (++i >= lena) break; doca=HS.getInt(a,i); } else { if (++i >= lena) break; doca=HS.getInt(a,i); if (++j >= lenb) break; docb=HS.getInt(b,j); } } int leftover=lena - i; if (leftover > 0) { HS.copyInts(a, i, target, count, leftover); count += leftover; } return count; } @Override public DocSet andNot(DocSet other) { if (other.size()==0) { this.incref(); return this; } if (!(other instanceof SortedIntDocSetNative)) { int count = 0; int arr[] = new int[len]; for (int i=0; i<len; i++) { int doc = HS.getInt(array, i); if (!other.exists(doc)) arr[count++] = doc; } return new SortedIntDocSetNative(arr,count); } long otherDocs = ((SortedIntDocSetNative)other).array; int[] arr = new int[len]; int sz = andNot(array, len, otherDocs, ((SortedIntDocSetNative)other).len, arr); return new SortedIntDocSetNative(arr,sz); } @Override public void setBitsOn(FixedBitSet target) { for (int i=0; i<len; i++) { target.set( HS.getInt(array, i) ); } } @Override public boolean exists(int doc) { // this could be faster by estimating where in the list the doc is likely to appear, // but we should get away from using exists() anyway. int low = 0; int high = len-1; // binary search while (low <= high) { int mid = (low+high) >>> 1; int docb = HS.getInt(array, mid); if (docb < doc) { low = mid+1; } else if (docb > doc) { high = mid-1; } else { return true; } } return false; } @Override public DocIterator iterator() { return new DocIterator() { int pos=0; @Override public boolean hasNext() { return pos < len; } @Override public Integer next() { return nextDoc(); } /** * The remove operation is not supported by this Iterator. */ @Override public void remove() { throw new UnsupportedOperationException("The remove operation is not supported by this Iterator."); } @Override public int nextDoc() { return HS.getInt(array, pos++); } @Override public float score() { return 0.0f; } }; } @Override public FixedBitSet getBits() { // TODO: change to native? int maxDoc = size() > 0 ? HS.getInt(array,len-1) : 0; // WARNING!!! can't used fixed bit sizes here! FixedBitSet bs = new FixedBitSet(maxDoc+1); setBitsOn(bs); return bs; } private static int findIndex(long arr, int value, int low, int high) { // binary search while (low <= high) { int mid = (low+high) >>> 1; int found = HS.getInt(arr,mid); if (found < value) { low = mid+1; } else if (found > value) { high = mid-1; } else { return mid; } } return low; } @Override public Filter getTopFilter() { return new Filter() { int lastEndIdx = 0; @Override public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) { AtomicReader reader = context.reader(); // all Solr DocSets that are used as filters already only include live docs final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs); final int base = context.docBase; final int maxDoc = reader.maxDoc(); final int max = base + maxDoc; // one past the max doc in this segment. int sidx = Math.max(0,lastEndIdx); if (sidx > 0 && HS.getInt(array,sidx-1) >= base) { // oops, the lastEndIdx isn't correct... we must have been used // in a multi-threaded context, or the indexreaders are being // used out-of-order. start at 0. sidx = 0; } if (sidx < len && HS.getInt(array,sidx) < base) { // if docs[sidx] is < base, we need to seek to find the real start. sidx = findIndex(array, base, sidx, len-1); } final int startIdx = sidx; // Largest possible end index is limited to the start index // plus the number of docs contained in the segment. Subtract 1 since // the end index is inclusive. int eidx = Math.min(len, startIdx + maxDoc) - 1; // find the real end eidx = findIndex(array, max, startIdx, eidx) - 1; final int endIdx = eidx; lastEndIdx = endIdx; return BitsFilteredDocIdSet.wrap(new DocIdSet() { @Override public DocIdSetIterator iterator() { return new DocIdSetIterator() { int idx = startIdx; int adjustedDoc = -1; @Override public int docID() { return adjustedDoc; } @Override public int nextDoc() { return adjustedDoc = (idx > endIdx) ? NO_MORE_DOCS : (HS.getInt(array,idx++) - base); } @Override public int advance(int target) { if (idx > endIdx || target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS; target += base; // probe next int rawDoc = HS.getInt(array,idx++); if (rawDoc >= target) return adjustedDoc=rawDoc-base; int high = endIdx; // TODO: probe more before resorting to binary search? // binary search while (idx <= high) { int mid = (idx+high) >>> 1; rawDoc = HS.getInt(array,mid); if (rawDoc < target) { idx = mid+1; } else if (rawDoc > target) { high = mid-1; } else { idx=mid+1; return adjustedDoc=rawDoc - base; } } // low is on the insertion point... if (idx <= endIdx) { return adjustedDoc = HS.getInt(array,idx++) - base; } else { return adjustedDoc=NO_MORE_DOCS; } } @Override public long cost() { return len; } }; } @Override public boolean isCacheable() { return true; } @Override public Bits bits() { // random access is expensive for this set return null; } }, acceptDocs2); } }; } @Override public SortedIntDocSetNative clone() { long newArr = HS.allocArray(len, 4, false); HS.copyInts(array, 0, newArr, 0, len); return new SortedIntDocSetNative(newArr, len); } }