/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util;
import org.apache.lucene.util.OpenBitSet;
/** An iterator to iterate over set bits in an OpenBitSet.
* This is faster than nextSetBit() for iterating over the complete set of bits,
* especially when the density of the bits set is high.
*
* @deprecated Use {@link org.apache.lucene.util.OpenBitSetIterator} instead.
* @version $Id$
*/
public class BitSetIterator {
// The General Idea: instead of having an array per byte that has
// the offsets of the next set bit, that array could be
// packed inside a 32 bit integer (8 4 bit numbers). That
// should be faster than accessing an array for each index, and
// the total array size is kept smaller (256*sizeof(int))=1K
protected final static int[] bitlist={
0x0,0x1,0x2,0x21,0x3,0x31,0x32,0x321,0x4,0x41,0x42,0x421,0x43,0x431,0x432,0x4321,0x5,0x51,0x52,0x521,0x53,0x531,0x532,0x5321,0x54,0x541,0x542,0x5421,0x543,0x5431,0x5432,0x54321,0x6,0x61,0x62,0x621,0x63,0x631,0x632,0x6321,0x64,0x641,0x642,0x6421,0x643,0x6431,0x6432,0x64321,0x65,0x651,0x652,0x6521,0x653,0x6531,0x6532,0x65321,0x654,0x6541,0x6542,0x65421,0x6543,0x65431,0x65432,0x654321,0x7,0x71,0x72,0x721,0x73,0x731,0x732,0x7321,0x74,0x741,0x742,0x7421,0x743,0x7431,0x7432,0x74321,0x75,0x751,0x752,0x7521,0x753,0x7531,0x7532,0x75321,0x754,0x7541,0x7542,0x75421,0x7543,0x75431,0x75432,0x754321,0x76,0x761,0x762,0x7621,0x763,0x7631,0x7632,0x76321,0x764,0x7641,0x7642,0x76421,0x7643,0x76431,0x76432,0x764321,0x765,0x7651,0x7652,0x76521,0x7653,0x76531,0x76532,0x765321,0x7654,0x76541,0x76542,0x765421,0x76543,0x765431,0x765432,0x7654321,0x8,0x81,0x82,0x821,0x83,0x831,0x832,0x8321,0x84,0x841,0x842,0x8421,0x843,0x8431,0x8432,0x84321,0x85,0x851,0x852,0x8521,0x853,0x8531,0x8532,0x85321,0x854,0x8541,0x8542,0x85421,0x8543,0x85431,0x85432,0x854321,0x86,0x861,0x862,0x8621,0x863,0x8631,0x8632,0x86321,0x864,0x8641,0x8642,0x86421,0x8643,0x86431,0x86432,0x864321,0x865,0x8651,0x8652,0x86521,0x8653,0x86531,0x86532,0x865321,0x8654,0x86541,0x86542,0x865421,0x86543,0x865431,0x865432,0x8654321,0x87,0x871,0x872,0x8721,0x873,0x8731,0x8732,0x87321,0x874,0x8741,0x8742,0x87421,0x8743,0x87431,0x87432,0x874321,0x875,0x8751,0x8752,0x87521,0x8753,0x87531,0x87532,0x875321,0x8754,0x87541,0x87542,0x875421,0x87543,0x875431,0x875432,0x8754321,0x876,0x8761,0x8762,0x87621,0x8763,0x87631,0x87632,0x876321,0x8764,0x87641,0x87642,0x876421,0x87643,0x876431,0x876432,0x8764321,0x8765,0x87651,0x87652,0x876521,0x87653,0x876531,0x876532,0x8765321,0x87654,0x876541,0x876542,0x8765421,0x876543,0x8765431,0x8765432,0x87654321
};
/***** the python code that generated bitlist
def bits2int(val):
arr=0
for shift in range(8,0,-1):
if val & 0x80:
arr = (arr << 4) | shift
val = val << 1
return arr
def int_table():
tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ]
return ','.join(tbl)
******/
// hmmm, what about an iterator that finds zeros though,
// or a reverse iterator... should they be separate classes
// for efficiency, or have a common root interface? (or
// maybe both? could ask for a SetBitsIterator, etc...
private final long[] arr;
private final int words;
private int i=-1;
private long word;
private int wordShift;
private int indexArray;
public BitSetIterator(OpenBitSet obs) {
this(obs.getBits(), obs.getNumWords());
}
public BitSetIterator(long[] bits, int numWords) {
arr = bits;
words = numWords;
}
// 64 bit shifts
private void shift() {
if ((int)word ==0) {wordShift +=32; word = word >>>32; }
if ((word & 0x0000FFFF) == 0) { wordShift +=16; word >>>=16; }
if ((word & 0x000000FF) == 0) { wordShift +=8; word >>>=8; }
indexArray = bitlist[(int)word & 0xff];
}
/***** alternate shift implementations
// 32 bit shifts, but a long shift needed at the end
private void shift2() {
int y = (int)word;
if (y==0) {wordShift +=32; y = (int)(word >>>32); }
if ((y & 0x0000FFFF) == 0) { wordShift +=16; y>>>=16; }
if ((y & 0x000000FF) == 0) { wordShift +=8; y>>>=8; }
indexArray = bitlist[y & 0xff];
word >>>= (wordShift +1);
}
private void shift3() {
int lower = (int)word;
int lowByte = lower & 0xff;
if (lowByte != 0) {
indexArray=bitlist[lowByte];
return;
}
shift();
}
******/
public int next() {
if (indexArray==0) {
if (word!=0) {
word >>>= 8;
wordShift += 8;
}
while (word==0) {
if (++i >= words) return -1;
word = arr[i];
wordShift =-1; // loop invariant code motion should move this
}
// after the first time, should I go with a linear search, or
// stick with the binary search in shift?
shift();
}
int bitIndex = (indexArray & 0x0f) + wordShift;
indexArray >>>= 4;
// should i<<6 be cached as a separate variable?
// it would only save one cycle in the best circumstances.
return (i<<6) + bitIndex;
}
public int next(int fromIndex) {
indexArray=0;
i = fromIndex >> 6;
if (i>=words) {
word =0; // setup so next() will also return -1
return -1;
}
wordShift = fromIndex & 0x3f;
word = arr[i] >>> wordShift;
if (word !=0) {
wordShift--; // compensate for 1 based arrIndex
} else {
while (word ==0) {
if (++i >= words) return -1;
word = arr[i];
}
wordShift =-1;
}
shift();
int bitIndex = (indexArray & 0x0f) + wordShift;
indexArray >>>= 4;
// should i<<6 be cached as a separate variable?
// it would only save one cycle in the best circumstances.
return (i<<6) + bitIndex;
}
}