/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import org.apache.lucene.search.DocIdSetIterator;
/** An iterator to iterate over set bits in an OpenBitSet.
* This is faster than nextSetBit() for iterating over the complete set of bits,
* especially when the density of the bits set is high.
*/
public class OpenBitSetIterator extends DocIdSetIterator {
// hmmm, what about an iterator that finds zeros though,
// or a reverse iterator... should they be separate classes
// for efficiency, or have a common root interface? (or
// maybe both? could ask for a SetBitsIterator, etc...
final long[] arr;
final int words;
private int i=-1;
private long word;
private int wordShift;
private int indexArray;
private int curDocId = -1;
public OpenBitSetIterator(OpenBitSet obs) {
this(obs.getBits(), obs.getNumWords());
}
public OpenBitSetIterator(long[] bits, int numWords) {
arr = bits;
words = numWords;
}
// 64 bit shifts
private void shift() {
if ((int)word ==0) {wordShift +=32; word = word >>>32; }
if ((word & 0x0000FFFF) == 0) { wordShift +=16; word >>>=16; }
if ((word & 0x000000FF) == 0) { wordShift +=8; word >>>=8; }
indexArray = BitUtil.bitList((byte) word);
}
/***** alternate shift implementations
// 32 bit shifts, but a long shift needed at the end
private void shift2() {
int y = (int)word;
if (y==0) {wordShift +=32; y = (int)(word >>>32); }
if ((y & 0x0000FFFF) == 0) { wordShift +=16; y>>>=16; }
if ((y & 0x000000FF) == 0) { wordShift +=8; y>>>=8; }
indexArray = bitlist[y & 0xff];
word >>>= (wordShift +1);
}
private void shift3() {
int lower = (int)word;
int lowByte = lower & 0xff;
if (lowByte != 0) {
indexArray=bitlist[lowByte];
return;
}
shift();
}
******/
@Override
public int nextDoc() {
if (indexArray == 0) {
if (word != 0) {
word >>>= 8;
wordShift += 8;
}
while (word == 0) {
if (++i >= words) {
return curDocId = NO_MORE_DOCS;
}
word = arr[i];
wordShift = -1; // loop invariant code motion should move this
}
// after the first time, should I go with a linear search, or
// stick with the binary search in shift?
shift();
}
int bitIndex = (indexArray & 0x0f) + wordShift;
indexArray >>>= 4;
// should i<<6 be cached as a separate variable?
// it would only save one cycle in the best circumstances.
return curDocId = (i<<6) + bitIndex;
}
@Override
public int advance(int target) {
indexArray = 0;
i = target >> 6;
if (i >= words) {
word = 0; // setup so next() will also return -1
return curDocId = NO_MORE_DOCS;
}
wordShift = target & 0x3f;
word = arr[i] >>> wordShift;
if (word != 0) {
wordShift--; // compensate for 1 based arrIndex
} else {
while (word == 0) {
if (++i >= words) {
return curDocId = NO_MORE_DOCS;
}
word = arr[i];
}
wordShift = -1;
}
shift();
int bitIndex = (indexArray & 0x0f) + wordShift;
indexArray >>>= 4;
// should i<<6 be cached as a separate variable?
// it would only save one cycle in the best circumstances.
return curDocId = (i<<6) + bitIndex;
}
@Override
public int docID() {
return curDocId;
}
@Override
public long cost() {
return words / 64;
}
}