/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.util; import org.apache.lucene.util.OpenBitSet; /** An iterator to iterate over set bits in an OpenBitSet. * This is faster than nextSetBit() for iterating over the complete set of bits, * especially when the density of the bits set is high. * * @deprecated Use {@link org.apache.lucene.util.OpenBitSetIterator} instead. * @version $Id$ */ public class BitSetIterator { // The General Idea: instead of having an array per byte that has // the offsets of the next set bit, that array could be // packed inside a 32 bit integer (8 4 bit numbers). That // should be faster than accessing an array for each index, and // the total array size is kept smaller (256*sizeof(int))=1K protected final static int[] bitlist={ 0x0,0x1,0x2,0x21,0x3,0x31,0x32,0x321,0x4,0x41,0x42,0x421,0x43,0x431,0x432,0x4321,0x5,0x51,0x52,0x521,0x53,0x531,0x532,0x5321,0x54,0x541,0x542,0x5421,0x543,0x5431,0x5432,0x54321,0x6,0x61,0x62,0x621,0x63,0x631,0x632,0x6321,0x64,0x641,0x642,0x6421,0x643,0x6431,0x6432,0x64321,0x65,0x651,0x652,0x6521,0x653,0x6531,0x6532,0x65321,0x654,0x6541,0x6542,0x65421,0x6543,0x65431,0x65432,0x654321,0x7,0x71,0x72,0x721,0x73,0x731,0x732,0x7321,0x74,0x741,0x742,0x7421,0x743,0x7431,0x7432,0x74321,0x75,0x751,0x752,0x7521,0x753,0x7531,0x7532,0x75321,0x754,0x7541,0x7542,0x75421,0x7543,0x75431,0x75432,0x754321,0x76,0x761,0x762,0x7621,0x763,0x7631,0x7632,0x76321,0x764,0x7641,0x7642,0x76421,0x7643,0x76431,0x76432,0x764321,0x765,0x7651,0x7652,0x76521,0x7653,0x76531,0x76532,0x765321,0x7654,0x76541,0x76542,0x765421,0x76543,0x765431,0x765432,0x7654321,0x8,0x81,0x82,0x821,0x83,0x831,0x832,0x8321,0x84,0x841,0x842,0x8421,0x843,0x8431,0x8432,0x84321,0x85,0x851,0x852,0x8521,0x853,0x8531,0x8532,0x85321,0x854,0x8541,0x8542,0x85421,0x8543,0x85431,0x85432,0x854321,0x86,0x861,0x862,0x8621,0x863,0x8631,0x8632,0x86321,0x864,0x8641,0x8642,0x86421,0x8643,0x86431,0x86432,0x864321,0x865,0x8651,0x8652,0x86521,0x8653,0x86531,0x86532,0x865321,0x8654,0x86541,0x86542,0x865421,0x86543,0x865431,0x865432,0x8654321,0x87,0x871,0x872,0x8721,0x873,0x8731,0x8732,0x87321,0x874,0x8741,0x8742,0x87421,0x8743,0x87431,0x87432,0x874321,0x875,0x8751,0x8752,0x87521,0x8753,0x87531,0x87532,0x875321,0x8754,0x87541,0x87542,0x875421,0x87543,0x875431,0x875432,0x8754321,0x876,0x8761,0x8762,0x87621,0x8763,0x87631,0x87632,0x876321,0x8764,0x87641,0x87642,0x876421,0x87643,0x876431,0x876432,0x8764321,0x8765,0x87651,0x87652,0x876521,0x87653,0x876531,0x876532,0x8765321,0x87654,0x876541,0x876542,0x8765421,0x876543,0x8765431,0x8765432,0x87654321 }; /***** the python code that generated bitlist def bits2int(val): arr=0 for shift in range(8,0,-1): if val & 0x80: arr = (arr << 4) | shift val = val << 1 return arr def int_table(): tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ] return ','.join(tbl) ******/ // hmmm, what about an iterator that finds zeros though, // or a reverse iterator... should they be separate classes // for efficiency, or have a common root interface? (or // maybe both? could ask for a SetBitsIterator, etc... private final long[] arr; private final int words; private int i=-1; private long word; private int wordShift; private int indexArray; public BitSetIterator(OpenBitSet obs) { this(obs.getBits(), obs.getNumWords()); } public BitSetIterator(long[] bits, int numWords) { arr = bits; words = numWords; } // 64 bit shifts private void shift() { if ((int)word ==0) {wordShift +=32; word = word >>>32; } if ((word & 0x0000FFFF) == 0) { wordShift +=16; word >>>=16; } if ((word & 0x000000FF) == 0) { wordShift +=8; word >>>=8; } indexArray = bitlist[(int)word & 0xff]; } /***** alternate shift implementations // 32 bit shifts, but a long shift needed at the end private void shift2() { int y = (int)word; if (y==0) {wordShift +=32; y = (int)(word >>>32); } if ((y & 0x0000FFFF) == 0) { wordShift +=16; y>>>=16; } if ((y & 0x000000FF) == 0) { wordShift +=8; y>>>=8; } indexArray = bitlist[y & 0xff]; word >>>= (wordShift +1); } private void shift3() { int lower = (int)word; int lowByte = lower & 0xff; if (lowByte != 0) { indexArray=bitlist[lowByte]; return; } shift(); } ******/ public int next() { if (indexArray==0) { if (word!=0) { word >>>= 8; wordShift += 8; } while (word==0) { if (++i >= words) return -1; word = arr[i]; wordShift =-1; // loop invariant code motion should move this } // after the first time, should I go with a linear search, or // stick with the binary search in shift? shift(); } int bitIndex = (indexArray & 0x0f) + wordShift; indexArray >>>= 4; // should i<<6 be cached as a separate variable? // it would only save one cycle in the best circumstances. return (i<<6) + bitIndex; } public int next(int fromIndex) { indexArray=0; i = fromIndex >> 6; if (i>=words) { word =0; // setup so next() will also return -1 return -1; } wordShift = fromIndex & 0x3f; word = arr[i] >>> wordShift; if (word !=0) { wordShift--; // compensate for 1 based arrIndex } else { while (word ==0) { if (++i >= words) return -1; word = arr[i]; } wordShift =-1; } shift(); int bitIndex = (indexArray & 0x0f) + wordShift; indexArray >>>= 4; // should i<<6 be cached as a separate variable? // it would only save one cycle in the best circumstances. return (i<<6) + bitIndex; } }