/*
 * Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
 * Version 1.0, and under the Eclipse Public License, Version 1.0
 * (http://h2database.com/html/license.html).
 * Initial Developer: H2 Group
 */
package org.h2.bitmap;

import java.util.Arrays;

/**
 * This class is copied from Lucene and modified a little.
 * An "open" BitSet implementation that allows direct access to the array of
 * words storing the bits.
 * <p/>
 * Unlike java.util.BitSet, the fact that bits are packed into an array of longs
 * is part of the interface. This allows efficient implementation of other
 * algorithms by someone other than the author. It also allows one to
 * efficiently implement alternate serialization or interchange formats.
 * <p/>
 * <code>OpenBitSet</code> is faster than <code>java.util.BitSet</code> in most
 * operations and *much* faster at calculating cardinality of sets and results
 * of set operations. (The original Lucene class can also handle sets of larger
 * cardinality, up to 64 * 2**32-1; this variant addresses bits with
 * <code>int</code> indexes.)
 * <p/>
 * The goals of <code>OpenBitSet</code> are the fastest implementation possible,
 * and maximum code reuse. Extra safety and encapsulation may always be built on
 * top, but if that's built in, the cost can never be removed (and hence people
 * re-implement their own version in order to get better performance). If you
 * want a "safe", totally encapsulated (and slower and limited) BitSet class,
 * use <code>java.util.BitSet</code>.
 * <p/>
 * <h3>Performance Results</h3>
 *
 * Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M <br/>
 * BitSet size = 1,000,000 <br/>
 * Results are java.util.BitSet time divided by OpenBitSet time.
 * <table border="1">
 * <tr>
 * <th></th>
 * <th>cardinality</th>
 * <th>intersect_count</th>
 * <th>union</th>
 * <th>nextSetBit</th>
 * <th>get</th>
 * <th>iterator</th>
 * </tr>
 * <tr>
 * <th>50% full</th>
 * <td>3.36</td>
 * <td>3.96</td>
 * <td>1.44</td>
 * <td>1.46</td>
 * <td>1.99</td>
 * <td>1.58</td>
 * </tr>
 * <tr>
 * <th>1% full</th>
 * <td>3.31</td>
 * <td>3.90</td>
 * <td>&nbsp;</td>
 * <td>1.04</td>
 * <td>&nbsp;</td>
 * <td>0.99</td>
 * </tr>
 * </table>
 * <br/>
 * Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch
 * -Xmx64M <br/>
 * BitSet size = 1,000,000 <br/>
 * Results are java.util.BitSet time divided by OpenBitSet time.
 * <table border="1">
 * <tr>
 * <th></th>
 * <th>cardinality</th>
 * <th>intersect_count</th>
 * <th>union</th>
 * <th>nextSetBit</th>
 * <th>get</th>
 * <th>iterator</th>
 * </tr>
 * <tr>
 * <th>50% full</th>
 * <td>2.50</td>
 * <td>3.50</td>
 * <td>1.00</td>
 * <td>1.03</td>
 * <td>1.12</td>
 * <td>1.25</td>
 * </tr>
 * <tr>
 * <th>1% full</th>
 * <td>2.51</td>
 * <td>3.49</td>
 * <td>&nbsp;</td>
 * <td>1.00</td>
 * <td>&nbsp;</td>
 * <td>1.02</td>
 * </tr>
 * </table>
 */
public class OpenBitSet implements BitSet, Cloneable {

    /*
     * BitSets are packed into arrays of "words." Currently a word is a long,
     * which consists of 64 bits, requiring 6 address bits. The choice of word
     * size is determined purely by performance concerns.
     */
    protected final static int ADDRESS_BITS_PER_WORD = 6;
    protected final static int BITS_PER_WORD = 1 << ADDRESS_BITS_PER_WORD;
    protected final static int BIT_INDEX_MASK = BITS_PER_WORD - 1;

    /** Backing storage; bit {@code i} lives in {@code words[i / 64]}. */
    protected long[] words;

    /**
     * Number of words (elements) used in the array. Every operation that is
     * bounded by this value (cardinality, equals, or, ...) relies on all words
     * at positions &gt;= wlen being zero.
     */
    protected int wlen;

    // Used only for assert:
    private long numBits;

    /**
     * Constructs an OpenBitSet large enough to hold <code>nbits</code> bits.
     *
     * @param nbits
     *            the number of bits the set must be able to hold
     */
    public OpenBitSet(int nbits) {
        this.numBits = nbits;
        this.words = new long[wordIndex(nbits - 1) + 1];
        this.wlen = words.length;
    }

    /**
     * Constructs an OpenBitSet that holds a single word (64 bits).
     */
    public OpenBitSet() {
        this(BITS_PER_WORD);
    }

    /**
     * Constructs an OpenBitSet from an existing long[]. The array is used
     * directly, it is not copied. <br/>
     * The first 64 bits are in long[0], with bit index 0 at the least
     * significant bit, and bit index 63 at the most significant. Given a bit
     * index, the word containing it is long[index/64], and it is at bit number
     * index%64 within that word.
     * <p>
     * All elements of the array are considered to be in use.
     *
     * @param bits
     *            the words to use as bit storage
     */
    public OpenBitSet(long[] bits) {
        this.words = bits;
        this.wlen = words.length;
        // widen before shifting so that very large arrays do not overflow int
        this.numBits = (long) wlen << ADDRESS_BITS_PER_WORD;
    }

    /** Expert: returns the long[] storing the bits */
    public long[] getBits() {
        return words;
    }

    /**
     * Expert: sets a new long[] to use as the bit storage. The number of words
     * in use is not changed by this call; see {@link #setNumWords(int)}.
     */
    public void setBits(long[] bits) {
        this.words = bits;
    }

    /** Expert: gets the number of longs in the array that are in use */
    public int getNumWords() {
        return wlen;
    }

    /** Expert: sets the number of longs in the array that are in use */
    public void setNumWords(int nWords) {
        this.wlen = nWords;
    }

    /**
     * Returns the current capacity in bits (1 greater than the index of the
     * last bit)
     */
    public int capacity() {
        return words.length << ADDRESS_BITS_PER_WORD;
    }

    /**
     * Returns the same value as {@link #capacity()}.
     */
    @Override
    public int size() {
        return capacity();
    }

    /**
     * Returns true if {@link #cardinality()} is zero.
     */
    @Override
    public boolean isEmpty() {
        return cardinality() == 0;
    }

    /**
     * Returns true or false for the specified bit index. The index should be
     * less than the OpenBitSet size
     */
    @Override
    public boolean get(int index) {
        assert index >= 0 && index < numBits;
        // signed shift will keep a negative index and force an
        // array-index-out-of-bounds-exception, removing the need for an
        // explicit check.
        int wordNum = wordIndex(index); // div 64
        long bitmask = 1L << (index & BIT_INDEX_MASK); // mod 64
        return (words[wordNum] & bitmask) != 0;
    }

    /**
     * Sets the bit at the specified index. The index should be less than the
     * OpenBitSet size; the backing array is not grown by this method.
     */
    @Override
    public void set(int index) {
        assert index >= 0 && index < numBits;
        int wordNum = wordIndex(index); // div 64
        long bitmask = 1L << (index & BIT_INDEX_MASK); // mod 64
        words[wordNum] |= bitmask;
        // the word may lie beyond wlen after clear(), and() or
        // trimTrailingZeros(); without this the new bit would be invisible
        markWordInUse(wordNum);
    }

    /**
     * Sets a range of bits, expanding the set size if necessary
     *
     * @param startIndex
     *            lower index
     * @param endIndex
     *            one-past the last bit to set
     */
    @Override
    public void set(int startIndex, int endIndex) {
        if (endIndex <= startIndex) {
            return;
        }

        int startWord = wordIndex(startIndex);
        // since endIndex is one past the end, this is index of the last
        // word to be changed.
        int endWord = expandingWordNum(endIndex - 1);

        // shift counts are taken modulo 64 by the JVM, so no masking is needed
        long startmask = -1L << startIndex;
        // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
        long endmask = -1L >>> -endIndex;

        if (startWord == endWord) {
            words[startWord] |= (startmask & endmask);
            return;
        }

        words[startWord] |= startmask;
        Arrays.fill(words, startWord + 1, endWord, -1L);
        words[endWord] |= endmask;
    }

    /**
     * Returns the index of the word holding the given bit, growing the backing
     * array and the number of words in use if that word is not yet in use.
     *
     * @param index
     *            the bit index
     * @return the index of the word containing the bit
     */
    protected int expandingWordNum(int index) {
        int wordNum = wordIndex(index);
        if (wordNum >= wlen) {
            // ensureCapacity expects a number of WORDS, not a number of bits
            ensureCapacity(wordNum + 1);
            wlen = wordNum + 1;
        }
        assert (numBits = Math.max(numBits, index + 1L)) >= 0;
        return wordNum;
    }

    /**
     * Returns the result of BitUtil.pop_array over the words in use
     * (presumably the number of set bits).
     */
    @Override
    public int cardinality() {
        return BitUtil.pop_array(words, 0, wlen);
    }

    /**
     * Zeroes all words in use and resets the number of words in use to 0. The
     * backing array keeps its length.
     */
    @Override
    public void clear() {
        Arrays.fill(words, 0, wlen, 0L);
        wlen = 0;
    }

    /**
     * Sets the bit specified by the index to {@code false}. The index should
     * be less than the OpenBitSet size.
     *
     * @param index
     *            the index of the bit to be cleared
     */
    @Override
    public void clear(int index) {
        assert index >= 0 && index < numBits;
        int wordNum = wordIndex(index);
        long bitmask = 1L << (index & BIT_INDEX_MASK);
        words[wordNum] &= ~bitmask;
    }

    /**
     * Clears a range of bits. Bits located in words that are not in use are
     * left untouched.
     *
     * @param startIndex
     *            lower index
     * @param endIndex
     *            one-past the last bit to clear
     */
    @Override
    public void clear(int startIndex, int endIndex) {
        if (endIndex <= startIndex) {
            return;
        }

        int startWord = wordIndex(startIndex);
        if (startWord >= wlen) {
            return;
        }

        // since endIndex is one past the end, this is index of the last
        // word to be changed.
        int endWord = wordIndex(endIndex - 1);

        // inverted masks since we are clearing;
        // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
        long startmask = ~(-1L << startIndex);
        long endmask = ~(-1L >>> -endIndex);

        if (startWord == endWord) {
            words[startWord] &= (startmask | endmask);
            return;
        }

        words[startWord] &= startmask;

        int middle = Math.min(wlen, endWord);
        Arrays.fill(words, startWord + 1, middle, 0L);
        if (endWord < wlen) {
            words[endWord] &= endmask;
        }
    }

    /**
     * Sets a bit and returns the previous value. The index should be less than
     * the OpenBitSet size.
     */
    public boolean getAndSet(int index) {
        assert index >= 0 && index < numBits;
        int wordNum = wordIndex(index); // div 64
        long bitmask = 1L << (index & BIT_INDEX_MASK); // mod 64
        boolean val = (words[wordNum] & bitmask) != 0;
        words[wordNum] |= bitmask;
        markWordInUse(wordNum);
        return val;
    }

    /**
     * flips a bit. The index should be less than the OpenBitSet size.
     */
    @Override
    public void flip(int index) {
        assert index >= 0 && index < numBits;
        int wordNum = wordIndex(index); // div 64
        long bitmask = 1L << (index & BIT_INDEX_MASK); // mod 64
        words[wordNum] ^= bitmask;
        // flipping a bit in a word that is not in use turns it on
        markWordInUse(wordNum);
    }

    /**
     * Flips a range of bits, expanding the set size if necessary
     *
     * @param startIndex
     *            lower index
     * @param endIndex
     *            one-past the last bit to flip
     */
    @Override
    public void flip(int startIndex, int endIndex) {
        if (endIndex <= startIndex) {
            return;
        }
        int startWord = wordIndex(startIndex);

        // since endIndex is one past the end, this is index of the last
        // word to be changed.
        int endWord = expandingWordNum(endIndex - 1);

        // Java shift counts wrap around (-1L >>> 64 == -1L), so the masks are
        // built directly from the indexes: the start mask looks like
        // 11111...111000 and the end mask like 00111...111111.
        long startmask = -1L << startIndex;
        // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
        long endmask = -1L >>> -endIndex;

        if (startWord == endWord) {
            words[startWord] ^= (startmask & endmask);
            return;
        }

        words[startWord] ^= startmask;
        for (int i = startWord + 1; i < endWord; i++) {
            words[i] = ~words[i];
        }
        words[endWord] ^= endmask;
    }

    /**
     * Intersects this set with the other set. Afterwards the number of words
     * in use is the smaller of the two sets' word counts.
     *
     * @param o
     *            the other set, must be an OpenBitSet
     */
    @Override
    public void and(BitSet o) {
        OpenBitSet other = (OpenBitSet) o;
        int newLen = Math.min(this.wlen, other.wlen);
        long[] thisArr = this.words;
        long[] otherArr = other.words;
        // testing against zero can be more efficient
        int pos = newLen;
        while (--pos >= 0) {
            thisArr[pos] &= otherArr[pos];
        }
        if (this.wlen > newLen) {
            // fill zeros from the new shorter length to the old length
            Arrays.fill(words, newLen, this.wlen, 0);
        }
        this.wlen = newLen;
    }

    /**
     * Returns the result of BitUtil.pop_intersect over the words both sets
     * have in use, without modifying either set.
     */
    @Override
    public int andCardinality(BitSet other) {
        OpenBitSet o = (OpenBitSet) other;
        return BitUtil.pop_intersect(this.words, o.words, 0,
                Math.min(this.wlen, o.wlen));
    }

    /**
     * Unions this set with the other set, growing this set if the other set
     * has more words in use.
     *
     * @param o
     *            the other set, must be an OpenBitSet
     */
    @Override
    public void or(BitSet o) {
        OpenBitSet other = (OpenBitSet) o;
        int newLen = Math.max(wlen, other.wlen);
        ensureCapacity(newLen);
        assert (numBits = Math.max(other.numBits, numBits)) >= 0;

        long[] thisArr = this.words;
        long[] otherArr = other.words;
        int pos = Math.min(wlen, other.wlen);
        while (--pos >= 0) {
            thisArr[pos] |= otherArr[pos];
        }
        if (this.wlen < newLen) {
            // the other set is longer: its tail is copied as is
            System.arraycopy(otherArr, this.wlen, thisArr, this.wlen,
                    newLen - this.wlen);
        }
        this.wlen = newLen;
    }

    /**
     * Returns the BitUtil.pop_union count over the common words plus the
     * BitUtil.pop_array count of the longer set's tail, without modifying
     * either set.
     */
    @Override
    public int orCardinality(BitSet other) {
        OpenBitSet o = (OpenBitSet) other;
        int tot = BitUtil.pop_union(this.words, o.words, 0,
                Math.min(this.wlen, o.wlen));
        if (this.wlen < o.wlen) {
            tot += BitUtil.pop_array(o.words, this.wlen, o.wlen - this.wlen);
        } else if (this.wlen > o.wlen) {
            tot += BitUtil.pop_array(this.words, o.wlen, this.wlen - o.wlen);
        }
        return tot;
    }

    /**
     * Clears all bits of this set that are set in the other set.
     *
     * @param other
     *            the other set, must be an OpenBitSet
     */
    @Override
    public void andNot(BitSet other) {
        OpenBitSet o = (OpenBitSet) other;
        int idx = Math.min(wlen, o.wlen);
        long[] thisArr = this.words;
        long[] otherArr = o.words;
        while (--idx >= 0) {
            thisArr[idx] &= ~otherArr[idx];
        }
    }

    /**
     * Returns the BitUtil.pop_andnot count over the common words plus the
     * BitUtil.pop_array count of this set's tail, without modifying either
     * set.
     */
    @Override
    public int andNotCardinality(BitSet other) {
        OpenBitSet o = (OpenBitSet) other;
        int tot = BitUtil.pop_andnot(this.words, o.words, 0,
                Math.min(this.wlen, o.wlen));
        if (this.wlen > o.wlen) {
            tot += BitUtil.pop_array(this.words, o.wlen, this.wlen - o.wlen);
        }
        return tot;
    }

    /**
     * Replaces this set with the symmetric difference of both sets, growing
     * this set if the other set has more words in use.
     *
     * @param other
     *            the other set, must be an OpenBitSet
     */
    @Override
    public void xor(BitSet other) {
        OpenBitSet o = (OpenBitSet) other;
        int newLen = Math.max(wlen, o.wlen);
        ensureCapacity(newLen);
        assert (numBits = Math.max(o.numBits, numBits)) >= 0;

        long[] thisArr = this.words;
        long[] otherArr = o.words;
        int pos = Math.min(wlen, o.wlen);
        while (--pos >= 0) {
            thisArr[pos] ^= otherArr[pos];
        }
        if (this.wlen < newLen) {
            // x ^ 0 == x, so the tail of the longer other set is copied as is
            System.arraycopy(otherArr, this.wlen, thisArr, this.wlen,
                    newLen - this.wlen);
        }
        this.wlen = newLen;
    }

    /**
     * Returns the BitUtil.pop_xor count over the common words plus the
     * BitUtil.pop_array count of the longer set's tail, without modifying
     * either set.
     */
    @Override
    public int xorCardinality(BitSet other) {
        OpenBitSet o = (OpenBitSet) other;
        int tot = BitUtil.pop_xor(this.words, o.words, 0,
                Math.min(this.wlen, o.wlen));
        if (this.wlen < o.wlen) {
            tot += BitUtil.pop_array(o.words, this.wlen, o.wlen - this.wlen);
        } else if (this.wlen > o.wlen) {
            tot += BitUtil.pop_array(this.words, o.wlen, this.wlen - o.wlen);
        }
        return tot;
    }

    /**
     * Returns true if at least one bit is set in both sets.
     *
     * @param o
     *            the other set, must be an OpenBitSet
     */
    @Override
    public boolean intersects(BitSet o) {
        OpenBitSet other = (OpenBitSet) o;
        int pos = Math.min(this.wlen, other.wlen);
        long[] thisArr = this.words;
        long[] otherArr = other.words;
        while (--pos >= 0) {
            if ((thisArr[pos] & otherArr[pos]) != 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Ensures that the BitSet can hold enough words.
     *
     * @param wordsRequired
     *            the minimum acceptable number of words.
     */
    private void ensureCapacity(int wordsRequired) {
        if (words.length < wordsRequired) {
            // Allocate larger of doubled size or required size
            int request = Math.max(2 * words.length, wordsRequired);
            words = Arrays.copyOf(words, request);
        }
    }

    /**
     * Raises the number of words in use so that it includes the given word.
     * Does nothing if the word is already in use.
     *
     * @param wordNum
     *            index of a word that has just been written to
     */
    private void markWordInUse(int wordNum) {
        if (wordNum >= wlen) {
            wlen = wordNum + 1;
        }
    }

    /**
     * Lowers numWords, the number of words in use, by checking for trailing
     * zero words.
     */
    public void trimTrailingZeros() {
        int idx = wlen - 1;
        while (idx >= 0 && words[idx] == 0) {
            idx--;
        }
        wlen = idx + 1;
    }

    /** returns the number of 64 bit words it would take to hold numBits */
    public static int bits2words(int numBits) {
        return wordIndex(numBits - 1) + 1;
    }

    /**
     * Given a bit index, return word index containing it.
     */
    private static int wordIndex(int bitIndex) {
        return bitIndex >> ADDRESS_BITS_PER_WORD;
    }

    /** returns true if both sets have the same bits set */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof OpenBitSet)) {
            return false;
        }
        OpenBitSet a;
        OpenBitSet b = (OpenBitSet) o;
        // make a the larger set.
        if (b.wlen > this.wlen) {
            a = b;
            b = this;
        } else {
            a = this;
        }

        // check for any set bits out of the range of b
        for (int i = a.wlen - 1; i >= b.wlen; i--) {
            if (a.words[i] != 0) {
                return false;
            }
        }

        for (int i = b.wlen - 1; i >= 0; i--) {
            if (a.words[i] != b.words[i]) {
                return false;
            }
        }

        return true;
    }

    /**
     * Returns a hash of the words in use. Trailing zero words do not affect
     * the value, so sets that are equal according to {@link #equals(Object)}
     * have the same hash code.
     */
    @Override
    public int hashCode() {
        // Start with a zero hash and use a mix that results in zero if the
        // input is zero.
        // This effectively truncates trailing zeros without an explicit check.
        long h = 0;
        // only the words in use are hashed, because equals() looks at no others
        for (int i = wlen; --i >= 0;) {
            h ^= words[i];
            h = (h << 1) | (h >>> 63); // rotate left
        }
        // fold leftmost bits into right and add a constant to prevent
        // empty sets from returning 0, which is too common.
        return (int) ((h >> 32) ^ h) + 0x98761234;
    }

    /**
     * Returns the index of the first set bit starting at the index specified.
     * -1 is returned if there are no more set bits.
     */
    @Override
    public int nextSetBit(int index) {
        int i = wordIndex(index);
        if (i >= wlen) {
            return -1;
        }
        int subIndex = index & BIT_INDEX_MASK; // index within the word
        // skip all the bits to the right of index
        long word = words[i] >> subIndex;

        if (word != 0) {
            return (i << ADDRESS_BITS_PER_WORD) + subIndex + BitUtil.ntz(word);
        }

        while (++i < wlen) {
            word = words[i];
            if (word != 0) {
                return (i << ADDRESS_BITS_PER_WORD) + BitUtil.ntz(word);
            }
        }

        return -1;
    }

    /**
     * Returns the index of the first clear bit starting at the index
     * specified. Bits beyond the words in use are clear, so a result is always
     * found. Bit sets of maximal length are not handled.
     *
     * @param fromIndex
     *            the index to start checking from (inclusive)
     * @return the index of the next clear bit
     * @throws IndexOutOfBoundsException
     *             if the specified index is negative
     */
    @Override
    public int nextClearBit(int fromIndex) {
        if (fromIndex < 0) {
            throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);
        }

        int u = wordIndex(fromIndex);
        if (u >= wlen) {
            return fromIndex;
        }

        // invert the word so that clear bits become set bits, and drop the
        // bits to the right of fromIndex (the shift count wraps modulo 64)
        long word = ~words[u] & (-1L << fromIndex);

        while (true) {
            if (word != 0) {
                return (u << ADDRESS_BITS_PER_WORD)
                        + Long.numberOfTrailingZeros(word);
            }
            if (++u == wlen) {
                // all words in use are full from fromIndex on
                return wlen << ADDRESS_BITS_PER_WORD;
            }
            word = ~words[u];
        }
    }

    /**
     * Returns a copy of this set that has its own copy of the backing array.
     */
    @Override
    public OpenBitSet clone() {
        try {
            OpenBitSet obs = (OpenBitSet) super.clone();
            // hopefully an array clone is as fast(er) than arraycopy
            obs.words = obs.words.clone();
            return obs;
        } catch (CloneNotSupportedException e) {
            throw new RuntimeException(e);
        }
    }
}