/**
* Copyright 2016 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
package com.github.ambry.utils;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
/**
* An "open" BitSet implementation that allows direct access to the arrays of words
* storing the bits. Derived from Lucene's OpenBitSet, but with a paged backing array
* (see bits declaration, below).
* <p/>
* Unlike <code>java.util.BitSet</code>, the fact that bits are packed into an array of longs
* is part of the interface. This allows efficient implementation of other algorithms
* by someone other than the author. It also allows one to efficiently implement
* alternate serialization or interchange formats.
* <p/>
* <code>OpenBitSet</code> is faster than <code>java.util.BitSet</code> in most operations
* and *much* faster at calculating cardinality of sets and results of set operations.
* It can also handle sets of larger cardinality (up to 64 * 2**32-1)
* <p/>
* The goals of <code>OpenBitSet</code> are the fastest implementation possible, and
* maximum code reuse. Extra safety and encapsulation
* may always be built on top, but if that's built in, the cost can never be removed (and
* hence people re-implement their own version in order to get better performance).
* If you want a "safe", totally encapsulated (and slower and limited) BitSet
* class, use <code>java.util.BitSet</code>.
*/
public class OpenBitSet implements IBitSet {
/**
* We break the bitset up into multiple arrays to avoid promotion failure caused by attempting to allocate
* large, contiguous arrays. All sub-arrays but the last are uniformly PAGE_SIZE words;
* to avoid waste in small bloom filters the last sub-array
* is sized to exactly the remaining number of words required to achieve the desired set size
*/
private final long[][] bits;
private int wlen; // number of words (elements) used in the array
private final int pageCount;
private static final int PAGE_SIZE = 4096;
/**
* Constructs an OpenBitSet large enough to hold numBits.
* @param numBits
*/
public OpenBitSet(long numBits) {
wlen = (int) bits2words(numBits);
int lastPageSize = wlen % PAGE_SIZE;
int fullPageCount = wlen / PAGE_SIZE;
pageCount = fullPageCount + (lastPageSize == 0 ? 0 : 1);
bits = new long[pageCount][];
for (int i = 0; i < fullPageCount; ++i) {
bits[i] = new long[PAGE_SIZE];
}
if (lastPageSize != 0) {
bits[bits.length - 1] = new long[lastPageSize];
}
}
public OpenBitSet() {
this(64);
}
/**
* @return the pageSize
*/
public int getPageSize() {
return PAGE_SIZE;
}
public int getPageCount() {
return pageCount;
}
public long[] getPage(int pageIdx) {
return bits[pageIdx];
}
/** Returns the current capacity in bits (1 greater than the index of the last bit) */
public long capacity() {
return ((long) wlen) << 6;
}
/**
* Returns the current capacity of this set. Included for
* compatibility. This is *not* equal to {@link #cardinality}
*/
public long size() {
return capacity();
}
// @Override -- not until Java 1.6
public long length() {
return capacity();
}
/** Returns true if there are no set bits */
public boolean isEmpty() {
return cardinality() == 0;
}
/** Expert: gets the number of longs in the array that are in use */
public int getNumWords() {
return wlen;
}
/**
* Returns true or false for the specified bit index.
* The index should be less than the OpenBitSet size
*/
public boolean get(int index) {
int i = index >> 6; // div 64
// signed shift will keep a negative index and force an
// array-index-out-of-bounds-exception, removing the need for an explicit check.
int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit;
// TODO perfectionist one can implement this using bit operations
return (bits[i / PAGE_SIZE][i % PAGE_SIZE] & bitmask) != 0;
}
/**
* Returns true or false for the specified bit index.
* The index should be less than the OpenBitSet size.
*/
public boolean get(long index) {
int i = (int) (index >> 6); // div 64
int bit = (int) index & 0x3f; // mod 64
long bitmask = 1L << bit;
// TODO perfectionist one can implement this using bit operations
return (bits[i / PAGE_SIZE][i % PAGE_SIZE] & bitmask) != 0;
}
/**
* Sets the bit at the specified index.
* The index should be less than the OpenBitSet size.
*/
public void set(long index) {
int wordNum = (int) (index >> 6);
int bit = (int) index & 0x3f;
long bitmask = 1L << bit;
bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] |= bitmask;
}
/**
* Sets the bit at the specified index.
* The index should be less than the OpenBitSet size.
*/
public void set(int index) {
int wordNum = index >> 6; // div 64
int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit;
bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] |= bitmask;
}
/**
* clears a bit.
* The index should be less than the OpenBitSet size.
*/
public void clear(int index) {
int wordNum = index >> 6;
int bit = index & 0x03f;
long bitmask = 1L << bit;
bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] &= ~bitmask;
// hmmm, it takes one more instruction to clear than it does to set... any
// way to work around this? If there were only 63 bits per word, we could
// use a right shift of 10111111...111 in binary to position the 0 in the
// correct place (using sign extension).
// Could also use Long.rotateRight() or rotateLeft() *if* they were converted
// by the JVM into a native instruction.
// bits[word] &= Long.rotateLeft(0xfffffffe,bit);
}
/**
* clears a bit.
* The index should be less than the OpenBitSet size.
*/
public void clear(long index) {
int wordNum = (int) (index >> 6); // div 64
int bit = (int) index & 0x3f; // mod 64
long bitmask = 1L << bit;
bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] &= ~bitmask;
}
/**
* Clears a range of bits. Clearing past the end does not change the size of the set.
*
* @param startIndex lower index
* @param endIndex one-past the last bit to clear
*/
public void clear(int startIndex, int endIndex) {
if (endIndex <= startIndex) {
return;
}
int startWord = (startIndex >> 6);
if (startWord >= wlen) {
return;
}
// since endIndex is one past the end, this is index of the last
// word to be changed.
int endWord = ((endIndex - 1) >> 6);
long startmask = -1L << startIndex;
long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
// invert masks since we are clearing
startmask = ~startmask;
endmask = ~endmask;
if (startWord == endWord) {
bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] &= (startmask | endmask);
return;
}
bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] &= startmask;
int middle = Math.min(wlen, endWord);
if (startWord / PAGE_SIZE == middle / PAGE_SIZE) {
Arrays.fill(bits[startWord / PAGE_SIZE], (startWord + 1) % PAGE_SIZE, middle % PAGE_SIZE, 0L);
} else {
while (++startWord < middle) {
bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] = 0L;
}
}
if (endWord < wlen) {
bits[endWord / PAGE_SIZE][endWord % PAGE_SIZE] &= endmask;
}
}
/** Clears a range of bits. Clearing past the end does not change the size of the set.
*
* @param startIndex lower index
* @param endIndex one-past the last bit to clear
*/
public void clear(long startIndex, long endIndex) {
if (endIndex <= startIndex) {
return;
}
int startWord = (int) (startIndex >> 6);
if (startWord >= wlen) {
return;
}
// since endIndex is one past the end, this is index of the last
// word to be changed.
int endWord = (int) ((endIndex - 1) >> 6);
long startmask = -1L << startIndex;
long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
// invert masks since we are clearing
startmask = ~startmask;
endmask = ~endmask;
if (startWord == endWord) {
bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] &= (startmask | endmask);
return;
}
bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] &= startmask;
int middle = Math.min(wlen, endWord);
if (startWord / PAGE_SIZE == middle / PAGE_SIZE) {
Arrays.fill(bits[startWord / PAGE_SIZE], (startWord + 1) % PAGE_SIZE, middle % PAGE_SIZE, 0L);
} else {
while (++startWord < middle) {
bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] = 0L;
}
}
if (endWord < wlen) {
bits[endWord / PAGE_SIZE][endWord % PAGE_SIZE] &= endmask;
}
}
/** @return the number of set bits */
public long cardinality() {
long bitCount = 0L;
for (int i = getPageCount(); i-- > 0; ) {
bitCount += BitUtil.pop_array(bits[i], 0, wlen);
}
return bitCount;
}
/** this = this AND other */
public void intersect(OpenBitSet other) {
int newLen = Math.min(this.wlen, other.wlen);
long[][] thisArr = this.bits;
long[][] otherArr = other.bits;
int thisPageSize = PAGE_SIZE;
int otherPageSize = other.PAGE_SIZE;
// testing against zero can be more efficient
int pos = newLen;
while (--pos >= 0) {
thisArr[pos / thisPageSize][pos % thisPageSize] &= otherArr[pos / otherPageSize][pos % otherPageSize];
}
if (this.wlen > newLen) {
// fill zeros from the new shorter length to the old length
for (pos = wlen; pos-- > newLen; ) {
thisArr[pos / thisPageSize][pos % thisPageSize] = 0;
}
}
this.wlen = newLen;
}
// some BitSet compatability methods
//** see {@link intersect} */
public void and(OpenBitSet other) {
intersect(other);
}
/** Lowers numWords, the number of words in use,
* by checking for trailing zero words.
*/
public void trimTrailingZeros() {
int idx = wlen - 1;
while (idx >= 0 && bits[idx / PAGE_SIZE][idx % PAGE_SIZE] == 0) {
idx--;
}
wlen = idx + 1;
}
/** returns the number of 64 bit words it would take to hold numBits */
public static long bits2words(long numBits) {
return (((numBits - 1) >>> 6) + 1);
}
/** returns true if both sets have the same bits set */
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof OpenBitSet)) {
return false;
}
OpenBitSet a;
OpenBitSet b = (OpenBitSet) o;
// make a the larger set.
if (b.wlen > this.wlen) {
a = b;
b = this;
} else {
a = this;
}
int aPageSize = this.PAGE_SIZE;
int bPageSize = b.PAGE_SIZE;
// check for any set bits out of the range of b
for (int i = a.wlen - 1; i >= b.wlen; i--) {
if (a.bits[i / aPageSize][i % aPageSize] != 0) {
return false;
}
}
for (int i = b.wlen - 1; i >= 0; i--) {
if (a.bits[i / aPageSize][i % aPageSize] != b.bits[i / bPageSize][i % bPageSize]) {
return false;
}
}
return true;
}
@Override
public int hashCode() {
// Start with a zero hash and use a mix that results in zero if the input is zero.
// This effectively truncates trailing zeros without an explicit check.
long h = 0;
for (int i = wlen; --i >= 0; ) {
h ^= bits[i / PAGE_SIZE][i % PAGE_SIZE];
h = (h << 1) | (h >>> 63); // rotate left
}
// fold leftmost bits into right and add a constant to prevent
// empty sets from returning 0, which is too common.
return (int) ((h >> 32) ^ h) + 0x98761234;
}
public void close() throws IOException {
// noop, let GC do the cleanup.
}
public void serialize(DataOutput out) throws IOException {
int bitLength = getNumWords();
int pageSize = getPageSize();
int pageCount = getPageCount();
out.writeInt(bitLength);
for (int p = 0; p < pageCount; p++) {
long[] bits = getPage(p);
for (int i = 0; i < pageSize && bitLength-- > 0; i++) {
out.writeLong(bits[i]);
}
}
}
public void clear() {
clear(0, capacity());
}
public static OpenBitSet deserialize(DataInput in) throws IOException {
long bitLength = in.readInt();
OpenBitSet bs = new OpenBitSet(bitLength << 6);
int pageSize = bs.getPageSize();
int pageCount = bs.getPageCount();
for (int p = 0; p < pageCount; p++) {
long[] bits = bs.getPage(p);
for (int i = 0; i < pageSize && bitLength-- > 0; i++) {
bits[i] = in.readLong();
}
}
return bs;
}
}