package com.facebook.infrastructure.utils;
import com.facebook.infrastructure.io.ICompactSerializer;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;
/**
 * A counting Bloom filter. Each bucket is a 4-bit saturating counter; 16
 * buckets are packed into each long of {@code filter_}. Unlike a plain Bloom
 * filter, elements can be removed via {@link #delete}, at the cost of 4 bits
 * per bucket instead of 1.
 *
 * A counter that reaches {@link #MAX_COUNT} saturates: it is never
 * incremented or decremented again, because its true count is no longer
 * known. This makes {@code delete} conservative (never produces a false
 * negative) but means saturated buckets can only over-count.
 *
 * NOTE(review): not thread-safe — callers must synchronize externally if
 * shared; confirm against call sites.
 */
public class CountingBloomFilter extends Filter
{
    /** Largest value a 4-bit bucket can hold; counters saturate here. */
    public static final int MAX_COUNT = 15;
    /** 64 bits per word / 4 bits per bucket = 16 buckets per long. */
    static final int BUCKETS_PER_WORD = 16;

    private static ICompactSerializer<CountingBloomFilter> serializer_ = new CountingBloomFilterSerializer();

    /** Returns the shared serializer for compact on-disk/wire representation. */
    public static ICompactSerializer<CountingBloomFilter> serializer()
    {
        return serializer_;
    }

    /** Packed bucket storage; see class comment for the layout. */
    long[] filter_;

    /**
     * Creates a filter sized for {@code numElements} keys at
     * {@code bucketsPerElement} buckets per key, with the optimal number of
     * hash functions for that density.
     */
    public CountingBloomFilter(int numElements, int bucketsPerElement)
    {
        // NOTE(review): "+ 20" over-allocates slightly; an exact ceiling
        // division would use "+ BUCKETS_PER_WORD - 1". Left unchanged because
        // changing the array length would make merge() assert against (and
        // serialized blobs differ from) filters built by existing code.
        this(BloomCalculations.computeBestK(bucketsPerElement), new long[(numElements * bucketsPerElement + 20) / BUCKETS_PER_WORD]);
    }

    /** Wraps an existing bucket array; used by the serializer and cloneMe(). */
    CountingBloomFilter(int hashes, long[] filter)
    {
        hashCount = hashes;
        filter_ = filter;
    }

    /** Returns a deep copy (the bucket array is duplicated). */
    public CountingBloomFilter cloneMe()
    {
        long[] filter = Arrays.copyOf(filter_, filter_.length);
        return new CountingBloomFilter(hashCount, filter);
    }

    /** Returns the largest counter value currently stored (0..MAX_COUNT). */
    int maxBucket() {
        int max = 0;
        for (int i = 0; i < buckets(); i++) {
            Bucket bucket = new Bucket(i);
            if (bucket.value > max) {
                // Safe narrowing: a bucket value is at most MAX_COUNT (15).
                max = (int)bucket.value;
            }
        }
        return max;
    }

    /** Resets every counter to zero. */
    public void clear() {
        Arrays.fill(filter_, 0L);
    }

    /**
     * Adds every counter of {@code cbf} into this filter, saturating at
     * MAX_COUNT. Both filters must have identical geometry (same length and
     * hash count).
     */
    public void merge(CountingBloomFilter cbf)
    {
        assert cbf != null;
        assert filter_.length == cbf.filter_.length;
        assert hashCount == cbf.hashCount;
        for ( int i = 0; i < buckets(); ++i )
        {
            Bucket b = new Bucket(i);
            Bucket b2 = cbf.getBucket(i);
            long merged = b.value + b2.value;
            b.set(merged > MAX_COUNT ? MAX_COUNT : merged);
        }
    }

    /** Returns a view of bucket {@code i} (package-private, for merge). */
    Bucket getBucket(int i) {
        return new Bucket(i);
    }

    /**
     * Returns true if {@code key} may have been added (standard Bloom-filter
     * semantics: false positives possible, false negatives not).
     */
    public boolean isPresent(String key)
    {
        for (int bucketIndex : getHashBuckets(key)) {
            Bucket bucket = new Bucket(bucketIndex);
            if (bucket.value == 0) {
                return false;
            }
        }
        return true;
    }

    /**
     * Adds {@code key} to the filter by incrementing each of its hash
     * buckets. Buckets already at MAX_COUNT are left saturated.
     *
     * @param key value whose hash is used to fill the filter.
     *            This is a general purpose API.
     */
    public void add(String key)
    {
        assert key != null;
        for (int bucketIndex : getHashBuckets(key)) {
            Bucket bucket = new Bucket(bucketIndex);
            if(bucket.value < MAX_COUNT) {
                bucket.set(bucket.value + 1);
            }
        }
    }

    /**
     * Removes one occurrence of {@code key}, decrementing each of its hash
     * buckets. Saturated buckets (== MAX_COUNT) are deliberately NOT
     * decremented — their true count is unknown, and decrementing could
     * introduce false negatives.
     *
     * @throws IllegalArgumentException if {@code key} is not present.
     */
    public void delete(String key)
    {
        if (!isPresent(key)) {
            throw new IllegalArgumentException("key is not present");
        }
        for (int bucketIndex : getHashBuckets(key)) {
            Bucket bucket = new Bucket(bucketIndex);
            if(bucket.value >= 1 && bucket.value < MAX_COUNT) {
                bucket.set(bucket.value - 1);
            }
        }
    }

    /** Total number of 4-bit buckets in the filter. */
    int buckets() {
        return filter_.length * BUCKETS_PER_WORD;
    }

    /**
     * A read/write view of a single 4-bit bucket. Non-static on purpose:
     * it reads and writes the enclosing filter's {@code filter_} array.
     */
    private class Bucket {
        public final int wordIndex;
        public final int shift;
        public final long mask;
        public final long value;

        public Bucket(int bucketIndex) {
            wordIndex = bucketIndex >> 4;          // bucketIndex / 16
            shift = (bucketIndex & 0x0f) << 2;     // (bucketIndex % 16) * 4 bits
            mask = 15L << shift;
            value = ((filter_[wordIndex] & mask) >>> shift);
        }

        /** Stores {@code value} into this bucket; caller must keep it <= MAX_COUNT. */
        void set(long value) {
            filter_[wordIndex] = (filter_[wordIndex] & ~mask) | (value << shift);
        }
    }

    /** Typed accessor for the serializer (was raw ICompactSerializer). */
    ICompactSerializer<CountingBloomFilter> tserializer() {
        return serializer_;
    }

    /** Number of buckets whose counter is zero (a fullness/quality metric). */
    int emptyBuckets() {
        int n = 0;
        for (int i = 0; i < buckets(); i++) {
            if (new Bucket(i).value == 0) {
                n++;
            }
        }
        return n;
    }
}
/**
 * Compact serializer for {@link CountingBloomFilter}.
 *
 * The representation is intentionally minimal — hash-function count, word
 * count, then the raw words — so the size of the serialized blob can be
 * determined up front from the number of elements the filter will hold.
 */
class CountingBloomFilterSerializer implements ICompactSerializer<CountingBloomFilter>
{
    /**
     * Writes {@code cbf} to {@code dos} as: hash count (int), number of
     * words (int), then each 64-bit word in order.
     */
    public void serialize(CountingBloomFilter cbf, DataOutputStream dos)
        throws IOException
    {
        dos.writeInt(cbf.getHashCount());
        long[] words = cbf.filter_;
        dos.writeInt(words.length);
        for (long word : words)
        {
            dos.writeLong(word);
        }
    }

    /**
     * Reads back a filter previously written by {@link #serialize}.
     */
    public CountingBloomFilter deserialize(DataInputStream dis) throws IOException
    {
        int hashes = dis.readInt();
        long[] words = new long[dis.readInt()];
        for (int i = 0; i < words.length; i++)
        {
            words[i] = dis.readLong();
        }
        return new CountingBloomFilter(hashes, words);
    }
}