package be.bagofwords.db.data;
import be.bagofwords.util.HashUtils;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.google.common.hash.Funnel;
import com.google.common.hash.PrimitiveSink;
import java.io.Serializable;
import java.util.Arrays;
@JsonIgnoreProperties("dataCheckSum")
public class LongCountsBloomFilter implements Serializable {
private int numOfHashFunctions;
private ByteArray bytes;
public LongCountsBloomFilter(long expectedSize, double fpp) {
if (expectedSize > Integer.MAX_VALUE) {
throw new RuntimeException("Creating a bloomfilter currently not supported for size " + expectedSize);
}
int numOfBytes = optimalNumOfBytes(expectedSize, fpp);
bytes = new ByteArray(numOfBytes);
numOfHashFunctions = optimalNumOfHashFunctions(expectedSize, numOfBytes);
}
public LongCountsBloomFilter(ByteArray bytes, int numOfHashFunctions) {
this.bytes = bytes;
this.numOfHashFunctions = numOfHashFunctions;
}
public int getMaxCount(long hash64) {
int hash1 = (int) hash64;
int hash2 = (int) (hash64 >>> 32);
if (hash1 == 0 || hash2 == 0) {
hash64 = HashUtils.randomDistributeHash(hash64);
hash1 = (int) hash64;
hash2 = (int) (hash64 >>> 32);
}
int min = Byte.MAX_VALUE - Byte.MIN_VALUE;
for (int i = 1; i <= numOfHashFunctions; i++) {
int nextHash = hash1 + i * hash2;
if (nextHash < 0) {
nextHash = ~nextHash;
}
min = Math.min(min, bytes.get(nextHash % bytes.size()));
}
return min;
}
public synchronized <T> void addCount(long hash64, int count) {
int currCount = getMaxCount(hash64);
int newCount = Math.min(count + currCount, Byte.MAX_VALUE - Byte.MIN_VALUE);
int hash1 = (int) hash64;
int hash2 = (int) (hash64 >>> 32);
if (hash1 == 0 || hash2 == 0) {
hash64 = HashUtils.randomDistributeHash(hash64);
hash1 = (int) hash64;
hash2 = (int) (hash64 >>> 32);
}
for (int i = 1; i <= numOfHashFunctions; i++) {
int nextHash = hash1 + i * hash2;
if (nextHash < 0) {
nextHash = ~nextHash;
}
bytes.set(nextHash % bytes.size(), newCount);
}
}
private static int optimalNumOfBytes(long expectedSize, double fpp) {
if (fpp == 0) {
fpp = Double.MIN_VALUE;
}
double result = -expectedSize * Math.log(fpp) / (Math.log(2) * Math.log(2));
if (result > Integer.MAX_VALUE) {
throw new RuntimeException("Number of required bytes too large!");
}
return (int) result;
}
private static int optimalNumOfHashFunctions(long expectedSize, long numOfBytes) {
if (expectedSize == 0) {
expectedSize = 1;
}
return Math.max(1, (int) Math.round(numOfBytes / expectedSize * Math.log(2)));
}
//Used for serialization
public LongCountsBloomFilter() {
}
public int getNumOfHashFunctions() {
return numOfHashFunctions;
}
public void setNumOfHashFunctions(int numOfHashFunctions) {
this.numOfHashFunctions = numOfHashFunctions;
}
public double expectedFpp() {
return Math.pow((double) bytes.computeBitCount() / bytes.size(), numOfHashFunctions);
}
public LongCountsBloomFilter clone() {
return new LongCountsBloomFilter(getBytes().clone(), numOfHashFunctions);
}
//Json serialization
public ByteArray getBytes() {
return bytes;
}
public void setBytes(ByteArray bytes) {
this.bytes = bytes;
}
public static class LongFunnel implements Funnel<Long> {
public void funnel(Long s, PrimitiveSink primitiveSink) {
primitiveSink.putLong(s);
}
}
private byte max(byte val1, byte val2) {
if (val1 > val2) {
return val1;
} else {
return val2;
}
}
public class ByteArray {
private byte[] data;
public ByteArray(int bits) {
this.data = new byte[bits];
Arrays.fill(this.data, Byte.MIN_VALUE);
}
public ByteArray(byte[] data) {
this.data = data;
}
void set(int index, int value) {
if (value < 0) {
throw new RuntimeException("Can not set negative counts!");
}
int valueToSet = value + Byte.MIN_VALUE;
if (valueToSet > Byte.MAX_VALUE) {
throw new RuntimeException("Too large count " + value);
}
data[index] = max(data[index], (byte) valueToSet);
}
int get(int index) {
return data[index] - Byte.MIN_VALUE;
}
/**
* Number of bits
*/
public int size() {
return data.length;
}
/**
* Number of set bits (1s)
*/
int computeBitCount() {
int bitCount = 0;
for (byte aData : data) {
if (aData != Byte.MIN_VALUE) {
bitCount++;
}
}
return bitCount;
}
//Serialization
public ByteArray() {
}
public byte[] getData() {
return data;
}
public void setData(byte[] data) {
this.data = data;
}
public ByteArray mergeWith(ByteArray otherByteArray) {
if (otherByteArray.size() != size()) {
throw new RuntimeException("Unequal sizes!");
}
ByteArray result = new ByteArray(size());
for (int i = 0; i < data.length; i++) {
int sum = data[i] + otherByteArray.data[i] - Byte.MIN_VALUE;
if (sum > Byte.MAX_VALUE) {
sum = Byte.MAX_VALUE;
}
result.data[i] = (byte) sum;
}
return result;
}
public ByteArray clone() {
return new ByteArray(data.clone());
}
}
}