/*
* Copyright 2010 NCHOVY, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.krakenapps.bloomfilter;
import java.util.BitSet;
/**
 * Bloom filter backed by a {@link BitSet}.
 *
 * <p>
 * Every key is hashed once by each of two {@link HashFunction}s; the
 * {@code i}-th bit position of a key is derived from those two values as
 * {@code (first + i * second) mod numOfBits}. The number of bits and the
 * number of positions per key are chosen by {@link OptimumFinder} from the
 * requested error rate and capacity.
 *
 * @param <T>
 *            type of the keys accepted by the hash functions
 */
public class BloomFilter<T> {
    /** Error rate used by the constructors that do not take one. */
    private static final double DEFAULT_ERROR_RATE = 0.001;

    /** Capacity used by the constructors that do not take one. */
    private static final long DEFAULT_CAPACITY = 1000000L;

    private final int numOfBits;
    private final int numOfHashFunction;
    private final HashFunction<T> firstFunction;
    private final HashFunction<T> secondFunction;
    private final BitSet bitmap;

    /**
     * Creates a filter with the default error rate (0.001), the default
     * capacity (1,000,000) and the string hash functions at index 2 and 1 of
     * {@code GeneralHashFunction.stringHashFunctions}.
     */
    @SuppressWarnings("unchecked")
    public BloomFilter() {
        this(GeneralHashFunction.stringHashFunctions[2],
                GeneralHashFunction.stringHashFunctions[1]);
    }

    /**
     * Creates a filter with the default error rate (0.001) for the given
     * capacity, using the string hash functions at index 2 and 1 of
     * {@code GeneralHashFunction.stringHashFunctions}.
     *
     * @param capacity
     *            expected number of keys; must be positive
     * @throws IllegalArgumentException
     *             if {@code capacity} is not positive
     */
    @SuppressWarnings("unchecked")
    public BloomFilter(long capacity) {
        this(DEFAULT_ERROR_RATE, capacity,
                GeneralHashFunction.stringHashFunctions[2],
                GeneralHashFunction.stringHashFunctions[1]);
    }

    /**
     * Creates a filter with the default error rate (0.001) and the default
     * capacity (1,000,000).
     *
     * @param first
     *            hash function supplying the base value of each bit position
     * @param second
     *            hash function supplying the step between bit positions
     * @throws NullPointerException
     *             if either hash function is {@code null}
     */
    public BloomFilter(HashFunction<T> first, HashFunction<T> second) {
        this(DEFAULT_ERROR_RATE, DEFAULT_CAPACITY, first, second);
    }

    /**
     * Creates an empty filter sized for the given error rate and capacity.
     *
     * @param errorRate
     *            target false-positive rate, strictly between 0 and 1
     * @param capacity
     *            expected number of keys; must be positive
     * @param first
     *            hash function supplying the base value of each bit position
     * @param second
     *            hash function supplying the step between bit positions
     * @throws IllegalArgumentException
     *             if {@code errorRate} or {@code capacity} is out of range, or
     *             no usable size can be computed for them
     * @throws NullPointerException
     *             if either hash function is {@code null}
     */
    public BloomFilter(double errorRate, long capacity, HashFunction<T> first,
            HashFunction<T> second) {
        OptimumFinder opt = new OptimumFinder(errorRate, capacity);
        this.firstFunction = checkNotNull(first, "first");
        this.secondFunction = checkNotNull(second, "second");
        this.numOfHashFunction = opt.numOfHashFunction;
        this.numOfBits = opt.numOfBits;
        this.bitmap = new BitSet(this.numOfBits);
    }

    /**
     * Creates a filter that works on an existing bitmap. The bitmap is used
     * directly, not copied. Bit positions depend on the size derived from
     * {@code errorRate} and {@code capacity}, so a bitmap that was filled by
     * another filter presumably needs the same two values and the same hash
     * functions to be read back correctly.
     *
     * @param errorRate
     *            target false-positive rate, strictly between 0 and 1
     * @param capacity
     *            expected number of keys; must be positive
     * @param first
     *            hash function supplying the base value of each bit position
     * @param second
     *            hash function supplying the step between bit positions
     * @param bitmap
     *            bit set to read from and write to
     * @throws IllegalArgumentException
     *             if {@code errorRate} or {@code capacity} is out of range, or
     *             no usable size can be computed for them
     * @throws NullPointerException
     *             if a hash function or the bitmap is {@code null}
     */
    public BloomFilter(double errorRate, long capacity, HashFunction<T> first,
            HashFunction<T> second, BitSet bitmap) {
        OptimumFinder opt = new OptimumFinder(errorRate, capacity);
        this.firstFunction = checkNotNull(first, "first");
        this.secondFunction = checkNotNull(second, "second");
        this.numOfHashFunction = opt.numOfHashFunction;
        this.numOfBits = opt.numOfBits;
        this.bitmap = checkNotNull(bitmap, "bitmap");
    }

    /**
     * Records a key by setting every bit position derived from it.
     *
     * @param key
     *            key to add; passed as-is to both hash functions
     */
    public void add(T key) {
        int firstHashCode = firstFunction.hashCode(key);
        int secondHashCode = secondFunction.hashCode(key);

        for (int i = 0; i < numOfHashFunction; i++) {
            bitmap.set(getIndex(firstHashCode, secondHashCode, i));
        }
    }

    /**
     * Tests whether every bit position derived from the key is set.
     *
     * @param key
     *            key to look up; passed as-is to both hash functions
     * @return {@code false} if at least one of the key's bits is clear,
     *         {@code true} otherwise (which may be a false positive)
     */
    public boolean contains(T key) {
        int firstHashCode = firstFunction.hashCode(key);
        int secondHashCode = secondFunction.hashCode(key);

        for (int i = 0; i < numOfHashFunction; i++) {
            if (!bitmap.get(getIndex(firstHashCode, secondHashCode, i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the bit set backing this filter. This is the live instance, not
     * a copy.
     *
     * @return the backing bit set
     */
    public BitSet getBitmap() {
        return bitmap;
    }

    @Override
    public String toString() {
        return String.format("BloomFilter-[%d KB, %d hashFunctions (%s, %s)]",
                this.numOfBits / 8 / 1024, this.numOfHashFunction,
                this.firstFunction.toString(), this.secondFunction.toString());
    }

    /**
     * Maps the two hash values of a key to its {@code i}-th bit position.
     *
     * <p>
     * The sum may wrap around and Java's {@code %} keeps the sign of the
     * dividend, so the remainder can be negative. Its magnitude is always
     * below {@code numOfBits}, which makes the negation overflow-free. The
     * mapping is intentionally left exactly as it was: changing it would move
     * the bits of keys stored in bitmaps handed to the constructor.
     */
    private int getIndex(int firstHashCode, int secondHashCode, int i) {
        int index = (firstHashCode + (i * secondHashCode)) % this.numOfBits;
        return (index < 0) ? -index : index;
    }

    /** Fails fast with a descriptive message instead of a late NPE. */
    private static <V> V checkNotNull(V value, String name) {
        if (value == null) {
            throw new NullPointerException(name + " must not be null");
        }
        return value;
    }

    /**
     * Searches for the number of hash functions {@code k} (1 to 19) that needs
     * the fewest bits {@code m = -k * n / ln(1 - p^(1/k))} for error rate
     * {@code p} and capacity {@code n}.
     */
    private static class OptimumFinder {
        /** Largest number of hash functions that is tried. */
        private static final int MAX_HASH_FUNCTIONS = 19;

        private final int numOfBits;
        private final int numOfHashFunction;

        /**
         * @throws IllegalArgumentException
         *             if {@code errorRate} is not strictly between 0 and 1,
         *             {@code capacity} is not positive, or no candidate
         *             yields a usable size
         */
        private OptimumFinder(double errorRate, long capacity) {
            // Written as a negated conjunction so that NaN is rejected too.
            if (!(errorRate > 0.0 && errorRate < 1.0)) {
                throw new IllegalArgumentException(
                        "errorRate must be between 0 and 1 (exclusive): "
                                + errorRate);
            }
            if (capacity <= 0) {
                throw new IllegalArgumentException(
                        "capacity must be positive: " + capacity);
            }

            int bestBits = Integer.MAX_VALUE;
            int bestHashCount = 1;
            boolean feasible = false;

            for (int k = 1; k <= MAX_HASH_FUNCTIONS; k++) {
                double estimate = k * capacity * -1.0
                        / Math.log(1.0 - Math.pow(errorRate, (1.0 / k)));

                // For tiny error rates 1 - p^(1/k) rounds to 1.0, the
                // logarithm becomes 0 and the estimate -Infinity. Such a
                // candidate cannot be realised and must not win the search.
                if (!(estimate >= 0.0)) {
                    continue;
                }
                feasible = true;

                // The cast truncates and saturates at Integer.MAX_VALUE; a
                // saturated candidate never beats the initial best value.
                int m = (int) estimate;
                if (m < bestBits) {
                    bestBits = m;
                    bestHashCount = k;
                }
            }

            if (!feasible) {
                throw new IllegalArgumentException(
                        "errorRate is too small to compute a filter size: "
                                + errorRate);
            }

            // Truncation can produce 0 for very small filters; the index
            // computation needs at least one bit to avoid dividing by zero.
            this.numOfBits = Math.max(bestBits, 1);
            this.numOfHashFunction = bestHashCount;
        }
    }
}