/*
* Copyright (c) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 David Berkman
*
* This file is part of the SmallMind Code Project.
*
* The SmallMind Code Project is free software, you can redistribute
* it and/or modify it under either, at your discretion...
*
* 1) The terms of GNU Affero General Public License as published by the
* Free Software Foundation, either version 3 of the License, or (at
* your option) any later version.
*
* ...or...
*
* 2) The terms of the Apache License, Version 2.0.
*
* The SmallMind Code Project is distributed in the hope that it will
* be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License or Apache License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* and the Apache License along with the SmallMind Code Project. If not, see
* <http://www.gnu.org/licenses/> or <http://www.apache.org/licenses/LICENSE-2.0>.
*
* Additional permission under the GNU Affero GPL version 3 section 7
* ------------------------------------------------------------------
* If you modify this Program, or any covered work, by linking or
* combining it with other code, such other code is not for that reason
* alone subject to any of the requirements of the GNU Affero GPL
* version 3.
*/
package org.smallmind.nutsnbolts.util;
import java.io.Serializable;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.BitSet;
import java.util.Collection;
import org.smallmind.nutsnbolts.security.HashAlgorithm;
/**
 * A Bloom filter backed by a {@link BitSet}. Elements are reduced to a byte array, hashed with the
 * digest named by {@code HashAlgorithm.SHA_1}, and each derived hash value selects one bit of the set.
 * A negative answer from {@link #contains(byte[])} is definitive; a positive answer may be a false positive.
 * <p>
 * All access to the bit set, the element counter and the digest is guarded by this instance's monitor.
 * The digest itself is not part of the serialized form; it is re-created on first use after
 * deserialization.
 *
 * @param <E> the element type, which supplies its own byte representation via {@code getBytes()}
 */
public class BloomFilter<E extends BloomFilterElement> implements Serializable {

  private static final long serialVersionUID = 1L;

  private final BitSet bitset;
  private final double bitsPerElement;
  private final int maxElements;
  private final int hashCount;
  private final int length;
  // MessageDigest is not Serializable, so it must be transient and is rebuilt lazily (see getMessageDigest()).
  private transient MessageDigest messageDigest;
  private int size;

  /**
   * Creates a filter whose bit length is {@code ceil(bitsPerElement * maxElements)}.
   *
   * @param bitsPerElement the number of bits budgeted per expected element
   * @param maxElements    the number of elements the filter is sized for, must be positive
   * @param hashCount      the number of hash values derived per element, must be positive
   * @throws NoSuchAlgorithmException if the digest algorithm is not available
   * @throws IllegalArgumentException if an argument is out of range or the resulting length is not
   *                                  between 1 and {@link Integer#MAX_VALUE}
   */
  public BloomFilter (double bitsPerElement, int maxElements, int hashCount)
    throws NoSuchAlgorithmException {

    this(bitsPerElement, maxElements, hashCount, calculateLength(bitsPerElement, maxElements));
  }

  /**
   * Creates a filter of exactly {@code length} bits, choosing the hash count as
   * {@code round((length / maxElements) * ln(2))}, but never less than 1.
   *
   * @param length      the number of bits in the filter, must be positive
   * @param maxElements the number of elements the filter is sized for, must be positive
   * @throws NoSuchAlgorithmException if the digest algorithm is not available
   * @throws IllegalArgumentException if {@code length} or {@code maxElements} is not positive
   */
  public BloomFilter (int length, int maxElements)
    throws NoSuchAlgorithmException {

    // The requested length is passed through untouched rather than being recomputed from the ratio,
    // so floating point rounding can never change the size that was asked for.
    this(ratioOf(length, maxElements), maxElements, optimalHashCount(ratioOf(length, maxElements)), length);
  }

  /**
   * Creates a filter sized for a target false positive probability.
   *
   * @param falsePositiveProbability the desired false positive probability, strictly between 0 and 1
   * @param maxElements              the number of elements the filter is sized for, must be positive
   * @throws NoSuchAlgorithmException if the digest algorithm is not available
   * @throws IllegalArgumentException if the probability is not strictly between 0 and 1, or
   *                                  {@code maxElements} is not positive
   */
  public BloomFilter (double falsePositiveProbability, int maxElements)
    throws NoSuchAlgorithmException {

    // hashCount = ceil(-log_2(false prob.))
    // bitsPerElement = hashCount / ln(2)
    this(hashCountFor(falsePositiveProbability) / Math.log(2), maxElements, hashCountFor(falsePositiveProbability));
  }

  // Single point of construction; every public constructor funnels through here so validation happens once.
  private BloomFilter (double bitsPerElement, int maxElements, int hashCount, int length)
    throws NoSuchAlgorithmException {

    if (maxElements < 1) {
      throw new IllegalArgumentException("maxElements(" + maxElements + ") must be positive");
    }
    if (hashCount < 1) {
      throw new IllegalArgumentException("hashCount(" + hashCount + ") must be positive");
    }
    if (length < 1) {
      // A zero length would make every later 'hash % length' fail with a division by zero.
      throw new IllegalArgumentException("length(" + length + ") must be positive");
    }

    this.bitsPerElement = bitsPerElement;
    this.maxElements = maxElements;
    this.hashCount = hashCount;
    this.length = length;

    bitset = new BitSet(length);
    size = 0;
    messageDigest = newMessageDigest();
  }

  // Computes ceil(bitsPerElement * maxElements), rejecting NaN and anything outside [1, Integer.MAX_VALUE].
  private static int calculateLength (double bitsPerElement, int maxElements) {

    double bits = Math.ceil(bitsPerElement * maxElements);

    // Written as a negated conjunction so that NaN is rejected as well.
    if (!(bits >= 1 && bits <= Integer.MAX_VALUE)) {
      throw new IllegalArgumentException("bitsPerElement(" + bitsPerElement + ") * maxElements(" + maxElements + ") must yield between 1 and " + Integer.MAX_VALUE + " bits");
    }

    return (int)bits;
  }

  // Returns length / maxElements as a double after checking that both are positive.
  private static double ratioOf (int length, int maxElements) {

    if (length < 1) {
      throw new IllegalArgumentException("length(" + length + ") must be positive");
    }
    if (maxElements < 1) {
      throw new IllegalArgumentException("maxElements(" + maxElements + ") must be positive");
    }

    return length / (double)maxElements;
  }

  // Returns round(bitsPerElement * ln(2)), clamped to at least one hash.
  private static int optimalHashCount (double bitsPerElement) {

    return (int)Math.max(1L, Math.round(bitsPerElement * Math.log(2.0)));
  }

  // Returns ceil(-log_2(p)) for a probability p strictly between 0 and 1.
  private static int hashCountFor (double falsePositiveProbability) {

    // Written as a negated conjunction so that NaN is rejected as well.
    if (!(falsePositiveProbability > 0.0 && falsePositiveProbability < 1.0)) {
      throw new IllegalArgumentException("falsePositiveProbability(" + falsePositiveProbability + ") must be greater than 0 and less than 1");
    }

    return (int)Math.ceil(-(Math.log(falsePositiveProbability) / Math.log(2)));
  }

  // Creates the digest used to derive hash values.
  private static MessageDigest newMessageDigest ()
    throws NoSuchAlgorithmException {

    return MessageDigest.getInstance(HashAlgorithm.SHA_1.getAlgorithmName());
  }

  // Returns the digest, re-creating it when absent (as it is after deserialization). Caller must hold the monitor.
  private MessageDigest getMessageDigest () {

    if (messageDigest == null) {
      try {
        messageDigest = newMessageDigest();
      } catch (NoSuchAlgorithmException noSuchAlgorithmException) {
        throw new IllegalStateException("Unable to re-create the message digest", noSuchAlgorithmException);
      }
    }

    return messageDigest;
  }

  /**
   * @return the bits-per-element figure this filter was constructed with
   */
  public double getBitsPerElement () {

    return bitsPerElement;
  }

  /**
   * @return the number of elements this filter was sized for
   */
  public int getMaxElements () {

    return maxElements;
  }

  /**
   * @return the number of hash values derived for each element
   */
  public int getHashCount () {

    return hashCount;
  }

  /**
   * @return the number of bits in this filter
   */
  public int length () {

    return length;
  }

  /**
   * @return the number of add operations performed since construction or the last {@link #clear()};
   * repeated additions of the same element are each counted
   */
  public synchronized int size () {

    return size;
  }

  /**
   * @return the filter length divided by the current {@link #size()}; positive infinity while the
   * filter is empty
   */
  public synchronized double calculateCurrentBitsPerElement () {

    return length / (double)size;
  }

  /**
   * Estimates the false positive probability at the capacity the filter was sized for, not at its
   * current fill level.
   *
   * @return {@code (1 - e^(-hashCount * maxElements / length)) ^ hashCount}
   */
  public double calculateFalsePositiveProbability () {

    // (1 - e^(-hashCount * maxElements / length)) ^ hashCount
    return Math.pow((1 - Math.exp(-hashCount * (double)maxElements / (double)length)), hashCount);
  }

  // Derives hashCount int values from the element bytes. Caller must hold the monitor, as the digest is shared.
  private int[] createHashes (byte[] elementBytes) {

    MessageDigest digest = getMessageDigest();
    int[] hashes = new int[hashCount];
    int k = 0;
    // The salt prefixes each digest round so successive rounds yield different output; as a byte it wraps around.
    byte salt = 0;

    while (k < hashCount) {
      digest.update(salt++);

      byte[] hash = digest.digest(elementBytes);

      // Slice the digest into consecutive big-endian 4 byte ints until enough hashes have been collected.
      for (int i = 0; i < hash.length / 4 && k < hashCount; i++) {

        int h = 0;

        for (int j = (i * 4); j < (i * 4) + 4; j++) {
          h <<= 8;
          h |= hash[j] & 0xFF;
        }
        hashes[k++] = h;
      }
    }

    return hashes;
  }

  // Maps a hash value onto a bit position in [0, length).
  private int indexFor (int hash) {

    // |hash % length| is always smaller than length, so Math.abs can not overflow here.
    return Math.abs(hash % length);
  }

  /**
   * Adds an element using the bytes it reports through {@code getBytes()}.
   *
   * @param element the element to add
   */
  public void add (E element) {

    // The element's own code runs outside of the monitor.
    add(element.getBytes());
  }

  /**
   * Sets the bit for every hash derived from the bytes and increments the size counter.
   *
   * @param bytes the byte representation of the element to add
   */
  public synchronized void add (byte[] bytes) {

    for (int hash : createHashes(bytes)) {
      bitset.set(indexFor(hash), true);
    }
    size++;
  }

  /**
   * Adds every element of the collection, one at a time.
   *
   * @param c the elements to add
   */
  public void addAll (Collection<? extends E> c) {

    for (E element : c) {
      add(element);
    }
  }

  /**
   * Tests an element using the bytes it reports through {@code getBytes()}.
   *
   * @param element the element to test
   * @return {@code false} if the element was definitely never added, {@code true} if it may have been
   */
  public boolean contains (E element) {

    return contains(element.getBytes());
  }

  /**
   * Tests whether the bit for every hash derived from the bytes is set.
   *
   * @param bytes the byte representation of the element to test
   * @return {@code false} if any of the bits is clear, otherwise {@code true}
   */
  public synchronized boolean contains (byte[] bytes) {

    for (int hash : createHashes(bytes)) {
      if (!bitset.get(indexFor(hash))) {

        return false;
      }
    }

    return true;
  }

  /**
   * Tests every element of the collection.
   *
   * @param c the elements to test
   * @return {@code false} as soon as one element is reported absent, otherwise {@code true}
   */
  public boolean containsAll (Collection<? extends E> c) {

    for (E element : c) {
      if (!contains(element)) {

        return false;
      }
    }

    return true;
  }

  /**
   * Clears every bit and resets the size counter to zero.
   */
  public synchronized void clear () {

    bitset.clear();
    size = 0;
  }
}