/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Dec 30, 2006
*/
package com.bigdata.btree;
import java.nio.ByteBuffer;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import junit.framework.TestCase2;
/**
* Test suite for bloom filter functionality.
*
* @author <a href="mailto:thompsonbry@users.sourceforge.net">Bryan Thompson</a>
* @version $Id$
*
* @see BloomFilter
*/
public class TestBloomFilter extends TestCase2 {

    /**
     * De-serialization constructor (required by the test framework).
     */
    public TestBloomFilter() {
    }

    /**
     * @param name
     *            The name of the test case to run.
     */
    public TestBloomFilter(String name) {
        super(name);
    }

    /**
     * Simple constructor tests: legal combinations of (n, p, maxN) must be
     * accepted without exception.
     */
    public void test_ctor() {
        new BloomFilter(1000/* n */, .01/* p */, 10000/* maxN */);
        new BloomFilter(5000/* n */, .02/* p */, 10000/* maxN */);
        new BloomFilter(5000/* n */, .02/* p */, 5000/* maxN == n */);
    }

    /**
     * Correct rejection tests: each illegal argument combination must be
     * rejected with an {@link IllegalArgumentException}.
     */
    public void test_ctor_correctRejection() {
        // p == 0 is not a legal error rate.
        try {
            new BloomFilter(1000/* n */, 0d/* p */, 10000/* maxN */);
            fail("Expecting: " + IllegalArgumentException.class);
        } catch (IllegalArgumentException ex) {
            log.info("Ignoring expected exception: " + ex);
        }
        // p > 1 is not a legal error rate.
        try {
            new BloomFilter(1000/* n */, 1.00001/* p */, 10000/* maxN */);
            fail("Expecting: " + IllegalArgumentException.class);
        } catch (IllegalArgumentException ex) {
            log.info("Ignoring expected exception: " + ex);
        }
        // n == 0 is not a legal expected entry count.
        try {
            new BloomFilter(0/* n */, .01/* p */, 10000/* maxN */);
            fail("Expecting: " + IllegalArgumentException.class);
        } catch (IllegalArgumentException ex) {
            log.info("Ignoring expected exception: " + ex);
        }
        /*
         * maxN < n must be rejected. Note: this case previously passed 0d for
         * p, which was already rejected by the first check above and therefore
         * never exercised the maxN validation; a legal p is used here so the
         * only possible rejection cause is maxN < n.
         */
        try {
            new BloomFilter(1000/* n */, .01/* p */, 100/* maxN < n */);
            fail("Expecting: " + IllegalArgumentException.class);
        } catch (IllegalArgumentException ex) {
            log.info("Ignoring expected exception: " + ex);
        }
    }

    /**
     * Test that a bloom filter constructed for a given #of keys and maximum
     * error rate respects that maximum error rate in practice.
     * <p>
     * Note: the ground truth set stores the keys wrapped as {@link ByteBuffer}s
     * rather than as raw <code>byte[]</code>s. Java arrays inherit identity
     * based <code>hashCode()</code>/<code>equals()</code>, so a
     * <code>Set&lt;byte[]&gt;</code> would never detect two distinct arrays
     * with equal content — defeating both the duplicate-key rejection while
     * populating the filter and the distinct-key check in the second phase.
     * {@link ByteBuffer} compares by content, which is what we need here.
     */
    public void test_errorRate() {
        final double errorRate = 0.05d;
        final int nkeys = 1000;
        // inputs.
        System.err.println("targets: nkeys=" + nkeys + ", errorRate="
                + errorRate);
        final BloomFilter bloomFilter = new BloomFilter(nkeys, errorRate, nkeys/* maxKeys */);
        // displays filter state as provisioned.
        System.err.println("provisioned: " + bloomFilter.toString());
        // content-based ground truth over the generated keys (see javadoc).
        final Set<ByteBuffer> groundTruth = new HashSet<ByteBuffer>(nkeys);
        final byte[][] keys = new byte[nkeys][];
        final Random r = new Random();
        final int MAX_KEY_LENGTH = 128;
        {
            /*
             * #of times the filter reported that the key was already present
             * when it was not already in the ground truth.
             */
            int falsePositiveCount = 0;
            for (int i = 0; i < nkeys; i++) {
                // loop until we get a distinct key.
                while (true) {
                    // random length key (may be zero length).
                    final byte[] key = new byte[r.nextInt(MAX_KEY_LENGTH)];
                    // random bytes for the key.
                    r.nextBytes(key);
                    // save key.
                    keys[i] = key;
                    // add to ground truth (content-based comparison).
                    if (groundTruth.add(ByteBuffer.wrap(key))) {
                        break;
                    }
                    System.err.print('.');
                }
                final byte[] key = keys[i];
                // test filter before insert: any hit is a false positive.
                final boolean falsePositive = bloomFilter.contains(key);
                if (falsePositive)
                    falsePositiveCount++;
                // add to the filter.
                bloomFilter.add(key);
                // verify filter reports that the key is found.
                assertTrue(bloomFilter.contains(key));
            }
            // outputs.
            System.err.println("error rate on keys added to the filter"
                    + ": #errors=" + falsePositiveCount
                    + ", actual error rate="
                    + ((double) falsePositiveCount / (double) nkeys));
            // displays filter state after inserting keys.
            System.err.println("after inserts: " + bloomFilter.toString());
        }
        /*
         * verify that all keys are correctly reported as present (a bloom
         * filter never produces false negatives).
         */
        for (int i = 0; i < nkeys; i++) {
            final byte[] key = keys[i];
            assertTrue(bloomFilter.contains(key));
        }
        /*
         * Test fully populated filter against new random keys that are known to
         * NOT be in the test set.
         */
        {
            int i = 0;
            int falsePositiveCount2 = 0;
            while (i < nkeys) {
                // random length key.
                final byte[] key = new byte[r.nextInt(MAX_KEY_LENGTH)];
                // random bytes for the key.
                r.nextBytes(key);
                // content-based check against the ground truth.
                if (groundTruth.contains(ByteBuffer.wrap(key))) {
                    // until we find a distinct key.
                    System.out.print(".");
                    continue;
                }
                i++;
                // key is distinct from everything inserted: any hit is a
                // false positive.
                if (bloomFilter.contains(key)) {
                    falsePositiveCount2++;
                }
            }
            // outputs.
            System.err.println("error rate on random distinct keys"
                    + ": #errors=" + falsePositiveCount2
                    + ", actual error rate="
                    + ((double) falsePositiveCount2 / (double) nkeys));
        }
    }

    /**
     * This is not really a unit test. Instead it plots a curve of the space
     * requirements of the filter (bitLength) against the target error rate and
     * filter capacity.
     * <p>
     * Note: The table has spans within each column where the same bit length is
     * computed. This is because the #of hash functions is discrete. Each time
     * we increase the #of hash functions the table moves to a new bit length
     * value in a given column.
     */
    public void test_spaceCurve() {
        // the increments in error rate that will be plotted.
        final double inc = 0.01;
        // the different filter capacities that will be plotted.
        final int[] capacity = new int[] { 1, 100, 1000, 10000, 100000,
                1000000, 2000000, 10000000, 20000000, 100000000 };
        // headings.
        {
            System.out.print("p\t");
            for (int n : capacity) {
                System.out.print(n);
                System.out.print('\t');
            }
            System.out.println();
        }
        // p is the target error rate.
        for (double p = inc; p < 1d; p += inc) {
            System.out.print(p);
            // #of hash functions required for the target error rate.
            final int d = BloomFilter.getHashFunctionCount(p);
            for (int n : capacity) {
                final long bitLength = BloomFilter.getBitLength(d, n);
                System.out.print("\t" + bitLength);
            }
            System.out.println();
        }
    }

    /**
     * Generates a table showing the #of index entries that would produce a
     * given error rate for a filter with a specific configuration (target error
     * rate of <code>.02</code> at 1 million index entries). The code uses the
     * given filter and runs through error rates between <code>.02</code> and
     * <code>.20</code> by increments of <code>.01</code> showing the #of
     * index entries for which that would be the expected error rate.
     * <p>
     * You can verify the computation by examining certain entries in the table
     * generated by {@link #test_spaceCurve()}. The filter as configured for
     * this test has a bit length of <code>8,656,171</code>. If you look at
     * the expected error rate for a bloom filter of 2 million index entries of
     * the same bit length you will see that it lies somewhere in
     * <code>0.13 - 0.24</code> (this table has discrete regions, see the
     * javadoc for {@link #test_spaceCurve()}). If you examine the output of
     * this test, you will see that an error rate of <code>0.18</code> is
     * predicted when the #of index entries is on the order of 2 million
     * (2,008,182). The relevant cells are highlighted in the worksheet.
     *
     * @see src/worksheet/architecture/bloomfilter.xls
     *
     * @see BloomFilter#getEntryCountForErrorRate(double)
     */
    public void test_errorRateCurve() {
        final BloomFilter filter;
        final int k; // #of hash functions.
        final long m; // bit length of the filter.
        {
            final double p = 0.02d;
            final int n = 1000000;
            filter = new BloomFilter(n, p, n/* maxN */);
            System.out.println("Given p=" + p + ", n=" + n
                    + " the filter will use " + filter.getHashFunctionCount()
                    + " hash functions and have a bit length of "
                    + filter.getBitLength());
            k = filter.getHashFunctionCount();
            m = filter.getBitLength();
        }
        // headings
        System.out.println("p\tn");
        for (double p = 0.01; p < .2; p += 0.01) {
            final int n = BloomFilter.getEntryCountForErrorRate(k, m, p);
            System.out.println("" + p + "\t" + n);
        }
    }

    /**
     * Test suite for (de-)serialization of the {@link BloomFilter}.
     *
     * @todo write test (this gets tested by the index segment build test suite
     *       where the bloom filter is enabled)
     */
    public void test_serialization() {
        // fail("write test");
    }

}