package brickhouse.udf.bloom;
import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.Set;
import java.util.UUID;
import org.apache.hadoop.util.bloom.Filter;
import org.apache.hadoop.util.bloom.Key;
import org.junit.Assert;
import org.junit.Test;
/**
 * Tests for Bloom filters obtained from {@link BloomFactory}.
 *
 * <p>Keys are built from the string form of random UUIDs. Every inserted key
 * is checked for membership right after insertion; the tests additionally
 * check the observed false-positive ratio and the result of OR-ing two
 * filters together.
 */
public class BloomTest {

    /**
     * Charset used to turn strings into key bytes. Fixed explicitly so the
     * bytes do not depend on the platform default charset.
     */
    private static final Charset UTF8 = Charset.forName("UTF-8");

    /** A progress line is printed every this many insertions. */
    private static final int PROGRESS_INTERVAL = 10000;

    /**
     * Inserts {@code numElems} random keys, then probes the filter with
     * {@code numElems} fresh random keys and asserts that the fraction
     * reported as members does not exceed the requested rate {@code pct}.
     *
     * <p>NOTE(review): the {@code @Test} annotation was already commented out
     * in the original; presumably because of the 10M-element run time -
     * confirm before re-enabling. The ratio assertion below is now a real
     * floating-point comparison, so it can actually fail if the filter's
     * observed false-positive rate is above {@code pct}.
     */
    ///@Test
    public void testBloom() {
        int numElems = 10 * 1000000;
        double pct = 0.01;

        Filter bloom = BloomFactory.NewBloomInstance(numElems, pct);
        for (int i = 0; i < numElems; ++i) {
            addRandomKey(bloom);
            reportProgress(i);
        }

        // Probe with keys that were (almost certainly) never inserted; every
        // positive answer here is counted as a hit.
        int numHits = 0;
        for (int i = 0; i < numElems; ++i) {
            if (bloom.membershipTest(toKey(UUID.randomUUID().toString()))) {
                numHits++;
            }
        }

        // Must be computed in floating point: int / int would truncate to 0
        // for any numHits < numElems and make the assertion vacuous.
        double hitRatio = (double) numHits / (double) numElems;
        System.out.print("Number of hits = " + numHits + " out of " + numElems + " or " + hitRatio * 100.0 + " %");
        Assert.assertTrue("hit ratio " + hitRatio + " exceeds " + pct, hitRatio <= pct);
    }

    /**
     * Fills two filters with disjoint batches of random keys, ORs the second
     * into the first, and asserts that the first then reports every key from
     * both batches as a member.
     */
    @Test
    public void testBloomUnion() {
        int numElems = 100000;
        double pct = 0.01;
        // String form of every key inserted into either filter.
        Set<String> unionMap = new HashSet<String>();

        Filter bloom1 = BloomFactory.NewBloomInstance(numElems, pct);
        for (int i = 0; i < numElems / 2; ++i) {
            unionMap.add(addRandomKey(bloom1));
            reportProgress(i);
        }

        Filter bloom2 = BloomFactory.NewBloomInstance(numElems, pct);
        for (int i = 0; i < numElems / 2; ++i) {
            unionMap.add(addRandomKey(bloom2));
            reportProgress(i);
        }

        bloom1.or(bloom2);

        for (String uuid : unionMap) {
            Assert.assertTrue(bloom1.membershipTest(toKey(uuid)));
        }
    }

    /** Builds a filter key from the UTF-8 bytes of {@code value}. */
    private static Key toKey(String value) {
        return new Key(value.getBytes(UTF8));
    }

    /**
     * Adds a key derived from a fresh random UUID to {@code bloom} and asserts
     * that the filter reports it as a member immediately afterwards.
     *
     * @return the string form of the UUID that was inserted
     */
    private static String addRandomKey(Filter bloom) {
        String uuid = UUID.randomUUID().toString();
        Key key = toKey(uuid);
        bloom.add(key);
        Assert.assertTrue(bloom.membershipTest(key));
        return uuid;
    }

    /** Prints a progress line when {@code index} is a multiple of the interval. */
    private static void reportProgress(int index) {
        if ((index % PROGRESS_INTERVAL) == 0) {
            System.out.println(" Added " + index + " elements.");
        }
    }
}