/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hive.common.util;
import static org.junit.Assert.assertEquals;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;
import java.util.Random;
import org.junit.Assert;
import org.junit.Test;
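/**
 * Unit tests for {@link BloomFilter}: constructor argument validation, the sizing helpers
 * ({@code optimalNumOfBits}, {@code optimalNumOfHashFunctions}), add/test round trips for
 * byte-array, integral, floating-point and string values, in-memory merging, and
 * serialization (including merging of serialized filters).
 */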
public class TestBloomFilter {
// Number of pseudo-random values each type-specific test adds to its filter; also used as the
// length of the random byte[] in testBloomFilterBytes.
private static final int COUNT = 100;
// Seeded with a fixed value so the pseudo-random inputs are reproducible from run to run.
Random rand = new Random(123);
/** Constructing a filter with arguments {@code (0, 0)} must throw IllegalArgumentException. */
@Test(expected = IllegalArgumentException.class)
public void testBloomIllegalArg1() {
  // Only the constructor's exception matters; the instance itself is never used.
  new BloomFilter(0, 0);
}
/** Constructing a filter with arguments {@code (0, 0.1)} must throw IllegalArgumentException. */
@Test(expected = IllegalArgumentException.class)
public void testBloomIllegalArg2() {
  // Only the constructor's exception matters; the instance itself is never used.
  new BloomFilter(0, 0.1);
}
/** Constructing a filter with arguments {@code (1, 0.0)} must throw IllegalArgumentException. */
@Test(expected = IllegalArgumentException.class)
public void testBloomIllegalArg3() {
  // Only the constructor's exception matters; the instance itself is never used.
  new BloomFilter(1, 0.0);
}
/** Constructing a filter with arguments {@code (1, 1.0)} must throw IllegalArgumentException. */
@Test(expected = IllegalArgumentException.class)
public void testBloomIllegalArg4() {
  // Only the constructor's exception matters; the instance itself is never used.
  new BloomFilter(1, 1.0);
}
/** Constructing a filter with arguments {@code (-1, -1)} must throw IllegalArgumentException. */
@Test(expected = IllegalArgumentException.class)
public void testBloomIllegalArg5() {
  // Only the constructor's exception matters; the instance itself is never used.
  new BloomFilter(-1, -1);
}
/**
 * Checks {@code BloomFilter.optimalNumOfBits} against a table of precomputed results.
 *
 * <p>The three arrays are parallel: for every index {@code i} the call
 * {@code optimalNumOfBits(firstArgs[i], secondArgs[i])} must return {@code expectedBits[i]}.
 */
@Test
public void testBloomNumBits() {
  long[] firstArgs = {
    0, 0, 1,
    1, 10, 100, 1000, 10000, 100000, 1000000,
    1000000,
    300000000, 300000000, 300000000,
    3000000000L, 3000000000L, 3000000000L,
  };
  double[] secondArgs = {
    0, 1, 1,
    0.03, 0.03, 0.03, 0.03, 0.03, 0.03, 0.03,
    0.05,
    0.05, 0.1, 0.5,
    0.8, 0.9, 1,
  };
  int[] expectedBits = {
    0, 0, 0,
    7, 72, 729, 7298, 72984, 729844, 7298440,
    6235224,
    1870567268, 1437758756, 432808512,
    1393332198, 657882327, 0,
  };
  for (int row = 0; row < expectedBits.length; row++) {
    assertEquals(expectedBits[row], BloomFilter.optimalNumOfBits(firstArgs[row], secondArgs[row]));
  }
}
/**
 * Checks {@code BloomFilter.optimalNumOfHashFunctions} against a table of precomputed results.
 *
 * <p>Each row is {@code {firstArg, secondArg, expected}}; the result is 1 for every row except
 * {@code (10, 100)}, which yields 7.
 */
@Test
public void testBloomNumHashFunctions() {
  int[][] rows = {
    {-1, -1, 1},
    {0, 0, 1},
    {10, 0, 1},
    {10, 10, 1},
    {10, 100, 7},
    {100, 100, 1},
    {1000, 100, 1},
    {10000, 100, 1},
    {100000, 100, 1},
    {1000000, 100, 1},
  };
  for (int[] row : rows) {
    assertEquals(row[2], BloomFilter.optimalNumOfHashFunctions(row[0], row[1]));
  }
}
/**
 * Exercises {@code add}/{@code test} with byte-array values on a filter created with
 * {@code new BloomFilter(10000)}.
 *
 * <p>Four fixed arrays are inserted one at a time and membership of all four is re-checked after
 * every insert. Then {@code COUNT} random arrays are added, the last one must be found, a
 * modified copy of it is expected to be absent, and the filter size must be 7800 bytes.
 */
@Test
public void testBloomFilterBytes() {
  BloomFilter filter = new BloomFilter(10000);
  byte[][] samples = {
    {1, 2, 3},
    {1, 2, 3, 4},
    {1, 2, 3, 4, 5},
    {1, 2, 3, 4, 5, 6},
  };
  // Nothing has been added yet, so no sample may be reported.
  for (byte[] sample : samples) {
    assertEquals(false, filter.test(sample));
  }
  // After inserting samples[added], exactly samples[0..added] must be reported.
  for (int added = 0; added < samples.length; added++) {
    filter.add(samples[added]);
    for (int i = 0; i < samples.length; i++) {
      assertEquals(i <= added, filter.test(samples[i]));
    }
  }
  // The same buffer is refilled and re-added COUNT times; it ends up holding the last value.
  byte[] randomBytes = new byte[COUNT];
  for (int n = 0; n < COUNT; n++) {
    rand.nextBytes(randomBytes);
    filter.add(randomBytes);
  }
  // last value should be present
  assertEquals(true, filter.test(randomBytes));
  // most likely this value should not exist: zero the first five bytes of the last value
  for (int pos = 0; pos < 5; pos++) {
    randomBytes[pos] = 0;
  }
  assertEquals(false, filter.test(randomBytes));
  assertEquals(7800, filter.sizeInBytes());
}
/**
 * Exercises {@code addLong}/{@code testLong} with {@code byte} values on a filter created with
 * {@code new BloomFilter(10000)}.
 *
 * <p>Four fixed values are inserted one at a time and membership of all four is re-checked after
 * every insert. Then {@code COUNT} random non-negative bytes are added, the last one must be
 * found, -120 is expected to be absent, and the filter size must be 7800 bytes.
 */
@Test
public void testBloomFilterByte() {
  BloomFilter filter = new BloomFilter(10000);
  byte[] samples = {Byte.MIN_VALUE, 1, 2, Byte.MAX_VALUE};
  // Nothing has been added yet, so no sample may be reported.
  for (byte sample : samples) {
    assertEquals(false, filter.testLong(sample));
  }
  // After inserting samples[added], exactly samples[0..added] must be reported.
  for (int added = 0; added < samples.length; added++) {
    filter.addLong(samples[added]);
    for (int i = 0; i < samples.length; i++) {
      assertEquals(i <= added, filter.testLong(samples[i]));
    }
  }
  byte lastAdded = 0;
  for (int n = 0; n < COUNT; n++) {
    // nextInt(bound) yields values in [0, bound), so only non-negative bytes are inserted here.
    lastAdded = (byte) rand.nextInt(Byte.MAX_VALUE);
    filter.addLong(lastAdded);
  }
  // last value should be present
  assertEquals(true, filter.testLong(lastAdded));
  // most likely this value should not exist
  assertEquals(false, filter.testLong((byte) -120));
  assertEquals(7800, filter.sizeInBytes());
}
/**
 * Exercises {@code addLong}/{@code testLong} with {@code int} values on a filter created with
 * {@code new BloomFilter(10000)}.
 *
 * <p>Four fixed values are inserted one at a time and membership of all four is re-checked after
 * every insert. Then {@code COUNT} random ints are added, the last one must be found, -120 is
 * expected to be absent, and the filter size must be 7800 bytes.
 */
@Test
public void testBloomFilterInt() {
  BloomFilter filter = new BloomFilter(10000);
  int[] samples = {Integer.MIN_VALUE, 1, 2, Integer.MAX_VALUE};
  // Nothing has been added yet, so no sample may be reported.
  for (int sample : samples) {
    assertEquals(false, filter.testLong(sample));
  }
  // After inserting samples[added], exactly samples[0..added] must be reported.
  for (int added = 0; added < samples.length; added++) {
    filter.addLong(samples[added]);
    for (int i = 0; i < samples.length; i++) {
      assertEquals(i <= added, filter.testLong(samples[i]));
    }
  }
  int lastAdded = 0;
  for (int n = 0; n < COUNT; n++) {
    lastAdded = rand.nextInt();
    filter.addLong(lastAdded);
  }
  // last value should be present
  assertEquals(true, filter.testLong(lastAdded));
  // most likely this value should not exist
  assertEquals(false, filter.testLong(-120));
  assertEquals(7800, filter.sizeInBytes());
}
/**
 * Exercises {@code addLong}/{@code testLong} with {@code long} values on a filter created with
 * {@code new BloomFilter(10000)}.
 *
 * <p>Four fixed values are inserted one at a time and membership of all four is re-checked after
 * every insert. Then {@code COUNT} random longs are added, the last one must be found, -120 is
 * expected to be absent, and the filter size must be 7800 bytes.
 */
@Test
public void testBloomFilterLong() {
  BloomFilter filter = new BloomFilter(10000);
  long[] samples = {Long.MIN_VALUE, 1, 2, Long.MAX_VALUE};
  // Nothing has been added yet, so no sample may be reported.
  for (long sample : samples) {
    assertEquals(false, filter.testLong(sample));
  }
  // After inserting samples[added], exactly samples[0..added] must be reported.
  for (int added = 0; added < samples.length; added++) {
    filter.addLong(samples[added]);
    for (int i = 0; i < samples.length; i++) {
      assertEquals(i <= added, filter.testLong(samples[i]));
    }
  }
  long lastAdded = 0;
  for (int n = 0; n < COUNT; n++) {
    lastAdded = rand.nextLong();
    filter.addLong(lastAdded);
  }
  // last value should be present
  assertEquals(true, filter.testLong(lastAdded));
  // most likely this value should not exist
  assertEquals(false, filter.testLong(-120));
  assertEquals(7800, filter.sizeInBytes());
}
/**
 * Exercises {@code addDouble}/{@code testDouble} with {@code float} values on a filter created
 * with {@code new BloomFilter(10000)}.
 *
 * <p>Four fixed values are inserted one at a time and membership of all four is re-checked after
 * every insert. Then {@code COUNT} random floats are added, the last one must be found, -120.2f
 * is expected to be absent, and the filter size must be 7800 bytes.
 */
@Test
public void testBloomFilterFloat() {
  BloomFilter filter = new BloomFilter(10000);
  float[] samples = {Float.MIN_VALUE, 1.1f, 2.2f, Float.MAX_VALUE};
  // Nothing has been added yet, so no sample may be reported.
  for (float sample : samples) {
    assertEquals(false, filter.testDouble(sample));
  }
  // After inserting samples[added], exactly samples[0..added] must be reported.
  for (int added = 0; added < samples.length; added++) {
    filter.addDouble(samples[added]);
    for (int i = 0; i < samples.length; i++) {
      assertEquals(i <= added, filter.testDouble(samples[i]));
    }
  }
  float lastAdded = 0;
  for (int n = 0; n < COUNT; n++) {
    lastAdded = rand.nextFloat();
    filter.addDouble(lastAdded);
  }
  // last value should be present
  assertEquals(true, filter.testDouble(lastAdded));
  // most likely this value should not exist
  assertEquals(false, filter.testDouble(-120.2f));
  assertEquals(7800, filter.sizeInBytes());
}
/**
 * Exercises {@code addDouble}/{@code testDouble} with {@code double} values on a filter created
 * with {@code new BloomFilter(10000)}.
 *
 * <p>Four fixed values are inserted one at a time and membership of all four is re-checked after
 * every insert. Then {@code COUNT} random doubles are added, the last one must be found, -120.2
 * is expected to be absent, and the filter size must be 7800 bytes.
 */
@Test
public void testBloomFilterDouble() {
  BloomFilter filter = new BloomFilter(10000);
  double[] samples = {Double.MIN_VALUE, 1.1d, 2.2d, Double.MAX_VALUE};
  // Nothing has been added yet, so no sample may be reported.
  for (double sample : samples) {
    assertEquals(false, filter.testDouble(sample));
  }
  // After inserting samples[added], exactly samples[0..added] must be reported.
  for (int added = 0; added < samples.length; added++) {
    filter.addDouble(samples[added]);
    for (int i = 0; i < samples.length; i++) {
      assertEquals(i <= added, filter.testDouble(samples[i]));
    }
  }
  double lastAdded = 0;
  for (int n = 0; n < COUNT; n++) {
    lastAdded = rand.nextDouble();
    filter.addDouble(lastAdded);
  }
  // last value should be present
  assertEquals(true, filter.testDouble(lastAdded));
  // most likely this value should not exist
  assertEquals(false, filter.testDouble(-120.2d));
  assertEquals(7800, filter.sizeInBytes());
}
/**
 * Exercises {@code addString}/{@code testString} on a filter created with
 * {@code new BloomFilter(100000)}.
 *
 * <p>Four fixed strings are inserted one at a time and membership of all four is re-checked
 * after every insert. Then the decimal text of {@code COUNT} random longs is added, the last one
 * must be found, "-120" is expected to be absent, and the filter size must be 77944 bytes.
 */
@Test
public void testBloomFilterString() {
  BloomFilter filter = new BloomFilter(100000);
  String[] samples = {"bloo", "bloom fil", "bloom filter", "cuckoo filter"};
  // Nothing has been added yet, so no sample may be reported.
  for (String sample : samples) {
    assertEquals(false, filter.testString(sample));
  }
  // After inserting samples[added], exactly samples[0..added] must be reported.
  for (int added = 0; added < samples.length; added++) {
    filter.addString(samples[added]);
    for (int i = 0; i < samples.length; i++) {
      assertEquals(i <= added, filter.testString(samples[i]));
    }
  }
  long lastAdded = 0;
  for (int n = 0; n < COUNT; n++) {
    lastAdded = rand.nextLong();
    filter.addString(Long.toString(lastAdded));
  }
  // last value should be present
  assertEquals(true, filter.testString(Long.toString(lastAdded)));
  // most likely this value should not exist
  assertEquals(false, filter.testString(Long.toString(-120)));
  assertEquals(77944, filter.sizeInBytes());
}
/**
 * Verifies {@code merge}: two filters created with {@code new BloomFilter(10000)} are filled
 * with disjoint string sets; before the merge the first filter reports only its own strings,
 * and after {@code merge} it reports the strings of both.
 */
@Test
public void testMerge() {
  String[] ownValues = {"bloo", "bloom fil", "bloom filter", "cuckoo filter"};
  String[] otherValues = {"2_bloo", "2_bloom fil", "2_bloom filter", "2_cuckoo filter"};

  BloomFilter target = new BloomFilter(10000);
  for (String s : ownValues) {
    target.addString(s);
  }
  BloomFilter other = new BloomFilter(10000);
  for (String s : otherValues) {
    other.addString(s);
  }

  // Before merging: own values present, the other filter's values absent.
  for (String s : ownValues) {
    assertEquals(true, target.testString(s));
  }
  for (String s : otherValues) {
    assertEquals(false, target.testString(s));
  }

  target.merge(other);

  // After merging: both sets must be reported by the target filter.
  for (String s : ownValues) {
    assertEquals(true, target.testString(s));
  }
  for (String s : otherValues) {
    assertEquals(true, target.testString(s));
  }
}
/**
 * Round-trips a populated filter through {@code BloomFilter.serialize} and
 * {@code BloomFilter.deserialize} and checks that both the original and the deserialized copy
 * report every inserted string.
 *
 * @throws Exception if serialization or deserialization fails
 */
@Test
public void testSerialize() throws Exception {
  String[] inputs = {
    "bloo",
    "bloom fil",
    "bloom filter",
    "cuckoo filter",
  };
  BloomFilter original = new BloomFilter(10000);
  for (int i = 0; i < inputs.length; i++) {
    original.addString(inputs[i]);
  }

  // Write the filter to an in-memory buffer and read it straight back.
  ByteArrayOutputStream sink = new ByteArrayOutputStream();
  BloomFilter.serialize(sink, original);
  BloomFilter restored = BloomFilter.deserialize(new ByteArrayInputStream(sink.toByteArray()));

  for (int i = 0; i < inputs.length; i++) {
    String val = inputs[i];
    assertEquals("Testing bf1 with " + val, true, original.testString(val));
    assertEquals("Testing bf2 with " + val, true, restored.testString(val));
  }
}
/**
 * Verifies merging of two serialized filters via {@code BloomFilter.mergeBloomFilterBytes}.
 *
 * <p>Two filters created with {@code new BloomFilter(10000)} are filled with disjoint string
 * sets and serialized separately. The second serialized form is merged into the first, the
 * first buffer is deserialized, and the resulting filter must report every value from both
 * input sets.
 *
 * @throws Exception if serialization or deserialization fails
 */
@Test
public void testMergeBloomFilterBytes() throws Exception {
  BloomFilter bf1 = new BloomFilter(10000);
  BloomFilter bf2 = new BloomFilter(10000);
  String[] inputs1 = {
    "bloo",
    "bloom fil",
    "bloom filter",
    "cuckoo filter",
  };
  String[] inputs2 = {
    "2_bloo",
    "2_bloom fil",
    "2_bloom filter",
    "2_cuckoo filter",
  };
  for (String val : inputs1) {
    bf1.addString(val);
  }
  for (String val : inputs2) {
    bf2.addString(val);
  }
  // Sanity check (the same expectation testMerge makes for identically populated filters):
  // bf1 must not already report bf2's values, otherwise the post-merge checks prove nothing.
  for (String val : inputs2) {
    assertEquals("bf1 should not contain " + val + " before merge", false, bf1.testString(val));
  }

  ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
  BloomFilter.serialize(bytesOut, bf1);
  byte[] bf1Bytes = bytesOut.toByteArray();
  bytesOut.reset();
  // Serialize the SECOND filter here; serializing bf1 twice would merge bf1 with itself.
  BloomFilter.serialize(bytesOut, bf2);
  byte[] bf2Bytes = bytesOut.toByteArray();

  // Merge bytes; the merged result is read back from bf1Bytes below.
  BloomFilter.mergeBloomFilterBytes(
      bf1Bytes, 0, bf1Bytes.length,
      bf2Bytes, 0, bf2Bytes.length);

  // Deserialize and test
  ByteArrayInputStream bytesIn = new ByteArrayInputStream(bf1Bytes, 0, bf1Bytes.length);
  BloomFilter bfMerged = BloomFilter.deserialize(bytesIn);

  // All values should pass test. Membership is asserted rather than re-added: adding the
  // values to the merged filter would make this test pass regardless of what the merge did.
  for (String val : inputs1) {
    assertEquals("Testing merged filter with " + val, true, bfMerged.testString(val));
  }
  for (String val : inputs2) {
    assertEquals("Testing merged filter with " + val, true, bfMerged.testString(val));
  }
}
/**
 * Verifies that {@code BloomFilter.mergeBloomFilterBytes} throws IllegalArgumentException when
 * the two serialized filters are incompatible.
 *
 * <p>Two cases are merged into a filter created with {@code new BloomFilter(1000)}: a filter
 * created with {@code new BloomFilter(200)}, and a filter with the same bit size but one more
 * hash function.
 *
 * @throws Exception if serialization fails
 */
@Test
public void testMergeBloomFilterBytesFailureCases() throws Exception {
  BloomFilter base = new BloomFilter(1000);
  BloomFilter differentSize = new BloomFilter(200);

  // Create bloom filter with same number of bits, but different # hash functions:
  // an all-zero bit set with as many longs as the base filter's bit set.
  ArrayList<Long> zeroWords = new ArrayList<Long>();
  while (zeroWords.size() < base.getBitSet().length) {
    zeroWords.add(0L);
  }
  BloomFilter differentHashCount =
      new BloomFilter(zeroWords, base.getBitSize(), base.getNumHashFunctions() + 1);

  // Serialize to bytes, reusing one output buffer for all three filters.
  ByteArrayOutputStream sink = new ByteArrayOutputStream();
  BloomFilter.serialize(sink, base);
  byte[] baseBytes = sink.toByteArray();
  sink.reset();
  BloomFilter.serialize(sink, differentSize);
  byte[] differentSizeBytes = sink.toByteArray();
  sink.reset();
  BloomFilter.serialize(sink, differentHashCount);
  byte[] differentHashCountBytes = sink.toByteArray();

  // Each incompatible filter, in turn, must be rejected when merged into the base filter.
  byte[][] incompatible = {differentSizeBytes, differentHashCountBytes};
  for (byte[] otherBytes : incompatible) {
    try {
      // this should fail
      BloomFilter.mergeBloomFilterBytes(
          baseBytes, 0, baseBytes.length,
          otherBytes, 0, otherBytes.length);
      Assert.fail("Expected exception not encountered");
    } catch (IllegalArgumentException expected) {
      // expected
    }
  }
}
}