/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.airlift.stats.cardinality;
import com.google.common.collect.ImmutableList;
import io.airlift.slice.Murmur3Hash128;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.List;
import static io.airlift.slice.testing.SliceAssertions.assertSlicesEqual;
import static io.airlift.stats.cardinality.TestUtils.sequence;
import static org.testng.Assert.assertEquals;
/**
 * Tests for {@link SparseHll}: merging two sparse instances and converting a
 * sparse instance to its dense form. Every test runs once for each prefix bit
 * length supplied by the {@code "bits"} data provider.
 */
public class TestSparseHll
{
    /** Smallest prefix bit length supplied by the data provider (inclusive). */
    private static final int MIN_PREFIX_BITS = 4;

    /** Largest prefix bit length supplied by the data provider (inclusive). */
    private static final int MAX_PREFIX_BITS = 15;

    /**
     * Supplies one single-element row per prefix bit length, in ascending
     * order from {@link #MIN_PREFIX_BITS} to {@link #MAX_PREFIX_BITS}.
     *
     * @return the rows consumed by tests declaring {@code dataProvider = "bits"}
     */
    @DataProvider(name = "bits")
    private Object[][] prefixLengths()
    {
        Object[][] rows = new Object[MAX_PREFIX_BITS - MIN_PREFIX_BITS + 1][];
        for (int offset = 0; offset < rows.length; offset++) {
            rows[offset] = new Object[] { MIN_PREFIX_BITS + offset };
        }
        return rows;
    }

    /**
     * Merges pairs of sparse HLLs (overlapping, disjoint, identical, and
     * overflow-producing inputs) and checks each result against an HLL that
     * received all the values directly.
     *
     * @param prefixBitLength prefix bit length used to construct every HLL in the test
     */
    @Test(dataProvider = "bits")
    public void testMerge(int prefixBitLength)
            throws Exception
    {
        // with overlap, in both merge directions
        verifyMerge(prefixBitLength, sequence(0, 100), sequence(50, 150));
        verifyMerge(prefixBitLength, sequence(50, 150), sequence(0, 100));

        // no overlap, in both merge directions
        verifyMerge(prefixBitLength, sequence(0, 100), sequence(200, 300));
        verifyMerge(prefixBitLength, sequence(200, 300), sequence(0, 100));

        // idempotent: merging identical contents
        verifyMerge(prefixBitLength, sequence(0, 100), sequence(0, 100));

        // multiple overflows (some with same index), in both merge directions
        List<Long> overflowPair = ImmutableList.of(29678L, 54004L);
        List<Long> overflowTriple = ImmutableList.of(64034L, 20591L, 56987L);
        verifyMerge(prefixBitLength, overflowPair, overflowTriple);
        verifyMerge(prefixBitLength, overflowTriple, overflowPair);
    }

    /**
     * Converts sparse HLLs to dense form and checks the serialized result
     * against a dense HLL populated with the same hashes.
     *
     * @param prefixBitLength prefix bit length used to construct every HLL in the test
     */
    @Test(dataProvider = "bits")
    public void testToDense(int prefixBitLength)
            throws Exception
    {
        verifyToDense(prefixBitLength, sequence(0, 10000));

        // special cases with overflows
        verifyToDense(prefixBitLength, ImmutableList.of(201L, 280L));
        verifyToDense(prefixBitLength, ImmutableList.of(224L, 271L));
    }

    /**
     * Inserts the hashes of {@code one} into a first HLL and of {@code two}
     * into a second, merges the second into the first, and asserts that the
     * result has the same cardinality and serialized form as a reference HLL
     * that received every hash directly.
     *
     * @param prefixBitLength prefix bit length for all three HLLs
     * @param one values hashed into the merge target
     * @param two values hashed into the HLL that gets merged in
     */
    private static void verifyMerge(int prefixBitLength, List<Long> one, List<Long> two)
    {
        SparseHll target = new SparseHll(prefixBitLength);
        SparseHll other = new SparseHll(prefixBitLength);
        SparseHll reference = new SparseHll(prefixBitLength);

        for (long item : one) {
            long hashed = Murmur3Hash128.hash64(item);
            target.insertHash(hashed);
            reference.insertHash(hashed);
        }

        for (long item : two) {
            long hashed = Murmur3Hash128.hash64(item);
            other.insertHash(hashed);
            reference.insertHash(hashed);
        }

        // both inputs are verified before the merge, and the target again after it
        target.verify();
        other.verify();

        target.mergeWith(other);
        target.verify();

        assertEquals(target.cardinality(), reference.cardinality());
        assertSlicesEqual(target.serialize(), reference.serialize());
    }

    /**
     * Inserts the hashes of {@code values} into both a sparse and a dense HLL
     * and asserts that the sparse one, once converted with {@code toDense()},
     * serializes to the same bytes as the dense one.
     *
     * @param prefixBitLength prefix bit length for both HLLs
     * @param values values whose hashes are inserted
     */
    private static void verifyToDense(int prefixBitLength, List<Long> values)
    {
        DenseHll reference = new DenseHll(prefixBitLength);
        SparseHll candidate = new SparseHll(prefixBitLength);

        for (long item : values) {
            long hashed = Murmur3Hash128.hash64(item);
            candidate.insertHash(hashed);
            reference.insertHash(hashed);
        }

        candidate.verify();
        reference.verify();

        assertSlicesEqual(candidate.toDense().serialize(), reference.serialize());
    }
}