/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.airlift.stats.cardinality;

import io.airlift.slice.XxHash64;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.List;

import static io.airlift.slice.testing.SliceAssertions.assertSlicesEqual;
import static io.airlift.stats.cardinality.TestUtils.sequence;
import static io.airlift.stats.cardinality.Utils.numberOfBuckets;
import static org.testng.Assert.assertEquals;

/**
 * Tests for {@link DenseHll}: hash insertion, merging, and comparison of the
 * resulting bucket values against {@link TestingHll} or against a second
 * {@link DenseHll} that received the same hashes directly.
 *
 * <p>Tests marked with the {@code "bits"} data provider run once for every
 * prefix bit length from 4 to 16 inclusive.
 */
public class TestDenseHll
{
    // Range of prefix bit lengths fed to the parameterized tests (both inclusive).
    private static final int MIN_PREFIX_BITS = 4;
    private static final int MAX_PREFIX_BITS = 16;

    /**
     * Inserts ten million hashes into one sketch directly and, in parallel, into a
     * series of temporary sketches that are merged into an accumulator every
     * 10,000 insertions. The accumulator must end up with the same bucket values
     * and the same cardinality as the directly-populated sketch.
     *
     * @param prefixBitLength prefix bit length used to construct every sketch
     */
    @Test(dataProvider = "bits")
    public void testMultipleMerges(int prefixBitLength)
            throws Exception
    {
        DenseHll single = new DenseHll(prefixBitLength);
        DenseHll merged = new DenseHll(prefixBitLength);

        DenseHll current = new DenseHll(prefixBitLength);

        for (int i = 0; i < 10_000_000; i++) {
            if (i % 10_000 == 0) {
                // flush the batch collected so far and start a fresh one
                // (at i == 0 this merges an empty sketch)
                merged.mergeWith(current);
                current = new DenseHll(prefixBitLength);
            }

            long hash = XxHash64.hash(i);
            current.insertHash(hash);
            single.insertHash(hash);
        }

        // merge the final, still-pending batch
        merged.mergeWith(current);

        // TestNG's argument order is (actual, expected): the merged sketch is the
        // one under test, the directly-populated sketch is the reference.
        for (int i = 0; i < numberOfBuckets(prefixBitLength); i++) {
            assertEquals(merged.getValue(i), single.getValue(i));
        }

        assertEquals(merged.cardinality(), single.cardinality());
    }

    /**
     * Inserts ten million hashes into both a {@link TestingHll} and a
     * {@link DenseHll} and checks that their bucket values agree.
     *
     * @param prefixBitLength prefix bit length used to construct both sketches
     */
    @Test(dataProvider = "bits")
    public void testHighCardinality(int prefixBitLength)
            throws Exception
    {
        TestingHll testingHll = new TestingHll(prefixBitLength);
        DenseHll hll = new DenseHll(prefixBitLength);

        for (int i = 0; i < 10_000_000; i++) {
            long hash = XxHash64.hash(i);

            testingHll.insertHash(hash);
            hll.insertHash(hash);
        }

        assertSameBuckets(testingHll, hll);
    }

    /**
     * Inserts 20,000 hashes into both a {@link TestingHll} and a {@link DenseHll},
     * calling {@code verify()} on the dense sketch after every insertion, then
     * checks that the bucket values agree.
     *
     * @param prefixBitLength prefix bit length used to construct both sketches
     */
    @Test(dataProvider = "bits")
    public void testInsert(int prefixBitLength)
            throws Exception
    {
        TestingHll testingHll = new TestingHll(prefixBitLength);
        DenseHll hll = new DenseHll(prefixBitLength);

        for (int i = 0; i < 20_000; i++) {
            long hash = XxHash64.hash(i);

            testingHll.insertHash(hash);
            hll.insertHash(hash);
            hll.verify();
        }

        assertSameBuckets(testingHll, hll);
    }

    /**
     * Merges two 12-bit sketches, each holding a single hash chosen to trigger an
     * overflow, and checks the merged result against a {@link TestingHll} that
     * received both hashes.
     */
    @Test
    public void testMergeWithOverflows()
            throws Exception
    {
        TestingHll testingHll = new TestingHll(12);
        DenseHll hll1 = new DenseHll(12);
        DenseHll hll2 = new DenseHll(12);

        // these two numbers cause overflows
        long hash1 = XxHash64.hash(25130);
        long hash2 = XxHash64.hash(227291);

        hll1.insertHash(hash1);
        testingHll.insertHash(hash1);
        hll2.insertHash(hash2);
        testingHll.insertHash(hash2);

        hll1.mergeWith(hll2);
        hll1.verify();

        assertSameBuckets(testingHll, hll1);
    }

    /**
     * Exercises {@link #verifyMerge} with small and large value ranges that are
     * disjoint, partially overlapping, or identical, in both merge directions.
     *
     * @param prefixBitLength prefix bit length used to construct every sketch
     */
    @Test(dataProvider = "bits")
    public void testMerge(int prefixBitLength)
            throws Exception
    {
        // small, non-overlapping
        verifyMerge(prefixBitLength, sequence(0, 100), sequence(100, 200));
        verifyMerge(prefixBitLength, sequence(100, 200), sequence(0, 100));

        // small, overlapping
        verifyMerge(prefixBitLength, sequence(0, 100), sequence(50, 150));
        verifyMerge(prefixBitLength, sequence(50, 150), sequence(0, 100));

        // small, same
        verifyMerge(prefixBitLength, sequence(0, 100), sequence(0, 100));

        // large, non-overlapping
        verifyMerge(prefixBitLength, sequence(0, 20000), sequence(20000, 40000));
        verifyMerge(prefixBitLength, sequence(20000, 40000), sequence(0, 20000));

        // large, overlapping
        verifyMerge(prefixBitLength, sequence(0, 2_000_000), sequence(1_000_000, 3_000_000));
        verifyMerge(prefixBitLength, sequence(1_000_000, 3_000_000), sequence(0, 2_000_000));

        // large, same
        verifyMerge(prefixBitLength, sequence(0, 2_000_000), sequence(0, 2_000_000));
    }

    /**
     * Builds one sketch per input list plus an "expected" sketch that receives the
     * hashes of both lists, merges the second sketch into the first, and asserts
     * that the merged sketch matches the expected one in cardinality and in
     * serialized form.
     *
     * @param prefixBitLength prefix bit length used to construct every sketch
     * @param one values hashed into the first sketch (the merge target)
     * @param two values hashed into the second sketch (the merge source)
     */
    private static void verifyMerge(int prefixBitLength, List<Long> one, List<Long> two)
    {
        DenseHll hll1 = new DenseHll(prefixBitLength);
        DenseHll hll2 = new DenseHll(prefixBitLength);

        DenseHll expected = new DenseHll(prefixBitLength);

        for (long value : one) {
            long hash = XxHash64.hash(value);
            hll1.insertHash(hash);
            expected.insertHash(hash);
        }

        for (long value : two) {
            long hash = XxHash64.hash(value);
            hll2.insertHash(hash);
            expected.insertHash(hash);
        }

        // both inputs are verified before the merge, and the target again after it
        hll1.verify();
        hll2.verify();

        hll1.mergeWith(hll2);
        hll1.verify();

        assertEquals(hll1.cardinality(), expected.cardinality());
        assertSlicesEqual(hll1.serialize(), expected.serialize());
    }

    /**
     * Asserts that, for every index of {@code testingHll.getBuckets()}, the dense
     * sketch reports the same value through {@code getValue(index)}.
     *
     * @param testingHll sketch supplying the expected bucket values
     * @param hll sketch under test
     */
    private static void assertSameBuckets(TestingHll testingHll, DenseHll hll)
    {
        for (int i = 0; i < testingHll.getBuckets().length; i++) {
            assertEquals(hll.getValue(i), testingHll.getBuckets()[i]);
        }
    }

    /**
     * Supplies the prefix bit lengths for the parameterized tests: one
     * single-element row for each integer from {@code MIN_PREFIX_BITS} to
     * {@code MAX_PREFIX_BITS} inclusive.
     *
     * @return one row per prefix bit length, in ascending order
     */
    @DataProvider(name = "bits")
    private Object[][] prefixLengths()
    {
        Object[][] rows = new Object[MAX_PREFIX_BITS - MIN_PREFIX_BITS + 1][];
        for (int bits = MIN_PREFIX_BITS; bits <= MAX_PREFIX_BITS; bits++) {
            rows[bits - MIN_PREFIX_BITS] = new Object[] {bits};
        }
        return rows;
    }
}