/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.airlift.stats.cardinality;

import io.airlift.slice.DynamicSliceOutput;
import io.airlift.slice.Murmur3Hash128;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceOutput;
import org.testng.annotations.Test;

import static io.airlift.slice.testing.SliceAssertions.assertSlicesEqual;
import static io.airlift.stats.cardinality.Utils.numberOfBuckets;
import static org.testng.Assert.assertEquals;

public class TestDenseSerialization
{
    @Test
    public void testEmpty()
            throws Exception
    {
        SliceOutput expected = new DynamicSliceOutput(1)
                .appendByte(3) // format tag
                .appendByte(12) // p
                .appendByte(0); // baseline

        for (int i = 0; i < 1 << (12 - 1); i++) {
            expected.appendByte(0);
        }

        // overflows
        expected.appendByte(0)
                .appendByte(0);

        assertSlicesEqual(makeHll(12).serialize(), expected.slice());
    }

    @Test
    public void testSingleNoOverflow()
            throws Exception
    {
        byte[] buckets = new byte[1 << (12 - 1)];
        buckets[326] = 0b0000_0001;

        Slice expected = new DynamicSliceOutput(1)
                .appendByte(3) // format tag
                .appendByte(12) // p
                .appendByte(0) // baseline
                .appendBytes(buckets) // buckets
                // overflows
                .appendByte(0)
                .appendByte(0)
                .slice();

        assertSlicesEqual(makeHll(12, 0).serialize(), expected);
    }

    @Test
    public void testSingleWithOverflow()
            throws Exception
    {
        byte[] buckets = new byte[1 << (12 - 1)];
        buckets[1353] = (byte) 0b1111_0000;

        Slice expected = new DynamicSliceOutput(1)
                .appendByte(3) // format tag
                .appendByte(12) // p
                .appendByte(0) // baseline
                .appendBytes(buckets) // buckets
                // overflows
                .appendByte(1)
                .appendByte(0)
                // overflow bucket
                .appendByte(0x92)
                .appendByte(0xA)
                // overflow value
                .appendByte(2)
                .slice();

        assertSlicesEqual(makeHll(12, 61697).serialize(), expected);
    }

    @Test
    public void testMultipleOverflow()
            throws Exception
    {
        byte[] buckets = new byte[1 << (12 - 1)];
        buckets[1353] = (byte) 0b1111_0000;
        buckets[2024] = (byte) 0b1111_0000;

        Slice expected = new DynamicSliceOutput(1)
                .appendByte(3) // format tag
                .appendByte(12) // p
                .appendByte(0) // baseline
                .appendBytes(buckets) // buckets
                // overflows
                .appendByte(2)
                .appendByte(0)
                // overflow bucket
                .appendByte(146)
                .appendByte(10)
                .appendByte(208)
                .appendByte(15)
                // overflow value
                .appendByte(2)
                .appendByte(4)
                .slice();

        assertSlicesEqual(makeHll(12, 61697, 394873).serialize(), expected);

        // test commutativity
        assertSlicesEqual(makeHll(12, 394873, 61697).serialize(), expected);
    }

    @Test
    public void testMergeWithOverflows()
            throws Exception
    {
        DenseHll expected = makeHll(4, 37227, 93351);

        assertSlicesEqual(
                makeHll(4, 37227).mergeWith(makeHll(4, 93351)).serialize(),
                expected.serialize());

        // test commutativity
        assertSlicesEqual(
                makeHll(4, 93351).mergeWith(makeHll(4, 37227)).serialize(),
                expected.serialize());
    }

    @Test
    public void testBaselineAdjustment()
            throws Exception
    {
        byte[] buckets = new byte[] {0x45, 0x23, 0x01, 0x31, 0x22, 0x05, 0x04, 0x01};

        Slice expected = new DynamicSliceOutput(1)
                .appendByte(3) // format tag
                .appendByte(4) // p
                .appendByte(2) // baseline
                .appendBytes(buckets) // buckets
                // overflows
                .appendByte(0)
                .appendByte(0)
                .slice();

        DenseHll hll = new DenseHll(4);
        for (int i = 0; i < 100; i++) {
            hll.insertHash(Murmur3Hash128.hash64(i));
        }

        assertSlicesEqual(hll.serialize(), expected);
    }
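    // The serialized form checked above appears to follow the layout: [format tag: 1 byte][p: 1 byte]
    // [baseline: 1 byte][2^p buckets packed as 4-bit deltas, two per byte][overflow count: 2 bytes,
    // apparently little-endian][overflow bucket indices: 2 bytes each][overflow values: 1 byte each].
    // The helper below is a minimal sketch of that layout for an empty HLL, mirroring testEmpty();
    // it is hypothetical and illustrative only, and is not referenced by any test.
    private static Slice emptyDenseLayoutSketch(int p)
    {
        SliceOutput out = new DynamicSliceOutput(1)
                .appendByte(3) // format tag used by the tests above
                .appendByte(p) // index bit length
                .appendByte(0); // baseline

        // 2^p buckets, two 4-bit deltas packed per byte, all zero for an empty HLL
        out.appendBytes(new byte[(1 << p) / 2]);

        // overflow entry count (zero), written as two bytes
        out.appendByte(0)
                .appendByte(0);

        return out.slice();
    }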
    @Test
    public void testOverflowAfterBaselineIncrement()
            throws Exception
    {
        byte[] buckets = new byte[] {0x45, 0x23, 0x01, 0x31, 0x22, 0x05, 0x04, (byte) 0xF1};

        Slice expected = new DynamicSliceOutput(1)
                .appendByte(3) // format tag
                .appendByte(4) // p
                .appendByte(2) // baseline
                .appendBytes(buckets) // buckets
                // overflows
                .appendByte(1)
                .appendByte(0)
                // overflow bucket
                .appendByte(14)
                .appendByte(0)
                // overflow value
                .appendByte(5)
                .slice();

        DenseHll hll = new DenseHll(4);
        for (int i = 0; i < 100; i++) {
            hll.insertHash(Murmur3Hash128.hash64(i));
        }
        hll.insertHash(Murmur3Hash128.hash64(37227));

        assertSlicesEqual(hll.serialize(), expected);
    }

    @Test
    public void testBaselineAdjustmentAfterOverflow()
            throws Exception
    {
        byte[] buckets = new byte[] {0x45, 0x23, 0x01, 0x31, 0x22, 0x05, 0x04, (byte) 0xF1};

        Slice expected = new DynamicSliceOutput(1)
                .appendByte(3) // format tag
                .appendByte(4) // p
                .appendByte(2) // baseline
                .appendBytes(buckets) // buckets
                // overflows
                .appendByte(1)
                .appendByte(0)
                // overflow bucket
                .appendByte(14)
                .appendByte(0)
                // overflow value
                .appendByte(5)
                .slice();

        DenseHll hll = new DenseHll(4);
        hll.insertHash(Murmur3Hash128.hash64(37227));
        for (int i = 0; i < 100; i++) {
            hll.insertHash(Murmur3Hash128.hash64(i));
        }

        assertSlicesEqual(hll.serialize(), expected);
    }

    @Test
    public void testRoundtrip()
            throws Exception
    {
        DenseHll hll = new DenseHll(4);
        for (int i = 0; i < 1000; i++) {
            hll.insertHash(Murmur3Hash128.hash64(i));

            Slice serialized = hll.serialize();
            Slice reserialized = new DenseHll(serialized).serialize();

            assertSlicesEqual(serialized, reserialized);
        }
    }

    @Test
    public void testDeserializeDenseV1NoOverflows()
            throws Exception
    {
        int indexBitLength = 4;
        int numberOfBuckets = numberOfBuckets(indexBitLength);
        Slice serialized = new DynamicSliceOutput(1)
                .appendByte(Format.DENSE_V1.getTag()) // format tag
                .appendByte(indexBitLength) // p
                .appendByte(10) // baseline
                .appendBytes(new byte[numberOfBuckets / 2]) // buckets
                // overflow bucket
                .appendByte(0xFF)
                .appendByte(0xFF)
                // overflow value
                .appendByte(0)
                .slice();

        DenseHll deserialized = new DenseHll(serialized);
        for (int i = 0; i < numberOfBuckets; i++) {
            assertEquals(deserialized.getValue(i), 10);
        }
        deserialized.verify();
    }

    @Test
    public void testDeserializeDenseV1EmptyOverflow()
            throws Exception
    {
        // bucket 1 has a value of 17 (i.e., baseline = 2, delta == 15 and overflow is present with a value of 0)
        int indexBitLength = 4;
        int numberOfBuckets = numberOfBuckets(indexBitLength);
        Slice serialized = new DynamicSliceOutput(1)
                .appendByte(Format.DENSE_V1.getTag()) // format tag
                .appendByte(indexBitLength) // p
                .appendByte(2) // baseline
                .appendBytes(new byte[] {0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}) // buckets
                // overflow bucket
                .appendByte(0x01)
                .appendByte(0x00)
                // overflow value
                .appendByte(0)
                .slice();

        DenseHll deserialized = new DenseHll(serialized);
        for (int i = 0; i < numberOfBuckets; i++) {
            if (i == 1) {
                assertEquals(deserialized.getValue(i), 17);
            }
            else {
                assertEquals(deserialized.getValue(i), 2);
            }
        }
        deserialized.verify();
    }
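    // The legacy DENSE_V1 payloads built in the deserialization tests carry a single overflow slot after
    // the buckets: a 2-byte bucket index (0xFFFF apparently meaning "no overflow") followed by a 1-byte
    // overflow value. Judging from the expected values, a bucket is reconstructed as baseline + 4-bit
    // delta, plus the overflow value when the delta saturates at 15 (e.g., baseline 2 + delta 15 +
    // overflow 3 = 20 in the test below).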
    @Test
    public void testDeserializeDenseV1Overflow()
            throws Exception
    {
        // bucket 1 has a value of 20 (i.e., baseline = 2, delta == 15, overflow == 3)
        int indexBitLength = 4;
        int numberOfBuckets = numberOfBuckets(indexBitLength);
        Slice serialized = new DynamicSliceOutput(1)
                .appendByte(Format.DENSE_V1.getTag()) // format tag
                .appendByte(indexBitLength) // p
                .appendByte(2) // baseline
                .appendBytes(new byte[] {0x0F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}) // buckets
                // overflow bucket
                .appendByte(0x01)
                .appendByte(0x00)
                // overflow value
                .appendByte(3)
                .slice();

        DenseHll deserialized = new DenseHll(serialized);
        for (int i = 0; i < numberOfBuckets; i++) {
            if (i == 1) {
                assertEquals(deserialized.getValue(i), 20);
            }
            else {
                assertEquals(deserialized.getValue(i), 2);
            }
        }
        deserialized.verify();
    }

    private static DenseHll makeHll(int indexBits, long... values)
    {
        DenseHll result = new DenseHll(indexBits);
        for (long value : values) {
            result.insertHash(Murmur3Hash128.hash64(value));
        }
        return result;
    }
}