/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.util.hll; import java.util.HashSet; import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; import com.carrotsearch.hppc.LongHashSet; import static com.carrotsearch.randomizedtesting.RandomizedTest.*; /** * Tests {@link HLL} of type {@link HLLType#EXPLICIT}. */ public class ExplicitHLLTest extends LuceneTestCase { /** * Tests basic set semantics of {@link HLL#addRaw(long)}. */ @Test public void addBasicTest() { { // Adding a single positive value to an empty set should work. final HLL hll = newHLL(128/*arbitrary*/); hll.addRaw(1L/*positive*/); assertEquals(hll.cardinality(), 1L); } { // Adding a single negative value to an empty set should work. final HLL hll = newHLL(128/*arbitrary*/); hll.addRaw(-1L/*negative*/); assertEquals(hll.cardinality(), 1L); } { // Adding a duplicate value to a set should be a no-op. final HLL hll = newHLL(128/*arbitrary*/); hll.addRaw(1L/*positive*/); assertEquals(hll.cardinality(), 1L/*arbitrary*/); assertEquals(hll.cardinality(), 1L/*dupe*/); } } // ------------------------------------------------------------------------ /** * Tests {@link HLL#union(HLL)}. */ @Test public void unionTest() { {// Unioning two distinct sets should work final HLL hllA = newHLL(128/*arbitrary*/); final HLL hllB = newHLL(128/*arbitrary*/); hllA.addRaw(1L); hllA.addRaw(2L); hllB.addRaw(3L); hllA.union(hllB); assertEquals(hllA.cardinality(), 3); } {// Unioning two sets whose union doesn't exceed the cardinality cap should not promote final HLL hllA = newHLL(128/*arbitrary*/); final HLL hllB = newHLL(128/*arbitrary*/); hllA.addRaw(1L); hllA.addRaw(2L); hllB.addRaw(1L); hllA.union(hllB); assertEquals(hllA.cardinality(), 2); } {// unioning two sets whose union exceeds the cardinality cap should promote final HLL hllA = newHLL(128/*arbitrary*/); final HLL hllB = newHLL(128/*arbitrary*/); // fill up sets to explicitThreshold for(long i=0; i<128/*explicitThreshold*/; i++) { hllA.addRaw(i); hllB.addRaw(i + 128); } hllA.union(hllB); assertEquals(hllA.getType(), HLLType.SPARSE); } } // ------------------------------------------------------------------------ /** * Tests {@link HLL#clear()} */ @Test public void clearTest() { final HLL hll = newHLL(128/*arbitrary*/); hll.addRaw(1L); assertEquals(hll.cardinality(), 1L); hll.clear(); assertEquals(hll.cardinality(), 0L); } // ------------------------------------------------------------------------ /** */ @Test public void toFromBytesTest() { final ISchemaVersion schemaVersion = SerializationUtil.DEFAULT_SCHEMA_VERSION; final HLLType type = HLLType.EXPLICIT; final int padding = schemaVersion.paddingBytes(type); final int bytesPerWord = 8; {// Should work on an empty set final HLL hll = newHLL(128/*arbitrary*/); final byte[] bytes = hll.toBytes(schemaVersion); // assert output has correct byte length assertEquals(bytes.length, padding/*no elements, just padding*/); final HLL inHLL = HLL.fromBytes(bytes); assertElementsEqual(hll, inHLL); } {// Should work on a partially filled set final HLL hll = newHLL(128/*arbitrary*/); for(int i=0; i<3; i++) { hll.addRaw(i); } final byte[] bytes = hll.toBytes(schemaVersion); // assert output has correct byte length assertEquals(bytes.length, padding + (bytesPerWord * 3/*elements*/)); final HLL inHLL = HLL.fromBytes(bytes); assertElementsEqual(hll, inHLL); } {// Should work on a full set final int explicitThreshold = 128; final HLL hll = newHLL(explicitThreshold); for(int i=0; i<explicitThreshold; i++) { hll.addRaw(27 + i/*arbitrary*/); } final byte[] bytes = hll.toBytes(schemaVersion); // assert output has correct byte length assertEquals(bytes.length, padding + (bytesPerWord * explicitThreshold/*elements*/)); final HLL inHLL = HLL.fromBytes(bytes); assertElementsEqual(hll, inHLL); } } // ------------------------------------------------------------------------ /** * Tests correctness against {@link java.util.HashSet}. */ @Test public void randomValuesTest() { final int explicitThreshold = 4096; final HashSet<Long> canonical = new HashSet<Long>(); final HLL hll = newHLL(explicitThreshold); for(int i=0;i<explicitThreshold;i++){ long randomLong = randomLong(); canonical.add(randomLong); hll.addRaw(randomLong); } final int canonicalCardinality = canonical.size(); assertEquals(hll.cardinality(), canonicalCardinality); } // ------------------------------------------------------------------------ /** * Tests promotion to {@link HLLType#SPARSE} and {@link HLLType#FULL}. */ @Test public void promotionTest() { { // locally scoped for sanity final int explicitThreshold = 128; final HLL hll = new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, explicitThreshold, 256/*sparseThreshold*/, HLLType.EXPLICIT); for(int i=0;i<explicitThreshold + 1;i++){ hll.addRaw(i); } assertEquals(hll.getType(), HLLType.SPARSE); } { // locally scoped for sanity final HLL hll = new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, 4/*expthresh => explicitThreshold = 8*/, false/*sparseon*/, HLLType.EXPLICIT); for(int i=0;i<9/* > explicitThreshold */;i++){ hll.addRaw(i); } assertEquals(hll.getType(), HLLType.FULL); } } // ************************************************************************ // assertion helpers /** * Asserts that values in both sets are exactly equal. */ private static void assertElementsEqual(final HLL hllA, final HLL hllB) { final LongHashSet internalSetA = hllA.explicitStorage; final LongHashSet internalSetB = hllB.explicitStorage; assertTrue(internalSetA.equals(internalSetB)); } /** * Builds a {@link HLLType#EXPLICIT} {@link HLL} instance with the specified * explicit threshold. * * @param explicitThreshold explicit threshold to use for the constructed * {@link HLL}. This must be greater than zero. * @return a default-sized {@link HLLType#EXPLICIT} empty {@link HLL} instance. * This will never be <code>null</code>. */ private static HLL newHLL(final int explicitThreshold) { return new HLL(11/*log2m, unused*/, 5/*regwidth, unused*/, explicitThreshold, 256/*sparseThreshold, arbitrary, unused*/, HLLType.EXPLICIT); } }