HyperLogLogEncoderAbstractTest.java example

Explorer
hazelcast-master
/*
 * Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.cardinality.impl.hyperloglog.impl;

import com.hazelcast.test.HazelcastParallelClassRunner;
import com.hazelcast.test.annotation.ParallelTest;
import com.hazelcast.test.annotation.QuickTest;
import com.hazelcast.util.HashUtil;
import com.hazelcast.util.collection.IntHashSet;
import org.HdrHistogram.Histogram;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;

import java.nio.ByteBuffer;
import java.util.Random;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

@RunWith(HazelcastParallelClassRunner.class)
@Category({QuickTest.class, ParallelTest.class})
public abstract class HyperLogLogEncoderAbstractTest {

    private HyperLogLogEncoder encoder;

    public abstract int runLength();

    public abstract int precision();

    public abstract HyperLogLogEncoder createStore();

    @Before
    public void setup() {
        encoder = createStore();
    }

    @Test
    public void add() {
        assertTrue(encoder.add(1000L));
        assertEquals(1L, encoder.estimate());
    }

    /**
     * - Add up-to runLength() random numbers on both a Set and a HyperLogLog encoder.
     * - Sample the actual count, and the estimate respectively every 100 operations.
     * - Compute the error rate, of the measurements and store it in a histogram.
     * - Assert that the 99th percentile of the histogram is less than the expected max error,
     * which is the result of std error (1.04 / sqrt(m)) + 3%.
     * (2% is the typical accuracy, but tests on the implementation showed up rare occurrences of 3%)
     */
    @Test
    public void testEstimateErrorRateForBigCardinalities() {
        double stdError = (1.04 / Math.sqrt(1 << precision())) * 100;
        double maxError = Math.ceil(stdError + 3.0);

        IntHashSet actualCount = new IntHashSet(runLength(), -1);
        Random random = new Random();
        Histogram histogram = new Histogram(5);
        ByteBuffer bb = ByteBuffer.allocate(4);

        int sampleStep = 100;
        long expected;
        long actual;

        for (int i = 1; i <= runLength(); i++) {
            int toCount = random.nextInt();
            actualCount.add(toCount);

            bb.clear();
            bb.putInt(toCount);
            encoder.add(HashUtil.MurmurHash3_x64_64(bb.array(), 0, bb.array().length));

            if (i % sampleStep == 0) {
                expected = actualCount.size();
                actual = encoder.estimate();
                double errorPct = ((actual * 100.0) / expected) - 100;
                histogram.recordValue(Math.abs((long) (errorPct * 100)));
            }
        }

        double errorPerc99 = histogram.getValueAtPercentile(99) / 100.0;
        if (errorPerc99 > maxError) {
            fail("For P=" + precision() + ", max error=" + maxError + "% expected."
                    + " Error: " + errorPerc99 + "%.");
        }
    }

    HyperLogLogEncoder getEncoder() {
        return encoder;
    }
}