/*
* Copyright (C) 2012 Facebook, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.stats.cardinality;
import org.testng.annotations.Test;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
/**
 * Tests for {@link AdaptiveHyperLogLog}: agreement with the fixed {@link HyperLogLog},
 * reconstruction from bucket contents, merging, and re-adding already-seen values.
 */
public class TestAdaptiveHyperLogLog {
  /**
   * Feeds the same values to a fixed and an adaptive estimator and checks after every
   * insertion that both report the same estimate and the same bucket contents.
   */
  @Test
  public void testConsistencyWithFixedHyperLogLog() {
    HyperLogLog simple = new HyperLogLog(1024);
    AdaptiveHyperLogLog adaptive = new AdaptiveHyperLogLog(1024);

    for (int i = 0; i < 4000; ++i) {
      simple.add(i);
      adaptive.add(i);

      assertEquals(adaptive.estimate(), simple.estimate());
      assertEquals(adaptive.buckets(), simple.buckets());
    }
  }

  /**
   * Rebuilds an estimator from the buckets of one holding 10 values and checks that
   * buckets and estimate are unchanged.
   */
  @Test
  public void testRoundtripLowCardinality() {
    AdaptiveHyperLogLog expected = new AdaptiveHyperLogLog(1024);
    for (int i = 0; i < 10; ++i) {
      expected.add(i);
    }

    AdaptiveHyperLogLog actual = new AdaptiveHyperLogLog(expected.buckets());

    assertEquals(actual.buckets(), expected.buckets());
    assertEquals(actual.estimate(), expected.estimate());
  }

  /**
   * Rebuilds an estimator from the buckets of one holding 30000 values and checks that
   * buckets and estimate are unchanged.
   */
  @Test
  public void testRoundtripHighCardinality() {
    AdaptiveHyperLogLog expected = new AdaptiveHyperLogLog(1024);
    for (int i = 0; i < 30000; ++i) {
      expected.add(i);
    }

    AdaptiveHyperLogLog actual = new AdaptiveHyperLogLog(expected.buckets());

    assertEquals(actual.buckets(), expected.buckets());
    assertEquals(actual.estimate(), expected.estimate());
  }

  /**
   * Merges two estimators that saw disjoint value ranges via the static
   * {@code merge} and checks the estimate against the combined cardinality.
   */
  @Test
  public void testMergeNoOverlap() {
    int buckets = 1024;
    AdaptiveHyperLogLog first = new AdaptiveHyperLogLog(buckets);
    AdaptiveHyperLogLog second = new AdaptiveHyperLogLog(buckets);

    int count = 30000;
    int value = 0;

    // first receives 1..count, second receives count+1..2*count
    for (int i = 0; i < count; i++) {
      first.add(++value);
    }
    for (int i = 0; i < count; i++) {
      second.add(++value);
    }

    AdaptiveHyperLogLog merged = AdaptiveHyperLogLog.merge(first, second);

    // the inputs are disjoint, so the union holds 2 * count distinct values
    assertEstimate(merged.estimate(), 2 * count, buckets);
  }

  /**
   * Merges two estimators whose value ranges share their middle third and checks the
   * estimate against the cardinality of the union.
   */
  @Test
  public void testMergeWithOverlap() {
    int buckets = 1024;
    AdaptiveHyperLogLog first = new AdaptiveHyperLogLog(buckets);
    AdaptiveHyperLogLog second = new AdaptiveHyperLogLog(buckets);

    int count = 30000;

    // first receives [0, 2*count/3), second receives [count/3, count); the loop variable
    // must be the value added, otherwise both estimators only ever see a single element
    for (int i = 0; i < 2 * count / 3; i++) {
      first.add(i);
    }
    for (int i = count / 3; i < count; i++) {
      second.add(i);
    }

    AdaptiveHyperLogLog merged = AdaptiveHyperLogLog.merge(first, second);

    // the union of the two ranges is [0, count), i.e. count distinct values
    assertEstimate(merged.estimate(), count, buckets);
  }

  /**
   * Merges a second estimator into the first through the instance {@code merge} and
   * checks the first estimator's estimate against the combined cardinality.
   */
  @Test
  public void testMergeInPlace() {
    int buckets = 1024;
    AdaptiveHyperLogLog first = new AdaptiveHyperLogLog(buckets);
    AdaptiveHyperLogLog second = new AdaptiveHyperLogLog(buckets);

    int count = 30000;
    int value = 0;

    // first receives 1..count, second receives count+1..2*count
    for (int i = 0; i < count; i++) {
      first.add(++value);
    }
    for (int i = 0; i < count; i++) {
      second.add(++value);
    }

    first.merge(second);

    // the inputs are disjoint, so the merged estimator covers 2 * count distinct values
    assertEstimate(first.estimate(), 2 * count, buckets);
  }

  /**
   * Adds 10000 values, then adds the same values again and checks that every repeated
   * {@code add} returns {@code false} and that the estimate does not move.
   */
  @Test
  public void testAddSameElements() {
    AdaptiveHyperLogLog estimator = new AdaptiveHyperLogLog(1024);

    for (int i = 0; i < 10000; i++) {
      estimator.add(i);
    }

    long expectedEstimate = estimator.estimate();

    for (int i = 0; i < 10000; i++) {
      assertFalse(estimator.add(i));
    }

    assertEquals(estimator.estimate(), expectedEstimate);
  }

  /**
   * Asserts that an estimate lies within the tolerated relative error of the true
   * cardinality.
   *
   * @param actual estimate reported by the estimator under test
   * @param expected true number of distinct values that were added
   * @param numberOfBuckets bucket count of the estimator, which determines the tolerance
   */
  private void assertEstimate(long actual, int expected, int numberOfBuckets) {
    // this is actually the standard deviation of the expected error, but it provides a
    // good bound for our deterministic test
    // NOTE(review): this bound is now really enforced (floating-point, absolute value);
    // widen it if a deterministic input turns out to land outside one standard error
    double expectedError = 1.04 / Math.sqrt(numberOfBuckets);

    // divide as double: long division would truncate every small error to zero
    double relativeError = Math.abs(actual - expected) / (double) expected;

    assertTrue(
        relativeError < expectedError,
        "estimate " + actual + " deviates from " + expected + " by " + relativeError
            + ", tolerance is " + expectedError);
  }
}