/*
* Copyright (C) 2011 Clearspring Technologies, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.streaminer.stream.cardinality;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import org.junit.Test;
public class TestCountThenEstimate
{
@Test
public void testMerge() throws CardinalityMergeException
{
int numToMerge = 10;
int tippingPoint = 100;
int cardinality = 1000;
CountThenEstimate[] ctes = new CountThenEstimate[numToMerge];
for (int i = 0; i < numToMerge; i++)
{
ctes[i] = new CountThenEstimate(tippingPoint, AdaptiveCounting.Builder.obyCount(100000));
for (int j = 0; j < tippingPoint - 1; j++)
{
ctes[i].offer(Math.random());
}
}
int expectedCardinality = numToMerge * (tippingPoint - 1);
long mergedEstimate = CountThenEstimate.mergeEstimators(ctes).cardinality();
double error = Math.abs(mergedEstimate - expectedCardinality) / (double) expectedCardinality;
assertEquals(0.01, error, 0.01);
for (int i = 0; i < numToMerge / 2; i++)
{
for (int j = tippingPoint - 1; j < cardinality; j++)
{
ctes[i].offer(Math.random());
}
}
expectedCardinality = (numToMerge / 2) * (cardinality + tippingPoint - 1);
mergedEstimate = CountThenEstimate.mergeEstimators(ctes).cardinality();
error = Math.abs(mergedEstimate - expectedCardinality) / (double) expectedCardinality;
assertEquals(0.01, error, 0.01);
for (int i = numToMerge / 2; i < numToMerge; i++)
{
for (int j = tippingPoint - 1; j < cardinality; j++)
{
ctes[i].offer(Math.random());
}
}
expectedCardinality = numToMerge * cardinality;
mergedEstimate = CountThenEstimate.mergeEstimators(ctes).cardinality();
error = Math.abs(mergedEstimate - expectedCardinality) / (double) expectedCardinality;
assertEquals(0.01, error, 0.01);
}
@Test
public void testSmallMerge() throws CardinalityMergeException
{
// Untipped test case
int numToMerge = 1000;
int cardinalityPer = 5;
CountThenEstimate[] ctes = new CountThenEstimate[numToMerge];
for (int i = 0; i < numToMerge; i++)
{
ctes[i] = new CountThenEstimate(10000, AdaptiveCounting.Builder.obyCount(100000));
for (int j = 0; j < cardinalityPer; j++)
{
ctes[i].offer(Math.random());
}
}
int expectedCardinality = numToMerge * cardinalityPer;
CountThenEstimate merged = CountThenEstimate.mergeEstimators(ctes);
long mergedEstimate = merged.cardinality();
assertEquals(expectedCardinality, mergedEstimate);
assertFalse(merged.tipped);
// Tipped test case
numToMerge = 10;
cardinalityPer = 100;
ctes = new CountThenEstimate[numToMerge];
for (int i = 0; i < numToMerge; i++)
{
ctes[i] = new CountThenEstimate(cardinalityPer + 1, AdaptiveCounting.Builder.obyCount(100000));
for (int j = 0; j < cardinalityPer; j++)
{
ctes[i].offer(Math.random());
}
}
expectedCardinality = numToMerge * cardinalityPer;
merged = CountThenEstimate.mergeEstimators(ctes);
mergedEstimate = merged.cardinality();
double error = Math.abs(mergedEstimate - expectedCardinality) / (double) expectedCardinality;
assertEquals(0.01, error, 0.01);
assertTrue(merged.tipped);
}
@Test
public void testTip() throws IOException, ClassNotFoundException
{
CountThenEstimate cte = new CountThenEstimate(10000, new LinearCounting.Builder(1024));
CountThenEstimate clone = new CountThenEstimate(cte.getBytes());
assertCountThenEstimateEquals(cte, clone);
for (int i = 0; i < 128; i++)
{
cte.offer(Integer.toString(i));
}
clone = new CountThenEstimate(cte.getBytes());
assertEquals(128, cte.cardinality());
assertCountThenEstimateEquals(cte, clone);
for (int i = 128; i < 256; i++)
{
cte.offer(Integer.toString(i));
}
clone = new CountThenEstimate(cte.getBytes());
assertFalse(cte.tipped());
assertEquals(256, cte.cardinality());
assertTrue(clone.tipped());
double error = Math.abs(cte.cardinality() - 256) / 256D;
assertEquals(0.1, error, 0.1);
}
@Test
public void testLinearCountingSerialization() throws IOException, ClassNotFoundException
{
CountThenEstimate cte = new CountThenEstimate(3, new LinearCounting.Builder(1024));
CountThenEstimate clone = new CountThenEstimate(cte.getBytes());
assertCountThenEstimateEquals(cte, clone);
cte.offer("1");
cte.offer("2");
cte.offer("3");
assertEquals(3, cte.cardinality());
clone = new CountThenEstimate(cte.getBytes());
assertCountThenEstimateEquals(cte, clone);
cte.offer("4");
clone = new CountThenEstimate(cte.getBytes());
assertCountThenEstimateEquals(cte, clone);
assertEquals(0, clone.tippingPoint);
}
@Test
public void testHyperLogLogSerialization() throws IOException, ClassNotFoundException
{
CountThenEstimate cte = new CountThenEstimate(3, new HyperLogLog.Builder(0.05));
CountThenEstimate clone = new CountThenEstimate(cte.getBytes());
assertCountThenEstimateEquals(cte, clone);
cte.offer("1");
cte.offer("2");
cte.offer("3");
assertEquals(3, cte.cardinality());
clone = new CountThenEstimate(cte.getBytes());
assertCountThenEstimateEquals(cte, clone);
cte.offer("4");
clone = new CountThenEstimate(cte.getBytes());
assertCountThenEstimateEquals(cte, clone);
assertEquals(0, clone.tippingPoint);
}
@Test
public void testAdaptiveCountingSerialization() throws IOException, ClassNotFoundException
{
CountThenEstimate cte = new CountThenEstimate(3, new AdaptiveCounting.Builder(10));
CountThenEstimate clone = new CountThenEstimate(cte.getBytes());
assertCountThenEstimateEquals(cte, clone);
cte.offer("1");
cte.offer("2");
cte.offer("3");
assertEquals(3, cte.cardinality());
clone = new CountThenEstimate(cte.getBytes());
assertCountThenEstimateEquals(cte, clone);
cte.offer("4");
clone = new CountThenEstimate(cte.getBytes());
assertCountThenEstimateEquals(cte, clone);
assertEquals(0, clone.tippingPoint);
}
@Test
public void testAdaptiveCountingSerialization_withHyperLogLog() throws IOException, ClassNotFoundException
{
CountThenEstimate cte = new CountThenEstimate(3, new HyperLogLog.Builder(0.01));
CountThenEstimate clone = new CountThenEstimate(cte.getBytes());
assertCountThenEstimateEquals(cte, clone);
cte.offer("1");
cte.offer("2");
cte.offer("3");
cte.offer("3");
cte.offer("2");
assertEquals(3, cte.cardinality());
clone = new CountThenEstimate(cte.getBytes());
assertCountThenEstimateEquals(cte, clone);
cte.offer("4");
clone = new CountThenEstimate(cte.getBytes());
assertEquals(4, clone.cardinality());
assertEquals(cte.cardinality(), clone.cardinality());
assertCountThenEstimateEquals(cte, clone);
assertEquals(0, clone.tippingPoint);
}
private void assertCountThenEstimateEquals(CountThenEstimate expected, CountThenEstimate actual) throws IOException
{
assertEquals(expected.tipped, actual.tipped);
if (expected.tipped)
{
assertArrayEquals(expected.estimator.getBytes(), actual.estimator.getBytes());
}
else
{
assertEquals(expected.tippingPoint, actual.tippingPoint);
if (expected.builder instanceof LinearCounting.Builder)
{
assertEquals(((LinearCounting.Builder) expected.builder).size, ((LinearCounting.Builder) actual.builder).size);
}
else if (expected.builder instanceof AdaptiveCounting.Builder)
{
assertEquals(((AdaptiveCounting.Builder) expected.builder).k, ((AdaptiveCounting.Builder) actual.builder).k);
}
assertEquals(expected.estimator, actual.estimator);
}
assertEquals(expected.counter, actual.counter);
assertEquals(expected.cardinality(), actual.cardinality());
}
}