// ============================================================================
//
// Copyright (C) 2006-2016 Talend Inc. - www.talend.com
//
// This source code is available under agreement available at
// %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt
//
// You should have received a copy of the agreement
// along with this program; if not, write to Talend SA
// 9 rue Pages 92150 Suresnes, France
//
// ============================================================================
package org.talend.dataquality.statistics.cardinality;
import java.util.Random;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
public class CardinalityHLLAnalyzerTest {
private CardinalityHLLAnalyzer distinctHLLAna = null;
@Before
public void setUp() throws Exception {
distinctHLLAna = new CardinalityHLLAnalyzer();
}
@After
public void tearDown() throws Exception {
}
@Test
public void testAnalyze() {
// 1. based on a small size.
String[] data = new String[] { "0", "1", "2", "3", "16", "17", "18", "19", "19" };
distinctHLLAna.init();
for (String col : data) {
distinctHLLAna.analyze(col);
}
Assert.assertEquals(8, distinctHLLAna.getResult().get(0).getDistinctCount(), 0);
Assert.assertEquals(1, distinctHLLAna.getResult().get(0).getDuplicateCount(), 0);
// 2. based a on large size, the error less than 0.1
int size = 10000000;
distinctHLLAna.init();
for (int i = 0; i < size; i++) {
distinctHLLAna.analyze(streamElement(i));
}
long estimate = distinctHLLAna.getResult().get(0).getDistinctCount();
double err = Math.abs(estimate - size) / (double) size;
Assert.assertTrue(err < .1);
// 3. with an empty
data = new String[] { "" };
distinctHLLAna.init();
for (String col : data) {
distinctHLLAna.analyze(col);
}
Assert.assertEquals(1, distinctHLLAna.getResult().get(0).getDistinctCount(), 0);
Assert.assertEquals(0, distinctHLLAna.getResult().get(0).getDuplicateCount(), 0);
distinctHLLAna.init();
Assert.assertTrue(distinctHLLAna.getResult().size() == 0);
}
protected static String streamElement(int i) {
return Long.toHexString(prng.nextLong());
// return se++;
}
private static Random prng = new Random();
}