/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.streaminer.stream.frequency;
import org.apache.commons.lang3.RandomStringUtils;
import org.junit.Test;
import org.streaminer.stream.frequency.CountMinSketchAlt.CMSMergeException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.TreeSet;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertEquals;
/**
 * Unit tests for {@link CountMinSketchAlt}.
 *
 * <p>Covers estimation accuracy for integer and string items (checked after a
 * serialize/deserialize round trip) and merging of several sketches into one.
 */
public class CountMinSketchAltTest
{
    /**
     * Feeds one million pseudo-random integers into a sketch, round-trips the sketch
     * through serialization, and checks that the fraction of values whose estimate
     * overshoots the true count by more than {@code epsOfTotalCount} of the stream
     * length stays below {@code 1 - confidence}.
     *
     * @throws FrequencyException if the sketch rejects an item
     */
    @Test
    public void testAccuracy() throws FrequencyException
    {
        int seed = 7364181;
        Random r = new Random(seed);
        int numItems = 1000000;
        int[] xs = new int[numItems];
        int maxScale = 20;
        for (int i = 0; i < xs.length; ++i)
        {
            // Pick a random magnitude first so that small values occur far more often.
            int scale = r.nextInt(maxScale);
            xs[i] = r.nextInt(1 << scale);
        }

        double epsOfTotalCount = 0.0001;
        double confidence = 0.99;

        CountMinSketchAlt sketch = new CountMinSketchAlt(epsOfTotalCount, confidence, seed);
        int[] actualFreq = new int[1 << maxScale];
        for (int x : xs)
        {
            sketch.add(x, 1);
            actualFreq[x]++;
        }

        // Estimates are taken from the deserialized copy so the round trip is covered too.
        sketch = CountMinSketchAlt.deserialize(CountMinSketchAlt.serialize(sketch));

        int numErrors = 0;
        for (int i = 0; i < actualFreq.length; ++i)
        {
            // Overshoot of the estimate, expressed as a fraction of the stream length.
            double ratio = 1.0 * (sketch.estimateCount(i) - actualFreq[i]) / xs.length;
            // The previous threshold of 1.0001 could never be exceeded, which made the
            // test vacuous; the configured error bound is the meaningful limit.
            if (ratio > epsOfTotalCount)
            {
                numErrors++;
            }
        }
        double pCorrect = 1 - 1.0 * numErrors / actualFreq.length;
        assertTrue("Confidence not reached: required " + confidence + ", reached " + pCorrect, pCorrect > confidence);
    }

    /**
     * Same accuracy check as {@link #testAccuracy()} but with random strings as items.
     * Every distinct string that was inserted is queried and compared with its exact
     * frequency.
     *
     * @throws FrequencyException if the sketch rejects an item
     */
    @Test
    public void testAccuracyStrings() throws FrequencyException
    {
        int seed = 7364181;
        Random r = new Random(seed);
        int numItems = 1000000;
        String[] xs = new String[numItems];
        int maxScale = 20;
        for (int i = 0; i < xs.length; ++i)
        {
            int scale = r.nextInt(maxScale);
            // Same character range as RandomStringUtils.random(scale), but drawn from the
            // seeded generator so the data set is identical on every run.
            xs[i] = RandomStringUtils.random(scale, 0, 0, false, false, null, r);
        }

        double epsOfTotalCount = 0.0001;
        double confidence = 0.99;

        CountMinSketchAlt sketch = new CountMinSketchAlt(epsOfTotalCount, confidence, seed);
        Map<String, Long> actualFreq = new HashMap<String, Long>();
        for (String x : xs)
        {
            sketch.add(x, 1);
            Long val = actualFreq.get(x);
            actualFreq.put(x, (val == null) ? 1L : val + 1L);
        }

        // Estimates are taken from the deserialized copy so the round trip is covered too.
        sketch = CountMinSketchAlt.deserialize(CountMinSketchAlt.serialize(sketch));

        int numErrors = 0;
        // Iterate the strings that were actually inserted. The earlier version looped over
        // integer indices, which never matched a String key and never queried an inserted item.
        for (Map.Entry<String, Long> entry : actualFreq.entrySet())
        {
            long actual = entry.getValue();
            double ratio = 1.0 * (sketch.estimateCount(entry.getKey()) - actual) / xs.length;
            if (ratio > epsOfTotalCount)
            {
                numErrors++;
            }
        }
        double pCorrect = 1 - 1.0 * numErrors / actualFreq.size();
        assertTrue("Confidence not reached: required " + confidence + ", reached " + pCorrect, pCorrect > confidence);
    }

    /**
     * Builds several sketches plus a baseline sketch that receives every item, merges the
     * individual sketches, and checks that the merged sketch reports the same size,
     * confidence, relative error and per-item estimates as the baseline.
     *
     * @throws CMSMergeException if the sketches cannot be merged
     * @throws FrequencyException if a sketch rejects an item
     */
    @Test
    public void merge() throws CMSMergeException, FrequencyException
    {
        int numToMerge = 5;
        int cardinality = 1000000;

        double epsOfTotalCount = 0.0001;
        double confidence = 0.99;
        int seed = 7364181;

        int maxScale = 20;
        // Seeded so that a failing run can be reproduced exactly.
        Random r = new Random(seed);
        TreeSet<Integer> vals = new TreeSet<Integer>();

        CountMinSketchAlt baseline = new CountMinSketchAlt(epsOfTotalCount, confidence, seed);
        CountMinSketchAlt[] sketches = new CountMinSketchAlt[numToMerge];
        for (int i = 0; i < numToMerge; i++)
        {
            sketches[i] = new CountMinSketchAlt(epsOfTotalCount, confidence, seed);
            for (int j = 0; j < cardinality; j++)
            {
                int scale = r.nextInt(maxScale);
                int val = r.nextInt(1 << scale);
                vals.add(val);
                // Each item goes into its own sketch and into the baseline.
                sketches[i].add(val, 1);
                baseline.add(val, 1);
            }
        }

        CountMinSketchAlt merged = CountMinSketchAlt.merge(sketches);

        assertEquals(baseline.size(), merged.size());
        // Floating-point parameters are compared with a 1% tolerance.
        assertEquals(baseline.getConfidence(), merged.getConfidence(), baseline.getConfidence() / 100);
        assertEquals(baseline.getRelativeError(), merged.getRelativeError(), baseline.getRelativeError() / 100);
        for (int val : vals)
        {
            assertEquals(baseline.estimateCount(val), merged.estimateCount(val));
        }
    }

    /**
     * Merging zero sketches is expected to return {@code null}.
     *
     * @throws CMSMergeException never expected here; declared by {@code merge}
     */
    @Test
    public void testMergeEmpty() throws CMSMergeException
    {
        assertNull(CountMinSketchAlt.merge());
    }

    /**
     * Merging two sketches constructed with different parameters is expected to fail
     * with a {@link CMSMergeException}.
     *
     * @throws CMSMergeException always, which is the expected outcome of this test
     */
    @Test(expected = CMSMergeException.class)
    public void testUncompatibleMerge() throws CMSMergeException
    {
        CountMinSketchAlt cms1 = new CountMinSketchAlt(1, 1, 0);
        CountMinSketchAlt cms2 = new CountMinSketchAlt(0.1, 0.1, 0);
        CountMinSketchAlt.merge(cms1, cms2);
    }
}