/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.streaminer.stream.frequency; import org.junit.Test; import static org.junit.Assert.*; import java.util.Random; import org.streaminer.stream.frequency.decay.DecayFormula; import org.streaminer.stream.frequency.decay.ExpDecayFormula; public class TimeDecayCountMinSketchTest { @Test public void testAccuracy() throws FrequencyException { int seed = 7364181; Random r = new Random(seed); int numItems = 1000000; int[] xs = new int[numItems]; int maxScale = 20; for (int i = 0; i < xs.length; ++i) { int scale = r.nextInt(maxScale); xs[i] = r.nextInt(1 << scale); } double epsOfTotalCount = 0.0001; double confidence = 0.99; DecayFormula decay = new ExpDecayFormula(24 * 60 * 60); TimeDecayCountMinSketch sketch = new TimeDecayCountMinSketch(epsOfTotalCount, confidence, seed, decay); TimeDecayRealCounting<Integer> realDecay = new TimeDecayRealCounting<Integer>(decay); RealCounting<Integer> real = new RealCounting<Integer>(); long timestamp = 0; for (int i=0; i<xs.length; i++) { int x = xs[i]; //long timestamp = System.currentTimeMillis(); if (i%100 == 0 && i != 0) timestamp += 10000; sketch.add(x, 1, timestamp); realDecay.add(x, 1, timestamp); real.add(x); } int numErrors = 0; for (int i = 0; i < realDecay.size(); ++i) { timestamp += 10000;//System.currentTimeMillis(); double ratio = 1.0 * (sketch.estimateCount(i, timestamp) - realDecay.estimateCount(i, timestamp)) / xs.length; if (ratio > 1.0001) { numErrors++; } System.out.println(String.format("%d\t%d\t%f\t%f", i, real.estimateCount(i), realDecay.estimateCount(i, timestamp), sketch.estimateCount(i, timestamp))); } double pCorrect = 1 - 1.0 * numErrors / realDecay.size(); assertTrue("Confidence not reached: required " + confidence + ", reached " + pCorrect, pCorrect > confidence); } /* @Test public void testAccuracyStrings() throws FrequencyException { int seed = 7364181; Random r = new Random(seed); int numItems = 1000000; String[] xs = new String[numItems]; int maxScale = 20; for (int i = 0; i < xs.length; ++i) { int scale = r.nextInt(maxScale); xs[i] = RandomStringUtils.random(scale); } double epsOfTotalCount = 0.0001; double confidence = 0.99; CountMinSketchAlt sketch = new CountMinSketchAlt(epsOfTotalCount, confidence, seed); for (String x : xs) { sketch.add(x, 1); } Map<String, Long> actualFreq = new HashMap<String, Long>(); for (String x : xs) { Long val = actualFreq.get(x); if (val == null) { actualFreq.put(x, 1L); } else { actualFreq.put(x, val + 1L); } } sketch = CountMinSketchAlt.deserialize(CountMinSketchAlt.serialize(sketch)); int numErrors = 0; for (int i = 0; i < actualFreq.size(); ++i) { Long value = actualFreq.get(i); long lvalue = (value == null) ? 0 : value; double ratio = 1.0 * (sketch.estimateCount(i) - lvalue) / xs.length; if (ratio > 1.0001) { numErrors++; } } double pCorrect = 1 - 1.0 * numErrors / actualFreq.size(); assertTrue("Confidence not reached: required " + confidence + ", reached " + pCorrect, pCorrect > confidence); } */ }