package com.espertech.esper.epl.approx;/* *************************************************************************************** * Copyright (C) 2006 EsperTech, Inc. All rights reserved. * * http://www.espertech.com/esper * * http://www.espertech.com * * ---------------------------------------------------------------------------------- * * The software in this package is published under the terms of the GPL license * * a copy of which has been included with this distribution in the license.txt file. * *************************************************************************************** */ import com.espertech.esper.util.MurmurHash; import junit.framework.TestCase; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; import java.util.HashMap; import java.util.Map; import java.util.Random; public class TestCountMinSketchStateHashes extends TestCase { public void testSimpleFlow() { CountMinSketchStateHashes state = CountMinSketchStateHashes.makeState(getDefaultSpec()); add(state, "hello", 100); assertEquals(100, estimateCount(state, "hello")); add(state, "text", 1); assertEquals(1, estimateCount(state, "text")); add(state, "hello", 3); assertEquals(103, estimateCount(state, "hello")); assertEquals(1, estimateCount(state, "text")); } public void testSpace() { final double eps = 0.001; final double confidence = 0.999; final int space = 2000; final int points = 100000; final boolean randomized = true; Random random = new Random(); CountMinSketchSpecHashes spec = new CountMinSketchSpecHashes(eps, confidence, 123456); CountMinSketchStateHashes state = CountMinSketchStateHashes.makeState(spec); Map<ByteBuffer, Long> sent = new HashMap<ByteBuffer, Long>(); for (int i = 0; i < points; i++) { ByteBuffer bytes; if (randomized) { bytes = TestCountMinSketchStateTopK.generateBytesRandom(random, space); } else { bytes = TestCountMinSketchStateTopK.generateBytesModulo(i, space); } state.add(bytes.array(), 1); Long count = sent.get(bytes); if (count == null) { sent.put(bytes, 1L); } else { sent.put(bytes, count + 1); } if (i > 0 && i % 100000 == 0) { System.out.println("Completed " + i); } } // compare int errors = 0; for (Map.Entry<ByteBuffer, Long> entry : sent.entrySet()) { long frequency = state.estimateCount(entry.getKey().array()); if (frequency != entry.getValue()) { System.out.println("Expected " + entry.getValue() + " received " + frequency); errors++; } } System.out.println("Found " + errors + " errors at space " + space + " sent " + points); assertTrue(eps * points > errors); } public void testPerformanceMurmurHash() { final int warmupLoopCount = 1; // 1000000; final int measureLoopCount = 1; // 1000000000; // init String[] texts = new String[]{"joe", "melissa", "townhall", "ballpark", "trial-by-error", "house", "teamwork", "recommendation", "partial", "soccer ball"}; byte[][] bytes = new byte[texts.length][]; for (int i = 0; i < texts.length; i++) { bytes[i] = texts[i].getBytes(); } // warmup for (int i = 0; i < warmupLoopCount; i++) { byte[] bytearr = bytes[i % bytes.length]; int code = MurmurHash.hash(bytearr, 0, bytearr.length, 0); if (code == 0) { System.out.println("A zero code"); } } // run // 23.3 for 1G for MurmurHash.hash long start = System.nanoTime(); for (int i = 0; i < measureLoopCount; i++) { byte[] bytearr = bytes[i % bytes.length]; int codeOne = MurmurHash.hash(bytearr, 0, bytearr.length, 0); if (codeOne == 0) { System.out.println("A zero code"); } } long delta = System.nanoTime() - start; // Comment me in - System.out.println("Delta " + (delta / 1000000000.0)); } protected static CountMinSketchSpecHashes getDefaultSpec() { double epsOfTotalCount = 0.0001; double confidence = 0.99; int seed = 1234567; return new CountMinSketchSpecHashes(epsOfTotalCount, confidence, seed); } private long estimateCount(CountMinSketchStateHashes state, String item) { return state.estimateCount(getBytes(item)); } private void add(CountMinSketchStateHashes state, String item, long count) { state.add(getBytes(item), count); } private static byte[] getBytes(String item) { try { return item.getBytes("UTF-16"); } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } } }