/*
***************************************************************************************
* Copyright (C) 2006 EsperTech, Inc. All rights reserved. *
* http://www.espertech.com/esper *
* http://www.espertech.com *
* ---------------------------------------------------------------------------------- *
* The software in this package is published under the terms of the GPL license *
* a copy of which has been included with this distribution in the license.txt file. *
***************************************************************************************
*/
package com.espertech.esper.epl.approx;
import com.espertech.esper.client.util.CountMinSketchAgentStringUTF16;
import com.espertech.esper.collection.Pair;
import junit.framework.TestCase;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.*;
/**
 * Tests top-k tracking: {@link CountMinSketchStateTopk} fed directly with exact counts,
 * and the top-k values reported by a {@link CountMinSketchState} after each update.
 */
public class TestCountMinSketchStateTopK extends TestCase {

    /**
     * Charset used for every String-to-bytes conversion in this test, so the bytes
     * handed to the sketch do not depend on the JVM's platform-default charset.
     */
    private static final Charset CHARSET = Charset.forName("UTF-8");

    /**
     * Feeds randomly chosen values with their exact running counts into a top-k tracker
     * and checks that the result is filled, free of duplicates and ordered by descending
     * frequency.
     */
    public void testTopK() {
        final int space = 10000;
        final int points = 100000;
        final int topkMax = 100;

        // The seed still varies per run, but it is reported in every assertion message
        // so that a failing run can be replayed with the same sequence of values.
        final long seed = System.currentTimeMillis();
        final String seedInfo = "seed=" + seed;
        Random random = new Random(seed);

        CountMinSketchStateTopk topk = new CountMinSketchStateTopk(topkMax);
        Map<ByteBuffer, Long> sent = new HashMap<ByteBuffer, Long>();
        for (int i = 0; i < points; i++) {
            // for a simple, deterministic population use generateBytesModulo(i, space) instead
            ByteBuffer bytes = generateBytesRandom(random, space);
            Long previous = sent.get(bytes);
            long count = (previous == null) ? 1L : previous + 1;
            sent.put(bytes, count);
            topk.updateExpectIncreasing(bytes.array(), count);
        }

        // compare
        List<ByteBuffer> top = topk.getTopKValues();

        // assert filled: either every distinct value, or exactly topkMax of them
        assertEquals(seedInfo, Math.min(sent.size(), topkMax), top.size());

        // assert no duplicate values
        Set<ByteBuffer> seen = new HashSet<ByteBuffer>();
        for (ByteBuffer topBytes : top) {
            assertTrue(seedInfo + ": duplicate top-k value", seen.add(topBytes));
        }

        // assert order descending, using the exact counts recorded in 'sent'
        Long lastFreq = null;
        for (ByteBuffer topBytes : top) {
            Long freq = sent.get(topBytes);
            // fail with a message instead of an unboxing NullPointerException
            assertNotNull(seedInfo + ": top-k contains a value that was never sent", freq);
            if (lastFreq != null) {
                assertTrue(seedInfo + ": frequency " + freq + " listed after lower frequency " + lastFreq,
                        freq <= lastFreq);
            }
            lastFreq = freq;
        }
    }

    /**
     * Adds single values one at a time to a state created with a top-k of 3 and checks
     * the reported top-k values and their frequencies after every step.
     */
    public void testFlow() {
        // top-k for 3
        CountMinSketchSpec spec = new CountMinSketchSpec(TestCountMinSketchStateHashes.getDefaultSpec(), 3, new CountMinSketchAgentStringUTF16());
        CountMinSketchState state = CountMinSketchState.makeState(spec);

        updateAssert(state, "a", "a=1");
        updateAssert(state, "b", "a=1,b=1");
        updateAssert(state, "a", "a=2,b=1");
        updateAssert(state, "c", "a=2,b=1,c=1");
        updateAssert(state, "d", "a=2,b=1,c=1");
        updateAssert(state, "c", "a=2,b=1,c=2");
        updateAssert(state, "a", "a=3,b=1,c=2");
        updateAssert(state, "d", "a=3,d=2,c=2");
        updateAssert(state, "e", "a=3,d=2,c=2");
        updateAssert(state, "e", "a=3,d=2,c=2");
        updateAssert(state, "e", "a=3,e=3,c=2");
        updateAssert(state, "d", "a=3,e=3,d=3");
        updateAssert(state, "c", "a=3,e=3,d=3");
        updateAssert(state, "c", "a=3,e=3,c=4");
    }

    /**
     * Adds {@code value} once to {@code state}, then compares the state's top-k values
     * (paired with the frequency the state reports for each) against {@code expected}.
     *
     * @param state    state under test
     * @param value    text to add with a count of 1
     * @param expected comma-separated {@code text=frequency} pairs, in any order
     */
    private void updateAssert(CountMinSketchState state, String value, String expected) {
        state.add(value.getBytes(CHARSET), 1);

        Collection<ByteBuffer> topkValues = state.getTopKValues();
        List<Pair<Long, Object>> topkList = new ArrayList<Pair<Long, Object>>();
        for (ByteBuffer topkValue : topkValues) {
            byte[] raw = topkValue.array();
            long frequency = state.frequency(raw);
            String text = new String(raw, CHARSET);
            topkList.add(new Pair<Long, Object>(frequency, text));
        }

        assertList(expected, topkList);
    }

    /**
     * Asserts that {@code asList} holds exactly the pairs described by {@code pairText},
     * ignoring order. Matched entries are removed from {@code asList} as they are found.
     *
     * @param pairText comma-separated {@code text=frequency} pairs
     * @param asList   actual (frequency, text) pairs; modified by this method
     */
    private void assertList(String pairText, List<Pair<Long, Object>> asList) {
        String[] pairs = pairText.split(",");
        assertEquals("received " + asList.toString(), pairs.length, asList.size());

        for (String pair : pairs) {
            String[] pairArr = pair.split("=");
            Pair<Long, Object> pairExpected = new Pair<Long, Object>(Long.parseLong(pairArr[1]), pairArr[0]);
            boolean found = asList.remove(pairExpected);
            assertTrue("failed to find " + pairExpected + " among remaining " + asList.toString(), found);
        }
    }

    /**
     * Draws an integer in {@code [0, space)} from {@code random} and returns the bytes
     * of its decimal text wrapped in a buffer.
     *
     * @param random source of randomness
     * @param space  exclusive upper bound of the drawn integer
     * @return buffer wrapping the encoded decimal text
     */
    protected static ByteBuffer generateBytesRandom(Random random, int space) {
        int val = random.nextInt(space);
        byte[] bytes = Integer.toString(val).getBytes(CHARSET);
        return ByteBuffer.wrap(bytes);
    }

    /**
     * Returns the bytes of the decimal text of {@code num % space} wrapped in a buffer.
     *
     * @param num   number to reduce
     * @param space modulus
     * @return buffer wrapping the encoded decimal text
     */
    protected static ByteBuffer generateBytesModulo(int num, int space) {
        String value = Integer.toString(num % space);
        return ByteBuffer.wrap(value.getBytes(CHARSET));
    }
}