/*
***************************************************************************************
* Copyright (C) 2006 EsperTech, Inc. All rights reserved. *
* http://www.espertech.com/esper *
* http://www.espertech.com *
* ---------------------------------------------------------------------------------- *
* The software in this package is published under the terms of the GPL license *
* a copy of which has been included with this distribution in the license.txt file. *
***************************************************************************************
*/
package com.espertech.esper.epl.approx;
import com.espertech.esper.util.MurmurHash;
import java.util.Random;
/**
* <p>
* Count-min sketch (or CM sketch) is a probabilistic sub-linear space streaming algorithm
* (source: Wikipedia, see http://en.wikipedia.org/wiki/Count%E2%80%93min_sketch)
* </p>
* <p>
* Count-min sketch computes an approximate frequency and thereby top-k or heavy-hitters.
* </p>
* <p>
* Paper:
* Graham Cormode and S. Muthukrishnan. An improved data stream summary:
* The Count-Min sketch and its applications. 2004. 10.1016/j.jalgor.2003.12.001
* http://dl.acm.org/citation.cfm?id=1073718
* </p>
*/
public class CountMinSketchStateHashes {
private int depth;
private int width;
private long[][] table;
private long[] hash;
private long total;
public static CountMinSketchStateHashes makeState(CountMinSketchSpecHashes spec) {
int width = (int) Math.ceil(2 / spec.getEpsOfTotalCount());
int depth = (int) Math.ceil(-Math.log(1 - spec.getConfidence()) / Math.log(2));
long[][] table = new long[depth][width];
long[] hash = new long[depth];
Random r = new Random(spec.getSeed());
for (int i = 0; i < depth; ++i) {
hash[i] = r.nextInt(Integer.MAX_VALUE);
}
return new CountMinSketchStateHashes(depth, width, table, hash, 0);
}
public CountMinSketchStateHashes(int depth, int width, long[][] table, long[] hash, long total) {
this.depth = depth;
this.width = width;
this.table = table;
this.hash = hash;
this.total = total;
}
public long[][] getTable() {
return table;
}
public long[] getHash() {
return hash;
}
public int getDepth() {
return depth;
}
public int getWidth() {
return width;
}
public void incTotal(long count) {
total += count;
}
public long getTotal() {
return total;
}
public long estimateCount(byte[] item) {
long res = Long.MAX_VALUE;
int[] buckets = getHashBuckets(item, depth, width);
for (int i = 0; i < depth; ++i) {
res = Math.min(res, table[i][buckets[i]]);
}
return res;
}
public void add(byte[] item, long count) {
if (count < 0) {
throw new IllegalArgumentException("Negative increments not implemented");
}
int[] buckets = getHashBuckets(item, depth, width);
for (int i = 0; i < depth; ++i) {
table[i][buckets[i]] += count;
}
total += count;
}
private int[] getHashBuckets(byte[] b, int hashCount, int max) {
int[] result = new int[hashCount];
int hash1 = MurmurHash.hash(b, 0, b.length, 0);
int hash2 = MurmurHash.hash(b, 0, b.length, hash1);
for (int i = 0; i < hashCount; i++) {
result[i] = Math.abs((hash1 + i * hash2) % max);
}
return result;
}
}