package org.streaminer.stream.frequency;
import java.util.Random;
import org.streaminer.stream.frequency.decay.DecayFormula;
import org.streaminer.util.hash.HashUtils;
/**
* Implementation of a time decaying Count-Min Sketch with values updated on-demand
* instead of fixed time intervals. The Count-Min Sketch implementation is from
* the CountMinSketchAlt class, originally from the stream-lib. The Decay functions
* were obtained from a <a href="https://github.com/michal-harish/streaming-sketches">DecayHashMap implementation</a>.
*/
public class TimeDecayCountMinSketch implements ITimeDecayFrequency<Object> {
    /** Modulus (2^31 - 1) applied by the linear hash functions in {@link #hash(long, int)}. */
    public static final long PRIME_MODULUS = (1L << 31) - 1;

    /** Number of hash functions, i.e. rows of {@link #table}. */
    private int depth;
    /** Number of counters per row of {@link #table}. */
    private int width;
    /** Decayed counters, indexed as {@code [row][bucket]}. */
    private double[][] table;
    /** Multiplier {@code a} of the linear hash function of each row. */
    private long[] hashA;
    /**
     * Timestamp at which each counter of {@link #table} was last brought up to date,
     * indexed as {@code [row][bucket]}. Every counter needs its own timer because the
     * rows use different hash functions and are therefore touched by different items.
     */
    private long[][] timers;
    /** Sum of all (undecayed) quantities added so far. */
    private long size;
    private double eps;
    private double confidence;
    /** Formula used to project a quantity forward in time. */
    private DecayFormula formula;

    private TimeDecayCountMinSketch() {
    }

    /**
     * Creates a sketch with explicit dimensions.
     *
     * @param depth   number of hash functions (rows)
     * @param width   number of counters per row
     * @param seed    seed of the random generator used to pick the hash multipliers
     * @param formula decay formula applied when projecting counters forward in time
     */
    public TimeDecayCountMinSketch(int depth, int width, int seed, DecayFormula formula) {
        this.depth = depth;
        this.width = width;
        this.eps = 2.0 / width;
        this.confidence = 1 - 1 / Math.pow(2, depth);
        this.formula = formula;
        initTablesWith(depth, width, seed);
    }

    /**
     * Creates a sketch whose dimensions are derived from the requested accuracy.
     *
     * @param epsOfTotalCount relative error; the width is {@code ceil(2 / eps)}
     * @param confidence      confidence; the depth is {@code ceil(-log2(1 - confidence))}
     * @param seed            seed of the random generator used to pick the hash multipliers
     * @param formula         decay formula applied when projecting counters forward in time
     */
    public TimeDecayCountMinSketch(double epsOfTotalCount, double confidence, int seed, DecayFormula formula) {
        // 2/w = eps ; w = 2/eps
        // 1/2^depth <= 1-confidence ; depth >= -log2 (1-confidence)
        this.eps = epsOfTotalCount;
        this.confidence = confidence;
        this.width = (int) Math.ceil(2 / epsOfTotalCount);
        this.depth = (int) Math.ceil(-Math.log(1 - confidence) / Math.log(2));
        this.formula = formula;
        initTablesWith(depth, width, seed);
    }

    /**
     * Rebuilds a sketch from previously computed hash multipliers and counters.
     * The timers start at zero. Note that this constructor does not set the decay
     * formula, so the instance cannot project values until one is assigned.
     */
    private TimeDecayCountMinSketch(int depth, int width, int size, long[] hashA, double[][] table) {
        this.depth = depth;
        this.width = width;
        this.eps = 2.0 / width;
        this.confidence = 1 - 1 / Math.pow(2, depth);
        this.hashA = hashA;
        this.table = table;
        // Allocate the timers so that add/estimate do not fail on a null array.
        this.timers = new long[depth][width];
        this.size = size;
    }

    /** Allocates the counters and timers and draws one hash multiplier per row. */
    private void initTablesWith(int depth, int width, int seed) {
        this.table = new double[depth][width];
        this.hashA = new long[depth];
        this.timers = new long[depth][width];
        Random r = new Random(seed);
        // We're using linear hash functions
        // of the form (a*x+b) mod p.
        // a,b are chosen independently for each hash function.
        // However we can set b = 0 as all it does is shift the results
        // without compromising their uniformity or independence with
        // the other hashes.
        for (int i = 0; i < depth; ++i) {
            hashA[i] = r.nextInt(Integer.MAX_VALUE);
        }
    }

    /** @return the relative error {@code eps} of this sketch */
    public double getRelativeError() {
        return eps;
    }

    /** @return the confidence of this sketch */
    public double getConfidence() {
        return confidence;
    }

    /**
     * Maps an item to a bucket of row {@code i}.
     *
     * @param item the item to hash
     * @param i    the row whose hash function is used
     * @return a bucket index in {@code [0, width)}
     */
    private int hash(long item, int i) {
        long hash = hashA[i] * item;
        // A super fast way of computing x mod 2^p-1
        // See http://www.cs.princeton.edu/courses/archive/fall09/cos521/Handouts/universalclasses.pdf
        // page 149, right after Proposition 7.
        hash += hash >> 32;
        hash &= PRIME_MODULUS;
        // Doing "%" after (int) conversion is ~2x faster than %'ing longs.
        return ((int) hash) % width;
    }

    /**
     * Adds {@code qtd} occurrences of {@code item} observed at {@code timestamp}.
     * Only {@link Integer}, {@link Long} and {@link String} items are counted;
     * items of any other type are silently ignored.
     *
     * @param item      the item to count
     * @param qtd       the non-negative quantity to add
     * @param timestamp the time at which the occurrences were observed
     * @throws IllegalArgumentException if {@code qtd} is negative
     */
    public void add(Object item, long qtd, long timestamp) {
        if (qtd < 0) {
            throw new IllegalArgumentException("Negative increments not implemented");
        }
        if (item instanceof Integer) {
            addLong(((Integer) item).longValue(), qtd, timestamp);
        } else if (item instanceof Long) {
            addLong((Long) item, qtd, timestamp);
        } else if (item instanceof String) {
            addString((String) item, qtd, timestamp);
        }
    }

    /**
     * Adds {@code qtd} occurrences of a string item observed at {@code timestamp}.
     * Unlike {@link #add(Object, long, long)} this method does not validate {@code qtd}.
     *
     * @param item      the item to count
     * @param qtd       the quantity to add
     * @param timestamp the time at which the occurrences were observed
     */
    public void addString(String item, long qtd, long timestamp) {
        int[] buckets = HashUtils.getHashBuckets(item, depth, width);
        for (int i = 0; i < depth; ++i) {
            updateCell(i, buckets[i], qtd, timestamp);
        }
        size += qtd;
    }

    /** Adds {@code qtd} occurrences of a numeric item observed at {@code timestamp}. */
    private void addLong(long item, long qtd, long timestamp) {
        for (int i = 0; i < depth; ++i) {
            updateCell(i, hash(item, i), qtd, timestamp);
        }
        size += qtd;
    }

    /**
     * Adds {@code qtd} to a single counter, keeping the counter expressed at the
     * most recent timestamp it has seen.
     */
    private void updateCell(int row, int bucket, long qtd, long timestamp) {
        long lastUpdate = timers[row][bucket];
        double current = table[row][bucket];
        if (lastUpdate <= timestamp) {
            // In-order update: decay the stored value up to now, then add the increment.
            table[row][bucket] = projectValue(timestamp, lastUpdate, current) + qtd;
            timers[row][bucket] = timestamp;
        } else {
            // Late arrival: decay the increment up to the counter's time and add it
            // to the stored value instead of replacing it.
            table[row][bucket] = current + projectValue(lastUpdate, timestamp, qtd);
        }
    }

    /**
     * Estimates the decayed count of {@code item} as of {@code timestamp}.
     *
     * @param item      the item to look up
     * @param timestamp the time at which the estimate is wanted
     * @return the estimate, or {@code 0} when the item is not an {@link Integer},
     *         {@link Long} or {@link String}
     * @throws IllegalArgumentException if {@code timestamp} precedes the last update
     *         of one of the counters the item maps to
     */
    public double estimateCount(Object item, long timestamp) {
        if (item instanceof Integer) {
            return estimateCountLong(((Integer) item).longValue(), timestamp);
        } else if (item instanceof Long) {
            return estimateCountLong((Long) item, timestamp);
        } else if (item instanceof String) {
            return estimateCountString((String) item, timestamp);
        }
        return 0d;
    }

    /**
     * Estimates the decayed count of a string item as of {@code timestamp}.
     *
     * @param item      the item to look up
     * @param timestamp the time at which the estimate is wanted
     * @return the smallest projected counter among the rows
     * @throws IllegalArgumentException if {@code timestamp} precedes the last update
     *         of one of the counters the item maps to
     */
    public double estimateCountString(String item, long timestamp) {
        double res = Double.MAX_VALUE;
        int[] buckets = HashUtils.getHashBuckets(item, depth, width);
        for (int i = 0; i < depth; ++i) {
            res = Math.min(res, projectCell(i, buckets[i], timestamp));
        }
        return res;
    }

    /** Estimates the decayed count of a numeric item as of {@code timestamp}. */
    private double estimateCountLong(long item, long timestamp) {
        double res = Double.MAX_VALUE;
        for (int i = 0; i < depth; ++i) {
            res = Math.min(res, projectCell(i, hash(item, i), timestamp));
        }
        return res;
    }

    /** Projects a single counter from its own last update time to {@code timestamp}. */
    private double projectCell(int row, int bucket, long timestamp) {
        return projectValue(timestamp, timers[row][bucket], table[row][bucket]);
    }

    /**
     * Decays {@code quantity}, known at {@code timestamp}, up to {@code futureTimestamp}.
     *
     * @throws IllegalArgumentException if {@code futureTimestamp} precedes {@code timestamp}
     */
    private double projectValue(long futureTimestamp, long timestamp, double quantity) {
        if (futureTimestamp < timestamp) {
            throw new IllegalArgumentException("Cannot project decaying quantity into the past.");
        }
        double t = (double) (futureTimestamp - timestamp);
        return formula.evaluate(quantity, t);
    }

    /** @return the sum of all quantities added so far, without decay */
    public long size() {
        return size;
    }
}