package org.streaminer.stream.sampler;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Random;
import org.streaminer.stream.sampler.sre.OneSparseRecoveryEstimator;
import org.streaminer.stream.sampler.sre.SSparseRecoveryEstimator;
import org.streaminer.util.hash.Hash;
/**
* A naive implementation of an L0-Sampling data structure, as described in
* Cormode and Firmani's 2013 paper, "On Unifying the Space of L0-Sampling Algorithms".
*
* N refers to the size of the input space (e.g. an unsigned 64-bit int in the
* case of most cookie ID spaces)
*
* k refers to the number of hash functions used in the s-sparse recovery data
* structure.
*
* s refers to the sparsity of the s-sparse recovery data structure.
*
* In theory one should generally ensure k >= s/2, but in practice C&F note that
* "it suffices to use small values of k, for instance k=7, to ensure that the
* failure rate holds steady, independent of the number of repetitions made."
*
* Also of note: "When time is important, using s<=12 and k<=6 ensures fast
* computation. On the other hand, by selecting bigger values for both s and
* k, the process becomes slower than the FIS variant."
*
* Python Source Code: https://github.com/venantius/droplet
*
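* A minimal usage sketch (the values below are illustrative; pass any
* concrete {@link org.streaminer.util.hash.Hash} implementation):
*
* <pre>{@code
* Hash hasher = ...; // any org.streaminer.util.hash.Hash implementation
* L0Sampler sampler = new L0Sampler(1 << 20, 12, 7, hasher);
* sampler.update(42, 3);          // add 3 to coordinate 42
* int[] item = sampler.recover(); // {index, value}, or null on failure
* }</pre>
*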
* @author Maycon Viana Bordin <mayconbordin@gmail.com>
*/
public class L0Sampler {
private static final Random rand = new Random();
private int size;
private int sparsity;
private int k;
private SSparseRecoveryEstimator[] levels;
private Hash hasher;
public L0Sampler(int size, int sparsity, Hash hasher) {
this.size = size;
this.sparsity = sparsity;
this.hasher = hasher;
// delta = 2^(-s/12); floating-point division is required here, since
// integer division truncates -sparsity/12 to 0 for any sparsity < 12,
// which would silently force delta = 1.
double delta = Math.pow(2, -sparsity / 12.0);
k = (int) Math.round(Math.log(sparsity / delta) / Math.log(2));
initialize();
}
public L0Sampler(int size, int sparsity, int k, Hash hasher) {
this.size = size;
this.sparsity = sparsity;
this.k = k;
this.hasher = hasher;
initialize();
}
private void initialize() {
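// Allocate one s-sparse recovery structure per level; log2(size) levels
// suffice since level j keeps items with probability ~2^-(j+1).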
int numLevels = (int) Math.round(Math.log(size)/Math.log(2));
levels = new SSparseRecoveryEstimator[numLevels];
for (int i=0; i<numLevels; i++)
levels[i] = new SSparseRecoveryEstimator(sparsity*2, k, hasher);
}
/**
* Attempt to recover a nonzero vector from one of the L0 Sampler's levels,
* then select a single item from it.
* @return an {index, value} pair, or null if nothing could be recovered
*/
public int[] recover() {
return recover(rand.nextInt(size));
}
/**
* Attempt to recover a nonzero vector from one of the L0 Sampler's levels,
* then select a single item from it.
* @param i currently unused; the levels are always scanned in order
* @return an {index, value} pair, or null if nothing could be recovered
*/
public int[] recover(int i) {
List<OneSparseRecoveryEstimator> vector = null;
for (SSparseRecoveryEstimator level : levels) {
if (level.isSSparse()) {
vector = level.recover();
if (!vector.isEmpty())
break;
}
}
if (vector != null && !vector.isEmpty())
return select(vector);
else
return null;
}
/**
* Update the L0 sampler. This process generally aligns with the 'sample'
* step as described in section 2 of the paper.
* @param i index of the coordinate to update, in the range (0, size]
* @param value amount to add to coordinate i (negative values subtract)
*/
public void update(int i, int value) {
if (!(i > 0 && i <= size))
throw new IllegalArgumentException("Update index " + i + " outside of size " + size);
// Hash i once; the value is loop-invariant across levels.
int h = (hasher.hash(String.valueOf(i)) % size) + 1;
for (int j = 0; j < levels.length; j++) {
// Level j receives i only if its hash lands within the top
// size/2^(j+1) of the hash space: geometric subsampling by level.
if (size * Math.pow(2, -(j + 1)) >= h)
levels[j].update(i, value);
}
}
/**
* Attempts to select (and delete) items from the data structure until
* either the data structure is empty or no more items can be recovered.
* @return the list of recovered {index, value} pairs
*/
public List<Integer[]> recursiveSelection() {
List<Integer[]> sample = new ArrayList<Integer[]>();
while (true) {
int[] selection = recover();
if (selection == null)
break;
sample.add(new Integer[]{selection[0], selection[1]});
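// Remove the recovered item by applying its negation, so subsequent
// recover() calls can surface different items.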
update(selection[0], -selection[1]);
}
return sample;
}
/**
* Given a vector of recovered items, selects the one with the lowest hash value.
* @param vector the one-sparse estimators recovered from a level
* @return the selected {index, value} pair
*/
private int[] select(List<OneSparseRecoveryEstimator> vector) {
Collections.sort(vector, sreComparator);
OneSparseRecoveryEstimator item = vector.get(0);
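// For a one-sparse vector, iota accumulates i*value and phi accumulates
// value, so iota/phi recovers the index of the single nonzero coordinate.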
int i = item.getIota() / item.getPhi();
return new int[]{i, item.getPhi()};
}
private final Comparator<OneSparseRecoveryEstimator> sreComparator = new Comparator<OneSparseRecoveryEstimator>() {
public int compare(OneSparseRecoveryEstimator o1, OneSparseRecoveryEstimator o2) {
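// Compare by the hash of each estimator's recovered index, so the
// "lowest hash" item is selected deterministically.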
int h1 = hasher.hash(String.valueOf(o1.getIota()/o1.getPhi()));
int h2 = hasher.hash(String.valueOf(o2.getIota()/o2.getPhi()));
if (h1 < h2) return -1;
else if (h1 > h2) return 1;
else return 0;
}
};
@Override
public String toString() {
StringBuilder sb = new StringBuilder("L0Sampler{levels=[");
for (int i = 0; i < levels.length; i++) {
if (i > 0) sb.append(", ");
sb.append(String.format("level %d: %s", i, levels[i]));
}
sb.append(String.format("], size=%d, sparsity=%d, k=%d}", size, sparsity, k));
return sb.toString();
}
}