package org.streaminer.stream.sampler;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Random;

/**
 * Fixed-size uniform sampler over a stream whose length is not known in
 * advance (Knuth's "Algorithm S" as presented on Rosetta Code).
 *
 * <p>The first {@code sampleSize} items offered are stored as-is. Every later
 * item is accepted with probability {@code sampleSize / count}, where
 * {@code count} is the number of items offered so far, and an accepted item
 * overwrites a uniformly chosen slot of the sample. As a result every item
 * offered so far has the same chance of being in the sample at any point in
 * time, although the content of the sample changes as the stream advances.
 *
 * <p>NOTE(review): the original description called this "sampling with
 * replacement" (hence, presumably, the "WR" in the class name). The code
 * stores each offered item at most once, so "replacement" here can only refer
 * to replacing slots of the sample - confirm the intended terminology.
 *
 * <p>This class performs no synchronization of its own; all instances share
 * one static {@link Random}.
 *
 * Source code: http://rosettacode.org/wiki/Knuth's_algorithm_S#Java
 * Reference:
 *   Knuth, Donald Ervin. The art of computer programming. Pearson Education, 2005.
 *
 * @param <T> type of the sampled items
 */
public class WRSampler<T> implements ISampleList<T> {
    private static final Random rand = new Random();

    /** Current content of the sample; never holds more than {@code sampleSize} items. */
    private final List<T> sample;

    /** Maximum number of items kept in the sample. */
    private final int sampleSize;

    /**
     * Number of items offered so far. Kept as a {@code long}: an {@code int}
     * counter wraps to a negative value after 2^31-1 items, which would make
     * the "still filling" test succeed again and let the sample grow without
     * bound.
     */
    private long count = 0;

    /**
     * Creates an empty sampler.
     *
     * @param sampleSize maximum number of items to keep; must not be negative
     * @throws IllegalArgumentException if {@code sampleSize} is negative
     */
    public WRSampler(int sampleSize) {
        if (sampleSize < 0) {
            throw new IllegalArgumentException("sampleSize must not be negative: " + sampleSize);
        }
        this.sampleSize = sampleSize;
        this.sample = new ArrayList<T>(sampleSize);
    }

    /**
     * Offers one item to the sampler.
     *
     * @param item the next item of the stream
     */
    public void sample(T item) {
        count++;
        if (count <= sampleSize) {
            // Still filling: keep every item until the sample is full.
            sample.add(item);
        } else if (nextLong(count) < sampleSize) {
            // Accepted with probability sampleSize / count; evict a uniformly
            // chosen slot. Never reached when sampleSize is 0, so the bound
            // passed to nextInt is always positive.
            sample.set(rand.nextInt(sampleSize), item);
        }
    }

    /**
     * Offers several items, in array order, exactly as if
     * {@link #sample(Object)} had been called once per element.
     *
     * @param items the items to offer
     */
    public void sample(T... items) {
        for (T item : items) {
            sample(item);
        }
    }

    /**
     * Returns the current sample.
     *
     * @return the internal list itself (not a copy), so it reflects later
     *         calls to {@code sample}
     */
    public Collection<T> getSamples() {
        return sample;
    }

    /**
     * Returns the configured sample size.
     *
     * @return the {@code sampleSize} passed to the constructor, which may be
     *         larger than the number of items currently held
     */
    public int getSize() {
        return sampleSize;
    }

    /**
     * Draws a uniformly distributed value in {@code [0, bound)}.
     *
     * @param bound exclusive upper bound; must be positive
     * @return a pseudo-random value between 0 (inclusive) and {@code bound} (exclusive)
     */
    private static long nextLong(long bound) {
        if (bound <= Integer.MAX_VALUE) {
            // Same draw the int-based counter used.
            return rand.nextInt((int) bound);
        }
        // Rejection sampling on a non-negative 63-bit value to avoid modulo
        // bias, mirroring the approach documented for Random.nextInt(int).
        long bits;
        long value;
        do {
            bits = rand.nextLong() >>> 1;
            value = bits % bound;
        } while (bits - value + (bound - 1) < 0L);
        return value;
    }
}