// MultiRandSelector.java
package plume;
import java.util.*;
/**
* Performs uniform random selection over an iterator, where the objects in
* the iteration may be partitioned so that the random selection chooses
* the same number from each group.
*
* <p>For example, given data about incomes by state, it may be more
* useful to select 1000 people from each state rather than 50,000 from the
* nation. As another example, for selecting invocations in a Daikon trace
* file, it may be more useful to select an equal number of samples per
* program point.
* <p>The performance is equal to running a set of RandomSelector
* Objects, one for each bucket, as well as some overhead for
* determining which bucket to assign to each Object in the iteration.
*
* <p>To use this class, call this.accept() on every Object in the
* iteration to be sampled. Then, call valuesIter() to receive an
* iteration of all the values selected by the random selection.
*
* @see RandomSelector
**/
public class MultiRandSelector<T> {

  /** Per-bucket sample size; stays at -1 when the selector runs in coin-toss mode. */
  private final int num_elts;

  /** True when each bucket's selector is built from a keep probability instead of a count. */
  private final boolean coin_toss_mode;

  /** Per-element keep probability; stays at -1.0 when the selector runs in fixed-count mode. */
  private final double keep_probability;

  /** The single Random instance handed to every per-bucket RandomSelector. */
  private final Random seed;

  /** Maps each accepted object to the bucket (equivalence class) it belongs to. */
  private final Partitioner<T,T> eq;

  /** One RandomSelector per bucket seen so far, keyed by the bucket representative. */
  private final HashMap<T,RandomSelector<T>> map;

  /**
   * Creates a fixed-count selector that uses a freshly created Random.
   *
   * @param num_elts the number of elements to select from each bucket
   * @param eq partitioner that determines how to partition the objects
   *     from the iteration
   */
  public MultiRandSelector (int num_elts, Partitioner<T,T> eq) {
    this (num_elts, new Random(), eq);
  }

  /**
   * Creates a coin-toss selector that uses a freshly created Random.
   *
   * @param keep_prob the probability passed to each bucket's RandomSelector
   * @param eq partitioner that determines how to partition the objects
   *     from the iteration
   */
  public MultiRandSelector (double keep_prob, Partitioner<T,T> eq) {
    this (keep_prob, new Random(), eq);
  }

  /**
   * Creates a fixed-count selector.
   *
   * @param num_elts the number of elements to select from each bucket
   * @param r the Random instance shared by all per-bucket selectors
   * @param eq partitioner that determines how to partition the objects
   *     from the iteration
   */
  public MultiRandSelector (int num_elts, Random r,
                            Partitioner<T,T> eq) {
    this (false, num_elts, -1.0, r, eq);
  }

  /**
   * Creates a coin-toss selector.
   *
   * @param keep_prob the probability passed to each bucket's RandomSelector
   * @param r the Random instance shared by all per-bucket selectors
   * @param eq partitioner that determines how to partition the objects
   *     from the iteration
   */
  public MultiRandSelector (double keep_prob, Random r,
                            Partitioner<T,T> eq) {
    this (true, -1, keep_prob, r, eq);
  }

  /**
   * Common initialization for every public constructor.  The setting that
   * does not apply to the chosen mode is passed as its -1 sentinel.
   */
  private MultiRandSelector (boolean coin_toss_mode, int num_elts,
                             double keep_probability, Random seed,
                             Partitioner<T,T> eq) {
    this.coin_toss_mode = coin_toss_mode;
    this.num_elts = num_elts;
    this.keep_probability = keep_probability;
    this.seed = seed;
    this.eq = eq;
    this.map = new HashMap<T,RandomSelector<T>>();
  }

  /**
   * Feeds every remaining element of the iterator to {@link #accept}.
   *
   * @param iter source of the objects to sample; it is consumed completely
   */
  public void acceptIter (Iterator<T> iter) {
    while (iter.hasNext()) {
      T element = iter.next();
      accept (element);
    }
  }

  /**
   * Offers one object to the selector of the bucket it belongs to.  The
   * bucket is obtained from the partitioner; an object whose bucket is
   * null is silently ignored.  The selector for a bucket is created the
   * first time that bucket is seen.
   *
   * @param next the object to consider for selection
   */
  public void accept (T next) {
    T bucket = eq.assignToBucket (next);
    if (bucket != null) {
      selectorFor (bucket).accept (next);
    }
  }

  /**
   * Returns the selector registered for the given bucket, creating and
   * registering one (according to the current mode) if none exists yet.
   */
  private RandomSelector<T> selectorFor (T bucket) {
    RandomSelector<T> selector = map.get (bucket);
    if (selector != null) {
      return selector;
    }
    if (coin_toss_mode) {
      selector = new RandomSelector<T> (keep_probability, seed);
    } else {
      selector = new RandomSelector<T> (num_elts, seed);
    }
    map.put (bucket, selector);
    return selector;
  }

  // TODO: is there any reason not to simply return a copy?
  /**
   * Returns the internal bucket-to-selector map itself, not a copy, so
   * changes made through the returned map are visible to this object.
   * NOT safe from concurrent modification.
   *
   * @return the live map from bucket to that bucket's RandomSelector
   */
  public Map<T,RandomSelector<T>> values () {
    return map;
  }

  /**
   * Returns an iterator of all objects selected.  The values of every
   * bucket's selector are gathered into a new list at the time of the
   * call, and the iterator walks that list.
   *
   * @return an iterator over the selected objects of all buckets
   */
  public Iterator<T> valuesIter() {
    List<T> selected = new ArrayList<T>();
    for (RandomSelector<T> selector : map.values()) {
      selected.addAll (selector.getValues());
    }
    return selected.iterator();
  }
}