package plume; import java.util.*; /** * RandomSelector selects k elements uniformly at random from * an arbitrary iterator, using O(k) space. A naive algorithm would use O(n) * space. For example, selecting 1 element from a FileStream * containing 1000 elements will take O(1) space. The class takes as * input the number k during initialization and then can accept() any * number of Objects in the future. At any point in time, getValues() * will either return k randomly selected elements from the elements * previous accepted or if accept() was called fewer than k times, will * return all elements previously accepted. * * <p>The random selection is independent between every constructed * instance of RandomSelector objects, but for the same instance, * multiple calls to getValues() are not independent. Making two calls * to consecutive getValues() without an accept() in between will * return two new Lists containing the same elements. * * <p>A second mode allows for a fixed probability of randomly keeping * each item as opposed to a fixed number of samples. * * <P>SPECFIELDS: * <BR>current_values : Set : The values chosen based on the Objects observed * <BR>number_observed : int : The number of Objects observed * <BR>number_to_take : int : The number of elements to choose ('k' above) * <BR>keep_probability: double : The percentage of elements to keep * <BR>selector_mode : * {FIXED,PERCENT} : either fixed amount of samples or fixed percent. * * <P>Example use: * <br> // randomly selects 100 lines of text from a file * <pre> * List selectedLines = null; * try { * BufferedReader br = new BufferedReader * (new FileReader ("myfile.txt")); * RandomSelector selector = new RandomSelector (100); * while (br.ready()) { * selector.accept (br.readLine()); * } * selectedLines = selector.getValues(); * } * catch (IOException e2) { e2.printStackTrace(); } * </pre> **/ public class RandomSelector<T> { // Rep Invariant: values != null && values.size() <= num_elts && // ((num_elts == -1 && coin_toss_mode == true) || // (keep_probability == -1.0 && coin_toss_mode == false)) // Abstraction Function: // 1. for all elements, 'val' of AF(current_values), // this.values.indexOf (val) != -1 // 2. AF(number_observed) = this.observed // 3. AF(number_to_take) = this.num_elts // 4. AF(keep_probability) = this.keep_probability // 5. AF(selector_mode) = fixed amount if coin_toss_mode == true // fixed percentage if coin_toss_mode == false private int num_elts = -1; private int observed; private Random generator; private ArrayList<T> values; private boolean coin_toss_mode = false; private double keep_probability = -1.0; /** @param num_elts The number of elements intended to be selected * from the input elements * * Sets 'number_to_take' = num_elts **/ public RandomSelector (int num_elts) { this (num_elts, new Random()); } /** @param num_elts The number of elements intended to be selected * from the input elements. * @param r The seed to give for random number generation. * * Sets 'number_to_take' = num_elts **/ public RandomSelector (int num_elts, Random r) { values = new ArrayList<T>(); this.num_elts = num_elts; observed = 0; generator = r; } /** @param keep_probability The probability that each element is * selected from the oncoming Iteration. * @param r The seed to give for random number generation. **/ public RandomSelector (double keep_probability, Random r) { values = new ArrayList<T>(); this.keep_probability = keep_probability; coin_toss_mode = true; observed = 0; generator = r; } /** <P>When in fixed sample mode, increments the number of * observed elements i by 1, then with probability k / i, the * Object 'next' will be added to the currently selected values * 'current_values' where k is equal to 'number_to_take'. If the * size of current_values exceeds number_to_take, then one of the * existing elements in current_values will be removed at random. * * * <P>When in probability mode, adds next to 'current_values' with * probability equal to 'keep_probability'. * **/ public void accept (T next) { // if we are in coin toss mode, then we want to keep // with probability == keep_probability. if (coin_toss_mode) { if (generator.nextDouble() < keep_probability) { values.add (next); // System.out.println ("ACCEPTED " + keep_probability ); } else { // System.out.println ("didn't accept " + keep_probability ); } return; } // in fixed sample mode, the i-th element has a k/i chance // of being accepted where k is number_to_take. if (generator.nextDouble() < ((double) num_elts / (++observed))) { if (values.size() < num_elts) { values.add (next); } else { int rem = (int) (values.size() * generator.nextDouble()); values.set (rem, next); } } // do nothing if the probability condition is not met } /** Returns current_values, modifies none. **/ public List<T> getValues() { // avoid concurrent mod errors and rep exposure ArrayList<T> ret = new ArrayList<T>(); ret.addAll (values); return ret; } }