/*
 * Copyright (c) 2008, SQL Power Group Inc.
 *
 * This file is part of SQL Power Library.
 *
 * SQL Power Library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * SQL Power Library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package ca.sqlpower.util.reservoir;

/**
 * Common contract for reservoir sampling algorithms.
 * <p>
 * A reservoir is a container that consumes an indefinite number of items one
 * after another while keeping, at all times, a statistically valid random
 * sample of <i>n</i> of them. The sample size <i>n</i> must be decided up
 * front; the size of the population being sampled does not need to be known
 * in advance.
 * <p>
 * Throughout the method documentation, <i>N</i> denotes the total number of
 * records considered for sampling, i.e. the size of the population.
 *
 * @param <T> the type of the item being sampled
 */
public interface Reservoir<T> {

    /**
     * Builds a random sample and hands it back to the caller. The returned
     * array has length {@code min(n, N)}.
     *
     * @param dataSource
     *            The data source whose records are sampled
     * @param n
     *            The requested sample size (<i>n</i> in the interface
     *            description)
     * @return A statistically valid random sample of the records in the given
     *         data source. When <i>n</i> &gt; <i>N</i>, the array contains
     *         every record from the data source.
     * @throws ReservoirDataException
     *             If accessing the given data source throws an exception
     */
    T[] getSample(ReservoirDataSource<T> dataSource, int n) throws ReservoirDataException;

    /**
     * Sets the seed used for random number generation in this reservoir.
     * Sampling the same data source with the same seed selects the same set
     * of records. By default the seed comes from the default
     * {@code java.util.Random} constructor, which yields a different sample
     * each time.
     * <p>
     * When one Reservoir instance is reused for several samples and the same
     * sample is wanted on successive uses, call this method before every call
     * to {@link #getSample(ReservoirDataSource, int)}.
     *
     * @param s
     *            The seed to use for the random number generator
     */
    void setRandomSeed(long s);
}