/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.tools.math.sampling;
import com.rapidminer.tools.RandomGenerator;
/**
* Implements iterative, linear sampling without replacement. The
* size of the population and the size or fraction of the sample
* have to be given as parameters.
*
* The algorithm is based upon A.F. Bissell (1986): Ordered Random
* Selection Without Replacement. In: Applied Statistics, 35 (1),
* pp. 73-75.
*
* @author Tobias Malbrecht
*/
public class OrderedSamplingWithoutReplacement {

    /**
     * The random generator which delivers random numbers
     * between 0 and 1.
     */
    private final RandomGenerator randomGenerator;

    /**
     * Number of those population elements which are still
     * under consideration and from which the elements are
     * drawn. Decremented on every call of {@link #acceptElement()}
     * that examines an element.
     */
    private int populationCounter;

    /**
     * Number of elements among those still under consideration
     * which will not be included in the sample. Decremented
     * whenever an element is rejected.
     */
    private int notRequiredElementsCounter;

    /**
     * Running probability that none of the elements examined since
     * the last accepted element is drawn. Reset to 1 after every
     * accepted element.
     */
    private double probability;

    /**
     * The random threshold the running probability is compared with.
     * It is computed as {@code 1 - nextDouble()} and redrawn after
     * every accepted element.
     */
    private double randomValue;

    /**
     * Constructor for an absolute number of elements.
     *
     * @param randomGenerator A RandomGenerator.
     * @param populationSize The size of the population.
     * @param sampleSize The size of the sample.
     */
    public OrderedSamplingWithoutReplacement(RandomGenerator randomGenerator, int populationSize, int sampleSize) {
        this.randomGenerator = randomGenerator;
        this.populationCounter = populationSize;
        this.notRequiredElementsCounter = populationSize - sampleSize;
        this.probability = 1;
        this.randomValue = 1 - randomGenerator.nextDouble();
    }

    /**
     * Constructor for a relative fraction of elements. The absolute
     * sample size is the product of population size and ratio, rounded
     * to the nearest integer.
     *
     * @param randomGenerator A RandomGenerator.
     * @param populationSize The size of the population.
     * @param sampleRatio The size of the sample relative to the population size.
     */
    public OrderedSamplingWithoutReplacement(RandomGenerator randomGenerator, int populationSize, double sampleRatio) {
        this(randomGenerator, populationSize, (int) Math.round(populationSize * sampleRatio));
    }

    /**
     * Draws a sample from the indices {@code 0 .. populationSize - 1}
     * and returns the selected indices in ascending order.
     *
     * If fewer than {@code sampleSize} indices can be drawn (i.e. the
     * requested sample is larger than the population), the returned
     * array only contains the indices actually drawn instead of being
     * padded with zeros.
     *
     * @param randomGenerator A RandomGenerator.
     * @param populationSize The size of the population.
     * @param sampleSize The size of the sample.
     * @return the sampled indices in ascending order
     * @throws NegativeArraySizeException if sampleSize is negative
     */
    public static int[] getSampledIndices(RandomGenerator randomGenerator, int populationSize, int sampleSize) {
        OrderedSamplingWithoutReplacement sampling = new OrderedSamplingWithoutReplacement(randomGenerator, populationSize, sampleSize);
        int[] result = new int[sampleSize];
        int rCounter = 0;
        // Stop as soon as the sample is complete: all remaining elements
        // would be rejected anyway, and the result array can never be overrun.
        for (int i = 0; i < populationSize && rCounter < sampleSize; i++) {
            if (sampling.acceptElement()) {
                result[rCounter++] = i;
            }
        }
        if (rCounter < sampleSize) {
            // Fewer indices than requested were available; trailing zeros
            // would be indistinguishable from a repeated index 0.
            int[] trimmed = new int[rCounter];
            System.arraycopy(result, 0, trimmed, 0, rCounter);
            return trimmed;
        }
        return result;
    }

    /**
     * Decides whether the next element of the population is included
     * in the sample. Has to be called once per population element, in
     * order.
     *
     * @return flag whether to include an element in the sample;
     *         always {@code false} once the whole population has been
     *         examined or the sample is already complete
     */
    public boolean acceptElement() {
        if (populationCounter <= 0) {
            // Population exhausted: without this guard the ratio below
            // becomes 0/0 = NaN and every further call would accept.
            return false;
        }
        if (notRequiredElementsCounter >= populationCounter) {
            // Sample already complete: every remaining element is surplus.
            // Deciding this without the random threshold avoids accepting an
            // extra element when the threshold happens to be exactly 1.
            populationCounter--;
            notRequiredElementsCounter--;
            return false;
        }
        probability *= ((double) notRequiredElementsCounter) / ((double) populationCounter);
        if (probability > randomValue) {
            populationCounter--;
            notRequiredElementsCounter--;
            return false;
        } else {
            populationCounter--;
            probability = 1;
            randomValue = 1 - randomGenerator.nextDouble();
            return true;
        }
    }
}