/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 *
 * Complete list of developers available at our web site:
 *
 * http://rapidminer.com
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.tools.math.sampling;

import com.rapidminer.tools.RandomGenerator;


/**
 * Implements iterative, linear sampling without replacement. The size of the population and the
 * size or fraction of the sample have to be given as parameters.
 *
 * The population is visited in order: {@link #acceptElement()} has to be called once per
 * population element and tells whether that element belongs to the sample.
 *
 * The algorithm is based upon A.F. Bissell (1986): Ordered Random Selection Without Replacement.
 * In: Applied Statistics, 35 (1), pp. 73-75.
 *
 * @author Tobias Malbrecht
 */
public class OrderedSamplingWithoutReplacement {

	/**
	 * The random generator which delivers random numbers between 0 and 1.
	 */
	private final RandomGenerator randomGenerator;

	/**
	 * Number of those population elements which are still under consideration and from which the
	 * elements are drawn.
	 */
	private int populationCounter;

	/**
	 * Number of elements in the elements under consideration which will be not included in the
	 * sample.
	 */
	private int notRequiredElementsCounter;

	/**
	 * Running probability that none of the elements visited since the last accepted element (or
	 * since construction) is drawn. Reset to 1 whenever an element is accepted.
	 */
	private double probability;

	/**
	 * The random threshold the running probability is compared against. It is computed as
	 * {@code 1 - randomGenerator.nextDouble()} and redrawn whenever an element is accepted.
	 */
	private double randomValue;

	/**
	 * Constructor for an absolute number of elements.
	 *
	 * @param randomGenerator
	 *            A RandomGenerator.
	 * @param populationSize
	 *            The size of the population.
	 * @param sampleSize
	 *            The size of the sample.
	 */
	public OrderedSamplingWithoutReplacement(RandomGenerator randomGenerator, int populationSize, int sampleSize) {
		this.randomGenerator = randomGenerator;
		this.populationCounter = populationSize;
		this.notRequiredElementsCounter = populationSize - sampleSize;
		this.probability = 1;
		this.randomValue = 1 - randomGenerator.nextDouble();
	}

	/**
	 * Constructor for a relative fraction of elements. The absolute sample size is
	 * {@code populationSize * sampleRatio}, rounded with {@link Math#round(double)}.
	 *
	 * @param randomGenerator
	 *            A RandomGenerator.
	 * @param populationSize
	 *            The size of the population.
	 * @param sampleRatio
	 *            The size of the sample relative to the population size.
	 */
	public OrderedSamplingWithoutReplacement(RandomGenerator randomGenerator, int populationSize,
			double sampleRatio) {
		this(randomGenerator, populationSize, (int) Math.round(populationSize * sampleRatio));
	}

	/**
	 * Draws an ordered sample of indices from the range {@code [0, populationSize)}.
	 *
	 * @param randomGenerator
	 *            A RandomGenerator.
	 * @param populationSize
	 *            The size of the population.
	 * @param sampleSize
	 *            The size of the sample.
	 * @return an array of length {@code sampleSize} holding the accepted indices in ascending
	 *         order. If {@code sampleSize} exceeds {@code populationSize}, only the first
	 *         {@code populationSize} entries are written and the rest keep the default value 0.
	 * @throws NegativeArraySizeException
	 *             if {@code sampleSize} is negative
	 */
	public static int[] getSampledIndices(RandomGenerator randomGenerator, int populationSize, int sampleSize) {
		OrderedSamplingWithoutReplacement sampling = new OrderedSamplingWithoutReplacement(randomGenerator,
				populationSize, sampleSize);
		int[] result = new int[sampleSize];
		int rCounter = 0;
		// Stop as soon as the sample is complete: the remaining elements can only be rejected, and
		// the bound guarantees that result is never written past its end.
		for (int i = 0; i < populationSize && rCounter < sampleSize; i++) {
			if (sampling.acceptElement()) {
				result[rCounter++] = i;
			}
		}
		return result;
	}

	/**
	 * Decides whether the next population element is included in the sample. Has to be called
	 * once for each element of the population, in order.
	 *
	 * Once the requested number of elements has been accepted, or once the whole population has
	 * been visited, every further call returns {@code false}.
	 *
	 * @return flag whether to include an element in the sample
	 */
	public boolean acceptElement() {
		// Nothing left to draw: either the population is exhausted (which would otherwise divide
		// by zero below) or the sample is already complete. Without this guard a random threshold
		// of exactly 1.0 would let a surplus element slip through.
		if (populationCounter <= 0 || notRequiredElementsCounter >= populationCounter) {
			populationCounter--;
			notRequiredElementsCounter--;
			return false;
		}

		probability *= ((double) notRequiredElementsCounter) / ((double) populationCounter);
		populationCounter--;
		if (probability > randomValue) {
			// element is skipped, so one fewer "not required" element remains
			notRequiredElementsCounter--;
			return false;
		}

		// element is taken: restart the running probability and draw a fresh threshold
		probability = 1;
		randomValue = 1 - randomGenerator.nextDouble();
		return true;
	}
}