/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.tools.math.sampling;

import com.rapidminer.tools.RandomGenerator;

/**
 * Implements iterative, linear sampling without replacement. The
 * size of the population and the size or fraction of the sample
 * have to be given as parameters.
 *
 * The population is visited once, in order; for every visited element
 * {@link #acceptElement()} decides whether it belongs to the sample.
 *
 * The algorithm is based upon A.F. Bissell (1986): Ordered Random
 * Selection Without Replacement. In: Applied Statistics, 35 (1),
 * pp. 73-75.
 *
 * @author Tobias Malbrecht
 * @version $Id: OrderedSamplingWithoutReplacement.java,v 1.4 2008/07/15 15:07:24 ingomierswa Exp $
 */
public class OrderedSamplingWithoutReplacement {

	/**
	 * The random generator which delivers random numbers
	 * between 0 and 1.
	 */
	private final RandomGenerator randomGenerator;

	/**
	 * Number of those population elements which are still
	 * under consideration and from which the elements are
	 * drawn.
	 */
	private int populationCounter;

	/**
	 * Number of elements in the elements under consideration
	 * which will be not included in the sample.
	 */
	private int notRequiredElementsCounter;

	/**
	 * Probability that none of the elements visited since the last
	 * accepted element is drawn from the population.
	 */
	private double probability;

	/**
	 * The random threshold the running probability is compared with.
	 * It is computed as <code>1 - nextDouble()</code>.
	 */
	private double randomValue;

	/**
	 * Constructor for an absolute number of elements.
	 *
	 * A sample size larger than the population size makes the sampling
	 * accept every element; a negative sample size makes it reject every
	 * element.
	 *
	 * @param randomGenerator A RandomGenerator.
	 * @param populationSize The size of the population.
	 * @param sampleSize The size of the sample.
	 */
	public OrderedSamplingWithoutReplacement(RandomGenerator randomGenerator, int populationSize, int sampleSize) {
		this.randomGenerator = randomGenerator;
		this.populationCounter = populationSize;
		this.notRequiredElementsCounter = populationSize - sampleSize;
		this.probability = 1;
		this.randomValue = 1 - randomGenerator.nextDouble();
	}

	/**
	 * Constructor for a relative fraction of elements. The absolute
	 * sample size is <code>populationSize * sampleRatio</code>, rounded
	 * to the nearest integer.
	 *
	 * @param randomGenerator A RandomGenerator.
	 * @param populationSize The size of the population.
	 * @param sampleRatio The size of the sample relative to the population size.
	 */
	public OrderedSamplingWithoutReplacement(RandomGenerator randomGenerator, int populationSize, double sampleRatio) {
		this(randomGenerator, populationSize, (int) Math.round(populationSize * sampleRatio));
	}

	/**
	 * Draws a sample from the indices <code>0 .. populationSize - 1</code>
	 * and returns the selected indices in ascending order.
	 *
	 * @param randomGenerator A RandomGenerator.
	 * @param populationSize The size of the population.
	 * @param sampleSize The size of the sample.
	 * @return the selected indices; the array has <code>sampleSize</code>
	 *         entries, or fewer if the population holds fewer elements
	 *         than requested
	 * @throws NegativeArraySizeException if <code>sampleSize</code> is negative
	 */
	public static int[] getSampledIndices(RandomGenerator randomGenerator, int populationSize, int sampleSize) {
		OrderedSamplingWithoutReplacement sampling = new OrderedSamplingWithoutReplacement(randomGenerator, populationSize, sampleSize);
		int[] result = new int[sampleSize];
		int rCounter = 0;
		// Once the sample is complete every further element is rejected without
		// consuming random numbers, so the scan can stop early.
		for (int i = 0; i < populationSize && rCounter < sampleSize; i++) {
			if (sampling.acceptElement()) {
				result[rCounter++] = i;
			}
		}
		if (rCounter < result.length) {
			// Fewer elements than requested were available: drop the unused
			// tail, which would otherwise read as index 0 selected repeatedly.
			int[] trimmed = new int[rCounter];
			System.arraycopy(result, 0, trimmed, 0, rCounter);
			return trimmed;
		}
		return result;
	}

	/**
	 * Decides whether the next element of the population is included in
	 * the sample. Has to be called once per population element, in order.
	 *
	 * @return flag whether to include an element in the sample;
	 *         always <code>false</code> once the sample is complete or the
	 *         population is exhausted
	 */
	public boolean acceptElement() {
		if (populationCounter <= 0) {
			// Population exhausted: nothing is left to draw, and the ratio
			// below would divide by zero.
			return false;
		}
		if (notRequiredElementsCounter >= populationCounter) {
			// The sample is already complete, so all remaining elements are
			// surplus. Rejecting them here avoids relying on the comparison
			// below, which would wrongly accept if randomValue were exactly 1.
			populationCounter--;
			notRequiredElementsCounter--;
			return false;
		}
		probability *= ((double) notRequiredElementsCounter) / ((double) populationCounter);
		if (probability > randomValue) {
			populationCounter--;
			notRequiredElementsCounter--;
			return false;
		} else {
			// Element accepted: restart the running product and draw a fresh
			// threshold for the next run of rejected elements.
			populationCounter--;
			probability = 1;
			randomValue = 1 - randomGenerator.nextDouble();
			return true;
		}
	}
}