/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.tree;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
/**
* Selects a random attribute subset.
*
* @author Ingo Mierswa, Gisa Schaefer
*/
public class RandomAttributeSubsetPreprocessing implements AttributePreprocessing {
private Random random;
private double subsetRatio;
private boolean useHeuristicRation;
/**
* Stores the parameters for using them in {@link #preprocess}.
*
* @param useHeuristicRation
* if <code>true</code> the subsetRatio is ignored and <code>log(m) + 1</code> out of
* <code>m</code> entries are randomly selected by {@link #preprocess}
* @param subsetRatio
* the percentage of entries that are randomly selected by {@link #preprocess} - only
* used if useHeuristicRatio is <code>false</code>
* @param random
* the {@link Random} used for the random selection of the entries in
* {@link #preprocess}
*/
public RandomAttributeSubsetPreprocessing(boolean useHeuristicRation, double subsetRatio, Random random) {
this.subsetRatio = subsetRatio;
this.random = random;
this.useHeuristicRation = useHeuristicRation;
}
@Override
public int[] preprocess(int[] attributeSelection) {
double usedSubsetRatio = subsetRatio;
if (useHeuristicRation) {
double desiredNumber = Math.floor(Math.log(attributeSelection.length) / Math.log(2) + 1);
usedSubsetRatio = desiredNumber / attributeSelection.length;
}
List<Integer> indexSubset = new LinkedList<>();
for (int i = 0; i < attributeSelection.length; i++) {
if (random.nextDouble() <= usedSubsetRatio) {
indexSubset.add(i);
}
}
// ensure that at least one attribute is left
if (indexSubset.size() == 0) {
int index = random.nextInt(attributeSelection.length);
return new int[] { attributeSelection[index] };
}
// store the entries at the selected indices
int[] attributeSubset = new int[indexSubset.size()];
int counter = 0;
for (int index : indexSubset) {
attributeSubset[counter] = attributeSelection[index];
counter++;
}
return attributeSubset;
}
}