/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.meta;
import java.util.List;
import java.util.Random;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.MappedExampleSet;
import com.rapidminer.example.set.SplittedExampleSet;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorCapability;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeMatrix;
import com.rapidminer.tools.RandomGenerator;
/**
* This operator uses a given cost matrix to compute label predictions according to classification
* costs. The method used by this operator is similar to MetaCost as described by Pedro Domingos.
*
* @author Helge Homburg
*/
public class MetaCost extends AbstractMetaLearner {

    /** The parameter name for "The cost matrix in Matlab single line format" */
    public static final String PARAMETER_COST_MATRIX = "cost_matrix";

    /**
     * The parameter name for "File". This key is not registered by {@link #getParameterTypes()}
     * of this class; it is kept because it is part of the public constants.
     */
    public static final String PARAMETER_COST_MATRIX_FILE_LOCATION = "cost_matrix_file_location";

    /**
     * The parameter name for "Fraction of examples used for training. Must be greater than 0
     * and should be lower than 1."
     */
    public static final String PARAMETER_USE_SUBSET_FOR_TRAINING = "use_subset_for_training";

    /** The parameter name for "The number of iterations (base models)." */
    public static final String PARAMETER_ITERATIONS = "iterations";

    /**
     * The parameter name for "Use sampling with replacement (true) or without (false)"
     */
    public static final String PARAMETER_SAMPLING_WITH_REPLACEMENT = "sampling_with_replacement";

    /**
     * Creates the operator from its description.
     *
     * @param description
     *            the operator description handed on to the super class
     */
    public MetaCost(OperatorDescription description) {
        super(description);
    }

    /**
     * Trains one base model per iteration on a sample of the input set and wraps all of them,
     * together with the configured cost matrix, into a {@link MetaCostModel}.
     *
     * @param inputSet
     *            the example set the samples are drawn from
     * @return a {@link MetaCostModel} built from the input set, the base models and the cost
     *         matrix
     * @throws OperatorException
     *             if reading a parameter or running the inner learner fails
     */
    @Override
    public Model learn(ExampleSet inputSet) throws OperatorException {
        int iterations = getParameterAsInt(PARAMETER_ITERATIONS);
        double subsetRatio = getParameterAsDouble(PARAMETER_USE_SUBSET_FOR_TRAINING);
        Model[] models = new Model[iterations];

        // get cost matrix
        double[][] costMatrix = getParameterAsMatrix(PARAMETER_COST_MATRIX);

        // perform bagging operation
        if (getParameterAsBoolean(PARAMETER_SAMPLING_WITH_REPLACEMENT)) {
            trainOnBootstrapSamples(inputSet, subsetRatio, models);
        } else {
            trainOnShuffledSubsets(inputSet, subsetRatio, models);
        }
        return new MetaCostModel(inputSet, models, costMatrix);
    }

    /**
     * Fills {@code models} with base models trained on samples drawn with replacement.
     */
    private void trainOnBootstrapSamples(ExampleSet inputSet, double subsetRatio, Model[] models)
            throws OperatorException {
        // one generator is shared by all iterations, so consecutive mappings differ
        Random randomGenerator = RandomGenerator.getRandomGenerator(this);
        // truncates towards zero: a very small ratio can yield a sample size of 0
        int size = (int) (inputSet.size() * subsetRatio);
        for (int i = 0; i < models.length; i++) {
            int[] mapping = MappedExampleSet.createBootstrappingMapping(inputSet, size, randomGenerator);
            MappedExampleSet currentSampleSet = new MappedExampleSet(inputSet, mapping);
            models[i] = applyInnerLearner(currentSampleSet);
            inApplyLoop();
        }
    }

    /**
     * Fills {@code models} with base models trained on shuffled subsets drawn without replacement.
     */
    private void trainOnShuffledSubsets(ExampleSet inputSet, double subsetRatio, Model[] models)
            throws OperatorException {
        boolean useLocalRandomSeed = getParameterAsBoolean(RandomGenerator.PARAMETER_USE_LOCAL_RANDOM_SEED);
        int localRandomSeed = getParameterAsInt(RandomGenerator.PARAMETER_LOCAL_RANDOM_SEED);
        for (int i = 0; i < models.length; i++) {
            // Offset the seed by the iteration index. Passing the identical seed in every
            // iteration would seed each split the same way, so a fixed local seed would train
            // all base models on the same subset. Iteration 0 keeps the configured seed.
            // NOTE(review): assumes SplittedExampleSet ignores the seed argument when
            // useLocalRandomSeed is false - confirm against its constructor.
            int iterationSeed = localRandomSeed + i;
            SplittedExampleSet splitted = new SplittedExampleSet(inputSet, subsetRatio,
                    SplittedExampleSet.SHUFFLED_SAMPLING, useLocalRandomSeed, iterationSeed);
            splitted.selectSingleSubset(0);
            models[i] = applyInnerLearner(splitted);
            inApplyLoop();
        }
    }

    /**
     * Reports which capabilities this operator supports. Numerical labels, missing labels,
     * updatability and formula provision are rejected; every other capability is reported as
     * supported without consulting any other operator.
     *
     * @param capability
     *            the capability to check
     * @return {@code false} for the rejected capabilities listed above, {@code true} otherwise
     */
    @Override
    public boolean supportsCapability(OperatorCapability capability) {
        switch (capability) {
            case NUMERICAL_LABEL:
            case NO_LABEL:
            case UPDATABLE:
            case FORMULA_PROVIDER:
                return false;
            default:
                return true;
        }
    }

    /**
     * Extends the parameter list of the super class with the cost matrix, the training subset
     * fraction, the number of iterations, the sampling mode and the random generator parameters.
     *
     * @return the list obtained from the super class with this operator's parameters appended
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.add(new ParameterTypeMatrix(PARAMETER_COST_MATRIX, "The cost matrix in Matlab single line format",
                "Cost Matrix", "Predicted Class", "True Class", true, false));
        types.add(new ParameterTypeDouble(PARAMETER_USE_SUBSET_FOR_TRAINING,
                "Fraction of examples used for training. Must be greater than 0 and should be lower than 1.", 0, 1,
                1.0));
        types.add(new ParameterTypeInt(PARAMETER_ITERATIONS, "The number of iterations (base models).", 1,
                Integer.MAX_VALUE, 10));
        types.add(new ParameterTypeBoolean(PARAMETER_SAMPLING_WITH_REPLACEMENT,
                "Use sampling with replacement (true) or without (false)", true));
        types.addAll(RandomGenerator.getRandomGeneratorParameters(this));
        return types;
    }
}