/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.learner.meta; import java.util.List; import java.util.Random; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.set.MappedExampleSet; import com.rapidminer.example.set.SplittedExampleSet; import com.rapidminer.operator.Model; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.learner.LearnerCapability; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.parameter.ParameterTypeDouble; import com.rapidminer.parameter.ParameterTypeInt; import com.rapidminer.parameter.ParameterTypeMatrix; import com.rapidminer.tools.RandomGenerator; /** * This operator uses a given cost matrix to compute label predictions * according to classification costs. The method used by this operator * is similar to MetaCost as described by Pedro Domingos. * * @author Helge Homburg * @version $Id: MetaCost.java,v 1.9 2008/07/13 11:00:49 ingomierswa Exp $ */ public class MetaCost extends AbstractMetaLearner { /** The parameter name for "The cost matrix in Matlab single line format" */ public static final String PARAMETER_COST_MATRIX = "cost_matrix"; /** The parameter name for "File" */ public static final String PARAMETER_COST_MATRIX_FILE_LOCATION = "cost_matrix_file_location"; /** The parameter name for "Fraction of examples used for training. Must be greater than 0 and should be lower than 1." */ public static final String PARAMETER_USE_SUBSET_FOR_TRAINING = "use_subset_for_training"; /** The parameter name for "The number of iterations (base models)." */ public static final String PARAMETER_ITERATIONS = "iterations"; /** The parameter name for "Use sampling with replacement (true) or without (false)" */ public static final String PARAMETER_SAMPLING_WITH_REPLACEMENT = "sampling_with_replacement"; /** The parameter name for "Use the given random seed instead of global random numbers (-1: use global)" */ public static final String PARAMETER_LOCAL_RANDOM_SEED = "local_random_seed"; public MetaCost(OperatorDescription description) { super(description); } public Model learn(ExampleSet inputSet) throws OperatorException { int iterations = getParameterAsInt(PARAMETER_ITERATIONS); double subsetRatio = getParameterAsDouble(PARAMETER_USE_SUBSET_FOR_TRAINING); Model[] models = new Model[iterations]; //get cost matrix double[][] costMatrix = getParameterAsMatrix(PARAMETER_COST_MATRIX); //perform bagging operation if (getParameterAsBoolean(PARAMETER_SAMPLING_WITH_REPLACEMENT)) { //sampling with replacement int randomSeed = getParameterAsInt(PARAMETER_LOCAL_RANDOM_SEED); Random randomGenerator = RandomGenerator.getRandomGenerator(randomSeed); int size = (int)(inputSet.size()*subsetRatio); for (int i = 0; i < iterations; i++) { ExampleSet exampleSet = (ExampleSet)inputSet.clone(); int[] mapping = MappedExampleSet.createBootstrappingMapping(exampleSet, size, randomGenerator); MappedExampleSet currentSampleSet = new MappedExampleSet(exampleSet, mapping); models[i] = applyInnerLearner(currentSampleSet); inApplyLoop(); } } else { //sampling without replacement for (int i = 0; i < iterations; i++) { SplittedExampleSet splitted = new SplittedExampleSet((ExampleSet)inputSet.clone(), subsetRatio, SplittedExampleSet.SHUFFLED_SAMPLING, -1); splitted.selectSingleSubset(0); models[i] = applyInnerLearner(splitted); inApplyLoop(); } } return new MetaCostModel(inputSet, models, costMatrix); } /** * Support polynominal labels. For all other capabilities, it checks for the underlying * operator to see which capabilities are supported by them. */ public boolean supportsCapability(LearnerCapability capability) { if (getNumberOfOperators() == 0) return false; if (capability == LearnerCapability.POLYNOMINAL_CLASS) return true; if (capability == LearnerCapability.BINOMINAL_CLASS) return true; return super.supportsCapability(capability); } public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); types.add(new ParameterTypeMatrix(PARAMETER_COST_MATRIX, "The cost matrix in Matlab single line format", "Cost Matrix", "Predicted Class", "True Class", true, false)); types.add(new ParameterTypeDouble(PARAMETER_USE_SUBSET_FOR_TRAINING, "Fraction of examples used for training. Must be greater than 0 and should be lower than 1.", 0, 1, 1.0)); types.add(new ParameterTypeInt(PARAMETER_ITERATIONS, "The number of iterations (base models).", 1, Integer.MAX_VALUE, 10)); types.add(new ParameterTypeBoolean(PARAMETER_SAMPLING_WITH_REPLACEMENT, "Use sampling with replacement (true) or without (false)", true)); types.add(new ParameterTypeInt(PARAMETER_LOCAL_RANDOM_SEED, "Use the given random seed instead of global random numbers (-1: use global)", -1, Integer.MAX_VALUE, -1)); return types; } }