/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.learner.meta; import java.util.Iterator; import java.util.List; import java.util.Vector; import com.rapidminer.example.Attribute; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.Tools; import com.rapidminer.operator.Model; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.operator.ValueDouble; import com.rapidminer.operator.learner.LearnerCapability; import com.rapidminer.operator.learner.PredictionModel; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeInt; /** * This AdaBoost implementation can be used with all learners available in RapidMiner, not only * the ones which originally are part of the Weka package. * * @author Martin Scholz * @version $Id: AdaBoost.java,v 1.7 2008/06/06 09:37:14 ingomierswa Exp $ */ public class AdaBoost extends AbstractMetaLearner { /** * Name of the variable specifying the maximal number of iterations of the * learner. */ public static final String PARAMETER_ITERATIONS = "iterations"; /** Discard models with an advantage of less than the specified value. */ public static final double MIN_ADVANTAGE = 0.001; // field for visualizing performance protected int currentIteration; // The total weight as a performance measure to be visualized. private double performance = 0; // A backup of the original weights of the training set to restore them // after learning. private double[] oldWeights; /** Constructor. */ public AdaBoost(OperatorDescription description) { super(description); addValue(new ValueDouble("performance", "The performance.") { public double getDoubleValue() { return performance; } }); addValue(new ValueDouble("iteration", "The current iteration.") { public double getDoubleValue() { return currentIteration; } }); } /** * Overrides the method of the super class. Returns true for polynominal * class. */ public boolean supportsCapability(LearnerCapability lc) { if (lc == LearnerCapability.NUMERICAL_CLASS) return false; if (lc == LearnerCapability.WEIGHTED_EXAMPLES) return true; return super.supportsCapability(lc); } /** * Constructs a <code>Model</code> repeatedly running a weak learner, * reweighting the training example set accordingly, and combining the * hypothesis using the available weighted performance values. */ public Model learn(ExampleSet exampleSet) throws OperatorException { if (!exampleSet.getAttributes().getLabel().isNominal()) throw new UserError(this, 119, exampleSet.getAttributes().getLabel().getName(), getName()); this.performance = this.prepareWeights(exampleSet); Model model = this.trainBoostingModel(exampleSet); Attribute weightAttribute = exampleSet.getAttributes().getWeight(); if (this.oldWeights != null) { // need to reset weights Iterator<Example> reader = exampleSet.iterator(); int i = 0; while (reader.hasNext() && i < this.oldWeights.length) { reader.next().setValue(weightAttribute, this.oldWeights[i++]); } } else { // need to delete the weights attribute exampleSet.getAttributes().remove(weightAttribute); exampleSet.getExampleTable().removeAttribute(weightAttribute); } return model; } /** * Creates a weight attribute if not yet done. It either backs up the old * weights for restoring them later, or it fills the newly created attribute * with the initial value of 1. * * @param exampleSet * the example set to be prepared * @return the total weight */ protected double prepareWeights(ExampleSet exampleSet) { Attribute weightAttr = exampleSet.getAttributes().getWeight(); double totalWeight = 0; if (weightAttr == null) { this.oldWeights = null; weightAttr = Tools.createWeightAttribute(exampleSet); Iterator<Example> exRead = exampleSet.iterator(); while (exRead.hasNext()) { exRead.next().setValue(weightAttr, 1); totalWeight++; } } else { // Back up old weights: this.oldWeights = new double[exampleSet.size()]; Iterator<Example> reader = exampleSet.iterator(); for (int i = 0; (reader.hasNext() && i < oldWeights.length); i++) { this.oldWeights[i] = reader.next().getWeight(); totalWeight += this.oldWeights[i]; } } return totalWeight; } /** Main method for training the ensemble classifier */ private AdaBoostModel trainBoostingModel(ExampleSet trainingSet) throws OperatorException { log("Total weight of example set at the beginning: " + this.performance); // Containers for models and weights: Vector<Model> ensembleModels = new Vector<Model>(); Vector<Double> ensembleWeights = new Vector<Double>(); // maximum number of iterations final int iterations = this.getParameterAsInt(PARAMETER_ITERATIONS); for (int i = 0; (i < iterations && this.performance > 0); i++) { this.currentIteration = i; // train one model per iteration ExampleSet iterationSet = (ExampleSet)trainingSet.clone(); Model model = applyInnerLearner(iterationSet); iterationSet = model.apply(iterationSet); // get the weighted performance value of the example set with // respect to the model AdaBoostPerformanceMeasures wp = new AdaBoostPerformanceMeasures(iterationSet); // Reweight the example set with respect to the weighted performance // values: this.performance = wp.reweightExamples(iterationSet); PredictionModel.removePredictedLabel(iterationSet); log("Total weight of example set after iteration " + (this.currentIteration + 1) + " is " + this.performance); if (this.isModelUseful(wp) == false) { // If the model is not considered to be useful (low advantage) // then discard it and stop. log("Discard model because of low advantage on training data."); return new AdaBoostModel(trainingSet, ensembleModels, ensembleWeights); } // Add the new model and its weights to the collection of models: ensembleModels.add(model); double errorRate = wp.getErrorRate(); double weight; if (errorRate == 0) { weight = Double.POSITIVE_INFINITY; } else { weight = Math.log((1.0d - errorRate) / errorRate); } ensembleWeights.add(weight); } // Build a Model object. Last parameter is "crispPredictions", nowadays // always true. AdaBoostModel resultModel = new AdaBoostModel(trainingSet, ensembleModels, ensembleWeights); return resultModel; } /** * Helper method to decide whether a model improves the training error * enough to be considered. * * @param wp * the advantage over the default classifier / random guessing * @return <code>true</code> iff the advantage is high enough to consider * the model to be useful */ private boolean isModelUseful(AdaBoostPerformanceMeasures wp) { return (wp.getErrorRate() < 0.5); } /** * Adds the parameters "number of iterations" and "model * file". */ public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); types.add(new ParameterTypeInt(PARAMETER_ITERATIONS, "The maximum number of iterations.", 1, Integer.MAX_VALUE, 10)); return types; } }