/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.meta;
import java.util.Iterator;
import java.util.List;
import java.util.Vector;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.SplittedExampleSet;
import com.rapidminer.operator.MissingIOObjectException;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.learner.LearnerCapability;
import com.rapidminer.operator.learner.PredictionModel;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.Tools;
/**
* <p>This operator trains an ensemble of classifiers for boolean target
* attributes. In each iteration the training set is reweighted, so that
* previously discovered patterns and other kinds of prior knowledge are
* "sampled out" {@rapidminer.cite Scholz/2005b}. An inner classifier,
* typically a rule or decision tree induction algorithm, is sequentially
* applied several times, and the models are combined to a single global model.
* The number of models to be trained maximally are specified by the parameter
* <code>iterations</code>.</p>
*
* <p>If the parameter <code>rescale_label_priors</code> is set, then the example
* set is reweighted, so that all classes are equally probable (or frequent).
* For two-class problems this turns the problem of fitting models to maximize
* weighted relative accuracy into the more common task of classifier induction
* {@rapidminer.cite Scholz/2005a}. Applying a rule induction algorithm as an inner
 * learner makes it possible to perform subgroup discovery. This option is also recommended for
* data sets with class skew, if a "very weak learner" like a decision
* stump is used. If <code>rescale_label_priors</code> is not set, then the
* operator performs boosting based on probability estimates.</p>
*
* <p>The estimates used by this operator may either be computed using the same set
* as for training, or in each iteration the training set may be split randomly,
* so that a model is fitted based on the first subset, and the probabilities
* are estimated based on the second. The first solution may be advantageous in
* situations where data is rare. Set the parameter
* <code>ratio_internal_bootstrap</code> to 1 to use the same set for training
* as for estimation. Set this parameter to a value of lower than 1 to use the
* specified subset of data for training, and the remaining examples for
* probability estimation.</p>
*
* <p>If the parameter <code>allow_marginal_skews</code> is <em>not</em> set,
* then the support of each subset defined in terms of common base model
* predictions does not change from one iteration to the next. Analogously the
* class priors do not change. This is the procedure originally described in
* {@rapidminer.cite Scholz/2005b} in the context of subgroup discovery.</p>
*
* <p>Setting the <code>allow_marginal_skews</code> option to <code>true</code>
* leads to a procedure that changes the marginal weights/probabilities of
* subsets, if this is beneficial in a boosting context, and stratifies the two
* classes to be equally likely. As for AdaBoost, the total weight upper-bounds
* the training error in this case. This bound is reduced more quickly by the
* BayesianBoosting operator, however.</p>
*
* <p>In sum, to reproduce the sequential sampling, or knowledge-based sampling,
* from {@rapidminer.cite Scholz/2005b} for subgroup discovery, two of the
* default parameter settings of this operator have to be changed:
* <code>rescale_label_priors</code> must
* be set to <code>true</code>, and <code>allow_marginal_skews</code> must
* be set to <code>false</code>. In addition, a boolean (binomial) label
* has to be used.</p>
*
* <p>The operator requires an example set as its input. To sample out prior
* knowledge of a different form it is possible to provide another model as an
 * optional additional input. The predictions of this model are used to
 * produce an initial weighting of the training set. The output of the operator
* is a classification model applicable for estimating conditional class
* probabilities or for plain crisp classification. It contains up to the
* specified number of inner base models. In the case of an optional initial
* model, this model will also be stored in the output model, in order to
* produce the same initial weighting during model application.</p>
*
* @author Martin Scholz
* @version $Id: BayesianBoosting.java,v 1.56 2006/04/14 15:14:32 ingomierswa
* Exp $
*/
public class BayesianBoosting extends AbstractMetaLearner {
/**
 * Name of the variable specifying the maximal number of iterations of the
 * learner.
 */
public static final String PARAMETER_ITERATIONS = "iterations";
/** Name of the flag indicating internal bootstrapping. */
public static final String PARAMETER_USE_SUBSET_FOR_TRAINING = "use_subset_for_training";
/**
 * Boolean parameter to specify whether the label priors should be equally
 * likely after first iteration.
 */
public static final String PARAMETER_RESCALE_LABEL_PRIORS = "rescale_label_priors";
/**
 * Boolean parameter that switches between KBS (if set to false) and a
 * boosting-like reweighting.
 */
public static final String PARAMETER_ALLOW_MARGINAL_SKEWS = "allow_marginal_skews";
/** Discard models with an advantage of less than the specified value.
 * NOTE(review): not enforced within this class — {@link #isModelUseful}
 * always returns true. The public constant may be referenced elsewhere;
 * confirm before removing. */
public static final double MIN_ADVANTAGE = 0.001;
/** A model to initialise the example weights. */
private Model startModel;
/** Field for visualizing performance. */
protected int currentIteration;
/** A performance measure to be visualized. */
private double performance = 0;
/** A backup of the original weights of the training set to restore them
 * after learning. */
private double[] oldWeights;
/** Constructor. Registers the loggable values "performance" and "iteration". */
public BayesianBoosting(OperatorDescription description) {
super(description);
addValue(new ValueDouble("performance", "The performance.") {
public double getDoubleValue() {
return performance;
}
});
addValue(new ValueDouble("iteration", "The current iteration.") {
public double getDoubleValue() {
return currentIteration;
}
});
}
/**
 * Overrides the method of the super class. Returns <code>false</code> for
 * numerical and polynominal labels (this operator supports boolean/binominal
 * labels only), <code>true</code> for weighted examples, and delegates all
 * other capability queries to the super class (i.e. to the inner learner).
 */
public boolean supportsCapability(LearnerCapability lc) {
if (lc == LearnerCapability.NUMERICAL_CLASS || lc == LearnerCapability.POLYNOMINAL_CLASS)
return false;
if (lc == LearnerCapability.WEIGHTED_EXAMPLES)
return true;
return super.supportsCapability(lc);
}
/**
 * Constructs a <code>Model</code> repeatedly running a weak learner,
 * reweighting the training example set accordingly, and combining the
 * hypothesis using the available weighted performance values. If the input
 * contains a model, then this model is used as a starting point for
 * weighting the examples.
 */
public Model learn(ExampleSet exampleSet) throws OperatorException {
// Read start model if present.
this.readOptionalParameters();
// the resulting model of this operator
Model model;
double[] classPriors = this.prepareWeights(exampleSet);
// check whether only one or no class is present
double maxPrior = Double.NEGATIVE_INFINITY;
double sumPriors = 0;
for (int i=0; i<classPriors.length; i++) {
if (classPriors[i] > maxPrior)
maxPrior = classPriors[i];
sumPriors += classPriors[i];
}
// if the largest prior equals the sum of all priors, at most one class
// has non-zero weight, so boosting is pointless
if (Tools.isEqual(sumPriors, maxPrior)) {
// nothing to do, return an empty ensemble model
model = new BayBoostModel(exampleSet, new Vector<BayBoostBaseModelInfo>(), classPriors);
}
else {
// only in this case boosting makes sense
model = this.trainBoostingModel(exampleSet, classPriors);
}
if (this.oldWeights != null) { // need to reset weights
Iterator<Example> reader = exampleSet.iterator();
int i = 0;
while (reader.hasNext() && i < this.oldWeights.length) {
reader.next().setWeight(this.oldWeights[i++]);
}
} else { // need to delete the weights attribute
// oldWeights == null means prepareWeights() created the attribute itself
Attribute weight = exampleSet.getAttributes().getWeight();
exampleSet.getAttributes().remove(weight);
exampleSet.getExampleTable().removeAttribute(weight);
}
return model;
}
/**
 * Creates a weight attribute if not yet done. It either backs up the old
 * weights for restoring them later, or it fills the newly created attribute
 * with the initial value of 1. If rescaling to equal class priors is
 * activated then the weights are set accordingly.
 *
 * @param exampleSet
 *            the example set to be prepared
 * @return a <code>double[]</code> array containing the class priors.
 */
protected double[] prepareWeights(ExampleSet exampleSet) {
Attribute weightAttr = exampleSet.getAttributes().getWeight();
if (weightAttr == null) {
this.oldWeights = null;
// example weights are initialized so that the total weight
// is equal to the number of examples:
this.performance = exampleSet.size();
return this.createNewWeightAttribute(exampleSet);
}
else {
// Back up old weights and compute priors:
this.oldWeights = new double[exampleSet.size()];
double[] priors = new double[exampleSet.getAttributes().getLabel().getMapping().size()];
double totalWeight = 0;
Iterator<Example> reader = exampleSet.iterator();
for (int i = 0; (reader.hasNext() && i < oldWeights.length); i++) {
Example example = reader.next();
if (example != null) {
double weight = example.getWeight();
this.oldWeights[i] = weight;
int label = (int) example.getLabel();
if (0 <= label && label < priors.length) {
priors[label] += weight;
totalWeight += weight;
}
else example.setWeight(0); // Unrecognized label, try to ignore it!
}
}
this.performance = totalWeight;
// Normalize:
for (int i = 0; i < priors.length; i++) {
priors[i] /= totalWeight;
}
return priors;
}
}
/**
 * Creates a weight attribute and initializes all example weights, either
 * uniformly to 1 or — if {@link #PARAMETER_RESCALE_LABEL_PRIORS} is set —
 * so that all classes receive equal total weight.
 *
 * @param exampleSet
 *            the example set to equip with weights
 * @return the class priors (relative label frequencies) of the example set
 */
private double[] createNewWeightAttribute(ExampleSet exampleSet) {
com.rapidminer.example.Tools.createWeightAttribute(exampleSet);
Iterator<Example> exRead = exampleSet.iterator();
int numClasses = exampleSet.getAttributes().getLabel().getMapping().getValues().size();
double[] classPriors = new double[numClasses];
int total = exampleSet.size();
double invTotal = 1.0d / total;
if (this.getParameterAsBoolean(PARAMETER_RESCALE_LABEL_PRIORS) == false) {
while (exRead.hasNext()) {
Example example = exRead.next();
example.setWeight(1);
classPriors[(int) (example.getLabel())] += invTotal;
}
}
else {
// first count the class frequencies
while (exRead.hasNext()) {
classPriors[(int) (exRead.next().getLabel())] += invTotal;
}
this.rescaleToEqualPriors(exampleSet, classPriors);
}
return classPriors;
}
/**
 * Reweights every example to (1 / #classes) / (relative frequency of its
 * class), so that afterwards all classes carry the same total weight.
 */
private void rescaleToEqualPriors(ExampleSet exampleSet, double[] currentPriors) {
// The weights of class i are calculated as
// (1 / #classes) / (#rel_freq_class_i)
double[] weights = new double[currentPriors.length];
for (int i = 0; i < weights.length; i++) {
weights[i] = 1.0d / (weights.length * (currentPriors[i]));
}
Iterator<Example> exRead = exampleSet.iterator();
while (exRead.hasNext()) {
Example example = exRead.next();
example.setWeight(weights[(int) (example.getLabel())]);
}
}
/**
 * Runs the "embedded" learner on the example set and returns a
 * model.
 *
 * @param exampleSet
 *            an <code>ExampleSet</code> to train a model for
 * @return a <code>Model</code>
 */
protected Model trainBaseModel(ExampleSet exampleSet) throws OperatorException {
Model model = applyInnerLearner(exampleSet);
return model;
}
/** Helper method reading a start model from the input if present. */
private void readOptionalParameters() {
try {
this.startModel = getInput(Model.class);
} catch (MissingIOObjectException e) {
// the start model is optional; its absence is not an error
log(getName() + ": No model found in input.");
}
}
/**
 * Helper method applying the start model and adding it to the modelInfo
 * collection
 */
private void applyPriorModel(ExampleSet trainingSet, List<BayBoostBaseModelInfo> modelInfo) throws OperatorException {
// If the input contains a model already, initialise the example weights.
if (this.startModel != null) {
ExampleSet resultSet = this.startModel.apply((ExampleSet)trainingSet.clone());
// Initial values and the input model are stored in the output model.
WeightedPerformanceMeasures wp = new WeightedPerformanceMeasures(resultSet);
this.reweightExamples(wp, resultSet);
modelInfo.add(new BayBoostBaseModelInfo(this.startModel, wp.getContingencyMatrix()));
PredictionModel.removePredictedLabel(resultSet);
}
}
/** Main method for training the ensemble classifier. */
private BayBoostModel trainBoostingModel(ExampleSet trainingSet, final double[] classPriors) throws OperatorException {
// for models and their probability estimates
Vector<BayBoostBaseModelInfo> modelInfo = new Vector<BayBoostBaseModelInfo>();
// if present apply the start model first
this.applyPriorModel(trainingSet, modelInfo);
// check whether to use the complete training set for training
final double splitRatio = this.getParameterAsDouble(PARAMETER_USE_SUBSET_FOR_TRAINING);
final boolean bootstrap = ((splitRatio > 0) && (splitRatio < 1.0));
log(bootstrap ? "Bootstrapping enabled." : "Bootstrapping disabled.");
final boolean allowSkew = this.getParameterAsBoolean(PARAMETER_ALLOW_MARGINAL_SKEWS);
SplittedExampleSet splittedSet = null;
if (bootstrap == true) {
splittedSet = new SplittedExampleSet(trainingSet, splitRatio, SplittedExampleSet.SHUFFLED_SAMPLING, -1);
}
// maximum number of iterations
final int iterations = this.getParameterAsInt(PARAMETER_ITERATIONS);
L: for (int i = 0; i < iterations; i++) {
this.currentIteration = i;
Model model;
WeightedPerformanceMeasures wp;
ExampleSet iterationSet = (ExampleSet)trainingSet.clone();
if (bootstrap == true) {
splittedSet.selectSingleSubset(0); // switch to learning subset
model = this.trainBaseModel(splittedSet);
// apply model to all examples
iterationSet = model.apply(iterationSet);
// reweight learning subset
wp = new WeightedPerformanceMeasures(splittedSet);
WeightedPerformanceMeasures.reweightExamples(splittedSet, wp.getContingencyMatrix(), allowSkew);
// handle test set: reweight it separately, use its estimates
// for future predictions
splittedSet.selectSingleSubset(1);
wp = new WeightedPerformanceMeasures(splittedSet);
this.performance = // performance should be estimated based on the hold-out set
WeightedPerformanceMeasures.reweightExamples(splittedSet, wp.getContingencyMatrix(), allowSkew);
}
else {
// train one model per iteration
model = this.trainBaseModel(iterationSet);
iterationSet = model.apply(iterationSet);
// get the weighted performance value of the example set with
// respect to the model
wp = new WeightedPerformanceMeasures(iterationSet);
// Reweight the example set with respect to the weighted
// performance values:
this.performance = this.reweightExamples(wp, iterationSet);
}
PredictionModel.removePredictedLabel(iterationSet);
if (classPriors.length == 2) {
// debug output for the two-class case is disabled:
//this.debugMessage(wp);
}
// Stop if only one class is present/left.
if (wp.getNumberOfNonEmptyClasses() < 2) {
// Using the model here is just necessary to avoid a
// NullPointerException if this is the first iteration.
// One could use an empty model instead:
modelInfo.add(new BayBoostBaseModelInfo(model, wp.getContingencyMatrix()));
break L; // No more iterations!
}
final ContingencyMatrix cm = wp.getContingencyMatrix();
// Add the new model and its weights to the collection of models:
modelInfo.add(new BayBoostBaseModelInfo(model, cm));
if (this.isModelUseful(cm) == false) {
// If the model is not considered to be useful (low advantage)
// then discard it and stop.
log("Discard model because of low advantage on training data.");
modelInfo.remove(modelInfo.size() - 1);
break L;
}
// Stop if weight is null, because all examples have been explained
// "deterministically".
if (this.performance == 0) {
break L;
}
inApplyLoop();
}
// Build a Model object. Last parameter is "crispPredictions", nowadays
// always true.
return new BayBoostModel(trainingSet, modelInfo, classPriors);
}
/**
 * This method reweights the example set with respect to the
 * <code>WeightedPerformanceMeasures</code> object. Please note that the
 * weights will not be reset at any time, because they continuously change
 * from one iteration to the next. This method does not change the priors of
 * the classes.
 *
 * @param wp
 *            the WeightedPerformanceMeasures to use
 * @param exampleSet
 *            <code>ExampleSet</code> to be reweighted
 * @return the total weight of examples as an error estimate
 */
protected double reweightExamples(WeightedPerformanceMeasures wp, ExampleSet exampleSet)
throws OperatorException
{
boolean allowMarginalSkews = this.getParameterAsBoolean(PARAMETER_ALLOW_MARGINAL_SKEWS);
double remainingWeight = WeightedPerformanceMeasures.
reweightExamples(exampleSet, wp.getContingencyMatrix(), allowMarginalSkews);
return remainingWeight;
}
/**
 * Helper method to decide whether a model improves the training error
 * enough to be considered. Returns always true.
 *
 * @param cm
 *            the lift ratio matrix as returned by the getter of the
 *            WeightedPerformance class
 * @return <code>true</code> iff the advantage is high enough to consider
 *         the model to be useful
 */
private boolean isModelUseful(ContingencyMatrix cm) {
// should rather be decided offline by properly setting
// the number of iterations
return true;
}
/**
 * Adds the parameter types of this operator: rescaling of label priors,
 * the training/estimation split ratio, the maximum number of iterations,
 * and whether marginal skews are allowed.
 */
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = super.getParameterTypes();
types.add(new ParameterTypeBoolean(PARAMETER_RESCALE_LABEL_PRIORS, "Specifies whether the proportion of labels should be equal by construction after first iteration .", false));
types.add(new ParameterTypeDouble(PARAMETER_USE_SUBSET_FOR_TRAINING, "Fraction of examples used for training, remaining ones are used to estimate the confusion matrix. Set to 1 to turn off test set.", 0, 1, 1));
types.add(new ParameterTypeInt(PARAMETER_ITERATIONS, "The maximum number of iterations.", 1, Integer.MAX_VALUE, 10));
types.add(new ParameterTypeBoolean(PARAMETER_ALLOW_MARGINAL_SKEWS, "Allow to skew the marginal distribution (P(x)) during learning.", true));
return types;
}
}