/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.meta;
import java.awt.Component;
import java.util.Iterator;
import java.util.List;
import javax.swing.JTabbedPane;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.gui.tools.ExtendedJTabbedPane;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.learner.PredictionModel;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.Tools;
/**
* A model for the Bayesian Boosting algorithm by Martin Scholz.
*
* @author Martin Scholz
* @version $Id: BayBoostModel.java,v 1.8 2008/05/09 19:22:48 ingomierswa Exp $
*/
public class BayBoostModel extends PredictionModel {
private static final long serialVersionUID = 5821921049035718838L;
// The base models, each bundled with its contingency matrix, in the order
// in which they are applied during prediction.
// Please access with getter methods.
private List<BayBoostBaseModelInfo> modelInfo;
// The class priors in the training set, indexed by class index starting with 0.
private double[] priors;
// If set to a value i >= 0 then only the first i models are applied;
// any negative value (the default) means that all models are used.
private int maxModelNumber = -1;
// Name of the parameter accepted by setParameter that sets maxModelNumber.
private static final String MAX_MODEL_NUMBER = "iteration";
// Name of the parameter accepted by setParameter that sets the threshold
// used to turn soft into crisp classifiers.
private static final String CONV_TO_CRISP = "crisp";
// Decision threshold on the confidence of the positive class. It is only
// consulted for two-class labels and only if it differs from 0.5.
private double threshold = 0.5;
/**
 * Creates a boosting model from already trained base models and the class
 * priors.
 *
 * @param exampleSet
 *            the example set used for training
 * @param modelInfos
 *            the base models in the order of their application, each entry
 *            bundling a model with its contingency matrix. The list is
 *            stored by reference.
 * @param priors
 *            the prior probabilities of the labels, indexed by class
 *            index. The array is stored by reference.
 */
public BayBoostModel(ExampleSet exampleSet, List<BayBoostBaseModelInfo> modelInfos, double[] priors) {
    super(exampleSet);
    this.priors = priors;
    this.modelInfo = modelInfos;
}
/**
 * Returns the stored bundle of base model and contingency matrix.
 *
 * @param index
 *            the position of the base model in the internal list
 * @return the info object stored at that position
 */
public BayBoostBaseModelInfo getBayBoostBaseModelInfo(int index) {
    final BayBoostBaseModelInfo info = this.modelInfo.get(index);
    return info;
}
/**
 * Sets one of the model parameters understood by this class.
 * <ul>
 * <li><code>MAX_MODEL_NUMBER</code> ("iteration") allows to discard all but
 * the first n models for the specified integer n.</li>
 * <li><code>CONV_TO_CRISP</code> ("crisp") allows to set another threshold
 * than 0.5 for boolean prediction problems.</li>
 * </ul>
 * Parameter names are matched case-insensitively and surrounding whitespace
 * of the value is ignored. Every other parameter name, as well as a value
 * that is <code>null</code> or cannot be parsed as a number, is handed on
 * unchanged to the superclass implementation.
 *
 * @param name
 *            the name of the parameter
 * @param value
 *            the new value in its string representation
 * @throws OperatorException
 *             if thrown by the superclass implementation
 */
public void setParameter(String name, String value) throws OperatorException {
    // Both parameters are numeric, so whitespace around the value is noise.
    final String trimmed = (value == null) ? null : value.trim();
    if (trimmed != null) {
        try {
            if (name.equalsIgnoreCase(MAX_MODEL_NUMBER)) {
                this.maxModelNumber = Integer.parseInt(trimmed);
                return;
            } else if (name.equalsIgnoreCase(CONV_TO_CRISP)) {
                this.threshold = Double.parseDouble(trimmed);
                return;
            }
        } catch (NumberFormatException ignored) {
            // Deliberately not rethrown: an unparsable value is treated like
            // an unsupported parameter and delegated to the superclass below,
            // identically for both parameters.
        }
    }
    super.setParameter(name, value);
}
/**
 * Restricts the model to its first <code>numModels</code> base models.
 * Any value &gt;= 0 activates the restriction; a negative value such as -1
 * turns the option off so that all base models are used.
 *
 * @param numModels
 *            the number of base models to keep, or a negative value for
 *            no restriction
 */
public void setMaxModelNumber(int numModels) {
    maxModelNumber = numModels;
}
/**
 * Builds a tabbed pane with one tab per active base model. The tabs are
 * titled "Model 1", "Model 2", ... and show the visualization component of
 * the corresponding base model.
 *
 * @param container
 *            passed on to the visualization of each base model
 * @return the tabbed pane holding all base model visualizations
 */
public Component getVisualizationComponent(IOContainer container) {
    final JTabbedPane tabs = new ExtendedJTabbedPane();
    int position = 0;
    while (position < this.getNumberOfModels()) {
        final Component modelView = this.getModel(position).getVisualizationComponent(container);
        position++;
        // tab titles are one-based
        tabs.add("Model " + position, modelView);
    }
    return tabs;
}
/**
 * Lists the number of active base models followed by the result string of
 * each of them. The embedded models are numbered starting with 0.
 *
 * @return a <code>String</code> representation of this boosting model.
 */
public String toString() {
    final StringBuilder text = new StringBuilder();
    text.append(super.toString());
    text.append(Tools.getLineSeparator());
    text.append("Number of inner models: ").append(this.getNumberOfModels());
    text.append(Tools.getLineSeparators(2));
    for (int nr = 0; nr < this.getNumberOfModels(); nr++) {
        final Model embedded = this.getModel(nr);
        if (nr > 0) {
            // separate consecutive model descriptions
            text.append(Tools.getLineSeparator());
        }
        text.append("Embedded model #").append(nr).append(":");
        text.append(Tools.getLineSeparator());
        text.append(embedded.toResultString());
    }
    return text.toString();
}
/**
 * Returns how many base models are currently in use. This is the number of
 * stored models, capped by the maximal model number if that is set to a
 * non-negative value.
 *
 * @return the number of embedded models
 */
public int getNumberOfModels() {
    final int stored = modelInfo.size();
    return (this.maxModelNumber < 0) ? stored : Math.min(this.maxModelNumber, stored);
}
/**
 * Gets factors for models in the case of general nominal class labels.
 * The values are read from the contingency matrix stored with the model.
 *
 * @param modelNr
 *            the number of the model
 * @param predicted
 *            the index of the predicted label
 * @return a <code>double[]</code> with the lift ratios to be applied for
 *         each class if the corresponding rule yields
 *         <code>predicted</code>. NOTE(review): other code in this class
 *         treats infinite lift ratios as deterministic predictions and
 *         NaN as "rule does not apply"; presumably the same encoding is
 *         used here - confirm against <code>ContingencyMatrix</code>.
 */
private double[] getFactorsForModel(int modelNr, int predicted) {
    return this.modelInfo.get(modelNr).getContingencyMatrix().getLiftRatiosForPrediction(predicted);
}
/**
 * Looks up the prior probability of a single class in the prior array
 * handed to the constructor.
 *
 * @param classIndex
 *            the index of a class, starting with 0
 * @return the prior probability of the specified class
 */
private double getPriorOfClass(int classIndex) {
    final double prior = this.priors[classIndex];
    return prior;
}
/**
 * Getter for the prior array.
 *
 * @return a copy of the class priors, so that changes made by the caller
 *         do not affect this model
 */
public double[] getPriors() {
    return this.priors.clone();
}
/**
 * Getter method for embedded models.
 *
 * @param index
 *            the number of a base model that is part of this boosting
 *            model, starting with 0
 * @return the base model stored at the given position
 */
public Model getModel(int index) {
    final BayBoostBaseModelInfo info = this.modelInfo.get(index);
    return info.getModel();
}
/**
 * Getter method for the contingency matrix of a specific base model.
 *
 * @param index
 *            the number of the model for which to read the contingency
 *            matrix, starting with 0
 * @return the <code>ContingencyMatrix</code> stored with that model
 */
public ContingencyMatrix getContingencyMatrix(int index) {
    final BayBoostBaseModelInfo info = this.modelInfo.get(index);
    return info.getContingencyMatrix();
}
/**
 * Iterates over all models and returns the class with maximum likelihood.
 * <p>
 * For each example one temporary special attribute per class holds the
 * current odds estimate. The estimates start with the prior odds, are
 * updated with the lift ratios of every active base model and are finally
 * turned into a prediction and confidences. The temporary attributes are
 * removed again before the method returns, even if it fails.
 *
 * @param exampleSet
 *            the set of examples to be classified
 * @param predictedLabel
 *            the label that finally holds the predictions
 * @return the given example set, now containing the predictions
 * @throws OperatorException
 *             if creating or removing the temporary attributes or applying
 *             a base model fails
 */
public ExampleSet performPrediction(ExampleSet exampleSet, Attribute predictedLabel) throws OperatorException {
    // Prepare special attributes for storing intermediate results:
    final Attribute[] specialAttributes = this.createSpecialAttributes(exampleSet);
    try {
        this.initIntermediateResultAttributes(exampleSet, specialAttributes);

        // Apply all models to the example set, each time updating the
        // intermediate results:
        for (int i = 0; i < this.getNumberOfModels(); i++) {
            Model model = this.getModel(i);
            ExampleSet clonedExampleSet = (ExampleSet) exampleSet.clone();
            clonedExampleSet = model.apply(clonedExampleSet);
            this.updateEstimates(clonedExampleSet, this.getContingencyMatrix(i), specialAttributes);
            PredictionModel.removePredictedLabel(clonedExampleSet);
        }

        // The predicted class is written into the predicted label attribute,
        // so its index has to be taken from the mapping of that attribute.
        // The label of the example set is not suitable for this: its mapping
        // may differ and the code must not depend on a label being present.
        // It is only used as a fallback if no predicted label was passed.
        final Attribute targetLabel = (predictedLabel != null) ? predictedLabel : exampleSet.getAttributes().getLabel();

        // Compute and store probability estimates from the intermediate
        // results:
        Iterator<Example> reader = exampleSet.iterator();
        while (reader.hasNext()) {
            Example example = reader.next();
            this.translateOddsIntoPredictions(example, specialAttributes, targetLabel);
        }
    } finally {
        // Remove the special attributes used for storing intermediate
        // estimates, also if one of the steps above failed:
        this.cleanUpSpecialAttributes(exampleSet, specialAttributes);
    }
    return exampleSet;
}
/**
 * Creates a special attribute for each label to store intermediate results.
 * The attributes are numerical, named "BayBoostModelPrediction" followed by
 * the class index, and their number equals the size of the mapping of the
 * stored label.
 *
 * @param exampleSet
 *            the example set the attributes are created for
 * @return the new attributes, ordered by class index
 */
private Attribute[] createSpecialAttributes(ExampleSet exampleSet) throws OperatorException {
    final int numberOfClasses = this.getLabel().getMapping().size();
    final Attribute[] created = new Attribute[numberOfClasses];
    int classIndex = 0;
    while (classIndex < created.length) {
        final String attributeName = "BayBoostModelPrediction" + classIndex;
        created[classIndex] = com.rapidminer.example.Tools.createSpecialAttribute(exampleSet, attributeName, Ontology.NUMERICAL);
        classIndex++;
    }
    return created;
}
/**
 * Removes the provided special attributes from the exampleSet and from its
 * exampleTable.
 *
 * @param exampleSet
 *            the example set to clean up
 * @param specialAttributes
 *            the temporary attributes to remove
 */
private void cleanUpSpecialAttributes(ExampleSet exampleSet, Attribute[] specialAttributes) throws OperatorException {
    for (Attribute temporary : specialAttributes) {
        // first drop the attribute from the view, then from the data table
        exampleSet.getAttributes().remove(temporary);
        exampleSet.getExampleTable().removeAttribute(temporary);
    }
}
/**
 * Initializes the intermediate estimates of all examples with the prior
 * odds of the corresponding class.
 *
 * @param exampleSet
 *            the examples to initialize
 * @param specAttrib
 *            the attributes holding the intermediate estimates, one per
 *            class and ordered by class index
 */
private void initIntermediateResultAttributes(ExampleSet exampleSet, Attribute[] specAttrib) {
    // Compute odds ratios p / (1 - p) from the class priors. A prior of
    // exactly 1 is mapped to positive infinity.
    final double[] odds = new double[this.priors.length];
    for (int c = 0; c < odds.length; c++) {
        final double prior = this.priors[c];
        if (prior == 1) {
            odds[c] = Double.POSITIVE_INFINITY;
        } else {
            odds[c] = prior / (1 - prior);
        }
    }

    // Every example starts with the same estimates.
    for (Iterator<Example> it = exampleSet.iterator(); it.hasNext();) {
        final Example current = it.next();
        int c = 0;
        for (Attribute estimate : specAttrib) {
            current.setValue(estimate, odds[c]);
            c++;
        }
    }
}
/**
 * Turns the intermediate odds estimates of one example into confidences
 * for all classes and into a crisp prediction.
 * <p>
 * The predicted class is the most probable one, except for two-class
 * labels with a threshold different from 0.5: then the positive class is
 * predicted iff its confidence reaches the threshold. A threshold outside
 * of [0, 1] is replaced by 0.5 for this decision; the stored threshold is
 * not modified, so all examples are treated alike.
 *
 * @param example
 *            the example to write the prediction and the confidences to
 * @param specAttrib
 *            the attributes holding the odds estimates, one per class,
 *            ordered by the class indices of the stored label
 * @param exampleSetLabel
 *            the attribute whose nominal mapping translates the name of
 *            the predicted class into the index that is written to the
 *            predicted label of the example
 */
private void translateOddsIntoPredictions(Example example, Attribute[] specAttrib, Attribute exampleSetLabel) {
    // Turn lift ratio into conditional probabilities:
    double probSum = 0;
    final double[] classProb = new double[specAttrib.length];
    int bestIndex = 0;
    for (int n = 0; n < classProb.length; n++) {
        // The probability Prob( C | x ) for class C given the description
        // can be calculated from factor = Prob(C | x) / Prob(neg(C) | x) as
        // Prob( C | x ) = factor / (1 + factor):
        final double odds = example.getValue(specAttrib[n]);
        if (Double.isNaN(odds)) {
            logWarning("Found NaN odd ratio estimate.");
            classProb[n] = 1;
        } else if (Double.isInfinite(odds)) {
            classProb[n] = 1;
        } else {
            classProb[n] = odds / (1 + odds);
        }
        probSum += classProb[n]; // accumulate probabilities, should be 1
        if (classProb[n] > classProb[bestIndex]) {
            bestIndex = n;
        }
    }

    // Normalize probabilities if the sum is not 1.
    // This can happen if the subset defined by a rule does not contain all
    // classes. A sum of 0 is left alone: dividing by it would only turn
    // all confidences into NaN.
    if (probSum != 0.0 && probSum != 1.0) {
        for (int k = 0; k < classProb.length; k++) {
            classProb[k] /= probSum;
        }
    }

    // Store the final prediction. All internal computations have used the
    // indices of the stored label. The indices of the new label may be
    // different in case of stored and reloaded models or example sets. For
    // this reason the final predictions are written in terms of the
    // strings, avoiding any assumptions about the mapping.
    final String bestLabel;
    if (this.getLabel().isNominal() && this.getLabel().getMapping().size() == 2 && this.threshold != 0.5) {
        // boolean classification problem --> only in this case a threshold
        // makes sense ...
        // the local indices:
        final int posIndex = this.getLabel().getMapping().getPositiveIndex();
        final int negIndex = this.getLabel().getMapping().getNegativeIndex();

        // Decide whether threshold is valid, otherwise just use 0.5.
        // A local variable is used on purpose: predicting must not change
        // the state of the model.
        final double effectiveThreshold = (this.threshold >= 0 && this.threshold <= 1) ? this.threshold : 0.5;

        // If the threshold is reached store the string representing the
        // positive class, otherwise the one for the negative class.
        bestLabel = this.getLabel().getMapping().mapIndex((classProb[posIndex] >= effectiveThreshold) ? posIndex : negIndex);
    } else { // otherwise: just predict the most probable class
        bestLabel = this.getLabel().getMapping().mapIndex(bestIndex);
    }

    // Write the prediction to the example set. In this case the indices of
    // the passed label attribute have to be used:
    example.setValue(example.getAttributes().getPredictedLabel(), exampleSetLabel.getMapping().mapString(bestLabel));

    // Set confidence values for all classes:
    for (int k = 0; k < classProb.length; k++) {
        // The locally used attribute indices correspond to the stored
        // label, so the String representation required for setting
        // confidences is derived from the stored label.
        if (Double.isNaN(classProb[k]) || classProb[k] < 0 || classProb[k] > 1) {
            logWarning("Found illegal confidence value: " + classProb[k]);
        }
        example.setConfidence(this.getLabel().getMapping().mapIndex(k), classProb[k]);
    }
}
/**
 * Updates the intermediate odds estimates of all examples with the lift
 * ratios of a single base model.
 *
 * @param exampleSet
 *            the examples, already carrying the prediction of the base
 *            model as predicted label
 * @param cm
 *            the contingency matrix of the base model, providing the lift
 *            ratio for each combination of class and prediction
 * @param specialAttributes
 *            the attributes holding the intermediate estimates, one per
 *            class
 */
private void updateEstimates(ExampleSet exampleSet, ContingencyMatrix cm, Attribute[] specialAttributes) {
    for (Iterator<Example> it = exampleSet.iterator(); it.hasNext();) {
        final Example current = it.next();
        final int predictedIndex = (int) current.getPredictedLabel();

        for (int classIndex = 0; classIndex < cm.getNumberOfClasses(); classIndex++) {
            // lift ratio of the rule: predictedIndex => classIndex
            final double lift = cm.getLiftRatio(classIndex, predictedIndex);

            if (Double.isNaN(lift)) {
                // NaN marks a rule that does not apply: leave the estimate untouched
                logWarning("Ignoring non-applicable model.");
            } else if (Double.isInfinite(lift)) {
                // Deterministic rule. If the class was already ruled out
                // (estimate 0) the factor is ignored, otherwise the class
                // is marked as the only possible one.
                if (current.getValue(specialAttributes[classIndex]) != 0) {
                    for (Attribute estimate : specialAttributes) {
                        current.setValue(estimate, 0);
                    }
                    current.setValue(specialAttributes[classIndex], lift);
                }
            } else {
                // The "normal" case: multiply the estimate with the lift
                // ratio, unless the class is already known for sure.
                final double previous = current.getValue(specialAttributes[classIndex]);
                if (Double.isNaN(previous)) {
                    logWarning("Found NaN value in intermediate odds ratio estimates!");
                }
                if (!Double.isInfinite(previous)) {
                    current.setValue(specialAttributes[classIndex], previous * lift);
                }
            }
        }
    }
}
/**
 * Helper method to adjust the intermediate products during model
 * application. Each product is multiplied with the lift factor of the same
 * index. NaN factors are skipped with a warning. An infinite factor marks
 * its class as deterministically correct, unless the product of that class
 * is already 0, in which case the factor is ignored.
 *
 * @param products
 *            the intermediate products, these values are changed by the
 *            method
 * @param liftFactors
 *            the factor vector that applies for the prediction for the
 *            current example
 *
 * @return <code>true</code> iff the class is deterministically known
 *         after applying this method
 */
public static boolean adjustIntermediateProducts(double[] products, double[] liftFactors) {
    for (int classIndex = 0; classIndex < liftFactors.length; classIndex++) {
        if (Double.isNaN(liftFactors[classIndex])) {
            // the rule does not apply, nothing to update
            LogService.getGlobal().log("Ignoring non-applicable model.", LogService.WARNING);
            continue;
        }

        if (!Double.isInfinite(liftFactors[classIndex])) {
            // the "normal" case
            products[classIndex] *= liftFactors[classIndex];
            if (Double.isNaN(products[classIndex])) {
                LogService.getGlobal().log("Found NaN value in intermediate odds ratio estimates!", LogService.WARNING);
            }
            continue;
        }

        // Infinite factor from here on.
        if (products[classIndex] == 0) {
            // class is already known to be deterministically incorrect
            continue;
        }

        // Class is deterministically correct: all other classes are ruled out.
        for (int other = 0; other < products.length; other++) {
            products[other] = 0;
        }
        products[classIndex] = liftFactors[classIndex];
        return true;
    }
    return false;
}
/**
 * This method is only supported for boolean target attributes. It computes
 * a flattened version of model weights. In contrast to the original
 * version the final predictions are additive logarithms of the lift ratios,
 * additively rescaled so that the prediction <code>false</code> of model
 * i produces <code>-i</code> if <code>true</code> produces weight i.
 * This means that only one weight per model is required. The first
 * component of the returned array is the part that is independent of any
 * prediction, the i-th component is the weight of model i. The (log-)linear
 * model predicts depending on whether the linear combination of predictions
 * (either -1 or 1) is greater than 0 or not. Infinite values are
 * problematic, so the logarithms of the lift ratios are limited to the
 * range [-10, 10].
 *
 * @return the flattened weights of all models
 * @throws OperatorException
 *             if the stored label does not have exactly two values
 */
public double[] getModelWeights() throws OperatorException {
    if (this.getLabel().getMapping().size() != 2) {
        throw new UserError(null, 114, "BayBoostModel", this.getLabel());
    }

    final int limit = 10;
    final int pos = this.getLabel().getMapping().getPositiveIndex();
    final int neg = this.getLabel().getMapping().getNegativeIndex();

    final double[] result = new double[this.getNumberOfModels() + 1];

    // The model independent part starts with the logarithm of the prior odds.
    result[0] = Math.log(this.getPriorOfClass(pos) / this.getPriorOfClass(neg));

    for (int modelNr = 0; modelNr + 1 < result.length; modelNr++) {
        // Logarithm of the factor applied to the positive class if the
        // model predicts positive, limited to exclude infinity etc.
        final double[] ratiosForPosPrediction = this.getFactorsForModel(modelNr, pos);
        double logRatioIfPos = Math.log(ratiosForPosPrediction[pos]);
        logRatioIfPos = Math.min(limit, Math.max(-limit, logRatioIfPos));

        // The same for a negative prediction; it is also the factor
        // applied to the positive class.
        final double[] ratiosForNegPrediction = this.getFactorsForModel(modelNr, neg);
        double logRatioIfNeg = Math.log(ratiosForNegPrediction[pos]);
        logRatioIfNeg = Math.min(limit, Math.max(-limit, logRatioIfNeg));

        // The offset shared by both predictions is the mean of the two
        // factors applied to the same (positive) class.
        double offset = (logRatioIfPos + logRatioIfNeg) / 2;
        if (Tools.isEqual(offset, limit) || Tools.isEqual(offset, -limit)) {
            // This should not happen. Obviously we found a dummy-model,
            // because the prediction is not required!
            // Do not just shift the lift, but indicate this point by an
            // illegal value:
            logRatioIfPos = 10 * offset;
            offset = 0;
        }

        // The offset moves to the model independent part. What remains of
        // the positive factor is the single weight of this model; the
        // negative factor minus the offset would be its negation.
        result[0] += offset;
        result[modelNr + 1] = logRatioIfPos - offset;
    }
    return result;
}
}