/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.meta;

import java.awt.Component;
import java.util.Iterator;
import java.util.List;

import javax.swing.JTabbedPane;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.gui.tools.ExtendedJTabbedPane;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.learner.PredictionModel;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.Tools;

/**
 * A model for the Bayesian Boosting algorithm by Martin Scholz.
 *
 * Prediction works by applying every embedded base model to the example set,
 * multiplying per-class odds-ratio estimates (initialized from the class
 * priors) by the lift ratios of each base model's prediction, and finally
 * translating the accumulated odds into normalized class probabilities.
 *
 * @author Martin Scholz
 * @version $Id: BayBoostModel.java,v 1.8 2008/05/09 19:22:48 ingomierswa Exp $
 */
public class BayBoostModel extends PredictionModel {

    private static final long serialVersionUID = 5821921049035718838L;

    // Holds the base models and their contingency matrices.
    // Please access with getter methods.
    private List<BayBoostBaseModelInfo> modelInfo;

    // The classes' prior probabilities in the training set, starting with index 0.
    private double[] priors;

    // If set to a value i >= 0 then only the first i models are applied.
    private int maxModelNumber = -1;

    // Parameter name used by setParameter() to restrict the number of models.
    private static final String MAX_MODEL_NUMBER = "iteration";

    // Parameter name used by setParameter() to turn soft into crisp classifiers.
    private static final String CONV_TO_CRISP = "crisp";

    // Decision threshold for boolean classification problems; 0.5 by default.
    private double threshold = 0.5;

    /**
     * @param exampleSet
     *            the example set used for training
     * @param modelInfos
     *            a <code>List</code> of <code>BayBoostBaseModelInfo</code>
     *            objects, each entry holding a model and a contingency matrix
     *            containing weights for all prediction/label combinations.
     * @param priors
     *            an array of the prior probabilities of labels
     */
    public BayBoostModel(ExampleSet exampleSet, List<BayBoostBaseModelInfo> modelInfos, double[] priors) {
        super(exampleSet);
        this.modelInfo = modelInfos;
        this.priors = priors;
    }

    /** @return the model/contingency-matrix pair at the given position */
    public BayBoostBaseModelInfo getBayBoostBaseModelInfo(int index) {
        return this.modelInfo.get(index);
    }

    /**
     * Setting the parameter <code>MAX_MODEL_NUMBER</code> allows to discard
     * all but the first n models for specified n. <code>CONV_TO_CRISP</code>
     * allows to set another threshold than 0.5 for boolean prediction problems.
     *
     * Unknown parameters (and non-numeric values for MAX_MODEL_NUMBER) are
     * delegated to the superclass implementation.
     */
    public void setParameter(String name, String value) throws OperatorException {
        if (name.equalsIgnoreCase(MAX_MODEL_NUMBER)) {
            try {
                this.maxModelNumber = Integer.parseInt(value);
                return;
            } catch (NumberFormatException ignored) {
                // Intentionally fall through to super.setParameter() below,
                // which handles (or rejects) the unparseable value.
            }
        } else if (name.equalsIgnoreCase(CONV_TO_CRISP)) {
            this.threshold = Double.parseDouble(value.trim());
            return;
        }
        super.setParameter(name, value);
    }

    /**
     * Using this setter with a positive value makes the model discard all
     * but the specified number of base models. A value of -1 turns off this
     * option.
     */
    public void setMaxModelNumber(int numModels) {
        this.maxModelNumber = numModels;
    }

    /** @return a tabbed pane with one visualization tab per embedded model */
    public Component getVisualizationComponent(IOContainer container) {
        JTabbedPane tabPane = new ExtendedJTabbedPane();
        for (int i = 0; i < this.getNumberOfModels(); i++) {
            Model model = this.getModel(i);
            tabPane.add("Model " + (i + 1), model.getVisualizationComponent(container));
        }
        return tabPane;
    }

    /** @return a <code>String</code> representation of this boosting model. */
    @Override
    public String toString() {
        // StringBuilder suffices here: no synchronization is required.
        StringBuilder result = new StringBuilder();
        result.append(super.toString());
        result.append(Tools.getLineSeparator());
        result.append("Number of inner models: ");
        result.append(this.getNumberOfModels());
        result.append(Tools.getLineSeparators(2));
        for (int i = 0; i < this.getNumberOfModels(); i++) {
            Model model = this.getModel(i);
            if (i > 0) {
                result.append(Tools.getLineSeparator());
            }
            result.append("Embedded model #" + i + ":" + Tools.getLineSeparator() + model.toResultString());
        }
        return result.toString();
    }

    /** @return the number of embedded models */
    public int getNumberOfModels() {
        if (this.maxModelNumber >= 0)
            return Math.min(this.maxModelNumber, modelInfo.size());
        else
            return modelInfo.size();
    }

    /**
     * Gets factors for models in the case of general nominal class labels.
     *
     * @param modelNr
     *            the number of the model
     * @param predicted
     *            the predicted label
     * @return a <code>double[]</code> with one factor per class label,
     *         <code>Double.POSITIVE_INFINITY</code> if the rule
     *         deterministically predicts a value, and
     *         <code>RULE_DOES_NOT_APPLY</code> if no prediction can be made.
     */
    private double[] getFactorsForModel(int modelNr, int predicted) {
        ContingencyMatrix cm = this.modelInfo.get(modelNr).getContingencyMatrix();
        return cm.getLiftRatiosForPrediction(predicted);
    }

    /**
     * Getter method for prior class probabilities estimated as the relative
     * frequencies in the training set.
     *
     * @param classIndex
     *            the index of a class, starting with 0
     * @return the prior probability of the specified class
     */
    private double getPriorOfClass(int classIndex) {
        return this.priors[classIndex];
    }

    /** Getter for the prior array; returns a defensive copy. */
    public double[] getPriors() {
        double[] result = new double[this.priors.length];
        System.arraycopy(this.priors, 0, result, 0, result.length);
        return result;
    }

    /**
     * Getter method for embedded models
     *
     * @param index
     *            the number of a model part of this boost model
     * @return binary or nominal decision model for the given classification
     *         index.
     */
    public Model getModel(int index) {
        return this.modelInfo.get(index).getModel();
    }

    /**
     * Getter method for a specific contingency matrix
     *
     * @param index
     *            the number of the model for which to read the contingency matrix
     * @return a <code>ContingencyMatrix</code> object
     */
    public ContingencyMatrix getContingencyMatrix(int index) {
        return this.modelInfo.get(index).getContingencyMatrix();
    }

    /**
     * Iterates over all models and returns the class with maximum likelihood.
     *
     * The procedure is: (1) create one special attribute per class holding the
     * intermediate odds estimates, (2) apply each base model and fold its lift
     * ratios into those estimates, (3) translate the final odds into
     * predictions and confidences, (4) remove the temporary attributes.
     *
     * @param exampleSet
     *            the set of examples to be classified
     * @param predictedLabel
     *            the label that finally holds the predictions
     */
    public ExampleSet performPrediction(ExampleSet exampleSet, Attribute predictedLabel) throws OperatorException {
        // Prepare special attributes for storing intermediate results:
        final Attribute[] specialAttributes = this.createSpecialAttributes(exampleSet);
        this.initIntermediateResultAttributes(exampleSet, specialAttributes);

        // Apply all models to the example set, each time updating the
        // intermediate results:
        for (int i = 0; i < this.getNumberOfModels(); i++) {
            Model model = this.getModel(i);
            ExampleSet clonedExampleSet = (ExampleSet) exampleSet.clone();
            clonedExampleSet = model.apply(clonedExampleSet);
            this.updateEstimates(clonedExampleSet, this.getContingencyMatrix(i), specialAttributes);
            PredictionModel.removePredictedLabel(clonedExampleSet);
        }

        // Compute and store probability estimates from the intermediate
        // results:
        Iterator<Example> reader = exampleSet.iterator();
        while (reader.hasNext()) {
            Example example = reader.next();
            this.translateOddsIntoPredictions(example, specialAttributes, exampleSet.getAttributes().getLabel());
        }

        // Remove the special attributes used for storing intermediate
        // estimates:
        this.cleanUpSpecialAttributes(exampleSet, specialAttributes);
        return exampleSet;
    }

    /** Creates a special attribute for each label to store intermediate results. */
    private Attribute[] createSpecialAttributes(ExampleSet exampleSet) throws OperatorException {
        final String attributePrefix = "BayBoostModelPrediction";
        Attribute[] specialAttributes = new Attribute[this.getLabel().getMapping().size()];
        for (int i = 0; i < specialAttributes.length; i++) {
            specialAttributes[i] =
                com.rapidminer.example.Tools.createSpecialAttribute(exampleSet, attributePrefix + i, Ontology.NUMERICAL);
        }
        return specialAttributes;
    }

    /** Removes the provided special attributes from the exampleSet and exampleTable. */
    private void cleanUpSpecialAttributes(ExampleSet exampleSet, Attribute[] specialAttributes) throws OperatorException {
        for (int i = 0; i < specialAttributes.length; i++) {
            exampleSet.getAttributes().remove(specialAttributes[i]);
            exampleSet.getExampleTable().removeAttribute(specialAttributes[i]);
        }
    }

    /**
     * Initializes each example's intermediate estimate attributes with the
     * odds ratio of the corresponding class prior.
     */
    private void initIntermediateResultAttributes(ExampleSet exampleSet, Attribute[] specAttrib) {
        // Compute odds ratios from class priors:
        double[] priorOdds = new double[this.priors.length];
        for (int i = 0; i < priorOdds.length; i++) {
            // A prior of 1 yields infinite odds (class is deterministically known).
            priorOdds[i] = (this.priors[i] == 1) ? Double.POSITIVE_INFINITY : (this.priors[i] / (1 - this.priors[i]));
        }

        // Initialize each intermediate estimate with the odds ratio of
        // the corresponding class:
        Iterator<Example> reader = exampleSet.iterator();
        while (reader.hasNext()) {
            Example example = reader.next();
            for (int i = 0; i < specAttrib.length; i++) {
                example.setValue(specAttrib[i], priorOdds[i]);
            }
        }
    }

    /**
     * Turns the accumulated odds ratios of a single example into normalized
     * class probabilities, writes the crisp prediction (honoring the threshold
     * for boolean problems) and sets the per-class confidence values.
     */
    private void translateOddsIntoPredictions(Example example, Attribute[] specAttrib, Attribute exampleSetLabel) {
        // Turn lift ratio into conditional probabilities:
        double probSum = 0;
        double[] classProb = new double[specAttrib.length];
        int bestIndex = 0;
        for (int n = 0; n < classProb.length; n++) {
            // The probability Prob( C | x ) for class C given the description
            // can be calculated from factor = Prob(C | x) / Prob(neg(C) | x)
            // as Prob( C | x ) = factor / (1 + factor):
            double odds = example.getValue(specAttrib[n]);
            if (Double.isNaN(odds)) {
                logWarning("Found NaN odd ratio estimate.");
                classProb[n] = 1;
            } else
                classProb[n] = (Double.isInfinite(odds)) ? 1 : (odds / (1 + odds));

            probSum += classProb[n]; // accumulate probabilities, should be 1

            if (classProb[n] > classProb[bestIndex]) {
                bestIndex = n;
            }
        }

        // Normalize probabilities if the sum is not 1.
        // This can happen if the subset defined by a rule does not contain all
        // classes.
        if (probSum != 1.0) {
            for (int k = 0; k < classProb.length; k++) {
                classProb[k] /= probSum;
            }
        }

        // Store the final prediction. All internal computations have used the
        // indices of the stored label. The indices of the new label may be
        // different in case of stored and reloaded models or example sets.
        // For this reason the final predictions are written in terms of the
        // strings, avoiding any assumptions about the mapping.
        final String bestLabel;
        if (this.getLabel().isNominal() && this.getLabel().getMapping().size() == 2 && this.threshold != 0.5) {
            // boolean classification problem --> only in this case a threshold
            // makes sense ... the local indices:
            int posIndex = this.getLabel().getMapping().getPositiveIndex();
            int negIndex = this.getLabel().getMapping().getNegativeIndex();

            // Decide whether threshold is valid, otherwise just use 0.5.
            // Note: the clamped value is kept local so that classifying an
            // example no longer mutates the model's threshold field as a
            // side effect (predictions are unchanged either way).
            double appliedThreshold = (this.threshold >= 0 && this.threshold <= 1) ? this.threshold : 0.5;

            // If the threshold is exceeded store the string representing the
            // positive class, otherwise the one for the negative class.
            bestLabel = this.getLabel().getMapping().mapIndex((classProb[posIndex] >= appliedThreshold) ? posIndex : negIndex);
        } else {
            // otherwise: just predict the most probable class
            bestLabel = this.getLabel().getMapping().mapIndex(bestIndex);
        }

        // Write the prediction to the example set. In this case the indices of
        // the label currently part of the example set have to be used:
        example.setValue(example.getAttributes().getPredictedLabel(), exampleSetLabel.getMapping().mapString(bestLabel));

        // Set confidence values for all classes:
        for (int k = 0; k < classProb.length; k++) {
            // The locally used attribute indices correspond to the stored
            // label, so the String representation required for setting
            // confidences is derived from the stored label.
            if (Double.isNaN(classProb[k]) || classProb[k] < 0 || classProb[k] > 1) {
                logWarning("Found illegal confidence value: " + classProb[k]);
            }
            example.setConfidence(this.getLabel().getMapping().mapIndex(k), classProb[k]);
        }
    }

    /**
     * Folds one base model's lift ratios into the per-example intermediate
     * odds estimates stored in the special attributes.
     */
    private void updateEstimates(ExampleSet exampleSet, ContingencyMatrix cm, Attribute[] specialAttributes) {
        Iterator<Example> reader = exampleSet.iterator();
        while (reader.hasNext()) {
            Example example = reader.next();
            int predicted = (int) example.getPredictedLabel();

            L: for (int j = 0; j < cm.getNumberOfClasses(); j++) {
                final double liftRatioCurrent = cm.getLiftRatio(j, predicted); // rule: predicted => j

                // Change the intermediate estimates, take care about
                // deterministic and non-applicable rules:
                if (Double.isNaN(liftRatioCurrent)) { // RULE_DOES_NOT_APPLY,
                    logWarning("Ignoring non-applicable model."); // ignore it
                    continue L;
                } else if (Double.isInfinite(liftRatioCurrent)) { // Double.POSITIVE_INFINITY
                    if (example.getValue(specialAttributes[j]) != 0) {
                        for (int k = 0; k < specialAttributes.length; k++) {
                            // reset all probabilities to 0
                            example.setValue(specialAttributes[k], 0);
                        }
                        // class is deterministically correct
                        example.setValue(specialAttributes[j], liftRatioCurrent);
                    } else
                        continue L; // ignore factor, class is already known to
                                    // be deterministically incorrect
                } else { // the "normal" case
                    double oldValue = example.getValue(specialAttributes[j]);
                    if (Double.isNaN(oldValue)) {
                        logWarning("Found NaN value in intermediate odds ratio estimates!");
                    }
                    if (!Double.isInfinite(oldValue)) {
                        example.setValue(specialAttributes[j], oldValue * liftRatioCurrent);
                    }
                }
            }
        }
    }

    /**
     * Helper method to adjust the intermediate products during model
     * application.
     *
     * @param products
     *            the intermediate products, these values are changed by the
     *            method
     * @param liftFactors
     *            the factor vector that applies for the prediction for the
     *            current example
     *
     * @return <code>true</code> iff the class is deterministically known
     *         after applying this method
     */
    public static boolean adjustIntermediateProducts(double[] products, double[] liftFactors) {
        L: for (int i = 0; i < liftFactors.length; i++) {
            // Change the intermediate estimates, take care about deterministic
            // and non-applicable rules:
            if (Double.isNaN(liftFactors[i])) { // WeightedPerformanceMeasures.RULE_DOES_NOT_APPLY)
                LogService.getGlobal().log("Ignoring non-applicable model.", LogService.WARNING);
                continue L;
            } else if (Double.isInfinite(liftFactors[i])) {
                if (products[i] != 0) {
                    for (int j = 0; j < products.length; j++) {
                        products[j] = 0; // reset all probabilities to 0
                    }
                    products[i] = liftFactors[i]; // class is deterministically correct
                    return true; // class is known
                } else
                    continue L; // ignore factor, class is already known to be
                                // deterministically incorrect
            } else { // the "normal" case
                products[i] *= liftFactors[i];
                if (Double.isNaN(products[i])) {
                    LogService.getGlobal().log("Found NaN value in intermediate odds ratio estimates!", LogService.WARNING);
                }
            }
        }
        return false;
    }

    /**
     * This method is only supported for boolean target attributes. It computes
     * a flattened version of model weights. In contrast to the original
     * version the final predictions are additive logarithms of the lift ratios,
     * additively rescaled so that the prediction <code>false</code> of model
     * i produces <code>-i</code> if <code>true</code> produces weight i.
     * This means that only one weight per model is required. The first
     * component of the returned array is the part that is independent of any
     * prediction, the i-th component is the weight of model i. The (log-)linear
     * model predicts depending on whether the linear combination of predictions
     * (either -1 or 1) is greater than 0 or not. Infinite values are
     * problematic, so a min/max value is used.
     *
     * @return the flattened weights of all models
     * @throws OperatorException
     *             (as a <code>UserError</code>) if the label is not boolean
     */
    public double[] getModelWeights() throws OperatorException {
        if (this.getLabel().getMapping().size() != 2)
            throw new UserError(null, 114, "BayBoostModel", this.getLabel());

        int maxWeight = 10; // clamp for log-ratios to exclude infinity etc.
        final int pos = this.getLabel().getMapping().getPositiveIndex();
        final int neg = this.getLabel().getMapping().getNegativeIndex();
        double[] weights = new double[this.getNumberOfModels() + 1];

        // initialise model independent part from the prior odds:
        double odds = this.getPriorOfClass(pos) / this.getPriorOfClass(neg);
        weights[0] = Math.log(odds);

        for (int i = 1; i < weights.length; i++) {
            double logPosRatio, logNegRatio;
            {
                // lift ratios for positive prediction; factor applied to the
                // positive class, clamped to [-maxWeight, maxWeight]:
                double liftRatiosPos[] = this.getFactorsForModel(i - 1, pos);
                logPosRatio = Math.log(liftRatiosPos[pos]);
                logPosRatio = Math.min(maxWeight, Math.max(-maxWeight, logPosRatio));

                // lift ratios for negative prediction; also the factor applied
                // to the positive class, clamped the same way:
                double liftRatiosNeg[] = this.getFactorsForModel(i - 1, neg);
                logNegRatio = Math.log(liftRatiosNeg[pos]);
                logNegRatio = Math.min(maxWeight, Math.max(-maxWeight, logNegRatio));
            }

            // Compute the offset part of both predictions.
            // This requires to compare the factors applied to the same
            // (positive in this case) class, one time when the model predicts
            // positive, and one time if it predicts negative.
            double indep = (logPosRatio + logNegRatio) / 2;
            if (Tools.isEqual(indep, maxWeight) || Tools.isEqual(indep, -maxWeight)) {
                // This should not happen. Obviously we found a dummy-model,
                // because the prediction is not required!
                // Do not just shift the lift, but indicate this point by an
                // illegal value:
                logPosRatio = 10 * indep;
                indep = 0;
            }

            // Update model independent part, which is valid if both
            // model-dependent updates are also made:
            weights[0] += indep;

            // Update model dependent weights:
            logPosRatio -= indep;
            // Next step in principle: logNegRatio -= indep, then
            // logNegRatio == logPosRatio.
            // Goal reached: One weight suffices per model.
            weights[i] = logPosRatio;
        }
        return weights;
    }
}