/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.meta;

import java.awt.Component;
import java.util.Iterator;
import java.util.List;

import javax.swing.JTabbedPane;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.gui.tools.ExtendedJTabbedPane;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.learner.PredictionModel;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.Tools;

/**
 * A model for the Bayesian Boosting algorithm by Martin Scholz.
 *
 * Prediction works by applying every embedded base model to the example set,
 * multiplying per-class odds-ratio estimates (initialized from the class
 * priors) by the lift ratios of each base model's prediction, and finally
 * translating the accumulated odds into normalized class probabilities.
 *
 * @author Martin Scholz
 * @version $Id: BayBoostModel.java,v 1.8 2008/05/09 19:22:48 ingomierswa Exp $
 */
public class BayBoostModel extends PredictionModel {

    private static final long serialVersionUID = 5821921049035718838L;

    // Holds the base models and their contingency matrices.
    // Please access with getter methods.
    private List<BayBoostBaseModelInfo> modelInfo;

    // The classes' prior probabilities in the training set, starting with index 0.
    private double[] priors;

    // If set to a value i >= 0 then only the first i models are applied.
    private int maxModelNumber = -1;

    // Parameter name used by setParameter() to restrict the number of models.
    private static final String MAX_MODEL_NUMBER = "iteration";

    // Parameter name used by setParameter() to turn soft into crisp classifiers.
    private static final String CONV_TO_CRISP = "crisp";

    // Decision threshold for boolean classification problems; 0.5 by default.
    private double threshold = 0.5;

    /**
     * @param exampleSet
     *            the example set used for training
     * @param modelInfos
     *            a <code>List</code> of <code>BayBoostBaseModelInfo</code>
     *            objects, each entry holding a model and a contingency matrix
     *            containing weights for all prediction/label combinations.
     * @param priors
     *            an array of the prior probabilities of labels
     */
    public BayBoostModel(ExampleSet exampleSet, List<BayBoostBaseModelInfo> modelInfos, double[] priors) {
        super(exampleSet);
        this.modelInfo = modelInfos;
        this.priors = priors;
    }

    /** @return the model/contingency-matrix pair at the given position */
    public BayBoostBaseModelInfo getBayBoostBaseModelInfo(int index) {
        return this.modelInfo.get(index);
    }

    /**
     * Setting the parameter <code>MAX_MODEL_NUMBER</code> allows to discard
     * all but the first n models for specified n. <code>CONV_TO_CRISP</code>
     * allows to set another threshold than 0.5 for boolean prediction problems.
     *
     * Unknown parameters (and non-numeric values for MAX_MODEL_NUMBER) are
     * delegated to the superclass implementation.
     */
    public void setParameter(String name, String value) throws OperatorException {
        if (name.equalsIgnoreCase(MAX_MODEL_NUMBER)) {
            try {
                this.maxModelNumber = Integer.parseInt(value);
                return;
            } catch (NumberFormatException ignored) {
                // Intentionally fall through to super.setParameter() below,
                // which handles (or rejects) the unparseable value.
            }
        } else if (name.equalsIgnoreCase(CONV_TO_CRISP)) {
            this.threshold = Double.parseDouble(value.trim());
            return;
        }
        super.setParameter(name, value);
    }

    /**
     * Using this setter with a positive value makes the model discard all
     * but the specified number of base models. A value of -1 turns off this
     * option.
     */
    public void setMaxModelNumber(int numModels) {
        this.maxModelNumber = numModels;
    }

    /** @return a tabbed pane with one visualization tab per embedded model */
    public Component getVisualizationComponent(IOContainer container) {
        JTabbedPane tabPane = new ExtendedJTabbedPane();
        for (int i = 0; i < this.getNumberOfModels(); i++) {
            Model model = this.getModel(i);
            tabPane.add("Model " + (i + 1), model.getVisualizationComponent(container));
        }
        return tabPane;
    }

    /** @return a <code>String</code> representation of this boosting model. */
    @Override
    public String toString() {
        // StringBuilder suffices here: no synchronization is required.
        StringBuilder result = new StringBuilder();
        result.append(super.toString());
        result.append(Tools.getLineSeparator());
        result.append("Number of inner models: ");
        result.append(this.getNumberOfModels());
        result.append(Tools.getLineSeparators(2));
        for (int i = 0; i < this.getNumberOfModels(); i++) {
            Model model = this.getModel(i);
            if (i > 0) {
                result.append(Tools.getLineSeparator());
            }
            result.append("Embedded model #" + i + ":" + Tools.getLineSeparator() + model.toResultString());
        }
        return result.toString();
    }

    /** @return the number of embedded models */
    public int getNumberOfModels() {
        if (this.maxModelNumber >= 0)
            return Math.min(this.maxModelNumber, modelInfo.size());
        else
            return modelInfo.size();
    }

    /**
     * Gets factors for models in the case of general nominal class labels.
     *
     * @param modelNr
     *            the number of the model
     * @param predicted
     *            the predicted label
     * @return a <code>double[]</code> with one factor per class label,
     *         <code>Double.POSITIVE_INFINITY</code> if the rule
     *         deterministically predicts a value, and
     *         <code>RULE_DOES_NOT_APPLY</code> if no prediction can be made.
     */
    private double[] getFactorsForModel(int modelNr, int predicted) {
        ContingencyMatrix cm = this.modelInfo.get(modelNr).getContingencyMatrix();
        return cm.getLiftRatiosForPrediction(predicted);
    }

    /**
     * Getter method for prior class probabilities estimated as the relative
     * frequencies in the training set.
     *
     * @param classIndex
     *            the index of a class, starting with 0
     * @return the prior probability of the specified class
     */
    private double getPriorOfClass(int classIndex) {
        return this.priors[classIndex];
    }

    /** Getter for the prior array; returns a defensive copy. */
    public double[] getPriors() {
        double[] result = new double[this.priors.length];
        System.arraycopy(this.priors, 0, result, 0, result.length);
        return result;
    }

    /**
     * Getter method for embedded models
     *
     * @param index
     *            the number of a model part of this boost model
     * @return binary or nominal decision model for the given classification
     *         index.
     */
    public Model getModel(int index) {
        return this.modelInfo.get(index).getModel();
    }

    /**
     * Getter method for a specific contingency matrix
     *
     * @param index
     *            the number of the model for which to read the contingency matrix
     * @return a <code>ContingencyMatrix</code> object
     */
    public ContingencyMatrix getContingencyMatrix(int index) {
        return this.modelInfo.get(index).getContingencyMatrix();
    }

    /**
     * Iterates over all models and returns the class with maximum likelihood.
     *
     * The procedure is: (1) create one special attribute per class holding the
     * intermediate odds estimates, (2) apply each base model and fold its lift
     * ratios into those estimates, (3) translate the final odds into
     * predictions and confidences, (4) remove the temporary attributes.
     *
     * @param exampleSet
     *            the set of examples to be classified
     * @param predictedLabel
     *            the label that finally holds the predictions
     */
    public ExampleSet performPrediction(ExampleSet exampleSet, Attribute predictedLabel) throws OperatorException {
        // Prepare special attributes for storing intermediate results:
        final Attribute[] specialAttributes = this.createSpecialAttributes(exampleSet);
        this.initIntermediateResultAttributes(exampleSet, specialAttributes);

        // Apply all models to the example set, each time updating the
        // intermediate results:
        for (int i = 0; i < this.getNumberOfModels(); i++) {
            Model model = this.getModel(i);
            ExampleSet clonedExampleSet = (ExampleSet) exampleSet.clone();
            clonedExampleSet = model.apply(clonedExampleSet);
            this.updateEstimates(clonedExampleSet, this.getContingencyMatrix(i), specialAttributes);
            PredictionModel.removePredictedLabel(clonedExampleSet);
        }

        // Compute and store probability estimates from the intermediate
        // results:
        Iterator<Example> reader = exampleSet.iterator();
        while (reader.hasNext()) {
            Example example = reader.next();
            this.translateOddsIntoPredictions(example, specialAttributes, exampleSet.getAttributes().getLabel());
        }

        // Remove the special attributes used for storing intermediate
        // estimates:
        this.cleanUpSpecialAttributes(exampleSet, specialAttributes);
        return exampleSet;
    }

    /** Creates a special attribute for each label to store intermediate results. */
    private Attribute[] createSpecialAttributes(ExampleSet exampleSet) throws OperatorException {
        final String attributePrefix = "BayBoostModelPrediction";
        Attribute[] specialAttributes = new Attribute[this.getLabel().getMapping().size()];
        for (int i = 0; i < specialAttributes.length; i++) {
            specialAttributes[i] =
                com.rapidminer.example.Tools.createSpecialAttribute(exampleSet, attributePrefix + i, Ontology.NUMERICAL);
        }
        return specialAttributes;
    }

    /** Removes the provided special attributes from the exampleSet and exampleTable. */
    private void cleanUpSpecialAttributes(ExampleSet exampleSet, Attribute[] specialAttributes) throws OperatorException {
        for (int i = 0; i < specialAttributes.length; i++) {
            exampleSet.getAttributes().remove(specialAttributes[i]);
            exampleSet.getExampleTable().removeAttribute(specialAttributes[i]);
        }
    }

    /**
     * Initializes each example's intermediate estimate attributes with the
     * odds ratio of the corresponding class prior.
     */
    private void initIntermediateResultAttributes(ExampleSet exampleSet, Attribute[] specAttrib) {
        // Compute odds ratios from class priors:
        double[] priorOdds = new double[this.priors.length];
        for (int i = 0; i < priorOdds.length; i++) {
            // A prior of 1 yields infinite odds (class is deterministically known).
            priorOdds[i] = (this.priors[i] == 1) ? Double.POSITIVE_INFINITY : (this.priors[i] / (1 - this.priors[i]));
        }

        // Initialize each intermediate estimate with the odds ratio of
        // the corresponding class:
        Iterator<Example> reader = exampleSet.iterator();
        while (reader.hasNext()) {
            Example example = reader.next();
            for (int i = 0; i < specAttrib.length; i++) {
                example.setValue(specAttrib[i], priorOdds[i]);
            }
        }
    }

    /**
     * Turns the accumulated odds ratios of a single example into normalized
     * class probabilities, writes the crisp prediction (honoring the threshold
     * for boolean problems) and sets the per-class confidence values.
     */
    private void translateOddsIntoPredictions(Example example, Attribute[] specAttrib, Attribute exampleSetLabel) {
        // Turn lift ratio into conditional probabilities:
        double probSum = 0;
        double[] classProb = new double[specAttrib.length];
        int bestIndex = 0;
        for (int n = 0; n < classProb.length; n++) {
            // The probability Prob( C | x ) for class C given the description
            // can be calculated from factor = Prob(C | x) / Prob(neg(C) | x)
            // as Prob( C | x ) = factor / (1 + factor):
            double odds = example.getValue(specAttrib[n]);
            if (Double.isNaN(odds)) {
                logWarning("Found NaN odd ratio estimate.");
                classProb[n] = 1;
            } else
                classProb[n] = (Double.isInfinite(odds)) ? 1 : (odds / (1 + odds));

            probSum += classProb[n]; // accumulate probabilities, should be 1

            if (classProb[n] > classProb[bestIndex]) {
                bestIndex = n;
            }
        }

        // Normalize probabilities if the sum is not 1.
        // This can happen if the subset defined by a rule does not contain all
        // classes.
        if (probSum != 1.0) {
            for (int k = 0; k < classProb.length; k++) {
                classProb[k] /= probSum;
            }
        }

        // Store the final prediction. All internal computations have used the
        // indices of the stored label. The indices of the new label may be
        // different in case of stored and reloaded models or example sets.
        // For this reason the final predictions are written in terms of the
        // strings, avoiding any assumptions about the mapping.
        final String bestLabel;
        if (this.getLabel().isNominal() && this.getLabel().getMapping().size() == 2 && this.threshold != 0.5) {
            // boolean classification problem --> only in this case a threshold
            // makes sense ... the local indices:
            int posIndex = this.getLabel().getMapping().getPositiveIndex();
            int negIndex = this.getLabel().getMapping().getNegativeIndex();

            // Decide whether threshold is valid, otherwise just use 0.5.
            // Note: the clamped value is kept local so that classifying an
            // example no longer mutates the model's threshold field as a
            // side effect (predictions are unchanged either way).
            double appliedThreshold = (this.threshold >= 0 && this.threshold <= 1) ? this.threshold : 0.5;

            // If the threshold is exceeded store the string representing the
            // positive class, otherwise the one for the negative class.
            bestLabel = this.getLabel().getMapping().mapIndex((classProb[posIndex] >= appliedThreshold) ? posIndex : negIndex);
        } else {
            // otherwise: just predict the most probable class
            bestLabel = this.getLabel().getMapping().mapIndex(bestIndex);
        }

        // Write the prediction to the example set. In this case the indices of
        // the label currently part of the example set have to be used:
        example.setValue(example.getAttributes().getPredictedLabel(), exampleSetLabel.getMapping().mapString(bestLabel));

        // Set confidence values for all classes:
        for (int k = 0; k < classProb.length; k++) {
            // The locally used attribute indices correspond to the stored
            // label, so the String representation required for setting
            // confidences is derived from the stored label.
            if (Double.isNaN(classProb[k]) || classProb[k] < 0 || classProb[k] > 1) {
                logWarning("Found illegal confidence value: " + classProb[k]);
            }
            example.setConfidence(this.getLabel().getMapping().mapIndex(k), classProb[k]);
        }
    }

    /**
     * Folds one base model's lift ratios into the per-example intermediate
     * odds estimates stored in the special attributes.
     */
    private void updateEstimates(ExampleSet exampleSet, ContingencyMatrix cm, Attribute[] specialAttributes) {
        Iterator<Example> reader = exampleSet.iterator();
        while (reader.hasNext()) {
            Example example = reader.next();
            int predicted = (int) example.getPredictedLabel();

            L: for (int j = 0; j < cm.getNumberOfClasses(); j++) {
                final double liftRatioCurrent = cm.getLiftRatio(j, predicted); // rule: predicted => j

                // Change the intermediate estimates, take care about
                // deterministic and non-applicable rules:
                if (Double.isNaN(liftRatioCurrent)) { // RULE_DOES_NOT_APPLY,
                    logWarning("Ignoring non-applicable model."); // ignore it
                    continue L;
                } else if (Double.isInfinite(liftRatioCurrent)) { // Double.POSITIVE_INFINITY
                    if (example.getValue(specialAttributes[j]) != 0) {
                        for (int k = 0; k < specialAttributes.length; k++) {
                            // reset all probabilities to 0
                            example.setValue(specialAttributes[k], 0);
                        }
                        // class is deterministically correct
                        example.setValue(specialAttributes[j], liftRatioCurrent);
                    } else
                        continue L; // ignore factor, class is already known to
                                    // be deterministically incorrect
                } else { // the "normal" case
                    double oldValue = example.getValue(specialAttributes[j]);
                    if (Double.isNaN(oldValue)) {
                        logWarning("Found NaN value in intermediate odds ratio estimates!");
                    }
                    if (!Double.isInfinite(oldValue)) {
                        example.setValue(specialAttributes[j], oldValue * liftRatioCurrent);
                    }
                }
            }
        }
    }

    /**
     * Helper method to adjust the intermediate products during model
     * application.
     *
     * @param products
     *            the intermediate products, these values are changed by the
     *            method
     * @param liftFactors
     *            the factor vector that applies for the prediction for the
     *            current example
     *
     * @return <code>true</code> iff the class is deterministically known
     *         after applying this method
     */
    public static boolean adjustIntermediateProducts(double[] products, double[] liftFactors) {
        L: for (int i = 0; i < liftFactors.length; i++) {
            // Change the intermediate estimates, take care about deterministic
            // and non-applicable rules:
            if (Double.isNaN(liftFactors[i])) { // WeightedPerformanceMeasures.RULE_DOES_NOT_APPLY)
                LogService.getGlobal().log("Ignoring non-applicable model.", LogService.WARNING);
                continue L;
            } else if (Double.isInfinite(liftFactors[i])) {
                if (products[i] != 0) {
                    for (int j = 0; j < products.length; j++) {
                        products[j] = 0; // reset all probabilities to 0
                    }
                    products[i] = liftFactors[i]; // class is deterministically correct
                    return true; // class is known
                } else
                    continue L; // ignore factor, class is already known to be
                                // deterministically incorrect
            } else { // the "normal" case
                products[i] *= liftFactors[i];
                if (Double.isNaN(products[i])) {
                    LogService.getGlobal().log("Found NaN value in intermediate odds ratio estimates!", LogService.WARNING);
                }
            }
        }
        return false;
    }

    /**
     * This method is only supported for boolean target attributes. It computes
     * a flattened version of model weights. In contrast to the original
     * version the final predictions are additive logarithms of the lift ratios,
     * additively rescaled so that the prediction <code>false</code> of model
     * i produces <code>-i</code> if <code>true</code> produces weight i.
     * This means that only one weight per model is required. The first
     * component of the returned array is the part that is independent of any
     * prediction, the i-th component is the weight of model i. The (log-)linear
     * model predicts depending on whether the linear combination of predictions
     * (either -1 or 1) is greater than 0 or not. Infinite values are
     * problematic, so a min/max value is used.
     *
     * @return the flattened weights of all models
     * @throws OperatorException
     *             (as a <code>UserError</code>) if the label is not boolean
     */
    public double[] getModelWeights() throws OperatorException {
        if (this.getLabel().getMapping().size() != 2)
            throw new UserError(null, 114, "BayBoostModel", this.getLabel());

        int maxWeight = 10; // clamp for log-ratios to exclude infinity etc.
        final int pos = this.getLabel().getMapping().getPositiveIndex();
        final int neg = this.getLabel().getMapping().getNegativeIndex();
        double[] weights = new double[this.getNumberOfModels() + 1];

        // initialise model independent part from the prior odds:
        double odds = this.getPriorOfClass(pos) / this.getPriorOfClass(neg);
        weights[0] = Math.log(odds);

        for (int i = 1; i < weights.length; i++) {
            double logPosRatio, logNegRatio;
            {
                // lift ratios for positive prediction; factor applied to the
                // positive class, clamped to [-maxWeight, maxWeight]:
                double liftRatiosPos[] = this.getFactorsForModel(i - 1, pos);
                logPosRatio = Math.log(liftRatiosPos[pos]);
                logPosRatio = Math.min(maxWeight, Math.max(-maxWeight, logPosRatio));

                // lift ratios for negative prediction; also the factor applied
                // to the positive class, clamped the same way:
                double liftRatiosNeg[] = this.getFactorsForModel(i - 1, neg);
                logNegRatio = Math.log(liftRatiosNeg[pos]);
                logNegRatio = Math.min(maxWeight, Math.max(-maxWeight, logNegRatio));
            }

            // Compute the offset part of both predictions.
            // This requires to compare the factors applied to the same
            // (positive in this case) class, one time when the model predicts
            // positive, and one time if it predicts negative.
            double indep = (logPosRatio + logNegRatio) / 2;
            if (Tools.isEqual(indep, maxWeight) || Tools.isEqual(indep, -maxWeight)) {
                // This should not happen. Obviously we found a dummy-model,
                // because the prediction is not required!
                // Do not just shift the lift, but indicate this point by an
                // illegal value:
                logPosRatio = 10 * indep;
                indep = 0;
            }

            // Update model independent part, which is valid if both
            // model-dependent updates are also made:
            weights[0] += indep;

            // Update model dependent weights:
            logPosRatio -= indep;
            // Next step in principle: logNegRatio -= indep, then
            // logNegRatio == logPosRatio.
            // Goal reached: One weight suffices per model.
            weights[i] = logPosRatio;
        }
        return weights;
    }
}