// PredictionModel.java example

/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 * 
 * Complete list of developers available at our web site:
 * 
 * http://rapidminer.com
 * 
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner;

import java.util.Iterator;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.ExampleSetUtilities;
import com.rapidminer.example.set.HeaderExampleSet;
import com.rapidminer.example.set.RemappedExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.operator.AbstractModel;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.tools.Ontology;


/**
 * PredictionModel is the superclass for all objects generated by learners, i.e. it can be used to
 * create a prediction for a given example set.
 *
 * @author Ingo Mierswa
 */
public abstract class PredictionModel extends AbstractModel {

	/** Serialization version identifier. */
	private static final long serialVersionUID = 6295359038239089617L;

	/**
	 * Specifies the data types the model can be applied on. {@code null} if the model was created
	 * without such a constraint (e.g. via the deprecated constructor).
	 */
	private final ExampleSetUtilities.TypesCompareOption compareDataType;

	/**
	 * Specifies the relation between the training {@link ExampleSet} and the input
	 * {@link ExampleSet} which is required to apply the model on the input {@link ExampleSet}.
	 * {@code null} if the model was created without such a constraint (e.g. via the deprecated
	 * constructor).
	 */
	private final ExampleSetUtilities.SetsCompareOption compareSetSize;

	/**
	 * Creates a new prediction model which was built on the given example set. Please note that
	 * the given example set is automatically transformed into a {@link HeaderExampleSet} which
	 * means that no reference to the data itself is kept but only to the header, i.e. to the
	 * attribute meta descriptions.
	 *
	 * @param trainingExampleSet
	 *            the example set the model was trained on
	 * @deprecated Since RapidMiner Studio 6.0.009. Please use the new Constructor
	 *             {@link #PredictionModel(ExampleSet, com.rapidminer.example.set.ExampleSetUtilities.SetsCompareOption, com.rapidminer.example.set.ExampleSetUtilities.TypesCompareOption)}
	 *             which offers the possibility to check for AttributeType and kind of ExampleSet
	 *             before execution.
	 */
	@Deprecated
	protected PredictionModel(ExampleSet trainingExampleSet) {
		// no size / type constraints: both compare options stay null
		this(trainingExampleSet, null, null);
	}

	/**
	 * Creates a new prediction model which is built based on the given {@link ExampleSet}. Please
	 * note that the given ExampleSet is automatically transformed into a {@link HeaderExampleSet}
	 * which means that no reference to the data itself is kept but only to the header, i.e., to the
	 * attribute meta descriptions.
	 *
	 * @param trainingExampleSet
	 *            the example set the model was trained on
	 * @param sizeCompareOperator
	 *            describes the allowed relations between the given ExampleSet and future
	 *            ExampleSets on which this Model will be applied. If this parameter is null no
	 *            error will be thrown.
	 * @param typeCompareOperator
	 *            describes the allowed relations between the types of the attributes of the given
	 *            ExampleSet and the types of future attributes of ExampleSet on which this Model
	 *            will be applied. If this parameter is null no error will be thrown.
	 */
	protected PredictionModel(ExampleSet trainingExampleSet, ExampleSetUtilities.SetsCompareOption sizeCompareOperator,
			ExampleSetUtilities.TypesCompareOption typeCompareOperator) {
		super(trainingExampleSet);
		this.compareDataType = typeCompareOperator;
		this.compareSetSize = sizeCompareOperator;
	}

	/**
	 * Subclasses should iterate through the given example set and set the prediction for each
	 * example. The given predicted label attribute has already been added to the example set and
	 * should be used to set the predicted values.
	 *
	 * @param exampleSet
	 *            the example set to create predictions for
	 * @param predictedLabel
	 *            the attribute which should receive the predicted values
	 * @return the example set containing the predictions
	 * @throws OperatorException
	 *             if the prediction cannot be performed
	 */
	public abstract ExampleSet performPrediction(ExampleSet exampleSet, Attribute predictedLabel) throws OperatorException;

	/**
	 * Applies the model by creating a predicted label attribute and setting the predicted label
	 * values.
	 *
	 * @param exampleSet
	 *            the example set the model should be applied on
	 * @return a clone of the given example set which additionally carries the prediction
	 *         attributes
	 * @throws OperatorException
	 *             if the compatibility check or the prediction fails
	 */
	@Override
	public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
		ExampleSet mappedExampleSet = RemappedExampleSet.create(exampleSet, getTrainingHeader(), false, true);
		checkCompatibility(mappedExampleSet);
		Attribute predictedLabel = createPredictionAttributes(mappedExampleSet, getLabel());
		ExampleSet predictionResult = performPrediction(mappedExampleSet, predictedLabel);

		// Return a clone of the original input (not the remapped view) in order to avoid
		// RemappedExampleSets wrapped around each other accumulating over time
		ExampleSet output = (ExampleSet) exampleSet.clone();
		copyPredictedLabel(predictionResult, output);
		return output;
	}

	/**
	 * Returns the label attribute of the training header.
	 *
	 * @return the label attribute as reported by the training header's attributes
	 */
	public Attribute getLabel() {
		return getTrainingHeader().getAttributes().getLabel();
	}

	/**
	 * This method is invoked before the model is actually applied. The default implementation
	 * first checks the constraints given at construction time (which might throw an exception)
	 * and then performs some basic compatibility checks which only write warnings if the given
	 * example set (for applying the model) does not fit the training example set. Subclasses might
	 * override this method and might throw exceptions which will prevent the application of the
	 * model.
	 *
	 * @param exampleSet
	 *            the example set the model is about to be applied on
	 * @throws OperatorException
	 *             if the constraint check fails
	 */
	protected void checkCompatibility(ExampleSet exampleSet) throws OperatorException {
		Attributes trainingAttributes = getTrainingHeader().getAttributes();
		Attributes applicationAttributes = exampleSet.getAttributes();

		// check given constraints (might throw an UserError)
		ExampleSetUtilities.checkAttributesMatching(getOperator(), trainingAttributes, applicationAttributes,
				compareSetSize, compareDataType);

		warnOnAttributeCountOrOrderMismatch(trainingAttributes, applicationAttributes);

		// check if all training attributes are part of the example set and have the same value
		// types and values
		for (Attribute trainingAttribute : trainingAttributes) {
			warnOnAttributeMismatch(trainingAttribute, applicationAttributes);
		}
	}

	/**
	 * Logs a warning if the number of attributes differs between training and application or, in
	 * case the numbers are equal, if the attribute names do not appear in the same order.
	 */
	private void warnOnAttributeCountOrOrderMismatch(Attributes trainingAttributes, Attributes applicationAttributes) {
		if (applicationAttributes.size() != trainingAttributes.size()) {
			logWarning("The number of regular attributes of the given example set does not fit the number of attributes of the training example set, training: "
					+ trainingAttributes.size() + ", application: " + applicationAttributes.size());
			return;
		}

		// same number of attributes: compare the names pairwise in iteration order
		Iterator<Attribute> trainingIt = trainingAttributes.iterator();
		Iterator<Attribute> applyIt = applicationAttributes.iterator();
		while (trainingIt.hasNext() && applyIt.hasNext()) {
			if (!trainingIt.next().getName().equals(applyIt.next().getName())) {
				logWarning("The order of attributes is not equal for the training and the application example set. This might lead to problems for some models.");
				// one warning is enough
				return;
			}
		}
	}

	/**
	 * Logs a warning if the given training attribute has no regular counterpart with the same name
	 * in the application attributes, if the value types differ or (for nominal attributes of the
	 * same value type) if the nominal mappings differ.
	 */
	private void warnOnAttributeMismatch(Attribute trainingAttribute, Attributes applicationAttributes) {
		String name = trainingAttribute.getName();
		Attribute applicationAttribute = applicationAttributes.getRegular(name);
		if (applicationAttribute == null) {
			logWarning("The given example set does not contain a regular attribute with name '" + name
					+ "'. This might cause problems for some models depending on this particular attribute.");
			return;
		}
		if (trainingAttribute.getValueType() != applicationAttribute.getValueType()) {
			logWarning("The value types between training and application differ for attribute '" + name
					+ "', training: " + Ontology.VALUE_TYPE_NAMES[trainingAttribute.getValueType()]
					+ ", application: " + Ontology.VALUE_TYPE_NAMES[applicationAttribute.getValueType()]);
			return;
		}
		if (trainingAttribute.isNominal()) {
			warnOnNominalMappingMismatch(trainingAttribute, applicationAttribute);
		}
	}

	/**
	 * Logs a warning if the nominal mappings of both attributes differ in size or, in case the
	 * sizes are equal, if any training value is mapped to a different index in the application
	 * attribute.
	 */
	private void warnOnNominalMappingMismatch(Attribute trainingAttribute, Attribute applicationAttribute) {
		String name = trainingAttribute.getName();
		if (trainingAttribute.getMapping().size() != applicationAttribute.getMapping().size()) {
			logWarning("The number of nominal values is not the same for training and application for attribute '"
					+ name
					+ "', training: "
					+ trainingAttribute.getMapping().size()
					+ ", application: "
					+ applicationAttribute.getMapping().size());
			return;
		}
		for (String value : trainingAttribute.getMapping().getValues()) {
			int trainingIndex = trainingAttribute.getMapping().getIndex(value);
			int applicationIndex = applicationAttribute.getMapping().getIndex(value);
			if (trainingIndex != applicationIndex) {
				logWarning("The internal nominal mappings are not the same between training and application for attribute '"
						+ name + "'. This will probably lead to wrong results during model application.");
				// one warning per attribute is enough
				return;
			}
		}
	}

	/**
	 * This method creates prediction attributes like the predicted label and confidences if needed.
	 * Confidence attributes are only created if {@link #supportsConfidences(Attribute)} returns
	 * {@code true} for the given label.
	 *
	 * @param exampleSet
	 *            the example set (and its example table) the new attributes are added to
	 * @param label
	 *            the label attribute the predicted label is derived from
	 * @return the newly created predicted label attribute
	 */
	protected Attribute createPredictionAttributes(ExampleSet exampleSet, Attribute label) {
		Attribute predictedLabel = addPredictedLabelAttribute(exampleSet, label);

		// check whether confidence labels should be constructed
		if (supportsConfidences(label)) {
			addConfidenceAttributes(exampleSet, predictedLabel);
		}
		return predictedLabel;
	}

	/**
	 * This method determines if confidence attributes are created depending on the current label.
	 * Usually this depends only on the fact that the label is nominal, but subclasses might
	 * override this to avoid attribute construction for confidences.
	 *
	 * @param label
	 *            the label attribute, might be {@code null}
	 * @return {@code true} if the label is not {@code null} and nominal
	 */
	protected boolean supportsConfidences(Attribute label) {
		return label != null && label.isNominal();
	}

	/**
	 * Creates a predicted label for the given example set based on the given label attribute and
	 * registers it as predicted label of the example set. For nominal labels, one confidence
	 * attribute per value of the predicted label is created as well.
	 * <p>
	 * This method is static and hence cannot be customized by subclasses; models which need a
	 * different behavior (e.g. no confidences) should use
	 * {@link #createPredictionAttributes(ExampleSet, Attribute)} together with
	 * {@link #supportsConfidences(Attribute)} instead. Callers which want to replace a crisp
	 * nominal prediction by confidence predictions can invoke this method first and then replace
	 * the attribute via a call like
	 * <code>exampleSet.replaceAttribute(predictedLabel, AttributeFactory.changeValueType(predictedLabel, Ontology.REAL));</code>
	 *
	 * @param exampleSet
	 *            the example set (and its example table) the new attributes are added to
	 * @param label
	 *            the label attribute the predicted label is derived from
	 * @return the newly created predicted label attribute
	 */
	public static Attribute createPredictedLabel(ExampleSet exampleSet, Attribute label) {
		Attribute predictedLabel = addPredictedLabelAttribute(exampleSet, label);

		// create and add confidence attributes for nominal labels
		if (label.isNominal()) {
			addConfidenceAttributes(exampleSet, predictedLabel);
		}
		return predictedLabel;
	}

	/**
	 * Creates the predicted label attribute from the given label, adds it to the example table and
	 * registers it as predicted label of the given example set.
	 */
	private static Attribute addPredictedLabelAttribute(ExampleSet exampleSet, Attribute label) {
		Attribute predictedLabel = AttributeFactory.createAttribute(label, Attributes.PREDICTION_NAME);
		predictedLabel.clearTransformations();
		exampleSet.getExampleTable().addAttribute(predictedLabel);
		exampleSet.getAttributes().setPredictedLabel(predictedLabel);
		return predictedLabel;
	}

	/**
	 * Creates one real-valued confidence attribute for each value of the given predicted label,
	 * adds it to the example table and registers it as special attribute of the example set.
	 */
	private static void addConfidenceAttributes(ExampleSet exampleSet, Attribute predictedLabel) {
		ExampleTable table = exampleSet.getExampleTable();
		Attributes attributes = exampleSet.getAttributes();
		for (String value : predictedLabel.getMapping().getValues()) {
			Attribute confidence = AttributeFactory.createAttribute(Attributes.CONFIDENCE_NAME + "(" + value + ")",
					Ontology.REAL);
			table.addAttribute(confidence);
			attributes.setSpecialAttribute(confidence, confidenceRoleName(value));
		}
	}

	/**
	 * Returns the special attribute name under which the confidence attribute for the given label
	 * value is registered.
	 */
	private static String confidenceRoleName(String value) {
		return Attributes.CONFIDENCE_NAME + "_" + value;
	}

	/**
	 * Returns the model name followed by the name of the label this model predicts. If no label
	 * can be determined, a generic description is returned instead of failing with a
	 * {@link NullPointerException}.
	 */
	@Override
	public String toString() {
		// defensive: toString() is used for logging / debugging and must not throw
		ExampleSet trainingHeader = getTrainingHeader();
		Attribute label = trainingHeader == null ? null : trainingHeader.getAttributes().getLabel();
		if (label == null) {
			return getName() + " (prediction model without label)";
		}
		return getName() + " (prediction model for label " + label.getName() + ")";
	}

	/**
	 * Helper method in order to reduce memory consumption. This method should be invoked after a
	 * predicted label and confidence are no longer needed, e.g. after each iteration of a
	 * crossvalidation or after a meta learning iteration. The predicted label and the confidence
	 * attributes are also removed from the underlying example table.
	 *
	 * @param exampleSet
	 *            the example set whose prediction attributes should be removed
	 */
	public static void removePredictedLabel(ExampleSet exampleSet) {
		removePredictedLabel(exampleSet, true, true);
	}

	/**
	 * Helper method in order to lower memory consumption. This method should be invoked after a
	 * predicted label and confidence are no longer needed, e.g. after each crossvalidation run or
	 * after a meta learning iteration. Does nothing if the example set has no predicted label.
	 *
	 * @param exampleSet
	 *            the example set whose prediction attributes should be removed
	 * @param removePredictionFromTable
	 *            if {@code true}, the predicted label is also removed from the example table
	 * @param removeConfidencesFromTable
	 *            if {@code true}, the confidence attributes are also removed from the example table
	 */
	public static void removePredictedLabel(ExampleSet exampleSet, boolean removePredictionFromTable,
			boolean removeConfidencesFromTable) {
		Attributes attributes = exampleSet.getAttributes();
		Attribute predictedLabel = attributes.getPredictedLabel();
		if (predictedLabel == null) {
			return;
		}

		// remove old confidences first, they are looked up via the values of the predicted label
		if (predictedLabel.isNominal()) {
			for (String value : predictedLabel.getMapping().getValues()) {
				Attribute currentConfidenceAttribute = attributes.getSpecial(confidenceRoleName(value));
				if (currentConfidenceAttribute != null) {
					attributes.remove(currentConfidenceAttribute);
					if (removeConfidencesFromTable) {
						exampleSet.getExampleTable().removeAttribute(currentConfidenceAttribute);
					}
				}
			}
		}

		// remove old predicted label
		attributes.remove(predictedLabel);
		if (removePredictionFromTable) {
			exampleSet.getExampleTable().removeAttribute(predictedLabel);
		}
	}

	/**
	 * Copies the predicted label and the corresponding confidence attributes from the source
	 * example set to the destination example set. An existing predicted label of the destination
	 * is replaced. If the source does not contain a predicted label, the prediction attributes of
	 * the destination are left untouched. Independently of that, the cost attribute of the source
	 * (if any) is registered as classification cost of the destination.
	 *
	 * @param source
	 *            the example set to read the prediction attributes from
	 * @param destination
	 *            the example set which receives copies of the prediction attributes
	 */
	public static void copyPredictedLabel(ExampleSet source, ExampleSet destination) {
		Attributes sourceAttributes = source.getAttributes();
		Attribute predictedLabel = sourceAttributes.getPredictedLabel();
		if (predictedLabel != null) {
			// remove attributes but do not delete the columns from the table, otherwise copying is
			// not possible
			removePredictedLabel(destination, false, false);
			if (predictedLabel.isNominal()) {
				for (String value : predictedLabel.getMapping().getValues()) {
					String roleName = confidenceRoleName(value);
					Attribute currentConfidenceAttribute = sourceAttributes.getSpecial(roleName);

					// it's possible that the model does not create confidences for all label
					// values, so check for null (e.g. OneClass-SVM)
					if (currentConfidenceAttribute != null) {
						Attribute copyOfCurrentConfidenceAttribute = AttributeFactory
								.createAttribute(currentConfidenceAttribute);
						destination.getAttributes().setSpecialAttribute(copyOfCurrentConfidenceAttribute, roleName);
					}
				}
			}
			Attribute copyOfPredictedLabel = AttributeFactory.createAttribute(predictedLabel);
			destination.getAttributes().setPredictedLabel(copyOfPredictedLabel);
		}

		// the cost attribute is handed over as is (no copy is created)
		Attribute costs = sourceAttributes.getCost();
		if (costs != null) {
			destination.getAttributes().setSpecialAttribute(costs, Attributes.CLASSIFICATION_COST);
		}
	}
}