/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.performance;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Statistics;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorCapability;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ProcessSetupError.Severity;
import com.rapidminer.operator.SimpleProcessSetupError;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.learner.CapabilityProvider;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.CapabilityPrecondition;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.MetaDataInfo;
import com.rapidminer.operator.ports.metadata.MetaDataUnderspecifiedError;
import com.rapidminer.operator.ports.metadata.PassThroughOrGenerateRule;
import com.rapidminer.operator.ports.metadata.PassThroughRule;
import com.rapidminer.operator.ports.metadata.SimpleMetaDataError;
import com.rapidminer.operator.ports.metadata.SimplePrecondition;
import com.rapidminer.operator.ports.quickfix.ParameterSettingQuickFix;
import com.rapidminer.operator.ports.quickfix.QuickFix;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.ParameterTypeStringCategory;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.Ontology;
/**
* <p>
* This performance evaluator operator should be used for regression tasks, i.e. in cases where the
* label attribute has a numerical value type. The operator expects a test {@link ExampleSet} as
* input, whose elements have both true and predicted labels, and delivers as output a list of
* performance values according to a list of performance criteria that it calculates. If an input
* performance vector was already given, this is used for keeping the performance values.
* </p>
*
* <p>
 * <p>
 * All of the performance criteria can be switched on using boolean parameters. Their values can be
 * queried by a {@link com.rapidminer.operator.visualization.ProcessLogOperator} using the same
 * names. The main criterion is used for comparisons and needs to be specified only for processes
 * where performance vectors are compared, e.g. feature selection or other meta optimization process
 * setups. If no other main criterion was selected, the first criterion in the resulting performance
 * vector will be assumed to be the main criterion.
 * </p>
*
* <p>
* The resulting performance vectors are usually compared with a standard performance comparator
* which only compares the fitness values of the main criterion. Other implementations than this
* simple comparator can be specified using the parameter <var>comparator_class</var>. This may for
* instance be useful if you want to compare performance vectors according to the weighted sum of
* the individual criteria. In order to implement your own comparator, simply subclass
* {@link PerformanceComparator}. Please note that for true multi-objective optimization usually
* another selection scheme is used instead of simply replacing the performance comparator.
* </p>
*
* @author Ingo Mierswa
*/
public abstract class AbstractPerformanceEvaluator extends Operator implements CapabilityProvider {

	/** The parameter name for "The criterion used for comparing performance vectors." */
	public static final String PARAMETER_MAIN_CRITERION = "main_criterion";

	/** The parameter name for "If set to true, examples with undefined labels are skipped." */
	public static final String PARAMETER_SKIP_UNDEFINED_LABELS = "skip_undefined_labels";

	/**
	 * The parameter name for "Fully qualified classname of the PerformanceComparator
	 * implementation."
	 */
	public static final String PARAMETER_COMPARATOR_CLASS = "comparator_class";

	/** Indicates if example weights should be used for performance calculations. */
	private static final String PARAMETER_USE_EXAMPLE_WEIGHTS = "use_example_weights";

	private InputPort exampleSetInput = getInputPorts().createPort("labelled data");
	private InputPort performanceInput = getInputPorts().createPort("performance");
	private OutputPort performanceOutput = getOutputPorts().createPort("performance");
	private OutputPort exampleSetOutput = getOutputPorts().createPort("example set");

	/** The currently used performance vector. This is used for logging / plotting purposes. */
	private PerformanceVector currentPerformanceVector = null;

	public AbstractPerformanceEvaluator(OperatorDescription description) {
		super(description);
		exampleSetInput.addPrecondition(new CapabilityPrecondition(this, exampleSetInput));
		// meta data check: label and prediction must both be present, must share the same value
		// type, and that type must be one this evaluator can handle (see canEvaluate())
		exampleSetInput.addPrecondition(new SimplePrecondition(exampleSetInput, new ExampleSetMetaData()) {

			@Override
			public void makeAdditionalChecks(MetaData metaData) {
				if (!(metaData instanceof ExampleSetMetaData)) {
					exampleSetInput.addError(new MetaDataUnderspecifiedError(exampleSetInput));
					return;
				}
				ExampleSetMetaData emd = (ExampleSetMetaData) metaData;
				if (emd.hasSpecial(Attributes.LABEL_NAME) == MetaDataInfo.YES
						&& emd.hasSpecial(Attributes.PREDICTION_NAME) == MetaDataInfo.YES) {
					int labelType = emd.getSpecial(Attributes.LABEL_NAME).getValueType();
					int predictionType = emd.getSpecial(Attributes.PREDICTION_NAME).getValueType();
					if (labelType != predictionType) {
						exampleSetInput.addError(new SimpleMetaDataError(Severity.ERROR, exampleSetInput,
								"label_prediction_mismatch",
								new Object[] { Ontology.ATTRIBUTE_VALUE_TYPE.mapIndex(labelType),
										Ontology.ATTRIBUTE_VALUE_TYPE.mapIndex(predictionType) }));
					} else if (!canEvaluate(labelType)) {
						exampleSetInput.addError(new SimpleMetaDataError(Severity.ERROR, exampleSetInput,
								"cannot_evaluate_label_type",
								new Object[] { Ontology.ATTRIBUTE_VALUE_TYPE.mapIndex(labelType) }));
					}
				}
			}
		});
		// the performance input is optional
		performanceInput.addPrecondition(new SimplePrecondition(performanceInput, new MetaData(PerformanceVector.class),
				false));
		getTransformer().addRule(new PassThroughOrGenerateRule(performanceInput, performanceOutput,
				new MetaData(PerformanceVector.class)));
		getTransformer().addRule(new PassThroughRule(exampleSetInput, exampleSetOutput, false));

		// register every criterion as a loggable / plottable value
		for (PerformanceCriterion criterion : getCriteria()) {
			addPerformanceValue(criterion.getName(), criterion.getDescription());
		}
		addValue(new ValueDouble("performance", "The last performance (main criterion).") {

			@Override
			public double getDoubleValue() {
				if (currentPerformanceVector != null) {
					return currentPerformanceVector.getMainCriterion().getAverage();
				} else {
					return Double.NaN;
				}
			}
		});
	}

	/**
	 * Returns true iff this operator can evaluate labels of the given value type.
	 *
	 * @see Ontology#ATTRIBUTE_VALUE_TYPE
	 */
	protected abstract boolean canEvaluate(int valueType);

	/**
	 * Delivers the list of criteria which is available for this operator. Please note that all
	 * criteria in the list must be freshly instantiated since no copy is created in different runs
	 * of this operator. This is important in order to not mess up the results.
	 *
	 * This method must not return null but should return an empty list in this case.
	 *
	 * ATTENTION! This method is called during the creation of parameters. Do not try to get a
	 * parameter value inside this method, since this will create an infinite loop!
	 */
	public abstract List<PerformanceCriterion> getCriteria();

	/**
	 * Delivers class weights for performance criteria which implement the
	 * {@link ClassWeightedPerformance} interface. Might return null (for example for regression
	 * task performance evaluators).
	 *
	 * @throws UserError
	 */
	protected abstract double[] getClassWeights(Attribute label) throws UserError;

	/** Performs a check if this operator can be used for this type of example set at all. */
	protected abstract void checkCompatibility(ExampleSet exampleSet) throws OperatorException;

	@Override
	public boolean shouldAutoConnect(OutputPort port) {
		if (port == exampleSetOutput) {
			// NOTE(review): "keep_example_set" is not registered by this class; presumably a
			// (deprecated) parameter of a subclass -- confirm before removing
			return getParameterAsBoolean("keep_example_set");
		} else {
			return super.shouldAutoConnect(port);
		}
	}

	/**
	 * This method will be invoked before the actual calculation is started. The default
	 * implementation does nothing. Subclasses might want to override this method.
	 */
	protected void init(ExampleSet exampleSet) {}

	/** Subclasses might override this method and return false. */
	protected boolean showSkipNaNLabelsParameter() {
		return true;
	}

	/** Subclasses might override this method and return false. */
	protected boolean showComparatorParameter() {
		return true;
	}

	/** Subclasses might override this method and return false. */
	protected boolean showCriteriaParameter() {
		return true;
	}

	/**
	 * Helper method if this operator is constructed anonymously. Assigns the exampleSet to the
	 * input and returns the PerformanceVector from the output.
	 */
	public PerformanceVector doWork(ExampleSet exampleSet) throws OperatorException {
		exampleSetInput.receive(exampleSet);
		doWork();
		return performanceOutput.getData(PerformanceVector.class);
	}

	@Override
	public void doWork() throws OperatorException {
		ExampleSet testSet = exampleSetInput.getData(ExampleSet.class);
		checkCompatibility(testSet);
		init(testSet);
		// an already delivered performance vector is extended rather than replaced
		PerformanceVector inputPerformance = performanceInput.getDataOrNull(PerformanceVector.class);
		performanceOutput.deliver(evaluate(testSet, inputPerformance));
		exampleSetOutput.deliver(testSet);
	}

	// --------------------------------------------------------------------------------

	/**
	 * Adds the performance criteria as plottable values, e.g. for the ProcessLog operator.
	 */
	private void addPerformanceValue(final String name, String description) {
		addValue(new ValueDouble(name, description) {

			@Override
			public double getDoubleValue() {
				if (currentPerformanceVector == null) {
					return Double.NaN;
				}
				PerformanceCriterion criterion = currentPerformanceVector.getCriterion(name);
				if (criterion != null) {
					return criterion.getAverage();
				} else {
					return Double.NaN;
				}
			}
		});
	}

	/**
	 * Creates a new performance vector if the given one is null. Adds all criteria demanded by the
	 * user. If the criterion was already part of the performance vector before it will be
	 * overwritten. Criteria which were delivered via the input vector are collected into
	 * {@code givenCriteria} so that they are not counted again during evaluation.
	 *
	 * @throws UserError
	 *             code 910 if no criterion was selected, 914 if the configured comparator class
	 *             does not implement {@link PerformanceComparator}, 904 if it cannot be
	 *             instantiated
	 */
	private PerformanceVector initialisePerformanceVector(ExampleSet testSet, PerformanceVector performanceCriteria,
			List<PerformanceCriterion> givenCriteria) throws OperatorException {
		givenCriteria.clear();
		if (performanceCriteria == null) {
			performanceCriteria = new PerformanceVector();
		} else {
			// remember already delivered criteria: their counted values must be preserved
			for (int i = 0; i < performanceCriteria.getSize(); i++) {
				givenCriteria.add(performanceCriteria.getCriterion(i));
			}
		}
		for (PerformanceCriterion criterion : getCriteria()) {
			if (checkCriterionName(criterion.getName())) {
				performanceCriteria.addCriterion(criterion);
			}
		}
		if (performanceCriteria.size() == 0) {
			// no criterion was selected at all
			throw new UserError(this, 910);
		}
		// set suitable main criterion if the user specified one
		// (FIX: the former "size() == 0" fallback branch here was unreachable dead code, since
		// the check above already throws for an empty vector; it has been removed)
		if (showCriteriaParameter()) {
			String mainCriterionName = getParameterAsString(PARAMETER_MAIN_CRITERION);
			if (mainCriterionName != null) {
				performanceCriteria.setMainCriterionName(mainCriterionName);
			}
		}
		// comparator
		String comparatorClass = null;
		if (showComparatorParameter()) {
			comparatorClass = getParameterAsString(PARAMETER_COMPARATOR_CLASS);
		}
		if (comparatorClass == null) {
			performanceCriteria.setComparator(new PerformanceVector.DefaultComparator());
		} else {
			try {
				Class<?> pcClass = com.rapidminer.tools.Tools.classForName(comparatorClass);
				if (!PerformanceComparator.class.isAssignableFrom(pcClass)) {
					throw new UserError(this, 914, new Object[] { pcClass, PerformanceComparator.class });
				}
				performanceCriteria.setComparator((PerformanceComparator) pcClass.newInstance());
			} catch (UserError e) {
				// FIX: rethrow the specific error (914) unchanged; previously it was caught by the
				// Throwable clause below and wrapped into the generic instantiation error 904
				throw e;
			} catch (Throwable e) {
				throw new UserError(this, e, 904, new Object[] { comparatorClass, e });
			}
		}
		return performanceCriteria;
	}

	/**
	 * Returns true if the criterion with the given name should be added to the performance vector.
	 * This is either the case
	 * <ol>
	 * <li>if the boolean parameter was selected by the user</li>
	 * <li>if the given name is equal to the main criterion</li>
	 * </ol>
	 */
	private boolean checkCriterionName(String name) throws UndefinedParameterError {
		String mainCriterionName = getParameterAsString(PARAMETER_MAIN_CRITERION);
		if (name != null && name.trim().length() != 0 && !name.equals(PerformanceVector.MAIN_CRITERION_FIRST)
				&& name.equals(mainCriterionName)) {
			return true;
		} else {
			// if no boolean parameter exists for this criterion (criteria parameters hidden),
			// the criterion is included by default
			ParameterType type = getParameterType(name);
			if (type != null) {
				return getParameterAsBoolean(name);
			} else {
				return true;
			}
		}
	}

	/**
	 * Evaluates the given test set. All {@link PerformanceCriterion} instances in the given
	 * {@link PerformanceVector} must be subclasses of {@link MeasuredPerformance}.
	 */
	protected PerformanceVector evaluate(ExampleSet testSet, PerformanceVector inputPerformance)
			throws OperatorException {
		List<PerformanceCriterion> givenCriteria = new LinkedList<PerformanceCriterion>();
		this.currentPerformanceVector = initialisePerformanceVector(testSet, inputPerformance, givenCriteria);
		boolean skipUndefined = true;
		// FIX: the skip-undefined-labels parameter is only registered when
		// showSkipNaNLabelsParameter() is true (see getParameterTypes()); it was erroneously
		// gated on showComparatorParameter() here
		if (showSkipNaNLabelsParameter()) {
			skipUndefined = getParameterAsBoolean(PARAMETER_SKIP_UNDEFINED_LABELS);
		}
		boolean useExampleWeights = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS);
		evaluate(this, testSet, currentPerformanceVector, givenCriteria, skipUndefined, useExampleWeights);
		return currentPerformanceVector;
	}

	/**
	 * Static version of {@link #evaluate(ExampleSet, PerformanceVector)}. This method was
	 * introduced to enable testing of the method.
	 *
	 * @param evaluator
	 *            Usually this. May be null for testing. Only needed for exception.
	 */
	public static void evaluate(AbstractPerformanceEvaluator evaluator, ExampleSet testSet,
			PerformanceVector performanceCriteria, List<PerformanceCriterion> givenCriteria, boolean skipUndefinedLabels,
			boolean useExampleWeights) throws OperatorException {
		if (testSet.getAttributes().getLabel() == null) {
			throw new UserError(evaluator, 105, new Object[0]);
		}
		if (testSet.getAttributes().getPredictedLabel() == null) {
			throw new UserError(evaluator, 107, new Object[0]);
		}
		// sanity check for weight attribute: must be numerical and non-negative
		if (useExampleWeights) {
			Attribute weightAttribute = testSet.getAttributes().getWeight();
			if (weightAttribute != null) {
				if (!weightAttribute.isNumerical()) {
					throw new UserError(evaluator, 120, new Object[] { weightAttribute.getName(),
							Ontology.VALUE_TYPE_NAMES[weightAttribute.getValueType()],
							Ontology.VALUE_TYPE_NAMES[Ontology.NUMERICAL] });
				}
				testSet.recalculateAttributeStatistics(weightAttribute);
				double minimum = testSet.getStatistics(weightAttribute, Statistics.MINIMUM);
				if (Double.isNaN(minimum) || minimum < 0.0d) {
					throw new UserError(evaluator, 138, new Object[] { weightAttribute.getName(), "positive values",
							"negative for some examples" });
				}
			}
		}
		// initialize all criteria which were not delivered via the input performance vector
		for (int pc = 0; pc < performanceCriteria.size(); pc++) {
			PerformanceCriterion criterion = performanceCriteria.getCriterion(pc);
			if (!givenCriteria.contains(criterion)) {
				if (!(criterion instanceof MeasuredPerformance)) {
					throw new UserError(evaluator, 903, new Object[0]);
				}
				((MeasuredPerformance) criterion).startCounting(testSet, useExampleWeights);
				// init class weight handlers (only possible for nominal labels)
				if (criterion instanceof ClassWeightedPerformance) {
					if (evaluator != null) {
						Attribute label = testSet.getAttributes().getLabel();
						if (label.isNominal()) {
							double[] weights = evaluator.getClassWeights(label);
							if (weights != null) {
								((ClassWeightedPerformance) criterion).setWeights(weights);
							}
						}
					}
				}
			}
		}
		// count every example for every freshly initialized criterion
		for (Example example : testSet) {
			if (skipUndefinedLabels && (Double.isNaN(example.getLabel()) || Double.isNaN(example.getPredictedLabel()))) {
				continue;
			}
			for (int pc = 0; pc < performanceCriteria.size(); pc++) {
				PerformanceCriterion criterion = performanceCriteria.getCriterion(pc);
				if (!givenCriteria.contains(criterion)) {
					if (criterion instanceof MeasuredPerformance) {
						((MeasuredPerformance) criterion).countExample(example);
					}
				}
			}
			if (evaluator != null) {
				evaluator.checkForStop();
			}
		}
	}

	/** Returns the names of all criteria delivered by {@link #getCriteria()}, in order. */
	private String[] getAllCriteriaNames() {
		List<PerformanceCriterion> criteria = getCriteria();
		String[] result = new String[criteria.size()];
		int counter = 0;
		for (PerformanceCriterion criterion : criteria) {
			result[counter++] = criterion.getName();
		}
		return result;
	}

	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();
		if (showCriteriaParameter()) {
			String[] criteriaNames = getAllCriteriaNames();
			if (criteriaNames.length > 0) {
				// the main criterion list is prefixed with the "first criterion" pseudo entry
				String[] allCriteriaNames = new String[criteriaNames.length + 1];
				allCriteriaNames[0] = PerformanceVector.MAIN_CRITERION_FIRST;
				System.arraycopy(criteriaNames, 0, allCriteriaNames, 1, criteriaNames.length);
				ParameterType type = new ParameterTypeStringCategory(PARAMETER_MAIN_CRITERION,
						"The criterion used for comparing performance vectors.", allCriteriaNames, allCriteriaNames[0]);
				type.setExpert(false);
				types.add(type);
			}
			// one boolean parameter per criterion; only the first one is selected by default
			boolean isDefault = true;
			for (PerformanceCriterion criterion : getCriteria()) {
				types.add(new ParameterTypeBoolean(criterion.getName(), criterion.getDescription(), isDefault, false));
				isDefault = false;
			}
		}
		if (showSkipNaNLabelsParameter()) {
			types.add(new ParameterTypeBoolean(PARAMETER_SKIP_UNDEFINED_LABELS,
					"If set to true, examples with undefined labels are skipped.", true));
		}
		if (showComparatorParameter()) {
			types.add(new ParameterTypeString(PARAMETER_COMPARATOR_CLASS,
					"Fully qualified classname of the PerformanceComparator implementation.", true));
		}
		types.add(new ParameterTypeBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS,
				"Indicated if example weights should be used for performance calculations if possible.", true));
		return types;
	}

	/**
	 * This will override the checkProperties method of operator in order to add some checks for
	 * possible quickfixes.
	 */
	@Override
	public int checkProperties() {
		boolean criterionChecked = false;
		List<PerformanceCriterion> criteria = null;
		try {
			criteria = getCriteria();
			if (criteria != null) {
				for (PerformanceCriterion criterion : criteria) {
					if (checkCriterionName(criterion.getName())) {
						criterionChecked = true;
						break;
					}
				}
			}
		} catch (UndefinedParameterError ignored) {
			// parameter not (yet) defined: fall through and report the setup error below
		}
		if (!criterionChecked && criteria != null && !criteria.isEmpty()) {
			// build quick fixes first, then report the error (FIX: previously the second quick
			// fix was added after the error had already been created with the list)
			List<QuickFix> quickFixes = new LinkedList<QuickFix>();
			quickFixes.add(new ParameterSettingQuickFix(AbstractPerformanceEvaluator.this,
					criteria.get(0).getName(), "true"));
			if (criteria.size() > 1) {
				quickFixes.add(new ParameterSettingQuickFix(AbstractPerformanceEvaluator.this,
						criteria.get(1).getName(), "true"));
			}
			addError(new SimpleProcessSetupError(Severity.ERROR, this.getPortOwner(), quickFixes,
					"performance_criterion_undefined", criteria.get(0).getName()));
		}
		// FIX: must delegate to super.checkProperties(); returning super.checkDeprecations()
		// skipped the normal property checks entirely
		return super.checkProperties();
	}

	@Override
	public boolean supportsCapability(OperatorCapability capability) {
		return true;
	}
}