/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.performance;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.math.Averagable;
/**
* Simple criteria are those which error can be counted for each example and can
* be averaged by the number of examples. Since errors should be minimized, the
* fitness is calculated as -1 multiplied by the the error.
* Subclasses might also want to implement the method
* <code>transform(double)</code> which applies a transformation on the value
* sum divided by the number of counted examples. This is for example usefull in
* case of root_means_squared error. All subclasses can be used for both
* regression and classification problems. In case of classification the
* confidence value for the desired true label is used as prediction.
*
* @author Ingo Mierswa, Simon Fischer
* @version $Id: SimpleCriterion.java,v 1.7 2008/07/31 17:43:41 ingomierswa Exp $
*/
public abstract class SimpleCriterion extends MeasuredPerformance {
/**
*
*/
private static final long serialVersionUID = 242287213804685323L;
private double sum = 0.0;
private double squaresSum = 0.0;
private double exampleCount = 0;
private Attribute predictedAttribute;
private Attribute labelAttribute;
private Attribute weightAttribute;
public SimpleCriterion() {
}
public SimpleCriterion(SimpleCriterion sc) {
super(sc);
this.sum = sc.sum;
this.squaresSum = sc.squaresSum;
this.exampleCount = sc.exampleCount;
this.labelAttribute = (Attribute)sc.labelAttribute.clone();
this.predictedAttribute = (Attribute)sc.predictedAttribute.clone();
if (sc.weightAttribute != null)
this.weightAttribute = (Attribute)sc.weightAttribute.clone();
}
public double getExampleCount() {
return exampleCount;
}
/**
* Invokes <code>countExample(double, double)</code> and counts the
* deviation. In case of a nominal label the confidence of the desired true
* label is used as prediction. For regression problems the usual predicted
* label is used.
*/
public void countExample(Example example) {
double plabel;
double label = example.getValue(labelAttribute);
double weight = 1.0d;
if (weightAttribute != null) {
weight = example.getValue(weightAttribute);
}
if (!predictedAttribute.isNominal()) {
plabel = example.getValue(predictedAttribute);
} else {
String labelS = example.getNominalValue(labelAttribute);
plabel = example.getConfidence(labelS);
label = 1.0d;
}
double deviation = countExample(label, plabel);
if (!Double.isNaN(deviation)) {
countExampleWithWeight(deviation, weight);
} else {
LogService.getGlobal().log("SimpleCriterion: NaN was generated!", LogService.WARNING);
}
}
/** Subclasses must count the example and return the value to sum up. */
protected abstract double countExample(double label, double predictedLabel);
/**
* Simply returns the given value. Subclasses might apply a transformation
* on the error sum divided by the number of examples.
*/
protected double transform(double value) {
return value;
}
protected void countExampleWithWeight(double deviation, double weight) {
if (!Double.isNaN(deviation)) {
sum += deviation * weight;
squaresSum += deviation * deviation * weight * weight;
exampleCount += weight;
}
}
public double getMikroAverage() {
return transform(sum / exampleCount);
}
public double getMikroVariance() {
double mean = getMikroAverage();
double meanSquares = transform(squaresSum) / exampleCount;
return meanSquares - mean * mean;
}
public void startCounting(ExampleSet eset, boolean useExampleWeights) throws OperatorException {
super.startCounting(eset, useExampleWeights);
exampleCount = 0.0d;
sum = squaresSum = 0.0d;
this.predictedAttribute = eset.getAttributes().getPredictedLabel();
this.labelAttribute = eset.getAttributes().getLabel();
if (useExampleWeights)
this.weightAttribute = eset.getAttributes().getWeight();
}
public double getFitness() {
return (-1.0d) * getAverage();
}
/** Returns 0.0. */
public double getMaxFitness() {
return 0.0d;
}
public void buildSingleAverage(Averagable performance) {
SimpleCriterion other = (SimpleCriterion) performance;
this.sum += other.sum;
this.squaresSum += other.squaresSum;
this.exampleCount += other.exampleCount;
}
}