SimpleCriterion.java example

Explorer
ComplexRapidMiner-master
- operator
- src
/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.performance;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.math.Averagable;


/**
 * Simple criteria are those whose error can be counted for each example and can
 * be averaged by the number of examples. Since errors should be minimized, the
 * fitness is calculated as -1 multiplied by the error.
 * Subclasses might also want to implement the method
 * <code>transform(double)</code> which applies a transformation on the value
 * sum divided by the number of counted examples. This is for example useful in
 * the case of the root mean squared error. All subclasses can be used for both
 * regression and classification problems. In case of classification the
 * confidence value for the desired true label is used as prediction.
 * 
 * @author Ingo Mierswa, Simon Fischer
 * @version $Id: SimpleCriterion.java,v 1.7 2008/07/31 17:43:41 ingomierswa Exp $
 */
public abstract class SimpleCriterion extends MeasuredPerformance {

	/** Serialization version identifier. */
	private static final long serialVersionUID = 242287213804685323L;

	/** Sum of all counted deviations, each multiplied by its example weight. */
	private double sum = 0.0;

	/** Sum of the squared weighted deviations, i.e. of (deviation * weight)^2. */
	private double squaresSum = 0.0;

	/**
	 * Sum of the weights of all counted examples. Equals the plain number of
	 * counted examples as long as no weight attribute is used.
	 */
	private double exampleCount = 0;

	/** Predicted label attribute of the example set given to {@link #startCounting(ExampleSet, boolean)}. */
	private Attribute predictedAttribute;

	/** Label attribute of the example set given to {@link #startCounting(ExampleSet, boolean)}. */
	private Attribute labelAttribute;

	/** Weight attribute, or <code>null</code> if example weights are not used. */
	private Attribute weightAttribute;
	
	/** Creates an empty criterion; the attributes are set by {@link #startCounting(ExampleSet, boolean)}. */
	public SimpleCriterion() {
	}

	/**
	 * Copy constructor. Copies the accumulated sums and clones the attributes
	 * of the given criterion. Attributes which are not set in the given
	 * criterion (e.g. because counting was never started) stay
	 * <code>null</code> instead of causing a NullPointerException.
	 * 
	 * @param sc the criterion to copy
	 */
	public SimpleCriterion(SimpleCriterion sc) {
		super(sc);
		this.sum = sc.sum;
		this.squaresSum = sc.squaresSum;
		this.exampleCount = sc.exampleCount;
		this.labelAttribute = cloneAttribute(sc.labelAttribute);
		this.predictedAttribute = cloneAttribute(sc.predictedAttribute);
		this.weightAttribute = cloneAttribute(sc.weightAttribute);
	}

	/** Returns a clone of the given attribute or <code>null</code> if the attribute is <code>null</code>. */
	private static Attribute cloneAttribute(Attribute attribute) {
		if (attribute == null) {
			return null;
		}
		return (Attribute) attribute.clone();
	}

	/** Returns the accumulated weight of all counted examples. */
	public double getExampleCount() {
		return exampleCount;
	}

	/**
	 * Invokes <code>countExample(double, double)</code> and counts the
	 * deviation. In case of a nominal label the confidence of the desired true
	 * label is used as prediction. For regression problems the usual predicted
	 * label is used. Deviations which are NaN are not counted; a warning is
	 * logged instead.
	 * 
	 * @param example the example to count
	 */
	public void countExample(Example example) {
		double plabel;
		double label = example.getValue(labelAttribute);
		double weight = 1.0d;
		if (weightAttribute != null) {
			weight = example.getValue(weightAttribute);
		}
		if (!predictedAttribute.isNominal()) {
			plabel = example.getValue(predictedAttribute);
		} else {
			// classification: compare the confidence for the true label against the ideal confidence 1.0
			String labelS = example.getNominalValue(labelAttribute);
			plabel = example.getConfidence(labelS);
			label = 1.0d;
		}

		double deviation = countExample(label, plabel);
		if (!Double.isNaN(deviation)) {
			countExampleWithWeight(deviation, weight);
		} else {
			LogService.getGlobal().log("SimpleCriterion: NaN was generated!", LogService.WARNING);
		}
	}

	/** Subclasses must count the example and return the value to sum up. */
	protected abstract double countExample(double label, double predictedLabel);

	/**
	 * Simply returns the given value. Subclasses might apply a transformation
	 * on the error sum divided by the number of examples.
	 */
	protected double transform(double value) {
		return value;
	}

	/**
	 * Adds the given deviation with the given weight to the sums. NaN
	 * deviations are ignored.
	 * 
	 * @param deviation the value to add
	 * @param weight the weight of the counted example
	 */
	protected void countExampleWithWeight(double deviation, double weight) {
		if (!Double.isNaN(deviation)) {
			sum += deviation * weight;
			squaresSum += deviation * deviation * weight * weight;
			exampleCount += weight;
		}
	}

	/** Returns the transformed quotient of the weighted deviation sum and the example count. */
	public double getMikroAverage() {
		return transform(sum / exampleCount);
	}

	/**
	 * Returns the transformed sum of squares divided by the example count,
	 * minus the squared result of {@link #getMikroAverage()}.
	 */
	public double getMikroVariance() {
		double mean = getMikroAverage();
		double meanSquares = transform(squaresSum) / exampleCount;
		return meanSquares - mean * mean;
	}

	/**
	 * Resets all sums and fetches the label, predicted label, and (if
	 * requested) weight attribute from the given example set.
	 * 
	 * @param eset the example set which will be counted
	 * @param useExampleWeights indicates if the weight attribute of the example
	 *        set should be used
	 */
	public void startCounting(ExampleSet eset, boolean useExampleWeights) throws OperatorException {
		super.startCounting(eset, useExampleWeights);
		exampleCount = 0.0d;
		sum = squaresSum = 0.0d;
		this.predictedAttribute = eset.getAttributes().getPredictedLabel();
		this.labelAttribute = eset.getAttributes().getLabel();
		// always reassign: a weight attribute left over from a previous weighted
		// run must not be applied when weights are switched off
		if (useExampleWeights) {
			this.weightAttribute = eset.getAttributes().getWeight();
		} else {
			this.weightAttribute = null;
		}
	}

	/** Returns -1 multiplied by the average since errors should be minimized. */
	public double getFitness() {
		return (-1.0d) * getAverage();
	}

    /** Returns 0.0. */
    public double getMaxFitness() {
        return 0.0d;
    }
    
	/**
	 * Adds the sums and the example count of the given criterion to this one.
	 * 
	 * @param performance the criterion to add; must be a SimpleCriterion
	 */
	public void buildSingleAverage(Averagable performance) {
		SimpleCriterion other = (SimpleCriterion) performance;
		this.sum += other.sum;
		this.squaresSum += other.squaresSum;
		this.exampleCount += other.exampleCount;
	}
}