/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.lazy;
import java.util.Iterator;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Statistics;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.AbstractLearner;
import com.rapidminer.operator.learner.LearnerCapability;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
/**
* This learner creates a model, that will simply predict a default value for
* all examples, i.e. the average or median of the true labels (or the mode in
* case of classification) or a fixed specified value. This learner can be used
* to compare the results of "real" learning schemes with guessing.
*
* @see com.rapidminer.operator.learner.lazy.DefaultModel
* @author Stefan Rueping, Ingo Mierswa
* @version $Id: DefaultLearner.java,v 1.6 2008/05/09 19:23:24 ingomierswa Exp $
*/
public class DefaultLearner extends AbstractLearner {
/** The parameter name for "The method to compute the default." */
public static final String PARAMETER_METHOD = "method";
/** The parameter name for "Value returned when method = constant." */
public static final String PARAMETER_CONSTANT = "constant";
private static final String[] METHODS = { "median", "average", "mode", "constant" };
public static final int MEDIAN = 0;
public static final int AVERAGE = 1;
public static final int MODE = 2;
public static final int CONSTANT = 3;
public DefaultLearner(OperatorDescription description) {
super(description);
}
public boolean supportsCapability(LearnerCapability lc) {
if (lc == com.rapidminer.operator.learner.LearnerCapability.POLYNOMINAL_ATTRIBUTES)
return true;
if (lc == com.rapidminer.operator.learner.LearnerCapability.BINOMINAL_ATTRIBUTES)
return true;
if (lc == com.rapidminer.operator.learner.LearnerCapability.NUMERICAL_ATTRIBUTES)
return true;
if (lc == com.rapidminer.operator.learner.LearnerCapability.POLYNOMINAL_CLASS)
return true;
if (lc == com.rapidminer.operator.learner.LearnerCapability.BINOMINAL_CLASS)
return true;
if (lc == com.rapidminer.operator.learner.LearnerCapability.NUMERICAL_CLASS)
return true;
return false;
}
public Model learn(ExampleSet exampleSet) throws OperatorException {
double value = 0.0;
double[] confidences = null;
int method = getParameterAsInt(PARAMETER_METHOD);
Attribute label = exampleSet.getAttributes().getLabel();
if ((label.isNominal()) && ((method == MEDIAN) || (method == AVERAGE))) {
logWarning("Cannot use method '" + METHODS[method] + "' for nominal labels: changing to 'mode'!");
method = MODE;
} else if ((!label.isNominal()) && (method == MODE)) {
logWarning("Cannot use method '" + METHODS[method] + "' for numerical labels: changing to 'average'!");
method = AVERAGE;
}
switch (method) {
case MEDIAN:
double[] labels = new double[exampleSet.size()];
Iterator<Example> r = exampleSet.iterator();
int counter = 0;
while (r.hasNext()) {
Example example = r.next();
labels[counter++] = example.getValue(example.getAttributes().getLabel());
}
java.util.Arrays.sort(labels);
value = labels[exampleSet.size() / 2];
break;
case AVERAGE:
exampleSet.recalculateAttributeStatistics(label);
value = exampleSet.getStatistics(label, Statistics.AVERAGE);
break;
case MODE:
exampleSet.recalculateAttributeStatistics(label);
value = exampleSet.getStatistics(label, Statistics.MODE);
confidences = new double[label.getMapping().size()];
for (int i = 0; i < confidences.length; i++) {
confidences[i] = exampleSet.getStatistics(label, Statistics.COUNT, label.getMapping().mapIndex(i)) / exampleSet.size();
}
break;
case CONSTANT:
value = getParameterAsDouble(PARAMETER_CONSTANT);
break;
default:
// cannot happen
throw new OperatorException("DefaultLearner: Unknown default method '" + method + "'!");
}
log("Default value is '" + (label.isNominal() ? label.getMapping().mapIndex((int) value) : value + "") + "'.");
return new DefaultModel(exampleSet, value, confidences);
}
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = super.getParameterTypes();
ParameterType type = new ParameterTypeCategory(PARAMETER_METHOD, "The method to compute the default.", METHODS, MEDIAN);
type.setExpert(false);
types.add(type);
types.add(new ParameterTypeDouble(PARAMETER_CONSTANT, "Value returned when method = constant.", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0d));
return types;
}
}