/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.local;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorCapability;
import com.rapidminer.operator.OperatorCreationException;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.learner.AbstractLearner;
import com.rapidminer.operator.learner.local.LocalPolynomialRegressionModel.RegressionData;
import com.rapidminer.operator.preprocessing.weighting.LocalPolynomialExampleWeightingOperator;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.conditions.BooleanParameterCondition;
import com.rapidminer.tools.OperatorService;
import com.rapidminer.tools.math.container.GeometricDataCollection;
import com.rapidminer.tools.math.container.LinearList;
import com.rapidminer.tools.math.similarity.DistanceMeasure;
import com.rapidminer.tools.math.similarity.DistanceMeasures;
import com.rapidminer.tools.math.smoothing.SmoothingKernels;
/**
* This operator provides functionality to perform a local regression. That means that if the label value for a
* point in the data space is requested, the local neighborhood of this point is searched. For this search the distance
* measure specified in the distance measure parameter is used. After the neighborhood has been determined, its
* data points are used for fitting a polynomial of the specified degree using weighted least squares optimization.
* The value of this polynomial at the requested point in data space is then returned as the result. During the fitting
* of the polynomial, the neighborhood's data points are weighted by their distance to the requested point. Here again
* the distance function specified in the parameters is used. The weight is calculated from the distance using the
* kernel smoother specified in the parameters. The resulting weight is then included in the least squares
* optimization. If the training example set contains a weight attribute, the distance based weight is multiplied by
* the example's weight. If the parameter use_robust_estimation is checked, a LocalPolynomialExampleWeighting is
* performed with the same parameters as the following LocalPolynomialRegression. For different settings the operator
* LocalPolynomialExampleWeighting might be used as a preprocessing step instead of checking the parameter. The effect
* is that outliers will be downweighted so that the least squares fitting will not be affected by them anymore.
*
* Since it is a local method, the computational need for training is minimal: In fact, each example is only stored in a
* way which provides a fast neighborhood search during application time. Since all calculations are performed during
* application time, it is slower than for example SVM, LinearRegression or NaiveBayes. In fact, the runtime depends
* heavily on the number of training examples and the number of attributes. If a degree higher than 1 is used, the
* calculations take much longer, because the polynomial expansion must be calculated implicitly.
*
* @author Sebastian Land
*
*/
public class LocalPolynomialRegressionOperator extends AbstractLearner {

    /** Parameter key for the degree of the locally fitted polynomial. */
    public static final String PARAMETER_DEGREE = "degree";

    /** Parameter key for the ridge factor passed to the regression model. */
    public static final String PARAMETER_RIDGE = "ridge_factor";

    /** Parameter key deciding whether a weight attribute of the example set is used, if present. */
    public static final String PARAMETER_USE_EXAMPLE_WEIGHTS = "use_weights";

    /** Parameter key deciding whether the examples are re-weighted before the model is built. */
    public static final String PARAMETER_USE_ROBUST_ESTIMATION = "use_robust_estimation";

    /**
     * Creates the operator from its description.
     *
     * @param description the operator description handed on to the super class
     */
    public LocalPolynomialRegressionOperator(OperatorDescription description) {
        super(description);
    }

    /**
     * Builds the local polynomial regression model. The examples are only copied into a
     * {@link GeometricDataCollection}; the collection, neighborhood, kernel, degree and ridge
     * factor are handed to the returned model.
     *
     * @param exampleSet the training examples
     * @return the model wrapping the stored examples and the configured neighborhood, kernel, degree and ridge factor
     * @throws OperatorException if a parameter cannot be read, or if the weighting operator cannot be created or fails
     */
    @Override
    public Model learn(ExampleSet exampleSet) throws OperatorException {
        // the distance measure is initialized on the example set as it was passed in
        DistanceMeasure measure = DistanceMeasures.createMeasure(this);
        measure.init(exampleSet);

        boolean useWeights = getParameterAsBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS);
        if (getParameterAsBoolean(PARAMETER_USE_ROBUST_ESTIMATION)) {
            // robust estimation computes weights itself, so they are used regardless of the use_weights setting
            useWeights = true;
            exampleSet = computeRobustWeights(exampleSet);
        }

        GeometricDataCollection<RegressionData> data = collectRegressionData(exampleSet, measure, useWeights);
        return new LocalPolynomialRegressionModel(exampleSet, data, Neighborhoods.createNeighborhood(this), SmoothingKernels.createKernel(this), getParameterAsInt(PARAMETER_DEGREE), getParameterAsDouble(PARAMETER_RIDGE));
    }

    /**
     * Runs a {@link LocalPolynomialExampleWeightingOperator} on a clone of the given example set,
     * using this operator as the source of its parameters.
     *
     * @param exampleSet the example set to clone and weight
     * @return the example set returned by the weighting operator
     * @throws OperatorException if the weighting operator cannot be created (reported as user error 904 with the
     *         creation failure attached as cause) or fails during its work
     */
    private ExampleSet computeRobustWeights(ExampleSet exampleSet) throws OperatorException {
        try {
            LocalPolynomialExampleWeightingOperator weightingOperator = OperatorService.createOperator(LocalPolynomialExampleWeightingOperator.class);
            return weightingOperator.doWork((ExampleSet) exampleSet.clone(), this);
        } catch (OperatorCreationException e) {
            // pass the original exception along as cause so that its stack trace is not lost
            throw new UserError(this, e, 904, "LocalPolynomialExampleWeighting", e.getMessage());
        }
    }

    /**
     * Copies the attribute values, label value and weight of every example with a positive weight
     * into a new {@link LinearList}.
     *
     * @param exampleSet the examples to store
     * @param measure the distance measure the collection is created with
     * @param useWeights whether the weight attribute of the example set, if any, is read; otherwise every example
     *        gets the weight 1
     * @return the filled data collection
     */
    private GeometricDataCollection<RegressionData> collectRegressionData(ExampleSet exampleSet, DistanceMeasure measure, boolean useWeights) {
        GeometricDataCollection<RegressionData> data = new LinearList<RegressionData>(measure);
        Attributes attributes = exampleSet.getAttributes();
        Attribute label = attributes.getLabel();
        Attribute weightAttribute = useWeights ? attributes.getWeight() : null;
        int numberOfAttributes = attributes.size();

        for (Example example : exampleSet) {
            double weight = (weightAttribute != null) ? example.getValue(weightAttribute) : 1d;
            // filter out examples without influence; a NaN weight also fails this comparison and is skipped
            if (!(weight > 0d)) {
                continue;
            }

            // copying example values; the array is only allocated for examples that are actually stored
            double[] values = new double[numberOfAttributes];
            int index = 0;
            for (Attribute attribute : attributes) {
                values[index++] = example.getValue(attribute);
            }

            // inserting into geometric data collection
            data.add(values, new RegressionData(values, example.getValue(label), weight));
        }
        return data;
    }

    /**
     * Reports the capabilities of this learner: numerical attributes, a numerical label and
     * weighted examples.
     *
     * @param capability the capability in question
     * @return {@code true} for the capabilities listed above, {@code false} for every other one
     */
    @Override
    public boolean supportsCapability(OperatorCapability capability) {
        switch (capability) {
            case NUMERICAL_ATTRIBUTES:
            case NUMERICAL_LABEL:
            // learn() reads the weight attribute and stores it with each example, so weights are supported
            case WEIGHTED_EXAMPLES:
                return true;
            default:
                return false;
        }
    }

    /**
     * Extends the parameters of the super class by degree, ridge factor, robust estimation, weight
     * usage and number of iterations, followed by the parameters of the distance measures,
     * neighborhoods and smoothing kernels.
     *
     * @return the list obtained from the super class with the parameters of this operator appended
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        ParameterType degree = new ParameterTypeInt(PARAMETER_DEGREE, "Specifies the degree of the local fitted polynomial. Please keep in mind, that a higher degree than 2 will increase calculation time extremely and probably suffer from overfitting.", 0, Integer.MAX_VALUE, 2);
        degree.setExpert(false);
        types.add(degree);

        types.add(new ParameterTypeDouble(PARAMETER_RIDGE, "Specifies the ridge factor. This factor is used to penalize high coefficients. In order to aviod overfitting this might be increased.", 0, Double.POSITIVE_INFINITY, 0.000000001));

        ParameterType robustEstimation = new ParameterTypeBoolean(PARAMETER_USE_ROBUST_ESTIMATION, "If checked, a reweighting of the examples is performed in order to downweight outliers", false);
        robustEstimation.setExpert(false);
        types.add(robustEstimation);

        // depends on use_robust_estimation being false: with robust estimation learn() always uses weights
        ParameterType exampleWeights = new ParameterTypeBoolean(PARAMETER_USE_EXAMPLE_WEIGHTS, "Indicates if example weights should be used if present in the given example set.", true);
        exampleWeights.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_USE_ROBUST_ESTIMATION, false, false));
        types.add(exampleWeights);

        // depends on use_robust_estimation being true: this operator is handed to the weighting operator in learn()
        ParameterType iterations = new ParameterTypeInt(LocalPolynomialExampleWeightingOperator.PARAMETER_NUMBER_OF_ITERATIONS, "The number of iterations performed for weight calculation.", 1, Integer.MAX_VALUE, 20);
        iterations.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_USE_ROBUST_ESTIMATION, false, true));
        types.add(iterations);

        types.addAll(DistanceMeasures.getParameterTypesForNumericals(this));
        types.addAll(Neighborhoods.getParameterTypes(this));
        types.addAll(SmoothingKernels.getParameterTypes(this));
        return types;
    }
}