/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions;

import java.util.List;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.AbstractLearner;
import com.rapidminer.operator.learner.LearnerCapability;
import com.rapidminer.operator.performance.EstimatedPerformance;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.LoggingHandler;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.math.optimization.ec.es.ESOptimization;
import com.rapidminer.tools.math.optimization.ec.es.Individual;
import com.rapidminer.tools.math.optimization.ec.es.OptimizationValueType;

/**
 * <p>This regression learning operator fits a polynomial of all attributes to
 * the given data set. If the data set contains a label Y and three attributes
 * X1, X2, and X3 a function of the form<br />
 * <br />
 * <code>Y = w0 + w1 * X1 ^ d1 + w2 * X2 ^ d2 + w3 * X3 ^ d3</code><br />
 * <br />
 * will be fitted to the training data.</p>
 *
 * <p>The coefficients, the degrees and the offset <code>w0</code> are searched
 * by an evolutionary optimization. With a replication factor greater than one,
 * one (coefficient, degree) pair per replica is optimized for every attribute.</p>
 *
 * @author Ingo Mierswa
 * @version $Id: PolynomialRegression.java,v 1.1 2008/07/31 17:07:14 ingomierswa Exp $
 */
public class PolynomialRegression extends AbstractLearner {

	private static final String PARAMETER_MAX_ITERATIONS = "max_iterations";

	private static final String PARAMETER_LOCAL_RANDOM_SEED = "local_random_seed";

	private static final String PARAMETER_MAX_DEGREE = "max_degree";

	private static final String PARAMETER_REPLICATION_FACTOR = "replication_factor";

	/**
	 * Optimization of the polynomial's parameters.
	 *
	 * <p>The optimized vector has the following layout, where <code>r</code> is the
	 * replication factor: for attribute <code>a</code> and replica <code>f</code>, the
	 * coefficient is stored at index <code>(a * r + f) * 2</code> and the degree
	 * directly behind it at index <code>(a * r + f) * 2 + 1</code>. The last entry of
	 * the vector is the offset. The bounds, the value types and the decoding
	 * methods of this class all rely on this single layout.</p>
	 */
	private static class RegressionOptimization extends ESOptimization {

		/** Lower bound used for every coefficient and for the offset. */
		private static final double MIN_COEFFICIENT = -100;

		/** Upper bound used for every coefficient and for the offset. */
		private static final double MAX_COEFFICIENT = 100;

		/** Lower bound used for every degree. */
		private static final double MIN_DEGREE = 1;

		private final int replicationFactor;

		private final ExampleSet exampleSet;

		private final Attribute label;

		/**
		 * Creates the optimization for the given training data.
		 *
		 * @param exampleSet the training data; its label is used as regression target
		 * @param replicationFactor the number of (coefficient, degree) pairs per attribute
		 * @param maxIterations passed twice to the super constructor
		 *        (presumably as iteration limit and as limit without improvement; verify
		 *        against ESOptimization)
		 * @param maxDegree the upper bound for each degree
		 * @param random the random generator handed to the optimization
		 * @param logging the logging handler handed to the optimization
		 */
		public RegressionOptimization(ExampleSet exampleSet, int replicationFactor, int maxIterations, double maxDegree, RandomGenerator random, LoggingHandler logging) {
			super(getMinVector(exampleSet, replicationFactor),
					getMaxVector(exampleSet, replicationFactor, maxDegree),
					1,
					getVectorLength(exampleSet, replicationFactor),
					ESOptimization.INIT_TYPE_RANDOM,
					maxIterations,
					maxIterations,
					ESOptimization.TOURNAMENT_SELECTION,
					1.0,
					true,
					ESOptimization.GAUSSIAN_MUTATION,
					0.01d,
					0.0d,
					false,
					random,
					logging);
			this.replicationFactor = replicationFactor;
			this.exampleSet = exampleSet;
			this.label = exampleSet.getAttributes().getLabel();

			// coefficients are real valued, degrees are integers, the trailing offset is real valued
			int numberOfPairs = exampleSet.getAttributes().size() * replicationFactor;
			for (int pair = 0; pair < numberOfPairs; pair++) {
				setValueType(2 * pair, OptimizationValueType.VALUE_TYPE_DOUBLE);
				setValueType(2 * pair + 1, OptimizationValueType.VALUE_TYPE_INT);
			}
			setValueType(2 * numberOfPairs, OptimizationValueType.VALUE_TYPE_DOUBLE);
		}

		/** Returns the length of the optimized vector: one pair per attribute and replica plus the offset. */
		private static int getVectorLength(ExampleSet exampleSet, int replicationFactor) {
			return exampleSet.getAttributes().size() * replicationFactor * 2 + 1;
		}

		/** Creates a bounds vector holding the given bound for all coefficients (and the offset) and for all degrees. */
		private static double[] createBoundsVector(ExampleSet exampleSet, int replicationFactor, double coefficientBound, double degreeBound) {
			double[] bounds = new double[getVectorLength(exampleSet, replicationFactor)];
			int offsetIndex = bounds.length - 1;
			for (int i = 0; i < offsetIndex; i += 2) {
				bounds[i] = coefficientBound;
				bounds[i + 1] = degreeBound;
			}
			bounds[offsetIndex] = coefficientBound;
			return bounds;
		}

		/** Returns the lower bounds of all optimized values. */
		private static double[] getMinVector(ExampleSet exampleSet, int replicationFactor) {
			return createBoundsVector(exampleSet, replicationFactor, MIN_COEFFICIENT, MIN_DEGREE);
		}

		/** Returns the upper bounds of all optimized values; degrees are bounded by the given maximal degree. */
		private static double[] getMaxVector(ExampleSet exampleSet, int replicationFactor, double maxDegree) {
			return createBoundsVector(exampleSet, replicationFactor, MAX_COEFFICIENT, maxDegree);
		}

		/**
		 * Calculates the fitness of the given individual as the sum of the absolute
		 * differences between label and prediction over all training examples.
		 *
		 * @param individual the individual whose values encode coefficients, degrees and offset
		 * @return a performance vector with a single criterion holding the summed error
		 * @throws OperatorException declared by the overridden method
		 */
		public PerformanceVector evaluateIndividual(Individual individual) throws OperatorException {
			double[] values = individual.getValues();
			double[][] coefficients = getCoefficients(values);
			double[][] degrees = getDegrees(values);
			double offset = getOffset(values);

			double error = 0.0d;
			for (Example example : exampleSet) {
				double prediction = PolynomialRegressionModel.calculatePrediction(example, coefficients, degrees, offset);
				error += Math.abs(example.getValue(label) - prediction);
			}

			PerformanceVector performanceVector = new PerformanceVector();
			performanceVector.addCriterion(new EstimatedPerformance("Polynomial Regression Error", error, 1, true));
			return performanceVector;
		}

		/**
		 * Decodes one component of all (coefficient, degree) pairs.
		 *
		 * @param values the optimized vector
		 * @param positionInPair 0 for the coefficient, 1 for the degree
		 * @return a matrix indexed by <code>[replica][attribute]</code>
		 */
		private double[][] extractPairComponent(double[] values, int positionInPair) {
			int numberOfAttributes = exampleSet.getAttributes().size();
			double[][] result = new double[replicationFactor][numberOfAttributes];
			for (int a = 0; a < numberOfAttributes; a++) {
				for (int f = 0; f < replicationFactor; f++) {
					// must be the same index the constructor and the bounds vectors use;
					// the former "a + f * 2" made the slots of different attributes overlap
					// and read degrees from coefficient slots (and vice versa)
					result[f][a] = values[(a * replicationFactor + f) * 2 + positionInPair];
				}
			}
			return result;
		}

		/**
		 * Returns the coefficients encoded in the given vector.
		 *
		 * @param values the optimized vector
		 * @return the coefficients, indexed by <code>[replica][attribute]</code>
		 */
		public double[][] getCoefficients(double[] values) {
			return extractPairComponent(values, 0);
		}

		/**
		 * Returns the degrees encoded in the given vector.
		 *
		 * @param values the optimized vector
		 * @return the degrees, indexed by <code>[replica][attribute]</code>
		 */
		public double[][] getDegrees(double[] values) {
			return extractPairComponent(values, 1);
		}

		/**
		 * Returns the offset, i.e. the last entry of the given vector.
		 *
		 * @param values the optimized vector
		 * @return the offset
		 */
		public double getOffset(double[] values) {
			return values[values.length - 1];
		}
	}

	/** Creates the operator from the given description. */
	public PolynomialRegression(OperatorDescription description) {
		super(description);
	}

	/**
	 * Optimizes coefficients, degrees and offset on the given example set and
	 * wraps the best values ever found into a {@link PolynomialRegressionModel}.
	 *
	 * @param exampleSet the training data
	 * @return the fitted polynomial regression model
	 * @throws OperatorException if reading a parameter or the optimization fails
	 */
	public Model learn(ExampleSet exampleSet) throws OperatorException {
		RegressionOptimization optimization = new RegressionOptimization(
				exampleSet,
				getParameterAsInt(PARAMETER_REPLICATION_FACTOR),
				getParameterAsInt(PARAMETER_MAX_ITERATIONS),
				getParameterAsInt(PARAMETER_MAX_DEGREE),
				RandomGenerator.getRandomGenerator(getParameterAsInt(PARAMETER_LOCAL_RANDOM_SEED)),
				this);
		optimization.optimize();

		double[] values = optimization.getBestValuesEver();
		double[][] coefficients = optimization.getCoefficients(values);
		double[][] degrees = optimization.getDegrees(values);
		double offset = optimization.getOffset(values);

		return new PolynomialRegressionModel(exampleSet, coefficients, degrees, offset);
	}

	/**
	 * Indicates which learner capabilities are supported: numerical attributes,
	 * a numerical label and weighted examples.
	 *
	 * @param lc the capability in question
	 * @return true if the capability is one of the three listed above
	 */
	public boolean supportsCapability(LearnerCapability lc) {
		if (lc.equals(LearnerCapability.NUMERICAL_ATTRIBUTES)) {
			return true;
		}
		if (lc.equals(LearnerCapability.NUMERICAL_CLASS)) {
			return true;
		}
		// NOTE(review): the fitness function in this file never reads an example weight;
		// confirm that weighted examples are really handled before relying on this capability.
		if (lc == LearnerCapability.WEIGHTED_EXAMPLES) {
			return true;
		}
		return false;
	}

	/**
	 * Adds the parameters of this operator (maximum iterations, maximal degree,
	 * replication factor and local random seed) to the inherited ones.
	 *
	 * @return the inherited parameter types followed by the ones of this operator
	 */
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();

		ParameterType type = new ParameterTypeInt(PARAMETER_MAX_ITERATIONS, "The maximum number of iterations used for model fitting.", 1, Integer.MAX_VALUE, 100);
		type.setExpert(false);
		types.add(type);

		type = new ParameterTypeInt(PARAMETER_MAX_DEGREE, "The maximal degree used for the final polynomial.", 1, Integer.MAX_VALUE, 5);
		type.setExpert(false);
		types.add(type);

		type = new ParameterTypeInt(PARAMETER_REPLICATION_FACTOR, "The amount of times each input variable is replicated, i.e. how many different degrees and coefficients can be applied to each variable", 1, Integer.MAX_VALUE, 1);
		type.setExpert(false);
		types.add(type);

		type = new ParameterTypeInt(PARAMETER_LOCAL_RANDOM_SEED, "Use the given random seed instead of global random numbers (-1: use global)", -1, Integer.MAX_VALUE, -1);
		types.add(type);

		return types;
	}
}