/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions.linear;

import java.util.Collections;
import java.util.List;

import com.rapidminer.example.ExampleSet;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.UndefinedParameterError;

/**
 * This class implements the M5Prime feature selection method for Linear Regression.
 *
 * @author Sebastian Land
 */
public class M5PLinearRegressionMethod implements LinearRegressionMethod {

    @Override
    public LinearRegressionResult applyMethod(LinearRegression regression, boolean useBias, double ridge, ExampleSet exampleSet,
            boolean[] isUsedAttribute, int numberOfExamples, int numberOfUsedAttributes, double[] means, double labelMean,
            double[] standardDeviations, double labelStandardDeviation, double[] coefficientsOnFullData, double errorOnFullData)
            throws UndefinedParameterError {

        LinearRegressionResult result = new LinearRegressionResult();
        result.isUsedAttribute = isUsedAttribute;
        result.coefficients = coefficientsOnFullData;
        result.error = errorOnFullData;

        // attribute removal as in M5 prime
        boolean improved = true;
        int currentNumberOfAttributes = numberOfUsedAttributes;
        double akaike = (numberOfExamples - numberOfUsedAttributes) + 2 * numberOfUsedAttributes;
        while (improved) {
            improved = false;
            currentNumberOfAttributes--;

            // find the attribute with the smallest standardized coefficient
            double minStandardizedCoefficient = 0;
            int attribute2Deselect = -1;
            int coefficientIndex = 0;
            for (int i = 0; i < isUsedAttribute.length; i++) {
                if (isUsedAttribute[i]) {
                    double standardizedCoefficient = Math.abs(coefficientsOnFullData[coefficientIndex] * standardDeviations[i] / labelStandardDeviation);
                    if ((coefficientIndex == 0) || (standardizedCoefficient < minStandardizedCoefficient)) {
                        minStandardizedCoefficient = standardizedCoefficient;
                        attribute2Deselect = i;
                    }
                    coefficientIndex++;
                }
            }

            // check if removing this attribute improves Akaike
            if (attribute2Deselect >= 0) {
                isUsedAttribute[attribute2Deselect] = false;
                double[] currentCoefficients = regression.performRegression(exampleSet, isUsedAttribute, means, labelMean, ridge);
                double currentError = regression.getSquaredError(exampleSet, isUsedAttribute, currentCoefficients, useBias);
                double currentAkaike = currentError / errorOnFullData * (numberOfExamples - numberOfUsedAttributes) + 2 * currentNumberOfAttributes;
                if (currentAkaike < akaike) {
                    improved = true;
                    akaike = currentAkaike;
                    result.coefficients = currentCoefficients;
                    result.error = currentError;
                } else {
                    isUsedAttribute[attribute2Deselect] = true;
                }
            }
        }
        return result;
    }

    @Override
    public List<ParameterType> getParameterTypes() {
        return Collections.emptyList();
    }
}
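/*
 * A minimal usage sketch (not part of the original class): how the M5Prime attribute
 * elimination implemented above might be invoked from a LinearRegression operator.
 * The regression, exampleSet, statistics arrays and the ridge value shown here are
 * illustrative assumptions and are not defined in this file.
 *
 *     LinearRegressionMethod method = new M5PLinearRegressionMethod();
 *     LinearRegressionResult result = method.applyMethod(regression, true, 1.0e-8, exampleSet,
 *             isUsedAttribute, numberOfExamples, numberOfUsedAttributes, means, labelMean,
 *             standardDeviations, labelStandardDeviation, coefficientsOnFullData, errorOnFullData);
 */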