/** * Copyright (C) 2001-2017 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. * If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.learner.functions; import com.rapidminer.example.Attribute; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.set.ExampleSetUtilities; import com.rapidminer.example.set.ExampleSetUtilities.SetsCompareOption; import com.rapidminer.example.set.ExampleSetUtilities.TypesCompareOption; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.OperatorProgress; import com.rapidminer.operator.error.AttributeNotFoundError; import com.rapidminer.operator.learner.PredictionModel; import com.rapidminer.tools.Tools; /** * The model for linear regression. * * @author Ingo Mierswa, Marius Helf */ public class LinearRegressionModel extends PredictionModel { private static final long serialVersionUID = 8381268071090932037L; private static final int OPERATOR_PROGRESS_STEPS = 5000; private String[] attributeNames; private boolean[] selectedAttributes; private double[] coefficients; private double[] standardErrors; private double[] tolerances; private double[] standardizedCoefficients; private double[] tStatistics; private double[] pValues; private boolean useIntercept = true; private String firstClassName = null; private String secondClassName = null; public LinearRegressionModel(ExampleSet exampleSet, boolean[] selectedAttributes, double[] coefficients, double[] standardErrors, double[] standardizedCoefficients, double[] tolerances, double[] tStatistics, double[] pValues, boolean useIntercept, String firstClassName, String secondClassName) { super(exampleSet, null, null); this.attributeNames = com.rapidminer.example.Tools.getRegularAttributeNames(exampleSet); this.selectedAttributes = selectedAttributes; this.coefficients = coefficients; this.standardErrors = standardErrors; this.standardizedCoefficients = standardizedCoefficients; this.tolerances = tolerances; this.tStatistics = tStatistics; this.pValues = pValues; this.useIntercept = useIntercept; this.firstClassName = firstClassName; this.secondClassName = secondClassName; } @Override public ExampleSet performPrediction(ExampleSet exampleSet, Attribute predictedLabel) throws OperatorException { ExampleSetUtilities.checkAttributesMatching(null, this.getTrainingHeader().getAttributes(), exampleSet.getAttributes(), SetsCompareOption.EQUAL, TypesCompareOption.EQUAL); Attribute[] attributes = new Attribute[attributeNames.length]; for (int i = 0; i < attributeNames.length; i++) { attributes[i] = exampleSet.getAttributes().get(attributeNames[i]); if (attributes[i] == null && selectedAttributes[i]) { throw new AttributeNotFoundError(null, null, attributeNames[i]); } } // initialize progress OperatorProgress progress = null; if (getShowProgress() && getOperator() != null && getOperator().getProgress() != null) { progress = getOperator().getProgress(); progress.setTotal(exampleSet.size()); } int progressCounter = 0; for (Example example : exampleSet) { double prediction = 0; int index = 0; int attributeCounter = 0; for (Attribute attribute : attributes) { if (selectedAttributes[attributeCounter]) { prediction += coefficients[index] * example.getValue(attribute); index++; } attributeCounter++; } if (useIntercept) { prediction += coefficients[index]; } if (predictedLabel.isNominal()) { int predictionIndex = prediction > 0.5 ? predictedLabel.getMapping().getIndex(secondClassName) : predictedLabel.getMapping().getIndex(firstClassName); example.setValue(predictedLabel, predictionIndex); // set confidence to numerical prediction, such that can be scaled later. // The line below calculates the logistic function of the prediction. The logistic // function // is symmetric to the point (0.0, 0.5), but we use 0.5 as a prediction threshold, // not 0.0. // For that reason we have to shift the function to the right by 0.5 by subtracting // that value // from the function argument. double logFunction = 1.0d / (1.0d + java.lang.Math.exp(-(prediction - 0.5))); example.setConfidence(secondClassName, logFunction); example.setConfidence(firstClassName, 1 - logFunction); } else { example.setValue(predictedLabel, prediction); } if (progress != null && ++progressCounter % OPERATOR_PROGRESS_STEPS == 0) { progress.setCompleted(progressCounter); } } return exampleSet; } @Override public String toString() { StringBuffer result = new StringBuffer(); boolean first = true; int index = 0; for (int i = 0; i < selectedAttributes.length; i++) { if (selectedAttributes[i]) { result.append(getCoefficientString(coefficients[index], first) + " * " + attributeNames[i] + Tools.getLineSeparator()); index++; first = false; } } if (useIntercept) { result.append(getCoefficientString(coefficients[coefficients.length - 1], first)); } return result.toString(); } private String getCoefficientString(double coefficient, boolean first) { if (!first) { if (coefficient >= 0) { return "+ " + Tools.formatNumber(Math.abs(coefficient)); } else { return "- " + Tools.formatNumber(Math.abs(coefficient)); } } else { if (coefficient >= 0) { return " " + Tools.formatNumber(Math.abs(coefficient)); } else { return "- " + Tools.formatNumber(Math.abs(coefficient)); } } } /** * returns an array containing all names of all attributes used for training */ public String[] getAttributeNames() { return attributeNames; } /** * returns an array containing only the names of those attributes that have been selected to be * included into the model */ public String[] getSelectedAttributeNames() { String[] attributeNames = new String[useIntercept ? coefficients.length - 1 : coefficients.length]; int index = 0; for (int i = 0; i < selectedAttributes.length; i++) { if (selectedAttributes[i]) { attributeNames[index] = this.attributeNames[i]; index++; } } return attributeNames; } public boolean[] getSelectedAttributes() { return selectedAttributes; } public String getFirstLabel() { return firstClassName; } public String getSecondLabel() { return secondClassName; } public boolean usesIntercept() { return useIntercept; } /** * This method will return all used coefficients. So the array will be smaller than the array of * used attribute names! The ordering is the same but will only contain used attributes with the * bias appended. */ public double[] getCoefficients() { return coefficients; } public double[] getStandardizedCoefficients() { return standardizedCoefficients; } public double[] getTolerances() { return tolerances; } public double[] getStandardErrors() { return standardErrors; } public double[] getTStats() { return tStatistics; } public double[] getProbabilities() { return pValues; } }