/** * Copyright (C) 2001-2017 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. * If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.tools.math; import java.util.logging.Logger; import org.apache.commons.math3.linear.MatrixUtils; import org.apache.commons.math3.linear.RealMatrix; import com.rapidminer.Process; import Jama.Matrix; /** * This class can be used to calculate the coefficients of a (weighted) linear regression. It uses * the class Matrix from the Jama package for most purposes and the class RealMatrix from Apache for * matrix multiplication because it is faster. It is also possible to apply Ridge Regression which * is a sort of regularization well suited especially for ill-posed problems. Please note that for * the dependent matrix Y only one column is allowed. * * @author Ingo Mierswa */ public class LinearRegression { private static Logger logger = Logger.getLogger(Process.class.getName()); /** Performs a weighted linear ridge regression. */ public static double[] performRegression(Matrix x, Matrix y, double[] weights, double ridge) { Matrix weightedIndependent = new Matrix(x.getRowDimension(), x.getColumnDimension()); Matrix weightedDependent = new Matrix(x.getRowDimension(), 1); for (int i = 0; i < weights.length; i++) { double sqrtWeight = Math.sqrt(weights[i]); for (int j = 0; j < x.getColumnDimension(); j++) { weightedIndependent.set(i, j, x.get(i, j) * sqrtWeight); } weightedDependent.set(i, 0, y.get(i, 0) * sqrtWeight); } return performRegression(weightedIndependent, weightedDependent, ridge); } /** Calculates the coefficients of linear ridge regression. */ public static double[] performRegression(Matrix a, Matrix b, double ridge) { RealMatrix x = MatrixUtils.createRealMatrix(a.getArray()); RealMatrix y = MatrixUtils.createRealMatrix(b.getArray()); int numberOfColumns = x.getColumnDimension(); double[] coefficients = new double[numberOfColumns]; RealMatrix xTransposed = x.transpose(); Matrix result; boolean finished = false; while (!finished) { RealMatrix xTx = xTransposed.multiply(x); for (int i = 0; i < numberOfColumns; i++) { xTx.addToEntry(i, i, ridge); } RealMatrix xTy = xTransposed.multiply(y); coefficients = xTy.getColumn(0); try { // do not use Apache LUDecomposition for solve instead because it creates different // results result = new Matrix(xTx.getData()).solve(new Matrix(coefficients, coefficients.length)); for (int i = 0; i < numberOfColumns; i++) { coefficients[i] = result.get(i, 0); } finished = true; } catch (Exception ex) { double ridgeOld = ridge; if (ridge > 0) { ridge *= 10; } else { ridge = 0.0000001; } finished = false; logger.warning("Error during calculation: " + ex.getMessage() + ": Increasing ridge factor from " + ridgeOld + " to " + ridge); } } return coefficients; } }