/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.toolbox.statistics; import Jama.Matrix; import java.util.List; public class MatrixUtil { // Takes Matrix X and Matrix Y and returns Matrix containing b values public static Matrix linearRegression(Matrix X, Matrix Y) { Matrix XTrans = X.transpose(); return XTrans.times(X).inverse().times(XTrans).times(Y); } // Java-friendly version that takes an array of arrays of x values, and an array of y values // Returns array of b values public static double[] linearRegression(double[][] xArray, double[] yArray) { int pointCount = yArray.length; int independentCount = xArray.length; double[][] x = new double[pointCount][independentCount + 1]; double[][] y = new double[pointCount][1]; // Transpose into new arrays and add column of 1s to x for(int i=0; i<pointCount; i++) { y[i][0] = yArray[i]; x[i][0] = 1; for (int j=0; j<independentCount; j++) x[i][j + 1] = xArray[j][i]; } return linearRegression(new Matrix(x), new Matrix(y)).getRowPackedCopy(); } // Cover method for Lists of Numbers. Very inefficient public static double[] linearRegression(List<? extends Number> xList, List<? extends Number> yList) { double[] xArray = new double[xList.size()]; double[] yArray = new double[yList.size()]; for (int i=0; i<xList.size(); i++) { xArray[i] = xList.get(i).doubleValue(); yArray[i] = yList.get(i).doubleValue(); } return linearRegression(xArray, yArray); } // Simple version for single independent variable: y = a + bx public static double[] linearRegression(double[] xArray, double[] yArray) { return linearRegression(new double[][]{xArray}, yArray); } // Takes Matrix X and Matrix Y and returns Matrix containing R-squared public static Matrix r2(Matrix X, Matrix Y) { Matrix XTrans = X.transpose(); Matrix YTrans = Y.transpose(); return YTrans.times(X).times( XTrans.times(X).inverse() ).times(XTrans).times(Y).times( YTrans.times(Y).inverse()); } public static double r2(double[][] xArray, double[] yArray) { int pointCount = yArray.length; int independentCount = xArray.length; double[][] x = new double[pointCount][independentCount]; double[][] y = new double[pointCount][1]; double[] xTotal = new double[independentCount]; double[] xAvg = new double[independentCount]; double yTotal = 0; // Calculate average of x columns, y for(int i=0; i<pointCount; i++) { yTotal += yArray[i]; for (int j=0; j<independentCount; j++) xTotal[j] += xArray[j][i]; } for(int i=0; i<independentCount; i++) xAvg[i] = xTotal[i] / pointCount; double yAvg = yTotal / pointCount; // Create deviation score form, transpose into new arrays for(int i=0; i<pointCount; i++) { y[i][0] = yArray[i] - yAvg; for (int j=0; j<independentCount; j++) x[i][j] = xArray[j][i] - xAvg[j]; } return r2(new Matrix(x), new Matrix(y)).get(0, 0); } // Simple version for single independent variable: y = a + bx public static double r2(double[] xArray, double[] yArray) { return r2(new double[][]{xArray}, yArray); } public static double r2(float[] xArray, float[] yArray) { return r2(toDoubleArray(xArray), toDoubleArray(yArray)); } private static double[] toDoubleArray(float[] array) { double[] dArray = new double[array.length]; for (int i=0; i<array.length; i++) dArray[i] = array[i]; return dArray; } public static double sigma(double[] x, double[] y, double[] b) { double errorTotal = 0; for (int i=0; i<y.length; i++) { errorTotal += Math.pow(y[i] - (b[0] + b[1] * x[i]), 2); } return Math.sqrt(errorTotal / (y.length - 2)); } }