package edu.princeton.cs.algs4.ch14;
import edu.princeton.cs.introcs.*;
/*************************************************************************
* Compilation: javac -cp .:jama.jar PolynomialRegression.java
* Execution: java -cp .:jama.jar PolynomialRegression
* Dependencies: jama.jar StdOut.java
*
* % java -cp .:jama.jar PolynomialRegression
* 0.01 N^3 + -1.64 N^2 + 168.92 N + -2113.73 (R^2 = 0.997)
*
*************************************************************************/
import Jama.Matrix;
import Jama.QRDecomposition;
/**
* The <tt>PolynomialRegression</tt> class performs a polynomial regression
* on a set of <em>N</em> data points (<em>y<sub>i</sub></em>, <em>x<sub>i</sub></em>).
* That is, it fits a polynomial
* <em>y</em> = β<sub>0</sub> + β<sub>1</sub> <em>x</em> +
* β<sub>2</sub> <em>x</em><sup>2</sup> + ... +
* β<sub><em>d</em></sub> <em>x</em><sup><em>d</em></sup>
* (where <em>y</em> is the response variable, <em>x</em> is the predictor variable,
* and the β<sub><em>i</em></sub> are the regression coefficients)
* that minimizes the sum of squared residuals of the multiple regression model.
* It also computes the associated coefficient of determination <em>R</em><sup>2</sup>.
* <p>
* This implementation performs a QR-decomposition of the underlying
* Vandermonde matrix, so it is not the fastest or most numerically
* stable way to perform the polynomial regression.
*
* @author Robert Sedgewick
* @author Kevin Wayne
*/
public class PolynomialRegression {
    private int degree;        // degree of the fitted polynomial (may be lowered if the
                               // Vandermonde matrix is rank-deficient; see constructor)
    private final int N;       // number of observations
    private final Matrix beta; // the polynomial regression coefficients (column vector)
    private double SSE;        // sum of squares due to error (residual sum of squares)
    private double SST;        // total sum of squares
    /**
     * Performs a polynomial regression on the data points <tt>(y[i], x[i])</tt>.
     * If the underlying Vandermonde matrix is rank-deficient (for example, when
     * there are fewer distinct x-values than <tt>degree + 1</tt>), the degree of
     * the fitted polynomial is reduced until the least-squares problem is
     * well-posed; use {@link #degree()} to query the degree actually used.
     * @param x the values of the predictor variable
     * @param y the corresponding values of the response variable
     * @param degree the degree of the polynomial to fit
     * @throws java.lang.IllegalArgumentException if the lengths of the two arrays are not equal
     */
    public PolynomialRegression(double[] x, double[] y, int degree) {
        if (x.length != y.length) {
            throw new IllegalArgumentException("array lengths are not equal: "
                + x.length + " != " + y.length);
        }
        this.degree = degree;
        N = x.length;
        // build Vandermonde matrix; if it is rank-deficient, QR cannot solve the
        // least-squares problem, so reduce the degree until it has full rank
        Matrix X;
        QRDecomposition qr;
        while (true) {
            double[][] vandermonde = new double[N][this.degree+1];
            for (int i = 0; i < N; i++) {
                for (int j = 0; j <= this.degree; j++) {
                    vandermonde[i][j] = Math.pow(x[i], j);
                }
            }
            X = new Matrix(vandermonde);
            qr = new QRDecomposition(X);
            if (qr.isFullRank()) break;
            this.degree--;
        }
        // create matrix from vector of responses
        Matrix Y = new Matrix(y, N);
        // find least squares solution
        beta = qr.solve(Y);
        // mean of y[] values
        double sum = 0.0;
        for (int i = 0; i < N; i++)
            sum += y[i];
        double mean = sum / N;
        // total variation to be accounted for
        for (int i = 0; i < N; i++) {
            double dev = y[i] - mean;
            SST += dev*dev;
        }
        // variation not accounted for; for a column vector, norm2 (largest
        // singular value) equals the Euclidean norm, so this is the RSS
        Matrix residuals = X.times(beta).minus(Y);
        SSE = residuals.norm2() * residuals.norm2();
    }
    /**
     * Returns the <tt>j</tt>th regression coefficient.
     * @param j the index of the coefficient (0 = intercept)
     * @return the <tt>j</tt>th regression coefficient
     */
    public double beta(int j) {
        return beta.get(j, 0);
    }
    /**
     * Returns the degree of the polynomial that was fit. This may be smaller
     * than the degree requested in the constructor if the data could not
     * support the requested degree.
     * @return the degree of the fitted polynomial
     */
    public int degree() {
        return degree;
    }
    /**
     * Returns the coefficient of determination <em>R</em><sup>2</sup>.
     * @return the coefficient of determination <em>R</em><sup>2</sup>, which is a real number between 0 and 1
     */
    public double R2() {
        if (SST == 0.0) return 1.0; // constant function: model explains everything
        return 1.0 - SSE/SST;
    }
    /**
     * Returns the expected response <tt>y</tt> given the value of the predictor
     * variable <tt>x</tt>.
     * @param x the value of the predictor variable
     * @return the expected response <tt>y</tt> given the value of the predictor
     *         variable <tt>x</tt>
     */
    public double predict(double x) {
        // Horner's method: evaluate from the highest-degree coefficient down
        double y = 0.0;
        for (int j = degree; j >= 0; j--)
            y = beta(j) + (x * y);
        return y;
    }
    /**
     * Returns a string representation of the polynomial regression model.
     * @return a string representation of the polynomial regression model,
     *         including the best-fit polynomial and the coefficient of determination <em>R</em><sup>2</sup>
     */
    public String toString() {
        StringBuilder s = new StringBuilder();
        // skip leading coefficients that are (numerically) zero
        int j = degree;
        while (j >= 0 && Math.abs(beta(j)) < 1E-5)
            j--;
        // create remaining terms, highest degree first
        for (; j >= 0; j--) {
            if (j == 0) s.append(String.format("%.2f ", beta(j)));
            else if (j == 1) s.append(String.format("%.2f N + ", beta(j)));
            else s.append(String.format("%.2f N^%d + ", beta(j), j));
        }
        return s + " (R^2 = " + String.format("%.3f", R2()) + ")";
    }
    /**
     * Unit tests the <tt>PolynomialRegression</tt> data type by fitting a
     * cubic polynomial to a small sample data set.
     */
    public static void main(String[] args) {
        double[] x = { 10, 20, 40, 80, 160, 200 };
        double[] y = { 100, 350, 1500, 6700, 20160, 40000 };
        PolynomialRegression regression = new PolynomialRegression(x, y, 3);
        StdOut.println(regression);
    }
}
/*************************************************************************
* Copyright 2002-2012, Robert Sedgewick and Kevin Wayne.
*
* This file is part of algs4-package.jar, which accompanies the textbook
*
* Algorithms, 4th edition by Robert Sedgewick and Kevin Wayne,
* Addison-Wesley Professional, 2011, ISBN 0-321-57351-X.
* http://algs4.cs.princeton.edu
*
*
* algs4-package.jar is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* algs4-package.jar is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
* along with algs4-package.jar. If not, see http://www.gnu.org/licenses.
*************************************************************************/