/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program. If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions.kernel.gaussianprocess;

import java.util.TreeSet;

import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.Kernel;

import Jama.Matrix;

/**
 * Gaussian Process Regression.
 *
 * REFERENCES:
 *
 * Lehel Csato. Gaussian Processes --- Iterative Sparse Approximations. PhD thesis, Aston University,
 * Birmingham, UK, March 2002.
 *
 * TODO:
 * - own Matrix implementation with SE-Cholesky decomposition and the ability to constrain matrix
 *   operations to sub-matrices
 * - CompositeKernel implementation for kernels based on finite basis functions
 *
 * @author Piotr Kasprzak
 * @version $Id: Regression.java,v 1.3 2008/05/09 19:23:17 ingomierswa Exp $
 */
public class Regression extends GPBase {

    /** Used to hold a score value with an associated index. */
    protected static class Score implements Comparable<Score> {

        double score;
        int index;

        Score(double score, int index) {
            this.score = score;
            this.index = index;
        }

        /** Orders scores ascending. Ties collapse in a TreeSet, which is fine for minimum search. */
        public int compareTo(Score other) {
            return Double.compare(score, other.getScore());
        }

        public boolean equals(Object o) {
            if (o instanceof Score) {
                return score == ((Score) o).score;
            }
            return false;
        }

        public int hashCode() {
            return Double.valueOf(score).hashCode();
        }

        public double getScore() {
            return score;
        }

        public int getIndex() {
            return index;
        }
    }

    /** Constructor */
    public Regression(RegressionProblem problem, Parameter parameter) {
        super(problem, parameter);
    }
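    /*
     * Illustrative usage sketch (how the RegressionProblem and Parameter objects are constructed
     * depends on the caller; only the calls below are taken from this class):
     *
     *     RegressionProblem problem = ...;   // input/target vectors, kernel, noise sigma_0^2
     *     Parameter parameter = ...;         // maxBasisVectors, epsilon_tol, geometrical_tol
     *
     *     Model model = new Regression(problem, parameter).learn();
     */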
    /**
     * Compute the (canonical) scalar product between x and y, using only the first d components of
     * the vectors.
     */
    private double scalarProduct(double[][] x, double[][] y, int d) throws Exception {

        /* The lengths of x and y must both be >= d */
        if (x.length < d || y.length < d) {
            throw new Exception("At least one vector has a too small dimension!");
        }

        double result = 0;
        for (int i = 0; i < d; i++) {
            result += x[i][0] * y[i][0];
        }

        return result;
    }

    /**
     * Swap rows i and j as well as columns i and j of a symmetric n x n matrix, which is
     * represented as a double[][].
     */
    private void swapRowsAndColumns(double[][] A, int i, int j) {

        /* Dimension of the matrix */
        int n = A[0].length;

        /* Swap rows i and j of A */
        double[] tr = A[i];
        A[i] = A[j];
        A[j] = tr;

        /* Swap columns i and j of A */
        for (int k = 0; k < n; k++) {
            double ts = A[k][i];
            A[k][i] = A[k][j];
            A[k][j] = ts;
        }
    }
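    /*
     * For reference: the sparse posterior GP computed by learn() below is parameterized by the
     * basis vectors bv_1, ..., bv_d together with alpha and C, and its marginal at a new input x is
     * Gaussian with
     *
     *     mean     m(x)       = sum_j alpha_j * K(bv_j, x)
     *     variance sigma^2(x) = K(x, x) + k(x)^T * C * k(x)
     *
     * where k(x) = [K(bv_1, x), ..., K(bv_d, x)]^T. A minimal sketch of the mean computation
     * (illustrative only, not part of this class):
     *
     *     double mean = 0;
     *     for (int j = 0; j < d; j++) {
     *         mean += alpha.get(j, 0) * kernel.eval(basisVectors[j], x);
     *     }
     */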
    /** The hard work is done here. */
    public Model learn() throws Exception {

        RegressionProblem problem = (RegressionProblem) this.problem;

        int numExamples = problem.getProblemSize();
        int inputDim = problem.getInputDimension();

        double[][] x = problem.getInputVectors();
        double[][] y = problem.getTargetVectors();

        double[] x_new;
        double y_new;

        Kernel kernel = problem.getKernel();

        /* Maximum number of basis vectors to use (= size of the various vectors and matrices) */
        int dMax = parameter.maxBasisVectors + 1;

        /* Current number of used basis vectors */
        int d = 0;

        /* alpha parameterizes the mean of the posterior GP. */
        Matrix alpha = new Matrix(dMax, 1);

        /* C parameterizes the covariance matrix of the posterior GP. */
        Matrix C = new Matrix(dMax, dMax);

        /* s is used in the updates of alpha and C. */
        Matrix s = new Matrix(dMax, 1);

        /*
         * Q is the inverse kernel (Gram) matrix. It is built up iteratively to avoid an O(d^3)
         * performance hit.
         */
        Matrix Q = new Matrix(dMax, dMax);

        /*
         * The vector k_t+1 = [K(x_1, x_t+1), K(x_2, x_t+1), ..., K(x_t, x_t+1)], holding the
         * results of applying the kernel to the new input x_t+1 and each of the old inputs.
         */
        Matrix k = new Matrix(dMax, 1);

        /*
         * e_t+1 holds the projection coordinates of the associated basis vector of a new input when
         * using an orthogonal projection (see chapter 3.1). We are using a KL-optimal projection
         * (see chapter 3.3), but the vector e_t+1 is still needed for the update of the inverse
         * kernel Gram matrix Q.
         */
        Matrix e = new Matrix(dMax, 1);

        /* The t+1-th unit vector. */
        Matrix u = new Matrix(dMax, 1);

        /* The basis vectors */
        double[][] basisVectors = new double[dMax][inputDim];

        /* Temporary result: C_t * k_t+1 */
        Matrix C_times_k = new Matrix(dMax, 1);

        /* Used temporarily */
        Matrix t = new Matrix(dMax, 1);

        double k_star = 0;
        double nabla = 0;

        /* Some statistics */
        int gamma_projections = 0;        // Number of gamma induced projections
        int kl_projections = 0;           // Number of KL-based projections (after the maximum
                                          // number of BVs is exceeded)
        int geometrical_projections = 0;  // Number of geometrical projections (to prevent the
                                          // kernel Gram matrix from becoming too badly conditioned)

        /* Process every input/output example */
        for (int i = 0; i < numExamples; i++) {

            x_new = x[i];
            y_new = y[i][0];

            /*
             * Compute k_t+1 = [K(x_1, x_t+1), K(x_2, x_t+1), ..., K(x_t, x_t+1)] and
             *
             * k_star = K(x_t+1, x_t+1)
             */
            for (int j = 0; j < d; j++) {
                k.getArray()[j][0] = kernel.eval(basisVectors[j], x_new);
            }
            k_star = kernel.eval(x_new, x_new);

            /*
             * Compute the scalars q_new and r_new.
             *
             * NOTE: We assume a Gaussian likelihood function (a regression model with Gaussian
             * noise) here, which permits analytical expressions for q_new / r_new. A more elaborate
             * implementation would use integral approximations (e.g. Monte Carlo based) to allow
             * for other likelihoods for which no such analytical expressions exist.
             */

            /*
             * Compute m_t+1, the mean of the marginal of the GP at x_t+1, which is Gaussian:
             *
             * m_t+1 = {k_t+1}^T * alpha_t
             */
            double m = scalarProduct(k.getArray(), alpha.getArray(), d);

            /*
             * Compute {sigma_t+1}^2, the variance of the marginal of the GP at x_t+1:
             *
             * {sigma_t+1}^2 = K(x_t+1, x_t+1) + {k_t+1}^T * C_t * k_t+1
             *
             * (see end of chapter 2.4 and chapter 5.1)
             */
            C_times_k = C.times(k);
            double sigma_2 = k_star + scalarProduct(k.getArray(), C_times_k.getArray(), d);

            /* Compute q_t+1 = (y_t+1 - m_t+1) / ({sigma_0}^2 + {sigma_t+1}^2) */
            double q = (y_new - m) / (problem.sigma_0_2 + sigma_2);

            /* Compute r_t+1 = -1.0 / ({sigma_0}^2 + {sigma_t+1}^2) */
            double r = -1.0 / (problem.sigma_0_2 + sigma_2);

            /* Compute e_t+1 = Q_t * k_t+1 */
            e = Q.times(k);

            /*
             * Compute the (length of the residual)^2 [if using an orthogonal projection; we need it
             * for the iterative update of the inverse kernel Gram matrix Q]:
             *
             * gamma_t+1 = k_star - {k_t+1}^T * Q_t * k_t+1     (chapter 3.1)
             */
            double gamma = k_star - scalarProduct(k.getArray(), e.getArray(), d);

            /*
             * DEBUG: Check if Q is REALLY the inverse kernel Gram matrix. The explicit Gram matrix
             * built here is used for debugging only (its consumers below were already commented
             * out), so the whole block is disabled to avoid O(d^2) kernel evaluations per example.
             */
            // Matrix Gram = new Matrix(d, d);
            // for (int ii = 0; ii < d; ii++) {
            //     for (int jj = 0; jj < d; jj++) {
            //         Gram.getArray()[ii][jj] = kernel.eval(basisVectors[ii], basisVectors[jj]);
            //     }
            // }
            // Matrix inverseGram = Q.getMatrix(0, d - 1, 0, d - 1);
            // Matrix diffGram = Gram.times(inverseGram);
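            /*
             * If the debug block above is enabled, Q can be validated against the explicit Gram
             * matrix with plain Jama calls, e.g. (illustrative sketch):
             *
             *     double err = Gram.times(Q.getMatrix(0, d - 1, 0, d - 1))
             *             .minus(Matrix.identity(d, d)).normF();
             *
             * err should stay close to zero as long as Q remains well conditioned.
             */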
            /*
             * Check if the length of the residual is too small and if so, use the projection
             * equations / sparse update (3.29) and proceed to the next input. This helps to
             * maintain a good numerical condition of Q.
             *
             * nabla_t+1 = (1 + gamma_t+1 * r_t+1)^{-1}
             *
             * s_t+1     = C_t * k_t+1 + e_t+1
             *
             * alpha_t+1 = alpha_t + q_t+1 * nabla_t+1 * s_t+1
             *
             * C_t+1     = C_t + r_t+1 * nabla_t+1 * s_t+1 * {s_t+1}^T
             */
            if (gamma < parameter.epsilon_tol) {

                nabla = 1.0 / (1 + gamma * r);
                s = C_times_k.plus(e);
                alpha = alpha.plus(s.times(q * nabla));
                C = C.plus((s.times(s.transpose())).times(r * nabla));

                gamma_projections++;

            } else {

                /*
                 * Add the basis vector of the current input to the BV set and update the
                 * appropriate variables using (2.46), increasing the dimensions of all vectors and
                 * matrices (u_t+1 is the t+1-th unit vector) and also updating Q.
                 *
                 * s_t+1     = C_t * k_t+1 + u_t+1
                 *
                 * alpha_t+1 = alpha_t + q_t+1 * s_t+1
                 *
                 * C_t+1     = C_t + r_t+1 * s_t+1 * {s_t+1}^T
                 */

                /* Build the t+1-th unit vector */
                for (int j = 0; j < dMax; j++) {
                    u.getArray()[j][0] = 0;
                }
                u.getArray()[d][0] = 1.0;

                /* Apply the standard online update as described above */
                s = C_times_k.plus(u);
                alpha = alpha.plus(s.times(q));
                C = C.plus((s.times(s.transpose())).times(r));

                /*
                 * We have enlarged our BV set, so we need to update the inverse kernel Gram matrix.
                 * To avoid an O(d^3) performance hit, this can be done iteratively (appendix C) at
                 * a cost of just O(d^2):
                 *
                 * Q_t+1 = Q_t + {gamma_t+1}^{-1} * (e_t+1 - u_t+1) * (e_t+1 - u_t+1)^T
                 */
                t = e.minus(u);
                Q = Q.plus((t.times(t.transpose())).times(1.0 / gamma));

                /* Remember the added basis vector */
                basisVectors[d] = x_new;
                d++;

                /*
                 * NOTE: The exact re-inversion below overwrites the iterative O(d^2) update of Q
                 * above with a numerically more robust O(d^3) computation.
                 */
                Matrix Gram = new Matrix(d, d);
                for (int ii = 0; ii < d; ii++) {
                    for (int jj = 0; jj < d; jj++) {
                        Gram.getArray()[ii][jj] = kernel.eval(basisVectors[ii], basisVectors[jj]);
                    }
                }

                /* Invert Q via Cholesky decomposition (alternative: Gram.inverse()) */
                Matrix L = Gram.chol().getL();
                Matrix invL = L.inverse();
                Q.setMatrix(0, d - 1, 0, d - 1, invL.transpose().times(invL));
            }

            /*
             * Check if we have too many basis vectors. If so:
             *
             * 1.) For each basis vector i calculate a score epsilon_i (3.26) [approximation of
             *     (3.24)]:
             *
             *     epsilon_t+1(i) = alpha^2(i) / (q(i) + c(i))
             *
             * 2.) Find the basis vector with the minimum score.
             *
             * 3.) Permute the GP parameterization, so that this basis vector is the last one added.
             *
             * 4.) Remove the last basis vector using eqs. (3.19), (3.21) and (3.22).
             */
            if (d >= dMax) {

                int min_index = getMinScoresKLApprox(alpha, C, Q, d).first().getIndex();

                /* Permute the relevant vectors / matrices, so that the found basis vector is the
                 * last one, then remove it. */
                deleteBV(alpha, C, Q, basisVectors, d - 1, min_index);
                d--;

                kl_projections++;
            }

            /*
             * Check the geometrical score. All BVs with a score below a certain threshold are
             * removed.
             */
            while (d > 0) {
                Score minScore = getMinScoresGeometrical(alpha, C, Q, d).first();
                if (minScore.getScore() > parameter.geometrical_tol) {
                    break;
                }
                deleteBV(alpha, C, Q, basisVectors, d - 1, minScore.getIndex());
                d--;
                geometrical_projections++;
            }
        }

        return new Model(kernel, basisVectors, alpha.getMatrix(0, d - 1, 0, 0),
                C.getMatrix(0, d - 1, 0, d - 1), Q.getMatrix(0, d - 1, 0, d - 1), d, true);
    }

    /**
     * Return the scores of all BVs, ordered ascending.
     *
     * The score used is a mean based approximation to the true KL-distance between the full GP and
     * the GP without the current BV for which the score is calculated.
     */
    private TreeSet<Score> getMinScoresKLApprox(Matrix alpha, Matrix C, Matrix Q, int d) {

        /* The scores used to decide which basis vector to remove */
        TreeSet<Score> scores = new TreeSet<Score>();

        /* Compute the score of every basis vector */
        for (int j = 0; j < d; j++) {
            double score = alpha.getArray()[j][0] * alpha.getArray()[j][0]
                    / (Q.getArray()[j][j] + C.getArray()[j][j]);
            scores.add(new Score(score, j));
        }

        return scores;
    }

    /**
     * Return the scores of all BVs, ordered ascending.
     *
     * The score used is based on the euclidean distance between the current BV and its orthogonal
     * projection into the span of all the other BVs.
     *
     * This score is especially important since it can (must!) be used to prevent the kernel Gram
     * matrix from becoming too badly conditioned.
     */
    private TreeSet<Score> getMinScoresGeometrical(Matrix alpha, Matrix C, Matrix Q, int d) {

        /* The scores used to decide which basis vector to remove */
        TreeSet<Score> scores = new TreeSet<Score>();

        /* Compute the score of every basis vector */
        for (int j = 0; j < d; j++) {
            double score = 1.0 / Q.getArray()[j][j];
            scores.add(new Score(score, j));
        }

        return scores;
    }
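    /*
     * Note on the geometrical score used above: from the rank-one update of Q in learn(), the
     * diagonal entry of Q belonging to a newly added BV is
     *
     *     Q[d][d] = (e[d] - u[d])^2 / gamma = (0 - 1)^2 / gamma = 1 / gamma,
     *
     * because e_t+1 has a zero and u_t+1 a one in the new coordinate. Hence 1.0 / Q[j][j] recovers
     * the squared residual gamma_j of BV j with respect to the span of the remaining BVs, which is
     * the quantity compared against parameter.geometrical_tol in learn().
     */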
    /**
     * Delete the given BV from the BV set by adjusting the parameterization of the GP using eqs.
     * (3.19), (3.21) and (3.22):
     *
     * alpha_t+1 = alpha^{(r)} - alpha^star / (c^star + q^star) * (Q^star + C^star)
     *
     * C_t+1     = C^{(r)} + Q^star * {Q^star}^T / q^star
     *                     - (Q^star + C^star) * {(Q^star + C^star)}^T / (q^star + c^star)
     *
     * Q_t+1     = Q^{(r)} - Q^star * {Q^star}^T / q^star
     */
    private void deleteBV(Matrix alpha, Matrix C, Matrix Q, double[][] basisVectors, int d, int index) {

        /* Maximum number of BVs */
        int dMax = basisVectors.length;

        /* Permute alpha, so that the BV to be removed is the last one */
        double t_scalar = alpha.getArray()[index][0];
        alpha.getArray()[index][0] = alpha.getArray()[d][0];
        alpha.getArray()[d][0] = t_scalar;

        /* Permute C */
        swapRowsAndColumns(C.getArray(), index, d);

        /* Permute Q */
        swapRowsAndColumns(Q.getArray(), index, d);

        /* Permute the basis vector ordering */
        double[] t_row = basisVectors[index];
        basisVectors[index] = basisVectors[d];
        basisVectors[d] = t_row;

        /* Apply the pruning equations */
        double alpha_star = alpha.getArray()[d][0];
        double c_star = C.getArray()[d][d];
        double q_star = Q.getArray()[d][d];

        double[][] C_star = new double[dMax][1];
        Matrix vector_C_star = new Matrix(C_star);

        double[][] Q_star = new double[dMax][1];
        Matrix vector_Q_star = new Matrix(Q_star);

        for (int j = 0; j < d; j++) {
            C_star[j][0] = C.getArray()[j][d];
            Q_star[j][0] = Q.getArray()[j][d];
        }

        alpha.minusEquals((vector_Q_star.plus(vector_C_star)).times(alpha_star / (c_star + q_star)));

        C.plusEquals((vector_Q_star.times(vector_Q_star.transpose())).times(1.0 / q_star));
        C.minusEquals(((vector_Q_star.plus(vector_C_star))
                .times((vector_Q_star.plus(vector_C_star)).transpose()))
                .times(1.0 / (q_star + c_star)));

        Q.minusEquals((vector_Q_star.times(vector_Q_star.transpose())).times(1.0 / q_star));

        /* Prune the projected row / column */
        alpha.getArray()[d][0] = 0;
        for (int j = 0; j <= d; j++) {
            Q.getArray()[d][j] = 0;    // Clear d-th row
            C.getArray()[d][j] = 0;
            Q.getArray()[j][d] = 0;    // Clear d-th column
            C.getArray()[j][d] = 0;
        }
    }

    /** Identify the GP. */
    public String toString() {
        return "Gauss-Regression-GP";
    }
}