/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program. If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions.kernel.gaussianprocess;

import java.util.TreeSet;

import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.Kernel;

import Jama.Matrix;

/**
 * Gaussian Process Regression.
 *
 * REFERENCES:
 *
 * Lehel Csato. Gaussian Processes --- Iterative Sparse Approximations. PhD thesis, Aston University,
 * Birmingham, UK, March 2002.
 *
 * TODO:
 * - own Matrix implementation with SE-Cholesky decomposition and the ability to constrain matrix
 *   operations to sub-matrices
 * - CompositeKernel implementation for kernels based on finite basis functions
 *
 * @author Piotr Kasprzak
 * @version $Id: Regression.java,v 1.3 2008/05/09 19:23:17 ingomierswa Exp $
 */
public class Regression extends GPBase {

    /** Used to hold a score value with an associated index. */
    protected static class Score implements Comparable<Score> {

        double score;
        int index;

        Score(double score, int index) {
            this.score = score;
            this.index = index;
        }

        /** Orders scores ascending. Ties collapse in a TreeSet, which is fine for minimum search. */
        public int compareTo(Score other) {
            return Double.compare(score, other.getScore());
        }

        public boolean equals(Object o) {
            if (o instanceof Score) {
                return score == ((Score) o).score;
            }
            return false;
        }

        public int hashCode() {
            return Double.valueOf(score).hashCode();
        }

        public double getScore() {
            return score;
        }

        public int getIndex() {
            return index;
        }
    }

    /** Constructor */
    public Regression(RegressionProblem problem, Parameter parameter) {
        super(problem, parameter);
    }
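    /*
     * Illustrative usage sketch (how the RegressionProblem and Parameter objects are constructed
     * depends on the caller; only the calls below are taken from this class):
     *
     *     RegressionProblem problem = ...;   // input/target vectors, kernel, noise sigma_0^2
     *     Parameter parameter = ...;         // maxBasisVectors, epsilon_tol, geometrical_tol
     *
     *     Model model = new Regression(problem, parameter).learn();
     */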
    /**
     * Compute the (canonical) scalar product between x and y, using only the first d components of
     * the vectors.
     */
    private double scalarProduct(double[][] x, double[][] y, int d) throws Exception {

        /* The lengths of x and y must both be >= d */
        if (x.length < d || y.length < d) {
            throw new Exception("At least one vector has a too small dimension!");
        }

        double result = 0;
        for (int i = 0; i < d; i++) {
            result += x[i][0] * y[i][0];
        }

        return result;
    }

    /**
     * Swap rows i and j as well as columns i and j of a symmetric n x n matrix, which is
     * represented as a double[][].
     */
    private void swapRowsAndColumns(double[][] A, int i, int j) {

        /* Dimension of the matrix */
        int n = A[0].length;

        /* Swap rows i and j of A */
        double[] tr = A[i];
        A[i] = A[j];
        A[j] = tr;

        /* Swap columns i and j of A */
        for (int k = 0; k < n; k++) {
            double ts = A[k][i];
            A[k][i] = A[k][j];
            A[k][j] = ts;
        }
    }
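    /*
     * For reference: the sparse posterior GP computed by learn() below is parameterized by the
     * basis vectors bv_1, ..., bv_d together with alpha and C, and its marginal at a new input x is
     * Gaussian with
     *
     *     mean     m(x)       = sum_j alpha_j * K(bv_j, x)
     *     variance sigma^2(x) = K(x, x) + k(x)^T * C * k(x)
     *
     * where k(x) = [K(bv_1, x), ..., K(bv_d, x)]^T. A minimal sketch of the mean computation
     * (illustrative only, not part of this class):
     *
     *     double mean = 0;
     *     for (int j = 0; j < d; j++) {
     *         mean += alpha.get(j, 0) * kernel.eval(basisVectors[j], x);
     *     }
     */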
    /** The hard work is done here. */
    public Model learn() throws Exception {

        RegressionProblem problem = (RegressionProblem) this.problem;

        int numExamples = problem.getProblemSize();
        int inputDim = problem.getInputDimension();

        double[][] x = problem.getInputVectors();
        double[][] y = problem.getTargetVectors();

        double[] x_new;
        double y_new;

        Kernel kernel = problem.getKernel();

        /* Maximum number of basis vectors to use (= size of the various vectors and matrices) */
        int dMax = parameter.maxBasisVectors + 1;

        /* Current number of used basis vectors */
        int d = 0;

        /* alpha parameterizes the mean of the posterior GP. */
        Matrix alpha = new Matrix(dMax, 1);

        /* C parameterizes the covariance matrix of the posterior GP. */
        Matrix C = new Matrix(dMax, dMax);

        /* s is used in the updates of alpha and C. */
        Matrix s = new Matrix(dMax, 1);

        /*
         * Q is the inverse kernel (Gram) matrix. It is built up iteratively to avoid an O(d^3)
         * performance hit.
         */
        Matrix Q = new Matrix(dMax, dMax);

        /*
         * The vector k_t+1 = [K(x_1, x_t+1), K(x_2, x_t+1), ..., K(x_t, x_t+1)], holding the
         * results of applying the kernel to the new input x_t+1 and each of the old inputs.
         */
        Matrix k = new Matrix(dMax, 1);

        /*
         * e_t+1 holds the projection coordinates of the associated basis vector of a new input when
         * using an orthogonal projection (see chapter 3.1). We are using a KL-optimal projection
         * (see chapter 3.3), but the vector e_t+1 is still needed for the update of the inverse
         * kernel Gram matrix Q.
         */
        Matrix e = new Matrix(dMax, 1);

        /* The t+1-th unit vector. */
        Matrix u = new Matrix(dMax, 1);

        /* The basis vectors */
        double[][] basisVectors = new double[dMax][inputDim];

        /* Temporary result: C_t * k_t+1 */
        Matrix C_times_k = new Matrix(dMax, 1);

        /* Used temporarily */
        Matrix t = new Matrix(dMax, 1);

        double k_star = 0;
        double nabla = 0;

        /* Some statistics */
        int gamma_projections = 0;        // Number of gamma induced projections
        int kl_projections = 0;           // Number of KL-based projections (after the maximum
                                          // number of BVs is exceeded)
        int geometrical_projections = 0;  // Number of geometrical projections (to prevent the
                                          // kernel Gram matrix from becoming too badly conditioned)

        /* Process every input/output example */
        for (int i = 0; i < numExamples; i++) {

            x_new = x[i];
            y_new = y[i][0];

            /*
             * Compute k_t+1 = [K(x_1, x_t+1), K(x_2, x_t+1), ..., K(x_t, x_t+1)] and
             *
             * k_star = K(x_t+1, x_t+1)
             */
            for (int j = 0; j < d; j++) {
                k.getArray()[j][0] = kernel.eval(basisVectors[j], x_new);
            }
            k_star = kernel.eval(x_new, x_new);

            /*
             * Compute the scalars q_new and r_new.
             *
             * NOTE: We assume a Gaussian likelihood function (a regression model with Gaussian
             * noise) here, which permits analytical expressions for q_new / r_new. A more elaborate
             * implementation would use integral approximations (e.g. Monte Carlo based) to allow
             * for other likelihoods for which no such analytical expressions exist.
             */

            /*
             * Compute m_t+1, the mean of the marginal of the GP at x_t+1, which is Gaussian:
             *
             * m_t+1 = {k_t+1}^T * alpha_t
             */
            double m = scalarProduct(k.getArray(), alpha.getArray(), d);

            /*
             * Compute {sigma_t+1}^2, the variance of the marginal of the GP at x_t+1:
             *
             * {sigma_t+1}^2 = K(x_t+1, x_t+1) + {k_t+1}^T * C_t * k_t+1
             *
             * (see end of chapter 2.4 and chapter 5.1)
             */
            C_times_k = C.times(k);
            double sigma_2 = k_star + scalarProduct(k.getArray(), C_times_k.getArray(), d);

            /* Compute q_t+1 = (y_t+1 - m_t+1) / ({sigma_0}^2 + {sigma_t+1}^2) */
            double q = (y_new - m) / (problem.sigma_0_2 + sigma_2);

            /* Compute r_t+1 = -1.0 / ({sigma_0}^2 + {sigma_t+1}^2) */
            double r = -1.0 / (problem.sigma_0_2 + sigma_2);

            /* Compute e_t+1 = Q_t * k_t+1 */
            e = Q.times(k);

            /*
             * Compute the (length of the residual)^2 [if using an orthogonal projection; we need it
             * for the iterative update of the inverse kernel Gram matrix Q]:
             *
             * gamma_t+1 = k_star - {k_t+1}^T * Q_t * k_t+1     (chapter 3.1)
             */
            double gamma = k_star - scalarProduct(k.getArray(), e.getArray(), d);

            /*
             * DEBUG: Check if Q is REALLY the inverse kernel Gram matrix. The explicit Gram matrix
             * built here is used for debugging only (its consumers below were already commented
             * out), so the whole block is disabled to avoid O(d^2) kernel evaluations per example.
             */
            // Matrix Gram = new Matrix(d, d);
            // for (int ii = 0; ii < d; ii++) {
            //     for (int jj = 0; jj < d; jj++) {
            //         Gram.getArray()[ii][jj] = kernel.eval(basisVectors[ii], basisVectors[jj]);
            //     }
            // }
            // Matrix inverseGram = Q.getMatrix(0, d - 1, 0, d - 1);
            // Matrix diffGram = Gram.times(inverseGram);
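            /*
             * If the debug block above is enabled, Q can be validated against the explicit Gram
             * matrix with plain Jama calls, e.g. (illustrative sketch):
             *
             *     double err = Gram.times(Q.getMatrix(0, d - 1, 0, d - 1))
             *             .minus(Matrix.identity(d, d)).normF();
             *
             * err should stay close to zero as long as Q remains well conditioned.
             */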
            /*
             * Check if the length of the residual is too small and if so, use the projection
             * equations / sparse update (3.29) and proceed to the next input. This helps to
             * maintain a good numerical condition of Q.
             *
             * nabla_t+1 = (1 + gamma_t+1 * r_t+1)^{-1}
             *
             * s_t+1     = C_t * k_t+1 + e_t+1
             *
             * alpha_t+1 = alpha_t + q_t+1 * nabla_t+1 * s_t+1
             *
             * C_t+1     = C_t + r_t+1 * nabla_t+1 * s_t+1 * {s_t+1}^T
             */
            if (gamma < parameter.epsilon_tol) {

                nabla = 1.0 / (1 + gamma * r);
                s = C_times_k.plus(e);
                alpha = alpha.plus(s.times(q * nabla));
                C = C.plus((s.times(s.transpose())).times(r * nabla));

                gamma_projections++;

            } else {

                /*
                 * Add the basis vector of the current input to the BV set and update the
                 * appropriate variables using (2.46), increasing the dimensions of all vectors and
                 * matrices (u_t+1 is the t+1-th unit vector) and also updating Q.
                 *
                 * s_t+1     = C_t * k_t+1 + u_t+1
                 *
                 * alpha_t+1 = alpha_t + q_t+1 * s_t+1
                 *
                 * C_t+1     = C_t + r_t+1 * s_t+1 * {s_t+1}^T
                 */

                /* Build the t+1-th unit vector */
                for (int j = 0; j < dMax; j++) {
                    u.getArray()[j][0] = 0;
                }
                u.getArray()[d][0] = 1.0;

                /* Apply the standard online update as described above */
                s = C_times_k.plus(u);
                alpha = alpha.plus(s.times(q));
                C = C.plus((s.times(s.transpose())).times(r));

                /*
                 * We have enlarged our BV set, so we need to update the inverse kernel Gram matrix.
                 * To avoid an O(d^3) performance hit, this can be done iteratively (appendix C) at
                 * a cost of just O(d^2):
                 *
                 * Q_t+1 = Q_t + {gamma_t+1}^{-1} * (e_t+1 - u_t+1) * (e_t+1 - u_t+1)^T
                 */
                t = e.minus(u);
                Q = Q.plus((t.times(t.transpose())).times(1.0 / gamma));

                /* Remember the added basis vector */
                basisVectors[d] = x_new;
                d++;

                /*
                 * NOTE: The exact re-inversion below overwrites the iterative O(d^2) update of Q
                 * above with a numerically more robust O(d^3) computation.
                 */
                Matrix Gram = new Matrix(d, d);
                for (int ii = 0; ii < d; ii++) {
                    for (int jj = 0; jj < d; jj++) {
                        Gram.getArray()[ii][jj] = kernel.eval(basisVectors[ii], basisVectors[jj]);
                    }
                }

                /* Invert Q via Cholesky decomposition (alternative: Gram.inverse()) */
                Matrix L = Gram.chol().getL();
                Matrix invL = L.inverse();
                Q.setMatrix(0, d - 1, 0, d - 1, invL.transpose().times(invL));
            }

            /*
             * Check if we have too many basis vectors. If so:
             *
             * 1.) For each basis vector i calculate a score epsilon_i (3.26) [approximation of
             *     (3.24)]:
             *
             *     epsilon_t+1(i) = alpha^2(i) / (q(i) + c(i))
             *
             * 2.) Find the basis vector with the minimum score.
             *
             * 3.) Permute the GP parameterization, so that this basis vector is the last one added.
             *
             * 4.) Remove the last basis vector using eqs. (3.19), (3.21) and (3.22).
             */
            if (d >= dMax) {

                int min_index = getMinScoresKLApprox(alpha, C, Q, d).first().getIndex();

                /* Permute the relevant vectors / matrices, so that the found basis vector is the
                 * last one, then remove it. */
                deleteBV(alpha, C, Q, basisVectors, d - 1, min_index);
                d--;

                kl_projections++;
            }

            /*
             * Check the geometrical score. All BVs with a score below a certain threshold are
             * removed.
             */
            while (d > 0) {
                Score minScore = getMinScoresGeometrical(alpha, C, Q, d).first();
                if (minScore.getScore() > parameter.geometrical_tol) {
                    break;
                }
                deleteBV(alpha, C, Q, basisVectors, d - 1, minScore.getIndex());
                d--;
                geometrical_projections++;
            }
        }

        return new Model(kernel, basisVectors, alpha.getMatrix(0, d - 1, 0, 0),
                C.getMatrix(0, d - 1, 0, d - 1), Q.getMatrix(0, d - 1, 0, d - 1), d, true);
    }

    /**
     * Return the scores of all BVs, ordered ascending.
     *
     * The score used is a mean based approximation to the true KL-distance between the full GP and
     * the GP without the current BV for which the score is calculated.
     */
    private TreeSet<Score> getMinScoresKLApprox(Matrix alpha, Matrix C, Matrix Q, int d) {

        /* The scores used to decide which basis vector to remove */
        TreeSet<Score> scores = new TreeSet<Score>();

        /* Compute the score of every basis vector */
        for (int j = 0; j < d; j++) {
            double score = alpha.getArray()[j][0] * alpha.getArray()[j][0]
                    / (Q.getArray()[j][j] + C.getArray()[j][j]);
            scores.add(new Score(score, j));
        }

        return scores;
    }

    /**
     * Return the scores of all BVs, ordered ascending.
     *
     * The score used is based on the euclidean distance between the current BV and its orthogonal
     * projection into the span of all the other BVs.
     *
     * This score is especially important since it can (must!) be used to prevent the kernel Gram
     * matrix from becoming too badly conditioned.
     */
    private TreeSet<Score> getMinScoresGeometrical(Matrix alpha, Matrix C, Matrix Q, int d) {

        /* The scores used to decide which basis vector to remove */
        TreeSet<Score> scores = new TreeSet<Score>();

        /* Compute the score of every basis vector */
        for (int j = 0; j < d; j++) {
            double score = 1.0 / Q.getArray()[j][j];
            scores.add(new Score(score, j));
        }

        return scores;
    }
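    /*
     * Note on the geometrical score used above: from the rank-one update of Q in learn(), the
     * diagonal entry of Q belonging to a newly added BV is
     *
     *     Q[d][d] = (e[d] - u[d])^2 / gamma = (0 - 1)^2 / gamma = 1 / gamma,
     *
     * because e_t+1 has a zero and u_t+1 a one in the new coordinate. Hence 1.0 / Q[j][j] recovers
     * the squared residual gamma_j of BV j with respect to the span of the remaining BVs, which is
     * the quantity compared against parameter.geometrical_tol in learn().
     */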
    /**
     * Delete the given BV from the BV set by adjusting the parameterization of the GP using eqs.
     * (3.19), (3.21) and (3.22):
     *
     * alpha_t+1 = alpha^{(r)} - alpha^star / (c^star + q^star) * (Q^star + C^star)
     *
     * C_t+1     = C^{(r)} + Q^star * {Q^star}^T / q^star
     *                     - (Q^star + C^star) * {(Q^star + C^star)}^T / (q^star + c^star)
     *
     * Q_t+1     = Q^{(r)} - Q^star * {Q^star}^T / q^star
     */
    private void deleteBV(Matrix alpha, Matrix C, Matrix Q, double[][] basisVectors, int d, int index) {

        /* Maximum number of BVs */
        int dMax = basisVectors.length;

        /* Permute alpha, so that the BV to be removed is the last one */
        double t_scalar = alpha.getArray()[index][0];
        alpha.getArray()[index][0] = alpha.getArray()[d][0];
        alpha.getArray()[d][0] = t_scalar;

        /* Permute C */
        swapRowsAndColumns(C.getArray(), index, d);

        /* Permute Q */
        swapRowsAndColumns(Q.getArray(), index, d);

        /* Permute the basis vector ordering */
        double[] t_row = basisVectors[index];
        basisVectors[index] = basisVectors[d];
        basisVectors[d] = t_row;

        /* Apply the pruning equations */
        double alpha_star = alpha.getArray()[d][0];
        double c_star = C.getArray()[d][d];
        double q_star = Q.getArray()[d][d];

        double[][] C_star = new double[dMax][1];
        Matrix vector_C_star = new Matrix(C_star);

        double[][] Q_star = new double[dMax][1];
        Matrix vector_Q_star = new Matrix(Q_star);

        for (int j = 0; j < d; j++) {
            C_star[j][0] = C.getArray()[j][d];
            Q_star[j][0] = Q.getArray()[j][d];
        }

        alpha.minusEquals((vector_Q_star.plus(vector_C_star)).times(alpha_star / (c_star + q_star)));

        C.plusEquals((vector_Q_star.times(vector_Q_star.transpose())).times(1.0 / q_star));
        C.minusEquals(((vector_Q_star.plus(vector_C_star))
                .times((vector_Q_star.plus(vector_C_star)).transpose()))
                .times(1.0 / (q_star + c_star)));

        Q.minusEquals((vector_Q_star.times(vector_Q_star.transpose())).times(1.0 / q_star));

        /* Prune the projected row / column */
        alpha.getArray()[d][0] = 0;
        for (int j = 0; j <= d; j++) {
            Q.getArray()[d][j] = 0;    // Clear d-th row
            C.getArray()[d][j] = 0;
            Q.getArray()[j][d] = 0;    // Clear d-th column
            C.getArray()[j][d] = 0;
        }
    }

    /** Identify the GP. */
    public String toString() {
        return "Gauss-Regression-GP";
    }
}