/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program. If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions.kernel.rvm;

import java.util.LinkedList;
import java.util.List;

import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelBasisFunction;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelRadial;
import com.rapidminer.operator.learner.functions.kernel.rvm.util.SECholeskyDecomposition;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.Tools;

import Jama.Matrix;

/**
 * Constructive RVM for regression problems (see Tipping and Faul,
 * "Fast Marginal Likelihood Maximisation for Sparse Bayesian Models", 2003).
 *
 * @author Piotr Kasprzak, Ingo Mierswa
 * @version $Id: ConstructiveRegression.java,v 1.3 2008/05/09 19:22:57 ingomierswa Exp $
 */
public class ConstructiveRegression extends RVMBase {

    /** Data shared across various methods */
    protected double[][] x;            // Input vectors
    protected double[][] t;            // Target vectors
    protected double[] tVector;        // (One dimensional) target vector

    protected double[][] phi;          // Basis functions evaluated on all input vectors
    protected Matrix PHI_t;            // (Pruned) transposed design matrix PHI^t

    protected double[] alpha;          // Vector of inverse variances for the weights
    protected double beta;             // beta = sigma^{-2} = inverse noise variance

    protected Matrix A;                // Diagonal matrix consisting of the alphas
    protected Matrix SIGMA;            // Covariance matrix of the weight posterior distribution
    protected Matrix SIGMA_chol;       // Cholesky factor of the above
    protected Matrix mu;               // Mean of the weight posterior distribution

    protected double s, q;             // Used in the criterion for inclusion / deletion of basis vectors

    protected LinkedList<Integer> basisSet = new LinkedList<Integer>();

    /** Constructor */
    public ConstructiveRegression(RegressionProblem problem, Parameter parameter) {
        super(problem, parameter);
    }

    /** Take a list holding Double objects and return a double[]. */
    protected double[] convertListToDoubleArray(List<Double> list) {
        double[] array = new double[list.size()];
        for (int i = 0; i < array.length; i++) {
            array[i] = list.get(i).doubleValue();
        }
        return array;
    }

    /** Return the inner product of x and y (x.length == y.length assumed). */
    protected double innerProduct(double[] x, double[] y) {
        double sum = 0;
        for (int i = 0; i < x.length; i++) {
            sum += x[i] * y[i];
        }
        return sum;
    }
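    /**
     * Convenience sketch, not part of the original implementation: the squared
     * Euclidean norm ||v||^2 = v^t * v, expressed via innerProduct(). Terms like
     * ||phi_m||^2 in the S_m / Q_m computations below are exactly of this form.
     */
    protected double squaredNorm(double[] v) {
        return innerProduct(v, v);
    }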
    /**
     * Create pruned versions of all important matrices / vectors so that
     * only rows / columns matching the indices in basisSet are kept.
     */
    protected void prune(LinkedList<Integer> basisSet) {

        /** Create PHI^t */
        double[][] PHI_t_Array = new double[basisSet.size()][];
        for (int j = 0; j < basisSet.size(); j++) {
            PHI_t_Array[j] = phi[basisSet.get(j)];
        }
        PHI_t = new Matrix(PHI_t_Array);

        /** Create diagonal matrix A */
        A = new Matrix(basisSet.size(), basisSet.size());
        for (int j = 0; j < basisSet.size(); j++) {
            A.set(j, j, alpha[basisSet.get(j)]);
        }
    }

    /**
     * Update the covariance matrix of the weight posterior distribution (SIGMA)
     * along with its Cholesky factor:
     *
     * SIGMA = (A + beta * PHI^t * PHI)^{-1}
     *
     * SIGMA_chol with SIGMA_chol^t * SIGMA_chol = SIGMA
     */
    protected void updateSIGMA() {
        Matrix SIGMA_inv = PHI_t.times(PHI_t.transpose());
        SIGMA_inv.timesEquals(beta);
        SIGMA_inv.plusEquals(A);

        /** Update the factor: SIGMA_inv = U * U^t, hence SIGMA_chol = U^{-1} */
        SECholeskyDecomposition CD = new SECholeskyDecomposition(SIGMA_inv.getArray());
        Matrix U = CD.getPTR().times(CD.getL());
        SIGMA_chol = U.inverse();

        /** Update SIGMA */
        SIGMA = (SIGMA_chol.transpose()).times(SIGMA_chol);
    }

    /**
     * Update the mean of the weight posterior distribution (mu):
     *
     * mu = beta * SIGMA * PHI^t * t
     */
    protected void updateMu() {
        mu = SIGMA.times(PHI_t.times(new Matrix(t)));
        mu.timesEquals(beta);
    }

    /**
     * Compute the scalars s_m, q_m which are part of the criterion for
     * inclusion / deletion of the given basis m:
     *
     * S_m = beta * phi^t_m * phi_m - beta^2 * phi^t_m * PHI * SIGMA * PHI^t * phi_m
     * Q_m = beta * phi^t_m * t     - beta^2 * phi^t_m * PHI * SIGMA * PHI^t * t
     *
     * For a basis which is part of the model:
     *
     * s_m = alpha_m * S_m / (alpha_m - S_m)
     * q_m = alpha_m * Q_m / (alpha_m - S_m)
     *
     * For a basis outside the model (conceptually alpha_m = infinity, marked
     * here by alpha_m = -1): s_m = S_m, q_m = Q_m.
     */
    protected void updateCriteriumScalars(int selectedBasis) {
        Matrix SigmaStuff = (PHI_t.transpose()).times(SIGMA.times(PHI_t));
        double S = beta * innerProduct(phi[selectedBasis], phi[selectedBasis])
                 - beta * beta * innerProduct(phi[selectedBasis], SigmaStuff.times(new Matrix(phi[selectedBasis], phi[selectedBasis].length)).getRowPackedCopy());
        double Q = beta * innerProduct(phi[selectedBasis], tVector)
                 - beta * beta * innerProduct(phi[selectedBasis], SigmaStuff.times(new Matrix(t)).getRowPackedCopy());
        if (alpha[selectedBasis] > 0) {
            /** Basis is in the model */
            s = alpha[selectedBasis] * S / (alpha[selectedBasis] - S);
            q = alpha[selectedBasis] * Q / (alpha[selectedBasis] - S);
        } else {
            /** Basis is out of the model: take the limit alpha_m -> infinity */
            s = S;
            q = Q;
        }
    }

    /**
     * Re-estimate alpha by setting it to the value which maximizes the
     * marginal likelihood:
     *
     * alpha_m = s^2_m / (q^2_m - s_m)
     */
    protected void reestimateAlpha(int selectedBasis) {
        alpha[selectedBasis] = s * s / (q * q - s);
    }

    /**
     * Include a basis function into the model.
     */
    protected void includeBasis(int selectedBasis) {
        basisSet.add(Integer.valueOf(selectedBasis));
        reestimateAlpha(selectedBasis);
    }
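    /**
     * Illustrative sketch, not called by learn(): the sequential decision rule
     * applied to a single candidate basis, assuming s and q have already been
     * computed by updateCriteriumScalars(selectedBasis). It mirrors exactly the
     * inclusion / re-estimation / deletion branch of the main iteration below.
     */
    protected void updateBasis(int selectedBasis) {
        double theta = q * q - s;               // theta > 0 <=> basis improves the marginal likelihood
        if (theta > 0) {
            if (alpha[selectedBasis] > 0) {
                reestimateAlpha(selectedBasis); // already in the model => refine its alpha
            } else {
                includeBasis(selectedBasis);    // not in the model => add it
            }
        } else if (alpha[selectedBasis] > 0) {
            deleteBasis(selectedBasis);         // in the model but no longer useful => remove it
        }
    }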
    /**
     * Delete a basis function from the model.
     */
    protected void deleteBasis(int selectedBasis) {
        basisSet.remove(Integer.valueOf(selectedBasis));
        alpha[selectedBasis] = -1.0d;
    }

    /**
     * Update beta (same as for the "normal" regression RVM):
     *
     * beta = (N - sum_i(gamma_i)) / norm_l2(DELTA)^2
     */
    protected void updateBeta() {

        /** Calculate the gammas and their sum: gamma_i = 1 - alpha_i * SIGMA_ii */
        double[] gammas = new double[basisSet.size()];
        for (int j = 0; j < basisSet.size(); j++) {
            gammas[j] = 1.0d - alpha[basisSet.get(j)] * SIGMA.get(j, j);
        }

        double sumGammas = 0;
        for (int j = 0; j < gammas.length; j++) {
            sumGammas += gammas[j];
        }

        /** Calculate DELTA = t - PHI * mu */
        Matrix DELTA = (new Matrix(t)).minus(PHI_t.transpose().times(mu));

        /** beta = (N - sum_i(gamma_i)) / norm_l2(DELTA)^2;
         *  note the parentheses around the numerator, which the old code was missing */
        beta = (x.length - sumGammas) / innerProduct(DELTA.getRowPackedCopy(), DELTA.getRowPackedCopy());
    }
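    /**
     * Illustrative helper, not used by the algorithm itself: the current
     * training predictions y = PHI * mu (with PHI = (PHI^t)^t), i.e. the term
     * subtracted from t when updateBeta() forms the residual DELTA.
     */
    protected double[] predictTrainingTargets() {
        return PHI_t.transpose().times(mu).getRowPackedCopy();
    }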
    /** The hard work is done here. */
    public Model learn() {

        RegressionProblem problem = (RegressionProblem) this.problem;

        int numExamples = problem.getProblemSize();
        int numBases = numExamples + 1;

        /** Set iteration control parameters */
        // int monIts = 1;

        /** Init hyperparameters with more or less sensible values (shouldn't be too important) */
        // beta = Math.pow(parameter.initSigma, -2);
        beta = Math.pow(0.5, -2);

        /** Create m x n matrix (= PHI^t) with all basis functions
         *  (each evaluated at all input vectors) */
        x = problem.getInputVectors();
        KernelBasisFunction[] kernels = problem.getKernels();
        phi = new double[numBases][numExamples];

        int i, j;
        for (j = 0; j < numBases - 1; j++) {
            for (i = 0; i < numExamples; i++) {
                phi[j + 1][i] = kernels[j + 1].eval(x[i]);
            }
        }

        // Set bias
        for (i = 0; i < numExamples; i++) {
            phi[0][i] = 1.0;
        }

        /** Init target vector */
        t = problem.getTargetVectors();
        tVector = new double[t.length];
        for (i = 0; i < t.length; i++) {
            tVector[i] = t[i][0];
        }

        /** Initialise all alphas to be out-of-model (= -1.0) */
        alpha = new double[numBases];
        for (i = 0; i < alpha.length; i++) {
            alpha[i] = -1.0d;
        }

        /** Init basisSet with a single randomly selected basis */
        int selectedBasis = RandomGenerator.getRandomGenerator(0).nextInt(numBases);
        basisSet.add(Integer.valueOf(selectedBasis));

        /** Init its alpha (model hyperparameter: inverse variance of the weight):
         *  alpha = ||phi||^2 / (||phi^t * t||^2 / ||phi||^2 - 1 / beta);
         *  note the square of the inner product, which the old code was missing */
        double normPhiSquare = innerProduct(phi[selectedBasis], phi[selectedBasis]);
        double phiDotT = innerProduct(phi[selectedBasis], tVector);
        alpha[selectedBasis] = normPhiSquare / (phiDotT * phiDotT / normPhiSquare - 1.0d / beta);

        /** The main iteration */
        for (i = 1; i <= parameter.maxIterations; i++) {

            // get 'old' log alphas
            double[] logAlphas = new double[alpha.length];
            for (j = 0; j < logAlphas.length; j++) {
                double value = Math.log(alpha[j]);
                if (Double.isNaN(value))
                    value = 0.0d;   // out-of-model bases (alpha = -1) contribute no change
                logAlphas[j] = value;
            }

            prune(basisSet);
            updateSIGMA();
            updateMu();

            /** beta update */
            updateBeta();

            /** Select the next basis (round robin; a random choice would also work) */
            // selectedBasis = RandomGenerator.getGlobalRandomGenerator().nextInt(numBases);
            selectedBasis = i % numBases;

            /** Test for inclusion / deletion */
            updateCriteriumScalars(selectedBasis);
            double theta = q * q - s;

            if (theta > 0) {
                if (alpha[selectedBasis] > 0) {
                    /** Basis already in the model => re-estimate alpha */
                    reestimateAlpha(selectedBasis);
                } else {
                    /** Basis not in the model => include it */
                    includeBasis(selectedBasis);
                }
            } else if (alpha[selectedBasis] > 0) {
                /** Basis is part of the model => delete it */
                deleteBasis(selectedBasis);
            }

            // check for iteration abort: stop once all log alphas have (almost) converged
            double maxLogAlphaChange = 0;
            for (j = 0; j < logAlphas.length; j++) {
                double newValue = Math.log(alpha[j]);
                if (Double.isNaN(newValue))
                    newValue = 0.0d;
                double change = Math.abs(logAlphas[j] - newValue);
                if (change > maxLogAlphaChange)
                    maxLogAlphaChange = change;
            }

            if (Tools.isNotEqual(maxLogAlphaChange, 0.0d) && (maxLogAlphaChange < parameter.min_delta_log_alpha)) {
                break;
            }
        }

        /** The basis set may have changed after the last posterior update, so
         *  recompute SIGMA and mu for the final basis set (this also keeps the
         *  size of mu in sync with basisSet) */
        prune(basisSet);
        updateSIGMA();
        updateMu();

        /** Create the final model */
        double[] finalWeights = new double[basisSet.size()];
        KernelBasisFunction[] finalKernels = new KernelBasisFunction[basisSet.size()];
        boolean bias = false;
        for (j = 0; j < basisSet.size(); j++) {
            finalWeights[j] = mu.get(j, 0);
            if (basisSet.get(j) == 0) {
                // bias wasn't pruned
                bias = true;
                finalKernels[j] = new KernelBasisFunction(new KernelRadial());
            } else {
                finalKernels[j] = kernels[basisSet.get(j)];
            }
        }

        return new Model(finalWeights, finalKernels, bias, true);
    }

    /** Identify the RVM. */
    public String toString() {
        return "Constructive-Regression-RVM";
    }
}
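/*
 * Usage sketch (illustrative only; the exact construction of RegressionProblem
 * and Parameter is defined elsewhere in this package and assumed here):
 *
 *   RegressionProblem problem = ...;   // input vectors, target vectors and one kernel per basis
 *   Parameter parameter = ...;         // maxIterations, min_delta_log_alpha, ...
 *   Model model = new ConstructiveRegression(problem, parameter).learn();
 */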