/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions.kernel.rvm;

import java.util.Iterator;
import java.util.LinkedList;

import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelBasisFunction;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelRadial;
import com.rapidminer.operator.learner.functions.kernel.rvm.util.SECholeskyDecomposition;

import Jama.Matrix;

/**
 * The standard (slow, non-scaling) regression RVM (see bla).
 *
 * @author Piotr Kasprzak, Ingo Mierswa
 * @version $Id: RVMRegression.java,v 1.3 2008/05/09 19:22:57 ingomierswa Exp $
 */
public class RVMRegression extends RVMBase {

    /** Constructor */
    public RVMRegression(RegressionProblem problem, Parameter parameter) {
        super(problem, parameter);
    }

    /** The hard work is done here */
    public Model learn() {

        RegressionProblem problem = (RegressionProblem) this.problem;

        int numExamples = problem.getProblemSize();
        int numBases    = numExamples + 1;

        /** Init hyperparameters with more or less sensible values (shouldn't be too important) */
        double initBeta = 1.0 / Math.pow(parameter.initSigma, 2);

        /** Create design/basis matrix PHI (N x M) with:
         *  N: number of examples
         *  M: number of examples + 1 (because of the bias in the first column)
         *  PHI(n,m) = phi_m(x_n) = K_m(x_n, x_m)   [K being the kernel; x_n, x_m the n-th/m-th example]
         */
        double[][] x = problem.getInputVectors();
        KernelBasisFunction[] kernels = problem.getKernels();
        double[][] PHI = new double[numExamples][numBases];

        int i, j;
        for (j = 0; j < numBases - 1; j++) {
            for (i = 0; i < numExamples; i++) {
                PHI[i][j + 1] = kernels[j + 1].eval(x[i]);
            }
        }

        // Set bias
        for (i = 0; i < numExamples; i++) {
            PHI[i][0] = 1.0;
        }

        /** Init weights/alpha/gamma/index vectors */
        double[] alphas = new double[numBases];     // inverse variances for the weights
        for (j = 0; j < numBases; j++) {
            alphas[j] = parameter.initAlpha;
        }

        /** Matrix-esize everything */
        Matrix matrixPHI   = new Matrix(PHI);
        Matrix vectorT     = new Matrix(problem.getTargetVectors());
        Matrix vectorAlpha = new Matrix(alphas, numBases);
        Matrix vectorPHI_T = (matrixPHI.transpose()).times(vectorT);

        LinkedList<Integer> unprunedIndicesList = null; // List of indices of unpruned alphas/weights/basis functions (= relevance vectors)
        int[] unprunedIndicesArray = null;              // Array of the above; also used for the reverse mapping of the indices of a pruned
                                                        // vector to the ones of an unpruned vector
        Matrix prunedVectorWeights = null;

        /** The main iteration */
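        // Each pass re-estimates the hyperparameters (type-II maximum likelihood):
        //   1. prune every basis function whose alpha has grown beyond parameter.alpha_max,
        //   2. compute the weight posterior (mu, SIGMA) from the current alpha / beta estimates,
        //      using a Cholesky decomposition of SIGMA^-1 for numerical stability,
        //   3. update alpha_j = gamma_j / mu_j^2; if the largest change in log(alpha) is below
        //      parameter.min_delta_log_alpha, stop,
        //   4. otherwise update beta = (N - sum(gammas)) / ||t - PHI * mu||^2 and iterate again.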
        for (i = 1; i <= parameter.maxIterations; i++) {

            // Prune associated basis functions / weights for too big alphas
            unprunedIndicesList = new LinkedList<Integer>();
            for (j = 0; j < numBases; j++) {
                if (vectorAlpha.get(j, 0) >= parameter.alpha_max) {
                    // pruned
                } else {
                    unprunedIndicesList.add(Integer.valueOf(j));
                }
            }

            unprunedIndicesArray = new int[unprunedIndicesList.size()];
            Iterator<Integer> iter = unprunedIndicesList.iterator();
            for (j = 0; j < unprunedIndicesList.size(); j++) {
                unprunedIndicesArray[j] = iter.next().intValue();
            }

            Matrix prunedMatrixPHI   = matrixPHI.getMatrix(0, matrixPHI.getRowDimension() - 1, unprunedIndicesArray);
            Matrix prunedVectorPHI_T = vectorPHI_T.getMatrix(unprunedIndicesArray, 0, 0);
            Matrix prunedVectorAlpha = vectorAlpha.getMatrix(unprunedIndicesArray, 0, 0);

            // Calculate mean and covariance matrix of the weight posterior distribution (Gaussian) using
            // the current hyperparameter estimates. For numerical stability the Cholesky decomposition is used.
            Matrix matrixAlphaDiag = new Matrix(prunedVectorAlpha.getRowDimension(), prunedVectorAlpha.getRowDimension(), 0);
            for (j = 0; j < prunedVectorAlpha.getRowDimension(); j++) {
                matrixAlphaDiag.set(j, j, prunedVectorAlpha.get(j, 0));
            }

            Matrix matrixSIGMAInv = (prunedMatrixPHI.transpose()).times(prunedMatrixPHI); // PHI' * PHI
            matrixSIGMAInv.timesEquals(initBeta);                                         // PHI' * PHI * beta
            matrixSIGMAInv.plusEquals(matrixAlphaDiag);                                   // PHI' * PHI * beta + diag(alpha) = SIGMA^-1

            // Matrix matrixU = (matrixSIGMAInv.chol()).getL();                           // U * U' = SIGMA^-1
            SECholeskyDecomposition CD = new SECholeskyDecomposition(matrixSIGMAInv.getArray());
            Matrix matrixU    = CD.getPTR().times(CD.getL());
            Matrix matrixUInv = matrixU.inverse();

            // w = mu = beta * SIGMA * PHI' * t
            //        = beta * (SIGMA^-1)^-1 * PHI' * t
            //        = beta * (U * U')^-1 * PHI' * t
            //        = beta * U'^-1 * U^-1 * PHI' * t
            prunedVectorWeights = ((matrixUInv.transpose()).times(matrixUInv.times(prunedVectorPHI_T))).times(initBeta);

            // Get diagonal elements of covariance matrix SIGMA
            double[] diagSIGMA = new double[matrixUInv.getRowDimension()];
            for (j = 0; j < diagSIGMA.length; j++) {
                double value = 0;
                for (int k = 0; k < diagSIGMA.length; k++) {
                    value += matrixUInv.get(k, j) * matrixUInv.get(k, j);
                }
                diagSIGMA[j] = value;
            }

            // Calculate gammas: gamma = 1 - alpha * SIGMA_ii
            double[] gammas = new double[diagSIGMA.length];
            for (j = 0; j < gammas.length; j++) {
                gammas[j] = 1.0 - prunedVectorAlpha.get(j, 0) * diagSIGMA[j];
            }

            // Get log alphas
            double[] logAlphas = new double[prunedVectorAlpha.getRowDimension()];
            for (j = 0; j < logAlphas.length; j++) {
                logAlphas[j] = Math.log(prunedVectorAlpha.get(j, 0));
            }

            // Alpha update: alpha = gamma / mu^2 = gamma / w^2
            for (j = 0; j < prunedVectorAlpha.getRowDimension(); j++) {
                double newAlpha = gammas[j] / (prunedVectorWeights.get(j, 0) * prunedVectorWeights.get(j, 0));
                prunedVectorAlpha.set(j, 0, newAlpha);
            }

            // Check for iteration abort
            double maxLogAlphaChange = 0;
            for (j = 0; j < logAlphas.length; j++) {
                double change = Math.abs(logAlphas[j] - Math.log(prunedVectorAlpha.get(j, 0)));
                if (change > maxLogAlphaChange)
                    maxLogAlphaChange = change;
            }
            if (maxLogAlphaChange < parameter.min_delta_log_alpha) {
                break;
            }

            // Beta update: beta = 1 / sigma^2 = (N - sum(gammas)) / norm_l2(t - PHI * mu)^2
            double dataError = 0;
            Matrix dataDelta = vectorT.minus(prunedMatrixPHI.times(prunedVectorWeights)); // t - PHI * mu
            for (j = 0; j < numExamples; j++) {
                dataError += (dataDelta.get(j, 0) * dataDelta.get(j, 0));                 // norm_l2(t - PHI * mu)^2
            }
            double sumGammas = 0;
            for (j = 0; j < gammas.length; j++) {
                sumGammas += gammas[j];                                                   // sum(gammas)
            }
            initBeta = (numExamples - sumGammas) / dataError;

            // Update the (unpruned) alpha vector with the corresponding values from the pruned alpha vector
            for (j = 0; j < prunedVectorAlpha.getRowDimension(); j++) {
                vectorAlpha.set(unprunedIndicesArray[j], 0, prunedVectorAlpha.get(j, 0));
            }
        }

        /** Create model */
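        // The bases that survived pruning are the relevance vectors: their posterior mean weights
        // and kernels are copied into the model below; index 0 corresponds to the bias column of PHI.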
        double[] finalWeights = new double[unprunedIndicesArray.length];
        KernelBasisFunction[] finalKernels = new KernelBasisFunction[unprunedIndicesArray.length];
        boolean bias = false;
        for (j = 0; j < unprunedIndicesArray.length; j++) {
            finalWeights[j] = prunedVectorWeights.get(j, 0);
            if (unprunedIndicesArray[j] == 0) {
                // bias wasn't pruned
                bias = true;
                finalKernels[j] = new KernelBasisFunction(new KernelRadial());
            } else {
                finalKernels[j] = kernels[unprunedIndicesArray[j]];
            }
        }

        Model model = new Model(finalWeights, finalKernels, bias, true);
        return model;
    }

    /** Identify the RVM */
    public String toString() {
        return "Regression-RVM";
    }
}
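
/*
 * Illustrative sketch only, not part of the original RapidMiner class: a tiny, self-contained
 * main() that reproduces the weight-posterior computation performed inside RVMRegression.learn(),
 *     SIGMA = (beta * PHI' * PHI + diag(alpha))^-1   and   mu = beta * SIGMA * PHI' * t,
 * on hard-coded toy numbers, using plain Jama calls. The class name and all values below are
 * assumptions made for the example; a direct inverse() is used here for brevity, whereas learn()
 * uses a Cholesky decomposition of SIGMA^-1 for numerical stability.
 */
class RVMRegressionPosteriorSketch {

    public static void main(String[] args) {
        // Toy design matrix PHI (3 examples, 2 basis functions: bias column + one kernel column) and targets t
        double[][] phi   = { { 1.0, 0.5 }, { 1.0, 1.5 }, { 1.0, 2.5 } };
        double[]   t     = { 1.0, 2.0, 3.0 };
        double[]   alpha = { 1.0, 1.0 };    // prior precisions of the two weights
        double     beta  = 100.0;           // noise precision 1 / sigma^2

        Matrix matrixPHI = new Matrix(phi);
        Matrix vectorT   = new Matrix(t, t.length);

        // SIGMA^-1 = beta * PHI' * PHI + diag(alpha)
        Matrix sigmaInv = matrixPHI.transpose().times(matrixPHI).times(beta);
        for (int j = 0; j < alpha.length; j++) {
            sigmaInv.set(j, j, sigmaInv.get(j, j) + alpha[j]);
        }

        // mu = beta * SIGMA * PHI' * t
        Matrix mu = sigmaInv.inverse().times(matrixPHI.transpose()).times(vectorT).times(beta);

        for (int j = 0; j < mu.getRowDimension(); j++) {
            System.out.println("mu[" + j + "] = " + mu.get(j, 0));
        }
    }
}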