/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions.kernel.rvm;

import java.util.Iterator;
import java.util.LinkedList;

import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelBasisFunction;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelRadial;
import com.rapidminer.operator.learner.functions.kernel.rvm.util.SECholeskyDecomposition;

import Jama.Matrix;

/**
 * The standard (slow, non-scaling) regression RVM (see bla).
 *
 * @author Piotr Kasprzak, Ingo Mierswa
 * @version $Id: RVMRegression.java,v 1.3 2008/05/09 19:22:57 ingomierswa Exp $
 */
public class RVMRegression extends RVMBase {

    /** Constructor */
    public RVMRegression(RegressionProblem problem, Parameter parameter) {
        super(problem, parameter);
    }

    /** The hard work is done here */
    public Model learn() {

        RegressionProblem problem = (RegressionProblem) this.problem;

        int numExamples = problem.getProblemSize();
        int numBases    = numExamples + 1;

        /** Init hyperparameters with more or less sensible values (shouldn't be too important) */
        double initBeta = 1.0 / Math.pow(parameter.initSigma, 2);

        /** Create design/basis matrix PHI (N x M) with:
         *  N: number of examples
         *  M: number of examples + 1 (because of the bias in the first column)
         *  PHI(n,m) = phi_m(x_n) = K_m(x_n, x_m)   [K being the kernel; x_n, x_m the n-th/m-th example]
         */
        double[][] x = problem.getInputVectors();
        KernelBasisFunction[] kernels = problem.getKernels();
        double[][] PHI = new double[numExamples][numBases];

        int i, j;
        for (j = 0; j < numBases - 1; j++) {
            for (i = 0; i < numExamples; i++) {
                PHI[i][j + 1] = kernels[j + 1].eval(x[i]);
            }
        }

        // Set bias
        for (i = 0; i < numExamples; i++) {
            PHI[i][0] = 1.0;
        }

        /** Init weights/alpha/gamma/index vectors */
        double[] alphas = new double[numBases];     // inverse variances for the weights
        for (j = 0; j < numBases; j++) {
            alphas[j] = parameter.initAlpha;
        }

        /** Matrix-esize everything */
        Matrix matrixPHI   = new Matrix(PHI);
        Matrix vectorT     = new Matrix(problem.getTargetVectors());
        Matrix vectorAlpha = new Matrix(alphas, numBases);
        Matrix vectorPHI_T = (matrixPHI.transpose()).times(vectorT);

        LinkedList<Integer> unprunedIndicesList = null; // List of indices of unpruned alphas/weights/basis functions (= relevance vectors)
        int[] unprunedIndicesArray = null;              // Array of the above; also used for the reverse mapping of the indices of a pruned
                                                        // vector to the ones of an unpruned vector
        Matrix prunedVectorWeights = null;

        /** The main iteration */
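        // Each pass re-estimates the hyperparameters (type-II maximum likelihood):
        //   1. prune every basis function whose alpha has grown beyond parameter.alpha_max,
        //   2. compute the weight posterior (mu, SIGMA) from the current alpha / beta estimates,
        //      using a Cholesky decomposition of SIGMA^-1 for numerical stability,
        //   3. update alpha_j = gamma_j / mu_j^2; if the largest change in log(alpha) is below
        //      parameter.min_delta_log_alpha, stop,
        //   4. otherwise update beta = (N - sum(gammas)) / ||t - PHI * mu||^2 and iterate again.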
        for (i = 1; i <= parameter.maxIterations; i++) {

            // Prune associated basis functions / weights for too big alphas
            unprunedIndicesList = new LinkedList<Integer>();
            for (j = 0; j < numBases; j++) {
                if (vectorAlpha.get(j, 0) >= parameter.alpha_max) {
                    // pruned
                } else {
                    unprunedIndicesList.add(Integer.valueOf(j));
                }
            }

            unprunedIndicesArray = new int[unprunedIndicesList.size()];
            Iterator<Integer> iter = unprunedIndicesList.iterator();
            for (j = 0; j < unprunedIndicesList.size(); j++) {
                unprunedIndicesArray[j] = iter.next().intValue();
            }

            Matrix prunedMatrixPHI   = matrixPHI.getMatrix(0, matrixPHI.getRowDimension() - 1, unprunedIndicesArray);
            Matrix prunedVectorPHI_T = vectorPHI_T.getMatrix(unprunedIndicesArray, 0, 0);
            Matrix prunedVectorAlpha = vectorAlpha.getMatrix(unprunedIndicesArray, 0, 0);

            // Calculate mean and covariance matrix of the weight posterior distribution (Gaussian) using
            // the current hyperparameter estimates. For numerical stability the Cholesky decomposition is used.
            Matrix matrixAlphaDiag = new Matrix(prunedVectorAlpha.getRowDimension(), prunedVectorAlpha.getRowDimension(), 0);
            for (j = 0; j < prunedVectorAlpha.getRowDimension(); j++) {
                matrixAlphaDiag.set(j, j, prunedVectorAlpha.get(j, 0));
            }

            Matrix matrixSIGMAInv = (prunedMatrixPHI.transpose()).times(prunedMatrixPHI); // PHI' * PHI
            matrixSIGMAInv.timesEquals(initBeta);                                         // PHI' * PHI * beta
            matrixSIGMAInv.plusEquals(matrixAlphaDiag);                                   // PHI' * PHI * beta + diag(alpha) = SIGMA^-1

            // Matrix matrixU = (matrixSIGMAInv.chol()).getL();                           // U * U' = SIGMA^-1
            SECholeskyDecomposition CD = new SECholeskyDecomposition(matrixSIGMAInv.getArray());
            Matrix matrixU    = CD.getPTR().times(CD.getL());
            Matrix matrixUInv = matrixU.inverse();

            // w = mu = beta * SIGMA * PHI' * t
            //        = beta * (SIGMA^-1)^-1 * PHI' * t
            //        = beta * (U * U')^-1 * PHI' * t
            //        = beta * U'^-1 * U^-1 * PHI' * t
            prunedVectorWeights = ((matrixUInv.transpose()).times(matrixUInv.times(prunedVectorPHI_T))).times(initBeta);

            // Get diagonal elements of covariance matrix SIGMA
            double[] diagSIGMA = new double[matrixUInv.getRowDimension()];
            for (j = 0; j < diagSIGMA.length; j++) {
                double value = 0;
                for (int k = 0; k < diagSIGMA.length; k++) {
                    value += matrixUInv.get(k, j) * matrixUInv.get(k, j);
                }
                diagSIGMA[j] = value;
            }

            // Calculate gammas: gamma = 1 - alpha * SIGMA_ii
            double[] gammas = new double[diagSIGMA.length];
            for (j = 0; j < gammas.length; j++) {
                gammas[j] = 1.0 - prunedVectorAlpha.get(j, 0) * diagSIGMA[j];
            }

            // Get log alphas
            double[] logAlphas = new double[prunedVectorAlpha.getRowDimension()];
            for (j = 0; j < logAlphas.length; j++) {
                logAlphas[j] = Math.log(prunedVectorAlpha.get(j, 0));
            }

            // Alpha update: alpha = gamma / mu^2 = gamma / w^2
            for (j = 0; j < prunedVectorAlpha.getRowDimension(); j++) {
                double newAlpha = gammas[j] / (prunedVectorWeights.get(j, 0) * prunedVectorWeights.get(j, 0));
                prunedVectorAlpha.set(j, 0, newAlpha);
            }

            // Check for iteration abort
            double maxLogAlphaChange = 0;
            for (j = 0; j < logAlphas.length; j++) {
                double change = Math.abs(logAlphas[j] - Math.log(prunedVectorAlpha.get(j, 0)));
                if (change > maxLogAlphaChange)
                    maxLogAlphaChange = change;
            }
            if (maxLogAlphaChange < parameter.min_delta_log_alpha) {
                break;
            }

            // Beta update: beta = 1 / sigma^2 = (N - sum(gammas)) / norm_l2(t - PHI * mu)^2
            double dataError = 0;
            Matrix dataDelta = vectorT.minus(prunedMatrixPHI.times(prunedVectorWeights)); // t - PHI * mu
            for (j = 0; j < numExamples; j++) {
                dataError += (dataDelta.get(j, 0) * dataDelta.get(j, 0));                 // norm_l2(t - PHI * mu)^2
            }
            double sumGammas = 0;
            for (j = 0; j < gammas.length; j++) {
                sumGammas += gammas[j];                                                   // sum(gammas)
            }
            initBeta = (numExamples - sumGammas) / dataError;

            // Update the (unpruned) alpha vector with the corresponding values from the pruned alpha vector
            for (j = 0; j < prunedVectorAlpha.getRowDimension(); j++) {
                vectorAlpha.set(unprunedIndicesArray[j], 0, prunedVectorAlpha.get(j, 0));
            }
        }

        /** Create model */
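        // The bases that survived pruning are the relevance vectors: their posterior mean weights
        // and kernels are copied into the model below; index 0 corresponds to the bias column of PHI.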
        double[] finalWeights = new double[unprunedIndicesArray.length];
        KernelBasisFunction[] finalKernels = new KernelBasisFunction[unprunedIndicesArray.length];
        boolean bias = false;
        for (j = 0; j < unprunedIndicesArray.length; j++) {
            finalWeights[j] = prunedVectorWeights.get(j, 0);
            if (unprunedIndicesArray[j] == 0) {
                // bias wasn't pruned
                bias = true;
                finalKernels[j] = new KernelBasisFunction(new KernelRadial());
            } else {
                finalKernels[j] = kernels[unprunedIndicesArray[j]];
            }
        }

        Model model = new Model(finalWeights, finalKernels, bias, true);
        return model;
    }

    /** Identify the RVM */
    public String toString() {
        return "Regression-RVM";
    }
}
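
/*
 * Illustrative sketch only, not part of the original RapidMiner class: a tiny, self-contained
 * main() that reproduces the weight-posterior computation performed inside RVMRegression.learn(),
 *     SIGMA = (beta * PHI' * PHI + diag(alpha))^-1   and   mu = beta * SIGMA * PHI' * t,
 * on hard-coded toy numbers, using plain Jama calls. The class name and all values below are
 * assumptions made for the example; a direct inverse() is used here for brevity, whereas learn()
 * uses a Cholesky decomposition of SIGMA^-1 for numerical stability.
 */
class RVMRegressionPosteriorSketch {

    public static void main(String[] args) {
        // Toy design matrix PHI (3 examples, 2 basis functions: bias column + one kernel column) and targets t
        double[][] phi   = { { 1.0, 0.5 }, { 1.0, 1.5 }, { 1.0, 2.5 } };
        double[]   t     = { 1.0, 2.0, 3.0 };
        double[]   alpha = { 1.0, 1.0 };    // prior precisions of the two weights
        double     beta  = 100.0;           // noise precision 1 / sigma^2

        Matrix matrixPHI = new Matrix(phi);
        Matrix vectorT   = new Matrix(t, t.length);

        // SIGMA^-1 = beta * PHI' * PHI + diag(alpha)
        Matrix sigmaInv = matrixPHI.transpose().times(matrixPHI).times(beta);
        for (int j = 0; j < alpha.length; j++) {
            sigmaInv.set(j, j, sigmaInv.get(j, j) + alpha[j]);
        }

        // mu = beta * SIGMA * PHI' * t
        Matrix mu = sigmaInv.inverse().times(matrixPHI.transpose()).times(vectorT).times(beta);

        for (int j = 0; j < mu.getRowDimension(); j++) {
            System.out.println("mu[" + j + "] = " + mu.get(j, 0));
        }
    }
}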