/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.functions.kernel.rvm;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelBasisFunction;
import com.rapidminer.operator.learner.functions.kernel.rvm.kernel.KernelRadial;
import com.rapidminer.operator.learner.functions.kernel.rvm.util.SECholeskyDecomposition;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.Tools;
import Jama.Matrix;
/**
 * Constructive RVM for regression problems, following the fast marginal likelihood
 * maximisation ("sequential sparse Bayesian learning") approach of Tipping and Faul (2003).
*
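 * Typical usage, assuming a configured {@link RegressionProblem} and {@link Parameter}:
 *
 * <pre>
 *     Model model = new ConstructiveRegression(problem, parameter).learn();
 * </pre>
 *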
* @author Piotr Kasprzak, Ingo Mierswa
* @version $Id: ConstructiveRegression.java,v 1.3 2008/05/09 19:22:57 ingomierswa Exp $
*
*/
public class ConstructiveRegression extends RVMBase {
/** Data shared across various methods */
protected double[][] x; // Input vectors
protected double[][] t; // Target vectors
protected double[] tVector; // (One dimensional) target vector
protected double[][] phi; // Basis functions evaluated on all input vectors
protected Matrix PHI_t; // (Pruned) transposed design matrix PHI^t
protected double[] alpha; // Vector of inverse variances for the weights
protected double beta; // beta = sigma^{-2} = inverse noise variance
protected Matrix A; // Diagonal Matrix consisting of alphas
protected Matrix SIGMA; // Covariance matrix of weight posterior distribution
protected Matrix SIGMA_chol; // Cholesky factor of the above
protected Matrix mu; // Mean of the weight posterior distribution
protected double s, q; // Used in the criterion for inclusion / deletion of basis functions
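/** Indices of the basis functions currently included in the model */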
protected LinkedList<Integer> basisSet = new LinkedList<Integer>();
/** Constructor */
public ConstructiveRegression(RegressionProblem problem, Parameter parameter) {
super(problem, parameter);
}
/** Take a list holding Double objects and return a double[] */
protected double[] convertListToDoubleArray(List<Double> list) {
double[] array = new double[list.size()];
for (int i = 0; i < array.length; i++) {
array[i] = list.get(i).doubleValue();
}
return array;
}
/** Return the inner product of x and y (x.length == y.length assumed) */
protected double innerProduct(double[] x, double[] y) {
double sum = 0;
for (int i = 0; i < x.length; i++) {
sum += x[i] * y[i];
}
return sum;
}
/**
* Create pruned versions of all important matrices / vectors so that
* only rows / columns matching the indices in basisSet are kept.
*/
protected void prune(LinkedList<Integer> basisSet) {
/** Create PHIt */
double[][] PHI_t_Array = new double[basisSet.size()][];
for (int j = 0; j < basisSet.size(); j++) {
PHI_t_Array[j] = phi[basisSet.get(j)];
}
PHI_t = new Matrix(PHI_t_Array);
/** Create diagonal Matrix A */
A = new Matrix(basisSet.size(), basisSet.size());
for (int j = 0; j < basisSet.size(); j++) {
A.set(j, j, alpha[basisSet.get(j)]);
}
}
/**
* Update the covariance Matrix of the weight posterior distribution (SIGMA)
* along with its cholesky factor:
*
* SIGMA = (A + beta * PHI^t * PHI)^{-1}
*
 * SIGMA_chol with SIGMA_chol^t * SIGMA_chol = SIGMA
*/
protected void updateSIGMA() {
Matrix SIGMA_inv = PHI_t.times(PHI_t.transpose());
SIGMA_inv.timesEquals(beta);
SIGMA_inv.plusEquals(A);
/** Update the factor ... */
SECholeskyDecomposition CD = new SECholeskyDecomposition(SIGMA_inv.getArray());
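/** U = PTR * L from the pivoted factorization, so that (assuming getPTR() returns the permutation part) U * U^t = SIGMA^{-1} and hence SIGMA_chol = U^{-1} */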
Matrix U = CD.getPTR().times(CD.getL());
SIGMA_chol = U.inverse();
/** Update SIGMA */
SIGMA = (SIGMA_chol.transpose()).times(SIGMA_chol);
}
/**
* Update the mean of the weight posterior distribution (mu):
*
* mu = beta * SIGMA * PHI^t * t
*/
protected void updateMu() {
mu = SIGMA.times(PHI_t.times(new Matrix(t)));
mu.timesEquals(beta);
}
/**
 * Compute the scalars s_m, q_m which are part of the criterion for
 * inclusion / deletion of the given basis m:
*
* S_m = beta * phi^t_m * phi_m - beta^2 * phi^t_m * PHI * SIGMA * PHI^t * phi_m
* Q_m = beta * phi^t_m * t - beta^2 * phi^t_m * PHI * SIGMA * PHI^t * t
*
 * s_m = alpha_m * S_m / (alpha_m - S_m)
 * q_m = alpha_m * Q_m / (alpha_m - S_m)
 *
 * For a basis outside the model alpha_m is infinite, so s_m = S_m and q_m = Q_m.
*/
protected void updateCriteriumScalars(int selectedBasis) {
Matrix SigmaStuff = (PHI_t.transpose()).times(SIGMA.times(PHI_t));
double S = beta * innerProduct(phi[selectedBasis], phi[selectedBasis]) -
beta * beta * innerProduct(phi[selectedBasis],
SigmaStuff.times(new Matrix(phi[selectedBasis], phi[selectedBasis].length)).getRowPackedCopy());
double Q = beta * innerProduct(phi[selectedBasis], tVector) -
beta * beta * innerProduct(phi[selectedBasis],
SigmaStuff.times(new Matrix(t)).getRowPackedCopy());
if (alpha[selectedBasis] > 0) {
/** Basis in the model: finite alpha_m */
s = alpha[selectedBasis] * S / (alpha[selectedBasis] - S);
q = alpha[selectedBasis] * Q / (alpha[selectedBasis] - S);
} else {
/** Basis out of the model: alpha = -1 is the sentinel for alpha_m = infinity, for which s_m = S_m and q_m = Q_m */
s = S;
q = Q;
}
}
/**
* Reestimate alpha by setting it to the value which maximizes the
* marginal likelihood:
*
* alpha_i = s^2_i / (q^2_i - s_i)
*/
protected void reestimateAlpha(int selectedBasis) {
alpha[selectedBasis] = s * s / (q * q - s);
}
/**
* Include a basis function into the model.
*/
protected void includeBasis(int selectedBasis) {
basisSet.add(Integer.valueOf(selectedBasis));
reestimateAlpha(selectedBasis);
}
/**
* Delete a basis function from the model.
*/
protected void deleteBasis(int selectedBasis) {
basisSet.remove(Integer.valueOf(selectedBasis));
alpha[selectedBasis] = -1.0d;
}
/**
* Update beta (same as for the "normal" regression rvm)
*/
protected void updateBeta() {
/** Calculate the gammas and their sum: gamma_i = 1 - alpha_i * SIGMA_ii */
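/** (each gamma_i lies in [0, 1] and measures how well the corresponding weight is determined by the data) */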
double[] gammas = new double[basisSet.size()];
for (int j = 0; j < basisSet.size(); j++) {
gammas[j] = 1.0d - alpha[basisSet.get(j)] * SIGMA.get(j, j);
}
double sumGammas = 0;
for (int j = 0; j < gammas.length; j++) {
sumGammas += gammas[j];
}
/** Calculate delta = t - PHI * mu */
Matrix DELTA = (new Matrix(t)).minus(PHI_t.transpose().times(mu));
/** beta = (N - sum_i(gamma_i)) / norm_l2(DELTA)^2 */
beta = (x.length - sumGammas) / innerProduct(DELTA.getRowPackedCopy(), DELTA.getRowPackedCopy());
}
/** The hard work is done here */
public Model learn() {
RegressionProblem problem = (RegressionProblem) this.problem;
int numExamples = problem.getProblemSize();
int numBases = numExamples + 1;
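// numBases = numExamples kernel basis functions plus the constant bias basis at index 0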
/** Set iteration control parameters */
// int monIts = 1;
/** Init hyperparameters with more or less sensible values (shouldn't be too important) */
// beta = Math.pow(parameter.initSigma, -2);
beta = Math.pow(0.5, -2);
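/** (i.e. the noise standard deviation sigma is fixed to 0.5) */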
/** Create m x n matrix (= PHI^t) with all basis functions
* (each evaluated at all input vectors)
*/
x = problem.getInputVectors();
KernelBasisFunction[] kernels = problem.getKernels();
phi = new double[numBases][numExamples];
int i, j;
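// row 0 of phi is reserved for the bias (filled below); rows 1 .. numBases-1 hold the kernel evaluations on all inputs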
for (j = 0; j < numBases - 1; j++) {
for (i = 0; i < numExamples; i++) {
phi[j + 1][i] = kernels[j + 1].eval(x[i]);
}
}
// Set bias
for (i = 0; i < numExamples; i++) {
phi[0][i] = 1.0;
}
/** Init target vector */
t = problem.getTargetVectors();
tVector = new double[t.length];
for (i = 0; i < t.length; i++) {
tVector[i] = t[i][0];
}
/** Initialise all alphas as out-of-model (alpha = -1 is the sentinel for alpha = infinity) */
alpha = new double[numBases];
for (i = 0; i < alpha.length; i++) {
alpha[i] = -1.0d;
}
/** Init basisSet */
int selectedBasis = RandomGenerator.getRandomGenerator(0).nextInt(numBases);
basisSet.add(Integer.valueOf(selectedBasis));
/** Init alpha for the first basis (model hyperparameter: inverse variance of its weight):
* alpha = ||phi||^2 / ( (phi^t * t)^2 / ||phi||^2 - sigma^2 ), with sigma^2 = 1 / beta */
double normPhiSquare = innerProduct(phi[selectedBasis], phi[selectedBasis]);
double phiDotT = innerProduct(phi[selectedBasis], tVector);
alpha[selectedBasis] = normPhiSquare / (phiDotT * phiDotT / normPhiSquare - 1.0d / beta);
/** The main iteration */
for (i = 1; i <= parameter.maxIterations; i++) {
// get 'old' log alphas
double[] logAlphas = new double[alpha.length];
for (j = 0; j < logAlphas.length; j++) {
double value = Math.log(alpha[j]);
if (Double.isNaN(value))
value = 0.0d;
logAlphas[j] = value;
}
prune(basisSet);
updateSIGMA();
updateMu();
/** beta update */
updateBeta();
/** Select the next candidate basis (deterministic sweep instead of random selection) */
// selectedBasis = RandomGenerator.getGlobalRandomGenerator().nextInt(numBases);
selectedBasis = i % numBases;
/** Test for inclusion / deletion */
updateCriteriumScalars(selectedBasis);
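/** theta > 0 iff including basis m with a finite (re-estimated) alpha increases the marginal likelihood */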
double theta = q * q - s;
if (theta > 0) {
if (alpha[selectedBasis] > 0) {
/** Basis already in the model => reestimate alpha */
reestimateAlpha(selectedBasis);
} else {
/** Basis not in the model => include it */
includeBasis(selectedBasis);
}
} else if (alpha[selectedBasis] > 0) {
/** Basis is part of the model => delete it */
deleteBasis(selectedBasis);
}
// check for iteration abort
double maxLogAlphaChange = 0;
for (j = 0; j < logAlphas.length; j++) {
double newValue = Math.log(alpha[j]);
if (Double.isNaN(newValue))
newValue = 0.0d;
double change = Math.abs(logAlphas[j] - newValue);
if (change > maxLogAlphaChange)
maxLogAlphaChange = change;
}
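// a maximal change of exactly zero (e.g. a no-op iteration on an out-of-model basis) does not count as convergence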
if (Tools.isNotEqual(maxLogAlphaChange, 0.0d) && (maxLogAlphaChange < parameter.min_delta_log_alpha)) {
break;
}
}
/** Create model */
double[] finalWeights = new double[basisSet.size()];
KernelBasisFunction[] finalKernels = new KernelBasisFunction[basisSet.size()];
boolean bias = false;
for (j = 0; j < basisSet.size(); j++) {
finalWeights[j] = mu.get(j, 0);
if (basisSet.get(j) == 0) {
// bias wasn't pruned
bias = true;
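// the bias basis has no kernel of its own; a radial kernel apparently serves as a placeholder here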
finalKernels[j] = new KernelBasisFunction(new KernelRadial());
} else {
finalKernels[j] = kernels[basisSet.get(j)];
}
}
Model model = new Model(finalWeights, finalKernels, bias, true);
return model;
}
/** Identify the RVM */
@Override
public String toString() {
return "Constructive-Regression-RVM";
}
}