SECholeskyDecomposition.java example

Explorer
rapidminer-studio-master
- doc
  - doc
- src
/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 * 
 * Complete list of developers available at our web site:
 * 
 * http://rapidminer.com
 * 
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.functions.kernel.rvm.util;

import Jama.Matrix;
import com.rapidminer.tools.Tools;

import java.util.LinkedList;


/**
 * A modified cholesky factorization.
 * 
 * Given a n x n matrix A this decomposition produces a matrix L, such that L * L' = A + E, with E
 * being a minimal diagonal matrix making the sum of both matrices positive definite (and regular).
 * In contrast to the standard cholesky decomposition (as implemented in e.g. Jama) the matrix A
 * doesn't have to be regular and positive definite as often happens due to numerical instabilities.
 * The determination of the diagonal elements of E is based on Gerschgorin's circle theorem
 * minimizing ||E||_inf.
 * 
 * The a-priori upper bound of ||E||_inf is linear in n;
 * 
 * REFERENCES:
 * 
 * Robert B. Schnabel and Elizabeth Eskow. 1990. "A New Modified Cholesky Factorization," SIAM
 * Journal of Scientific Statistical Computating, 11, 6: 1136-58.
 * 
 * Robert B. Schnabel and Elizabeth Eskow. 1999.
 * "A revised modified Cholesky factorization algorithm," SIAM J. Optim., 9, 4: 1135--1148
 * (electronic)
 * 
 * Jeff Gill and Gary King. 1998. "`Hessian not Invertable.' Help!" manuscript in progress, Harvard
 * University.
 * 
 * @author Piotr Kasprzak
 * 
 */
public class SECholeskyDecomposition {

	/**
	 * The L-factor (lower triangle matrix) of matrix factorization calculated in decompose().
	 * 
	 * NOTE: Due to pivoting the rows and columns DO NOT MATCH with the original matrix A.
	 */

	private Matrix L = null;

	/**
	 * The matrix used to reverse the pivot-transformations used in decompose() to construct L (PTR
	 * = Pivot Transform Reverse):
	 * 
	 * PTR * (L * L') * PTR' = A + E.
	 * 
	 * It's constructed from the reverse sequence of row/column swaps recorded in decompose().
	 */

	private Matrix PTR = null;

	/**
	 * Data structures needed for the recording of the pivot transformations.
	 */
	private static class PivotTransform {

		int pos1;
		int pos2;

		PivotTransform(int pos1, int pos2) {
			this.pos1 = pos1;
			this.pos2 = pos2;
		}
	}

	private LinkedList<PivotTransform> pivotTransformQueue = new LinkedList<PivotTransform>();

	private Matrix E = null;			// The error-matrix E (a diagonal matrix) (with pivoting reversed)
	private double[] E_Diagonal = null;			// The diagonal elements of E (with pivoting still applied)
	private double ENorm = 0;			// Infinity norm (= maximum of diagonals) of E

	private double detL = 0;			// The determinant of L

	private int n = 0;			// Size of A

	/**
	 * Constructors.
	 * 
	 * @param A
	 */

	public SECholeskyDecomposition(double[][] A) {
		this.decompose(new Matrix(A));
	}

	public SECholeskyDecomposition(Matrix A) {
		this.decompose(A);
	}

	/**
	 * Swap rows i<->j and columns i<->j.
	 * 
	 * @param A
	 * @param i
	 * @param j
	 */

	private void swapRowsAndColumns(double[][] A, int i, int j, boolean isSymmetric, int offset) {

		double tmp;
		int k;

		i += offset;
		j += offset;

		if (!isSymmetric) {

			/** Matrix is n x n standard (the easy case). */

			double[] tr;

			tr = A[i];								// Swap rows of A
			A[i] = A[j];
			A[j] = tr;

			for (k = offset; k < n; k++) {				// Swap columns of A
				tmp = A[k][i];
				A[k][i] = A[k][j];
				A[k][j] = tmp;
			}

		} else {

			/** Matrix is symmetric, only the lower triangle is used */

			int t;

			double[] ti = new double[n];
			double[] tj = new double[n];

			if (i > j) {								// Make i < j
				t = i;
				i = j;
				j = t;
			}

			for (k = offset; k < i; k++) {				// Save i-th row / column
				ti[k] = A[i][k];
			}

			for (k = i; k < n; k++) {
				ti[k] = A[k][i];
			}

			tmp = ti[i];							// simulate row / column swap
			ti[i] = ti[j];
			ti[j] = tmp;

			for (k = offset; k < j; k++) {				// safe j-th row / column
				tj[k] = A[j][k];
			}

			for (k = j; k < n; k++) {
				tj[k] = A[k][j];
			}

			tmp = tj[i];							// simulate row / column swap
			tj[i] = tj[j];
			tj[j] = tmp;

			for (k = offset; k < i; k++) {				// Set i-th row (complete), j-th row (first part)
				A[i][k] = tj[k];
				A[j][k] = ti[k];
			}

			for (k = i; k < j; k++) {					// Set i-th column (first part), j-th row (second & final
										// part)
				A[k][i] = tj[k];
				A[j][k] = ti[k];
			}

			for (k = j; k < n; k++) {					// Set i-th column (second & final part), j-th column
										// (complete)
				A[k][i] = tj[k];
				A[k][j] = ti[k];
			}
		}
	}

	/**
	 * Do the hard work.
	 * 
	 * @param MA_orig
	 */

	private void decompose(Matrix MA_orig) {

		double mach_eps = 2.23e-16;							// Machine precision
		double tau = Math.pow(mach_eps, 1.0 / 3.0);	// Cubic root of macheps
		double tau_mod = Math.pow(mach_eps, 2.0 / 3.0);
		double mu = 0.1;								// Part of the relaxation introduced in revised SE90 that allows
		// small negative diagonal elements in next submatrix.
		// \mu \in [0,1]

		n = MA_orig.getRowDimension();
		E_Diagonal = new double[n];

		Matrix MA = (Matrix) MA_orig.clone();			// Don't override the original Matrix
		Matrix ML = new Matrix(n, n, 0);				// L matrix factor

		double[][] A = MA.getArray();
		double[][] L = ML.getArray();

		double delta_prev = 0;								// Previous diagonal element of E
		double delta = 0;								// Current delta
		boolean phaseone = true;								// We start in phase one

		int i, j, k;

		j = 0;													// Current iteration

		/** Do some sanity checks on A. */

		if (n != MA.getColumnDimension()) {
			// TODO: ERROR: Matrix not square
		}

		/** Determine the maximum magnitude of the diagonal elements of A. */

		double gamma = 0;

		for (i = 0; i < n; i++) {
			if (Math.abs(A[i][i]) > gamma) {
				gamma = Math.abs(A[i][i]);
			}
		}

		/** PHASE ONE, matrix A potentially positive definite */

		while (j < n && phaseone) {

			/**
			 * Find the pivot element for current submatrix and the minimum of the diagonal
			 * elements.
			 */

			int pivot = j;									// Pivot index
			double pivot_v = Double.NEGATIVE_INFINITY;				// Pivot value

			double min = Double.POSITIVE_INFINITY;

			for (i = j; i < n; i++) {

				if (A[i][i] > pivot_v) {
					pivot_v = A[i][i];
					pivot = i;
				}

				if (A[i][i] < min) {
					min = A[i][i];
				}
			}

			/**
			 * Check for end of phase one, continue if - pivot numerically > 0 - no large negative
			 * eigenvalue expected in the next submatrix
			 */

			if (pivot_v < tau_mod * gamma || min < -mu * pivot_v) {
				phaseone = false;
				break;
			}

			/** Excecute the pivoting. */

			if (pivot != j) {

				swapRowsAndColumns(A, 0, pivot - j, true, j);
				swapRowsAndColumns(L, j, pivot, false, 0);

				pivotTransformQueue.add(new PivotTransform(pivot, j));
			}

			/**
			 * Do a lookahead to check if the diagonal elements of the next submatrix become too
			 * negative.
			 */

			min = Double.POSITIVE_INFINITY;
			double value;

			for (i = j + 1; i < n; i++) {

				value = A[i][i] - A[i][j] * A[i][j] / A[j][j];
				if (value < min) {
					min = value;
				}
			}

			if (min < -mu * gamma) {
				phaseone = false;
				break;
			}

			/** Perform a iteration of standard cholesky factorization. */

			E_Diagonal[j] = 0;									// Current delta is 0

			L[j][j] = Math.sqrt(A[j][j]);						// Calculate the j-th diagonal element of L
			for (i = j + 1; i < n; i++) {							// Calculate the rest of jth-column (below the diagonal)
											// of L
				L[i][j] = A[i][j] / L[j][j];
				for (k = j + 1; k <= i; k++) {					// Update the i-th row of the next submatrix (in A)
					A[i][k] = A[i][k] - L[i][j] * L[k][j];
				}
			}

			j++;
		}

		/** PHASE TWO, matrix A NOT positive definite. */

		if (!phaseone && j == n - 1) {

			/** Special case: last diagonal element negative. */

			delta = -A[j][j] + Math.max(tau * -A[j][j] / (1.0 - tau), tau_mod * gamma);
			A[j][j] += delta;
			L[j][j] = Math.sqrt(A[j][j]);
			E_Diagonal[j] = delta;

		}

		if (!phaseone && j < n - 1) {

			k = j;												// k + 1 == number of iteration performed in phase one

			/** Calculate lower Gerschgorin bounds of A_k. */

			double[] g = new double[n - k];

			for (i = k; i < n; i++) {

				double sum_l = 0, sum_r = 0;
				int l;

				for (l = k; l < i; l++) {						// Left side of the diagonal element
					sum_l += Math.abs(A[i][l]);
				}
				for (l = i + 1; l < n; l++) {						// Right side of the diagonal element (using symmetry)
					sum_r += Math.abs(A[l][i]);
				}
				g[i - k] = A[i][i] - sum_l - sum_r;
			}

			/** The main iteration of PHASE TWO. */

			for (j = k; j < n - 2; j++) {

				/** Pivot on maximum lower Gerschgorin bound. */

				int gmax = j;
				double gmax_v = Double.NEGATIVE_INFINITY;

				for (i = j; i < n; i++) {
					if (g[i - k] > gmax_v) {
						gmax_v = g[i - k];
						gmax = i;
					}
				}

				/** Excecute the pivoting. */

				if (gmax != j) {

					swapRowsAndColumns(A, 0, gmax - j, true, j);
					swapRowsAndColumns(L, j, gmax, false, 0);

					pivotTransformQueue.add(new PivotTransform(gmax, j));
				}

				/** Calculate the delta to be added to the diagnoal element (= E_jj) */

				double normj = 0;
				for (i = j + 1; i < n; i++) {
					normj += Math.abs(A[i][j]);
				}
				delta = Math.max(0, Math.max(-A[j][j] + Math.max(normj, tau_mod * gamma), delta_prev));
				if (delta > 0) {
					A[j][j] += delta;
					delta_prev = delta;
				}

				E_Diagonal[j] = delta;

				/**
				 * Update Gerschgorin lower bounds, not too sure this is correct, but hey, no risk,
				 * no fun.
				 */
				/*
				 * Shevek notes that this uses floating point equality and therefore the risk is
				 * higher than might have been assumed.
				 */
				if (Tools.isNotEqual(A[j][j], normj)) {
					double t = 1.0 - normj / A[j][j];
					for (i = j + 1; i < n; i++) {
						g[i - k] += t * Math.abs(A[i][j]);
					}
				}

				/** Perform a iteration of standard cholesky factorization. */

				L[j][j] = Math.sqrt(A[j][j]);						// Calculate the j-th diagonal element of L
				for (i = j + 1; i < n; i++) {							// Calculate the rest of jth-column (below the
												// diagonal) of L
					L[i][j] = A[i][j] / L[j][j];
					for (int l = j + 1; l <= i; l++) {					// Update the i-th row of the next submatrix
														// (in A)
						A[i][l] = A[i][l] - L[i][j] * L[l][j];
					}
				}

			}

			/** Process the final 2 x 2 submatrix. */

			double[][] S = new double[2][2];

			S[0][0] = A[n - 2][n - 2];
			S[1][1] = A[n - 1][n - 1];
			S[1][0] = S[0][1] = A[n - 1][n - 2];

			Matrix MS = new Matrix(S);
			double[] evs = MS.eig().getRealEigenvalues();

			double lambda_lo = evs[0];
			double lambda_hi = evs[1];

			delta = Math.max(0, Math.max(
					-lambda_lo + Math.max(tau * (lambda_hi - lambda_lo) / (1.0 - tau), tau_mod * gamma), delta_prev));

			if (delta > 0) {
				A[n - 2][n - 2] += delta;
				A[n - 1][n - 1] += delta;
				delta_prev = delta;
			}

			L[n - 2][n - 2] = Math.sqrt(A[n - 2][n - 2]);
			L[n - 1][n - 2] = A[n - 1][n - 2] / L[n - 2][n - 2];
			L[n - 1][n - 1] = Math.sqrt(A[n - 1][n - 1] - L[n - 1][n - 2] * L[n - 1][n - 2]);

			E_Diagonal[n - 2] = E_Diagonal[n - 1] = delta;
		}

		this.L = ML;

		/** Because delta_i+1 >= delta_i, delta_prev == ||E||_inf */
		this.ENorm = delta_prev;
	}

	/**
	 * Build the Pivot-Transform-Reverse matrix PTR.
	 * 
	 * We start with a n x n identity matrix and for each recorded pivot transformation swap the
	 * according rows, reversing the sequence and starting with the last pivot transformation.
	 */

	private void buildPTR() {

		double[] temp_row; // = new double[n];
		double[][] PTRA;

		int k;
		PivotTransform pt;

		PTR = Matrix.identity(n, n);
		PTRA = PTR.getArray();

		k = pivotTransformQueue.size();

		while (k-- > 0) {
			pt = pivotTransformQueue.removeLast();

			temp_row = PTRA[pt.pos1];
			PTRA[pt.pos1] = PTRA[pt.pos2];
			PTRA[pt.pos2] = temp_row;
		}
	}

	/**
	 * Return the lower triangle matrix factor of the cholesky decomposition.
	 */

	public Matrix getL() {
		return L;
	}

	/**
	 * Return the matrix PTR with PTR * (L * L') * PTR' = A + E.
	 */

	public Matrix getPTR() {
		if (PTR == null) {
			buildPTR();
		}
		return PTR;
	}

	/**
	 * Return the diagonal of error-matrix E with the pivoting still applied.
	 */
	public double[] getE_Diagonal() {
		return E_Diagonal;
	}

	/**
	 * Return the error-matrix E with pivoting reversed.
	 */

	public Matrix getE() {

		if (E == null) {

			/* Create n x 1 vector with the diagonal elements of E */
			Matrix diagVector = new Matrix(E_Diagonal.clone(), n);

			/* Use PTR matrix to reverse the pivot transformations on this vector */
			if (PTR == null) {
				buildPTR();
			}
			diagVector = PTR.times(diagVector);

			E = new Matrix(n, n, 0.0);

			/*
			 * Because E is a diagonal matrix we can now easily build it from the pivot adjusted
			 * vector
			 */
			for (int i = 0; i < n; i++) {
				E.set(i, i, diagVector.get(i, 0));
			}
		}

		return this.E;
	}

	/**
	 * Return the infinity norm of matrix E.
	 */

	public double getENorm() {
		return this.ENorm;
	}

	/**
	 * Return the determinant of L.
	 */

	public double getDetL() {

		if (detL == 0) {

			double det = 1;
			for (int i = 0; i < L.getRowDimension(); i++) {
				det *= L.get(i, i);
			}

			detL = det;
		}

		return detL;
	}

	/**
	 * Return the determinant of A.
	 */

	public double getDetA() {
		return (getDetL() * getDetL());
	}
}