/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.learner.functions.kernel.rvm.util; import java.util.LinkedList; import com.rapidminer.tools.Tools; import Jama.Matrix; /** * A modified cholesky factorization. * * Given a n x n matrix A this decomposition produces a matrix L, such that * L * L' = A + E, with E being a minimal diagonal matrix making the sum of * both matrices positive definite (and regular). In contrast to the standard * cholesky decomposition (as implemented in e.g. Jama) the matrix A doesn't * have to be regular and positive definite as often happens due to numerical * instabilities. The determination of the diagonal elements of E is based * on Gerschgorin's circle theorem minimizing ||E||_inf. * * The a-priori upper bound of ||E||_inf is linear in n; * * REFERENCES: * * Robert B. Schnabel and Elizabeth Eskow. 1990. "A New Modified Cholesky * Factorization," SIAM Journal of Scientific Statistical Computating, * 11, 6: 1136-58. * * Robert B. Schnabel and Elizabeth Eskow. 1999. "A revised modified Cholesky * factorization algorithm," SIAM J. Optim., 9, 4: 1135--1148 (electronic) * * Jeff Gill and Gary King. 1998. "`Hessian not Invertable.' Help!" * manuscript in progress, Harvard University. * * @author Piotr Kasprzak * @version $Id: SECholeskyDecomposition.java,v 1.4 2008/05/09 20:57:26 ingomierswa Exp $ * */ public class SECholeskyDecomposition { /** * The L-factor (lower triangle matrix) of matrix factorization * calculated in decompose(). * * NOTE: Due to pivoting the rows and columns DO NOT MATCH with the * original matrix A. */ private Matrix L = null; /** * The matrix used to reverse the pivot-transformations used in decompose() * to construct L (PTR = Pivot Transform Reverse): * * PTR * (L * L') * PTR' = A + E. * * It's constructed from the reverse sequence of row/column swaps recorded * in decompose(). */ private Matrix PTR = null; /** * Data structures needed for the recording of the pivot transformations. */ private static class PivotTransform { int pos1; int pos2; PivotTransform(int pos1, int pos2) { this.pos1 = pos1; this.pos2 = pos2; } } private LinkedList<PivotTransform> pivotTransformQueue = new LinkedList<PivotTransform>(); private Matrix E = null; // The error-matrix E (a diagonal matrix) (with pivoting reversed) private double[] E_Diagonal = null; // The diagonal elements of E (with pivoting still applied) private double ENorm = 0; // Infinity norm (= maximum of diagonals) of E private double detL = 0; // The determinant of L private int n = 0; // Size of A /** * Constructors. * * @param A */ public SECholeskyDecomposition(double[][] A) { this.decompose(new Matrix(A)); } public SECholeskyDecomposition(Matrix A) { this.decompose(A); } /** * Swap rows i<->j and columns i<->j. * * @param A * @param i * @param j */ private void swapRowsAndColumns(double[][] A, int i, int j, boolean isSymmetric, int offset) { double tmp; int k; i += offset; j += offset; if (!isSymmetric) { /** Matrix is n x n standard (the easy case). */ double[] tr; tr = A[i]; // Swap rows of A A[i] = A[j]; A[j] = tr; for (k = offset; k < n; k++) { // Swap columns of A tmp = A[k][i]; A[k][i] = A[k][j]; A[k][j] = tmp; } } else { /** Matrix is symmetric, only the lower triangle is used */ int t; double[] ti = new double[n]; double[] tj = new double[n]; if (i > j) { // Make i < j t = i; i = j; j = t; } for (k = offset; k < i; k++) { // Save i-th row / column ti[k] = A[i][k]; } for (k = i; k < n; k++) { ti[k] = A[k][i]; } tmp = ti[i]; // simulate row / column swap ti[i] = ti[j]; ti[j] = tmp; for (k = offset; k < j; k++) { // safe j-th row / column tj[k] = A[j][k]; } for (k = j; k < n; k++) { tj[k] = A[k][j]; } tmp = tj[i]; // simulate row / column swap tj[i] = tj[j]; tj[j] = tmp; for (k = offset; k < i; k++) { // Set i-th row (complete), j-th row (first part) A[i][k] = tj[k]; A[j][k] = ti[k]; } for (k = i; k < j; k++) { // Set i-th column (first part), j-th row (second & final part) A[k][i] = tj[k]; A[j][k] = ti[k]; } for (k = j; k < n; k++) { // Set i-th column (second & final part), j-th column (complete) A[k][i] = tj[k]; A[k][j] = ti[k]; } } } /** * Do the hard work. * * @param MA_orig */ private void decompose(Matrix MA_orig) { double mach_eps = 2.23e-16; // Machine precision double tau = Math.pow(mach_eps, 1.0 / 3.0); // Cubic root of macheps double tau_mod = Math.pow(mach_eps, 2.0 / 3.0); double mu = 0.1; // Part of the relaxation introduced in revised SE90 that allows // small negative diagonal elements in next submatrix. // \mu \in [0,1] n = MA_orig.getRowDimension(); E_Diagonal = new double[n]; Matrix MA = (Matrix) MA_orig.clone(); // Don't override the original Matrix Matrix ML = new Matrix(n, n, 0); // L matrix factor double[][] A = MA.getArray(); double[][] L = ML.getArray(); double delta_prev = 0; // Previous diagonal element of E double delta = 0; // Current delta boolean phaseone = true; // We start in phase one int i, j, k; j = 0; // Current iteration /** Do some sanity checks on A. */ if (n != MA.getColumnDimension()) { // TODO: ERROR: Matrix not square } /** Determine the maximum magnitude of the diagonal elements of A. */ double gamma = 0; for (i = 0; i < n; i++) { if (Math.abs(A[i][i]) > gamma) { gamma = Math.abs(A[i][i]); } } /** PHASE ONE, matrix A potentially positive definite */ while (j < n && phaseone) { /** Find the pivot element for current submatrix and the minimum * of the diagonal elements. */ int pivot = j; // Pivot index double pivot_v = Double.NEGATIVE_INFINITY; // Pivot value double min = Double.POSITIVE_INFINITY; for (i = j; i < n; i++) { if (A[i][i] > pivot_v) { pivot_v = A[i][i]; pivot = i; } if (A[i][i] < min) { min = A[i][i]; } } /** Check for end of phase one, continue if * - pivot numerically > 0 * - no large negative eigenvalue expected in the next submatrix */ if (pivot_v < tau_mod * gamma || min < -mu * pivot_v) { phaseone = false; break; } /** Excecute the pivoting. */ if (pivot != j) { swapRowsAndColumns(A, 0, pivot-j, true, j); swapRowsAndColumns(L, j, pivot , false, 0); pivotTransformQueue.add(new PivotTransform(pivot, j)); } /** Do a lookahead to check if the diagonal elements of the next * submatrix become too negative. */ min = Double.POSITIVE_INFINITY; double value; for (i = j+1; i < n; i++) { value = A[i][i] - A[i][j] * A[i][j] / A[j][j]; if (value < min) { min = value; } } if (min < -mu * gamma) { phaseone = false; break; } /** Perform a iteration of standard cholesky factorization. */ E_Diagonal[j] = 0; // Current delta is 0 L[j][j] = Math.sqrt(A[j][j]); // Calculate the j-th diagonal element of L for (i = j+1; i < n; i++) { // Calculate the rest of jth-column (below the diagonal) of L L[i][j] = A[i][j] / L[j][j]; for (k = j+1; k <= i; k++) { // Update the i-th row of the next submatrix (in A) A[i][k] = A[i][k] - L[i][j] * L[k][j]; } } j++; } /** PHASE TWO, matrix A NOT positive definite. */ if (!phaseone && j == n-1) { /** Special case: last diagonal element negative. */ delta = -A[j][j] + Math.max(tau * -A[j][j] / (1.0 - tau), tau_mod * gamma); A[j][j] += delta; L[j][j] = Math.sqrt(A[j][j]); E_Diagonal[j] = delta; } if (!phaseone && j < n-1) { k = j; // k + 1 == number of iteration performed in phase one /** Calculate lower Gerschgorin bounds of A_k. */ double[] g = new double[n - k]; for (i = k; i < n; i++) { double sum_l = 0, sum_r = 0; int l; for (l = k; l < i; l++) { // Left side of the diagonal element sum_l += Math.abs(A[i][l]); } for (l = i+1; l < n; l++) { // Right side of the diagonal element (using symmetry) sum_r += Math.abs(A[l][i]); } g[i-k] = A[i][i] - sum_l - sum_r; } /** The main iteration of PHASE TWO. */ for (j = k; j < n-2; j++) { /** Pivot on maximum lower Gerschgorin bound. */ int gmax = j; double gmax_v = Double.NEGATIVE_INFINITY; for (i = j; i < n; i++) { if (g[i-k] > gmax_v) { gmax_v = g[i-k]; gmax = i; } } /** Excecute the pivoting. */ if (gmax != j) { swapRowsAndColumns(A, 0, gmax-j, true, j); swapRowsAndColumns(L, j, gmax, false, 0); pivotTransformQueue.add(new PivotTransform(gmax, j)); } /** Calculate the delta to be added to the diagnoal element (= E_jj) */ double normj = 0; for (i = j+1; i < n; i++) { normj += Math.abs(A[i][j]); } delta = Math.max(0, Math.max(-A[j][j] + Math.max(normj, tau_mod * gamma), delta_prev)); if (delta > 0) { A[j][j] += delta; delta_prev = delta; } E_Diagonal[j] = delta; /** Update Gerschgorin lower bounds, not too sure this is * correct, but hey, no risk, no fun. */ /* Shevek notes that this uses floating point * equality and therefore the risk is higher than * might have been assumed. */ if (Tools.isNotEqual(A[j][j], normj)) { double t = 1.0 - normj / A[j][j]; for (i = j+1; i < n; i++) { g[i-k] += t * Math.abs(A[i][j]); } } /** Perform a iteration of standard cholesky factorization. */ L[j][j] = Math.sqrt(A[j][j]); // Calculate the j-th diagonal element of L for (i = j+1; i < n; i++) { // Calculate the rest of jth-column (below the diagonal) of L L[i][j] = A[i][j] / L[j][j]; for (int l = j+1; l <= i; l++) { // Update the i-th row of the next submatrix (in A) A[i][l] = A[i][l] - L[i][j] * L[l][j]; } } } /** Process the final 2 x 2 submatrix. */ double[][] S = new double[2][2]; S[0][0] = A[n-2][n-2]; S[1][1] = A[n-1][n-1]; S[1][0] = S[0][1] = A[n-1][n-2]; Matrix MS = new Matrix(S); double[] evs = MS.eig().getRealEigenvalues(); double lambda_lo = evs[0]; double lambda_hi = evs[1]; delta = Math.max(0, Math.max(-lambda_lo + Math.max(tau * (lambda_hi - lambda_lo) / (1.0 - tau), tau_mod * gamma), delta_prev)); if (delta > 0) { A[n-2][n-2] += delta; A[n-1][n-1] += delta; delta_prev = delta; } L[n-2][n-2] = Math.sqrt(A[n-2][n-2]); L[n-1][n-2] = A[n-1][n-2] / L[n-2][n-2]; L[n-1][n-1] = Math.sqrt(A[n-1][n-1] - L[n-1][n-2] * L[n-1][n-2]); E_Diagonal[n-2] = E_Diagonal[n-1] = delta; } this.L = ML; /** Because delta_i+1 >= delta_i, delta_prev == ||E||_inf */ this.ENorm = delta_prev; } /** * Build the Pivot-Transform-Reverse matrix PTR. * * We start with a n x n identity matrix and for each recorded pivot * transformation swap the according rows, reversing the sequence and * starting with the last pivot transformation. */ private void buildPTR() { double[] temp_row; // = new double[n]; double[][] PTRA; int k; PivotTransform pt; PTR = Matrix.identity(n, n); PTRA = PTR.getArray(); k = pivotTransformQueue.size(); while (k-- > 0) { pt = pivotTransformQueue.removeLast(); temp_row = PTRA[pt.pos1]; PTRA[pt.pos1] = PTRA[pt.pos2]; PTRA[pt.pos2] = temp_row; } } /** * Return the lower triangle matrix factor of the cholesky decomposition. */ public Matrix getL() { return L; } /** * Return the matrix PTR with PTR * (L * L') * PTR' = A + E. */ public Matrix getPTR() { if (PTR == null) { buildPTR(); } return PTR; } /** * Return the diagonal of error-matrix E with the pivoting still applied. */ public double[] getE_Diagonal() { return E_Diagonal; } /** * Return the error-matrix E with pivoting reversed. */ public Matrix getE() { if (E == null) { /* Create n x 1 vector with the diagonal elements of E */ Matrix diagVector = new Matrix(E_Diagonal.clone(), n); /* Use PTR matrix to reverse the pivot transformations on this vector */ if (PTR == null) buildPTR(); diagVector = PTR.times(diagVector); E = new Matrix(n, n, 0.0); /* Because E is a diagonal matrix we can now easily build it from the * pivot adjusted vector */ for (int i = 0; i < n; i++) { E.set(i, i, diagVector.get(i, 0)); } } return this.E; } /** * Return the infinity norm of matrix E. */ public double getENorm() { return this.ENorm; } /** * Return the determinant of L. */ public double getDetL() { if (detL == 0) { double det = 1; for (int i = 0; i < L.getRowDimension(); i++) { det *= L.get(i, i); } detL = det; } return detL; } /** * Return the determinant of A. */ public double getDetA() { return (getDetL() * getDetL()); } }