MVNormal.java example

Explorer
topic-modeling-master
package cc.mallet.util;

import java.util.Arrays;
import java.text.NumberFormat;
import cc.mallet.util.Randoms;

import cc.mallet.types.*;

/** Tools for working with multivariate normal distributions */

public class MVNormal {

	/** Simple Cholesky decomposition, with no checks on squareness, 
	 *	symmetricality, or positive definiteness. This follows the
	 *  implementation from JAMA fairly closely.
	 *  <p>
	 *  Returns L such that LL' = A and L is lower-triangular.
	 */ 
	
	public static double[] cholesky(double[] input, int numRows) {

		// Initialize the result. Note that java sets all elements to 0.
		double[] result = new double[ input.length ];
		double sumRowSquared = 0.0;
		double dotProduct = 0.0;

		// For each off-diagonal cell l_{jk} in the result,
		//  we need an index into the jth row and the kth row.
		// These are therefore both really row offsets, but one
		//  corresponds to the beginning of the (row)th row 
		//  and the other to the beginning of the (column)th row.
		int rowOffset = 0;
		int colOffset = 0;

		for (int row = 0; row < numRows; row++) {

			sumRowSquared = 0.0;
			rowOffset = row * numRows;
			
			for (int col = 0; col < row; col++) {

				dotProduct = 0.0;
				colOffset = col * numRows;

				for (int i = 0; i < col; i++) {
					dotProduct += 
						result[ rowOffset + i ] *
						result[ colOffset + i ];
				}

				result[ rowOffset + col ] = 
					(input[ rowOffset + col ] - dotProduct) /
					result[ colOffset + col ];
				sumRowSquared +=
					result[rowOffset + col] * 
					result[rowOffset + col];
			}

			//  Now the diagonal element
			result[ rowOffset + row ] =
				Math.sqrt(input[ rowOffset + row ] - sumRowSquared);
		}

		return result;
	}

	public static double[] bandCholesky(double[] input, int numRows) {

		// Initialize the result. Note that java sets all elements to 0.
		double[] result = new double[ input.length ];
		double sumRowSquared = 0.0;
		double dotProduct = 0.0;

		// For each off-diagonal cell l_{jk} in the result,
		//  we need an index into the jth row and the kth row.
		// These are therefore both really row offsets, but one
		//  corresponds to the beginning of the (row)th row 
		//  and the other to the beginning of the (column)th row.
		int rowOffset = 0;
		int colOffset = 0;

		int firstNonZero;

		for (int row = 0; row < numRows; row++) {

			sumRowSquared = 0.0;
			rowOffset = row * numRows;
			
			firstNonZero = row;

			for (int col = 0; col < row; col++) {

				if (firstNonZero == row) {
					if (input[ rowOffset + col ] == 0) {
						continue;
					}
					else {
						firstNonZero = col;
					}
				}

				dotProduct = 0.0;
				colOffset = col * numRows;

				for (int i = firstNonZero; i < col; i++) {
					dotProduct += 
						result[ rowOffset + i ] *
						result[ colOffset + i ];
				}

				result[ rowOffset + col ] = 
					(input[ rowOffset + col ] - dotProduct) /
					result[ colOffset + col ];
				sumRowSquared +=
					result[rowOffset + col] * 
					result[rowOffset + col];
			}

			//  Now the diagonal element
			result[ rowOffset + row ] =
				Math.sqrt(input[ rowOffset + row ] - sumRowSquared);
		}

		return result;
	}

	/** For testing band cholesky factorization */
	public static double[] bandMatrixRoot (int dim, int bandwidth) {
		double[] result = new double[dim * dim];
		
		for (int row = 0; row < dim; row++) {
			int rowOffset = row * dim;

			for (int col = Math.max(0, (row - bandwidth + 1));
				 col <= row;
				 col++) {
				result[rowOffset + col] = 1.0;
			}
		}

		return result;
	}

	/** Sample a multivariate normal from a precision matrix 
	 *    (ie inverse covariance matrix)
	 */
	public static double[] nextMVNormal(double[] mean, double[] precision, 
										Randoms random) {
		return nextMVNormalWithCholesky(mean, cholesky(precision, mean.length), random);
	}

	public static double[] nextMVNormalWithCholesky(double[] mean, double[] precisionLowerTriangular,
													Randoms random) {

		int n = mean.length;

		// Initialize vector z to standard normals
		//  [NB: using the same array for z and x]
		double[] result = new double[ n ];
		for (int i = 0; i < n; i++) {
			result[i] = random.nextGaussian();
		}
		
		// Now solve trans(L) x = z using back substitution
		double innerProduct;
		
		for (int i = n-1; i >= 0; i--) {
			innerProduct = 0.0;
			for (int j = i+1; j < n; j++) {
				// the cholesky decomp got us the precisionLowerTriangular triangular
				//  matrix, but we really want the transpose.
				innerProduct += result[j] * precisionLowerTriangular[ (n * j) + i ];
			}

			result[i] = (result[i] - innerProduct) / precisionLowerTriangular[ (n * i) + i ];
		}

		for (int i = 0; i < n; i++) {
			result[i] += mean[i];
		}

		return result;
	}
	
	/** Sample a vector x from N(m, (LL')<sup>-1</sup>, such that 
	 *   sum_i x_i = 0.
	 */
	public static double[] nextZeroSumMVNormalWithCholesky(double[] mean, double[] precisionLowerTriangular,
														   Randoms random) {

		int n = mean.length;

		double[] result = nextMVNormalWithCholesky(mean, precisionLowerTriangular, random);
		double sum = 0.0;
		for (int i = 0; i < n; i++) {
			sum += result[i];
		}
		
		// get the sum of each row of the inverse precision matrix
		//  by solving the two triangular systems L(L'x) = Ly = 1, L'x = y.

		double[] ones = new double[n];
		Arrays.fill(ones, 1.0);

		double[] firstSolution = solveWithForwardSubstitution(ones, precisionLowerTriangular);
		double[] rowSums = solveWithBackSubstitution(firstSolution, precisionLowerTriangular);

		double sumOfRowSums = 0.0;
		for (int i = 0; i < n; i++) {
			sumOfRowSums += rowSums[i];
		}

		double inverseSumOfRowSums = 1.0 / sumOfRowSums;

		for (int i = 0; i < n; i++) {
			result[i] -= inverseSumOfRowSums * rowSums[i] * sum;
		}

		return result;
	}
	
	public static double[][] nextMVNormal(int n, double[] mean, double[] precision,
										  Randoms random) {
		double[][] result = new double[n][];

		for (int i=0; i < n; i++) {
			result[i] = nextMVNormal(mean, precision, random);
		}

		return result;
	}

	public static FeatureVector nextFeatureVector(Alphabet alphabet, double[] mean,
												  double[] precision, Randoms random) {

		return new FeatureVector(alphabet, nextMVNormal(mean, precision, random));
	}

	/**
	 *  @param priorMean A vector of mean values
	 *  @param priorPrecisionDiagonal A vector representing a diagonal prior precision matrix
	 *  @param precision A precision matrix
	 */
	public static double[] nextMVNormalPosterior(double[] priorMean, double[] priorPrecisionDiagonal,
												 double[] precision,
												 double[] observedMean, int observations, 
												 Randoms random) {
		int dimension = priorMean.length;

		// Q_0 mu_0 + n Q y_bar
		double[] linearCombination = new double[dimension];

		for (int i=0; i<dimension; i++) {
			linearCombination[i] = priorMean[i] * priorPrecisionDiagonal[i];

			double innerProduct = 0.0;
			for (int j = 0; j < dimension; j++) {
				innerProduct += precision[ (dimension * i) + j ] * observedMean[j];
			}
			
			linearCombination[i] += observations * innerProduct;
		}
		
		// Q_0 + n Q
		double[] posteriorPrecision = new double[precision.length];
		
		for (int row = 0; row < dimension; row++) {
			for (int col = 0; col < dimension; col++) {
				posteriorPrecision[ (dimension * row) + col ] = 
					observations * precision[ (dimension * row) + col ];
				if (row == col) {
					posteriorPrecision[ (dimension * row) + col ] += 
						priorPrecisionDiagonal[row];
				}
			}
		}

		double[] inversePosteriorPrecision = invertSPD(posteriorPrecision, dimension);
		
		double[] posteriorMean = new double[dimension];

		for (int row = 0; row < dimension; row++) {
			double innerProduct = 0.0;
			for (int col = 0; col < dimension; col++) {
				innerProduct +=
					inversePosteriorPrecision[ (dimension * row) + col ] *
					linearCombination[ col ];
			}

			posteriorMean[row] = innerProduct;
		}

		return nextMVNormal(posteriorMean, posteriorPrecision, random);
	}

	/** 
	 * This method returns x such that L'x = b.
	 *  Note the transpose: this method assumes that 
	 *  the input matrix is LOWER triangular, even though
	 *  back substitution operates on UPPER triangular matrices.
	 */
	public static double[] solveWithBackSubstitution(double[] b, double[] lowerTriangular) {
        double innerProduct;

		int n = b.length;
		double[] result = new double[n];

        for (int i = n-1; i >= 0; i--) {
			innerProduct = 0.0;
            for (int j = i+1; j < n; j++) {
				// Assume we're dealing with a single lower triangular
				//  matrix from a cholesky decomposition, so index into
				//  it as if it is the transpose.
				innerProduct += result[j] * lowerTriangular[ (n * j) + i ];
            }

            result[i] = (b[i] - innerProduct) / lowerTriangular[ (n * i) + i ];
        }

		return result;
	}

	/** 
	 * This method returns x such that Lx = b
	 *  where L is lower triangular
	 */
	public static double[] solveWithForwardSubstitution(double[] b, double[] lowerTriangular) {
        double innerProduct;

		int n = b.length;
		double[] result = new double[n];

        for (int i = 0; i < n; i++) {
			innerProduct = 0.0;
            for (int j = 0; j < i; j++) {
				innerProduct += result[j] * lowerTriangular[ (n * i) + j ];
            }

            result[i] = (b[i] - innerProduct) / lowerTriangular[ (n * i) + i ];
        }

		return result;
	}

	/**
	 *  This method returns the (lower-triangular) inverse of a lower triangular
	 *   matrix.
	 */
	public static double[] invertLowerTriangular(double[] inputMatrix, int dimension) {
		double[] outputMatrix = new double[inputMatrix.length];

		double x;

		for (int row = 0; row < dimension; row++) {
			for (int col = 0; col <= row; col++) {
				// Off-diagonal elements are the negative inner product
				//  (up to the row index) of the row from input and the col
				//  from the output, divided by the diagonal from the input.
				
				if (col == row) {
					// Diagonal elements are the same, but add 1 to the numerator
					x = 1.0;
				}
				else {
					x = 0.0;
				}
				
				for (int i = col; i < row; i++) {
					x -= inputMatrix[ (dimension * row) + i ] * outputMatrix[ (dimension * i) + col ];
				}

				outputMatrix[ (dimension * row) + col ] = x / inputMatrix[ (dimension * row) + row ];
			}

		}

		return outputMatrix;
	}

	/**
	 *  Returns L'L for lower triangular matrix L.
	 */
	public static double[] lowerTriangularCrossproduct(double[] inputMatrix, int dimension) {
		
		double[] outputMatrix = new double[inputMatrix.length];

		double innerProduct;

		for (int row = 0; row < dimension; row++) {
			for (int col = row; col < dimension; col++) {

				innerProduct = 0.0;

				for (int i = col; i < dimension; i++) {
					innerProduct += inputMatrix[ row + (dimension * i) ] * inputMatrix[ col + (dimension * i) ];
				}

				outputMatrix[ (dimension * row) + col ] = innerProduct;
				outputMatrix[ row + (dimension * col) ] = innerProduct;				
			}
		}

		return outputMatrix;
	}

	/** 
	 *  Returns (lower-triangular) X = AB for square lower-triangular matrices A and B
	 */
	public static double[] lowerTriangularProduct(double[] leftMatrix, double[] rightMatrix, int dimension) {
		
		double[] outputMatrix = new double[leftMatrix.length];

        double innerProduct;

        for (int row = 0; row < dimension; row++) {
            for (int col = 0; col <= row; col++) {

                innerProduct = 0.0;
				for (int i = col; i <= row; i++) {
					innerProduct += leftMatrix[ (dimension * row) + i ] * rightMatrix[ (dimension * i) + col ];
				}

				outputMatrix[ (dimension * row) + col ] = innerProduct;
			}
		}

		return outputMatrix;

	}

	public static double[] invertSPD(double[] inputMatrix, int dimension) {
		
		return lowerTriangularCrossproduct(invertLowerTriangular(bandCholesky(inputMatrix, dimension),
																 dimension),
										   dimension);
	}

	/**
	 *  A Wishart random variate, based on R code by Bill Venables.
	 *
	 *  @param sqrtScaleMatrix The lower-triangular matrix square root of the scale matrix.
	 *     To draw from the posterior of a precision (ie inverse covariance) matrix,
	 *     this should be chol( S^{-1} ), where S is the scatter matrix X'X of 
	 *     columns of MV normal observations X.
	 *  @param dimension The size of the matrix
	 *  @param degreesOfFreedom  The degree of freedom for the Wishart. Should be greater than dimension. For 
	 *     a posterior distribution, this is the number of observations + the df of the prior.
	 */
	public static double[] nextWishart(double[] sqrtScaleMatrix, int dimension,
									   int degreesOfFreedom, Randoms random) {

		double[] sample = new double[sqrtScaleMatrix.length];
		
		for (int row = 0; row < dimension; row++) {

			for (int col = 0; col < row; col++) {
				sample[ (row * dimension) + col ] = random.nextGaussian(0, 1);
			}
			
			sample[ (row * dimension) + row ] = Math.sqrt(random.nextChiSq(degreesOfFreedom));
		}

		//System.out.println(doubleArrayToString(sample, dimension));
		//System.out.println(doubleArrayToString(sqrtScaleMatrix, dimension));		
		//System.out.println(doubleArrayToString(lowerTriangularProduct(sample, sqrtScaleMatrix, dimension), dimension));

		System.out.println(diagonalToString(sample, dimension));
		System.out.println(diagonalToString(sqrtScaleMatrix, dimension));		
		System.out.println(diagonalToString(lowerTriangularProduct(sample, sqrtScaleMatrix, dimension), dimension));

		return lowerTriangularCrossproduct(lowerTriangularProduct(sample, sqrtScaleMatrix, dimension), dimension);

	}

	public static double[] nextWishartPosterior(double[] scatterMatrix, int observations,
												double[] priorPrecisionDiagonal, int priorDF,
												int dimension, Randoms random) {

		double[] scatterPlusPrior = new double[scatterMatrix.length];
		System.arraycopy(scatterMatrix, 0, scatterPlusPrior, 0, scatterMatrix.length);

		for (int i=0; i < dimension; i++) {
			scatterPlusPrior[ (dimension * i) + i ] += 1.0 / priorPrecisionDiagonal[i];
		}

		System.out.println(" inverted scatter plus prior");
		System.out.println(diagonalToString(invertSPD(scatterPlusPrior, dimension), dimension));

		System.out.println(" chol inverted scatter plus prior");
        System.out.println(diagonalToString(cholesky(invertSPD(scatterPlusPrior, dimension), dimension), dimension));

		double[] sqrtScaleMatrix = cholesky(invertSPD(scatterPlusPrior, dimension), dimension);
		
		return nextWishart(sqrtScaleMatrix, dimension, observations + priorDF, random);
	}
	
	/** Create a string representation of a square matrix in one-dimensional array format
	 */
	public static String doubleArrayToString(double[] matrix, int dimension) {
		NumberFormat formatter = NumberFormat.getInstance();
        formatter.setMaximumFractionDigits(10);


		StringBuffer output = new StringBuffer();

		for (int row = 0; row < dimension; row++) {
			for (int col = 0; col < dimension; col++) {
				output.append(formatter.format(matrix[ (dimension * row) + col ]));
				output.append("\t");
			}
			output.append("\n");
		}
		
		return output.toString();
	}

	public static String diagonalToString(double[] matrix, int dimension) {
        NumberFormat formatter = NumberFormat.getInstance();
        formatter.setMaximumFractionDigits(4);


        StringBuffer output = new StringBuffer();

        for (int row = 0; row < dimension; row++) {
			output.append(formatter.format(matrix[ (dimension * row) + row ]));
			output.append(" ");
        }

        return output.toString();
    }


	public static double[] getScatterMatrix(double[][] observationMatrix) {
		int observations = observationMatrix.length;
		int dimension = observationMatrix[0].length;

		double[] outputMatrix = new double[dimension * dimension];
		double[] means = new double[dimension];

		// collect the sample means

		for (int i = 0; i < observations; i++) {
            for (int d = 0; d < dimension; d++) {
                means[d] += observationMatrix[i][d];
            }
		}

		for (int d = 0; d < dimension; d++) {
			means[d] /= observations;
		}

		// now the sample covariance (times n)

		for (int i = 0; i < observations; i++) {
            for (int d1 = 0; d1 < dimension; d1++) {
                for (int d2 = 0; d2 < dimension; d2++) {
                    outputMatrix[ (dimension * d1) + d2 ] +=
                        (observationMatrix[i][d1] - means[d1]) *
						(observationMatrix[i][d2] - means[d2]);
                }
			}
        }

		return outputMatrix;
	}

	public static void testCholesky() {

		int observations = 1000;

		double[] mean = new double[20];
		double[] precisionMatrix = new double[ 20 * 20 ];
		for (int i=0; i<20; i++) {
			precisionMatrix[ (20 * i) + i ] = 1.0;
		}

		Randoms random = new Randoms();
		double[] scatterMatrix = getScatterMatrix(nextMVNormal(observations, mean, precisionMatrix, random));
		
		double[] priorPrecision = new double[20];
		Arrays.fill(priorPrecision, 1.0);
		
		nextWishartPosterior(scatterMatrix, observations, priorPrecision, 21, 20, random);
	}


	public static void main (String[] args) {

		//double[] spd = { 19.133825, -1.180869, 6.403880, 
		//				 -1.180869,  8.895968, 1.280748,
		//				 6.403880,  1.280748, 9.155951 };
		
		double[] spd = {3.0, 0.0, -1.0, 
						0.0, 3.0, 0.0,
						-1.0, 0.0, 3.0};
		
		Randoms random = new Randoms();
		double[] mean = { 1.0, 1.0, 1.0 };
		double[] lower = cholesky(spd, 3);

		for (int iter = 0; iter < 10; iter++) {
			double[] sample = nextMVNormalWithCholesky(mean, lower,
													   random);
			for (int i=0; i<sample.length; i++) {
				System.out.print(sample[i] + "\t");
			}
			System.out.println();
		}

		for (int iter = 0; iter < 10; iter++) {
			double[] sample = nextZeroSumMVNormalWithCholesky(mean, lower,
															  random);
			for (int i=0; i<sample.length; i++) {
				System.out.print(sample[i] + "\t");
			}
			System.out.println();
		}


		/*
		int dim = 100;

		double[] bandLower = bandMatrixRoot(dim, 3);
		System.out.println(doubleArrayToString(bandLower, dim));

		double[] bandMatrix = lowerTriangularCrossproduct(bandLower, dim);
		System.out.println(doubleArrayToString(bandMatrix, dim));

		long startTime;

		startTime = System.currentTimeMillis();
		for (int i=0; i<100000; i++) {
			bandCholesky(bandMatrix, dim);
		}
		System.out.println(System.currentTimeMillis() - startTime);

		startTime = System.currentTimeMillis();
		for (int i=0; i<100000; i++) {
			cholesky(bandMatrix, dim);
		}
		System.out.println(System.currentTimeMillis() - startTime);
		*/
		
		/*

		double[] l = {2.87527, 0.0, 0.0, -2.4168, 1.28, 0.0, -0.585168, -2.792234, 2.769609};
		double[] spd = { 19.133825, -1.180869, 6.403880, 
						 -1.180869,  8.895968, 1.280748,
						 6.403880,  1.280748, 9.155951 };

		double[] scatter = { 103.59761, -16.370939, 12.694755,
							 -16.37094, 106.117048,  4.079818,
							 12.69476,   4.079818, 94.065152 };

		double[] priorDiagonal = { 1.0, 1.0, 1.0 };

		testCholesky();
		*/

		//System.out.println(doubleArrayToString(nextWishartPosterior(scatter, 100, priorDiagonal, 10, 3, new Randoms()), 3));

		/*
		long startTime = System.currentTimeMillis();
		for (int i=0; i<10000; i++) {
			invertSPD(spd, 3);
		}
		System.out.println(System.currentTimeMillis() - startTime);
		*/		

		//System.out.println(doubleArrayToString(invertSPD(spd, 3), 3));
		//System.out.println(doubleArrayToString(nextWishart(l, 3, 25, new Randoms()), 3));

		/*
		double[] precisionMatrix = {0.98, -1.0, 0.0, -1.0, 2.13, -1.0, 0.0, -1.0, 1.01};
		double[] mean = new double[3];

		Randoms random = new Randoms();

		for (int i=0; i<10; i++) {
			double[] variate = nextMVNormal(mean, precisionMatrix, random);
			
			for (int j=0; j<variate.length; j++) {
				System.out.print(variate[j] + "\t");
			}
			System.out.println();
		}
		*/
		
	}
}