package cc.mallet.util; import java.util.Arrays; import java.text.NumberFormat; import cc.mallet.util.Randoms; import cc.mallet.types.*; /** Tools for working with multivariate normal distributions */ public class MVNormal { /** Simple Cholesky decomposition, with no checks on squareness, * symmetricality, or positive definiteness. This follows the * implementation from JAMA fairly closely. * <p> * Returns L such that LL' = A and L is lower-triangular. */ public static double[] cholesky(double[] input, int numRows) { // Initialize the result. Note that java sets all elements to 0. double[] result = new double[ input.length ]; double sumRowSquared = 0.0; double dotProduct = 0.0; // For each off-diagonal cell l_{jk} in the result, // we need an index into the jth row and the kth row. // These are therefore both really row offsets, but one // corresponds to the beginning of the (row)th row // and the other to the beginning of the (column)th row. int rowOffset = 0; int colOffset = 0; for (int row = 0; row < numRows; row++) { sumRowSquared = 0.0; rowOffset = row * numRows; for (int col = 0; col < row; col++) { dotProduct = 0.0; colOffset = col * numRows; for (int i = 0; i < col; i++) { dotProduct += result[ rowOffset + i ] * result[ colOffset + i ]; } result[ rowOffset + col ] = (input[ rowOffset + col ] - dotProduct) / result[ colOffset + col ]; sumRowSquared += result[rowOffset + col] * result[rowOffset + col]; } // Now the diagonal element result[ rowOffset + row ] = Math.sqrt(input[ rowOffset + row ] - sumRowSquared); } return result; } public static double[] bandCholesky(double[] input, int numRows) { // Initialize the result. Note that java sets all elements to 0. double[] result = new double[ input.length ]; double sumRowSquared = 0.0; double dotProduct = 0.0; // For each off-diagonal cell l_{jk} in the result, // we need an index into the jth row and the kth row. // These are therefore both really row offsets, but one // corresponds to the beginning of the (row)th row // and the other to the beginning of the (column)th row. int rowOffset = 0; int colOffset = 0; int firstNonZero; for (int row = 0; row < numRows; row++) { sumRowSquared = 0.0; rowOffset = row * numRows; firstNonZero = row; for (int col = 0; col < row; col++) { if (firstNonZero == row) { if (input[ rowOffset + col ] == 0) { continue; } else { firstNonZero = col; } } dotProduct = 0.0; colOffset = col * numRows; for (int i = firstNonZero; i < col; i++) { dotProduct += result[ rowOffset + i ] * result[ colOffset + i ]; } result[ rowOffset + col ] = (input[ rowOffset + col ] - dotProduct) / result[ colOffset + col ]; sumRowSquared += result[rowOffset + col] * result[rowOffset + col]; } // Now the diagonal element result[ rowOffset + row ] = Math.sqrt(input[ rowOffset + row ] - sumRowSquared); } return result; } /** For testing band cholesky factorization */ public static double[] bandMatrixRoot (int dim, int bandwidth) { double[] result = new double[dim * dim]; for (int row = 0; row < dim; row++) { int rowOffset = row * dim; for (int col = Math.max(0, (row - bandwidth + 1)); col <= row; col++) { result[rowOffset + col] = 1.0; } } return result; } /** Sample a multivariate normal from a precision matrix * (ie inverse covariance matrix) */ public static double[] nextMVNormal(double[] mean, double[] precision, Randoms random) { return nextMVNormalWithCholesky(mean, cholesky(precision, mean.length), random); } public static double[] nextMVNormalWithCholesky(double[] mean, double[] precisionLowerTriangular, Randoms random) { int n = mean.length; // Initialize vector z to standard normals // [NB: using the same array for z and x] double[] result = new double[ n ]; for (int i = 0; i < n; i++) { result[i] = random.nextGaussian(); } // Now solve trans(L) x = z using back substitution double innerProduct; for (int i = n-1; i >= 0; i--) { innerProduct = 0.0; for (int j = i+1; j < n; j++) { // the cholesky decomp got us the precisionLowerTriangular triangular // matrix, but we really want the transpose. innerProduct += result[j] * precisionLowerTriangular[ (n * j) + i ]; } result[i] = (result[i] - innerProduct) / precisionLowerTriangular[ (n * i) + i ]; } for (int i = 0; i < n; i++) { result[i] += mean[i]; } return result; } /** Sample a vector x from N(m, (LL')<sup>-1</sup>, such that * sum_i x_i = 0. */ public static double[] nextZeroSumMVNormalWithCholesky(double[] mean, double[] precisionLowerTriangular, Randoms random) { int n = mean.length; double[] result = nextMVNormalWithCholesky(mean, precisionLowerTriangular, random); double sum = 0.0; for (int i = 0; i < n; i++) { sum += result[i]; } // get the sum of each row of the inverse precision matrix // by solving the two triangular systems L(L'x) = Ly = 1, L'x = y. double[] ones = new double[n]; Arrays.fill(ones, 1.0); double[] firstSolution = solveWithForwardSubstitution(ones, precisionLowerTriangular); double[] rowSums = solveWithBackSubstitution(firstSolution, precisionLowerTriangular); double sumOfRowSums = 0.0; for (int i = 0; i < n; i++) { sumOfRowSums += rowSums[i]; } double inverseSumOfRowSums = 1.0 / sumOfRowSums; for (int i = 0; i < n; i++) { result[i] -= inverseSumOfRowSums * rowSums[i] * sum; } return result; } public static double[][] nextMVNormal(int n, double[] mean, double[] precision, Randoms random) { double[][] result = new double[n][]; for (int i=0; i < n; i++) { result[i] = nextMVNormal(mean, precision, random); } return result; } public static FeatureVector nextFeatureVector(Alphabet alphabet, double[] mean, double[] precision, Randoms random) { return new FeatureVector(alphabet, nextMVNormal(mean, precision, random)); } /** * @param priorMean A vector of mean values * @param priorPrecisionDiagonal A vector representing a diagonal prior precision matrix * @param precision A precision matrix */ public static double[] nextMVNormalPosterior(double[] priorMean, double[] priorPrecisionDiagonal, double[] precision, double[] observedMean, int observations, Randoms random) { int dimension = priorMean.length; // Q_0 mu_0 + n Q y_bar double[] linearCombination = new double[dimension]; for (int i=0; i<dimension; i++) { linearCombination[i] = priorMean[i] * priorPrecisionDiagonal[i]; double innerProduct = 0.0; for (int j = 0; j < dimension; j++) { innerProduct += precision[ (dimension * i) + j ] * observedMean[j]; } linearCombination[i] += observations * innerProduct; } // Q_0 + n Q double[] posteriorPrecision = new double[precision.length]; for (int row = 0; row < dimension; row++) { for (int col = 0; col < dimension; col++) { posteriorPrecision[ (dimension * row) + col ] = observations * precision[ (dimension * row) + col ]; if (row == col) { posteriorPrecision[ (dimension * row) + col ] += priorPrecisionDiagonal[row]; } } } double[] inversePosteriorPrecision = invertSPD(posteriorPrecision, dimension); double[] posteriorMean = new double[dimension]; for (int row = 0; row < dimension; row++) { double innerProduct = 0.0; for (int col = 0; col < dimension; col++) { innerProduct += inversePosteriorPrecision[ (dimension * row) + col ] * linearCombination[ col ]; } posteriorMean[row] = innerProduct; } return nextMVNormal(posteriorMean, posteriorPrecision, random); } /** * This method returns x such that L'x = b. * Note the transpose: this method assumes that * the input matrix is LOWER triangular, even though * back substitution operates on UPPER triangular matrices. */ public static double[] solveWithBackSubstitution(double[] b, double[] lowerTriangular) { double innerProduct; int n = b.length; double[] result = new double[n]; for (int i = n-1; i >= 0; i--) { innerProduct = 0.0; for (int j = i+1; j < n; j++) { // Assume we're dealing with a single lower triangular // matrix from a cholesky decomposition, so index into // it as if it is the transpose. innerProduct += result[j] * lowerTriangular[ (n * j) + i ]; } result[i] = (b[i] - innerProduct) / lowerTriangular[ (n * i) + i ]; } return result; } /** * This method returns x such that Lx = b * where L is lower triangular */ public static double[] solveWithForwardSubstitution(double[] b, double[] lowerTriangular) { double innerProduct; int n = b.length; double[] result = new double[n]; for (int i = 0; i < n; i++) { innerProduct = 0.0; for (int j = 0; j < i; j++) { innerProduct += result[j] * lowerTriangular[ (n * i) + j ]; } result[i] = (b[i] - innerProduct) / lowerTriangular[ (n * i) + i ]; } return result; } /** * This method returns the (lower-triangular) inverse of a lower triangular * matrix. */ public static double[] invertLowerTriangular(double[] inputMatrix, int dimension) { double[] outputMatrix = new double[inputMatrix.length]; double x; for (int row = 0; row < dimension; row++) { for (int col = 0; col <= row; col++) { // Off-diagonal elements are the negative inner product // (up to the row index) of the row from input and the col // from the output, divided by the diagonal from the input. if (col == row) { // Diagonal elements are the same, but add 1 to the numerator x = 1.0; } else { x = 0.0; } for (int i = col; i < row; i++) { x -= inputMatrix[ (dimension * row) + i ] * outputMatrix[ (dimension * i) + col ]; } outputMatrix[ (dimension * row) + col ] = x / inputMatrix[ (dimension * row) + row ]; } } return outputMatrix; } /** * Returns L'L for lower triangular matrix L. */ public static double[] lowerTriangularCrossproduct(double[] inputMatrix, int dimension) { double[] outputMatrix = new double[inputMatrix.length]; double innerProduct; for (int row = 0; row < dimension; row++) { for (int col = row; col < dimension; col++) { innerProduct = 0.0; for (int i = col; i < dimension; i++) { innerProduct += inputMatrix[ row + (dimension * i) ] * inputMatrix[ col + (dimension * i) ]; } outputMatrix[ (dimension * row) + col ] = innerProduct; outputMatrix[ row + (dimension * col) ] = innerProduct; } } return outputMatrix; } /** * Returns (lower-triangular) X = AB for square lower-triangular matrices A and B */ public static double[] lowerTriangularProduct(double[] leftMatrix, double[] rightMatrix, int dimension) { double[] outputMatrix = new double[leftMatrix.length]; double innerProduct; for (int row = 0; row < dimension; row++) { for (int col = 0; col <= row; col++) { innerProduct = 0.0; for (int i = col; i <= row; i++) { innerProduct += leftMatrix[ (dimension * row) + i ] * rightMatrix[ (dimension * i) + col ]; } outputMatrix[ (dimension * row) + col ] = innerProduct; } } return outputMatrix; } public static double[] invertSPD(double[] inputMatrix, int dimension) { return lowerTriangularCrossproduct(invertLowerTriangular(bandCholesky(inputMatrix, dimension), dimension), dimension); } /** * A Wishart random variate, based on R code by Bill Venables. * * @param sqrtScaleMatrix The lower-triangular matrix square root of the scale matrix. * To draw from the posterior of a precision (ie inverse covariance) matrix, * this should be chol( S^{-1} ), where S is the scatter matrix X'X of * columns of MV normal observations X. * @param dimension The size of the matrix * @param degreesOfFreedom The degree of freedom for the Wishart. Should be greater than dimension. For * a posterior distribution, this is the number of observations + the df of the prior. */ public static double[] nextWishart(double[] sqrtScaleMatrix, int dimension, int degreesOfFreedom, Randoms random) { double[] sample = new double[sqrtScaleMatrix.length]; for (int row = 0; row < dimension; row++) { for (int col = 0; col < row; col++) { sample[ (row * dimension) + col ] = random.nextGaussian(0, 1); } sample[ (row * dimension) + row ] = Math.sqrt(random.nextChiSq(degreesOfFreedom)); } //System.out.println(doubleArrayToString(sample, dimension)); //System.out.println(doubleArrayToString(sqrtScaleMatrix, dimension)); //System.out.println(doubleArrayToString(lowerTriangularProduct(sample, sqrtScaleMatrix, dimension), dimension)); System.out.println(diagonalToString(sample, dimension)); System.out.println(diagonalToString(sqrtScaleMatrix, dimension)); System.out.println(diagonalToString(lowerTriangularProduct(sample, sqrtScaleMatrix, dimension), dimension)); return lowerTriangularCrossproduct(lowerTriangularProduct(sample, sqrtScaleMatrix, dimension), dimension); } public static double[] nextWishartPosterior(double[] scatterMatrix, int observations, double[] priorPrecisionDiagonal, int priorDF, int dimension, Randoms random) { double[] scatterPlusPrior = new double[scatterMatrix.length]; System.arraycopy(scatterMatrix, 0, scatterPlusPrior, 0, scatterMatrix.length); for (int i=0; i < dimension; i++) { scatterPlusPrior[ (dimension * i) + i ] += 1.0 / priorPrecisionDiagonal[i]; } System.out.println(" inverted scatter plus prior"); System.out.println(diagonalToString(invertSPD(scatterPlusPrior, dimension), dimension)); System.out.println(" chol inverted scatter plus prior"); System.out.println(diagonalToString(cholesky(invertSPD(scatterPlusPrior, dimension), dimension), dimension)); double[] sqrtScaleMatrix = cholesky(invertSPD(scatterPlusPrior, dimension), dimension); return nextWishart(sqrtScaleMatrix, dimension, observations + priorDF, random); } /** Create a string representation of a square matrix in one-dimensional array format */ public static String doubleArrayToString(double[] matrix, int dimension) { NumberFormat formatter = NumberFormat.getInstance(); formatter.setMaximumFractionDigits(10); StringBuffer output = new StringBuffer(); for (int row = 0; row < dimension; row++) { for (int col = 0; col < dimension; col++) { output.append(formatter.format(matrix[ (dimension * row) + col ])); output.append("\t"); } output.append("\n"); } return output.toString(); } public static String diagonalToString(double[] matrix, int dimension) { NumberFormat formatter = NumberFormat.getInstance(); formatter.setMaximumFractionDigits(4); StringBuffer output = new StringBuffer(); for (int row = 0; row < dimension; row++) { output.append(formatter.format(matrix[ (dimension * row) + row ])); output.append(" "); } return output.toString(); } public static double[] getScatterMatrix(double[][] observationMatrix) { int observations = observationMatrix.length; int dimension = observationMatrix[0].length; double[] outputMatrix = new double[dimension * dimension]; double[] means = new double[dimension]; // collect the sample means for (int i = 0; i < observations; i++) { for (int d = 0; d < dimension; d++) { means[d] += observationMatrix[i][d]; } } for (int d = 0; d < dimension; d++) { means[d] /= observations; } // now the sample covariance (times n) for (int i = 0; i < observations; i++) { for (int d1 = 0; d1 < dimension; d1++) { for (int d2 = 0; d2 < dimension; d2++) { outputMatrix[ (dimension * d1) + d2 ] += (observationMatrix[i][d1] - means[d1]) * (observationMatrix[i][d2] - means[d2]); } } } return outputMatrix; } public static void testCholesky() { int observations = 1000; double[] mean = new double[20]; double[] precisionMatrix = new double[ 20 * 20 ]; for (int i=0; i<20; i++) { precisionMatrix[ (20 * i) + i ] = 1.0; } Randoms random = new Randoms(); double[] scatterMatrix = getScatterMatrix(nextMVNormal(observations, mean, precisionMatrix, random)); double[] priorPrecision = new double[20]; Arrays.fill(priorPrecision, 1.0); nextWishartPosterior(scatterMatrix, observations, priorPrecision, 21, 20, random); } public static void main (String[] args) { //double[] spd = { 19.133825, -1.180869, 6.403880, // -1.180869, 8.895968, 1.280748, // 6.403880, 1.280748, 9.155951 }; double[] spd = {3.0, 0.0, -1.0, 0.0, 3.0, 0.0, -1.0, 0.0, 3.0}; Randoms random = new Randoms(); double[] mean = { 1.0, 1.0, 1.0 }; double[] lower = cholesky(spd, 3); for (int iter = 0; iter < 10; iter++) { double[] sample = nextMVNormalWithCholesky(mean, lower, random); for (int i=0; i<sample.length; i++) { System.out.print(sample[i] + "\t"); } System.out.println(); } for (int iter = 0; iter < 10; iter++) { double[] sample = nextZeroSumMVNormalWithCholesky(mean, lower, random); for (int i=0; i<sample.length; i++) { System.out.print(sample[i] + "\t"); } System.out.println(); } /* int dim = 100; double[] bandLower = bandMatrixRoot(dim, 3); System.out.println(doubleArrayToString(bandLower, dim)); double[] bandMatrix = lowerTriangularCrossproduct(bandLower, dim); System.out.println(doubleArrayToString(bandMatrix, dim)); long startTime; startTime = System.currentTimeMillis(); for (int i=0; i<100000; i++) { bandCholesky(bandMatrix, dim); } System.out.println(System.currentTimeMillis() - startTime); startTime = System.currentTimeMillis(); for (int i=0; i<100000; i++) { cholesky(bandMatrix, dim); } System.out.println(System.currentTimeMillis() - startTime); */ /* double[] l = {2.87527, 0.0, 0.0, -2.4168, 1.28, 0.0, -0.585168, -2.792234, 2.769609}; double[] spd = { 19.133825, -1.180869, 6.403880, -1.180869, 8.895968, 1.280748, 6.403880, 1.280748, 9.155951 }; double[] scatter = { 103.59761, -16.370939, 12.694755, -16.37094, 106.117048, 4.079818, 12.69476, 4.079818, 94.065152 }; double[] priorDiagonal = { 1.0, 1.0, 1.0 }; testCholesky(); */ //System.out.println(doubleArrayToString(nextWishartPosterior(scatter, 100, priorDiagonal, 10, 3, new Randoms()), 3)); /* long startTime = System.currentTimeMillis(); for (int i=0; i<10000; i++) { invertSPD(spd, 3); } System.out.println(System.currentTimeMillis() - startTime); */ //System.out.println(doubleArrayToString(invertSPD(spd, 3), 3)); //System.out.println(doubleArrayToString(nextWishart(l, 3, 25, new Randoms()), 3)); /* double[] precisionMatrix = {0.98, -1.0, 0.0, -1.0, 2.13, -1.0, 0.0, -1.0, 1.01}; double[] mean = new double[3]; Randoms random = new Randoms(); for (int i=0; i<10; i++) { double[] variate = nextMVNormal(mean, precisionMatrix, random); for (int j=0; j<variate.length; j++) { System.out.print(variate[j] + "\t"); } System.out.println(); } */ } }