/*******************************************************************************
* Copyright (c) 2010 Haifeng Li
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package smile.projection;
import java.io.Serializable;
import smile.math.Math;
import smile.math.matrix.*;
/**
* Probabilistic principal component analysis. PPCA is a simplified form of factor
* analysis that employs a latent variable model with a linear relationship:
* <pre>
* y ∼ W * x + μ + ε
* </pre>
* where the latent variables x ∼ N(0, I), the error (or noise) ε ∼ N(0, Ψ),
* and μ is the location term (mean). In PPCA, an isotropic noise model is used,
* i.e., the noise variances are constrained to be equal (Ψ<sub>i</sub> = σ<sup>2</sup>).
* A closed-form maximum likelihood estimate of the above parameters (W, μ, and
* σ<sup>2</sup>) can be obtained from the sample mean and the leading
* eigenvectors and eigenvalues of the sample covariance matrix.
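* <p>
* A minimal usage sketch (the tiny data set and the choice of two principal
* components below are illustrative only):
* <pre>{@code
*     double[][] data = {
*         {5.1, 3.5, 1.4, 0.2},
*         {4.9, 3.0, 1.4, 0.2},
*         {6.3, 3.3, 6.0, 2.5},
*         {5.8, 2.7, 5.1, 1.9},
*         {7.1, 3.0, 5.9, 2.1}
*     };
*     PPCA ppca = new PPCA(data, 2);      // learn a 2-dimensional latent space
*     double[] y = ppca.project(data[0]); // 2-d representation of the first sample
* }</pre>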
*
* <h2>References</h2>
* <ol>
* <li>Michael E. Tipping and Christopher M. Bishop. Probabilistic Principal Component Analysis. Journal of the Royal Statistical Society. Series B (Statistical Methodology) 61(3):611-622, 1999. </li>
* </ol>
*
* @see PCA
*
* @author Haifeng Li
*/
public class PPCA implements Projection<double[]>, Serializable {
private static final long serialVersionUID = 1L;
/**
* The sample mean.
*/
private double[] mu;
/**
* The projected sample mean.
*/
private double[] pmu;
/**
* The variance of the noise.
*/
private double noise;
/**
* Loading matrix.
*/
private DenseMatrix loading;
/**
* Projection matrix.
*/
private DenseMatrix projection;
/**
* Returns the variable loading matrix, whose columns are ordered by their
* corresponding eigenvalues, from largest to smallest.
*/
public DenseMatrix getLoadings() {
return loading;
}
/**
* Returns the center of data.
*/
public double[] getCenter() {
return mu;
}
/**
* Returns the variance of the noise.
*/
public double getNoiseVariance() {
return noise;
}
/**
* Returns the projection matrix. Note that this is not the loading matrix W of
* the latent model. The projection matrix is M<sup>-1</sup>W<sup>T</sup>, where
* M = W<sup>T</sup>W + σ<sup>2</sup>I. Applied to a centered sample, it yields
* the posterior mean of the latent variable.
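* <p>
* For illustration, a sketch of applying the projection manually (assuming
* {@code ppca} is a fitted model and {@code x} is a sample of the original
* dimension); the result matches {@code project(x)}:
* <pre>{@code
*     double[] centered = x.clone();
*     Math.minus(centered, ppca.getCenter());             // x - mu
*     double[] y = new double[ppca.getProjection().nrows()];
*     ppca.getProjection().ax(centered, y);               // M^{-1} W' (x - mu)
* }</pre>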
*/
public DenseMatrix getProjection() {
return projection;
}
@Override
public double[] project(double[] x) {
if (x.length != mu.length) {
throw new IllegalArgumentException(String.format("Invalid input vector size: %d, expected: %d", x.length, mu.length));
}
double[] y = new double[projection.nrows()];
projection.ax(x, y);
Math.minus(y, pmu);
return y;
}
@Override
public double[][] project(double[][] x) {
if (x[0].length != mu.length) {
throw new IllegalArgumentException(String.format("Invalid input vector size: %d, expected: %d", x[0].length, mu.length));
}
double[][] y = new double[x.length][projection.nrows()];
for (int i = 0; i < x.length; i++) {
projection.ax(x[i], y[i]);
Math.minus(y[i], pmu);
}
return y;
}
/**
* Constructor. Learns a probabilistic principal component analysis model.
* @param data the training data, of which each row is a sample.
* @param k the number of principal components to learn.
*/
public PPCA(double[][] data, int k) {
int m = data.length;
int n = data[0].length;
// Sample mean and (maximum likelihood) sample covariance matrix.
// Only the lower triangle is accumulated; it is symmetrized below.
mu = Math.colMean(data);
DenseMatrix cov = new ColumnMajorMatrix(n, n);
for (int l = 0; l < m; l++) {
for (int i = 0; i < n; i++) {
for (int j = 0; j <= i; j++) {
cov.add(i, j, (data[l][i] - mu[i]) * (data[l][j] - mu[j]));
}
}
}
for (int i = 0; i < n; i++) {
for (int j = 0; j <= i; j++) {
cov.div(i, j, m);
cov.set(j, i, cov.get(i, j));
}
}
// Eigendecomposition of the covariance matrix. The eigenvalues are
// assumed to be returned in descending order.
EigenValueDecomposition eigen = new EigenValueDecomposition(cov);
double[] evalues = eigen.getEigenValues();
DenseMatrix evectors = eigen.getEigenVectors();
// Maximum likelihood estimate of the noise variance: the average of
// the discarded (smallest n - k) eigenvalues.
noise = 0.0;
for (int i = k; i < n; i++) {
noise += evalues[i];
}
noise /= (n - k);
// Loading matrix W = U (Λ - σ²I)^(1/2), where U holds the top k
// eigenvectors, Λ the corresponding eigenvalues, and the arbitrary
// rotation factor is taken as the identity.
loading = new ColumnMajorMatrix(n, k);
for (int i = 0; i < n; i++) {
for (int j = 0; j < k; j++) {
loading.set(i, j, evectors.get(i, j) * Math.sqrt(evalues[j] - noise));
}
}
// M = W'W + σ²I.
DenseMatrix M = loading.ata();
for (int i = 0; i < k; i++) {
M.add(i, i, noise);
}
// Projection matrix M^(-1) W', computed via the Cholesky factorization of M.
CholeskyDecomposition chol = new CholeskyDecomposition(M);
DenseMatrix Mi = chol.inverse();
projection = Mi.abtmm(loading);
// Projected mean, used to center projections in project().
pmu = new double[projection.nrows()];
projection.ax(mu, pmu);
}
}