/*******************************************************************************
 * Copyright (c) 2010 Haifeng Li
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

package smile.projection;

import java.io.Serializable;

import smile.math.Math;
import smile.math.kernel.MercerKernel;
import smile.math.matrix.EigenValueDecomposition;

/**
 * Kernel principal component analysis. Kernel PCA is an extension of
 * principal component analysis (PCA) using techniques of kernel methods.
 * Using a kernel, the originally linear operations of PCA are done in a
 * reproducing kernel Hilbert space with a non-linear mapping.
 * <p>
 * In practice, a large data set leads to a large kernel/Gram matrix K, and
 * storing K may become a problem. One way to deal with this is to perform
 * clustering on the large dataset and populate the kernel matrix with the
 * cluster means. Since even this method may yield a relatively large K,
 * it is common to compute only the top P eigenvalues and eigenvectors of K.
 * <p>
 * Kernel PCA with an isotropic kernel function is closely related to metric MDS.
 * Carrying out metric MDS on the kernel matrix K produces a configuration of points
 * equivalent to that obtained from the distances
 * (2(1 - K(x<sub>i</sub>, x<sub>j</sub>)))<sup>1/2</sup> computed in feature space.
 * <p>
 * Kernel PCA also has close connections with Isomap, LLE, and Laplacian eigenmaps.
 *
 * <h2>References</h2>
 * <ol>
 * <li>Bernhard Scholkopf, Alexander Smola, and Klaus-Robert Muller. Nonlinear Component Analysis as a Kernel Eigenvalue Problem. Neural Computation, 1998.</li>
 * </ol>
 *
 * @see smile.math.kernel.MercerKernel
 * @see PCA
 * @see smile.manifold.IsoMap
 * @see smile.manifold.LLE
 * @see smile.manifold.LaplacianEigenmap
 * @see smile.mds.SammonMapping
 *
 * @author Haifeng Li
 */
public class KPCA<T> implements Projection<T>, Serializable {
    private static final long serialVersionUID = 1L;

    /**
     * The dimension of feature space.
     */
    private int p;
    /**
     * Learning data.
     */
    private T[] data;
    /**
     * Mercer kernel.
     */
    private MercerKernel<T> kernel;
    /**
     * The row mean of kernel matrix.
     */
    private double[] mean;
    /**
     * The mean of kernel matrix.
     */
    private double mu;
    /**
     * Eigenvalues of kernel principal components.
     */
    private double[] latent;
    /**
     * Projection matrix.
     */
    private double[][] projection;
    /**
     * The coordinates of projected training data.
     */
    private double[][] coordinates;

    /**
     * Constructor. Learn kernel principal component analysis.
     * @param data learning data.
     * @param kernel Mercer kernel to compute kernel matrix.
     * @param threshold only principal components with eigenvalues larger than
     * the given threshold will be kept.
     */
    public KPCA(T[] data, MercerKernel<T> kernel, double threshold) {
        this(data, kernel, data.length, threshold);
    }

    /**
     * Constructor. Learn kernel principal component analysis.
     * @param data learning data.
     * @param kernel Mercer kernel to compute kernel matrix.
     * @param k choose up to k principal components (with eigenvalues larger
     * than 0.0001) to be used for projection.
     */
    public KPCA(T[] data, MercerKernel<T> kernel, int k) {
        this(data, kernel, k, 0.0001);
    }

    /**
     * Constructor. Learn kernel principal component analysis.
     * @param data learning data.
     * @param kernel Mercer kernel to compute kernel matrix.
     * @param k choose top k principal components used for projection.
     * @param threshold only principal components with eigenvalues larger than
     * the given threshold will be kept.
     */
    public KPCA(T[] data, MercerKernel<T> kernel, int k, double threshold) {
        if (threshold < 0) {
            throw new IllegalArgumentException("Invalid threshold = " + threshold);
        }

        if (k < 1 || k > data.length) {
            throw new IllegalArgumentException("Invalid dimension of feature space: " + k);
        }

        this.data = data;
        this.kernel = kernel;

        // Compute the symmetric kernel (Gram) matrix on the learning data.
        int n = data.length;
        double[][] K = new double[n][n];
        for (int i = 0; i < n; i++) {
            for (int j = 0; j <= i; j++) {
                K[i][j] = kernel.k(data[i], data[j]);
                K[j][i] = K[i][j];
            }
        }

        // Center the kernel matrix in feature space (double centering with
        // row means and the grand mean).
        mean = Math.rowMean(K);
        mu = Math.mean(mean);

        for (int i = 0; i < n; i++) {
            for (int j = 0; j <= i; j++) {
                K[i][j] = K[i][j] - mean[i] - mean[j] + mu;
                K[j][i] = K[i][j];
            }
        }

        // Eigendecomposition of the centered kernel matrix, keeping the top k pairs.
        EigenValueDecomposition eigen = Math.eigen(K, k);

        // Normalize eigenvalues by the sample size and keep only components
        // whose normalized eigenvalues exceed the threshold.
        p = 0;
        for (int i = 0; i < k; i++) {
            double e = (eigen.getEigenValues()[i] /= n);
            if (e > threshold) {
                p++;
            } else {
                break;
            }
        }

        // Each projection row is an eigenvector scaled by 1/sqrt(its eigenvalue).
        latent = new double[p];
        projection = new double[p][n];
        for (int j = 0; j < p; j++) {
            latent[j] = eigen.getEigenValues()[j];
            double s = Math.sqrt(latent[j]);
            for (int i = 0; i < n; i++) {
                projection[j][i] = eigen.getEigenVectors().get(i, j) / s;
            }
        }

        // Scores of the training data: projection applied to the centered kernel rows.
        coordinates = new double[n][p];
        for (int i = 0; i < n; i++) {
            Math.ax(projection, K[i], coordinates[i]);
        }
    }

    /**
     * Returns the eigenvalues of kernel principal components, ordered from largest to smallest.
     */
    public double[] getVariances() {
        return latent;
    }

    /**
     * Returns the projection matrix. The dimension-reduced data can be obtained
     * by y = W * K(x, ·).
     */
    public double[][] getProjection() {
        return projection;
    }

    /**
     * Returns the nonlinear principal component scores, i.e., the representation
     * of learning data in the nonlinear principal component space. Rows
     * correspond to observations, columns to components.
     */
    public double[][] getCoordinates() {
        return coordinates;
    }

    @Override
    public double[] project(T x) {
        // Kernel values between the new point and the training data.
        int n = data.length;
        double[] y = new double[n];
        for (int i = 0; i < n; i++) {
            y[i] = kernel.k(x, data[i]);
        }

        // Center against the training row means and grand mean, then project.
        double my = Math.mean(y);
        for (int i = 0; i < n; i++) {
            y[i] = y[i] - my - mean[i] + mu;
        }

        double[] z = new double[p];
        Math.ax(projection, y, z);
        return z;
    }

    @Override
    public double[][] project(T[] x) {
        int m = x.length;
        int n = data.length;
        double[][] y = new double[m][n];
        for (int i = 0; i < m; i++) {
            for (int j = 0; j < n; j++) {
                y[i][j] = kernel.k(x[i], data[j]);
            }

            double my = Math.mean(y[i]);
            for (int j = 0; j < n; j++) {
                y[i][j] = y[i][j] - my - mean[j] + mu;
            }
        }

        double[][] z = new double[x.length][p];
        for (int i = 0; i < y.length; i++) {
            Math.ax(projection, y[i], z[i]);
        }
        return z;
    }
}
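
// A minimal usage sketch, assuming the Gaussian kernel from smile.math.kernel
// and hypothetical arrays `x` (double[][] training data) and `xNew` (a new
// observation); any MercerKernel<T> and data type T works the same way.
//
//     MercerKernel<double[]> kernel = new smile.math.kernel.GaussianKernel(1.0);
//     KPCA<double[]> kpca = new KPCA<double[]>(x, kernel, 2);
//     double[][] scores = kpca.getCoordinates(); // training data in the top-2 component space
//     double[] z = kpca.project(xNew);           // project a previously unseen observation
//
// getVariances() then reports the retained (normalized) eigenvalues, which can be
// used to judge how much structure the kept components capture.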