ThinSingularValueDecomposition.java example

Explorer
Foundry-master
- Components
/*
 * File:                ThinSingularValueDecomposition.java
 * Authors:             Kevin R. Dixon
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 *
 * Copyright October 9, 2007, Sandia Corporation.  Under the terms of Contract
 * DE-AC04-94AL85000, there is a non-exclusive license for use of this work by
 * or on behalf of the U.S. Government. Export of this program may require a
 * license from the United States Government. See CopyrightHistory.txt for
 * complete details.
 *
 */

package gov.sandia.cognition.learning.algorithm.pca;

import gov.sandia.cognition.annotation.CodeReview;
import gov.sandia.cognition.learning.data.DatasetUtil;
import gov.sandia.cognition.learning.function.vector.MultivariateDiscriminant;
import gov.sandia.cognition.math.MultivariateStatisticsUtil;
import gov.sandia.cognition.math.matrix.decomposition.EigenvectorPowerIteration;
import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.MatrixFactory;
import gov.sandia.cognition.math.matrix.Vector;
import java.util.ArrayList;
import java.util.Collection;

/**
 * Computes the "thin" singular value decomposition of a dataset.  That is,
 * we find the top "numComponents" left singular vectors of the data matrix by
 * using the {@code EigenvectorPowerIteration} algorithm to find successive
 * components. This method is extremely fast to converge, produces accurate
 * estimates of eigenvectors, and is computationally and memory efficient.
 * In my experience, this approach has been uniformly superior to the 
 * {@code GeneralizedHebbianAlgorithm} approach to computing singular vectors
 * (in terms of accuracy, memory, and computation time).
 *
 * @see gov.sandia.cognition.math.matrix.decomposition.EigenvectorPowerIteration
 * @author Kevin R. Dixon
 * @since  2.0
 *
 */
@CodeReview(
    reviewer="Kevin R. Dixon",
    date="2008-07-23",
    changesNeeded=false,
    comments={
        "Minor changes to javadoc.",
        "Looks fine."
    }
)
public class ThinSingularValueDecomposition
    extends AbstractPrincipalComponentsAnalysis
{

    /**
     * Creates a new instance of ThinSingularValueDecomposition with no
     * previously learned function.
     * @param numComponents
     * Number of components to extract from the data, must be greater than zero
     */
    public ThinSingularValueDecomposition(
        final int numComponents)
    {
        this(numComponents, null);
    }

    /**
     * Creates a new instance of ThinSingularValueDecomposition
     * @param numComponents 
     * Number of components to extract from the data, must be greater than zero
     * @param learned 
     * Vector function that maps the input space onto a numComponents-dimension
     * Vector representing the directions of maximal variance (information
     * gain).  The i-th row in the matrix approximates the i-th column of the
     * "U" matrix of the Singular Value Decomposition.  May be null when
     * nothing has been learned yet (the single-argument constructor passes
     * null).
     */
    public ThinSingularValueDecomposition(
        final int numComponents,
        final PrincipalComponentsAnalysisFunction learned)
    {
        super(numComponents, learned);
    }

    /**
     * Creates a PrincipalComponentsAnalysisFunction based on the number of
     * components configured on this object and the given data, by delegating
     * to {@link #learn(Collection, int)}.  The learned function is also
     * stored on this object through {@code setResult} before being returned.
     * @param data 
     * Dataset for which to compute the PCA, with each Vector of equal
     * dimension
     * @return 
     * Vector function that maps the input space onto a numComponents-dimension
     * Vector representing the directions of maximal variance (information
     * gain).  The i-th row in the matrix approximates the i-th column of the
     * "U" matrix of the Singular Value Decomposition.
     */
    public PrincipalComponentsAnalysisFunction learn(
        final Collection<Vector> data)
    {
        final PrincipalComponentsAnalysisFunction learned =
            ThinSingularValueDecomposition.learn(
                data, this.getNumComponents());
        this.setResult(learned);
        return learned;
    }

    /**
     * Creates a PrincipalComponentsAnalysisFunction based on the number of
     * components and the given data.  The data are centered about their mean,
     * the outer-product matrix of the centered data is formed, and its top
     * "numComponents" eigenvectors (the leading left singular vectors of the
     * centered data) are estimated with {@code EigenvectorPowerIteration}.
     * The input collection and its Vectors are not modified.
     * @param data 
     * Dataset for which to compute the PCA, with each Vector of equal
     * dimension, must contain at least one Vector
     * @param numComponents 
     * Number of components to extract from the data, must be greater than zero
     * @return 
     * Vector function that maps the input space onto a numComponents-dimension
     * Vector representing the directions of maximal variance (information
     * gain).  The i-th row in the matrix approximates the i-th column of the
     * "U" matrix of the Singular Value Decomposition.
     * @throws NullPointerException
     * If data is null
     * @throws IllegalArgumentException
     * If data is empty or numComponents is not greater than zero
     */
    public static PrincipalComponentsAnalysisFunction learn(
        final Collection<Vector> data,
        final int numComponents)
    {

        // Fail fast with a descriptive message instead of letting invalid
        // arguments surface as an obscure failure inside the helpers below.
        if (data == null)
        {
            throw new NullPointerException("data cannot be null");
        }
        if (data.isEmpty())
        {
            throw new IllegalArgumentException(
                "data must contain at least one Vector");
        }
        if (numComponents <= 0)
        {
            throw new IllegalArgumentException(
                "numComponents must be greater than zero, but was "
                + numComponents);
        }

        // Center the data: subtract the sample mean from every point.  The
        // centered points are collected in a new list, leaving "data" as is.
        final Vector mean = MultivariateStatisticsUtil.computeMean(data);
        final ArrayList<Vector> centered =
            new ArrayList<Vector>(data.size());
        for (Vector sample : data)
        {
            centered.add(sample.minus(mean));
        }

        // Outer-product data matrix of the centered points; its leading
        // eigenvectors are the components we are after.
        final Matrix outerProduct =
            DatasetUtil.computeOuterProductDataMatrix(centered);

        final ArrayList<Vector> components =
            EigenvectorPowerIteration.estimateEigenvectors(
                outerProduct, numComponents);

        // Stack the estimated eigenvectors as the rows of the projection
        // matrix, one row per component.
        final int numRows = components.size();
        final Matrix projection = MatrixFactory.getDefault().createMatrix(
            numRows, mean.getDimensionality());
        for (int row = 0; row < numRows; row++)
        {
            projection.setRow(row, components.get(row));
        }

        return new PrincipalComponentsAnalysisFunction(
            mean, new MultivariateDiscriminant(projection));

    }

}