/*
* File: MixtureOfGaussiansExample.java
* Authors: Justin Basilico
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright September 11, 2009, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government. Export
* of this program may require a license from the United States Government.
* See CopyrightHistory.txt for complete details.
*
*/
package examples;
import gov.sandia.cognition.learning.algorithm.clustering.KMeansClustererWithRemoval;
import gov.sandia.cognition.learning.algorithm.clustering.cluster.GaussianCluster;
import gov.sandia.cognition.learning.algorithm.clustering.cluster.GaussianClusterCreator;
import gov.sandia.cognition.learning.algorithm.clustering.divergence.GaussianClusterDivergenceFunction;
import gov.sandia.cognition.learning.algorithm.clustering.initializer.NeighborhoodGaussianClusterInitializer;
import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.MatrixFactory;
import gov.sandia.cognition.math.matrix.Vector;
import gov.sandia.cognition.math.matrix.VectorFactory;
import gov.sandia.cognition.statistics.distribution.MixtureOfGaussians;
import gov.sandia.cognition.statistics.distribution.MultivariateGaussian;
import java.util.ArrayList;
import java.util.Random;
/**
 * Example program demonstrating how a MixtureOfGaussians can be learned from
 * data. A random mixture is generated and sampled to obtain a training set,
 * and a mixture is then fit to that set twice: once with a soft learner
 * (soft assignment of points to gaussians) and once with a hard learner
 * (hard assignment of points to gaussians via a clustering algorithm).
 *
 * @author Justin Basilico
 * @since 3.0
 */
public class MixtureOfGaussiansExample
{

    /**
     * Runs the example: builds a random mixture, samples from it, and prints
     * the actual mixture followed by the soft-learned and hard-learned ones.
     *
     * @param args
     *      Command-line arguments (ignored).
     */
    public static void main(
        final String[] args)
    {
        // ---- Part 1: Create some data -------------------------------------
        // We need something to learn from, so we build a random mixture of
        // gaussians and draw samples from it. A real application would
        // normally load a data set of Vectors rather than generate one.

        // General parameters for the example.
        final int dimensionality = 2;
        final double range = 10.0;
        final int numSamples = 100;
        final int actualNumGaussians = 2;
        final int guessedNumGaussians = 2;

        // The generator is seeded with a fixed value, so every run of the
        // example produces the same data.
        final Random random = new Random(47);

        // The "true" mixture that acts as the source of the training data.
        final MixtureOfGaussians.PDF truth = createRandomMixtureOfGaussians(
            random, actualNumGaussians, dimensionality, range);

        // Show the true mixture first so the learned ones can be compared
        // against it.
        printSection("Actual gaussians:", truth);

        // Draw the training set from the true mixture.
        final ArrayList<Vector> data = truth.sample(random, numSamples);

        // ---- Part 2: Using a Soft Learner ---------------------------------
        // The only parameter we care about here is how many gaussians the
        // learner should look for; it is given to the constructor together
        // with the random number generator.
        final MixtureOfGaussians.EMLearner softLearner =
            new MixtureOfGaussians.EMLearner(guessedNumGaussians, random);

        // Calling learn with the data set runs the algorithm and returns the
        // mixture of gaussians that it fit to the data.
        final MixtureOfGaussians.PDF softMixture = softLearner.learn(data);
        printSection("Soft Learned Gaussians: ", softMixture);

        // ---- Part 3: Using a Hard Learner ---------------------------------
        // The hard learner takes slightly more setup because the clustering
        // algorithm it uses is supplied by the caller. Here we use K-means
        // with removal to do the hard clustering.
        final int maxIterations = 1000;
        final double removalThreshold = 0.1;
        final NeighborhoodGaussianClusterInitializer initializer =
            new NeighborhoodGaussianClusterInitializer(random);
        final GaussianClusterDivergenceFunction divergence =
            new GaussianClusterDivergenceFunction();
        final GaussianClusterCreator creator = new GaussianClusterCreator();

        // The guessed number of gaussians goes to the KMeans constructor so
        // that it knows how many clusters to start with.
        final KMeansClustererWithRemoval<Vector, GaussianCluster> kmeans =
            new KMeansClustererWithRemoval<Vector, GaussianCluster>(
                guessedNumGaussians,
                maxIterations,
                initializer,
                divergence,
                creator,
                removalThreshold);

        // The hard learner is a wrapper around the clustering algorithm; it
        // has no parameters of its own to tune beyond those of KMeans.
        final MixtureOfGaussians.Learner hardLearner =
            new MixtureOfGaussians.Learner(kmeans);

        // As before, call learn on the data to get the learned mixture.
        final MixtureOfGaussians.PDF hardMixture = hardLearner.learn(data);
        printSection("Hard Learned Gaussians: ", hardMixture);
    }

    /**
     * Prints a title line, then the given mixture, then an empty line.
     *
     * @param title
     *      The heading to print above the mixture.
     * @param mixture
     *      The mixture to print.
     */
    private static void printSection(
        final String title,
        final MixtureOfGaussians.PDF mixture)
    {
        System.out.println(title);
        printMixture(mixture);
        System.out.println();
    }

    /**
     * Prints a mixture of gaussians to System.out. For each gaussian in the
     * mixture, its 1-based position, normalized prior weight, mean, and
     * covariance are written.
     *
     * @param mixture
     *      The mixture to print.
     */
    public static void printMixture(
        final MixtureOfGaussians.PDF mixture)
    {
        final int count = mixture.getDistributionCount();

        // Walk the components using the 1-based number that gets displayed.
        for (int number = 1; number <= count; number++)
        {
            final int index = number - 1;
            final MultivariateGaussian component =
                mixture.getDistributions().get(index);

            // The prior is reported as a fraction of the total prior weight.
            final double weight = mixture.getPriorWeights()[index];
            final double normalizedPrior =
                weight / mixture.getPriorWeightSum();
            final Vector center = component.getMean();
            final Matrix spread = component.getCovariance();

            System.out.println("Gaussian " + number);
            System.out.println("Prior: " + normalizedPrior);
            System.out.println("Mean: " + center);
            System.out.println("Covariance: ");
            System.out.println(spread);
        }
    }

    /**
     * Creates a random mixture of gaussians made up of the requested number
     * of random gaussians, each of the given dimensionality.
     *
     * @param random
     *      The random number generator.
     * @param numGaussians
     *      The number of gaussians to put in the mixture.
     * @param dimensionality
     *      The dimensionality of multivariate gaussians in the mixture.
     * @param range
     *      The range of values to allow the mixture over.
     * @return
     *      A new random mixture of gaussians.
     */
    public static MixtureOfGaussians.PDF createRandomMixtureOfGaussians(
        final Random random,
        final int numGaussians,
        final int dimensionality,
        final double range)
    {
        final ArrayList<MultivariateGaussian.PDF> components =
            new ArrayList<MultivariateGaussian.PDF>(numGaussians);

        // Add one freshly generated gaussian per requested component.
        for (int remaining = numGaussians; remaining > 0; remaining--)
        {
            components.add(
                createRandomGaussian(random, dimensionality, range));
        }

        return new MixtureOfGaussians.PDF(components);
    }

    /**
     * Creates a random multivariate gaussian. The mean is drawn uniformly
     * from [-range, range] in each dimension and the covariance is the
     * identity matrix scaled by half of the range.
     *
     * @param random
     *      The random number generator.
     * @param dimensionality
     *      The dimensionality of the multivariate gaussian to create.
     * @param range
     *      The range of values for the gaussian.
     * @return
     *      A new random multivariate gaussian.
     */
    public static MultivariateGaussian.PDF createRandomGaussian(
        final Random random,
        final int dimensionality,
        final double range)
    {
        // The mean is drawn first; it is the only step that uses the random
        // number generator.
        final Vector mean = VectorFactory.getDefault().createUniformRandom(
            dimensionality, -range, range, random);

        final double scale = 0.5 * range;
        final Matrix covariance = MatrixFactory.getDefault().createIdentity(
            dimensionality, dimensionality).scale(scale);

        return new MultivariateGaussian.PDF(mean, covariance);
    }

}