/**
 * Copyright 2007 DFKI GmbH.
 * All Rights Reserved. Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.machinelearning;

import java.awt.Color;
import java.util.Arrays;

import javax.swing.JFrame;

import marytts.signalproc.display.FunctionGraph;
import marytts.util.math.MathUtils;
import marytts.util.math.Polynomial;

/**
 * K-Means clustering training algorithm.
 *
 * Reference: J. MacQueen, 1967, "Some methods for classification and analysis of multivariate observations", Proc. Fifth
 * Berkeley Symp. on Math. Statist. and Prob., Vol. 1 (Univ. of Calif. Press, 1967), pp. 281-297.
 *
 * This version is adapted to work with a distance function between polynomials.
 *
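 * <p>
 * A minimal usage sketch (the parameter value below is illustrative, not a default; see KMeansClusteringTrainerParams for
 * the remaining settings such as maxIterations, minClusterChangePercent and minSamplesInOneCluster):
 *
 * <pre>
 * KMeansClusteringTrainerParams params = new KMeansClusteringTrainerParams();
 * params.numClusters = 50;
 * PolynomialCluster[] clusters = PolynomialKMeansClusteringTrainer.train(polynomials, params);
 * </pre>
 *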
 * @author Oytun Türk, Marc Schröder
 */
public class PolynomialKMeansClusteringTrainer {

    /**
     * Clusters polynomials using the K-Means clustering procedure, with a polynomial distance function. Training consists of
     * four steps: (a) initialization (assignment of cluster means to data points that are far away from each other, plus
     * slight random shifts); (b) hard clustering of samples according to the new cluster means; (c) update of cluster means
     * using the assigned samples; (d) re-iteration of (b) and (c) until convergence, i.e. until the overall cluster
     * occupancy no longer changes much.
     *
     * @param polynomials
     *            the observations to cluster
     * @param kmeansParams
     *            all training parameters (see KMeansClusteringTrainerParams.java for details)
     * @return the clusters trained
     */
    public static PolynomialCluster[] train(Polynomial[] polynomials, KMeansClusteringTrainerParams kmeansParams) {
        int[] totalObservationsInClusters; // Total number of observations in each cluster
        int[] clusterIndices; // Assigned cluster for each observation vector
        int observations = polynomials.length;
        int polynomialOrder = polynomials[0].getOrder();

        // Intermediate representations for computing updated cluster means:
        Polynomial[] m_new = new Polynomial[kmeansParams.numClusters];
        // b[i][j] == true if observation i belongs to cluster j
        boolean[][] b = new boolean[observations][kmeansParams.numClusters];
        boolean[][] b_old = new boolean[observations][kmeansParams.numClusters];

        Polynomial[] clusterMeans = new Polynomial[kmeansParams.numClusters];
        for (int k = 0; k < kmeansParams.numClusters; k++)
            clusterMeans[k] = new Polynomial(polynomialOrder);

        for (int t = 1; t <= observations; t++) {
            Arrays.fill(b[t - 1], false);
        }

        // Select initial cluster centers
        Polynomial mAll = Polynomial.mean(polynomials);
        double[] dists = new double[observations];
        double[] tmp = new double[kmeansParams.numClusters + 1];
        for (int k = 1; k <= kmeansParams.numClusters; k++) {
            // For each cluster, initiate it with the observation that is most distant from the clusters initiated so far
            for (int t = 1; t <= observations; t++) {
                if (k > 1) {
                    for (int i = 1; i <= k - 1; i++) {
                        tmp[i - 1] = clusterMeans[i - 1].polynomialDistance(polynomials[t - 1]);
                    }
                    tmp[k - 1] = mAll.polynomialDistance(polynomials[t - 1]);
                    dists[t - 1] = MathUtils.mean(tmp, 0, k - 1);
                } else {
                    dists[t - 1] = mAll.polynomialDistance(polynomials[t - 1]);
                }
            }
            // Find the most distant observation. Use Double.NEGATIVE_INFINITY rather than Double.MIN_VALUE,
            // which is the smallest *positive* double and would leave maxInd unset if all distances were zero:
            double maxD = Double.NEGATIVE_INFINITY;
            int maxInd = -1;
            for (int t = 1; t <= observations; t++) {
                if (dists[t - 1] > maxD) {
                    maxD = dists[t - 1];
                    maxInd = t;
                }
            }
            clusterMeans[k - 1].copyCoeffs(polynomials[maxInd - 1]);
            // System.out.println("Cluster center " + String.valueOf(k) + " initialized...");
        }
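
        // Note on the seeding above: each new center is the observation with the largest mean distance to the centers
        // chosen so far (the overall mean mAll serves as reference for the first pick). This farthest-point heuristic
        // spreads the initial centers across the data; the "slight random shifts" mentioned in the Javadoc are applied
        // later, when under-populated clusters are re-seeded from large ones.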

        int[] tinyClusterInds = new int[kmeansParams.numClusters];
        int numTinyClusters = 0;
        totalObservationsInClusters = new int[kmeansParams.numClusters];
        clusterIndices = new int[observations];
        int iter = 0;
        boolean bCont = true;
        while (bCont) {
            // Associate each observation with the nearest cluster
            for (int t = 1; t <= observations; t++) { // Over all observations
                double minDist = Double.MAX_VALUE;
                int ind = -1;
                for (int i = 1; i <= kmeansParams.numClusters; i++) { // Over all clusters
                    double tmpDist = clusterMeans[i - 1].polynomialDistance(polynomials[t - 1]);
                    b[t - 1][i - 1] = false;
                    if (tmpDist < minDist) {
                        minDist = tmpDist;
                        ind = i;
                    }
                }
                // Associate the observation with the cluster to which it has minimum distance:
                b[t - 1][ind - 1] = true;
            }

            // Prepare means per cluster based on new cluster members:
            for (int i = 1; i <= kmeansParams.numClusters; i++) {
                totalObservationsInClusters[i - 1] = 0;
                tinyClusterInds[i - 1] = 0;
            }
            int c = 1; // count tiny clusters
            for (int i = 1; i <= kmeansParams.numClusters; i++) {
                m_new[i - 1] = new Polynomial(polynomialOrder);
                for (int t = 1; t <= observations; t++) {
                    if (b[t - 1][i - 1]) { // observation t is associated with cluster i
                        for (int d = 0; d <= polynomialOrder; d++)
                            m_new[i - 1].coeffs[d] += polynomials[t - 1].coeffs[d];
                        clusterIndices[t - 1] = i - 1; // zero-based
                        (totalObservationsInClusters[i - 1])++;
                    }
                }
                // Remember this cluster if it has too few observations,
                // i.e. fewer than kmeansParams.minSamplesInOneCluster:
                if (totalObservationsInClusters[i - 1] < kmeansParams.minSamplesInOneCluster) {
                    tinyClusterInds[c - 1] = i;
                    numTinyClusters++;
                    c++;
                }
            }

            // Update the means of clusters if these are big enough,
            // and replace tiny clusters with random variations of big ones:
            c = 0;
            // need doubles to use quicksort:
            double[] tmps = new double[totalObservationsInClusters.length];
            for (int a = 0; a < tmps.length; a++) {
                tmps[a] = totalObservationsInClusters[a];
            }
            int[] inds = MathUtils.quickSort(tmps, 0, kmeansParams.numClusters - 1);
            for (int i = 1; i <= kmeansParams.numClusters; i++) {
                if (totalObservationsInClusters[i - 1] >= kmeansParams.minSamplesInOneCluster) {
                    // a normal-sized cluster -- update mean
                    for (int d = 0; d <= polynomialOrder; d++) {
                        clusterMeans[i - 1].coeffs[d] = m_new[i - 1].coeffs[d] / totalObservationsInClusters[i - 1];
                    }
                } else {
                    // a tiny cluster -- reinitialise with a random variation of one of the big clusters
                    for (int d = 0; d <= polynomialOrder; d++) {
                        double rnd = 2 * (Math.random() - 0.5) /* a random number between -1 and 1 */
                                * clusterMeans[inds[kmeansParams.numClusters - c - 1]].coeffs[d] * 0.01;
                        clusterMeans[i - 1].coeffs[d] = clusterMeans[inds[kmeansParams.numClusters - c - 1]].coeffs[d] + rnd;
                    }
                    c++;
                }
            }

            iter++;
            // Count the number of observations that have changed cluster:
            int totChanged = 0;
            if (iter > 1) {
                if (iter >= kmeansParams.maxIterations) {
                    bCont = false;
                }
                for (int t = 1; t <= observations; t++) {
                    for (int i = 1; i <= kmeansParams.numClusters; i++) {
                        if (b_old[t - 1][i - 1] != b[t - 1][i - 1]) {
                            totChanged++;
                            break; // Count each difference once
                        }
                    }
                }
                double changedPerc = (double) totChanged / observations * 100.0;
                if (changedPerc < kmeansParams.minClusterChangePercent) {
                    // stop if the percentage of observations that changed cluster is less than
                    // kmeansParams.minClusterChangePercent:
                    bCont = false;
                }
                // System.out.println("K-Means iteration: " + String.valueOf(iter) + " with " + String.valueOf(changedPerc)
                // + " percent of cluster assignments updated");
            }
            // else
            // System.out.println("K-Means iteration: " + String.valueOf(iter) + " K-means initialized");

            for (int t = 1; t <= observations; t++) {
                System.arraycopy(b[t - 1], 0, b_old[t - 1], 0, b[t - 1].length);
            }
        }
        // We do not compute covariances here, because we are unidimensional only.

        // Now fill the clusters with their means and members:
        PolynomialCluster[] clusters = new PolynomialCluster[kmeansParams.numClusters];
        for (int i = 1; i <= kmeansParams.numClusters; i++) {
            Polynomial[] members = new Polynomial[totalObservationsInClusters[i - 1]];
            int m = 0;
            for (int t = 1; t <= observations; t++) {
                if (b[t - 1][i - 1]) {
                    members[m] = polynomials[t - 1];
                    m++;
                }
            }
            assert m == members.length;
            clusters[i - 1] = new PolynomialCluster(clusterMeans[i - 1], members);
        }
        return clusters;
        // System.out.println("K-Means clustering completed...");
    }
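
    /**
     * Test and demo entry point: clusters 1000 random order-3 polynomials into 50 clusters and visualises each resulting
     * cluster in a JFrame (the cluster mean as blue dots, the members as gray lines), pausing briefly between clusters.
     */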
    public static void main(String[] args) {
        // Test clustering with random polynomials, and visualise the result
        int order = 3;
        int numPolynomials = 1000;
        int numClusters = 50;

        // Initialise with random data:
        Polynomial[] ps = new Polynomial[numPolynomials];
        for (int i = 0; i < numPolynomials; i++) {
            double[] coeffs = new double[order + 1];
            for (int c = 0; c < coeffs.length; c++) {
                coeffs[c] = Math.random();
            }
            ps[i] = new Polynomial(coeffs);
        }
        KMeansClusteringTrainerParams params = new KMeansClusteringTrainerParams();
        params.numClusters = numClusters;

        // Train:
        PolynomialCluster[] clusters = PolynomialKMeansClusteringTrainer.train(ps, params);

        // Visualise:
        FunctionGraph clusterGraph = new FunctionGraph(0, 1, new double[1]);
        clusterGraph.setYMinMax(0, 5);
        clusterGraph.setPrimaryDataSeriesStyle(Color.BLUE, FunctionGraph.DRAW_DOTS, FunctionGraph.DOT_FULLCIRCLE);
        JFrame jf = clusterGraph.showInJFrame("", false, true);
        for (int i = 0; i < clusters.length; i++) {
            double[] meanValues = clusters[i].getMeanPolynomial().generatePolynomialValues(100, 0, 1);
            clusterGraph.updateData(0, 1. / meanValues.length, meanValues);
            Polynomial[] members = clusters[i].getClusterMembers();
            for (int m = 0; m < members.length; m++) {
                double[] pred = members[m].generatePolynomialValues(meanValues.length, 0, 1);
                clusterGraph.addDataSeries(pred, Color.GRAY, FunctionGraph.DRAW_LINE, -1);
                jf.repaint();
            }
            jf.setTitle("Cluster " + (i + 1) + " of " + clusters.length + ": " + members.length + " members");
            jf.repaint();
            try {
                Thread.sleep(500);
            } catch (InterruptedException ie) {
            }
        }
        System.exit(0);
    }
}