/**
*
* Interface for k-means clustering; called by main.
* For each number of clusters in the range min to max,
* it executes k-means clustering to find the best cluster number.
*
* @author Hyojin Sung (sung12@cs.uiuc.edu)
* @author Rakesh Komuravelli
*
*/
public class Cluster {
    /** Array of cluster centers produced by the most recent k-means run in {@link #execute}. */
    private float[][] clusterCentres;
    /** Number of clusters used by the most recent k-means run in {@link #execute}. */
    private int bestNclusters;

    /**
     * Getter for cluster centers.
     *
     * @return the stored array itself (not a copy); {@code null} until
     *         {@link #execute} has completed at least one k-means run
     */
    public float[][] getClusterCentres() {
        return clusterCentres;
    }

    /**
     * Getter for number of clusters.
     *
     * @return the cluster count recorded by {@link #execute}; 0 until a run has completed
     */
    public int getBestNclusters() {
        return bestNclusters;
    }

    /**
     * Extract moments of the first {@code num_elts} values of {@code data}.
     *
     * <p>{@code moments[0]} is the arithmetic mean; for {@code i >= 1},
     * {@code moments[i]} is the mean of {@code (x - mean)^(i+1)}, so
     * {@code moments[1]} is the population variance.
     *
     * @param data        input values
     * @param num_elts    number of leading elements of {@code data} to use
     * @param num_moments number of moments to compute; must be at least 1
     * @return a new array of length {@code num_moments}; entries are NaN
     *         when {@code num_elts} is 0 (0/0 in float arithmetic)
     */
    private static float[] extractMoments(float[] data, int num_elts,
                                          int num_moments)
    {
        float[] moments = new float[num_moments];

        for (int i = 0; i < num_elts; i++) {
            moments[0] += data[i];
        }
        moments[0] = moments[0] / num_elts;

        for (int i = 1; i < num_moments; i++) {
            moments[i] = 0;
            for (int j = 0; j < num_elts; j++) {
                // Compound assignment narrows the double sum back to float each step.
                moments[i] += Math.pow((data[j] - moments[0]), i + 1);
            }
            moments[i] = moments[i] / num_elts;
        }
        return moments;
    }

    /**
     * Zscore transformation: rewrites every attribute in place as
     * {@code (value - mean) / stddev}, with mean and standard deviation taken
     * over all {@code numObjects} objects for that attribute.
     *
     * <p>An attribute whose standard deviation is exactly 0 is only centered
     * (divided by 1) instead of being divided by zero, which would otherwise
     * turn every value of that attribute into NaN.
     *
     * @param data          objects whose features are transformed in place
     * @param numObjects    number of leading entries of {@code data} to process
     * @param numAttributes number of features per object
     */
    private static void zscoreTransform (Point[] data,
                                         int numObjects,
                                         int numAttributes)
    {
        float[] single_variable = new float[numObjects];

        for (int i = 0; i < numAttributes; i++) {
            // Gather attribute i of every object into one contiguous column.
            for (int j = 0; j < numObjects; j++) {
                single_variable[j] = data[j].getFeature(i);
            }

            float[] moments = extractMoments(single_variable, numObjects, 2);
            float mean = moments[0];
            float stddev = (float) Math.sqrt((double) moments[1]);

            // Zero variance means the attribute is constant: avoid 0/0 -> NaN.
            float divisor = (stddev == 0.0f) ? 1.0f : stddev;

            for (int j = 0; j < numObjects; j++) {
                data[j].setFeature((data[j].getFeature(i) - mean) / divisor, i);
            }
        }
    }

    /**
     * Find best clusters: runs k-means once for every cluster count from
     * {@code min_nclusters} to {@code max_nclusters} (inclusive) and prints
     * the overall elapsed time in seconds to standard output.
     *
     * <p>Each run overwrites the stored centers and cluster count, so after
     * the call the getters report the result for the last cluster count tried.
     * If {@code min_nclusters > max_nclusters} no run happens and the stored
     * values are left untouched. The random generator is re-seeded with the
     * same seed (7) before every run.
     *
     * @param nthreads             number of threads, forwarded to the k-means worker
     * @param numObjects           number of input objects
     * @param numAttributes        number of attributes of each object
     * @param attributes           input objects; modified in place when
     *                             {@code use_zscore_transform} is true
     * @param use_zscore_transform whether to z-score normalize the attributes first
     * @param min_nclusters        smallest cluster count to try
     * @param max_nclusters        largest cluster count to try
     * @param threshold            forwarded unchanged to the k-means worker
     * @return always 0
     */
    public int execute (
            int nthreads,                 /* in: number of threads*/
            int numObjects,               /* number of input objects */
            int numAttributes,            /* size of attribute of each object */
            Point[] attributes,
            boolean use_zscore_transform,
            int min_nclusters,            /* testing k range from min to max */
            int max_nclusters,
            float threshold)              /* in: */
    {
        if (use_zscore_transform) {
            zscoreTransform(attributes, numObjects, numAttributes);
        }

        RandomType randomPtr = new RandomType();
        long start = System.nanoTime();

        // From min_nclusters to max_nclusters, find best_nclusters
        for (int nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++) {
            randomPtr.random_seed(7);
            Normal work = new Normal(nthreads,
                                     attributes,
                                     numAttributes,
                                     numObjects,
                                     nclusters,
                                     threshold,
                                     randomPtr);
            // No quality comparison is made: the latest run always replaces the stored result.
            clusterCentres = work.execute();
            bestNclusters = nclusters;
        } /* nclusters */

        long end = System.nanoTime();
        System.out.println("Overall elapsed time for finding best clusters = "
                           + (end-start)/1000000000.0);
        return 0;
    }
}