package kmeans;

/**
 * Computes K means of a given set of points of a fixed dimension.
 * <p>
 * This class implements a sequential program that is readily parallelizable.
 * <p>
 * A translation of the x10.dist/samples/KMeans.x10 program to Java.
 */
public class KMeansSequential {

    /**
     * Compile-time switch for the per-iteration convergence test. It is
     * {@code false}, so {@link #computeMeans} always runs the full number of
     * iterations and its {@code EPS} argument has no effect.
     * NOTE(review): presumably disabled so that timing runs do a fixed amount
     * of work -- confirm before enabling.
     */
    private static final boolean CHECK_CONVERGENCE = false;

    /**
     * Cluster accumulators being filled during the current iteration. When
     * {@link #computeMeans} returns, this array is what {@link #main} prints
     * as the result.
     */
    static SumVector[] redCluster;

    /**
     * Cluster means produced by the previous iteration; points are compared
     * against these while {@link #redCluster} is being filled.
     */
    static SumVector[] blackCluster;

    /**
     * Compute myK means for the given set of points of dimension numDimensions.
     * <p>
     * The result is left in {@link #redCluster}; the elapsed time of the
     * iteration loop is printed to standard output.
     *
     * @param myK            number of clusters
     * @param numIterations  number of iterations to run
     * @param EPS            convergence threshold; currently unused because
     *                       the convergence test is disabled
     * @param numPoints      number of points to process from {@code points}
     * @param numDimensions  number of coordinates per point
     * @param initialCluster initial means; its length is asserted to equal
     *                       {@code numDimensions * myK}
     * @param points         point coordinates, passed through to
     *                       {@code SumVector} together with a point index
     *                       (presumably flattened point-by-point -- confirm
     *                       against {@code SumVector})
     * @throws IllegalArgumentException if some point has no mean at a distance
     *         strictly below {@code Float.MAX_VALUE} (for example NaN or
     *         overflowing distances, or {@code myK == 0})
     */
    static void computeMeans(int myK, int numIterations, float EPS, int numPoints, int numDimensions,
                             float[] initialCluster, float[] points) {
        assert numDimensions * myK == initialCluster.length;

        redCluster = new SumVector[myK];
        for (int i = 0; i < myK; i++) {
            redCluster[i] = new SumVector(numDimensions, initialCluster, i);
        }

        blackCluster = new SumVector[myK];
        for (int i = 0; i < myK; i++) {
            blackCluster[i] = new SumVector(numDimensions);
        }

        long start = System.nanoTime();
        for (int i = 1; i <= numIterations; i++) {
            // Swap roles: the means computed last time become the reference
            // (black), and the other array becomes the accumulator (red).
            SumVector[] tmp = redCluster;
            redCluster = blackCluster;
            blackCluster = tmp;

            for (int p = 0; p < numPoints; p++) {
                int closest = closestCluster(myK, numDimensions, points, p);
                if (closest < 0) {
                    // Previously this surfaced as an opaque
                    // ArrayIndexOutOfBoundsException on index -1.
                    throw new IllegalArgumentException(
                            "Point " + p + " has no cluster mean at a distance below Float.MAX_VALUE"
                            + " (myK=" + myK + "); check for NaN or overflowing coordinates");
                }
                redCluster[closest].addIn(numDimensions, points, p);
            }

            for (int k = 0; k < myK; k++) {
                redCluster[k].normalize();
            }

            if (CHECK_CONVERGENCE) {
                if (hasConverged(myK, EPS)) {
                    break;
                }
            }

            // Reset the old means so the array can serve as the accumulator
            // after the next swap.
            for (int k = 0; k < myK; k++) {
                blackCluster[k].makeZero();
            }
        }
        long stop = System.nanoTime();
        System.out.println("Total time (seconds)" + ((double) (stop - start) / 1e9));
    }

    /**
     * Finds the index of the mean in {@link #blackCluster} with the smallest
     * distance to point {@code p}; the first such mean wins on ties.
     *
     * @return the index of the closest mean, or {@code -1} if no mean has a
     *         distance strictly below {@code Float.MAX_VALUE}
     */
    private static int closestCluster(int myK, int numDimensions, float[] points, int p) {
        int closest = -1;
        float closestDist = Float.MAX_VALUE;
        for (int k = 0; k < myK; k++) {
            float dist = blackCluster[k].distance(numDimensions, points, p);
            if (dist < closestDist) {
                closestDist = dist;
                closest = k;
            }
        }
        return closest;
    }

    /**
     * Reports whether every new mean in {@link #redCluster} lies within
     * {@code EPS} of the corresponding previous mean in {@link #blackCluster}.
     */
    private static boolean hasConverged(int myK, float EPS) {
        for (int k = 0; k < myK; k++) {
            if (redCluster[k].distance(blackCluster[k]) > EPS) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the value following a command-line option.
     *
     * @throws IllegalArgumentException if the option is the last argument
     */
    private static String optionValue(String[] args, int index, String option) {
        if (index >= args.length) {
            throw new IllegalArgumentException("Missing value for option " + option);
        }
        return args[index];
    }

    /**
     * Command-line entry point.
     * <p>
     * Options: {@code -k <clusters>} (default 4), {@code -i <iterations>}
     * (default 50), {@code -e <epsilon>} (default 0.1; has no effect while the
     * convergence test is disabled). Any other argument is taken as the input
     * file name (default {@code points.dat}); the last one given wins.
     * The first K points of the file are used as the initial means.
     *
     * @throws IllegalArgumentException if an option is missing its value, if K
     *         is not positive, or if the file holds fewer than K points
     */
    public static void main(String[] args) {
        String fileName = "points.dat";
        int K = 4;
        int iterations = 50;
        float EPS = 0.1f;

        int argIndex = 0;
        while (argIndex < args.length) {
            String arg = args[argIndex++];
            if (arg.equals("-k")) {
                K = Integer.parseInt(optionValue(args, argIndex++, arg));
            } else if (arg.equals("-i")) {
                iterations = Integer.parseInt(optionValue(args, argIndex++, arg));
            } else if (arg.equals("-e")) {
                EPS = Float.parseFloat(optionValue(args, argIndex++, arg));
            } else {
                fileName = arg;
            }
        }

        if (K < 1) {
            throw new IllegalArgumentException("Number of clusters must be positive, got " + K);
        }

        KMeansDataSet data = KMeansDataSet.readPointsFromFile(fileName);

        float[] initialCluster = new float[K * data.numDimensions];
        if (initialCluster.length > data.points.length) {
            // The initial means are copied from the first K points, so the
            // data set must contain at least that many coordinates.
            throw new IllegalArgumentException(
                    "Requested " + K + " clusters but " + fileName + " only provides "
                    + data.numPoints + " points");
        }
        System.arraycopy(data.points, 0, initialCluster, 0, initialCluster.length);

        computeMeans(K, iterations, EPS, data.numPoints, data.numDimensions, initialCluster, data.points);

        SumVector[] result = redCluster;
        for (int k = 0; k < K; k++) {
            result[k].print();
        }
        System.out.println();
    }
}

// vim: shiftwidth=4:tabstop=4:expandtab