/** * The main KMeans class * Takes an ascii file as input and calls the clustering routine * * Performs a fuzzy c-means clustering on the data. Fuzzy clustering * is performed using min to max clusters and the clustering that gets the best * score according to a compactness and separation criterion are returned. * * @author Rakesh Komuravelli */ import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.StringTokenizer; public class KMeans { /** Input file name */ private String filename; /** Cluster object */ private Cluster cluster; /** Min clusters */ private int minClusters; /** Max clusters */ private int maxClusters; /** Input points */ private Point[] inpPoints; /** Number of points */ private int numObjs; /** Number of attributes per point */ private int numAttrs; /** Number of threads */ private int nthreads; /** Whether to use zscore transformation */ private boolean useZscore; /** Threshold */ private float threshold; /** Best number of clusters */ private int bestNClusters; /** Cluster centers */ private float[][] clusterCenters; /** * Constructor */ public KMeans(String filename, int maxClusters, int minClusters, int nthreads, boolean useZscore, float threshold) { this.cluster = new Cluster(); this.filename = filename; this.maxClusters = maxClusters; this.minClusters = minClusters; this.nthreads = nthreads; this.useZscore = useZscore; this.threshold = threshold; } /** Usage routine */ private static void usage() { String help = new String("Usage: java KMeans <filename>" + " <minClusters> <maxClusters> <nthreads>"); System.out.println(help); } /** Read input data */ private void readInput() { File inFile = new File(filename); StringBuilder data = new StringBuilder(); boolean flag = false; //get the number of objects and the number of attributes per object try { BufferedReader input = new BufferedReader(new FileReader(inFile)); String line = null; try { while((line = input.readLine()) != null) { numObjs++; if(!flag) { StringTokenizer tok = new StringTokenizer(line, " \t\n"); while(tok.hasMoreElements()) { tok.nextToken(); numAttrs++; } //do not add the id of the object into numattrs numAttrs--; flag = true; } } } finally { input.close(); } } catch(IOException ex) { ex.printStackTrace(); } //Get the list of input points inpPoints = new Point[numObjs]; int i = 0; int j = 0; try { BufferedReader input = new BufferedReader(new FileReader(inFile)); String line = null; try { while((line = input.readLine()) != null) { StringTokenizer tok = new StringTokenizer(line, " \t\n"); tok.nextToken(); j = 0; inpPoints[i] = new Point(numAttrs); while(tok.hasMoreElements()) { inpPoints[i].setFeature(Float.parseFloat(tok.nextToken()), j); j++; } i++; } } finally { input.close(); } } catch(IOException ex) { ex.printStackTrace(); } } /** Make clusters */ private void makeClusters() { int[] cluster_assign; float[][] cluster_centers; Point[] attributes = new Point[numObjs]; int nloops = 1; //call the clustering method for(int loop = 0; loop < nloops; loop++) { //copy the input points into attributes for(int m = 0; m < numObjs; m++) attributes[m] = inpPoints[m].copy(); cluster.execute(nthreads, numObjs, numAttrs, attributes, useZscore, minClusters, maxClusters, threshold); } //get the result: number of clusters and the cluster centers clusterCenters = cluster.getClusterCentres(); bestNClusters = cluster.getBestNclusters(); } /** Dump input */ public void dumpInput() { //dump inputs part1 System.out.println("number of threads: " + nthreads); System.out.println("numObjs: " + numObjs); System.out.println("numAttributes: " + numAttrs); System.out.println("zscore: " + useZscore); System.out.println("max clusters: " + maxClusters); System.out.println("min clusters: " + minClusters); System.out.println("threshold: " + threshold); for(int z = 0; z < numObjs; z++) System.out.print(inpPoints[z]); } /** Print output */ public void printResult() { //print the result: cluster centers for(int m = 0; m < bestNClusters; m++) { System.out.print(m + " "); for(int n = 0; n < numAttrs; n++) { System.out.print(clusterCenters[m][n] + " "); } System.out.println(); } } /** The main function */ public static void main(String[] args) { String fname; int maxClusters; int minClusters; int nthreads = 1; int blockSize; //5th argument is for emitting debug output if(args.length < 4 || args.length > 5) { usage(); System.exit(0); } fname = args[0]; if(fname == "") usage(); minClusters = Integer.parseInt(args[1]); maxClusters = Integer.parseInt(args[2]); nthreads = Integer.parseInt(args[3]); boolean useZscore = true; float threshold = 0.001f; KMeans kmeans = new KMeans(fname, maxClusters, minClusters, nthreads, useZscore, threshold); kmeans.readInput(); //debug //kmeans.dumpInput(); kmeans.makeClusters(); if(args.length == 5) kmeans.printResult(); } }