/** * KMeansDriver.java */ package com.chinamobile.bcbsp.examples.kmeans; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import org.apache.hadoop.fs.Path; import com.chinamobile.bcbsp.BSPConfiguration; import com.chinamobile.bcbsp.Constants; import com.chinamobile.bcbsp.io.KeyValueBSPFileInputFormat; import com.chinamobile.bcbsp.io.TextBSPFileOutputFormat; import com.chinamobile.bcbsp.util.BSPJob; /** * KMeansDriver * This is used to drive the KMeans example. * * @author Bai Qiushi * @version 1.0 2012-2-29 */ public class KMeansDriver { /** * Note: * To use this KMeansBSP example: * (1)Must set the "KMEANS_K" value into the job configuration. * * e.g. BSPJob job.set(KMeansBSP.KMEANS_K, String.valueOf(x)); * * (2)Must set the "KMEANS_CENTERS" value into the job configuration. * * e.g. BSPJob job.set(KMeansBSP.KMEANS_KCENTERS, "x11-x12-...-x1n|x21-x22-...-x2n|...|xk1-xk2-...-xkn"); * * (3)Must register the "AGGREGATE_KCENTERS" aggregator into the job configuration. * * use: BSPJob registerAggregator(KMeansBSP.AGGREGATE_KCENTERS, KCentersAggregator.class, KCentersAggregateValue.class); * * (4)Must register the "AGGREGATE_ERRORS" aggregator into the job configuration. * * use: BSPJob registerAggregator( */ public static void main(String[] args) throws Exception { if (args.length < 5) { System.out.println("Usage: <nSupersteps> <FileInputPath> <FileOutputPath> <K> <K-Centers FilePath>" + " <SplitSize(MB)> <PartitionNum> <SendThreshold> <SendCombineThreshold> " + " <MemDataPercent> <Beta> <HashBucketNum> <MsgPackSize>"); System.exit(-1); } // Set the base configuration for the job BSPConfiguration conf = new BSPConfiguration(); BSPJob bsp = new BSPJob(conf, KMeansDriver.class); bsp.setJobName("KMeans"); //bsp.setNumPartition(2); bsp.setNumSuperStep(Integer.parseInt(args[0])); bsp.setPartitionType(Constants.PARTITION_TYPE.HASH); bsp.setPriority(Constants.PRIORITY.NORMAL); bsp.setWritePartition(com.chinamobile.bcbsp.partition.NotDivideWritePartition.class); // Set the BSP.class bsp.setBspClass(KMeansBSP.class); bsp.setVertexClass(KMVertex.class); bsp.setEdgeClass(KMEdge.class); // Set the InputFormat.class and OutputFormat.classl bsp.setInputFormatClass(KeyValueBSPFileInputFormat.class); bsp.setOutputFormatClass(TextBSPFileOutputFormat.class); // Set the InputPath and OutputPath KeyValueBSPFileInputFormat.addInputPath(bsp, new Path(args[1])); TextBSPFileOutputFormat.setOutputPath(bsp, new Path(args[2])); // Set the graph data implementation version as disk version. bsp.setGraphDataVersion(bsp.DISK_VERSION); // Set the message queues implementation version as disk version. bsp.setMessageQueuesVersion(bsp.DISK_VERSION); // Register the aggregator. bsp.registerAggregator(KMeansBSP.AGGREGATE_KCENTERS, KCentersAggregator.class, KCentersAggregateValue.class); // Set the kmeans.k bsp.set(KMeansBSP.KMEANS_K, String.valueOf(args[3])); File kCentersFile = new File(args[4]); if (!kCentersFile.exists()) { System.out.println("K-Centers FilePath does not exist!"); System.exit(-1); } FileReader fr = new FileReader(kCentersFile); BufferedReader br = new BufferedReader(fr); String kCenters = br.readLine(); //Give seed k centers. //String kCenters = "5.4-3.7-1.5-0.2|6.1-2.8-4.7-1.2|7.7-3.0-6.1-2.3"; // Set the kmeans.kcenters bsp.set(KMeansBSP.KMEANS_CENTERS, kCenters); if (args.length > 5) { bsp.setSplitSize(Integer.valueOf(args[5])); } if (args.length > 6) { bsp.setNumPartition(Integer.parseInt(args[6])); } if (args.length > 7) { bsp.setSendThreshold(Integer.parseInt(args[7])); } if (args.length > 8) { bsp.setSendCombineThreshold(Integer.parseInt(args[8])); } if (args.length > 9) { bsp.setMemoryDataPercent(Float.parseFloat(args[9])); } if (args.length > 10) { bsp.setBeta(Float.parseFloat(args[10])); } if (args.length > 11) { bsp.setHashBucketNumber(Integer.parseInt(args[11])); } if (args.length > 12) { bsp.setMessagePackSize(Integer.parseInt(args[12])); } // Summit the job! bsp.waitForCompletion(true); } }