package cc.mallet.cluster.tui; import java.io.FileInputStream; import java.io.ObjectInputStream; import java.util.logging.Logger; import cc.mallet.cluster.Clustering; import cc.mallet.cluster.Clusterings; import cc.mallet.types.InstanceList; import cc.mallet.util.CommandOption; import cc.mallet.util.MalletLogger; //In progress public class Clusterings2Info { private static Logger logger = MalletLogger.getLogger(Clusterings2Info.class.getName()); public static void main (String[] args) { CommandOption .setSummary(Clusterings2Info.class, "A tool to print statistics about a Clusterings."); CommandOption.process(Clusterings2Info.class, args); Clusterings clusterings = null; try { ObjectInputStream iis = new ObjectInputStream(new FileInputStream(inputFile.value)); clusterings = (Clusterings) iis.readObject(); } catch (Exception e) { System.err.println("Exception reading clusterings from " + inputFile.value + " " + e); e.printStackTrace(); } if (printOption.value) { for (int i = 0; i < clusterings.size(); i++) { Clustering c = clusterings.get(i); for (int j = 0; j < c.getNumClusters(); j++) { InstanceList cluster = c.getCluster(j); for (int k = 0; k < cluster.size(); k++) { System.out.println("clustering " + i + " cluster " + j + " element " + k + " " + cluster.get(k).getData()); } System.out.println(); } } } logger.info("number clusterings=" + clusterings.size()); int totalInstances = 0; int totalClusters = 0; for (int i = 0; i < clusterings.size(); i++) { Clustering c = clusterings.get(i); totalClusters += c.getNumClusters(); totalInstances += c.getNumInstances(); } logger.info("total instances=" + totalInstances); logger.info("total clusters=" + totalClusters); logger.info("instances per clustering=" + (double) totalInstances / clusterings.size()); logger.info("instances per cluster=" + (double) totalInstances / totalClusters); logger.info("clusters per clustering=" + (double) totalClusters / clusterings.size()); } static CommandOption.String inputFile = new CommandOption.String( Clusterings2Info.class, "input", "FILENAME", true, "text.vectors", "The filename from which to read the list of instances.", null); static CommandOption.Boolean printOption = new CommandOption.Boolean(Clusterings2Info.class, "print", "BOOLEAN", false, false, "If true, print all clusters", null); }