ClusterEvaluation.java example

Explorer
wekax-master
- weka-3-6-2
- wekaUT
  - GetAllSubPackages.java
  - weka
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    ClusterEvaluation.java
 *    Copyright (C) 1999 Mark Hall
 *
 */

package  weka.clusterers;

import  java.util.*;
import  java.io.*;
import  weka.core.*;
import  weka.filters.Filter;
import  weka.filters.unsupervised.attribute.Remove;

/**
 * Class for evaluating clustering models.<p>
 *
 * Valid options are: <p>
 *
 * -t <name of the training file> <br>
 * Specify the training file. <p>
 *
 * -T <name of the test file> <br>
 * Specify the test file to apply clusterer to. <p>
 *
 * -d <name of file to save clustering model to> <br>
 * Specify output file. <p>
 *
 * -l <name of file to load clustering model from> <br>
 * Specify input file. <p>
 *
 * -p <attribute range> <br>
 * Output predictions. Predictions are for the training file if only the
 * training file is specified, otherwise they are for the test file. The range
 * specifies attribute values to be output with the predictions.
 * Use '-p 0' for none. <p>
 *
 * -x <num folds> <br>
 * Set the number of folds for a cross validation of the training data.
 * Cross validation can only be done for distribution clusterers and will
 * be performed if the test file is missing. <p>
 *
 * -c <class> <br>
 * Set the class attribute. If set, then class based evaluation of clustering
 * is performed. <p>
 *
 * @author   Mark Hall (mhall@cs.waikato.ac.nz)
 * @version  $Revision: 1.1.1.1 $
 */
public class ClusterEvaluation {

  /** the instances to cluster; replaced by every call to setInstances */
  private Instances instances;

  /** the clusterer being evaluated; installed by setClusterer */
  private Clusterer clusterer;

  /** do cross validation (DistributionClusterers only) */
  private boolean m_doXval;

  /** the number of folds to use for cross validation */
  private int m_numFolds;

  /** seed to use for cross validation */
  private int m_seed;

  /**
   * accumulates the textual evaluation report; created once in the
   * constructor and appended to by the instance evaluation methods
   */
  private StringBuffer results;

  /** number of instances in the current data set (cached by setInstances) */
  private int numInstances;
  /** number of clusters, copied from the clusterer's K field by setClusterer */
  private int numClusters;
  /**
   * number of values of the class attribute; only refreshed by setInstances
   * when the data set has a class index, otherwise the previous value is kept
   */
  private int numClasses;

  /** holds the assignments of instances to clusters for a particular testing
      dataset; evaluateClusterer() treats a value of -1 as "unclustered" */
  private double [] assignments;

  /** will hold the mapping of classes to clusters (for class based 
      evaluation); null until a class based evaluation has been run */
  private int [] m_classToCluster = null;

  /**
   * Installs the clusterer to evaluate and refreshes the state derived from
   * it: the cluster count is taken from the clusterer's K field and the data
   * set from its instances field (via setInstances).
   *
   * @param clusterer the clusterer to use
   */
  public void setClusterer(Clusterer clusterer) {
    final Clusterer chosen = clusterer;
    this.clusterer = chosen;
    this.numClusters = chosen.K;
    this.setInstances(chosen.instances);
  }
  /**
   * Sets the data set to evaluate on and caches its size. The class count is
   * only refreshed when the data set has a class index; otherwise the value
   * from an earlier call is left untouched.
   *
   * @param instances the instances to evaluate on
   */
  public void setInstances(Instances instances){
    this.instances = instances;
    this.numInstances = instances.numInstances();
    final boolean hasClass = instances.classIndex() >= 0;
    if (hasClass) {
      this.numClasses = instances.classAttribute().numValues();
    }
  }

  /**
   * Records whether a cross validation should be performed.
   *
   * @param x true if cross validation is to be done
   */
  public void setDoXval(boolean x) {
    this.m_doXval = x;
  }

  /**
   * Records the number of folds to use for cross validation.
   *
   * @param folds the number of folds
   */
  public void setFolds(int folds) {
    this.m_numFolds = folds;
  }

  /**
   * Records the random seed to use for cross validation.
   *
   * @param s the seed
   */
  public void setSeed(int s) {
    this.m_seed = s;
  }

  /**
   * Returns everything that has been appended to the results buffer so far.
   *
   * @return a string detailing the results of clustering a data set
   */
  public String clusterResultsToString() {
    final String report = this.results.toString();
    return report;
  }

  /**
   * Returns the cached number of clusters, i.e. the value copied from the
   * clusterer when it was last installed with setClusterer.
   *
   * @return the number of clusters
   */
  public int getNumClusters() {
    return this.numClusters;
  }

  /**
   * Returns the cluster assignments of the most recently evaluated set of
   * instances. The internal array itself is returned, not a copy; it is null
   * until an evaluation has been run.
   *
   * @return an array of cluster assignments
   */
  public double [] getClusterAssignments() {
    return this.assignments;
  }

  /**
   * Returns the minimum error class to cluster mapping, indexed by cluster
   * number. The internal array itself is returned, not a copy; it is null
   * until a class based evaluation has been run.
   *
   * @return an array of class to cluster mappings
   */
  public int [] getClassesToClusters() {
    return this.m_classToCluster;
  }

  /**
   * Default constructor: evaluates a newly created EM clusterer.
   */
  public ClusterEvaluation () {
    this(new EM());
  }

  /**
   * Creates an evaluation for the given clusterer using the instances the
   * clusterer itself holds.
   *
   * @param clusterer the clusterer to evaluate
   */
  public ClusterEvaluation(Clusterer clusterer){
    this(clusterer, clusterer.instances);
  }

  /**
   * Creates an evaluation for the given clusterer on the given instances.
   * Defaults: 10 folds, no cross validation, seed 1, an empty results buffer
   * and no assignments.
   *
   * @param clusterer the clusterer to evaluate
   * @param instances the instances to evaluate on
   */
  public ClusterEvaluation(Clusterer clusterer,Instances instances){
    // cross validation defaults
    this.setFolds(10);
    this.setDoXval(false);
    this.setSeed(1);

    // setClusterer first installs the clusterer's own instances; the explicit
    // data set passed in then replaces them
    this.setClusterer(clusterer);
    this.setInstances(instances);

    this.results = new StringBuffer();
    this.assignments = null;
  }

  /**
   * Clusters a set of instances with the current clusterer and evaluates the
   * outcome. The instances become the current data set, the clusterer is
   * asked to cluster them, and the statistics are then appended to the
   * results buffer by the no-argument evaluateClusterer().
   *
   * @param test the set of instances to cluster
   * @exception Exception if something goes wrong
   */
  public void evaluateClusterer(Instances test) throws Exception {
    final Instances data = test;
    this.setInstances(data);
    this.clusterer.clusterInstances(data);
    this.evaluateClusterer();
  }
  /**
   * Evaluates the clusterer's current assignments on the current instances
   * and appends the report to the results buffer: the clusterer description,
   * the number and percentage of instances per cluster, the number of
   * unclustered instances (assignment -1) and, for distribution clusterers,
   * the average log likelihood. If the instances have a class index, a class
   * based evaluation is appended as well.
   *
   * @exception Exception if something goes wrong
   */
  public void evaluateClusterer()throws Exception{
    int numInstFieldWidth = (int)((Math.log(numInstances)/Math.log(10))+1);
    double[] instanceStats = new double[numClusters];
    int unclusteredInstances = 0;
    assignments=clusterer.getAssignments();
    for (int i=0;i<numInstances;i++) {
      if(assignments[i]==-1){
        unclusteredInstances++;
      }else{
        instanceStats[(int)assignments[i]]++;
      }
    }
    // number of instances that were actually assigned to a cluster
    double sum = Utils.sum(instanceStats);
    results.append(clusterer.toString());
    results.append(" Clustered Instances\n\n");
    int clustFieldWidth = (int)((Math.log(numClusters)/Math.log(10))+1);
    for (int i = 0; i < numClusters; i++) {
      results.append(Utils.doubleToString((double)i,clustFieldWidth,0));
      results.append("\t");
      results.append(Utils.doubleToString(instanceStats[i],numInstFieldWidth,0));
      results.append(" (");
      results.append(Utils.doubleToString((instanceStats[i]/sum*100.0),3,0));
      results.append("%)\n");
    }
    results.append("\nUnclustered instances : "+unclusteredInstances);
    if(clusterer instanceof DistributionClusterer){
      DistributionClusterer density=(DistributionClusterer)clusterer;
      // Must be a double: with an int accumulator every "+=" and "/=" is
      // silently narrowed back to an integer, truncating the log likelihood.
      double loglk=0;
      for(int i=0;i<numInstances;i++){
        double t=density.densityForInstance(instances.instance(i));
        if(t>0)loglk+=Math.log(t);
      }
      // average over the clustered instances; skip the division when nothing
      // was clustered so the report does not show NaN/Infinity
      if(sum>0)loglk/=sum;
      results.append("\nLog likelihood: "+Utils.doubleToString(loglk,1,5)+"\n");
    }
    if(instances.classIndex()!=-1)evaluateClustersWithRespectToClass();
  }
  /**
   * Runs a class based evaluation of a clusterer on the instances the
   * clusterer itself holds.
   *
   * @param clusterer the clusterer to evaluate
   * @return the evaluation report
   * @exception Exception if something goes wrong
   */
  public static String evaluateClusterer(Clusterer clusterer)throws Exception{
    final Instances ownData = clusterer.instances;
    return evaluateClusterer(clusterer, ownData);
  }
  /**
   * Runs a class based evaluation of a clusterer on the given instances,
   * using the assignments the clusterer currently reports.
   *
   * @param clusterer the clusterer to evaluate
   * @param instances the instances (including class) to evaluate against
   * @return the evaluation report
   * @exception Exception if something goes wrong
   */
  public static String evaluateClusterer(Clusterer clusterer,Instances instances)throws Exception{
    final ClusterEvaluation evaluation = new ClusterEvaluation(clusterer, instances);
    evaluation.assignments = evaluation.clusterer.getAssignments();
    evaluation.evaluateClustersWithRespectToClass();
    final StringBuffer report = evaluation.results;
    return report.toString();
  }
  /**
   * Evaluates the current cluster assignments with respect to the class
   * information of the current instances. The per-instance class values and
   * the class labels are obtained from the instances (allClasses() and
   * classes()) and handed to the two-argument overload.
   *
   * @exception Exception if something goes wrong
   */
  public void evaluateClustersWithRespectToClass()throws Exception{
    final double[] classOfEachInstance = this.instances.allClasses();
    final String[] classLabels = this.instances.classes();
    this.evaluateClustersWithRespectToClass(classOfEachInstance, classLabels);
  }
  /**
   * Evaluates the current cluster assignments against the supplied class
   * values and appends the report to the results buffer: the class by cluster
   * contingency table, the minimum error class to cluster mapping, the number
   * and percentage of incorrectly clustered instances, and the summary
   * measures E (weighted cluster entropy), P (purity), F (F-measure), H1
   * (cluster entropy), H2 (class entropy) and NMI. The mapping is also stored
   * for getClassesToClusters().
   *
   * Unclustered instances (negative assignment) are left out of the
   * contingency table. Empty clusters and classes contribute zero to the
   * entropies. NMI is still NaN when H1 or H2 is zero (a single non-empty
   * cluster or class), because it is undefined in that case.
   *
   * @param allClasses the class value index of every instance, in instance
   * order
   * @param classes the class labels, indexed by class value
   * @exception Exception if something goes wrong
   */
  public void evaluateClustersWithRespectToClass(double[] allClasses,String[] classes)throws Exception{
    int [][] counts = new int [numClusters][numClasses];
    int [] clusterTotals = new int[numClusters];
    int [] classTotals=new int[numClasses];
    double [] best = new double[numClusters+1];
    double [] current = new double[numClusters+1];
    for(int i=0;i<numInstances;i++){
      int clusterId=(int)assignments[i];
      // -1 marks an unclustered instance (see evaluateClusterer()); indexing
      // the table with it would throw ArrayIndexOutOfBoundsException
      if(clusterId<0)continue;
      int classId=(int)allClasses[i];
      counts[clusterId][classId]++;
      clusterTotals[clusterId]++;
      classTotals[classId]++;
    }
    // the last slot of best/current holds the error of the mapping
    best[numClusters] = Double.MAX_VALUE;
    mapClasses(0, counts, clusterTotals, current, best, 0);
    results.append("\n\nClasses: ");
    for(int i=0;i<numClasses;i++)results.append(classes[i]).append("\t");
    results.append("\nClasses to Clusters:\n");
    results.append(Utils.toString(counts,"Cluster","Class"));

    int Cwidth = 1 + (int)(Math.log(numClusters) / Math.log(10));
    // add the minimum error assignment
    for (int i = 0; i < numClusters; i++) {
      if (clusterTotals[i] > 0) {
        results.append("\nCluster ");
        results.append(Utils.toString((double)i,Cwidth));
        results.append(" <-- ");

        if (best[i] < 0) {
          results.append("No class\n");
        } else {
          results.append(classes[(int)best[i]]).append("\n");
        }
      }
    }
    results.append("\nIncorrectly clustered instances :\t");
    results.append(best[numClusters]);
    results.append("\t");
    results.append(Utils.doubleToString((best[numClusters] /
                                         numInstances *
                                         100.0), 8, 4));
    results.append(" %\n");

    // p/r/f: precision, recall and F-measure of class j within cluster i
    double [][] p=new double[numClusters][numClasses];
    double [][] r=new double[numClusters][numClasses];
    double [][] f=new double[numClusters][numClasses];
    double [] p1=new double[numClusters]; // fraction of instances per cluster
    double [] p2=new double[numClasses];  // fraction of instances per class
    double [] Es=new double[numClusters]; // class entropy inside each cluster
    double [] Ps=new double[numClusters]; // purity of each cluster
    double [] Fs=new double[numClasses];  // best F-measure of each class
    double E=0,P=0,F=0,H1=0,H2=0,NMI;
    for(int i=0;i<numClusters;i++){
        p1[i]=(double)clusterTotals[i]/numInstances;
        for(int j=0;j<numClasses;j++){
            if(counts[i][j]==0){
                p[i][j]=r[i][j]=0;
                f[i][j]=0;
            }else{
                p[i][j]=(double)counts[i][j]/clusterTotals[i];
                r[i][j]=(double)counts[i][j]/classTotals[j];
                f[i][j]=2*p[i][j]*r[i][j]/(p[i][j]+r[i][j]);
                Es[i]+=entropyTerm(p[i][j]);
                if(p[i][j]>Ps[i])Ps[i]=p[i][j];
                if(f[i][j]>Fs[j])Fs[j]=f[i][j];
            }
        }
        E+=p1[i]*Es[i];
        P+=p1[i]*Ps[i];
        // an empty cluster must add 0, not 0*log(0)=NaN
        H1+=entropyTerm(p1[i]);
    }
    for(int j=0;j<numClasses;j++){
        p2[j]=(double)classTotals[j]/numInstances;
        F+=p2[j]*Fs[j];
        // a class without instances must add 0, not NaN
        H2+=entropyTerm(p2[j]);
    }
    NMI=(H2-E)/Math.sqrt(H1*H2);
    results.append("E="+E+" P="+P+" F="+F+" H1="+H1+" H2="+H2+" NMI="+NMI+"\n");

    // copy the class assignments
    m_classToCluster = new int [numClusters];
    for (int i = 0; i < numClusters; i++) {
      m_classToCluster[i] = (int)best[i];
    }
  }

  /**
   * Returns the contribution -p*log2(p) of one probability to an entropy
   * sum, using the convention that a probability of zero contributes zero.
   *
   * @param prob the probability
   * @return the entropy term in bits
   */
  private static double entropyTerm(double prob) {
    if (prob > 0) {
      return -prob*Math.log(prob)/Math.log(2);
    }
    return 0;
  }

  /**
   * Searches exhaustively for the class to cluster mapping with the fewest
   * errors. Clusters are decided one recursion level at a time; a class may
   * be given to at most one cluster, and a cluster may be left without a
   * class (-1), in which case all of its instances count as errors.
   *
   * @param lev the cluster being processed
   * @param counts the counts of classes in clusters
   * @param clusterTotals the total number of examples in each cluster
   * @param current the current path through the class to cluster assignment
   * tree
   * @param best the best assignment path seen; its last element holds the
   * error of that path
   * @param error accumulates the error for a particular path
   */
  private void mapClasses(int lev, int [][] counts, int [] clusterTotals,
			  double [] current, double [] best, int error) {
    final int clusterCount = numClusters;

    // leaf: all clusters decided, keep the path if it beats the best so far
    if (lev == clusterCount) {
      if (error < best[clusterCount]) {
        best[clusterCount] = error;
        System.arraycopy(current, 0, best, 0, clusterCount);
      }
      return;
    }

    final int next = lev + 1;
    final int sizeOfCluster = clusterTotals[lev];

    // both the "empty cluster" and the "no class" alternative mark the
    // cluster with -1
    current[lev] = -1;
    if (sizeOfCluster == 0) {
      // empty cluster -- ignored, costs nothing
      mapClasses(next, counts, clusterTotals, current, best, error);
      return;
    }

    // first alternative: no class for this cluster (all of it is in error)
    mapClasses(next, counts, clusterTotals, current, best,
               error + sizeOfCluster);

    // remaining alternatives: every class present in this cluster that no
    // earlier cluster has claimed
    final int classCount = counts[0].length;
    for (int cls = 0; cls < classCount; cls++) {
      final int hits = counts[lev][cls];
      if (hits <= 0) {
        continue;
      }
      boolean taken = false;
      for (int earlier = 0; earlier < lev && !taken; earlier++) {
        taken = ((int)current[earlier] == cls);
      }
      if (taken) {
        continue;
      }
      current[lev] = cls;
      mapClasses(next, counts, clusterTotals, current, best,
                 error + (sizeOfCluster - hits));
    }
  }

  /**
   * Evaluates a clusterer with the options given in an array of
   * strings. It takes the string indicated by "-t" as training file, the
   * string indicated by "-T" as test file.
   * If "-x" is given, no test file is specified, no model is loaded and the
   * clusterer is a distribution clusterer, the log likelihood is cross
   * validated on the training data with the given number of folds; "-s"
   * sets the random seed used to shuffle the data.
   * If the "-p" option is present it outputs the cluster assignment for
   * each instance (of the test file if given, else of the training file)
   * and nothing else. If you provide the name of an object file using
   * "-l", a clusterer will be loaded from the given file. If you provide the
   * name of an object file using "-d", the clusterer built from the
   * training data will be saved to the given file. With "-c" the class
   * attribute is removed before clustering and a class based evaluation
   * on the training data is returned.<p>
   *
   * Note that the "-p" and "-c" paths return before the model is written,
   * so "-d" has no effect on them.
   *
   * @param clusterer machine learning clusterer
   * @param options the array of string containing the options
   * @exception Exception if model could not be evaluated successfully
   * @return a string describing the results 
   */
  public static String evaluateClusterer (Clusterer clusterer, 
					  String[] options)
    throws Exception {
    int seed = 1, folds = 10;
    boolean doXval = false;
    Instances train = null;
    String trainFileName, testFileName, seedString, foldsString,
      objectInputFileName, objectOutputFileName, attributeRangeString;
    String[] savedOptions = null;
    boolean printClusterAssignments = false;
    Range attributesToOutput = null;
    ObjectInputStream objectInputStream = null;
    ObjectOutputStream objectOutputStream = null;
    StringBuffer text = new StringBuffer();
    int theClass = -1; // class based evaluation of clustering

    try {
      if (Utils.getFlag('h', options)) {
        throw  new Exception("Help requested.");
      }

      // Get basic options (options the same for all clusterers)
      objectInputFileName = Utils.getOption('l', options);
      objectOutputFileName = Utils.getOption('d', options);
      trainFileName = Utils.getOption('t', options);
      testFileName = Utils.getOption('T', options);

      // Check -p option
      try{
          attributeRangeString=Utils.getOption('p',options);
      }catch(Exception e){
          throw new Exception(e.getMessage()+"\nNOTE: the -p option expects a parameter specifying a range of attributes to list with the predictions. Use '-p 0' for none.");
      }
      if(attributeRangeString.length()!=0){
          printClusterAssignments=true;
          if(!attributeRangeString.equals("0"))attributesToOutput=new Range(attributeRangeString);
      }

      if (trainFileName.length() == 0) {
        if (objectInputFileName.length() == 0) {
          throw  new Exception("No training file and no object " 
                               + "input file given.");
        }

        if (testFileName.length() == 0) {
          throw  new Exception("No training file and no test file given.");
        }
      }
      else {
        if ((objectInputFileName.length() != 0) 
            && (printClusterAssignments == false)) {
          throw  new Exception("Can't use both train and model file " 
                               + "unless -p specified.");
        }
      }

      seedString = Utils.getOption('s', options);

      if (seedString.length() != 0) {
        seed = Integer.parseInt(seedString);
      }

      foldsString = Utils.getOption('x', options);

      if (foldsString.length() != 0) {
        folds = Integer.parseInt(foldsString);
        doXval = true;
      }
    }
    catch (Exception e) {
      // same message as before (error plus usage text); the cause is kept
      throw new Exception('\n'+e.getMessage()+makeOptionString(clusterer), e);
    }

    try{
        if(trainFileName.length()!=0){
            System.out.println("Reading train file: "+trainFileName);
            // close the file as soon as the data set has been read
            Reader trainReader=new BufferedReader(new FileReader(trainFileName));
            try{
                train=new Instances(trainReader);
            }finally{
                trainReader.close();
            }
            System.out.println("Relation: "+train.relationName());
            System.out.println("\tnumInstances: "+train.numInstances());
            System.out.println("\tnumAttributes: "+train.numAttributes());
            String classString = Utils.getOption('c',options);
            if (classString.length() != 0) {
              // -c is 1-based; "first" and "last" are accepted as aliases
              if (classString.compareTo("last") == 0) {
                theClass = train.numAttributes();
              } else if (classString.compareTo("first") == 0) {
                theClass = 1;
              } else {
                theClass = Integer.parseInt(classString);
              }
              if (doXval || testFileName.length() != 0) {
                throw new Exception("Can only do class based evaluation on the "
                                    +"training data");
              }

              if (objectInputFileName.length() != 0) {
                throw new Exception("Can't load a clusterer and do class based "
                                    +"evaluation");
              }
            }
            if(theClass!=-1){
                if(!train.attribute(theClass-1).isNominal())throw new Exception("Class must be nominal!");
                System.out.println("Setting classIndex: "+(theClass-1));
                train.setClassIndex(theClass-1);
            }
        }

      if (objectInputFileName.length() != 0) {
        objectInputStream = new ObjectInputStream(new FileInputStream(objectInputFileName));
      }

      if (objectOutputFileName.length() != 0) {
        objectOutputStream = new 
          ObjectOutputStream(new FileOutputStream(objectOutputFileName));
      }
    }
    catch (Exception e) {
      // do not leave a stream open when a later step of the setup failed
      closeQuietly(objectInputStream);
      closeQuietly(objectOutputStream);
      throw  new Exception("ClusterEvaluation: " + e.getMessage() + '.', e);
    }

    try {
      // Save options
      if (options != null) {
        savedOptions = new String[options.length];
        System.arraycopy(options, 0, savedOptions, 0, options.length);
      }

      if (objectInputFileName.length() != 0) {
        Utils.checkForRemainingOptions(options);
        // NOTE(review): native Java deserialization of a user supplied file;
        // only load model files from trusted sources.
        clusterer = (Clusterer)objectInputStream.readObject();
        objectInputStream.close();
      }else{
        System.out.println("About to set options for clusterer ...");
        if(clusterer instanceof OptionHandler)((OptionHandler)clusterer).setOptions(options);
        Utils.checkForRemainingOptions(options);
        // Build the clusterer if no object file provided
        if(train.classIndex()==-1)clusterer.buildClusterer(train);
        else{
          // class based evaluation: cluster without the class attribute,
          // then evaluate against the full training data
          System.out.println("About to remove classAttribute at position "+theClass);
          Remove filter=new Remove();
          filter.setAttributeIndices(""+theClass);
          filter.setInvertSelection(false);
          filter.setInputFormat(train);
          Instances clusterTrain = Filter.useFilter(train,filter);
          System.out.println("\tnumAttributes: "+clusterTrain.numAttributes());
          System.out.println("\tclassIndex: "+clusterTrain.classIndex());
          // default the number of clusters to the number of classes
          if(clusterer.K==0)clusterer.K=train.classAttribute().numValues();
          clusterer.buildClusterer(clusterTrain);
          ClusterEvaluation ce=new ClusterEvaluation(clusterer,train);
          ce.evaluateClusterer();
          return "\n\n=== Clustering stats for training data ===\n\n"+ce.clusterResultsToString();
        }
      }

      /* Output cluster predictions only (for the test data if specified,
         otherwise for the training data) */
      if (printClusterAssignments) {
        return  printClusterings(clusterer, train, testFileName, attributesToOutput);
      }

      text.append(clusterer.toString());
      text.append("\n\n=== Clustering stats for training data ===\n\n" 
                  + printClusterStats(clusterer, trainFileName));

      if (testFileName.length() != 0) {
        text.append("\n\n=== Clustering stats for testing data ===\n\n" 
                    + printClusterStats(clusterer, testFileName));
      }

      if ((clusterer instanceof DistributionClusterer) && 
          doXval && 
          (testFileName.length() == 0) && 
          (objectInputFileName.length() == 0)) {
        // cross validate the log likelihood on the training data
        Random random = new Random(seed);
        train.randomize(random);
        text.append(crossValidateModel(clusterer.getClass().getName()
                                       , train, folds, savedOptions));
      }

      // Save the clusterer if an object output file is provided
      if (objectOutputFileName.length() != 0) {
        objectOutputStream.writeObject(clusterer);
        objectOutputStream.flush();
        objectOutputStream.close();
      }

      return  text.toString();
    }
    finally {
      // covers the early returns and every exception path; closing a stream
      // that was already closed above is harmless
      closeQuietly(objectInputStream);
      closeQuietly(objectOutputStream);
    }
  }

  /**
   * Closes an input stream during cleanup, ignoring null and close failures.
   *
   * @param stream the stream to close, may be null
   */
  private static void closeQuietly(InputStream stream) {
    if (stream != null) {
      try {
        stream.close();
      } catch (IOException ignored) {
        // cleanup only: nothing useful can be done about a failed close
      }
    }
  }

  /**
   * Closes an output stream during cleanup, ignoring null and close
   * failures. The regular save path closes its stream explicitly, so errors
   * while writing the model are still reported there.
   *
   * @param stream the stream to close, may be null
   */
  private static void closeQuietly(OutputStream stream) {
    if (stream != null) {
      try {
        stream.close();
      } catch (IOException ignored) {
        // cleanup only: nothing useful can be done about a failed close
      }
    }
  }


  /**
   * Cross validates the log likelihood of a distribution clusterer. For each
   * fold a new clusterer of the named class is created, configured with a
   * copy of the options, built on the training split and scored on the test
   * split by averaging the log of every positive instance density. The
   * reported value is the mean of the fold averages.
   *
   * @param clustererString a string naming the class of the clusterer
   * @param data the data on which the cross-validation is to be 
   * performed 
   * @param numFolds the number of folds for the cross-validation
   * @param options the options to the clusterer
   * @return a string containing the cross validated log likelihood
   * @exception Exception if a clusterer could not be generated 
   */
  public static String crossValidateModel (String clustererString, 
					   Instances data, 
					   int numFolds, 
					   String[] options)
    throws Exception {
    // scratch array that receives a fresh copy of the options for every fold
    final String[] optionsCopy = (options != null) ? new String[options.length] : null;

    // work on a copy so that the caller's data set is left alone
    final Instances workingData = new Instances(data);
    double sumOfFoldAverages = 0.0;

    for (int fold = 0; fold < numFolds; fold++) {
      // create clusterer
      Clusterer candidate;
      try {
        candidate = (Clusterer)Class.forName(clustererString).newInstance();
      }
      catch (Exception e) {
        throw  new Exception("Can't find class with name " 
                             + clustererString + '.');
      }

      if (!(candidate instanceof DistributionClusterer)) {
        throw  new Exception(clustererString 
                             + " must be a distrinbution " 
                             + "clusterer.");
      }
      final DistributionClusterer density = (DistributionClusterer)candidate;

      // refresh the option copy
      if (options != null) {
        System.arraycopy(options, 0, optionsCopy, 0, options.length);
      }

      // parse options
      if (candidate instanceof OptionHandler) {
        try {
          ((OptionHandler)candidate).setOptions(optionsCopy);
          Utils.checkForRemainingOptions(optionsCopy);
        }
        catch (Exception e) {
          throw  new Exception("Can't parse given options in " 
                               + "cross-validation!");
        }
      }

      // build on the training split, score on the test split
      final Instances trainSplit = workingData.trainCV(numFolds, fold);
      candidate.buildClusterer(trainSplit);
      final Instances testSplit = workingData.testCV(numFolds, fold);

      double foldLogLikelihood = 0.0;
      final int testSize = testSplit.numInstances();
      for (int k = 0; k < testSize; k++) {
        try {
          final double d = density.densityForInstance(testSplit.instance(k));
          if (d > 0) {
            foldLogLikelihood += Math.log(d);
          }
        } catch (Exception ex) {
          // unclustered instances
        }
      }

      sumOfFoldAverages += (foldLogLikelihood/testSplit.numInstances());
    }

    final double crossValidated = sumOfFoldAverages / numFolds;
    final StringBuffer report = new StringBuffer();
    report.append("\n" + numFolds 
                  + " fold CV Log Likelihood: " 
                  + Utils.doubleToString(crossValidated, 6, 4) 
                  + "\n");
    return  report.toString();
  }


  // ===============
  // Private methods
  // ===============
  /**
   * Print the cluster statistics for either the training
   * or the testing data. The file is read one instance at a time; every
   * instance is assigned to a cluster and, for distribution clusterers, its
   * log density is accumulated. Instances for which the clusterer throws are
   * counted as unclustered.
   *
   * @param clusterer the clusterer to use for generating statistics.
   * @param fileName the file holding the instances; an empty name yields an
   * empty string
   * @return a string containing cluster statistics.
   * @exception Exception if statistics can't be generated.
   */
  private static String printClusterStats (Clusterer clusterer, 
					   String fileName)
    throws Exception {
    StringBuffer text = new StringBuffer();
    int i = 0;
    int cnum;
    double loglk = 0.0;
    double temp;
    int numClusters = clusterer.numberOfClusters();
    double[] instanceStats = new double[numClusters];
    int unclusteredInstances = 0;

    if (fileName.length() != 0) {
      BufferedReader inStream = null;

      try {
        inStream = new BufferedReader(new FileReader(fileName));
      }
      catch (Exception e) {
        throw  new Exception("Can't open file " + e.getMessage() + '.', e);
      }

      try {
        // read the header only, then stream the instances one by one
        Instances inst = new Instances(inStream, 1);

        while (inst.readInstance(inStream)) {
          try {
            cnum = clusterer.clusterInstance(inst.instance(0));

            if (clusterer instanceof DistributionClusterer) {
              temp = ((DistributionClusterer)clusterer).
                densityForInstance(inst.instance(0));

              if (temp > 0) {
                loglk += Math.log(temp);
              }
            }
            instanceStats[cnum]++;
          }
          catch (Exception e) {
            // deliberate: an instance the clusterer cannot handle is
            // reported as unclustered instead of aborting the run
            unclusteredInstances++;
          }
          // keep only the instance currently being processed in memory
          inst.delete(0);
          i++;
        }
      }
      finally {
        // the reader used to be left open on every path
        try {
          inStream.close();
        } catch (IOException ignored) {
          // the data has been consumed; a failed close loses nothing
        }
      }

      int clustFieldWidth = (int)((Math.log(numClusters)/Math.log(10))+1);
      int numInstFieldWidth = (int)((Math.log(i)/Math.log(10))+1);
      double sum = Utils.sum(instanceStats);
      loglk /= sum;
      text.append("Clustered Instances\n");

      for (i = 0; i < numClusters; i++) {
        if (instanceStats[i] > 0) {
          text.append(Utils.doubleToString((double)i, 
                                           clustFieldWidth, 0) 
                      + "      " 
                      + Utils.doubleToString(instanceStats[i], 
                                             numInstFieldWidth, 0) 
                      + " (" 
                      + Utils.doubleToString((instanceStats[i]/sum*100.0)
                                             , 3, 0) + "%)\n");
        }
      }
      if (unclusteredInstances > 0) {
        text.append("\nUnclustered Instances : "+unclusteredInstances);
      }

      if (clusterer instanceof DistributionClusterer) {
        text.append("\n\nLog likelihood: " 
                    + Utils.doubleToString(loglk, 1, 5) 
                    + "\n");
      }
    }

    return  text.toString();
  }


  /**
   * Print the cluster assignments for either the training
   * or the testing data. One line is produced per instance: the instance
   * index, the cluster number (or "Unclustered" if the clusterer throws for
   * that instance) and the requested attribute values.
   *
   * @param clusterer the clusterer to use for cluster assignments
   * @param train the training instances, used when no test file is given
   * @param testFileName the test file; if empty the training instances are
   * used
   * @param attributesToOutput the attributes to list with each assignment,
   * or null for none
   * @return a string containing the instance indexes and cluster assigns.
   * @exception Exception if cluster assignments can't be printed
   */
  private static String printClusterings (Clusterer clusterer, Instances train,
					  String testFileName, Range attributesToOutput)
    throws Exception {
    StringBuffer text = new StringBuffer();
    int i = 0;
    int cnum;

    if (testFileName.length() != 0) {
      BufferedReader testStream = null;

      try {
        testStream = new BufferedReader(new FileReader(testFileName));
      }
      catch (Exception e) {
        throw  new Exception("Can't open file " + e.getMessage() + '.', e);
      }

      try {
        // read the header only, then stream the instances one by one
        Instances test = new Instances(testStream, 1);

        while (test.readInstance(testStream)) {
          try {
            cnum = clusterer.clusterInstance(test.instance(0));

            text.append(i + " " + cnum + " "
                        + attributeValuesString(test.instance(0), attributesToOutput) + "\n");
          }
          catch (Exception e) {
            // deliberate: report the instance as unclustered and carry on
            text.append(i + " Unclustered "
                        + attributeValuesString(test.instance(0), attributesToOutput) + "\n");
          }
          // keep only the instance currently being processed in memory
          test.delete(0);
          i++;
        }
      }
      finally {
        // the reader used to be left open on every path
        try {
          testStream.close();
        } catch (IOException ignored) {
          // the data has been consumed; a failed close loses nothing
        }
      }
    }
    else// output for training data
      {
        for (i = 0; i < train.numInstances(); i++) {
          try {
            cnum = clusterer.clusterInstance(train.instance(i));

            text.append(i + " " + cnum + " "
                        + attributeValuesString(train.instance(i), attributesToOutput)
                        + "\n");
          }
          catch (Exception e) {
            // deliberate: report the instance as unclustered and carry on
            text.append(i + " Unclustered "
                        + attributeValuesString(train.instance(i), attributesToOutput)
                        + "\n");
          }
        }
      }

    return  text.toString();
  }

  /**
   * Lists the values of the attributes selected by a range, separated by
   * commas and enclosed in brackets. Nothing (an empty string) is produced
   * when the range is null or selects no attribute. The upper limit of the
   * range is set to the last attribute index of the instance as a side
   * effect.
   *
   * @param instance the instance to print the values from
   * @param attRange the range of the attributes to list
   * @return a string listing values of the attributes in the range
   */
  private static String attributeValuesString(Instance instance, Range attRange) {
    final StringBuffer listing = new StringBuffer();
    if (attRange == null) {
      return listing.toString();
    }

    attRange.setUpper(instance.numAttributes() - 1);

    // "(" goes in front of the first value, "," in front of all later ones
    String prefix = "(";
    for (int att = 0; att < instance.numAttributes(); att++) {
      if (!attRange.isInRange(att)) {
        continue;
      }
      listing.append(prefix);
      listing.append(instance.toString(att));
      prefix = ",";
    }

    // close the bracket only if at least one value was written
    if (listing.length() > 0) {
      listing.append(")");
    }
    return listing.toString();
  }

  /**
   * Make up the help string giving all the command line options
   *
   * @param clusterer the clusterer to include options for
   * @return a string detailing the valid command line options
   */
  private static String makeOptionString (Clusterer clusterer) {
    StringBuffer optionsText = new StringBuffer("");
    // General options
    optionsText.append("\n\nGeneral options:\n\n");
    optionsText.append("-t <name of training file>\n");
    optionsText.append("\tSets training file.\n");
    optionsText.append("-T <name of test file>\n");
    optionsText.append("-l <name of input file>\n");
    optionsText.append("\tSets model input file.\n");
    optionsText.append("-d <name of output file>\n");
    optionsText.append("\tSets model output file.\n");
    optionsText.append("-p <attribute range>\n");
    optionsText.append("\tOutput predictions. Predictions are for " 
		       + "training file" 
		       + "\n\tif only training file is specified," 
		       + "\n\totherwise predictions are for the test file."
		       + "\n\tThe range specifies attribute values to be output"
		       + "\n\twith the predictions. Use '-p 0' for none.\n");
    optionsText.append("-x <number of folds>\n");
    optionsText.append("\tOnly Distribution Clusterers can be cross " 
		       + "validated.\n");
    optionsText.append("-s <random number seed>\n");
    optionsText.append("-c <class index>\n");
    optionsText.append("\tSet class attribute. If supplied, class is ignored");
    optionsText.append("\n\tduring clustering but is used in a classes to");
    optionsText.append("\n\tclusters evaluation.\n");

    // Get scheme-specific options
    if (clusterer instanceof OptionHandler) {
      optionsText.append("\nOptions specific to " 
			 + clusterer.getClass().getName() + ":\n\n");
      Enumeration enum = ((OptionHandler)clusterer).listOptions();

      while (enum.hasMoreElements()) {
	Option option = (Option)enum.nextElement();
	optionsText.append(option.synopsis() + '\n');
	optionsText.append(option.description() + "\n");
      }
    }

    return  optionsText.toString();
  }


  /**
   * Command line entry point. The first argument names the clusterer class;
   * it is blanked out and the remaining arguments are passed on as options.
   * The evaluation report, or the message of any exception, is printed to
   * standard output.
   *
   * @param args the options
   */
  public static void main (String[] args) {
    try {
      if (args.length < 1) {
        throw  new Exception("The first argument must be the name of a " 
                             + "clusterer");
      }

      // take the clusterer name out of the option array before parsing
      final String clustererName = args[0];
      args[0] = "";

      final Clusterer chosen = Clusterer.forName(clustererName, null);
      final String report = evaluateClusterer(chosen, args);
      System.out.println(report);
    }
    catch (Exception e) {
      final String message = e.getMessage();
      System.out.println(message);
    }
  }

}