Evaluation.java example

Explorer
LPmade-master
- weka
  - src
    - weka
/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    Evaluation.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */
package weka.classifiers;

import weka.classifiers.evaluation.NominalPrediction;
import weka.classifiers.evaluation.ThresholdCurve;
import weka.classifiers.xml.XMLClassifier;
import weka.core.Drawable;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.RevisionHandler;
import weka.core.RevisionUtils;
import weka.core.Summarizable;
import weka.core.Utils;
import weka.core.Version;
import weka.core.converters.ConverterUtils.DataSink;
import weka.core.converters.ConverterUtils.DataSource;
import weka.core.xml.KOML;
import weka.core.xml.XMLOptions;
import weka.core.xml.XMLSerialization;
import weka.estimators.Estimator;
import weka.estimators.KernelEstimator;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.util.Date;
import java.util.Enumeration;
import java.util.Random;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

/**
 * Class for evaluating machine learning models. <p/>
 *
 * ------------------------------------------------------------------- <p/>
 *
 * General options when evaluating a learning scheme from the command-line: <p/>
 *
 * -t filename <br/>
 * Name of the file with the training data. (required) <p/>
 *
 * -T filename <br/>
 * Name of the file with the test data. If missing a cross-validation 
 * is performed. <p/>
 *
 * -c index <br/>
 * Index of the class attribute (1, 2, ...; default: last). <p/>
 *
 * -x number <br/>
 * The number of folds for the cross-validation (default: 10). <p/>
 *
 * -no-cv <br/>
 * No cross validation.  If no test file is provided, no evaluation
 * is done. <p/>
 * 
 * -split-percentage percentage <br/>
 * Sets the percentage for the train/test set split, e.g., 66. <p/>
 * 
 * -preserve-order <br/>
 * Preserves the order in the percentage split instead of randomizing
 * the data first with the seed value ('-s'). <p/>
 *
 * -s seed <br/>
 * Random number seed for the cross-validation and percentage split
 * (default: 1). <p/>
 *
 * -m filename <br/>
 * The name of a file containing a cost matrix. <p/>
 *
 * -l filename <br/>
 * Loads classifier from the given file. In case the filename ends with ".xml" 
 * the options are loaded from XML. <p/>
 *
 * -d filename <br/>
 * Saves classifier built from the training data into the given file. In case 
 * the filename ends with ".xml" the options are saved XML, not the model. <p/>
 *
 * -v <br/>
 * Outputs no statistics for the training data. <p/>
 *
 * -o <br/>
 * Outputs statistics only, not the classifier. <p/>
 * 
 * -i <br/>
 * Outputs information-retrieval statistics per class. <p/>
 *
 * -k <br/>
 * Outputs information-theoretic statistics. <p/>
 *
 * -p range <br/>
 * Outputs predictions for test instances (or the train instances if no test
 * instances provided and -no-cv is used), along with the attributes in the specified range 
 * (and nothing else). Use '-p 0' if no attributes are desired. <p/>
 * 
 * -distribution <br/>
 * Outputs the distribution instead of only the prediction
 * in conjunction with the '-p' option (only nominal classes). <p/>
 *
 * -r <br/>
 * Outputs cumulative margin distribution (and nothing else). <p/>
 *
 * -g <br/> 
 * Only for classifiers that implement "Graphable." Outputs
 * the graph representation of the classifier (and nothing
 * else). <p/>
 * 
 * -xml filename | xml-string <br/>
 * Retrieves the options from the XML-data instead of the command line. <p/>
 * 
 * -threshold-file file <br/>
 * The file to save the threshold data to.
 * The format is determined by the extensions, e.g., '.arff' for ARFF
 * format or '.csv' for CSV. <p/>
 *         
 * -threshold-label label <br/>
 * The class label to determine the threshold data for
 * (default is the first label) <p/>
 *         
 * ------------------------------------------------------------------- <p/>
 *
 * Example usage as the main of a classifier (called FunkyClassifier):
 * <code> <pre>
 * public static void main(String [] args) {
 *   runClassifier(new FunkyClassifier(), args);
 * }
 * </pre> </code> 
 * <p/>
 *
 * ------------------------------------------------------------------ <p/>
 *
 * Example usage from within an application:
 * <code> <pre>
 * Instances trainInstances = ... instances got from somewhere
 * Instances testInstances = ... instances got from somewhere
 * Classifier scheme = ... scheme got from somewhere
 *
 * Evaluation evaluation = new Evaluation(trainInstances);
 * evaluation.evaluateModel(scheme, testInstances);
 * System.out.println(evaluation.toSummaryString());
 * </pre> </code> 
 *
 *
 * @author   Eibe Frank (eibe@cs.waikato.ac.nz)
 * @author   Len Trigg (trigg@cs.waikato.ac.nz)
 * @version  $Revision: 1.88 $
 */
public class Evaluation
		implements Summarizable, RevisionHandler {

	/** The number of classes. */
	protected int m_NumClasses;
	/** The number of folds for a cross-validation. */
	protected int m_NumFolds;
	/** The weight of all incorrectly classified instances. */
	protected double m_Incorrect;
	/** The weight of all correctly classified instances. */
	protected double m_Correct;
	/** The weight of all unclassified instances. */
	protected double m_Unclassified;
	/*** The weight of all instances that had no class assigned to them. */
	protected double m_MissingClass;
	/** The weight of all instances that had a class assigned to them. */
	protected double m_WithClass;
	/** Array for storing the confusion matrix. */
	protected double[][] m_ConfusionMatrix;
	/** The names of the classes. */
	protected String[] m_ClassNames;
	/** Is the class nominal or numeric? */
	protected boolean m_ClassIsNominal;
	/** The prior probabilities of the classes */
	protected double[] m_ClassPriors;
	/** The sum of counts for priors */
	protected double m_ClassPriorsSum;
	/** The cost matrix (if given). */
	protected CostMatrix m_CostMatrix;
	/** The total cost of predictions (includes instance weights) */
	protected double m_TotalCost;
	/** Sum of errors. */
	protected double m_SumErr;
	/** Sum of absolute errors. */
	protected double m_SumAbsErr;
	/** Sum of squared errors. */
	protected double m_SumSqrErr;
	/** Sum of class values. */
	protected double m_SumClass;
	/** Sum of squared class values. */
	protected double m_SumSqrClass;
	/*** Sum of predicted values. */
	protected double m_SumPredicted;
	/** Sum of squared predicted values. */
	protected double m_SumSqrPredicted;
	/** Sum of predicted * class values. */
	protected double m_SumClassPredicted;
	/** Sum of absolute errors of the prior */
	protected double m_SumPriorAbsErr;
	/** Sum of absolute errors of the prior */
	protected double m_SumPriorSqrErr;
	/** Total Kononenko & Bratko Information */
	protected double m_SumKBInfo;
	/*** Resolution of the margin histogram */
	protected static int k_MarginResolution = 500;
	/** Cumulative margin distribution */
	protected double m_MarginCounts[];
	/** Number of non-missing class training instances seen */
	protected int m_NumTrainClassVals;
	/** Array containing all numeric training class values seen */
	protected double[] m_TrainClassVals;
	/** Array containing all numeric training class weights */
	protected double[] m_TrainClassWeights;
	/** Numeric class error estimator for prior */
	protected Estimator m_PriorErrorEstimator;
	/** Numeric class error estimator for scheme */
	protected Estimator m_ErrorEstimator;
	/**
	 * The minimum probablility accepted from an estimator to avoid
	 * taking log(0) in Sf calculations.
	 */
	protected static final double MIN_SF_PROB = Double.MIN_VALUE;
	/** Total entropy of prior predictions */
	protected double m_SumPriorEntropy;
	/** Total entropy of scheme predictions */
	protected double m_SumSchemeEntropy;
	/** The list of predictions that have been generated (for computing AUC) */
	private FastVector m_Predictions;
	/** enables/disables the use of priors, e.g., if no training set is
	 * present in case of de-serialized schemes */
	protected boolean m_NoPriors = false;

	/**
	 * Initializes all the counters for the evaluation.
	 * Use <code>useNoPriors()</code> if the dataset is the test set and you
	 * can't initialize with the priors from the training set via
	 * <code>setPriors(Instances)</code>.
	 *
	 * @param data 	set of training instances, to get some header
	 * 			information and prior class distribution information
	 * @throws Exception 	if the class is not defined
	 * @see 		#useNoPriors()
	 * @see 		#setPriors(Instances)
	 */
	public Evaluation(Instances data) throws Exception {

		this(data, null);
	}

	/**
	 * Initializes all the counters for the evaluation and also takes a
	 * cost matrix as parameter.
	 * Use <code>useNoPriors()</code> if the dataset is the test set and you
	 * can't initialize with the priors from the training set via
	 * <code>setPriors(Instances)</code>.
	 *
	 * @param data 	set of training instances, to get some header
	 * 			information and prior class distribution information
	 * @param costMatrix 	the cost matrix---if null, default costs will be used
	 * @throws Exception 	if cost matrix is not compatible with
	 * 			data, the class is not defined or the class is numeric
	 * @see 		#useNoPriors()
	 * @see 		#setPriors(Instances)
	 */
	public Evaluation(Instances data, CostMatrix costMatrix)
			throws Exception {

		m_NumClasses = data.numClasses();
		m_NumFolds = 1;
		m_ClassIsNominal = data.classAttribute().isNominal();

		if (m_ClassIsNominal) {
			m_ConfusionMatrix = new double[m_NumClasses][m_NumClasses];
			m_ClassNames = new String[m_NumClasses];
			for (int i = 0; i < m_NumClasses; i++) {
				m_ClassNames[i] = data.classAttribute().value(i);
			}
		}
		m_CostMatrix = costMatrix;
		if (m_CostMatrix != null) {
			if (!m_ClassIsNominal) {
				throw new Exception("Class has to be nominal if cost matrix " +
						"given!");
			}
			if (m_CostMatrix.size() != m_NumClasses) {
				throw new Exception("Cost matrix not compatible with data!");
			}
		}
		m_ClassPriors = new double[m_NumClasses];
		setPriors(data);
		m_MarginCounts = new double[k_MarginResolution + 1];
	}

	/**
	 * Returns the area under ROC for those predictions that have been collected
	 * in the evaluateClassifier(Classifier, Instances) method. Returns
	 * Instance.missingValue() if the area is not available.
	 *
	 * @param classIndex the index of the class to consider as "positive"
	 * @return the area under the ROC curve or not a number
	 */
	public double areaUnderROC(int classIndex) {

		// Check if any predictions have been collected
		if (m_Predictions == null) {
			return Instance.missingValue();
		} else {
			ThresholdCurve tc = new ThresholdCurve();
			Instances result = tc.getCurve(m_Predictions, classIndex);
			return ThresholdCurve.getROCArea(result);
		}
	}

	/**
	 * Returns a copy of the confusion matrix.
	 *
	 * @return a copy of the confusion matrix as a two-dimensional array
	 */
	public double[][] confusionMatrix() {

		double[][] newMatrix = new double[m_ConfusionMatrix.length][0];

		for (int i = 0; i < m_ConfusionMatrix.length; i++) {
			newMatrix[i] = new double[m_ConfusionMatrix[i].length];
			System.arraycopy(m_ConfusionMatrix[i], 0, newMatrix[i], 0,
					m_ConfusionMatrix[i].length);
		}
		return newMatrix;
	}

	/**
	 * Performs a (stratified if class is nominal) cross-validation
	 * for a classifier on a set of instances. Now performs
	 * a deep copy of the classifier before each call to
	 * buildClassifier() (just in case the classifier is not
	 * initialized properly).
	 *
	 * @param classifier the classifier with any options set.
	 * @param data the data on which the cross-validation is to be
	 * performed
	 * @param numFolds the number of folds for the cross-validation
	 * @param random random number generator for randomization
	 * @param forPredictionsString varargs parameter that, if supplied, is
	 * expected to hold a StringBuffer to print predictions to,
	 * a Range of attributes to output and a Boolean (true if the distribution
	 * is to be printed)
	 * @throws Exception if a classifier could not be generated
	 * successfully or the class is not defined
	 */
	public void crossValidateModel(Classifier classifier,
			Instances data, int numFolds, Random random,
			Object... forPredictionsPrinting)
			throws Exception {

		// Make a copy of the data we can reorder
		data = new Instances(data);
		data.randomize(random);
		if (data.classAttribute().isNominal()) {
			data.stratify(numFolds);
		}

		// We assume that the first element is a StringBuffer, the second a Range (attributes
		// to output) and the third a Boolean (whether or not to output a distribution instead
		// of just a classification)
		if (forPredictionsPrinting.length > 0) {
			// print the header first
			StringBuffer buff = (StringBuffer) forPredictionsPrinting[0];
			Range attsToOutput = (Range) forPredictionsPrinting[1];
			boolean printDist = ((Boolean) forPredictionsPrinting[2]).booleanValue();
			printClassificationsHeader(data, attsToOutput, printDist, buff);
		}

		// Do the folds
		for (int i = 0; i < numFolds; i++) {
			Instances train = data.trainCV(numFolds, i, random);
			setPriors(train);
			Classifier copiedClassifier = Classifier.makeCopy(classifier);
			copiedClassifier.buildClassifier(train);
			Instances test = data.testCV(numFolds, i);
			evaluateModel(copiedClassifier, test, forPredictionsPrinting);
		}
		m_NumFolds = numFolds;
	}

	/**
	 * Performs a (stratified if class is nominal) cross-validation
	 * for a classifier on a set of instances.
	 *
	 * @param classifierString a string naming the class of the classifier
	 * @param data the data on which the cross-validation is to be
	 * performed
	 * @param numFolds the number of folds for the cross-validation
	 * @param options the options to the classifier. Any options
	 * @param random the random number generator for randomizing the data
	 * accepted by the classifier will be removed from this array.
	 * @throws Exception if a classifier could not be generated
	 * successfully or the class is not defined
	 */
	public void crossValidateModel(String classifierString,
			Instances data, int numFolds,
			String[] options, Random random)
			throws Exception {

		crossValidateModel(Classifier.forName(classifierString, options),
				data, numFolds, random);
	}

	/**
	 * Evaluates a classifier with the options given in an array of
	 * strings. <p/>
	 *
	 * Valid options are: <p/>
	 *
	 * -t filename <br/>
	 * Name of the file with the training data. (required) <p/>
	 *
	 * -T filename <br/>
	 * Name of the file with the test data. If missing a cross-validation
	 * is performed. <p/>
	 *
	 * -c index <br/>
	 * Index of the class attribute (1, 2, ...; default: last). <p/>
	 *
	 * -x number <br/>
	 * The number of folds for the cross-validation (default: 10). <p/>
	 *
	 * -no-cv <br/>
	 * No cross validation.  If no test file is provided, no evaluation
	 * is done. <p/>
	 *
	 * -split-percentage percentage <br/>
	 * Sets the percentage for the train/test set split, e.g., 66. <p/>
	 *
	 * -preserve-order <br/>
	 * Preserves the order in the percentage split instead of randomizing
	 * the data first with the seed value ('-s'). <p/>
	 *
	 * -s seed <br/>
	 * Random number seed for the cross-validation and percentage split
	 * (default: 1). <p/>
	 *
	 * -m filename <br/>
	 * The name of a file containing a cost matrix. <p/>
	 *
	 * -l filename <br/>
	 * Loads classifier from the given file. In case the filename ends with
	 * ".xml" the options are loaded from XML. <p/>
	 *
	 * -d filename <br/>
	 * Saves classifier built from the training data into the given file. In case
	 * the filename ends with ".xml" the options are saved XML, not the model. <p/>
	 *
	 * -v <br/>
	 * Outputs no statistics for the training data. <p/>
	 *
	 * -o <br/>
	 * Outputs statistics only, not the classifier. <p/>
	 *
	 * -i <br/>
	 * Outputs detailed information-retrieval statistics per class. <p/>
	 *
	 * -k <br/>
	 * Outputs information-theoretic statistics. <p/>
	 *
	 * -p range <br/>
	 * Outputs predictions for test instances (or the train instances if no test
	 * instances provided  and -no-cv is used), along with the attributes in the specified range (and
	 *  nothing else). Use '-p 0' if no attributes are desired. <p/>
	 *
	 * -distribution <br/>
	 * Outputs the distribution instead of only the prediction
	 * in conjunction with the '-p' option (only nominal classes). <p/>
	 *
	 * -r <br/>
	 * Outputs cumulative margin distribution (and nothing else). <p/>
	 *
	 * -g <br/>
	 * Only for classifiers that implement "Graphable." Outputs
	 * the graph representation of the classifier (and nothing
	 * else). <p/>
	 *
	 * -xml filename | xml-string <br/>
	 * Retrieves the options from the XML-data instead of the command line. <p/>
	 *
	 * -threshold-file file <br/>
	 * The file to save the threshold data to.
	 * The format is determined by the extensions, e.g., '.arff' for ARFF
	 * format or '.csv' for CSV. <p/>
	 *
	 * -threshold-label label <br/>
	 * The class label to determine the threshold data for
	 * (default is the first label) <p/>
	 *
	 * @param classifierString class of machine learning classifier as a string
	 * @param options the array of string containing the options
	 * @throws Exception if model could not be evaluated successfully
	 * @return a string describing the results
	 */
	public static String evaluateModel(String classifierString,
			String[] options) throws Exception {

		Classifier classifier;

		// Create classifier
		try {
			classifier =
					(Classifier) Class.forName(classifierString).newInstance();
		} catch (Exception e) {
			throw new Exception("Can't find class with name " + classifierString + '.');
		}
		return evaluateModel(classifier, options);
	}

	/**
	 * A test method for this class. Just extracts the first command line
	 * argument as a classifier class name and calls evaluateModel.
	 * @param args an array of command line arguments, the first of which
	 * must be the class name of a classifier.
	 */
	public static void main(String[] args) {

		try {
			if (args.length == 0) {
				throw new Exception("The first argument must be the class name" + " of a classifier");
			}
			String classifier = args[0];
			args[0] = "";
			System.out.println(evaluateModel(classifier, args));
		} catch (Exception ex) {
			ex.printStackTrace();
			System.err.println(ex.getMessage());
		}
	}

	/**
	 * Evaluates a classifier with the options given in an array of
	 * strings. <p/>
	 *
	 * Valid options are: <p/>
	 *
	 * -t name of training file <br/>
	 * Name of the file with the training data. (required) <p/>
	 *
	 * -T name of test file <br/>
	 * Name of the file with the test data. If missing a cross-validation
	 * is performed. <p/>
	 *
	 * -c class index <br/>
	 * Index of the class attribute (1, 2, ...; default: last). <p/>
	 *
	 * -x number of folds <br/>
	 * The number of folds for the cross-validation (default: 10). <p/>
	 *
	 * -no-cv <br/>
	 * No cross validation.  If no test file is provided, no evaluation
	 * is done. <p/>
	 *
	 * -split-percentage percentage <br/>
	 * Sets the percentage for the train/test set split, e.g., 66. <p/>
	 *
	 * -preserve-order <br/>
	 * Preserves the order in the percentage split instead of randomizing
	 * the data first with the seed value ('-s'). <p/>
	 *
	 * -s seed <br/>
	 * Random number seed for the cross-validation and percentage split
	 * (default: 1). <p/>
	 *
	 * -m file with cost matrix <br/>
	 * The name of a file containing a cost matrix. <p/>
	 *
	 * -l filename <br/>
	 * Loads classifier from the given file. In case the filename ends with
	 * ".xml" the options are loaded from XML. <p/>
	 *
	 * -d filename <br/>
	 * Saves classifier built from the training data into the given file. In case
	 * the filename ends with ".xml" the options are saved XML, not the model. <p/>
	 *
	 * -v <br/>
	 * Outputs no statistics for the training data. <p/>
	 *
	 * -o <br/>
	 * Outputs statistics only, not the classifier. <p/>
	 *
	 * -i <br/>
	 * Outputs detailed information-retrieval statistics per class. <p/>
	 *
	 * -k <br/>
	 * Outputs information-theoretic statistics. <p/>
	 *
	 * -p range <br/>
	 * Outputs predictions for test instances (or the train instances if no test
	 * instances provided and -no-cv is used), along with the attributes in the specified range
	 * (and nothing else). Use '-p 0' if no attributes are desired. <p/>
	 *
	 * -distribution <br/>
	 * Outputs the distribution instead of only the prediction
	 * in conjunction with the '-p' option (only nominal classes). <p/>
	 *
	 * -r <br/>
	 * Outputs cumulative margin distribution (and nothing else). <p/>
	 *
	 * -g <br/>
	 * Only for classifiers that implement "Graphable." Outputs
	 * the graph representation of the classifier (and nothing
	 * else). <p/>
	 *
	 * -xml filename | xml-string <br/>
	 * Retrieves the options from the XML-data instead of the command line. <p/>
	 *
	 * @param classifier machine learning classifier
	 * @param options the array of string containing the options
	 * @throws Exception if model could not be evaluated successfully
	 * @return a string describing the results
	 */
	public static String evaluateModel(Classifier classifier,
			String[] options) throws Exception {

		Instances train = null, tempTrain, test = null, template = null;
		int seed = 1, folds = 10, classIndex = -1;
		boolean noCrossValidation = false;
		String trainFileName, testFileName, sourceClass,
				classIndexString, seedString, foldsString, objectInputFileName,
				objectOutputFileName, attributeRangeString;
		boolean noOutput = false,
				printClassifications = false, trainStatistics = true,
				printMargins = false, printComplexityStatistics = false,
				printGraph = false, classStatistics = false, printSource = false;
		StringBuffer text = new StringBuffer();
		DataSource trainSource = null, testSource = null;
		ObjectInputStream objectInputStream = null;
		BufferedInputStream xmlInputStream = null;
		CostMatrix costMatrix = null;
		StringBuffer schemeOptionsText = null;
		Range attributesToOutput = null;
		long trainTimeStart = 0, trainTimeElapsed = 0,
				testTimeStart = 0, testTimeElapsed = 0;
		String xml = "";
		String[] optionsTmp = null;
		Classifier classifierBackup;
		Classifier classifierClassifications = null;
		boolean printDistribution = false;
		int actualClassIndex = -1;  // 0-based class index
		String splitPercentageString = "";
		int splitPercentage = -1;
		boolean preserveOrder = false;
		boolean trainSetPresent = false;
		boolean testSetPresent = false;
		String thresholdFile;
		String thresholdLabel;
		StringBuffer predsBuff = null; // predictions from cross-validation

		// help requested?
		if (Utils.getFlag("h", options) || Utils.getFlag("help", options)) {
			throw new Exception("\nHelp requested." + makeOptionString(classifier));
		}

		try {
			// do we get the input from XML instead of normal parameters?
			xml = Utils.getOption("xml", options);
			if (!xml.equals("")) {
				options = new XMLOptions(xml).toArray();
			}

			// is the input model only the XML-Options, i.e. w/o built model?
			optionsTmp = new String[options.length];
			for (int i = 0; i < options.length; i++) {
				optionsTmp[i] = options[i];
			}

			if (Utils.getOption('l', optionsTmp).toLowerCase().endsWith(".xml")) {
				// load options from serialized data ('-l' is automatically erased!)
				XMLClassifier xmlserial = new XMLClassifier();
				Classifier cl = (Classifier) xmlserial.read(Utils.getOption('l', options));
				// merge options
				optionsTmp = new String[options.length + cl.getOptions().length];
				System.arraycopy(cl.getOptions(), 0, optionsTmp, 0, cl.getOptions().length);
				System.arraycopy(options, 0, optionsTmp, cl.getOptions().length, options.length);
				options = optionsTmp;
			}

			noCrossValidation = Utils.getFlag("no-cv", options);
			// Get basic options (options the same for all schemes)
			classIndexString = Utils.getOption('c', options);
			if (classIndexString.length() != 0) {
				if (classIndexString.equals("first")) {
					classIndex = 1;
				} else if (classIndexString.equals("last")) {
					classIndex = -1;
				} else {
					classIndex = Integer.parseInt(classIndexString);
				}
			}
			trainFileName = Utils.getOption('t', options);
			objectInputFileName = Utils.getOption('l', options);
			objectOutputFileName = Utils.getOption('d', options);
			testFileName = Utils.getOption('T', options);
			foldsString = Utils.getOption('x', options);
			if (foldsString.length() != 0) {
				folds = Integer.parseInt(foldsString);
			}
			seedString = Utils.getOption('s', options);
			if (seedString.length() != 0) {
				seed = Integer.parseInt(seedString);
			}
			if (trainFileName.length() == 0) {
				if (objectInputFileName.length() == 0) {
					throw new Exception("No training file and no object " +
							"input file given.");
				}
				if (testFileName.length() == 0) {
					throw new Exception("No training file and no test " +
							"file given.");
				}
			} else if ((objectInputFileName.length() != 0) &&
					((!(classifier instanceof UpdateableClassifier)) ||
					(testFileName.length() == 0))) {
				throw new Exception("Classifier not incremental, or no " +
						"test file provided: can't " +
						"use both train and model file.");
			}
			try {
				if (trainFileName.length() != 0) {
					trainSetPresent = true;
					trainSource = new DataSource(trainFileName);
				}
				if (testFileName.length() != 0) {
					testSetPresent = true;
					testSource = new DataSource(testFileName);
				}
				if (objectInputFileName.length() != 0) {
					InputStream is = new FileInputStream(objectInputFileName);
					if (objectInputFileName.endsWith(".gz")) {
						is = new GZIPInputStream(is);
					}
					// load from KOML?
					if (!(objectInputFileName.endsWith(".koml") && KOML.isPresent())) {
						objectInputStream = new ObjectInputStream(is);
						xmlInputStream = null;
					} else {
						objectInputStream = null;
						xmlInputStream = new BufferedInputStream(is);
					}
				}
			} catch (Exception e) {
				throw new Exception("Can't open file " + e.getMessage() + '.');
			}
			if (testSetPresent) {
				template = test = testSource.getStructure();
				if (classIndex != -1) {
					test.setClassIndex(classIndex - 1);
				} else {
					if ((test.classIndex() == -1) || (classIndexString.length() != 0)) {
						test.setClassIndex(test.numAttributes() - 1);
					}
				}
				actualClassIndex = test.classIndex();
			} else {
				// percentage split
				splitPercentageString = Utils.getOption("split-percentage", options);
				if (splitPercentageString.length() != 0) {
					if (foldsString.length() != 0) {
						throw new Exception(
								"Percentage split cannot be used in conjunction with " + "cross-validation ('-x').");
					}
					splitPercentage = Integer.parseInt(splitPercentageString);
					if ((splitPercentage <= 0) || (splitPercentage >= 100)) {
						throw new Exception("Percentage split value needs be >0 and <100.");
					}
				} else {
					splitPercentage = -1;
				}
				preserveOrder = Utils.getFlag("preserve-order", options);
				if (preserveOrder) {
					if (splitPercentage == -1) {
						throw new Exception("Percentage split ('-percentage-split') is missing.");
					}
				}
				// create new train/test sources
				if (splitPercentage > 0) {
					testSetPresent = true;
					Instances tmpInst = trainSource.getDataSet(actualClassIndex);
					if (!preserveOrder) {
						tmpInst.randomize(new Random(seed));
					}
					int trainSize = tmpInst.numInstances() * splitPercentage / 100;
					int testSize = tmpInst.numInstances() - trainSize;
					Instances trainInst = new Instances(tmpInst, 0, trainSize);
					Instances testInst = new Instances(tmpInst, trainSize, testSize);
					trainSource = new DataSource(trainInst);
					testSource = new DataSource(testInst);
					template = test = testSource.getStructure();
					if (classIndex != -1) {
						test.setClassIndex(classIndex - 1);
					} else {
						if ((test.classIndex() == -1) || (classIndexString.length() != 0)) {
							test.setClassIndex(test.numAttributes() - 1);
						}
					}
					actualClassIndex = test.classIndex();
				}
			}
			if (trainSetPresent) {
				template = train = trainSource.getStructure();
				if (classIndex != -1) {
					train.setClassIndex(classIndex - 1);
				} else {
					if ((train.classIndex() == -1) || (classIndexString.length() != 0)) {
						train.setClassIndex(train.numAttributes() - 1);
					}
				}
				actualClassIndex = train.classIndex();
				if ((testSetPresent) && !test.equalHeaders(train)) {
					throw new IllegalArgumentException("Train and test file not compatible!");
				}
			}
			if (template == null) {
				throw new Exception("No actual dataset provided to use as template");
			}
			costMatrix = handleCostOption(
					Utils.getOption('m', options), template.numClasses());

			classStatistics = Utils.getFlag('i', options);
			noOutput = Utils.getFlag('o', options);
			trainStatistics = !Utils.getFlag('v', options);
			printComplexityStatistics = Utils.getFlag('k', options);
			printMargins = Utils.getFlag('r', options);
			printGraph = Utils.getFlag('g', options);
			sourceClass = Utils.getOption('z', options);
			printSource = (sourceClass.length() != 0);
			printDistribution = Utils.getFlag("distribution", options);
			thresholdFile = Utils.getOption("threshold-file", options);
			thresholdLabel = Utils.getOption("threshold-label", options);

			// Check -p option
			try {
				attributeRangeString = Utils.getOption('p', options);
			} catch (Exception e) {
				throw new Exception(e.getMessage() + "\nNOTE: the -p option has changed. " +
						"It now expects a parameter specifying a range of attributes " +
						"to list with the predictions. Use '-p 0' for none.");
			}
			if (attributeRangeString.length() != 0) {
				printClassifications = true;
				if (!attributeRangeString.equals("0")) {
					attributesToOutput = new Range(attributeRangeString);
				}
			}

			if (!printClassifications && printDistribution) {
				throw new Exception("Cannot print distribution without '-p' option!");
			}

			// if no training file given, we don't have any priors
			if ((!trainSetPresent) && (printComplexityStatistics)) {
				throw new Exception("Cannot print complexity statistics ('-k') without training file ('-t')!");
			}

			// If a model file is given, we can't process
			// scheme-specific options
			if (objectInputFileName.length() != 0) {
				Utils.checkForRemainingOptions(options);
			} else {

				// Set options for classifier
				if (classifier instanceof OptionHandler) {
					for (int i = 0; i < options.length; i++) {
						if (options[i].length() != 0) {
							if (schemeOptionsText == null) {
								schemeOptionsText = new StringBuffer();
							}
							if (options[i].indexOf(' ') != -1) {
								schemeOptionsText.append('"' + options[i] + "\" ");
							} else {
								schemeOptionsText.append(options[i] + " ");
							}
						}
					}
					((OptionHandler) classifier).setOptions(options);
				}
			}
			Utils.checkForRemainingOptions(options);
		} catch (Exception e) {
			throw new Exception("\nWeka exception: " + e.getMessage() + makeOptionString(classifier));
		}

		// Setup up evaluation objects
		Evaluation trainingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);
		Evaluation testingEvaluation = new Evaluation(new Instances(template, 0), costMatrix);

		// disable use of priors if no training file given
		if (!trainSetPresent) {
			testingEvaluation.useNoPriors();
		}

		if (objectInputFileName.length() != 0) {
			// Load classifier from file
			if (objectInputStream != null) {
				classifier = (Classifier) objectInputStream.readObject();
				// try and read a header (if present)
				Instances savedStructure = null;
				try {
					savedStructure = (Instances) objectInputStream.readObject();
				} catch (Exception ex) {
					// don't make a fuss
				}
				if (savedStructure != null) {
					// test for compatibility with template
					if (!template.equalHeaders(savedStructure)) {
						throw new Exception("training and test set are not compatible");
					}
				}
				objectInputStream.close();
			} else {
				// whether KOML is available has already been checked (objectInputStream would null otherwise)!
				classifier = (Classifier) KOML.read(xmlInputStream);
				xmlInputStream.close();
			}
		}

		// backup of fully setup classifier for cross-validation
		classifierBackup = Classifier.makeCopy(classifier);

		// Build the classifier if no object file provided
		if ((classifier instanceof UpdateableClassifier) &&
				(testSetPresent || noCrossValidation) &&
				(costMatrix == null) &&
				(trainSetPresent)) {
			// Build classifier incrementally
			trainingEvaluation.setPriors(train);
			testingEvaluation.setPriors(train);
			trainTimeStart = System.currentTimeMillis();
			if (objectInputFileName.length() == 0) {
				classifier.buildClassifier(train);
			}
			Instance trainInst;
			while (trainSource.hasMoreElements(train)) {
				trainInst = trainSource.nextElement(train);
				trainingEvaluation.updatePriors(trainInst);
				testingEvaluation.updatePriors(trainInst);
				((UpdateableClassifier) classifier).updateClassifier(trainInst);
			}
			trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
		} else if (objectInputFileName.length() == 0) {
			// Build classifier in one go
			tempTrain = trainSource.getDataSet(actualClassIndex);
			trainingEvaluation.setPriors(tempTrain);
			testingEvaluation.setPriors(tempTrain);
			trainTimeStart = System.currentTimeMillis();
			classifier.buildClassifier(tempTrain);
			trainTimeElapsed = System.currentTimeMillis() - trainTimeStart;
		}

/*  FOR LARGE DATA SETS
		// backup of fully trained classifier for printing the classifications
		if (printClassifications) {
			classifierClassifications = Classifier.makeCopy(classifier);
		}
*/
		// Save the classifier if an object output file is provided
		if (objectOutputFileName.length() != 0) {
			OutputStream os = new FileOutputStream(objectOutputFileName);
			// binary
			if (!(objectOutputFileName.endsWith(".xml") || (objectOutputFileName.endsWith(".koml") && KOML.isPresent()))) {
				if (objectOutputFileName.endsWith(".gz")) {
					os = new GZIPOutputStream(os);
				}
				ObjectOutputStream objectOutputStream = new ObjectOutputStream(os);
				objectOutputStream.writeObject(classifier);
				if (template != null) {
					objectOutputStream.writeObject(template);
				}
				objectOutputStream.flush();
				objectOutputStream.close();
			} // KOML/XML
			else {
				BufferedOutputStream xmlOutputStream = new BufferedOutputStream(os);
				if (objectOutputFileName.endsWith(".xml")) {
					XMLSerialization xmlSerial = new XMLClassifier();
					xmlSerial.write(xmlOutputStream, classifier);
				} else // whether KOML is present has already been checked
				// if not present -> ".koml" is interpreted as binary - see above
				if (objectOutputFileName.endsWith(".koml")) {
					KOML.write(xmlOutputStream, classifier);
				}
				xmlOutputStream.close();
			}
		}

/* FOR LARGE DATA SETS
		// If classifier is drawable output string describing graph
		if ((classifier instanceof Drawable) && (printGraph)) {
			return ((Drawable) classifier).graph();
		}

		// Output the classifier as equivalent source
		if ((classifier instanceof Sourcable) && (printSource)) {
			return wekaStaticWrapper((Sourcable) classifier, sourceClass);
		}

		// Output model
		if (!(noOutput || printMargins)) {
			if (classifier instanceof OptionHandler) {
				if (schemeOptionsText != null) {
					text.append("\nOptions: " + schemeOptionsText);
					text.append("\n");
				}
			}
			text.append("\n" + classifier.toString() + "\n");
		}

		if (!printMargins && (costMatrix != null)) {
			text.append("\n=== Evaluation Cost Matrix ===\n\n");
			text.append(costMatrix.toString());
		}
*/ // FOR LARGE DATA SETS
		// Output test instance predictions only
		if (printClassifications) {
			DataSource source = testSource;
			predsBuff = new StringBuffer();
			// no test set -> use train set
			if (source == null && noCrossValidation) {
				source = trainSource;
				predsBuff.append("\n=== Predictions on training data ===\n\n");
			} else {
				predsBuff.append("\n=== Predictions on test data ===\n\n");
			}
			if (source != null) {
				/*      return printClassifications(classifierClassifications, new Instances(template, 0),
				source, actualClassIndex + 1, attributesToOutput,
				printDistribution); */
				printClassifications(classifier, new Instances(template, 0),
						source, actualClassIndex + 1, attributesToOutput,
						printDistribution, predsBuff);
				/* FOR LARGE DATA SETS
				printClassifications(classifierClassifications, new Instances(template, 0),
						source, actualClassIndex + 1, attributesToOutput,
						printDistribution, predsBuff);
				 */
			//        return predsText.toString();
			}
		}
/*  FOR LARGE DATA SETS
		// Compute error estimate from training data
		if ((trainStatistics) && (trainSetPresent)) {

			if ((classifier instanceof UpdateableClassifier) &&
					(testSetPresent) &&
					(costMatrix == null)) {

				// Classifier was trained incrementally, so we have to
				// reset the source.
				trainSource.reset();

				// Incremental testing
				train = trainSource.getStructure(actualClassIndex);
				testTimeStart = System.currentTimeMillis();
				Instance trainInst;
				while (trainSource.hasMoreElements(train)) {
					trainInst = trainSource.nextElement(train);
					trainingEvaluation.evaluateModelOnce((Classifier) classifier, trainInst);
				}
				testTimeElapsed = System.currentTimeMillis() - testTimeStart;
			} else {
				testTimeStart = System.currentTimeMillis();
				trainingEvaluation.evaluateModel(
						classifier, trainSource.getDataSet(actualClassIndex));
				testTimeElapsed = System.currentTimeMillis() - testTimeStart;
			}

			// Print the results of the training evaluation
			if (printMargins) {
				return trainingEvaluation.toCumulativeMarginDistributionString();
			} else {
				text.append("\nTime taken to build model: " + Utils.doubleToString(trainTimeElapsed / 1000.0, 2) + " seconds");

				if (splitPercentage > 0) {
					text.append("\nTime taken to test model on training split: ");
				} else {
					text.append("\nTime taken to test model on training data: ");
				}
				text.append(Utils.doubleToString(testTimeElapsed / 1000.0, 2) + " seconds");

				if (splitPercentage > 0) {
					text.append(trainingEvaluation.toSummaryString("\n\n=== Error on training" + " split ===\n", printComplexityStatistics));
				} else {
					text.append(trainingEvaluation.toSummaryString("\n\n=== Error on training" + " data ===\n", printComplexityStatistics));
				}

				if (template.classAttribute().isNominal()) {
					if (classStatistics) {
						text.append("\n\n" + trainingEvaluation.toClassDetailsString());
					}
					if (!noCrossValidation) {
						text.append("\n\n" + trainingEvaluation.toMatrixString());
					}
				}

			}
		}

		// Compute proper error estimates
		if (testSource != null) {
			// Testing is on the supplied test data
			testSource.reset();
			test = testSource.getStructure(test.classIndex());
			Instance testInst;
			while (testSource.hasMoreElements(test)) {
				testInst = testSource.nextElement(test);
				testingEvaluation.evaluateModelOnceAndRecordPrediction(
						(Classifier) classifier, testInst);
			}

			if (splitPercentage > 0) {
				text.append("\n\n" + testingEvaluation.toSummaryString("=== Error on test split ===\n",
						printComplexityStatistics));
			} else {
				text.append("\n\n" + testingEvaluation.toSummaryString("=== Error on test data ===\n",
						printComplexityStatistics));
			}

		} else if (trainSource != null) {
			if (!noCrossValidation) {
				// Testing is via cross-validation on training data
				Random random = new Random(seed);
				// use untrained (!) classifier for cross-validation
				classifier = Classifier.makeCopy(classifierBackup);
				if (!printClassifications) {
					testingEvaluation.crossValidateModel(classifier,
							trainSource.getDataSet(actualClassIndex),
							folds, random);
					if (template.classAttribute().isNumeric()) {
						text.append("\n\n\n" + testingEvaluation.toSummaryString("=== Cross-validation ===\n",
								printComplexityStatistics));
					} else {
						text.append("\n\n\n" + testingEvaluation.toSummaryString("=== Stratified " +
								"cross-validation ===\n",
								printComplexityStatistics));
					}
				} else {
					predsBuff = new StringBuffer();
					predsBuff.append("\n=== Predictions under cross-validation ===\n\n");
					testingEvaluation.crossValidateModel(classifier,
							trainSource.getDataSet(actualClassIndex),
							folds, random, predsBuff, attributesToOutput,
							new Boolean(printDistribution));
					if (template.classAttribute().isNumeric()) {
						text.append("\n\n\n" + testingEvaluation.toSummaryString("=== Cross-validation ===\n",
								printComplexityStatistics));
					} else {
						text.append("\n\n\n" + testingEvaluation.toSummaryString("=== Stratified " +
								"cross-validation ===\n",
								printComplexityStatistics));
					}
				}
			}
		}
		if (template.classAttribute().isNominal()) {
			if (classStatistics && !noCrossValidation) {
				text.append("\n\n" + testingEvaluation.toClassDetailsString());
			}
			if (!noCrossValidation) {
				text.append("\n\n" + testingEvaluation.toMatrixString());
			}

			// predictions from cross-validation?
			if (predsBuff != null) {
				text.append("\n" + predsBuff);
			}
		}

		if ((thresholdFile.length() != 0) && template.classAttribute().isNominal()) {
			int labelIndex = 0;
			if (thresholdLabel.length() != 0) {
				labelIndex = template.classAttribute().indexOfValue(thresholdLabel);
			}
			if (labelIndex == -1) {
				throw new IllegalArgumentException(
						"Class label '" + thresholdLabel + "' is unknown!");
			}
			ThresholdCurve tc = new ThresholdCurve();
			Instances result = tc.getCurve(testingEvaluation.predictions(), labelIndex);
			DataSink.write(thresholdFile, result);
		}

 */ // FOR LARGE DATA SETS
		return text.toString();
	}

	/**
	 * Attempts to load a cost matrix.
	 *
	 * @param costFileName the filename of the cost matrix
	 * @param numClasses the number of classes that should be in the cost matrix
	 * (only used if the cost file is in old format).
	 * @return a <code>CostMatrix</code> value, or null if costFileName is empty
	 * @throws Exception if an error occurs.
	 */
	protected static CostMatrix handleCostOption(String costFileName,
			int numClasses)
			throws Exception {

		if ((costFileName != null) && (costFileName.length() != 0)) {
			System.out.println(
					"NOTE: The behaviour of the -m option has changed between WEKA 3.0" + " and WEKA 3.1. -m now carries out cost-sensitive *evaluation*" + " only. For cost-sensitive *prediction*, use one of the" + " cost-sensitive metaschemes such as" + " weka.classifiers.meta.CostSensitiveClassifier or" + " weka.classifiers.meta.MetaCost");

			Reader costReader = null;
			try {
				costReader = new BufferedReader(new FileReader(costFileName));
			} catch (Exception e) {
				throw new Exception("Can't open file " + e.getMessage() + '.');
			}
			try {
				// First try as a proper cost matrix format
				return new CostMatrix(costReader);
			} catch (Exception ex) {
				try {
					// Now try as the poxy old format :-)
					//System.err.println("Attempting to read old format cost file");
					try {
						costReader.close(); // Close the old one
						costReader = new BufferedReader(new FileReader(costFileName));
					} catch (Exception e) {
						throw new Exception("Can't open file " + e.getMessage() + '.');
					}
					CostMatrix costMatrix = new CostMatrix(numClasses);
					//System.err.println("Created default cost matrix");
					costMatrix.readOldFormat(costReader);
					return costMatrix;
				//System.err.println("Read old format");
				} catch (Exception e2) {
					// re-throw the original exception
					//System.err.println("Re-throwing original exception");
					throw ex;
				}
			}
		} else {
			return null;
		}
	}

	/**
	 * Evaluates the classifier on a given set of instances. Note that
	 * the data must have exactly the same format (e.g. order of
	 * attributes) as the data used to train the classifier! Otherwise
	 * the results will generally be meaningless.
	 *
	 * @param classifier machine learning classifier
	 * @param data set of test instances for evaluation
	 * @param forPredictionsString varargs parameter that, if supplied, is
	 * expected to hold a StringBuffer to print predictions to,
	 * a Range of attributes to output and a Boolean (true if the distribution
	 * is to be printed)
	 * @return the predictions
	 * @throws Exception if model could not be evaluated
	 * successfully
	 */
	public double[] evaluateModel(Classifier classifier,
			Instances data,
			Object... forPredictionsPrinting) throws Exception {
		// for predictions printing
		StringBuffer buff = null;
		Range attsToOutput = null;
		boolean printDist = false;

		double predictions[] = new double[data.numInstances()];

		if (forPredictionsPrinting.length > 0) {
			buff = (StringBuffer) forPredictionsPrinting[0];
			attsToOutput = (Range) forPredictionsPrinting[1];
			printDist = ((Boolean) forPredictionsPrinting[2]).booleanValue();
		}

		// Need to be able to collect predictions if appropriate (for AUC)

		for (int i = 0; i < data.numInstances(); i++) {
			predictions[i] = evaluateModelOnceAndRecordPrediction((Classifier) classifier,
					data.instance(i));
			if (buff != null) {
				buff.append(predictionText(classifier, data.instance(i), i,
						attsToOutput, printDist));
			}
		}

		return predictions;
	}

	/**
	 * Evaluates the classifier on a single instance and records the
	 * prediction (if the class is nominal).
	 *
	 * @param classifier machine learning classifier
	 * @param instance the test instance to be classified
	 * @return the prediction made by the clasifier
	 * @throws Exception if model could not be evaluated
	 * successfully or the data contains string attributes
	 */
	public double evaluateModelOnceAndRecordPrediction(Classifier classifier,
			Instance instance) throws Exception {

		Instance classMissing = (Instance) instance.copy();
		double pred = 0;
		classMissing.setDataset(instance.dataset());
		classMissing.setClassMissing();
		if (m_ClassIsNominal) {
			if (m_Predictions == null) {
				m_Predictions = new FastVector();
			}
			double[] dist = classifier.distributionForInstance(classMissing);
			pred = Utils.maxIndex(dist);
			if (dist[(int) pred] <= 0) {
				pred = Instance.missingValue();
			}
			updateStatsForClassifier(dist, instance);
			m_Predictions.addElement(new NominalPrediction(instance.classValue(), dist,
					instance.weight()));
		} else {
			pred = classifier.classifyInstance(classMissing);
			updateStatsForPredictor(pred, instance);
		}
		return pred;
	}

	/**
	 * Evaluates the classifier on a single instance.
	 *
	 * @param classifier machine learning classifier
	 * @param instance the test instance to be classified
	 * @return the prediction made by the clasifier
	 * @throws Exception if model could not be evaluated
	 * successfully or the data contains string attributes
	 */
	public double evaluateModelOnce(Classifier classifier,
			Instance instance) throws Exception {

		Instance classMissing = (Instance) instance.copy();
		double pred = 0;
		classMissing.setDataset(instance.dataset());
		classMissing.setClassMissing();
		if (m_ClassIsNominal) {
			double[] dist = classifier.distributionForInstance(classMissing);
			pred = Utils.maxIndex(dist);
			if (dist[(int) pred] <= 0) {
				pred = Instance.missingValue();
			}
			updateStatsForClassifier(dist, instance);
		} else {
			pred = classifier.classifyInstance(classMissing);
			updateStatsForPredictor(pred, instance);
		}
		return pred;
	}

	/**
	 * Evaluates the supplied distribution on a single instance.
	 *
	 * @param dist the supplied distribution
	 * @param instance the test instance to be classified
	 * @return the prediction
	 * @throws Exception if model could not be evaluated
	 * successfully
	 */
	public double evaluateModelOnce(double[] dist,
			Instance instance) throws Exception {
		double pred;
		if (m_ClassIsNominal) {
			pred = Utils.maxIndex(dist);
			if (dist[(int) pred] <= 0) {
				pred = Instance.missingValue();
			}
			updateStatsForClassifier(dist, instance);
		} else {
			pred = dist[0];
			updateStatsForPredictor(pred, instance);
		}
		return pred;
	}

	/**
	 * Evaluates the supplied distribution on a single instance.
	 *
	 * @param dist the supplied distribution
	 * @param instance the test instance to be classified
	 * @return the prediction
	 * @throws Exception if model could not be evaluated
	 * successfully
	 */
	public double evaluateModelOnceAndRecordPrediction(double[] dist,
			Instance instance) throws Exception {
		double pred;
		if (m_ClassIsNominal) {
			if (m_Predictions == null) {
				m_Predictions = new FastVector();
			}
			pred = Utils.maxIndex(dist);
			if (dist[(int) pred] <= 0) {
				pred = Instance.missingValue();
			}
			updateStatsForClassifier(dist, instance);
			m_Predictions.addElement(new NominalPrediction(instance.classValue(), dist,
					instance.weight()));
		} else {
			pred = dist[0];
			updateStatsForPredictor(pred, instance);
		}
		return pred;
	}

	/**
	 * Evaluates the supplied prediction on a single instance.
	 *
	 * @param prediction the supplied prediction
	 * @param instance the test instance to be classified
	 * @throws Exception if model could not be evaluated
	 * successfully
	 */
	public void evaluateModelOnce(double prediction,
			Instance instance) throws Exception {

		if (m_ClassIsNominal) {
			updateStatsForClassifier(makeDistribution(prediction),
					instance);
		} else {
			updateStatsForPredictor(prediction, instance);
		}
	}

	/**
	 * Returns the predictions that have been collected.
	 *
	 * @return a reference to the FastVector containing the predictions
	 * that have been collected. This should be null if no predictions
	 * have been collected (e.g. if the class is numeric).
	 */
	public FastVector predictions() {

		return m_Predictions;
	}

	/**
	 * Wraps a static classifier in enough source to test using the weka
	 * class libraries.
	 *
	 * @param classifier a Sourcable Classifier
	 * @param className the name to give to the source code class
	 * @return the source for a static classifier that can be tested with
	 * weka libraries.
	 * @throws Exception if code-generation fails
	 */
	public static String wekaStaticWrapper(Sourcable classifier, String className)
			throws Exception {

		StringBuffer result = new StringBuffer();
		String staticClassifier = classifier.toSource(className);

		result.append("// Generated with Weka " + Version.VERSION + "\n");
		result.append("//\n");
		result.append("// This code is public domain and comes with no warranty.\n");
		result.append("//\n");
		result.append("// Timestamp: " + new Date() + "\n");
		result.append("\n");
		result.append("package weka.classifiers;\n");
		result.append("\n");
		result.append("import weka.core.Attribute;\n");
		result.append("import weka.core.Capabilities;\n");
		result.append("import weka.core.Capabilities.Capability;\n");
		result.append("import weka.core.Instance;\n");
		result.append("import weka.core.Instances;\n");
		result.append("import weka.classifiers.Classifier;\n");
		result.append("\n");
		result.append("public class WekaWrapper\n");
		result.append("  extends Classifier {\n");

		// globalInfo
		result.append("\n");
		result.append("  /**\n");
		result.append("   * Returns only the toString() method.\n");
		result.append("   *\n");
		result.append("   * @return a string describing the classifier\n");
		result.append("   */\n");
		result.append("  public String globalInfo() {\n");
		result.append("    return toString();\n");
		result.append("  }\n");

		// getCapabilities
		result.append("\n");
		result.append("  /**\n");
		result.append("   * Returns the capabilities of this classifier.\n");
		result.append("   *\n");
		result.append("   * @return the capabilities\n");
		result.append("   */\n");
		result.append("  public Capabilities getCapabilities() {\n");
		result.append(((Classifier) classifier).getCapabilities().toSource("result", 4));
		result.append("    return result;\n");
		result.append("  }\n");

		// buildClassifier
		result.append("\n");
		result.append("  /**\n");
		result.append("   * only checks the data against its capabilities.\n");
		result.append("   *\n");
		result.append("   * @param i the training data\n");
		result.append("   */\n");
		result.append("  public void buildClassifier(Instances i) throws Exception {\n");
		result.append("    // can classifier handle the data?\n");
		result.append("    getCapabilities().testWithFail(i);\n");
		result.append("  }\n");

		// classifyInstance
		result.append("\n");
		result.append("  /**\n");
		result.append("   * Classifies the given instance.\n");
		result.append("   *\n");
		result.append("   * @param i the instance to classify\n");
		result.append("   * @return the classification result\n");
		result.append("   */\n");
		result.append("  public double classifyInstance(Instance i) throws Exception {\n");
		result.append("    Object[] s = new Object[i.numAttributes()];\n");
		result.append("    \n");
		result.append("    for (int j = 0; j < s.length; j++) {\n");
		result.append("      if (!i.isMissing(j)) {\n");
		result.append("        if (i.attribute(j).isNominal())\n");
		result.append("          s[j] = new String(i.stringValue(j));\n");
		result.append("        else if (i.attribute(j).isNumeric())\n");
		result.append("          s[j] = new Double(i.value(j));\n");
		result.append("      }\n");
		result.append("    }\n");
		result.append("    \n");
		result.append("    // set class value to missing\n");
		result.append("    s[i.classIndex()] = null;\n");
		result.append("    \n");
		result.append("    return " + className + ".classify(s);\n");
		result.append("  }\n");

		// toString
		result.append("\n");
		result.append("  /**\n");
		result.append("   * Returns only the classnames and what classifier it is based on.\n");
		result.append("   *\n");
		result.append("   * @return a short description\n");
		result.append("   */\n");
		result.append("  public String toString() {\n");
		result.append("    return \"Auto-generated classifier wrapper, based on " + classifier.getClass().getName() + " (generated with Weka " + Version.VERSION + ").\\n" + "\" + this.getClass().getName() + \"/" + className + "\";\n");
		result.append("  }\n");

		// main
		result.append("\n");
		result.append("  /**\n");
		result.append("   * Runs the classfier from commandline.\n");
		result.append("   *\n");
		result.append("   * @param args the commandline arguments\n");
		result.append("   */\n");
		result.append("  public static void main(String args[]) {\n");
		result.append("    runClassifier(new WekaWrapper(), args);\n");
		result.append("  }\n");
		result.append("}\n");

		// actual classifier code
		result.append("\n");
		result.append(staticClassifier);

		return result.toString();
	}

	/**
	 * Gets the number of test instances that had a known class value
	 * (actually the sum of the weights of test instances with known
	 * class value).
	 *
	 * @return the number of test instances with known class
	 */
	public final double numInstances() {

		return m_WithClass;
	}

	/**
	 * Gets the number of instances incorrectly classified (that is, for
	 * which an incorrect prediction was made). (Actually the sum of the weights
	 * of these instances)
	 *
	 * @return the number of incorrectly classified instances
	 */
	public final double incorrect() {

		return m_Incorrect;
	}

	/**
	 * Gets the percentage of instances incorrectly classified (that is, for
	 * which an incorrect prediction was made).
	 *
	 * @return the percent of incorrectly classified instances
	 * (between 0 and 100)
	 */
	public final double pctIncorrect() {

		return 100 * m_Incorrect / m_WithClass;
	}

	/**
	 * Gets the total cost, that is, the cost of each prediction times the
	 * weight of the instance, summed over all instances.
	 *
	 * @return the total cost
	 */
	public final double totalCost() {

		return m_TotalCost;
	}

	/**
	 * Gets the average cost, that is, total cost of misclassifications
	 * (incorrect plus unclassified) over the total number of instances.
	 *
	 * @return the average cost.
	 */
	public final double avgCost() {

		return m_TotalCost / m_WithClass;
	}

	/**
	 * Gets the number of instances correctly classified (that is, for
	 * which a correct prediction was made). (Actually the sum of the weights
	 * of these instances)
	 *
	 * @return the number of correctly classified instances
	 */
	public final double correct() {

		return m_Correct;
	}

	/**
	 * Gets the percentage of instances correctly classified (that is, for
	 * which a correct prediction was made).
	 *
	 * @return the percent of correctly classified instances (between 0 and 100)
	 */
	public final double pctCorrect() {

		return 100 * m_Correct / m_WithClass;
	}

	/**
	 * Gets the number of instances not classified (that is, for
	 * which no prediction was made by the classifier). (Actually the sum
	 * of the weights of these instances)
	 *
	 * @return the number of unclassified instances
	 */
	public final double unclassified() {

		return m_Unclassified;
	}

	/**
	 * Gets the percentage of instances not classified (that is, for
	 * which no prediction was made by the classifier).
	 *
	 * @return the percent of unclassified instances (between 0 and 100)
	 */
	public final double pctUnclassified() {

		return 100 * m_Unclassified / m_WithClass;
	}

	/**
	 * Returns the estimated error rate or the root mean squared error
	 * (if the class is numeric). If a cost matrix was given this
	 * error rate gives the average cost.
	 *
	 * @return the estimated error rate (between 0 and 1, or between 0 and
	 * maximum cost)
	 */
	public final double errorRate() {

		if (!m_ClassIsNominal) {
			return Math.sqrt(m_SumSqrErr / (m_WithClass - m_Unclassified));
		}
		if (m_CostMatrix == null) {
			return m_Incorrect / m_WithClass;
		} else {
			return avgCost();
		}
	}

	/**
	 * Returns value of kappa statistic if class is nominal.
	 *
	 * @return the value of the kappa statistic
	 */
	public final double kappa() {


		double[] sumRows = new double[m_ConfusionMatrix.length];
		double[] sumColumns = new double[m_ConfusionMatrix.length];
		double sumOfWeights = 0;
		for (int i = 0; i < m_ConfusionMatrix.length; i++) {
			for (int j = 0; j < m_ConfusionMatrix.length; j++) {
				sumRows[i] += m_ConfusionMatrix[i][j];
				sumColumns[j] += m_ConfusionMatrix[i][j];
				sumOfWeights += m_ConfusionMatrix[i][j];
			}
		}
		double correct = 0, chanceAgreement = 0;
		for (int i = 0; i < m_ConfusionMatrix.length; i++) {
			chanceAgreement += (sumRows[i] * sumColumns[i]);
			correct += m_ConfusionMatrix[i][i];
		}
		chanceAgreement /= (sumOfWeights * sumOfWeights);
		correct /= sumOfWeights;

		if (chanceAgreement < 1) {
			return (correct - chanceAgreement) / (1 - chanceAgreement);
		} else {
			return 1;
		}
	}

	/**
	 * Returns the correlation coefficient if the class is numeric.
	 *
	 * @return the correlation coefficient
	 * @throws Exception if class is not numeric
	 */
	public final double correlationCoefficient() throws Exception {

		if (m_ClassIsNominal) {
			throw new Exception("Can't compute correlation coefficient: " +
					"class is nominal!");
		}

		double correlation = 0;
		double varActual =
				m_SumSqrClass - m_SumClass * m_SumClass /
				(m_WithClass - m_Unclassified);
		double varPredicted =
				m_SumSqrPredicted - m_SumPredicted * m_SumPredicted /
				(m_WithClass - m_Unclassified);
		double varProd =
				m_SumClassPredicted - m_SumClass * m_SumPredicted /
				(m_WithClass - m_Unclassified);

		if (varActual * varPredicted <= 0) {
			correlation = 0.0;
		} else {
			correlation = varProd / Math.sqrt(varActual * varPredicted);
		}

		return correlation;
	}

	/**
	 * Returns the mean absolute error. Refers to the error of the
	 * predicted values for numeric classes, and the error of the
	 * predicted probability distribution for nominal classes.
	 *
	 * @return the mean absolute error
	 */
	public final double meanAbsoluteError() {

		return m_SumAbsErr / (m_WithClass - m_Unclassified);
	}

	/**
	 * Returns the mean absolute error of the prior.
	 *
	 * @return the mean absolute error
	 */
	public final double meanPriorAbsoluteError() {

		if (m_NoPriors) {
			return Double.NaN;
		}

		return m_SumPriorAbsErr / m_WithClass;
	}

	/**
	 * Returns the relative absolute error.
	 *
	 * @return the relative absolute error
	 * @throws Exception if it can't be computed
	 */
	public final double relativeAbsoluteError() throws Exception {

		if (m_NoPriors) {
			return Double.NaN;
		}

		return 100 * meanAbsoluteError() / meanPriorAbsoluteError();
	}

	/**
	 * Returns the root mean squared error.
	 *
	 * @return the root mean squared error
	 */
	public final double rootMeanSquaredError() {

		return Math.sqrt(m_SumSqrErr / (m_WithClass - m_Unclassified));
	}

	/**
	 * Returns the mean squared error.
	 *
	 * @return the mean squared error
	 */
	public final double meanSquaredError() {

		return m_SumSqrErr / (m_WithClass - m_Unclassified);
	}

	/**
	 * Returns the root mean prior squared error.
	 *
	 * @return the root mean prior squared error
	 */
	public final double rootMeanPriorSquaredError() {

		if (m_NoPriors) {
			return Double.NaN;
		}

		return Math.sqrt(m_SumPriorSqrErr / m_WithClass);
	}

	/**
	 * Returns the root relative squared error if the class is numeric.
	 *
	 * @return the root relative squared error
	 */
	public final double rootRelativeSquaredError() {

		if (m_NoPriors) {
			return Double.NaN;
		}

		return 100.0 * rootMeanSquaredError() /
				rootMeanPriorSquaredError();
	}

	/**
	 * Calculate the entropy of the prior distribution
	 *
	 * @return the entropy of the prior distribution
	 * @throws Exception if the class is not nominal
	 */
	public final double priorEntropy() throws Exception {

		if (!m_ClassIsNominal) {
			throw new Exception("Can't compute entropy of class prior: " +
					"class numeric!");
		}

		if (m_NoPriors) {
			return Double.NaN;
		}

		double entropy = 0;
		for (int i = 0; i < m_NumClasses; i++) {
			entropy -= m_ClassPriors[i] / m_ClassPriorsSum * Utils.log2(m_ClassPriors[i] / m_ClassPriorsSum);
		}
		return entropy;
	}

	/**
	 * Return the total Kononenko & Bratko Information score in bits
	 *
	 * @return the K&B information score
	 * @throws Exception if the class is not nominal
	 */
	public final double KBInformation() throws Exception {

		if (!m_ClassIsNominal) {
			throw new Exception("Can't compute K&B Info score: " +
					"class numeric!");
		}

		if (m_NoPriors) {
			return Double.NaN;
		}

		return m_SumKBInfo;
	}

	/**
	 * Return the Kononenko & Bratko Information score in bits per
	 * instance.
	 *
	 * @return the K&B information score
	 * @throws Exception if the class is not nominal
	 */
	public final double KBMeanInformation() throws Exception {

		if (!m_ClassIsNominal) {
			throw new Exception("Can't compute K&B Info score: " + "class numeric!");
		}

		if (m_NoPriors) {
			return Double.NaN;
		}

		return m_SumKBInfo / (m_WithClass - m_Unclassified);
	}

	/**
	 * Return the Kononenko & Bratko Relative Information score
	 *
	 * @return the K&B relative information score
	 * @throws Exception if the class is not nominal
	 */
	public final double KBRelativeInformation() throws Exception {

		if (!m_ClassIsNominal) {
			throw new Exception("Can't compute K&B Info score: " +
					"class numeric!");
		}

		if (m_NoPriors) {
			return Double.NaN;
		}

		return 100.0 * KBInformation() / priorEntropy();
	}

	/**
	 * Returns the total entropy for the null model
	 *
	 * @return the total null model entropy
	 */
	public final double SFPriorEntropy() {

		if (m_NoPriors) {
			return Double.NaN;
		}

		return m_SumPriorEntropy;
	}

	/**
	 * Returns the entropy per instance for the null model
	 *
	 * @return the null model entropy per instance
	 */
	public final double SFMeanPriorEntropy() {

		if (m_NoPriors) {
			return Double.NaN;
		}

		return m_SumPriorEntropy / m_WithClass;
	}

	/**
	 * Returns the total entropy for the scheme
	 *
	 * @return the total scheme entropy
	 */
	public final double SFSchemeEntropy() {

		if (m_NoPriors) {
			return Double.NaN;
		}

		return m_SumSchemeEntropy;
	}

	/**
	 * Returns the entropy per instance for the scheme
	 *
	 * @return the scheme entropy per instance
	 */
	public final double SFMeanSchemeEntropy() {

		if (m_NoPriors) {
			return Double.NaN;
		}

		return m_SumSchemeEntropy / (m_WithClass - m_Unclassified);
	}

	/**
	 * Returns the total SF, which is the null model entropy minus
	 * the scheme entropy.
	 *
	 * @return the total SF
	 */
	public final double SFEntropyGain() {

		if (m_NoPriors) {
			return Double.NaN;
		}

		return m_SumPriorEntropy - m_SumSchemeEntropy;
	}

	/**
	 * Returns the SF per instance, which is the null model entropy
	 * minus the scheme entropy, per instance.
	 *
	 * @return the SF per instance
	 */
	public final double SFMeanEntropyGain() {

		if (m_NoPriors) {
			return Double.NaN;
		}

		return (m_SumPriorEntropy - m_SumSchemeEntropy) /
				(m_WithClass - m_Unclassified);
	}

	/**
	 * Output the cumulative margin distribution as a string suitable
	 * for input for gnuplot or similar package.
	 *
	 * @return the cumulative margin distribution
	 * @throws Exception if the class attribute is nominal
	 */
	public String toCumulativeMarginDistributionString() throws Exception {

		if (!m_ClassIsNominal) {
			throw new Exception("Class must be nominal for margin distributions");
		}
		String result = "";
		double cumulativeCount = 0;
		double margin;
		for (int i = 0; i <= k_MarginResolution; i++) {
			if (m_MarginCounts[i] != 0) {
				cumulativeCount += m_MarginCounts[i];
				margin = (double) i * 2.0 / k_MarginResolution - 1.0;
				result = result + Utils.doubleToString(margin, 7, 3) + ' ' + Utils.doubleToString(cumulativeCount * 100 / m_WithClass, 7, 3) + '\n';
			} else if (i == 0) {
				result = Utils.doubleToString(-1.0, 7, 3) + ' ' + Utils.doubleToString(0, 7, 3) + '\n';
			}
		}
		return result;
	}

	/**
	 * Calls toSummaryString() with no title and no complexity stats
	 *
	 * @return a summary description of the classifier evaluation
	 */
	public String toSummaryString() {

		return toSummaryString("", false);
	}

	/**
	 * Calls toSummaryString() with a default title.
	 *
	 * @param printComplexityStatistics if true, complexity statistics are
	 * returned as well
	 * @return the summary string
	 */
	public String toSummaryString(boolean printComplexityStatistics) {

		return toSummaryString("=== Summary ===\n", printComplexityStatistics);
	}

	/**
	 * Outputs the performance statistics in summary form. Lists
	 * number (and percentage) of instances classified correctly,
	 * incorrectly and unclassified. Outputs the total number of
	 * instances classified, and the number of instances (if any)
	 * that had no class value provided.
	 *
	 * @param title the title for the statistics
	 * @param printComplexityStatistics if true, complexity statistics are
	 * returned as well
	 * @return the summary as a String
	 */
	public String toSummaryString(String title,
			boolean printComplexityStatistics) {

		StringBuffer text = new StringBuffer();

		if (printComplexityStatistics && m_NoPriors) {
			printComplexityStatistics = false;
			System.err.println("Priors disabled, cannot print complexity statistics!");
		}

		text.append(title + "\n");
		try {
			if (m_WithClass > 0) {
				if (m_ClassIsNominal) {

					text.append("Correctly Classified Instances     ");
					text.append(Utils.doubleToString(correct(), 12, 4) + "     " +
							Utils.doubleToString(pctCorrect(),
							12, 4) + " %\n");
					text.append("Incorrectly Classified Instances   ");
					text.append(Utils.doubleToString(incorrect(), 12, 4) + "     " +
							Utils.doubleToString(pctIncorrect(),
							12, 4) + " %\n");
					text.append("Kappa statistic                    ");
					text.append(Utils.doubleToString(kappa(), 12, 4) + "\n");

					if (m_CostMatrix != null) {
						text.append("Total Cost                         ");
						text.append(Utils.doubleToString(totalCost(), 12, 4) + "\n");
						text.append("Average Cost                       ");
						text.append(Utils.doubleToString(avgCost(), 12, 4) + "\n");
					}
					if (printComplexityStatistics) {
						text.append("K&B Relative Info Score            ");
						text.append(Utils.doubleToString(KBRelativeInformation(), 12, 4) + " %\n");
						text.append("K&B Information Score              ");
						text.append(Utils.doubleToString(KBInformation(), 12, 4) + " bits");
						text.append(Utils.doubleToString(KBMeanInformation(), 12, 4) + " bits/instance\n");
					}
				} else {
					text.append("Correlation coefficient            ");
					text.append(Utils.doubleToString(correlationCoefficient(), 12, 4) +
							"\n");
				}
				if (printComplexityStatistics) {
					text.append("Class complexity | order 0         ");
					text.append(Utils.doubleToString(SFPriorEntropy(), 12, 4) + " bits");
					text.append(Utils.doubleToString(SFMeanPriorEntropy(), 12, 4) + " bits/instance\n");
					text.append("Class complexity | scheme          ");
					text.append(Utils.doubleToString(SFSchemeEntropy(), 12, 4) + " bits");
					text.append(Utils.doubleToString(SFMeanSchemeEntropy(), 12, 4) + " bits/instance\n");
					text.append("Complexity improvement     (Sf)    ");
					text.append(Utils.doubleToString(SFEntropyGain(), 12, 4) + " bits");
					text.append(Utils.doubleToString(SFMeanEntropyGain(), 12, 4) + " bits/instance\n");
				}

				text.append("Mean absolute error                ");
				text.append(Utils.doubleToString(meanAbsoluteError(), 12, 4) + "\n");
				text.append("Root mean squared error            ");
				text.append(Utils.doubleToString(rootMeanSquaredError(), 12, 4) + "\n");
				if (!m_NoPriors) {
					text.append("Relative absolute error            ");
					text.append(Utils.doubleToString(relativeAbsoluteError(),
							12, 4) + " %\n");
					text.append("Root relative squared error        ");
					text.append(Utils.doubleToString(rootRelativeSquaredError(),
							12, 4) + " %\n");
				}
			}
			if (Utils.gr(unclassified(), 0)) {
				text.append("UnClassified Instances             ");
				text.append(Utils.doubleToString(unclassified(), 12, 4) + "     " +
						Utils.doubleToString(pctUnclassified(),
						12, 4) + " %\n");
			}
			text.append("Total Number of Instances          ");
			text.append(Utils.doubleToString(m_WithClass, 12, 4) + "\n");
			if (m_MissingClass > 0) {
				text.append("Ignored Class Unknown Instances            ");
				text.append(Utils.doubleToString(m_MissingClass, 12, 4) + "\n");
			}
		} catch (Exception ex) {
			// Should never occur since the class is known to be nominal
			// here
			System.err.println("Arggh - Must be a bug in Evaluation class");
		}

		return text.toString();
	}

	/**
	 * Calls toMatrixString() with a default title.
	 *
	 * @return the confusion matrix as a string
	 * @throws Exception if the class is numeric
	 */
	public String toMatrixString() throws Exception {

		return toMatrixString("=== Confusion Matrix ===\n");
	}

	/**
	 * Outputs the performance statistics as a classification confusion
	 * matrix. For each class value, shows the distribution of
	 * predicted class values.
	 *
	 * @param title the title for the confusion matrix
	 * @return the confusion matrix as a String
	 * @throws Exception if the class is numeric
	 */
	public String toMatrixString(String title) throws Exception {

		StringBuffer text = new StringBuffer();
		char[] IDChars = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
			'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
			'u', 'v', 'w', 'x', 'y', 'z'};
		int IDWidth;
		boolean fractional = false;

		if (!m_ClassIsNominal) {
			throw new Exception("Evaluation: No confusion matrix possible!");
		}

		// Find the maximum value in the matrix
		// and check for fractional display requirement
		double maxval = 0;
		for (int i = 0; i < m_NumClasses; i++) {
			for (int j = 0; j < m_NumClasses; j++) {
				double current = m_ConfusionMatrix[i][j];
				if (current < 0) {
					current *= -10;
				}
				if (current > maxval) {
					maxval = current;
				}
				double fract = current - Math.rint(current);
				if (!fractional && ((Math.log(fract) / Math.log(10)) >= -2)) {
					fractional = true;
				}
			}
		}

		IDWidth = 1 + Math.max((int) (Math.log(maxval) / Math.log(10) + (fractional ? 3 : 0)),
				(int) (Math.log(m_NumClasses) /
				Math.log(IDChars.length)));
		text.append(title).append("\n");
		for (int i = 0; i < m_NumClasses; i++) {
			if (fractional) {
				text.append(" ").append(num2ShortID(i, IDChars, IDWidth - 3)).append("   ");
			} else {
				text.append(" ").append(num2ShortID(i, IDChars, IDWidth));
			}
		}
		text.append("   <-- classified as\n");
		for (int i = 0; i < m_NumClasses; i++) {
			for (int j = 0; j < m_NumClasses; j++) {
				text.append(" ").append(
						Utils.doubleToString(m_ConfusionMatrix[i][j],
						IDWidth,
						(fractional ? 2 : 0)));
			}
			text.append(" | ").append(num2ShortID(i, IDChars, IDWidth)).append(" = ").append(m_ClassNames[i]).append("\n");
		}
		return text.toString();
	}

	/**
	 * Generates a breakdown of the accuracy for each class (with default title),
	 * incorporating various information-retrieval statistics, such as
	 * true/false positive rate, precision/recall/F-Measure.  Should be
	 * useful for ROC curves, recall/precision curves.
	 *
	 * @return the statistics presented as a string
	 * @throws Exception if class is not nominal
	 */
	public String toClassDetailsString() throws Exception {

		return toClassDetailsString("=== Detailed Accuracy By Class ===\n");
	}

	/**
	 * Generates a breakdown of the accuracy for each class,
	 * incorporating various information-retrieval statistics, such as
	 * true/false positive rate, precision/recall/F-Measure.  Should be
	 * useful for ROC curves, recall/precision curves.
	 *
	 * @param title the title to prepend the stats string with
	 * @return the statistics presented as a string
	 * @throws Exception if class is not nominal
	 */
	public String toClassDetailsString(String title) throws Exception {

		if (!m_ClassIsNominal) {
			throw new Exception("Evaluation: No confusion matrix possible!");
		}
		StringBuffer text = new StringBuffer(title + "\nTP Rate   FP Rate" + "   Precision   Recall" + "  F-Measure   ROC Area  Class\n");
		for (int i = 0; i < m_NumClasses; i++) {
			text.append(Utils.doubleToString(truePositiveRate(i), 7, 3)).append("   ");
			text.append(Utils.doubleToString(falsePositiveRate(i), 7, 3)).append("    ");
			text.append(Utils.doubleToString(precision(i), 7, 3)).append("   ");
			text.append(Utils.doubleToString(recall(i), 7, 3)).append("   ");
			text.append(Utils.doubleToString(fMeasure(i), 7, 3)).append("    ");
			double rocVal = areaUnderROC(i);
			if (Instance.isMissingValue(rocVal)) {
				text.append("  ?    ").append("    ");
			} else {
				text.append(Utils.doubleToString(rocVal, 7, 3)).append("    ");
			}
			text.append(m_ClassNames[i]).append('\n');
		}
		return text.toString();
	}

	/**
	 * Calculate the number of true positives with respect to a particular class.
	 * This is defined as<p/>
	 * <pre>
	 * correctly classified positives
	 * </pre>
	 *
	 * @param classIndex the index of the class to consider as "positive"
	 * @return the true positive rate
	 */
	public double numTruePositives(int classIndex) {

		double correct = 0;
		for (int j = 0; j < m_NumClasses; j++) {
			if (j == classIndex) {
				correct += m_ConfusionMatrix[classIndex][j];
			}
		}
		return correct;
	}

	/**
	 * Calculate the true positive rate with respect to a particular class.
	 * This is defined as<p/>
	 * <pre>
	 * correctly classified positives
	 * ------------------------------
	 *       total positives
	 * </pre>
	 *
	 * @param classIndex the index of the class to consider as "positive"
	 * @return the true positive rate
	 */
	public double truePositiveRate(int classIndex) {

		double correct = 0, total = 0;
		for (int j = 0; j < m_NumClasses; j++) {
			if (j == classIndex) {
				correct += m_ConfusionMatrix[classIndex][j];
			}
			total += m_ConfusionMatrix[classIndex][j];
		}
		if (total == 0) {
			return 0;
		}
		return correct / total;
	}

	/**
	 * Calculate the number of true negatives with respect to a particular class.
	 * This is defined as<p/>
	 * <pre>
	 * correctly classified negatives
	 * </pre>
	 *
	 * @param classIndex the index of the class to consider as "positive"
	 * @return the true positive rate
	 */
	public double numTrueNegatives(int classIndex) {

		double correct = 0;
		for (int i = 0; i < m_NumClasses; i++) {
			if (i != classIndex) {
				for (int j = 0; j < m_NumClasses; j++) {
					if (j != classIndex) {
						correct += m_ConfusionMatrix[i][j];
					}
				}
			}
		}
		return correct;
	}

	/**
	 * Calculate the true negative rate with respect to a particular class.
	 * This is defined as<p/>
	 * <pre>
	 * correctly classified negatives
	 * ------------------------------
	 *       total negatives
	 * </pre>
	 *
	 * @param classIndex the index of the class to consider as "positive"
	 * @return the true positive rate
	 */
	public double trueNegativeRate(int classIndex) {

		double correct = 0, total = 0;
		for (int i = 0; i < m_NumClasses; i++) {
			if (i != classIndex) {
				for (int j = 0; j < m_NumClasses; j++) {
					if (j != classIndex) {
						correct += m_ConfusionMatrix[i][j];
					}
					total += m_ConfusionMatrix[i][j];
				}
			}
		}
		if (total == 0) {
			return 0;
		}
		return correct / total;
	}

	/**
	 * Calculate number of false positives with respect to a particular class.
	 * This is defined as<p/>
	 * <pre>
	 * incorrectly classified negatives
	 * </pre>
	 *
	 * @param classIndex the index of the class to consider as "positive"
	 * @return the false positive rate
	 */
	public double numFalsePositives(int classIndex) {

		double incorrect = 0;
		for (int i = 0; i < m_NumClasses; i++) {
			if (i != classIndex) {
				for (int j = 0; j < m_NumClasses; j++) {
					if (j == classIndex) {
						incorrect += m_ConfusionMatrix[i][j];
					}
				}
			}
		}
		return incorrect;
	}

	/**
	 * Calculate the false positive rate with respect to a particular class.
	 * This is defined as<p/>
	 * <pre>
	 * incorrectly classified negatives
	 * --------------------------------
	 *        total negatives
	 * </pre>
	 *
	 * @param classIndex the index of the class to consider as "positive"
	 * @return the false positive rate
	 */
	public double falsePositiveRate(int classIndex) {

		double incorrect = 0, total = 0;
		for (int i = 0; i < m_NumClasses; i++) {
			if (i != classIndex) {
				for (int j = 0; j < m_NumClasses; j++) {
					if (j == classIndex) {
						incorrect += m_ConfusionMatrix[i][j];
					}
					total += m_ConfusionMatrix[i][j];
				}
			}
		}
		if (total == 0) {
			return 0;
		}
		return incorrect / total;
	}

	/**
	 * Calculate number of false negatives with respect to a particular class.
	 * This is defined as<p/>
	 * <pre>
	 * incorrectly classified positives
	 * </pre>
	 *
	 * @param classIndex the index of the class to consider as "positive"
	 * @return the false positive rate
	 */
	public double numFalseNegatives(int classIndex) {

		double incorrect = 0;
		for (int i = 0; i < m_NumClasses; i++) {
			if (i == classIndex) {
				for (int j = 0; j < m_NumClasses; j++) {
					if (j != classIndex) {
						incorrect += m_ConfusionMatrix[i][j];
					}
				}
			}
		}
		return incorrect;
	}

	/**
	 * Calculate the false negative rate with respect to a particular class.
	 * This is defined as<p/>
	 * <pre>
	 * incorrectly classified positives
	 * --------------------------------
	 *        total positives
	 * </pre>
	 *
	 * @param classIndex the index of the class to consider as "positive"
	 * @return the false positive rate
	 */
	public double falseNegativeRate(int classIndex) {

		double incorrect = 0, total = 0;
		for (int i = 0; i < m_NumClasses; i++) {
			if (i == classIndex) {
				for (int j = 0; j < m_NumClasses; j++) {
					if (j != classIndex) {
						incorrect += m_ConfusionMatrix[i][j];
					}
					total += m_ConfusionMatrix[i][j];
				}
			}
		}
		if (total == 0) {
			return 0;
		}
		return incorrect / total;
	}

	/**
	 * Calculate the recall with respect to a particular class.
	 * This is defined as<p/>
	 * <pre>
	 * correctly classified positives
	 * ------------------------------
	 *       total positives
	 * </pre><p/>
	 * (Which is also the same as the truePositiveRate.)
	 *
	 * @param classIndex the index of the class to consider as "positive"
	 * @return the recall
	 */
	public double recall(int classIndex) {

		return truePositiveRate(classIndex);
	}

	/**
	 * Calculate the precision with respect to a particular class.
	 * This is defined as<p/>
	 * <pre>
	 * correctly classified positives
	 * ------------------------------
	 *  total predicted as positive
	 * </pre>
	 *
	 * @param classIndex the index of the class to consider as "positive"
	 * @return the precision
	 */
	public double precision(int classIndex) {

		double correct = 0, total = 0;
		for (int i = 0; i < m_NumClasses; i++) {
			if (i == classIndex) {
				correct += m_ConfusionMatrix[i][classIndex];
			}
			total += m_ConfusionMatrix[i][classIndex];
		}
		if (total == 0) {
			return 0;
		}
		return correct / total;
	}

	/**
	 * Calculate the F-Measure with respect to a particular class.
	 * This is defined as<p/>
	 * <pre>
	 * 2 * recall * precision
	 * ----------------------
	 *   recall + precision
	 * </pre>
	 *
	 * @param classIndex the index of the class to consider as "positive"
	 * @return the F-Measure
	 */
	public double fMeasure(int classIndex) {

		double precision = precision(classIndex);
		double recall = recall(classIndex);
		if ((precision + recall) == 0) {
			return 0;
		}
		return 2 * precision * recall / (precision + recall);
	}

	/**
	 * Sets the class prior probabilities
	 *
	 * @param train the training instances used to determine
	 * the prior probabilities
	 * @throws Exception if the class attribute of the instances is not
	 * set
	 */
	public void setPriors(Instances train) throws Exception {
		m_NoPriors = false;

		if (!m_ClassIsNominal) {

			m_NumTrainClassVals = 0;
			m_TrainClassVals = null;
			m_TrainClassWeights = null;
			m_PriorErrorEstimator = null;
			m_ErrorEstimator = null;

			for (int i = 0; i < train.numInstances(); i++) {
				Instance currentInst = train.instance(i);
				if (!currentInst.classIsMissing()) {
					addNumericTrainClass(currentInst.classValue(),
							currentInst.weight());
				}
			}

		} else {
			for (int i = 0; i < m_NumClasses; i++) {
				m_ClassPriors[i] = 1;
			}
			m_ClassPriorsSum = m_NumClasses;
			for (int i = 0; i < train.numInstances(); i++) {
				if (!train.instance(i).classIsMissing()) {
					m_ClassPriors[(int) train.instance(i).classValue()] +=
							train.instance(i).weight();
					m_ClassPriorsSum += train.instance(i).weight();
				}
			}
		}
	}

	/**
	 * Get the current weighted class counts
	 *
	 * @return the weighted class counts
	 */
	public double[] getClassPriors() {
		return m_ClassPriors;
	}

	/**
	 * Updates the class prior probabilities (when incrementally
	 * training)
	 *
	 * @param instance the new training instance seen
	 * @throws Exception if the class of the instance is not
	 * set
	 */
	public void updatePriors(Instance instance) throws Exception {
		if (!instance.classIsMissing()) {
			if (!m_ClassIsNominal) {
				if (!instance.classIsMissing()) {
					addNumericTrainClass(instance.classValue(),
							instance.weight());
				}
			} else {
				m_ClassPriors[(int) instance.classValue()] +=
						instance.weight();
				m_ClassPriorsSum += instance.weight();
			}
		}
	}

	/**
	 * disables the use of priors, e.g., in case of de-serialized schemes
	 * that have no access to the original training set, but are evaluated
	 * on a set set.
	 */
	public void useNoPriors() {
		m_NoPriors = true;
	}

	/**
	 * Tests whether the current evaluation object is equal to another
	 * evaluation object
	 *
	 * @param obj the object to compare against
	 * @return true if the two objects are equal
	 */
	public boolean equals(Object obj) {

		if ((obj == null) || !(obj.getClass().equals(this.getClass()))) {
			return false;
		}
		Evaluation cmp = (Evaluation) obj;
		if (m_ClassIsNominal != cmp.m_ClassIsNominal) {
			return false;
		}
		if (m_NumClasses != cmp.m_NumClasses) {
			return false;
		}

		if (m_Incorrect != cmp.m_Incorrect) {
			return false;
		}
		if (m_Correct != cmp.m_Correct) {
			return false;
		}
		if (m_Unclassified != cmp.m_Unclassified) {
			return false;
		}
		if (m_MissingClass != cmp.m_MissingClass) {
			return false;
		}
		if (m_WithClass != cmp.m_WithClass) {
			return false;
		}

		if (m_SumErr != cmp.m_SumErr) {
			return false;
		}
		if (m_SumAbsErr != cmp.m_SumAbsErr) {
			return false;
		}
		if (m_SumSqrErr != cmp.m_SumSqrErr) {
			return false;
		}
		if (m_SumClass != cmp.m_SumClass) {
			return false;
		}
		if (m_SumSqrClass != cmp.m_SumSqrClass) {
			return false;
		}
		if (m_SumPredicted != cmp.m_SumPredicted) {
			return false;
		}
		if (m_SumSqrPredicted != cmp.m_SumSqrPredicted) {
			return false;
		}
		if (m_SumClassPredicted != cmp.m_SumClassPredicted) {
			return false;
		}

		if (m_ClassIsNominal) {
			for (int i = 0; i < m_NumClasses; i++) {
				for (int j = 0; j < m_NumClasses; j++) {
					if (m_ConfusionMatrix[i][j] != cmp.m_ConfusionMatrix[i][j]) {
						return false;
					}
				}
			}
		}

		return true;
	}

	/**
	 * Prints the predictions for the given dataset into a String variable.
	 *
	 * @param classifier		the classifier to use
	 * @param train		the training data
	 * @param testSource		the test set
	 * @param classIndex		the class index (1-based), if -1 ot does not
	 * 				override the class index is stored in the data
	 * 				file (by using the last attribute)
	 * @param attributesToOutput	the indices of the attributes to output
	 * @return			the generated predictions for the attribute range
	 * @throws Exception 		if test file cannot be opened
	 */
	public static void printClassifications(Classifier classifier,
			Instances train,
			DataSource testSource,
			int classIndex,
			Range attributesToOutput,
			StringBuffer predsText) throws Exception {

		printClassifications(classifier, train,
				testSource, classIndex,
				attributesToOutput, false, predsText);
	}

	/**
	 * Prints the header for the predictions output into a supplied StringBuffer
	 *
	 * @param test structure of the test set to print predictions for
	 * @param attributesToOutput indices of the attributes to output
	 * @param printDistribution prints the complete distribution for nominal
	 * attributes, not just the predicted value
	 * @param text the StringBuffer to print to
	 */
	protected static void printClassificationsHeader(Instances test,
			Range attributesToOutput,
			boolean printDistribution,
			StringBuffer text) {
		// print header
		if (test.classAttribute().isNominal()) {
			if (printDistribution) {
				text.append(" inst#     actual  predicted error distribution");
			} else {
				text.append(" inst#     actual  predicted error prediction");
			}
		} else {
			text.append(" inst#     actual  predicted      error");
		}
		if (attributesToOutput != null) {
			attributesToOutput.setUpper(test.numAttributes() - 1);
			text.append(" (");
			boolean first = true;
			for (int i = 0; i < test.numAttributes(); i++) {
				if (i == test.classIndex()) {
					continue;
				}

				if (attributesToOutput.isInRange(i)) {
					if (!first) {
						text.append(",");
					}
					text.append(test.attribute(i).name());
					first = false;
				}
			}
			text.append(")");
		}
		text.append("\n");
	}

	/**
	 * Prints the predictions for the given dataset into a supplied StringBuffer
	 *
	 * @param classifier		the classifier to use
	 * @param train		the training data
	 * @param testSource		the test set
	 * @param classIndex		the class index (1-based), if -1 ot does not
	 * 				override the class index is stored in the data
	 * 				file (by using the last attribute)
	 * @param attributesToOutput	the indices of the attributes to output
	 * @param printDistribution	prints the complete distribution for nominal
	 * 				classes, not just the predicted value
	 * @param text                StringBuffer to hold the printed predictions
	 * @throws Exception 		if test file cannot be opened
	 */
	public static void printClassifications(Classifier classifier,
			Instances train,
			DataSource testSource,
			int classIndex,
			Range attributesToOutput,
			boolean printDistribution,
			StringBuffer text) throws Exception {

		if (testSource != null) {
			Instances test = testSource.getStructure();
			if (classIndex != -1) {
				test.setClassIndex(classIndex - 1);
			} else {
				if (test.classIndex() == -1) {
					test.setClassIndex(test.numAttributes() - 1);
				}
			}

			// print the header
//			printClassificationsHeader(test, attributesToOutput, printDistribution, text);

			// print predictions
			int i = 0;
			testSource.reset();
			test = testSource.getStructure(test.classIndex());
			while (testSource.hasMoreElements(test)) {
				Instance inst = testSource.nextElement(test);
				double[] dist = classifier.distributionForInstance(inst);
				double prob = dist[1];
				int actual = (int)inst.classValue();
				System.out.format( "%9e %d\n", prob, actual );
				//System.out.println(predictionText(classifier, inst, i, attributesToOutput, printDistribution));
				//text.append(predictionText(classifier, inst, i,
//                                   attributesToOutput, printDistribution));
				i++;
			}
		}
	//    return text.toString();
	}

	/**
	 * store the prediction made by the classifier as a string
	 *
	 * @param classifier		the classifier to use
	 * @param inst		the instance to generate text from
	 * @param instNum		the index in the dataset
	 * @param attributesToOutput	the indices of the attributes to output
	 * @param printDistribution	prints the complete distribution for nominal
	 * 				classes, not just the predicted value
	 * @return                    the prediction as a String
	 * @throws Exception		if something goes wrong
	 * @see			#printClassifications(Classifier, Instances, String, int, Range, boolean)
	 */
	protected static String predictionText(Classifier classifier,
			Instance inst,
			int instNum,
			Range attributesToOutput,
			boolean printDistribution)
			throws Exception {

		StringBuffer result = new StringBuffer();
		int width = 10;
		int prec = 9;

		Instance withMissing = (Instance) inst.copy();
		withMissing.setDataset(inst.dataset());
		double predValue = ((Classifier) classifier).classifyInstance(withMissing);

		// index
		result.append(Utils.padLeft("" + (instNum + 1), 6));

		if (inst.dataset().classAttribute().isNumeric()) {
			// actual
			if (inst.classIsMissing()) {
				result.append(" " + Utils.padLeft("?", width));
			} else {
				result.append(" " + Utils.doubleToString(inst.classValue(), width, prec));
			}
			// predicted
			if (Instance.isMissingValue(predValue)) {
				result.append(" " + Utils.padLeft("?", width));
			} else {
				result.append(" " + Utils.doubleToString(predValue, width, prec));
			}
			// error
			if (Instance.isMissingValue(predValue) || inst.classIsMissing()) {
				result.append(" " + Utils.padLeft("?", width));
			} else {
				result.append(" " + Utils.doubleToString(predValue - inst.classValue(), width, prec));
			}
		} else {
			// actual
			result.append(" " + Utils.padLeft(((int) inst.classValue() + 1) + ":" + inst.toString(inst.classIndex()), width));
			// predicted
			if (Instance.isMissingValue(predValue)) {
				result.append(" " + Utils.padLeft("?", width));
			} else {
				result.append(" " + Utils.padLeft(((int) predValue + 1) + ":" + inst.dataset().classAttribute().value((int) predValue), width));
			}
			// error?
			if ((int) predValue + 1 != (int) inst.classValue() + 1) {
				result.append(" " + "  +  ");
			} else {
				result.append(" " + "     ");
			}
			// prediction/distribution
			if (printDistribution) {
				if (Instance.isMissingValue(predValue)) {
					result.append(" " + "?");
				} else {
					result.append(" ");
					double[] dist = classifier.distributionForInstance(withMissing);
					for (int n = 0; n < dist.length; n++) {
						if (n > 0) {
							result.append(",");
						}
						if (n == (int) predValue) {
							result.append("*");
						}
						result.append(Utils.doubleToString(dist[n], prec));
					}
				}
			} else {
				if (Instance.isMissingValue(predValue)) {
					result.append(" " + "?");
				} else {
					result.append(" " + Utils.doubleToString(classifier.distributionForInstance(withMissing)[(int) predValue], prec));
				}
			}
		}

		// attributes
		result.append(" " + attributeValuesString(withMissing, attributesToOutput) + "\n");

		return result.toString();
	}

	/**
	 * Builds a string listing the attribute values in a specified range of indices,
	 * separated by commas and enclosed in brackets.
	 *
	 * @param instance the instance to print the values from
	 * @param attRange the range of the attributes to list
	 * @return a string listing values of the attributes in the range
	 */
	protected static String attributeValuesString(Instance instance, Range attRange) {
		StringBuffer text = new StringBuffer();
		if (attRange != null) {
			boolean firstOutput = true;
			attRange.setUpper(instance.numAttributes() - 1);
			for (int i = 0; i < instance.numAttributes(); i++) {
				if (attRange.isInRange(i) && i != instance.classIndex()) {
					if (firstOutput) {
						text.append("(");
					} else {
						text.append(",");
					}
					text.append(instance.toString(i));
					firstOutput = false;
				}
			}
			if (!firstOutput) {
				text.append(")");
			}
		}
		return text.toString();
	}

	/**
	 * Make up the help string giving all the command line options
	 *
	 * @param classifier the classifier to include options for
	 * @return a string detailing the valid command line options
	 */
	protected static String makeOptionString(Classifier classifier) {

		StringBuffer optionsText = new StringBuffer("");

		// General options
		optionsText.append("\n\nGeneral options:\n\n");
		optionsText.append("-t <name of training file>\n");
		optionsText.append("\tSets training file.\n");
		optionsText.append("-T <name of test file>\n");
		optionsText.append("\tSets test file. If missing, a cross-validation will be performed\n");
		optionsText.append("\ton the training data.\n");
		optionsText.append("-c <class index>\n");
		optionsText.append("\tSets index of class attribute (default: last).\n");
		optionsText.append("-x <number of folds>\n");
		optionsText.append("\tSets number of folds for cross-validation (default: 10).\n");
		optionsText.append("-no-cv\n");
		optionsText.append("\tDo not perform any cross validation.\n");
		optionsText.append("-split-percentage <percentage>\n");
		optionsText.append("\tSets the percentage for the train/test set split, e.g., 66.\n");
		optionsText.append("-preserve-order\n");
		optionsText.append("\tPreserves the order in the percentage split.\n");
		optionsText.append("-s <random number seed>\n");
		optionsText.append("\tSets random number seed for cross-validation or percentage split\n");
		optionsText.append("\t(default: 1).\n");
		optionsText.append("-m <name of file with cost matrix>\n");
		optionsText.append("\tSets file with cost matrix.\n");
		optionsText.append("-l <name of input file>\n");
		optionsText.append("\tSets model input file. In case the filename ends with '.xml',\n");
		optionsText.append("\tthe options are loaded from the XML file.\n");
		optionsText.append("-d <name of output file>\n");
		optionsText.append("\tSets model output file. In case the filename ends with '.xml',\n");
		optionsText.append("\tonly the options are saved to the XML file, not the model.\n");
		optionsText.append("-v\n");
		optionsText.append("\tOutputs no statistics for training data.\n");
		optionsText.append("-o\n");
		optionsText.append("\tOutputs statistics only, not the classifier.\n");
		optionsText.append("-i\n");
		optionsText.append("\tOutputs detailed information-retrieval");
		optionsText.append(" statistics for each class.\n");
		optionsText.append("-k\n");
		optionsText.append("\tOutputs information-theoretic statistics.\n");
		optionsText.append("-p <attribute range>\n");
		optionsText.append("\tOnly outputs predictions for test instances (or the train\n" + "\tinstances if no test instances provided and -no-cv is used),\n" + "\talong with attributes (0 for none).\n");
		optionsText.append("-distribution\n");
		optionsText.append("\tOutputs the distribution instead of only the prediction\n");
		optionsText.append("\tin conjunction with the '-p' option (only nominal classes).\n");
		optionsText.append("-r\n");
		optionsText.append("\tOnly outputs cumulative margin distribution.\n");
		if (classifier instanceof Sourcable) {
			optionsText.append("-z <class name>\n");
			optionsText.append("\tOnly outputs the source representation" + " of the classifier,\n\tgiving it the supplied" + " name.\n");
		}
		if (classifier instanceof Drawable) {
			optionsText.append("-g\n");
			optionsText.append("\tOnly outputs the graph representation" + " of the classifier.\n");
		}
		optionsText.append("-xml filename | xml-string\n");
		optionsText.append("\tRetrieves the options from the XML-data instead of the " + "command line.\n");
		optionsText.append("-threshold-file <file>\n");
		optionsText.append("\tThe file to save the threshold data to.\n" + "\tThe format is determined by the extensions, e.g., '.arff' for ARFF \n" + "\tformat or '.csv' for CSV.\n");
		optionsText.append("-threshold-label <label>\n");
		optionsText.append("\tThe class label to determine the threshold data for\n" + "\t(default is the first label)\n");

		// Get scheme-specific options
		if (classifier instanceof OptionHandler) {
			optionsText.append("\nOptions specific to " + classifier.getClass().getName() + ":\n\n");
			Enumeration enu = ((OptionHandler) classifier).listOptions();
			while (enu.hasMoreElements()) {
				Option option = (Option) enu.nextElement();
				optionsText.append(option.synopsis() + '\n');
				optionsText.append(option.description() + "\n");
			}
		}
		return optionsText.toString();
	}

	/**
	 * Method for generating indices for the confusion matrix.
	 *
	 * @param num 	integer to format
	 * @param IDChars	the characters to use
	 * @param IDWidth	the width of the entry
	 * @return 		the formatted integer as a string
	 */
	protected String num2ShortID(int num, char[] IDChars, int IDWidth) {

		char ID[] = new char[IDWidth];
		int i;

		for (i = IDWidth - 1; i >= 0; i--) {
			ID[i] = IDChars[num % IDChars.length];
			num = num / IDChars.length - 1;
			if (num < 0) {
				break;
			}
		}
		for (i--; i >= 0; i--) {
			ID[i] = ' ';
		}

		return new String(ID);
	}

	/**
	 * Convert a single prediction into a probability distribution
	 * with all zero probabilities except the predicted value which
	 * has probability 1.0;
	 *
	 * @param predictedClass the index of the predicted class
	 * @return the probability distribution
	 */
	protected double[] makeDistribution(double predictedClass) {

		double[] result = new double[m_NumClasses];
		if (Instance.isMissingValue(predictedClass)) {
			return result;
		}
		if (m_ClassIsNominal) {
			result[(int) predictedClass] = 1.0;
		} else {
			result[0] = predictedClass;
		}
		return result;
	}

	/**
	 * Updates all the statistics about a classifiers performance for
	 * the current test instance.
	 *
	 * @param predictedDistribution the probabilities assigned to
	 * each class
	 * @param instance the instance to be classified
	 * @throws Exception if the class of the instance is not
	 * set
	 */
	protected void updateStatsForClassifier(double[] predictedDistribution,
			Instance instance)
			throws Exception {

		int actualClass = (int) instance.classValue();

		if (!instance.classIsMissing()) {
			updateMargins(predictedDistribution, actualClass, instance.weight());

			// Determine the predicted class (doesn't detect multiple
			// classifications)
			int predictedClass = -1;
			double bestProb = 0.0;
			for (int i = 0; i < m_NumClasses; i++) {
				if (predictedDistribution[i] > bestProb) {
					predictedClass = i;
					bestProb = predictedDistribution[i];
				}
			}

			m_WithClass += instance.weight();

			// Determine misclassification cost
			if (m_CostMatrix != null) {
				if (predictedClass < 0) {
					// For missing predictions, we assume the worst possible cost.
					// This is pretty harsh.
					// Perhaps we could take the negative of the cost of a correct
					// prediction (-m_CostMatrix.getElement(actualClass,actualClass)),
					// although often this will be zero
					m_TotalCost += instance.weight() * m_CostMatrix.getMaxCost(actualClass, instance);
				} else {
					m_TotalCost += instance.weight() * m_CostMatrix.getElement(actualClass, predictedClass,
							instance);
				}
			}

			// Update counts when no class was predicted
			if (predictedClass < 0) {
				m_Unclassified += instance.weight();
				return;
			}

			double predictedProb = Math.max(MIN_SF_PROB,
					predictedDistribution[actualClass]);
			double priorProb = Math.max(MIN_SF_PROB,
					m_ClassPriors[actualClass] / m_ClassPriorsSum);
			if (predictedProb >= priorProb) {
				m_SumKBInfo += (Utils.log2(predictedProb) -
						Utils.log2(priorProb)) * instance.weight();
			} else {
				m_SumKBInfo -= (Utils.log2(1.0 - predictedProb) -
						Utils.log2(1.0 - priorProb)) * instance.weight();
			}

			m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
			m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();

			updateNumericScores(predictedDistribution,
					makeDistribution(instance.classValue()),
					instance.weight());

			// Update other stats
			m_ConfusionMatrix[actualClass][predictedClass] += instance.weight();
			if (predictedClass != actualClass) {
				m_Incorrect += instance.weight();
			} else {
				m_Correct += instance.weight();
			}
		} else {
			m_MissingClass += instance.weight();
		}
	}

	/**
	 * Updates all the statistics about a predictors performance for
	 * the current test instance.
	 *
	 * @param predictedValue the numeric value the classifier predicts
	 * @param instance the instance to be classified
	 * @throws Exception if the class of the instance is not
	 * set
	 */
	protected void updateStatsForPredictor(double predictedValue,
			Instance instance)
			throws Exception {

		if (!instance.classIsMissing()) {

			// Update stats
			m_WithClass += instance.weight();
			if (Instance.isMissingValue(predictedValue)) {
				m_Unclassified += instance.weight();
				return;
			}
			m_SumClass += instance.weight() * instance.classValue();
			m_SumSqrClass += instance.weight() * instance.classValue() * instance.classValue();
			m_SumClassPredicted += instance.weight() * instance.classValue() * predictedValue;
			m_SumPredicted += instance.weight() * predictedValue;
			m_SumSqrPredicted += instance.weight() * predictedValue * predictedValue;

			if (m_ErrorEstimator == null) {
				setNumericPriorsFromBuffer();
			}
			double predictedProb = Math.max(m_ErrorEstimator.getProbability(
					predictedValue - instance.classValue()),
					MIN_SF_PROB);
			double priorProb = Math.max(m_PriorErrorEstimator.getProbability(
					instance.classValue()),
					MIN_SF_PROB);

			m_SumSchemeEntropy -= Utils.log2(predictedProb) * instance.weight();
			m_SumPriorEntropy -= Utils.log2(priorProb) * instance.weight();
			m_ErrorEstimator.addValue(predictedValue - instance.classValue(),
					instance.weight());

			updateNumericScores(makeDistribution(predictedValue),
					makeDistribution(instance.classValue()),
					instance.weight());

		} else {
			m_MissingClass += instance.weight();
		}
	}

	/**
	 * Update the cumulative record of classification margins
	 *
	 * @param predictedDistribution the probability distribution predicted for
	 * the current instance
	 * @param actualClass the index of the actual instance class
	 * @param weight the weight assigned to the instance
	 */
	protected void updateMargins(double[] predictedDistribution,
			int actualClass, double weight) {

		double probActual = predictedDistribution[actualClass];
		double probNext = 0;

		for (int i = 0; i < m_NumClasses; i++) {
			if ((i != actualClass) &&
					(predictedDistribution[i] > probNext)) {
				probNext = predictedDistribution[i];
			}
		}

		double margin = probActual - probNext;
		int bin = (int) ((margin + 1.0) / 2.0 * k_MarginResolution);
		m_MarginCounts[bin] += weight;
	}

	/**
	 * Update the numeric accuracy measures. For numeric classes, the
	 * accuracy is between the actual and predicted class values. For
	 * nominal classes, the accuracy is between the actual and
	 * predicted class probabilities.
	 *
	 * @param predicted the predicted values
	 * @param actual the actual value
	 * @param weight the weight associated with this prediction
	 */
	protected void updateNumericScores(double[] predicted,
			double[] actual, double weight) {

		double diff;
		double sumErr = 0, sumAbsErr = 0, sumSqrErr = 0;
		double sumPriorAbsErr = 0, sumPriorSqrErr = 0;
		for (int i = 0; i < m_NumClasses; i++) {
			diff = predicted[i] - actual[i];
			sumErr += diff;
			sumAbsErr += Math.abs(diff);
			sumSqrErr += diff * diff;
			diff = (m_ClassPriors[i] / m_ClassPriorsSum) - actual[i];
			sumPriorAbsErr += Math.abs(diff);
			sumPriorSqrErr += diff * diff;
		}
		m_SumErr += weight * sumErr / m_NumClasses;
		m_SumAbsErr += weight * sumAbsErr / m_NumClasses;
		m_SumSqrErr += weight * sumSqrErr / m_NumClasses;
		m_SumPriorAbsErr += weight * sumPriorAbsErr / m_NumClasses;
		m_SumPriorSqrErr += weight * sumPriorSqrErr / m_NumClasses;
	}

	/**
	 * Adds a numeric (non-missing) training class value and weight to
	 * the buffer of stored values.
	 *
	 * @param classValue the class value
	 * @param weight the instance weight
	 */
	protected void addNumericTrainClass(double classValue, double weight) {

		if (m_TrainClassVals == null) {
			m_TrainClassVals = new double[100];
			m_TrainClassWeights = new double[100];
		}
		if (m_NumTrainClassVals == m_TrainClassVals.length) {
			double[] temp = new double[m_TrainClassVals.length * 2];
			System.arraycopy(m_TrainClassVals, 0,
					temp, 0, m_TrainClassVals.length);
			m_TrainClassVals = temp;

			temp = new double[m_TrainClassWeights.length * 2];
			System.arraycopy(m_TrainClassWeights, 0,
					temp, 0, m_TrainClassWeights.length);
			m_TrainClassWeights = temp;
		}
		m_TrainClassVals[m_NumTrainClassVals] = classValue;
		m_TrainClassWeights[m_NumTrainClassVals] = weight;
		m_NumTrainClassVals++;
	}

	/**
	 * Sets up the priors for numeric class attributes from the
	 * training class values that have been seen so far.
	 */
	protected void setNumericPriorsFromBuffer() {

		double numPrecision = 0.01; // Default value
		if (m_NumTrainClassVals > 1) {
			double[] temp = new double[m_NumTrainClassVals];
			System.arraycopy(m_TrainClassVals, 0, temp, 0, m_NumTrainClassVals);
			int[] index = Utils.sort(temp);
			double lastVal = temp[index[0]];
			double deltaSum = 0;
			int distinct = 0;
			for (int i = 1; i < temp.length; i++) {
				double current = temp[index[i]];
				if (current != lastVal) {
					deltaSum += current - lastVal;
					lastVal = current;
					distinct++;
				}
			}
			if (distinct > 0) {
				numPrecision = deltaSum / distinct;
			}
		}
		m_PriorErrorEstimator = new KernelEstimator(numPrecision);
		m_ErrorEstimator = new KernelEstimator(numPrecision);
		m_ClassPriors[0] = m_ClassPriorsSum = 0;
		for (int i = 0; i < m_NumTrainClassVals; i++) {
			m_ClassPriors[0] += m_TrainClassVals[i] * m_TrainClassWeights[i];
			m_ClassPriorsSum += m_TrainClassWeights[i];
			m_PriorErrorEstimator.addValue(m_TrainClassVals[i],
					m_TrainClassWeights[i]);
		}
	}

	/**
	 * Returns the revision string.
	 *
	 * @return		the revision
	 */
	public String getRevision() {
		return RevisionUtils.extract("$Revision: 1.88 $");
	}
}