package weka.classifiers.meta.timeseriesensembles;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import java.util.TreeMap;
import java.util.TreeSet;
import weka.core.Capabilities;
import weka.core.Instance;
import weka.core.Instances;
import weka.classifiers.lazy.kNN;
import weka.filters.timeseries.DerivativeFilter;
import weka.core.elastic_distance_measures.BasicDTW;
import weka.core.elastic_distance_measures.ERPDistance;
import weka.core.elastic_distance_measures.LCSSDistance;
import weka.core.elastic_distance_measures.MSMDistance;
import weka.core.elastic_distance_measures.SakoeChibaDTW;
import weka.core.elastic_distance_measures.TWEDistance;
import weka.core.elastic_distance_measures.WeightedDTW;
import java.io.FileWriter;
import java.text.DecimalFormat;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import weka.classifiers.Classifier;
import weka.core.EuclideanDistance;
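// The Elastic Ensemble: an ensemble of 1-NN classifiers, each using a different elastic distance measure
// (DTW, WDTW, LCSS, MSM, TWE, ERP, derivative variants, and Euclidean distance as a baseline). Each measure's
// parameters are tuned by leave-one-out cross-validation on the training data, and the resulting CV accuracies
// are used to weight (or select among) the members at classification time, depending on the EnsembleType.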
public class ElasticEnsemble implements Classifier{
// note distributionForInstance and getCapabilities added to appease the new Classifier interface, NO IMPLEMENTATION
@Override
public double[] distributionForInstance(Instance instance) throws Exception {
throw new UnsupportedOperationException("Not supported yet.");
}
@Override
public Capabilities getCapabilities() {
throw new UnsupportedOperationException("Not supported yet.");
}
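// Naming convention for the variants below: R1 = full warping window, Rn = window size set by cross-validation,
// W = weighted, D = derivative (the series is transformed by DerivativeFilter before the distance is computed).
// LCSS = longest common subsequence, MSM = move-split-merge, TWE = time warp edit, ERP = edit distance with real penalty.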
public enum ClassifierVariants{
Euclidean_1NN,
DTW_R1_1NN,
DTW_Rn_1NN,
WDTW_1NN,
DDTW_R1_1NN,
DDTW_Rn_1NN,
WDDTW_1NN,
LCSS_1NN,
MSM_1NN,
TWE_1NN,
ERP_1NN,
}
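// Ensemble strategies: Best uses only the classifier with the highest training CV accuracy; Equal gives every
// member one vote; Prop weights each member's vote by its CV accuracy; Signif is Prop restricted to members that
// are not significantly worse than the best on the training data according to McNemar's test (see getMcNemarsInclusion()).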
public enum EnsembleType{
Best,
Equal,
Prop,
Signif
}
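// A minimal usage sketch (the file path is hypothetical - any UCR/UEA-style arff with the class as the last attribute works):
//   ElasticEnsemble ee = new ElasticEnsemble();
//   ee.turnAllClassifiersOn();
//   ee.setEnsembleType(EnsembleType.Prop);
//   ee.buildClassifier(loadData("data/ItalyPowerDemand_TRAIN.arff"));
//   double prediction = ee.classifyInstance(loadData("data/ItalyPowerDemand_TEST.arff").instance(0));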
private static double[] msmParams = {
// 100 values over four scales, 25 per scale
// 0.01 to 0.1, step 0.00375:
0.01, 0.01375, 0.0175, 0.02125, 0.025, 0.02875, 0.0325, 0.03625, 0.04, 0.04375, 0.0475, 0.05125, 0.055,
0.05875, 0.0625, 0.06625, 0.07, 0.07375, 0.0775, 0.08125, 0.085, 0.08875, 0.0925, 0.09625, 0.1,
// 0.136 to 1, step 0.036:
0.136, 0.172, 0.208, 0.244, 0.28, 0.316, 0.352, 0.388, 0.424, 0.46, 0.496, 0.532, 0.568,
0.604, 0.64, 0.676, 0.712, 0.748, 0.784, 0.82, 0.856, 0.892, 0.928, 0.964, 1,
// 1.36 to 10, step 0.36:
1.36, 1.72, 2.08, 2.44, 2.8, 3.16, 3.52, 3.88, 4.24, 4.6, 4.96, 5.32, 5.68,
6.04, 6.4, 6.76, 7.12, 7.48, 7.84, 8.2, 8.56, 8.92, 9.28, 9.64, 10,
// 13.6 to 100, step 3.6:
13.6, 17.2, 20.8, 24.4, 28, 31.6, 35.2, 38.8, 42.4, 46, 49.6, 53.2, 56.8,
60.4, 64, 67.6, 71.2, 74.8, 78.4, 82, 85.6, 89.2, 92.8, 96.4, 100
};
private static double[] twe_nuParams = {
0.00001, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1
};
private static double[] twe_lambdaParams = {
0, 0.011111111, 0.022222222, 0.033333333, 0.044444444, 0.055555556, 0.066666667, 0.077777778, 0.088888889, 0.1
};
// A TreeSet is initially used to store the classifiers to use. This ensures that no duplicates are included, and keeps everything in the order specified in the enum declaration.
// Once the classifier is built, the classifier choices are locked in by creating an array of ClassifierVariants. This is done for two reasons: firstly, the indices of the array match
// the other arrays, such as cvAccs and cvPreds. Secondly, this separates the classifier selection before and after building the classifier, ensuring that unexpected behaviour isn't caused
// by carrying out abnormal operations (e.g. adding classifiers to the ensemble after training has occurred).
private TreeSet<ClassifierVariants> classifiersToUse;
private ClassifierVariants[] finalClassifiers;
private double[] cvAccs;
private double[][] cvPreds;
private double[] trainActualClassVals;
private EnsembleType ensembleType;
private double[][] bestParams;
private boolean fileWriting;
private String outputDirLocation;
private String datasetName;
private Instances fullTrainingData;
private boolean[] mcNemarsInclusion;
private boolean classifierBuilt;
private boolean verbose;
private boolean parallel;
public ElasticEnsemble(){
this.ensembleType = null;
this.classifiersToUse = new TreeSet<ClassifierVariants>();
this.finalClassifiers = null;
this.fileWriting = false;
this.outputDirLocation = null;
this.cvAccs = null;
this.cvPreds = null;
this.bestParams = null;
this.verbose = false;
this.classifierBuilt = false;
this.parallel = true; // default
}
public void turnAllClassifiersOn() throws Exception{
if(classifierBuilt){
throw new Exception("Error: Classifier has already been built. Unable to change classifiers within ensemble.");
}
this.classifiersToUse = new TreeSet<ClassifierVariants>();
classifiersToUse.addAll(Arrays.asList(ClassifierVariants.values()));
}
public void turnSDMClassifiersOn() throws Exception{
if(classifierBuilt){
throw new Exception("Error: Classifier has already been built. Unable to change classifiers within ensemble.");
}
this.classifiersToUse = new TreeSet<ClassifierVariants>();
classifiersToUse.add(ClassifierVariants.Euclidean_1NN);
classifiersToUse.add(ClassifierVariants.DTW_R1_1NN);
classifiersToUse.add(ClassifierVariants.DTW_Rn_1NN);
classifiersToUse.add(ClassifierVariants.WDTW_1NN);
classifiersToUse.add(ClassifierVariants.DDTW_R1_1NN);
classifiersToUse.add(ClassifierVariants.DDTW_Rn_1NN);
classifiersToUse.add(ClassifierVariants.WDDTW_1NN);
classifiersToUse.add(ClassifierVariants.LCSS_1NN);
classifiersToUse.add(ClassifierVariants.TWE_1NN);
}
public boolean addClassifierToEnsemble(ClassifierVariants classifierToAdd) throws Exception{
if(classifierBuilt){
throw new Exception("Error: Classifier has already been built. Unable to change classifiers within ensemble.");
}
if(this.classifiersToUse.contains(classifierToAdd)){
return false;
}else{
classifiersToUse.add(classifierToAdd);
return true;
}
}
public boolean removeClassifierFromEnsemble(ClassifierVariants classifierToRemove) throws Exception{
if(classifierBuilt){
throw new Exception("Error: Classifier has already been built. Unable to change classifiers within ensemble.");
}
if(this.classifiersToUse.contains(classifierToRemove)){
this.classifiersToUse.remove(classifierToRemove);
return true;
}else{
return false;
}
}
public void setEnsembleType(EnsembleType ensembleType){
this.ensembleType = ensembleType;
}
public void turnOnFileWriting(String outputDirLocation, String datasetName){
this.fileWriting = true;
this.outputDirLocation = outputDirLocation;
this.datasetName = datasetName;
}
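// With file writing on, buildClassifier writes the following under outputDirLocation:
//   bestCvOut/<classifier>/cvBest_<classifier>_<datasetName>.txt - CV accuracy and per-instance predictions for the best parameter setting
//   bestParams/bestParams_<datasetName>.txt - the best parameter values found for each classifier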
public void makeVerbose(){
this.verbose = true;
}
@Override
public void buildClassifier(Instances train) throws Exception {
// if applicable, check that the file locations are valid before carrying out cv
File cvDir = null;
File classifierOutputDir;
StringBuilder st;
StringBuilder bestParamsSt = new StringBuilder();
FileWriter cvOut;
int correct;
this.trainActualClassVals = new double[train.numInstances()];
for(int ins = 0; ins < trainActualClassVals.length; ins++){
trainActualClassVals[ins] = train.instance(ins).classValue();
}
if(fileWriting){
cvDir = new File(this.outputDirLocation+"/bestCvOut");
boolean valid = cvDir.mkdirs();
if(!valid && !cvDir.exists()){
throw new Exception("The output dir at: "+outputDirLocation+" could not be created.");
}else if(!valid){
System.out.println("Warning: cvDir at "+this.outputDirLocation+" already exists. Any conflicting results under this location will be overwritten.");
}
}
this.finalClassifiers = new ClassifierVariants[this.classifiersToUse.size()];
int c = 0;
for(ClassifierVariants classifier:this.classifiersToUse){
this.finalClassifiers[c++] = classifier;
}
// carry out the cross validation
this.cvAccs = new double[this.finalClassifiers.length];
this.cvPreds = new double[this.finalClassifiers.length][train.numInstances()];
this.bestParams = new double[this.finalClassifiers.length][];
for(int i = 0; i < finalClassifiers.length; i++){
ClassifierVariants classifierType = this.finalClassifiers[i];
crossValidateClassifierType(classifierType, train, i);
if(fileWriting){
classifierOutputDir = new File(this.outputDirLocation+"/bestCvOut/"+classifierType);
classifierOutputDir.mkdirs();
st = new StringBuilder();
correct = 0;
for(int j = 0; j < this.cvPreds[i].length; j++){
st.append(this.cvPreds[i][j]).append(",").append(trainActualClassVals[j]).append("\n");
if(this.cvPreds[i][j]==trainActualClassVals[j]){
correct++;
}
}
cvOut = new FileWriter(this.outputDirLocation+"/bestCvOut/"+classifierType+"/cvBest_"+classifierType+"_"+datasetName+".txt");
cvOut.append(correct+"/"+cvPreds[i].length+"\n");
cvOut.append(st);
cvOut.close();
bestParamsSt.append(classifierType).append(",");
for(int j = 0; bestParams[i]!=null && j < bestParams[i].length; j++){
bestParamsSt.append(bestParams[i][j]).append(",");
}
bestParamsSt.append("\n");
}
}
if(fileWriting){
File paramsOutputDir = new File(this.outputDirLocation+"/bestParams/");
paramsOutputDir.mkdirs();
FileWriter bestParamsOut = new FileWriter(this.outputDirLocation+"/bestParams/bestParams_"+this.datasetName+".txt");
bestParamsOut.append(bestParamsSt);
bestParamsOut.close();
}
this.fullTrainingData = train;
if(this.ensembleType==EnsembleType.Signif){
mcNemarsInclusion = this.getMcNemarsInclusion();
}
this.classifierBuilt = true;
}
private void crossValidateClassifierType(ClassifierVariants classifierType, Instances inputTrainingData, int classifierNum) throws Exception{
Instances train;
// prepare for derivative classifiers
if(classifierType.equals(ClassifierVariants.DDTW_R1_1NN)||classifierType.equals(ClassifierVariants.DDTW_Rn_1NN)||classifierType.equals(ClassifierVariants.WDDTW_1NN)){
DerivativeFilter d = new DerivativeFilter();
train = d.process(inputTrainingData);
}else{
train = inputTrainingData;
}
long startTime = -1;
if(verbose){
System.out.print("Starting CV on "+classifierType+"...");
startTime = System.nanoTime();
}
this.cvAccs[classifierNum] = -1;
double[] params;
CvOutput result;
switch(classifierType){
// single-run classifiers (i.e. no params to tune, cv only needed for weighting in ensemble later)
case Euclidean_1NN:
case DTW_R1_1NN:
case DDTW_R1_1NN:
result = crossValidate(train, classifierType, null);
this.cvAccs[classifierNum] = result.getAccuracy();
this.cvPreds[classifierNum] = result.getPredictions();
this.bestParams[classifierNum] = null;
break;
// window-based/weight-based classifiers (i.e. 0:0.01:1)
case DTW_Rn_1NN:
case WDTW_1NN:
case DDTW_Rn_1NN:
case WDDTW_1NN:
params = new double[1];
// values range from 0 to 1 in increments of 0.01; use ints to avoid double imprecision when incrementing
for(int w = 0; w <= 100; w++){
params[0] = (double)w/100;
result = crossValidate(train, classifierType, params);
if(result.getAccuracy() > this.cvAccs[classifierNum]){ // favours smaller window sizes
this.cvAccs[classifierNum] = result.getAccuracy();
this.cvPreds[classifierNum] = result.getPredictions();
this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
}
}
break;
case LCSS_1NN:
// parameter ranges depend on the data - derive them from the training set via LCSSDistance's utility methods
double stdTrain = LCSSDistance.stdv_p(train);
double stdFloor = stdTrain*0.2;
double[] epsilons = LCSSDistance.getInclusive10(stdFloor, stdTrain);
int[] deltas = LCSSDistance.getInclusive10(0, (train.numAttributes()-1)/4);
params = new double[2];
for(int d= 0; d < deltas.length; d++){
params[0] = deltas[d];
for(int e = 0; e < epsilons.length; e++){
params[1] = epsilons[e];
result = crossValidate(train, classifierType, params);
if(result.getAccuracy() > this.cvAccs[classifierNum]){
this.cvAccs[classifierNum] = result.getAccuracy();
this.cvPreds[classifierNum] = result.getPredictions();
this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
}else if(result.getAccuracy() == this.cvAccs[classifierNum] && this.bestParams[classifierNum]!=null && params[0] < this.bestParams[classifierNum][0] && params[1] < this.bestParams[classifierNum][1]){ // tie: prefer strictly smaller parameter values
this.cvAccs[classifierNum] = result.getAccuracy();
this.cvPreds[classifierNum] = result.getPredictions();
this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
}
}
}
break;
case MSM_1NN:
params = new double[1];
// values have a variable range, specified in the static array msmParams at the top of the class
for(int p = 0; p < msmParams.length; p++){
params[0] = msmParams[p];
result = crossValidate(train, classifierType, params);
if(result.getAccuracy() > this.cvAccs[classifierNum]){ // favours smaller params
this.cvAccs[classifierNum] = result.getAccuracy();
this.cvPreds[classifierNum] = result.getPredictions();
this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
}
}
break;
case TWE_1NN:
// values have variable ranges, so are specified in static arrays at the top of the class as twe_nuParams and twe_lambdaParams
params = new double[2];
for(int n= 0; n < twe_nuParams.length; n++){
params[0] = twe_nuParams[n];
for(int la = 0; la < twe_lambdaParams.length; la++){
params[1] = twe_lambdaParams[la];
result = crossValidate(train, classifierType, params);
if(result.getAccuracy() > this.cvAccs[classifierNum]){
this.cvAccs[classifierNum] = result.getAccuracy();
this.cvPreds[classifierNum] = result.getPredictions();
this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
}else if(result.getAccuracy() == this.cvAccs[classifierNum] && this.bestParams[classifierNum]!=null && params[0] < this.bestParams[classifierNum][0] && params[1] < this.bestParams[classifierNum][1]){ // tie: prefer strictly smaller parameter values
this.cvAccs[classifierNum] = result.getAccuracy();
this.cvPreds[classifierNum] = result.getPredictions();
this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
}
}
}
break;
case ERP_1NN:
// values of g depend on the data, so get the standard deviation and then derive them from it. Note: window of 0-25% used as per Keogh et al.'s paper. Sampled to
// produce 100 different parameter combinations in total
double[] windowSizes = ERPDistance.getInclusive10(0, 0.25);
double stdv = ERPDistance.stdv_p(train);
double[] gValues = ERPDistance.getInclusive10(0.2*stdv, stdv);
params = new double[2];
// params[0] = g (gap penalty value), params[1] = window size (band)
for(int w = 0; w < windowSizes.length; w++){
params[1] = windowSizes[w];
for(int g = 0; g < gValues.length; g++){
params[0] = gValues[g];
result = crossValidate(train, classifierType, params);
if(result.getAccuracy() > this.cvAccs[classifierNum]){
this.cvAccs[classifierNum] = result.getAccuracy();
this.cvPreds[classifierNum] = result.getPredictions();
this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
}else if(result.getAccuracy() == this.cvAccs[classifierNum] && this.bestParams[classifierNum]!=null && params[0] < this.bestParams[classifierNum][0] && params[1] < this.bestParams[classifierNum][1]){ // tie: prefer strictly smaller parameter values
this.cvAccs[classifierNum] = result.getAccuracy();
this.cvPreds[classifierNum] = result.getPredictions();
this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
}
}
}
break;
default:
throw new Exception("The classifier type \""+classifierType+"\" is not defined within the ensemble. Please update the code.");
}
if(verbose){
System.out.println("Done! (in "+((System.nanoTime()-startTime)/1000000000)+" seconds)");
}
}
private static class IndividualClassificationOutput{
private int id;
private double prediction;
public IndividualClassificationOutput(int id, double prediction) {
this.id = id;
this.prediction = prediction;
}
public int getId() {
return id;
}
public double getPrediction() {
return prediction;
}
}
private static class SingleCVCaller implements Callable<IndividualClassificationOutput>{
private Instances train;
private ClassifierVariants classifierType;
private double[] params;
private int i;
public SingleCVCaller(Instances train, ClassifierVariants classifierType, double[] params, int i) {
this.train = train;
this.classifierType = classifierType;
this.params = params;
this.i = i;
}
@Override
public IndividualClassificationOutput call() throws Exception {
Instance testInstance;
Instances trainLoocv;
kNN knn;
testInstance = train.instance(i);
trainLoocv = new Instances(train, train.numInstances() - 1);
// add all instances to trainLoocv EXCEPT instance[i]
for (int j = 0; j < train.numInstances(); j++) {
if (j != i) {
trainLoocv.add(train.instance(j));
}
}
// build classifier and classify
knn = getInternalClassifier(classifierType, params, trainLoocv);
return new IndividualClassificationOutput(i, knn.classifyInstance(testInstance));
}
}
private static class SingleTrainTestCaller implements Callable<IndividualClassificationOutput>{
private int i;
private Instance testInstance;
private kNN classifier;
public SingleTrainTestCaller(int i, Instance testInstance, kNN classifier) {
this.i = i;
this.testInstance = testInstance;
this.classifier = classifier;
}
@Override
public IndividualClassificationOutput call() throws Exception{
return new IndividualClassificationOutput(i, classifier.classifyInstance(testInstance));
}
}
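// Leave-one-out cross-validation: each training instance is held out in turn and classified by a 1-NN classifier
// built on the remaining n-1 instances. The n folds are independent, so they are farmed out to a thread pool, and
// the accuracy returned is the percentage of held-out instances classified correctly.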
private static CvOutput crossValidate(Instances train, ClassifierVariants classifierType, double[] params) throws Exception{
double[] predictions = new double[train.numInstances()];
int correct = 0;
int total = 0;
ExecutorService service = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
ArrayList<Future<IndividualClassificationOutput>> futures = new ArrayList<Future<IndividualClassificationOutput>>();
for (int i = 0; i < train.numInstances(); i++) {
futures.add(service.submit(new SingleCVCaller(train, classifierType, params, i)));
}
service.shutdown();
IndividualClassificationOutput result;
for(int i = 0 ; i < futures.size();i++){
result = futures.get(i).get();
predictions[result.id] = result.prediction;
if(predictions[result.id]==train.instance(result.id).classValue()){
correct++;
}
total++;
}
CvOutput output = new CvOutput(100.0/total*correct, predictions);
return output;
}
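// Maps a classifier type (and its tuned parameters, where applicable) onto a 1-NN classifier with the
// corresponding elastic distance measure set as its distance function, built on the supplied instances.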
private static kNN getInternalClassifier(ClassifierVariants classifierType, double[] params, Instances instances) throws Exception{
EuclideanDistance distanceMeasure = null;
kNN knn;
switch(classifierType){
case Euclidean_1NN:
distanceMeasure = new EuclideanDistance();
distanceMeasure.setDontNormalize(true);
break;
case DTW_R1_1NN:
case DDTW_R1_1NN:
distanceMeasure = new BasicDTW();
break;
case DTW_Rn_1NN:
case DDTW_Rn_1NN:
distanceMeasure = new SakoeChibaDTW(params[0]);
break;
case WDTW_1NN:
case WDDTW_1NN:
distanceMeasure = new WeightedDTW(params[0]);
break;
case LCSS_1NN:
distanceMeasure = new LCSSDistance((int)params[0], params[1]);
break;
case MSM_1NN:
distanceMeasure = new MSMDistance(params[0]);
break;
case TWE_1NN:
distanceMeasure = new TWEDistance(params[0],params[1]);
break;
case ERP_1NN:
distanceMeasure = new ERPDistance(params[0], params[1]);
break;
default:
throw new Exception("Error: "+classifierType+" is not a supported classifier type. Please update code to use this in the ensemble");
}
knn = new kNN();
knn.setDistanceFunction(distanceMeasure);
knn.buildClassifier(instances);
return knn;
}
@Override
public double classifyInstance(Instance instance) throws Exception{
if(!classifierBuilt){
throw new Exception("Error: Classifier has not been built! Classifier must be built before carrying out classification. See buildClassifier(Instances train).");
}
// special case if the classifier is originally built for a non-signif ensemble, but then changed after building (this is valid, as the cv remains the same but allows for
// classification using any of the ensembling strategies). This is necessary as the getMcNemarsInclusion() call is originally in buildClassifier(Instances train) for
// efficiency, as it depends on the original ensemble strategy when the build function is executed (no point working it out if it's not being used!).
if(this.ensembleType==EnsembleType.Signif && this.mcNemarsInclusion == null){
this.mcNemarsInclusion = getMcNemarsInclusion();
}
int numProcessors = Runtime.getRuntime().availableProcessors();
int numThreads = (numProcessors > this.finalClassifiers.length) ? this.finalClassifiers.length:numProcessors;
ExecutorService service = Executors.newFixedThreadPool(numThreads); // use the capped thread count computed above
ArrayList<Future<IndividualClassificationOutput>> futures = new ArrayList<Future<IndividualClassificationOutput>>();
double[] predictions = new double[this.finalClassifiers.length];
ClassifierVariants classifier;
for(int i = 0; i < predictions.length; i++){
classifier = this.finalClassifiers[i];
if(this.ensembleType!=EnsembleType.Signif || this.mcNemarsInclusion[i]){
kNN knn = getInternalClassifier(classifier, this.bestParams[i], this.fullTrainingData);
futures.add(service.submit(new SingleTrainTestCaller(i, instance, knn)));
}else{
predictions[i] = -1;
}
}
service.shutdown();
IndividualClassificationOutput result;
for(int i = 0 ; i < futures.size();i++){
result = futures.get(i).get();
predictions[result.id] = result.prediction;
}
switch(this.ensembleType){
case Best:
return this.classifyInstances_best(predictions);
case Equal:
return this.classifyInstances_equal(predictions);
case Prop:
case Signif:
return this.classifyInstances_prop(predictions);
default:
throw new Exception("Error: Unexpected ensemble type");
}
}
private double classifyInstances_best(double[] predictions){
ArrayList<Integer> bestClassifierIds = new ArrayList<Integer>();
double bsfAcc = -1;
for(int i = 0; i < this.cvAccs.length; i++){
if(this.cvAccs[i] > bsfAcc){
bestClassifierIds = new ArrayList<Integer>();
bestClassifierIds.add(i);
bsfAcc = this.cvAccs[i];
}else if(this.cvAccs[i] == bsfAcc){
bestClassifierIds.add(i);
}
}
if(bestClassifierIds.size()>1){
Random r = new Random();
return predictions[bestClassifierIds.get(r.nextInt(bestClassifierIds.size()))]; // pick the prediction of a randomly-chosen tied-best classifier
}else{
return predictions[bestClassifierIds.get(0)];
}
}
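// Equal-weight majority vote: each member contributes one vote for its predicted class; ties between classes are broken at random.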
private double classifyInstances_equal(double[] predictions){
TreeMap<Double, Integer> classValsAndVotes = new TreeMap<Double, Integer>();
for(int c = 0; c < predictions.length; c++){
double thisVote = predictions[c];
if(classValsAndVotes.containsKey(thisVote)){
int currentCount = classValsAndVotes.get(thisVote);
currentCount++;
classValsAndVotes.put(thisVote, currentCount);
}else{
classValsAndVotes.put(thisVote,1);
}
}
ArrayList<Double> majorityClasses = new ArrayList<Double>();
int bsfCount = -1;
int thisCount;
for(Double classVal:classValsAndVotes.keySet()){
thisCount = classValsAndVotes.get(classVal);
if(thisCount > bsfCount){
bsfCount = thisCount;
majorityClasses = new ArrayList<Double>();
majorityClasses.add(classVal);
}else if(thisCount == bsfCount){
majorityClasses.add(classVal);
}
}
if(majorityClasses.size()==1){
return majorityClasses.get(0);
}else{
Random r = new Random();
return majorityClasses.get(r.nextInt(majorityClasses.size()));
}
}
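// Proportionally-weighted vote: each member's vote counts for its training CV accuracy rather than 1, so e.g. a member
// with 90% CV accuracy voting alone outweighs one with 80%, but loses to two members with 50% each voting together.
// Under the Signif strategy, only members flagged by mcNemarsInclusion contribute. Ties are broken at random.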
private double classifyInstances_prop(double[] predictions){
TreeMap<Double, Double> classValsAndVotes = new TreeMap<Double, Double>();
double thisVote;
double currentWeight;
double bsfWeight = 0;
ArrayList<Double> majorityClasses = new ArrayList<Double>();
for(int c = 0; c < predictions.length; c++){
thisVote = predictions[c];
if(this.ensembleType==EnsembleType.Prop || this.mcNemarsInclusion[c]){
if(classValsAndVotes.containsKey(thisVote)){
currentWeight = classValsAndVotes.get(thisVote);
currentWeight+=cvAccs[c];
classValsAndVotes.put(thisVote, currentWeight);
}else{
currentWeight = cvAccs[c];
classValsAndVotes.put(thisVote,currentWeight);
}
if(currentWeight > bsfWeight){
majorityClasses = new ArrayList<Double>();
majorityClasses.add(thisVote);
bsfWeight = currentWeight;
}else if(currentWeight == bsfWeight){
majorityClasses.add(thisVote);
}
}
}
if(majorityClasses.size()==1){
return majorityClasses.get(0);
}else{
Random r = new Random();
return majorityClasses.get(r.nextInt(majorityClasses.size()));
}
}
public static void buildAndWriteCvAndTrainTestFiles_SDM(String outDir, String datasetName, Instances train, Instances test) throws Exception{
ElasticEnsemble elastic = new ElasticEnsemble();
elastic.setEnsembleType(EnsembleType.Best); // Doesn't matter
elastic.turnSDMClassifiersOn();
elastic.turnOnFileWriting(outDir, datasetName);
elastic.makeVerbose();
elastic.buildClassifier(train);
kNN knn;
File outputDir;
FileWriter out;
StringBuilder st;
int correct, total;
double decision, classValue;
ClassifierVariants classifier;
for(int c = 0; c < elastic.finalClassifiers.length; c++){
classifier = elastic.finalClassifiers[c];
knn = getInternalClassifier(classifier, elastic.bestParams[c], train);
correct = 0;
total = 0;
st = new StringBuilder();
for(int i = 0; i < test.numInstances(); i++){
decision = knn.classifyInstance(test.instance(i));
classValue = test.instance(i).classValue();
if(decision==classValue){
correct++;
}
total++;
st.append(decision).append(",").append(classValue).append("\n");
}
outputDir = new File(outDir+"/trainTest/"+classifier);
outputDir.mkdirs();
out = new FileWriter(outDir+"/trainTest/"+classifier+"/trainTest_"+classifier+"_"+datasetName+".txt");
out.append(correct+"/"+total+"\n");
out.append(st);
out.close();
}
}
public static void demonstrateEnsembles_SettingsFromSDM(Instances train, Instances test, String outputDir, String datasetName) throws Exception{
// 1. Initialise classifier in the usual Weka form
ElasticEnsemble elastic = new ElasticEnsemble();
// 2. Set the internal distance measure 1NN classifiers to use. By default, the ensemble classifier won't use any (and will just throw an error).
// These can be specified individually, i.e. this.addClassifierToEnsemble(ClassifierVariants.Euclidean_1NN), or there are two special cases:
// - this.turnAllClassifiersOn() // Uses all possible classifiers that have been written for the ensemble (DAMI version, including TWE and ERP)
// - this.turnSDMClassifiersOn() // Uses the classifiers that were included in the SDM paper
// For the purposes of this demonstration, we will turn on all classifiers:
//elastic.turnSDMClassifiersOn();
elastic.turnAllClassifiersOn();
// 3. By default, the ensemble works like a typical Weka classifier - i.e. you build it, run it, and then it is removed from memory.
// However, it can also be set to write the cv results to file (i.e. for faster build times when repeating experiments (NOT IMPLEMENTED HERE), or for information).
// Files are written to the dir specified by the String outputDir (creating it/parent dirs if necessary), and files are named using the name
// specified in datasetName. It is important to keep this consistent with the data for easily reusing cv results
// (i.e. if training with ItalyPowerDemand_TRAIN.arff, use ItalyPowerDemand as the datasetName so the arff can be found dynamically later on).
// ***IMPORTANT: Will overwrite existing files as necessary. It will continue to use existing dirs (i.e. if a different dataset has been processed,
// those files will remain unchanged).
// elastic.turnOnFileWriting(outputDir, datasetName);
// 4. Training can be slow with large datasets and many classifiers in the ensemble. For peace of mind, a method is included to prompt the classifier
// to print messages to the system output during training, stating which distance measure is currently being processed (and the time taken to complete
// once it has been done).
elastic.makeVerbose();
// 5. Build the classifier on the specified training data
elastic.buildClassifier(train);
int correct, total;
double prediction, classValue;
DecimalFormat df = new DecimalFormat("###.###");
System.out.println();
System.out.println("-----------------------------------------");
System.out.println("TRAIN/TEST CLASSIFICATION");
System.out.println("-----------------------------------------");
// To save time and create a fair comparison, we build once and then classify separately for each ensemble strategy. This is valid, as the ensemble type is
// completely independent from the CV in the training stage of the classifier, so results would be the same if we carried it out separately for each ensemble
EnsembleType[] types = {EnsembleType.Best, EnsembleType.Equal, EnsembleType.Prop, EnsembleType.Signif};
for(int t = 0; t < types.length; t++){
elastic.setEnsembleType(types[t]);
correct = 0;
total = 0;
for(int i = 0; i < test.numInstances(); i++){
prediction = elastic.classifyInstance(test.instance(i));
classValue = test.instance(i).classValue();
if(prediction==classValue){
correct++;
}
total++;
}
System.out.println(elastic.ensembleType+": "+correct+"/"+total+" ("+df.format(100.0/total*correct)+"%)");
}
}
public static Instances loadData(String fileName){
Instances data = null;
try{
FileReader r;
r = new FileReader(fileName);
data = new Instances(r);
data.setClassIndex(data.numAttributes() - 1);
} catch(Exception e){
System.out.println(" Error =" + e + " in method loadData");
e.printStackTrace();
}
return data;
}
public boolean[] getMcNemarsInclusion(){
// find the best classifier according to cvAccuracies - random selection of best where ties are equal
ArrayList<Integer> bestClassifiersIds = new ArrayList<Integer>();
double bsfAccuracy = -1;
for(int c = 0; c < cvAccs.length; c++){
if(cvAccs[c] > bsfAccuracy){
bsfAccuracy = cvAccs[c]; // record the new best accuracy (previously omitted, which left only the last classifier in the list)
bestClassifiersIds = new ArrayList<Integer>();
bestClassifiersIds.add(c);
}else if(cvAccs[c]==bsfAccuracy){
bestClassifiersIds.add(c);
}
}
int bestClassifierId = -1;
if(bestClassifiersIds.size()==1){
bestClassifierId = bestClassifiersIds.get(0);
}else{
Random r = new Random();
bestClassifierId = bestClassifiersIds.get(r.nextInt(bestClassifiersIds.size()));
}
// go through each classifier and calculate McNemar's test against the best. For each classifier, set the corresponding entry in the output to reflect whether the
// classifier should be used in the ensemble. i.e. if a classifier is significantly different to the best (i.e. it must be worse), output false for
// that classifier. Else, output true to show that it is not significantly worse, and should be included.
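// McNemar's test with continuity correction: chi^2 = (|b - c| - 1)^2 / (b + c), where b and c are the counts of
// instances that exactly one of the two classifiers got right. The statistic is compared against the chi-squared
// distribution with 1 degree of freedom: 3.841 at alpha = 0.05 (6.635 at alpha = 0.01). If b + c = 0 the two
// classifiers agree everywhere, the test is undefined, and the classifier is included.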
boolean[] output = new boolean[this.finalClassifiers.length];
for(int classifierB = 0; classifierB < this.finalClassifiers.length; classifierB++){
if(classifierB==bestClassifierId){
output[classifierB] = true; // looking at itself, and obviously we want the best classifier included!
continue;
}
// can include speedup where a==b, keep it simple for now until it's working
int wrongByBoth = 0; // top-left
int rightByAWrongByB = 0; // bottom-left
int wrongByARightByB = 0; // top-right
int rightByBoth = 0; // bottom-right
double actualClass, thisPred, bPred;
for(int i = 0; i < this.trainActualClassVals.length;i++){
actualClass = trainActualClassVals[i];
thisPred = cvPreds[bestClassifierId][i];
bPred = cvPreds[classifierB][i];
if(thisPred!=actualClass && bPred!=actualClass){
wrongByBoth++;
}else if(thisPred==actualClass && bPred!=actualClass){
rightByAWrongByB++;
}else if(thisPred!=actualClass && bPred==actualClass){
wrongByARightByB++;
}else if(thisPred==actualClass && bPred==actualClass){
rightByBoth++;
}
}
if(rightByAWrongByB+wrongByARightByB==0){
output[classifierB] = true; // classifier is equivalent to the best, so we should include it to effectively add weight to best's votes
}else{
double chiPart = (Math.abs(wrongByARightByB-rightByAWrongByB)-1);
double chi = (chiPart*chiPart)/(wrongByARightByB+rightByAWrongByB);
if(chi >= 3.841459){ // Alpha = 0.05
// if(chi >= 6.634897){ // Alpha = 0.01
output[classifierB] = false; // signif. different, so don't include
}else{
output[classifierB] = true; // not signif. different (i.e. not signif. worse), so include
}
}
}
return output;
}
private static class CvOutput{
private double accuracy;
private double[] predictions;
private double[] params;
public CvOutput(double accuracy, double[] predictions){
this.accuracy = accuracy;
this.predictions = predictions;
}
public CvOutput(double accuracy, double[] predictions, double[] params){
this.accuracy = accuracy;
this.predictions = predictions;
this.params = Arrays.copyOf(params, params.length);
}
public double getAccuracy(){
return this.accuracy;
}
public double[] getPredictions(){
return this.predictions;
}
public double[] getParams() {
return params;
}
}
@Override
public String toString(){
return this.classifiersToUse.toString();
}
public static void main(String[] args) {
try{
// Example use of the classifier for dataset ItalyPowerDemand. Please see demonstrateEnsembles...
// Dataset:
String datasetName = "ItalyPowerDemand";
// Data dir:
String dataDir = "../../TSC Problems";
if(!new File(dataDir).exists()){
throw new Exception("Error: Specified data directory does not exist: "+dataDir);
}
Instances train = loadData(dataDir+"/"+datasetName+"/"+datasetName+"_TRAIN.arff");
Instances test = loadData(dataDir+"/"+datasetName+"/"+datasetName+"_TEST.arff");
// see method for annotations
demonstrateEnsembles_SettingsFromSDM(train, test, "demonstration", datasetName);
}catch(Exception e){
e.printStackTrace();
}
}
}