package weka.classifiers.meta.timeseriesensembles;

import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import weka.classifiers.Classifier;
import weka.classifiers.lazy.kNN;
import weka.core.Capabilities;
import weka.core.EuclideanDistance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.elastic_distance_measures.BasicDTW;
import weka.core.elastic_distance_measures.ERPDistance;
import weka.core.elastic_distance_measures.LCSSDistance;
import weka.core.elastic_distance_measures.MSMDistance;
import weka.core.elastic_distance_measures.SakoeChibaDTW;
import weka.core.elastic_distance_measures.TWEDistance;
import weka.core.elastic_distance_measures.WeightedDTW;
import weka.filters.timeseries.DerivativeFilter;

public class ElasticEnsemble implements Classifier {

    // Note: distributionForInstance and getCapabilities are added to appease the new Classifier interface - NO IMPLEMENTATION
    @Override
    public double[] distributionForInstance(Instance instance) throws Exception {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    @Override
    public Capabilities getCapabilities() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    public enum ClassifierVariants {
        Euclidean_1NN, DTW_R1_1NN, DTW_Rn_1NN, WDTW_1NN, DDTW_R1_1NN, DDTW_Rn_1NN,
        WDDTW_1NN, LCSS_1NN, MSM_1NN, TWE_1NN, ERP_1NN
    }

    public enum EnsembleType {
        Best, Equal, Prop, Signif
    }

    private static double[] msmParams = { // <editor-fold defaultstate="collapsed" desc="hidden for space">
        0.01, 0.01375, 0.0175, 0.02125, 0.025, 0.02875, 0.0325, 0.03625, 0.04, 0.04375,
        0.0475, 0.05125, 0.055, 0.05875, 0.0625, 0.06625, 0.07, 0.07375, 0.0775, 0.08125,
        0.085, 0.08875, 0.0925, 0.09625, 0.1, 0.136, 0.172, 0.208, 0.244, 0.28,
        0.316, 0.352, 0.388, 0.424, 0.46, 0.496, 0.532, 0.568, 0.604, 0.64,
        0.676, 0.712, 0.748, 0.784, 0.82, 0.856, 0.892, 0.928, 0.964, 1,
        1.36, 1.72, 2.08, 2.44, 2.8, 3.16, 3.52, 3.88, 4.24, 4.6,
        4.96, 5.32, 5.68, 6.04, 6.4, 6.76, 7.12, 7.48, 7.84, 8.2,
        8.56, 8.92, 9.28, 9.64, 10, 13.6, 17.2, 20.8, 24.4, 28,
        31.6, 35.2, 38.8, 42.4, 46, 49.6, 53.2, 56.8, 60.4, 64,
        67.6, 71.2, 74.8, 78.4, 82, 85.6, 89.2, 92.8, 96.4, 100
        // </editor-fold>
    };

    private static double[] twe_nuParams = { // <editor-fold defaultstate="collapsed" desc="hidden for space">
        0.00001, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1
        // </editor-fold>
    };

    private static double[] twe_lambdaParams = { // <editor-fold defaultstate="collapsed" desc="hidden for space">
        0, 0.011111111, 0.022222222, 0.033333333, 0.044444444, 0.055555556, 0.066666667, 0.077777778, 0.088888889, 0.1
        // </editor-fold>
    };
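    // For reference, the grids above give roughly 100 candidate settings per tunable measure:
    // msmParams holds 100 values, and twe_nuParams x twe_lambdaParams forms a 10 x 10 = 100-combination
    // grid. The remaining measures are tuned over similar-sized grids built in crossValidateClassifierType
    // (101 window/weight values for the DTW/WDTW variants, and data-dependent 10 x 10 grids for LCSS and ERP).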
    // The classifiers to use are initially stored in a TreeSet. This ensures that no duplicates are included, and keeps everything in the expected order as
    // specified in the enum declaration. Once the classifier is built, the classifier choices are locked in by creating an array of ClassifierVariants. This is
    // done for two reasons: firstly, the indices of this array match the other arrays, such as cvAccs and cvPreds; secondly, it separates the classifier
    // selection before and after building the classifier, ensuring that unexpected behaviour isn't caused by carrying out abnormal operations
    // (i.e. adding classifiers to the ensemble after training has occurred).
    private TreeSet<ClassifierVariants> classifiersToUse;
    private ClassifierVariants[] finalClassifiers;
    private double[] cvAccs;
    private double[][] cvPreds;
    private double[] trainActualClassVals;
    private EnsembleType ensembleType;
    private double[][] bestParams;
    private boolean fileWriting;
    private String outputDirLocation;
    private String datasetName;
    private Instances fullTrainingData;
    private boolean[] mcNemarsInclusion;
    private boolean classifierBuilt;
    private boolean verbose;
    private boolean parallel;

    public ElasticEnsemble() {
        this.ensembleType = null;
        this.classifiersToUse = new TreeSet<ClassifierVariants>();
        this.finalClassifiers = null;
        this.fileWriting = false;
        this.outputDirLocation = null;
        this.cvAccs = null;
        this.cvPreds = null;
        this.bestParams = null;
        this.verbose = false;
        this.classifierBuilt = false;
        this.parallel = true; // default
    }

    public void turnAllClassifiersOn() throws Exception {
        if (classifierBuilt) {
            throw new Exception("Error: Classifier has already been built. Unable to change classifiers within ensemble.");
        }
        this.classifiersToUse = new TreeSet<ClassifierVariants>();
        classifiersToUse.addAll(Arrays.asList(ClassifierVariants.values()));
    }

    public void turnSDMClassifiersOn() throws Exception {
        if (classifierBuilt) {
            throw new Exception("Error: Classifier has already been built. Unable to change classifiers within ensemble.");
        }
        this.classifiersToUse = new TreeSet<ClassifierVariants>();
        classifiersToUse.add(ClassifierVariants.Euclidean_1NN);
        classifiersToUse.add(ClassifierVariants.DTW_R1_1NN);
        classifiersToUse.add(ClassifierVariants.DTW_Rn_1NN);
        classifiersToUse.add(ClassifierVariants.WDTW_1NN);
        classifiersToUse.add(ClassifierVariants.DDTW_R1_1NN);
        classifiersToUse.add(ClassifierVariants.DDTW_Rn_1NN);
        classifiersToUse.add(ClassifierVariants.WDDTW_1NN);
        classifiersToUse.add(ClassifierVariants.LCSS_1NN);
        classifiersToUse.add(ClassifierVariants.TWE_1NN);
    }

    public boolean addClassifierToEnsemble(ClassifierVariants classifierToAdd) throws Exception {
        if (classifierBuilt) {
            throw new Exception("Error: Classifier has already been built. Unable to change classifiers within ensemble.");
        }
        if (this.classifiersToUse.contains(classifierToAdd)) {
            return false;
        } else {
            classifiersToUse.add(classifierToAdd);
            return true;
        }
    }

    public boolean removeClassifierFromEnsemble(ClassifierVariants classifierToRemove) throws Exception {
        if (classifierBuilt) {
            throw new Exception("Error: Classifier has already been built. Unable to change classifiers within ensemble.");
        }
        if (this.classifiersToUse.contains(classifierToRemove)) {
            this.classifiersToUse.remove(classifierToRemove);
            return true;
        } else {
            return false;
        }
    }
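    // A minimal usage sketch for the selection methods above (illustrative only - the
    // variable name ee is arbitrary, and exceptions must be handled by the caller):
    //
    //   ElasticEnsemble ee = new ElasticEnsemble();
    //   ee.addClassifierToEnsemble(ClassifierVariants.MSM_1NN);      // returns true (newly added)
    //   ee.addClassifierToEnsemble(ClassifierVariants.MSM_1NN);      // returns false (already present; the TreeSet prevents duplicates)
    //   ee.removeClassifierFromEnsemble(ClassifierVariants.MSM_1NN); // returns true (was present)
    //   ee.setEnsembleType(EnsembleType.Prop);
    //
    // Once buildClassifier(...) has run, any attempt to change the selection throws an Exception.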
    public void setEnsembleType(EnsembleType ensembleType) {
        this.ensembleType = ensembleType;
    }

    public void turnOnFileWriting(String outputDirLocation, String datasetName) {
        this.fileWriting = true;
        this.outputDirLocation = outputDirLocation;
        this.datasetName = datasetName;
    }

    public void makeVerbose() {
        this.verbose = true;
    }

    @Override
    public void buildClassifier(Instances train) throws Exception {
        // if applicable, check that the file locations are valid before carrying out the cv
        File cvDir = null;
        File classifierOutputDir;
        StringBuilder st;
        StringBuilder bestParamsSt = new StringBuilder();
        FileWriter cvOut;
        int correct;

        this.trainActualClassVals = new double[train.numInstances()];
        for (int ins = 0; ins < trainActualClassVals.length; ins++) {
            trainActualClassVals[ins] = train.instance(ins).classValue();
        }

        if (fileWriting) {
            cvDir = new File(this.outputDirLocation + "/bestCvOut");
            boolean valid = cvDir.mkdirs();
            if (!valid && !cvDir.exists()) {
                throw new Exception("The output dir at: " + outputDirLocation + " could not be created.");
            } else if (!valid) {
                System.out.println("Warning: cvDir at " + this.outputDirLocation + " already exists. Any conflicting results under this location will be overwritten.");
            }
        }

        this.finalClassifiers = new ClassifierVariants[this.classifiersToUse.size()];
        int c = 0;
        for (ClassifierVariants classifier : this.classifiersToUse) {
            this.finalClassifiers[c++] = classifier;
        }

        // carry out the cross-validation
        this.cvAccs = new double[this.finalClassifiers.length];
        this.cvPreds = new double[this.finalClassifiers.length][train.numInstances()];
        this.bestParams = new double[this.finalClassifiers.length][];

        for (int i = 0; i < finalClassifiers.length; i++) {
            ClassifierVariants classifierType = this.finalClassifiers[i];
            crossValidateClassifierType(classifierType, train, i);
            if (fileWriting) {
                classifierOutputDir = new File(this.outputDirLocation + "/bestCvOut/" + classifierType);
                classifierOutputDir.mkdirs();
                st = new StringBuilder();
                correct = 0;
                for (int j = 0; j < this.cvPreds[i].length; j++) {
                    st.append(this.cvPreds[i][j]).append(",").append(trainActualClassVals[j]).append("\n");
                    if (this.cvPreds[i][j] == trainActualClassVals[j]) {
                        correct++;
                    }
                }
                cvOut = new FileWriter(this.outputDirLocation + "/bestCvOut/" + classifierType + "/" + "cvBest_" + classifierType + "_" + datasetName + ".txt");
                cvOut.append(correct + "/" + cvPreds[i].length + "\n");
                cvOut.append(st);
                cvOut.close();
                bestParamsSt.append(classifierType).append(",");
                for (int j = 0; bestParams[i] != null && j < bestParams[i].length; j++) {
                    bestParamsSt.append(bestParams[i][j]).append(",");
                }
                bestParamsSt.append("\n");
            }
        }

        if (fileWriting) {
            File paramsOutputDir = new File(this.outputDirLocation + "/bestParams/");
            paramsOutputDir.mkdirs();
            FileWriter bestParamsOut = new FileWriter(this.outputDirLocation + "/bestParams/bestParams_" + this.datasetName + ".txt");
            bestParamsOut.append(bestParamsSt);
            bestParamsOut.close();
        }

        this.fullTrainingData = train;
        if (this.ensembleType == EnsembleType.Signif) {
            mcNemarsInclusion = this.getMcNemarsInclusion();
        }
        this.classifierBuilt = true;
    }
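    // When file writing is enabled, buildClassifier above produces the following files
    // (paths relative to outputDirLocation, as derived from the code above):
    //   bestCvOut/<classifierType>/cvBest_<classifierType>_<datasetName>.txt
    //       - first line "correct/total", then one "prediction,actual" line per training instance
    //   bestParams/bestParams_<datasetName>.txt
    //       - one line per classifier: the classifier name followed by its best parameter values, comma-separated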
    private void crossValidateClassifierType(ClassifierVariants classifierType, Instances inputTrainingData, int classifierNum) throws Exception {
        Instances train;

        // prepare for derivative classifiers
        if (classifierType.equals(ClassifierVariants.DDTW_R1_1NN) || classifierType.equals(ClassifierVariants.DDTW_Rn_1NN) || classifierType.equals(ClassifierVariants.WDDTW_1NN)) {
            DerivativeFilter d = new DerivativeFilter();
            train = d.process(inputTrainingData);
        } else {
            train = inputTrainingData;
        }

        long startTime = -1;
        if (verbose) {
            System.out.print("Starting CV on " + classifierType + "...");
            startTime = System.nanoTime();
        }

        this.cvAccs[classifierNum] = -1;
        double[] params;
        CvOutput result;

        switch (classifierType) {
            // single-run classifiers (i.e. no params to tune; cv only needed for weighting in the ensemble later)
            case Euclidean_1NN:
            case DTW_R1_1NN:
            case DDTW_R1_1NN:
                result = crossValidate(train, classifierType, null);
                this.cvAccs[classifierNum] = result.getAccuracy();
                this.cvPreds[classifierNum] = result.getPredictions();
                this.bestParams[classifierNum] = null;
                break;
            // window-based/weight-based classifiers (i.e. 0:0.01:1)
            case DTW_Rn_1NN:
            case WDTW_1NN:
            case DDTW_Rn_1NN:
            case WDDTW_1NN:
                params = new double[1];
                // values range from 0 to 1 in increments of 0.01; use ints to avoid double imprecision when incrementing
                for (int w = 0; w <= 100; w++) {
                    params[0] = (double) w / 100;
                    result = crossValidate(train, classifierType, params);
                    if (result.getAccuracy() > this.cvAccs[classifierNum]) { // strict > favours smaller window sizes on ties
                        this.cvAccs[classifierNum] = result.getAccuracy();
                        this.cvPreds[classifierNum] = result.getPredictions();
                        this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
                    }
                }
                break;
            case LCSS_1NN:
                // params depend on the data - get these from the distance class
                double stdTrain = LCSSDistance.stdv_p(train);
                double stdFloor = stdTrain * 0.2;
                double[] epsilons = LCSSDistance.getInclusive10(stdFloor, stdTrain);
                int[] deltas = LCSSDistance.getInclusive10(0, (train.numAttributes() - 1) / 4);
                params = new double[2];
                for (int d = 0; d < deltas.length; d++) {
                    params[0] = deltas[d];
                    for (int e = 0; e < epsilons.length; e++) {
                        params[1] = epsilons[e];
                        result = crossValidate(train, classifierType, params);
                        if (result.getAccuracy() > this.cvAccs[classifierNum]) {
                            this.cvAccs[classifierNum] = result.getAccuracy();
                            this.cvPreds[classifierNum] = result.getPredictions();
                            this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
                        } else if (result.getAccuracy() == this.cvAccs[classifierNum]
                                && this.bestParams[classifierNum] != null
                                && params[0] < this.bestParams[classifierNum][0]
                                && params[1] < this.bestParams[classifierNum][1]) {
                            // tie on accuracy: favour the smaller delta and epsilon
                            this.cvAccs[classifierNum] = result.getAccuracy();
                            this.cvPreds[classifierNum] = result.getPredictions();
                            this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
                        }
                    }
                }
                break;
            case MSM_1NN:
                params = new double[1];
                // values have a variable range, specified in the static msmParams array at the start of the class
                for (int p = 0; p < msmParams.length; p++) {
                    params[0] = msmParams[p];
                    result = crossValidate(train, classifierType, params);
                    if (result.getAccuracy() > this.cvAccs[classifierNum]) { // strict > favours smaller params on ties
                        this.cvAccs[classifierNum] = result.getAccuracy();
                        this.cvPreds[classifierNum] = result.getPredictions();
                        this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
                    }
                }
                break;
            case TWE_1NN:
                // values have variable ranges, so are specified in the static twe_nuParams and twe_lambdaParams arrays at the top of the class
                params = new double[2];
                for (int n = 0; n < twe_nuParams.length; n++) {
                    params[0] = twe_nuParams[n];
                    for (int la = 0; la < twe_lambdaParams.length; la++) {
                        params[1] = twe_lambdaParams[la];
                        result = crossValidate(train, classifierType, params);
                        if (result.getAccuracy() > this.cvAccs[classifierNum]) {
                            this.cvAccs[classifierNum] = result.getAccuracy();
                            this.cvPreds[classifierNum] = result.getPredictions();
                            this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
                        } else if (result.getAccuracy() == this.cvAccs[classifierNum]
                                && this.bestParams[classifierNum] != null
                                && params[0] < this.bestParams[classifierNum][0]
                                && params[1] < this.bestParams[classifierNum][1]) {
                            // tie on accuracy: favour the smaller nu and lambda
                            this.cvAccs[classifierNum] = result.getAccuracy();
                            this.cvPreds[classifierNum] = result.getPredictions();
                            this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
                        }
                    }
                }
                break;
            case ERP_1NN:
                // values of g depend on the data, so get the standard deviation and work them out from that.
                // Note: window of 0-25% used as per Keogh et al.'s paper, sampled to produce 100 different parameter combinations in total
                double[] windowSizes = ERPDistance.getInclusive10(0, 0.25);
                double stdv = ERPDistance.stdv_p(train);
                double[] gValues = ERPDistance.getInclusive10(0.2 * stdv, stdv);
                params = new double[2]; // params[0] = g, params[1] = band size
                for (int w = 0; w < windowSizes.length; w++) {
                    params[1] = windowSizes[w];
                    for (int g = 0; g < gValues.length; g++) {
                        params[0] = gValues[g];
                        result = crossValidate(train, classifierType, params);
                        if (result.getAccuracy() > this.cvAccs[classifierNum]) {
                            this.cvAccs[classifierNum] = result.getAccuracy();
                            this.cvPreds[classifierNum] = result.getPredictions();
                            this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
                        } else if (result.getAccuracy() == this.cvAccs[classifierNum]
                                && this.bestParams[classifierNum] != null
                                && params[0] < this.bestParams[classifierNum][0]
                                && params[1] < this.bestParams[classifierNum][1]) {
                            // tie on accuracy: favour the smaller g and band size
                            this.cvAccs[classifierNum] = result.getAccuracy();
                            this.cvPreds[classifierNum] = result.getPredictions();
                            this.bestParams[classifierNum] = Arrays.copyOf(params, params.length);
                        }
                    }
                }
                break;
            default:
                throw new Exception("The classifier type \"" + classifierType + "\" is not defined within the ensemble. Please update the code.");
        }

        if (verbose) {
            System.out.println("Done! (in " + ((System.nanoTime() - startTime) / 1000000000) + " seconds)");
        }
    }
    private static class IndividualClassificationOutput {
        private int id;
        private double prediction;

        public IndividualClassificationOutput(int id, double prediction) {
            this.id = id;
            this.prediction = prediction;
        }

        public int getId() {
            return id;
        }

        public double getPrediction() {
            return prediction;
        }
    }

    private static class SingleCVCaller implements Callable<IndividualClassificationOutput> {
        private Instances train;
        private ClassifierVariants classifierType;
        private double[] params;
        private int i;

        public SingleCVCaller(Instances train, ClassifierVariants classifierType, double[] params, int i) {
            this.train = train;
            this.classifierType = classifierType;
            this.params = params;
            this.i = i;
        }

        @Override
        public IndividualClassificationOutput call() throws Exception {
            Instance testInstance;
            Instances trainLoocv;
            kNN knn;
            testInstance = train.instance(i);
            trainLoocv = new Instances(train, train.numInstances() - 1);
            // add all instances to trainLoocv EXCEPT instance[i]
            for (int j = 0; j < train.numInstances(); j++) {
                if (j != i) {
                    trainLoocv.add(train.instance(j));
                }
            }
            // build the classifier and classify the held-out instance
            knn = getInternalClassifier(classifierType, params, trainLoocv);
            return new IndividualClassificationOutput(i, knn.classifyInstance(testInstance));
        }
    }

    private static class SingleTrainTestCaller implements Callable<IndividualClassificationOutput> {
        private int i;
        private Instance testInstance;
        private kNN classifier;

        public SingleTrainTestCaller(int i, Instance testInstance, kNN classifier) {
            this.i = i;
            this.testInstance = testInstance;
            this.classifier = classifier;
        }

        @Override
        public IndividualClassificationOutput call() throws Exception {
            return new IndividualClassificationOutput(i, classifier.classifyInstance(testInstance));
        }
    }

    private static CvOutput crossValidate(Instances train, ClassifierVariants classifierType, double[] params) throws Exception {
        double[] predictions = new double[train.numInstances()];
        int correct = 0;
        int total = 0;

        // leave-one-out cross-validation, with one task per held-out instance
        ExecutorService service = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
        ArrayList<Future<IndividualClassificationOutput>> futures = new ArrayList<Future<IndividualClassificationOutput>>();
        for (int i = 0; i < train.numInstances(); i++) {
            futures.add(service.submit(new SingleCVCaller(train, classifierType, params, i)));
        }
        service.shutdown();

        IndividualClassificationOutput result;
        for (int i = 0; i < futures.size(); i++) {
            result = futures.get(i).get();
            predictions[result.id] = result.prediction;
            if (predictions[result.id] == train.instance(result.id).classValue()) {
                correct++;
            }
            total++;
        }
        return new CvOutput(100.0 / total * correct, predictions);
    }
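    // For example (hypothetical numbers): on a 67-instance training set where 60 of the
    // leave-one-out predictions match the true class, crossValidate returns an accuracy of
    // 100.0/67*60 = 89.55... Accuracies are therefore on a 0-100 scale, which is why cvAccs
    // values can be used directly as voting weights in classifyInstances_prop below.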
    private static kNN getInternalClassifier(ClassifierVariants classifierType, double[] params, Instances instances) throws Exception {
        EuclideanDistance distanceMeasure = null;
        kNN knn;
        switch (classifierType) {
            case Euclidean_1NN:
                distanceMeasure = new EuclideanDistance();
                distanceMeasure.setDontNormalize(true);
                break;
            case DTW_R1_1NN:
            case DDTW_R1_1NN:
                distanceMeasure = new BasicDTW();
                break;
            case DTW_Rn_1NN:
            case DDTW_Rn_1NN:
                distanceMeasure = new SakoeChibaDTW(params[0]);
                break;
            case WDTW_1NN:
            case WDDTW_1NN:
                distanceMeasure = new WeightedDTW(params[0]);
                break;
            case LCSS_1NN:
                distanceMeasure = new LCSSDistance((int) params[0], params[1]);
                break;
            case MSM_1NN:
                distanceMeasure = new MSMDistance(params[0]);
                break;
            case TWE_1NN:
                distanceMeasure = new TWEDistance(params[0], params[1]);
                break;
            case ERP_1NN:
                distanceMeasure = new ERPDistance(params[0], params[1]);
                break;
            default:
                throw new Exception("Error: " + classifierType + " is not a supported classifier type. Please update the code to use this in the ensemble.");
        }
        knn = new kNN();
        knn.setDistanceFunction(distanceMeasure);
        knn.buildClassifier(instances);
        return knn;
    }

    @Override
    public double classifyInstance(Instance instance) throws Exception {
        if (!classifierBuilt) {
            throw new Exception("Error: Classifier has not been built! Classifier must be built before carrying out classification. See buildClassifier(Instances train).");
        }

        // Special case: the classifier may have been built for a non-Signif ensemble, with the type then changed after building (this is valid, as the cv
        // remains the same but allows for classification using any of the ensembling strategies). This check is necessary as the getMcNemarsInclusion() call
        // is originally in buildClassifier(Instances train) for efficiency, as it depends on the ensemble strategy set when the build function is executed
        // (no point working it out if it's not being used!).
        if (this.ensembleType == EnsembleType.Signif && this.mcNemarsInclusion == null) {
            this.mcNemarsInclusion = getMcNemarsInclusion();
        }

        // cap the pool size at the number of classifiers in the ensemble
        int numProcessors = Runtime.getRuntime().availableProcessors();
        int numThreads = (numProcessors > this.finalClassifiers.length) ? this.finalClassifiers.length : numProcessors;
        ExecutorService service = Executors.newFixedThreadPool(numThreads);
        ArrayList<Future<IndividualClassificationOutput>> futures = new ArrayList<Future<IndividualClassificationOutput>>();

        double[] predictions = new double[this.finalClassifiers.length];
        ClassifierVariants classifier;
        for (int i = 0; i < predictions.length; i++) {
            classifier = this.finalClassifiers[i];
            if (this.ensembleType != EnsembleType.Signif || this.mcNemarsInclusion[i] == true) {
                kNN knn = getInternalClassifier(classifier, this.bestParams[i], this.fullTrainingData);
                futures.add(service.submit(new SingleTrainTestCaller(i, instance, knn)));
            } else {
                predictions[i] = -1;
            }
        }
        service.shutdown();

        IndividualClassificationOutput result;
        for (int i = 0; i < futures.size(); i++) {
            result = futures.get(i).get();
            predictions[result.id] = result.prediction;
        }

        switch (this.ensembleType) {
            case Best:
                return this.classifyInstances_best(predictions);
            case Equal:
                return this.classifyInstances_equal(predictions);
            case Prop:
                return this.classifyInstances_prop(predictions);
            case Signif:
                return this.classifyInstances_prop(predictions); // Signif uses proportional voting over the included classifiers only
            default:
                throw new Exception("Error: Unexpected ensemble type");
        }
    }

    private double classifyInstances_best(double[] predictions) {
        ArrayList<Integer> bestClassifierIds = new ArrayList<Integer>();
        double bsfAcc = -1;
        for (int i = 0; i < this.cvAccs.length; i++) {
            if (this.cvAccs[i] > bsfAcc) {
                bestClassifierIds = new ArrayList<Integer>();
                bestClassifierIds.add(i);
                bsfAcc = this.cvAccs[i];
            } else if (this.cvAccs[i] == bsfAcc) {
                bestClassifierIds.add(i);
            }
        }
        if (bestClassifierIds.size() > 1) {
            // break ties at random between the equally-best classifiers
            Random r = new Random();
            return predictions[bestClassifierIds.get(r.nextInt(bestClassifierIds.size()))];
        } else {
            return predictions[bestClassifierIds.get(0)];
        }
    }

    private double classifyInstances_equal(double[] predictions) {
        TreeMap<Double, Integer> classValsAndVotes = new TreeMap<Double, Integer>();
        for (int c = 0; c < predictions.length; c++) {
            double thisVote = predictions[c];
            if (classValsAndVotes.containsKey(thisVote)) {
                int currentCount = classValsAndVotes.get(thisVote);
                currentCount++;
                classValsAndVotes.put(thisVote, currentCount);
            } else {
                classValsAndVotes.put(thisVote, 1);
            }
        }

        ArrayList<Double> majorityClasses = new ArrayList<Double>();
        int bsfCount = -1;
        int thisCount;
        for (Double classVal : classValsAndVotes.keySet()) {
            thisCount = classValsAndVotes.get(classVal);
            if (thisCount > bsfCount) {
                bsfCount = thisCount;
                majorityClasses = new ArrayList<Double>();
                majorityClasses.add(classVal);
            } else if (thisCount == bsfCount) {
                majorityClasses.add(classVal);
            }
        }

        if (majorityClasses.size() == 1) {
            return majorityClasses.get(0);
        } else {
            Random r = new Random();
            return majorityClasses.get(r.nextInt(majorityClasses.size()));
        }
    }

    private double classifyInstances_prop(double[] predictions) {
        TreeMap<Double, Double> classValsAndVotes = new TreeMap<Double, Double>();
        double thisVote;
        double currentWeight;
        double bsfWeight = 0;
        ArrayList<Double> majorityClasses = new ArrayList<Double>();
        for (int c = 0; c < predictions.length; c++) {
            thisVote = predictions[c];
            if (this.ensembleType == EnsembleType.Prop || this.mcNemarsInclusion[c] == true) {
                if (classValsAndVotes.containsKey(thisVote)) {
                    currentWeight = classValsAndVotes.get(thisVote);
                    currentWeight += cvAccs[c];
                    classValsAndVotes.put(thisVote, currentWeight);
                } else {
                    currentWeight = cvAccs[c];
                    classValsAndVotes.put(thisVote, currentWeight);
                }
                if (currentWeight > bsfWeight) {
                    majorityClasses = new ArrayList<Double>();
                    majorityClasses.add(thisVote);
                    bsfWeight = currentWeight;
                } else if (currentWeight == bsfWeight) {
                    majorityClasses.add(thisVote);
                }
            }
        }
        if (majorityClasses.size() == 1) {
            return majorityClasses.get(0);
        } else {
            Random r = new Random();
            return majorityClasses.get(r.nextInt(majorityClasses.size()));
        }
    }
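    // Worked example for classifyInstances_prop (hypothetical numbers): three classifiers with
    // cvAccs = {92.0, 88.0, 85.0} predicting classes {1.0, 2.0, 1.0} give class 1.0 a total
    // weight of 92 + 85 = 177 and class 2.0 a weight of 88, so the ensemble returns 1.0.
    // Under Equal voting the same predictions give counts of 2 vs 1, with the same outcome;
    // ties (on weight or count) are broken uniformly at random in both schemes.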
    public static void buildAndWriteCvAndTrainTestFiles_SDM(String outDir, String datasetName, Instances train, Instances test) throws Exception {
        ElasticEnsemble elastic = new ElasticEnsemble();
        elastic.setEnsembleType(EnsembleType.Best); // doesn't matter - only the individual classifiers are written out
        elastic.turnSDMClassifiersOn();
        elastic.turnOnFileWriting(outDir, datasetName);
        elastic.makeVerbose();
        elastic.buildClassifier(train);

        kNN knn;
        File outputDir;
        FileWriter out;
        StringBuilder st;
        int correct, total;
        double decision, classValue;
        ClassifierVariants classifier;

        for (int c = 0; c < elastic.finalClassifiers.length; c++) {
            classifier = elastic.finalClassifiers[c];
            knn = getInternalClassifier(classifier, elastic.bestParams[c], train);
            correct = 0;
            total = 0;
            st = new StringBuilder();
            for (int i = 0; i < test.numInstances(); i++) {
                decision = knn.classifyInstance(test.instance(i));
                classValue = test.instance(i).classValue();
                if (decision == classValue) {
                    correct++;
                }
                total++;
                st.append(decision).append(",").append(classValue).append("\n");
            }
            outputDir = new File(outDir + "/trainTest/" + classifier);
            outputDir.mkdirs();
            out = new FileWriter(outDir + "/trainTest/" + classifier + "/trainTest_" + classifier + "_" + datasetName + ".txt");
            out.append(correct + "/" + total + "\n");
            out.append(st);
            out.close();
        }
    }
    public static void demonstrateEnsembles_SettingsFromSDM(Instances train, Instances test, String outputDir, String datasetName) throws Exception {

        // 1. Initialise the classifier in the usual Weka form
        ElasticEnsemble elastic = new ElasticEnsemble();

        // 2. Set the internal distance-measure 1NN classifiers to use. By default, the ensemble classifier won't use any (and will just throw an error).
        //    These can be specified individually, i.e. this.addClassifierToEnsemble(ClassifierVariants.Euclidean_1NN), or there are two special cases:
        //      - this.turnAllClassifiersOn()   // uses all possible classifiers that have been written for the ensemble (DAMI version, including TWED and ERP)
        //      - this.turnSDMClassifiersOn()   // uses the classifiers that were included in the SDM paper
        //    For the purposes of this demonstration, we will turn on all classifiers:
        //elastic.turnSDMClassifiersOn();
        elastic.turnAllClassifiersOn();

        // 3. By default, the ensemble works like a typical Weka classifier - i.e. you build it, run it, and then it is removed from memory.
        //    However, it can also be set to write the cv results to file (i.e. for faster build times when repeating experiments (NOT IMPLEMENTED HERE), or for information).
        //    Files are written to the dir specified by the String outputDir (creating it and any parent dirs if necessary), and files are named using the name
        //    specified in datasetName. It is important to keep this consistent with the data for easily reusing cv results
        //    (i.e. if training with ItalyPowerDemand_TRAIN.arff, use ItalyPowerDemand as the datasetName so the arff can be found dynamically later on).
        //    ***IMPORTANT: Will overwrite existing files as necessary. It will continue to use existing dirs (i.e. if a different dataset has been processed,
        //    those files will remain unchanged).
        // elastic.turnOnFileWriting(outputDir, datasetName);

        // 4. Training can be slow with large datasets and many classifiers in the ensemble. For peace of mind, a method is included to prompt the classifier
        //    to print messages to the system output during training, stating which distance measure is currently being processed (and the time taken to
        //    complete once it has been done).
        elastic.makeVerbose();

        // 5. Build the classifier on the specified training data
        elastic.buildClassifier(train);

        int correct, total;
        double prediction, classValue;
        DecimalFormat df = new DecimalFormat("###.###");

        System.out.println();
        System.out.println("-----------------------------------------");
        System.out.println("TRAIN/TEST CLASSIFICATION");
        System.out.println("-----------------------------------------");

        // To save time and create a fair comparison, we build once and then classify separately for each ensemble strategy. This is valid, as the ensemble
        // type is completely independent of the CV in the training stage of the classifier, so the result would be the same if we carried it out separately
        // for each ensemble.
        EnsembleType[] types = {EnsembleType.Best, EnsembleType.Equal, EnsembleType.Prop, EnsembleType.Signif};
        for (int t = 0; t < types.length; t++) {
            elastic.setEnsembleType(types[t]);
            correct = 0;
            total = 0;
            for (int i = 0; i < test.numInstances(); i++) {
                prediction = elastic.classifyInstance(test.instance(i));
                classValue = test.instance(i).classValue();
                if (prediction == classValue) {
                    correct++;
                }
                total++;
            }
            System.out.println(elastic.ensembleType + ": " + correct + "/" + total + " (" + df.format(100.0 / total * correct) + "%)");
        }
    }
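    // The loop above prints one line per ensemble strategy in the format
    // "<type>: <correct>/<total> (<accuracy>%)", e.g. (figures purely illustrative):
    //   Best: 955/1029 (92.809%)
    //   Equal: 960/1029 (93.294%)
    //   Prop: 962/1029 (93.489%)
    //   Signif: 961/1029 (93.392%)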
    public static Instances loadData(String fileName) {
        Instances data = null;
        try {
            FileReader r = new FileReader(fileName);
            data = new Instances(r);
            data.setClassIndex(data.numAttributes() - 1);
        } catch (Exception e) {
            System.out.println(" Error =" + e + " in method loadData");
            e.printStackTrace();
        }
        return data;
    }

    public boolean[] getMcNemarsInclusion() {
        // find the best classifier according to the cv accuracies - random selection where ties occur
        ArrayList<Integer> bestClassifiersIds = new ArrayList<Integer>();
        double bsfAccuracy = -1;
        for (int c = 0; c < cvAccs.length; c++) {
            if (cvAccs[c] > bsfAccuracy) {
                bestClassifiersIds = new ArrayList<Integer>();
                bestClassifiersIds.add(c);
                bsfAccuracy = cvAccs[c];
            } else if (cvAccs[c] == bsfAccuracy) {
                bestClassifiersIds.add(c);
            }
        }
        int bestClassifierId;
        if (bestClassifiersIds.size() == 1) {
            bestClassifierId = bestClassifiersIds.get(0);
        } else {
            Random r = new Random();
            bestClassifierId = bestClassifiersIds.get(r.nextInt(bestClassifiersIds.size()));
        }

        // Go through each classifier and calculate McNemar's test against the best. For each classifier, set true or false in the output to reflect whether
        // the classifier should be used: if a classifier is significantly different to the best (i.e. it must be worse), output false for that classifier;
        // else, output true to show that it is not significantly worse and should be included.
        boolean[] output = new boolean[this.finalClassifiers.length];
        for (int classifierB = 0; classifierB < this.finalClassifiers.length; classifierB++) {
            if (classifierB == bestClassifierId) {
                output[classifierB] = true; // looking at itself, and obviously we want the best classifier included!
                continue;
            }
            // could include a speedup where a==b; keep it simple for now until it's working
            int wrongByBoth = 0;        // top-left
            int rightByAWrongByB = 0;   // bottom-left
            int wrongByARightByB = 0;   // top-right
            int rightByBoth = 0;        // bottom-right
            double actualClass, thisPred, bPred;
            for (int i = 0; i < this.trainActualClassVals.length; i++) {
                actualClass = trainActualClassVals[i];
                thisPred = cvPreds[bestClassifierId][i];
                bPred = cvPreds[classifierB][i];
                if (thisPred != actualClass && bPred != actualClass) {
                    wrongByBoth++;
                } else if (thisPred == actualClass && bPred != actualClass) {
                    rightByAWrongByB++;
                } else if (thisPred != actualClass && bPred == actualClass) {
                    wrongByARightByB++;
                } else if (thisPred == actualClass && bPred == actualClass) {
                    rightByBoth++;
                }
            }

            if (rightByAWrongByB + wrongByARightByB == 0) {
                output[classifierB] = true; // classifier is equivalent to the best, so include it to effectively add weight to best's votes
            } else {
                double chiPart = Math.abs(wrongByARightByB - rightByAWrongByB) - 1;
                double chi = (chiPart * chiPart) / (wrongByARightByB + rightByAWrongByB);
                if (chi >= 3.841459) { // Alpha = 0.05
                // if (chi >= 6.634897) { // Alpha = 0.01
                    output[classifierB] = false; // significantly different, so don't include
                } else {
                    output[classifierB] = true; // not significantly different (i.e. not significantly worse), so include
                }
            }
        }
        return output;
    }
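    // Worked example for the test above (hypothetical counts): if the best classifier is right on 12
    // instances that classifier B gets wrong (rightByAWrongByB = 12) and wrong on 3 that B gets right
    // (wrongByARightByB = 3), then chi = (|3 - 12| - 1)^2 / (3 + 12) = 64/15 = 4.27, which exceeds the
    // 3.841459 critical value (chi-squared, 1 degree of freedom, alpha = 0.05), so B is deemed
    // significantly worse than the best and is excluded from Signif voting.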
    private static class CvOutput {
        private double accuracy;
        private double[] predictions;
        private double[] params;

        public CvOutput(double accuracy, double[] predictions) {
            this.accuracy = accuracy;
            this.predictions = predictions;
        }

        public CvOutput(double accuracy, double[] predictions, double[] params) {
            this.accuracy = accuracy;
            this.predictions = predictions;
            this.params = Arrays.copyOf(params, params.length);
        }

        public double getAccuracy() {
            return this.accuracy;
        }

        public double[] getPredictions() {
            return this.predictions;
        }

        public double[] getParams() {
            return params;
        }
    }

    @Override
    public String toString() {
        return this.classifiersToUse.toString();
    }

    public static void main(String[] args) {
        try {
            // Example use of the classifier for the ItalyPowerDemand dataset. Please see demonstrateEnsembles_SettingsFromSDM for annotations.
            // Dataset:
            String datasetName = "ItalyPowerDemand";
            // Data dir:
            String dataDir = "../../TSC Problems";
            if (!new File(dataDir).exists()) {
                throw new Exception("Error: Specified data directory does not exist: " + dataDir);
            }
            Instances train = loadData(dataDir + "/" + datasetName + "/" + datasetName + "_TRAIN.arff");
            Instances test = loadData(dataDir + "/" + datasetName + "/" + datasetName + "_TEST.arff");
            demonstrateEnsembles_SettingsFromSDM(train, test, "demonstration", datasetName);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}