/* * To change this template, choose Tools | Templates * and open the template in the editor. */ package applications; import java.util.ArrayList; import utilities.ClassifierTools; import weka.classifiers.Classifier; import weka.core.Instances; import fileIO.*; import java.io.File; import java.text.DecimalFormat; import java.util.Random; import papers.ICDM2013_Lines; import weka.classifiers.AbstractClassifier; import weka.classifiers.Evaluation; import weka.classifiers.bayes.NaiveBayes; import weka.classifiers.functions.SMO; import weka.classifiers.functions.supportVector.PolyKernel; import weka.classifiers.lazy.DTW_kNN; import weka.classifiers.lazy.IBk; import weka.classifiers.lazy.kNN; import weka.classifiers.meta.RotationForest; import weka.classifiers.meta.timeseriesensembles.TransformEnsembles; import weka.classifiers.trees.J48; import weka.classifiers.trees.RandomForest; import weka.filters.*; import weka.filters.timeseries.*; public class Otoliths { static String dataPath="C:\\Users\\ajb\\Dropbox\\TSC Problems\\Herring\\HERRING500"; Instances data; Otoliths(){ data=ClassifierTools.loadData(dataPath); } public static void CSSClassifier(int folds){ ArrayList<String> names=new ArrayList<String>(); Instances all=ClassifierTools.loadData("C:\\Users\\ajb\\Dropbox\\TSC Problems\\Otoliths\\CSSHerringA"); all.randomize(new Random()); NormalizeCase nc=new NormalizeCase(); nc.setNormType(NormalizeCase.NormType.STD); Classifier[] c =setSingleClassifiers(names); OutFile res=new OutFile("C:\\Research\\Results\\Otoliths\\singleClassifiersCSSA" + ".csv"); try{ all=nc.process(all); for(int i=0;i<c.length;i++){ System.out.print(" running classifier "+names.get(i)); Evaluation e=new Evaluation(all); e.crossValidateModel(c[i], all, folds, new Random()); res.writeLine(names.get(i)+","+e.correct()/all.numInstances()); System.out.println(" Acc = "+e.correct()/all.numInstances()); } }catch(Exception ex){ ex.printStackTrace(); System.exit(0); } } public static Classifier[] setSingleClassifiers(ArrayList<String> names){ ArrayList<Classifier> sc2=new ArrayList<Classifier>(); Classifier c; kNN k=new kNN(100); k.setCrossValidate(true); sc2.add(k); names.add("kNN_ED"); sc2.add(new NaiveBayes()); names.add("NB"); sc2.add(new J48()); names.add("C45"); c=new SMO(); PolyKernel kernel = new PolyKernel(); kernel.setExponent(1); ((SMO)c).setKernel(kernel); sc2.add(c); names.add("SVML"); c=new SMO(); kernel = new PolyKernel(); kernel.setExponent(2); ((SMO)c).setKernel(kernel); sc2.add(c); names.add("SVMQ"); c=new SMO(); c=new RandomForest(); ((RandomForest)c).setNumTrees(200); sc2.add(c); names.add("RandF200"); c=new RotationForest(); sc2.add(c); ((RotationForest) c).setNumIterations(50); names.add("RotF30"); c=new DTW_kNN(1); ((DTW_kNN)c).setMaxR(1); sc2.add(c); names.add("NN_DTW"); Classifier[] sc=new Classifier[sc2.size()]; for(int i=0;i<sc.length;i++) sc[i]=sc2.get(i); return sc; } public void rawOutlinesSingleClassifier(boolean normalise){ ArrayList<String> names=new ArrayList<String>(); data.randomize(new Random()); if(normalise){ NormalizeCase nc=new NormalizeCase(); nc.setNormType(NormalizeCase.NormType.STD); try{ data=nc.process(data); }catch(Exception ex){ System.out.println(" Error normalising "); ex.printStackTrace(); System.exit(0); } } Classifier[] c =setSingleClassifiers(names); OutFile res; if(normalise) res=new OutFile("C:\\Users\\ajb\\Dropbox\\Results\\Otoliths\\singleClassifiers.csv"); else res=new OutFile("C:\\Users\\ajb\\Dropbox\\Results\\Otoliths\\singleClassifiersNormalised.csv"); try{ for(int i=0;i<c.length;i++){ System.out.print(" running classifier "+names.get(i)); Evaluation e=new Evaluation(data); e.crossValidateModel(c[i], data, data.numInstances(), new Random()); res.writeLine(names.get(i)+","+e.correct()/data.numInstances()); System.out.println(" Acc = "+e.correct()/data.numInstances()); } }catch(Exception ex){ ex.printStackTrace(); System.exit(0); } } public void powerSpectrumSingleClassifier(boolean normalise){ ArrayList<String> names=new ArrayList<String>(); data.randomize(new Random()); if(normalise){ NormalizeCase nc=new NormalizeCase(); nc.setNormType(NormalizeCase.NormType.STD); try{ data=nc.process(data); }catch(Exception ex){ System.out.println(" Error normalising "); ex.printStackTrace(); System.exit(0); } } Classifier[] c =setSingleClassifiers(names); OutFile res; if(normalise) res=new OutFile("C:\\Users\\ajb\\Dropbox\\Results\\Otoliths\\singleClassifiersPS.csv"); else res=new OutFile("C:\\Users\\ajb\\Dropbox\\Results\\Otoliths\\singleClassifiersNormalisedPS.csv"); try{ PowerSpectrum ps= new PowerSpectrum(); data=ps.process(data); for(int i=0;i<c.length;i++){ System.out.print(" running classifier "+names.get(i)); Evaluation e=new Evaluation(data); e.crossValidateModel(c[i], data, data.numInstances(), new Random()); res.writeLine(names.get(i)+","+e.correct()/data.numInstances()); System.out.println(" Acc = "+e.correct()/data.numInstances()); } }catch(Exception ex){ ex.printStackTrace(); System.exit(0); } } public static double icdmEnsemble(Instances train, Instances test){ return 0; } public static void basicDataTransforms(String baseClassifier, int nosFolds){ DecimalFormat dc= new DecimalFormat("###.###"); Instances all=ClassifierTools.loadData("C:\\Research\\Data\\Time Series Classification\\Otoliths\\Herring"); OutFile of=new OutFile("C:\\Research\\Results\\Otoliths\\baseTransforms"+"baseClassifier.csv"); System.out.println("NEAREST NEIGHBOUR CLASSIFIERS"); of.writeLine("10 fold cross validation results with"+baseClassifier); of.writeLine(",TimeDomain,PowerSpectrumDomain,ACFDomain,PCADomain"); Classifier base=null; if(baseClassifier.equals("1NN")) base=new kNN(1); else if(baseClassifier.equals("DTW")) base=new DTW_kNN(1); else if(baseClassifier.equals("RotationForest")) base=new RotationForest(); else if(baseClassifier.equals("RandomForest")){ base=new RandomForest(); ((RandomForest)base).setNumTrees(30); } else if(baseClassifier.equals("SVMQ")){ base=new SMO(); PolyKernel kernel = new PolyKernel(); kernel.setExponent(2); ((SMO)base).setKernel(kernel); } else{ System.out.println("Classifier Not Included, exiting"); System.exit(0); } try{ System.out.println("******************Time Domain******************"); Evaluation e=new Evaluation(all); e.crossValidateModel(AbstractClassifier.makeCopy(base), all, nosFolds, new Random()); System.out.println(" Acc = "+e.correct()/all.numInstances()); // res.writeLine(names.get(i)+","+e.correct()/all.numInstances()); of.writeString(e.correct()/all.numInstances()+","); System.out.println("******************Power Spectrum Domain******************"); PowerSpectrum ps=new PowerSpectrum(); Instances psAll=ps.process(all); /* Delete the duplicate half of the spectrum */ int atts=(psAll.numAttributes()-1)/2-2; for(int j=0;j<atts;j++) psAll.deleteAttributeAt(psAll.numAttributes()-2); e=new Evaluation(psAll); e.crossValidateModel(AbstractClassifier.makeCopy(base), psAll, nosFolds, new Random()); System.out.println(" Acc = "+e.correct()/all.numInstances()); of.writeString(e.correct()/all.numInstances()+","); System.out.println("\n******************ACF Domain******************"); ACF acf=new ACF(); acf.setMaxLag(atts); Instances acfAll=acf.process(all); e=new Evaluation(acfAll); e.crossValidateModel(AbstractClassifier.makeCopy(base), acfAll, nosFolds, new Random()); System.out.println(" Acc = "+e.correct()/all.numInstances()); of.writeString(e.correct()/all.numInstances()+","); }catch(Exception e){System.out.println("Exception ="+e);e.printStackTrace();System.exit(0);} } public static double combineResults(String results){ InFile f; OutFile of=new OutFile(results); double acc=0; for(int i=1;i<=100;i++){ f=new InFile("C:/Users/ajb/Dropbox/Results/Herring/fold"+i); String s=f.readLine(); of.writeLine(s); } return 0; } public static String dataName="Herring500"; public static void datasetCrossValidation(Instances train_raw) throws Exception{ ICDM2013_Lines.initCv(dataName); // get derivative training data (can store locally and read in to save computation time for large datasets) DerivativeFilter df = new DerivativeFilter(); Instances train_derivative = df.process(train_raw); double cv_01_euclidean_1nn = ICDM2013_Lines.cv_01_Euclidean_1NN(dataName,train_raw); // 01 Euclidean 1NN double cv_02_dtw_fullWindow_1nn = ICDM2013_Lines.cv_02_DTW_fullWindow_1NN(dataName,train_raw); // 02 DTW Full Window 1NN double[] cv_03_dtw_cvWindow_1nn = ICDM2013_Lines.cv_03_DTW_bestWindow_1NN(dataName,train_raw); // 03 DTW variable window 1NN (try all possible values of R from 0% to 100% in increments of 1%) double[] cv_04_wdtw_1nn = ICDM2013_Lines.cv_04_WDTW_1NN(dataName,train_raw); // 04 Weighted DTW with cv to find the optimal weight, g. Possible values for g range from 0 to 1 in increments of 0.01 double[] cv_05_euclidean_knn = ICDM2013_Lines.cv_05_Euclidean_kNN(dataName,train_raw); // 05 Euclidean kNN (k = 1, 2, ..., 100) double[] cv_06_dtw_fullWindow_knn = ICDM2013_Lines.cv_06_DTW_fullWindow_kNN(dataName,train_raw); // 06 DTW Full Window kNN (k = 1, 2, ..., 100) double[] cv_11_dtw_optimalWindow_knn = ICDM2013_Lines.cv_11_DTW_optimalWindow_kNN(dataName,train_raw); // 11 DTW Optimal Window kNN (r = 0, 0.01, 0.02, ..., 1) (k = 1, 2, ..., 100) double[] cv_12_wdtw_knn = ICDM2013_Lines.cv_12_WDTW_kNN(dataName,train_raw); // 12 WDTW kNN (g = 0, 0.01, 0.02, ..., 1) (k = 1, 2, ..., 100) double cv_21_ddtw_fullWindow_1nn = ICDM2013_Lines.cv_21_DDTW_fullWindow_1NN(dataName,train_derivative); // 21 Derivative DTW Full Window 1NN double[] cv_22_ddtw_cvWindow_1nn = ICDM2013_Lines.cv_22_DDTW_bestWindow_1NN(dataName,train_derivative); // 22 Derivative DTW Variable Window 1NN (r 0-100%, increaments of 1%) double[] cv_23_wdtw_1nn = ICDM2013_Lines.cv_23_WDDTW_1NN(dataName,train_derivative); // 23 Erighted Derivative DTW 1NN (g 0-1, increments of 0.01) double[] cv_24_ddtw_fullWindow_knn = ICDM2013_Lines.cv_24_DDTW_fullWindow_kNN(dataName,train_derivative); // 24 Derivative DTW Full Window kNN (k 1-100, increments of 1) double[] cv_25_ddtw_optimalWindow_knn = ICDM2013_Lines.cv_25_DDTW_optimalWindow_kNN(dataName,train_derivative); // 25 Derivative DTW Variable Window double[] cv_26_wddtw_knn = ICDM2013_Lines.cv_26_WDTW_kNN(dataName,train_derivative); double[] cv_31_lcss_1nn = ICDM2013_Lines.cv_31_LCSS_1NN(dataName,train_raw); double[] cv_32_lcss_knn = ICDM2013_Lines.cv_32_LCSS_kNN(dataName,train_raw,(int)cv_31_lcss_1nn[1],cv_31_lcss_1nn[2]); // print results /***** RAW DATA ****/ System.out.printf("Euclidean 1NN:%33.3f%n",cv_01_euclidean_1nn); System.out.printf("DTW Full Window 1NN:%27.3f%n",cv_02_dtw_fullWindow_1nn); System.out.printf("DTW Optimal Window 1NN (r=%1.2f):%15.3f%n",cv_03_dtw_cvWindow_1nn[1],cv_03_dtw_cvWindow_1nn[0]); System.out.printf("WDTW 1NN (g=%1.2f):%29.3f%n",cv_04_wdtw_1nn[1],cv_04_wdtw_1nn[0]); System.out.printf("Euclidean kNN (k=%3.0f):%25.3f%n",cv_05_euclidean_knn[1],cv_05_euclidean_knn[0]); System.out.printf("DTW Full Window kNN (k=%3.0f):%19.3f%n",cv_06_dtw_fullWindow_knn[1],cv_06_dtw_fullWindow_knn[0]); System.out.printf("DTW Optimal Window kNN (k=%3.0f, r=%1.2f):%9.3f%n",cv_11_dtw_optimalWindow_knn[1],cv_11_dtw_optimalWindow_knn[2],cv_11_dtw_optimalWindow_knn[0]); System.out.printf("WDTW kNN (k=%3.0f, g=%1.2f):%22.3f%n",cv_12_wdtw_knn[1],cv_12_wdtw_knn[2],cv_12_wdtw_knn[0]); /***** DERIVATIVE TRANSFORMED DATA ****/ System.out.printf("DDTW Full Window 1NN:%26.3f%n",cv_21_ddtw_fullWindow_1nn); System.out.printf("DDTW Optimal Window 1NN (r=%1.2f):%14.3f%n",cv_22_ddtw_cvWindow_1nn[1],cv_22_ddtw_cvWindow_1nn[0]); System.out.printf("WDDTW 1NN (g=%1.2f):%28.3f%n",cv_23_wdtw_1nn[1],cv_23_wdtw_1nn[0]); System.out.printf("DDTW Full Window kNN (k=%3.0f):%18.3f%n",cv_24_ddtw_fullWindow_knn[1],cv_24_ddtw_fullWindow_knn[0]); System.out.printf("DDTW Optimal Window kNN (k=%3.0f, r=%1.2f):%7.3f%n",cv_25_ddtw_optimalWindow_knn[1],cv_25_ddtw_optimalWindow_knn[2],cv_25_ddtw_optimalWindow_knn[0]); System.out.printf("WDDTW kNN (k=%3.0f, g=%1.2f):%21.3f%n",cv_26_wddtw_knn[1],cv_26_wddtw_knn[2],cv_26_wddtw_knn[0]); /***** LCSS ******/ System.out.printf("LCSS (d=%3.0f, e=%1.3f) 1NN:%21.3f%n",cv_31_lcss_1nn[1],cv_31_lcss_1nn[2],cv_31_lcss_1nn[0]); System.out.printf("LCSS (d=%3.0f, e=%1.3f) kNN (k=%3.0f):%13.3f%n",cv_32_lcss_knn[1],cv_32_lcss_knn[2],cv_32_lcss_knn[3],cv_32_lcss_knn[0]); } public static void datasetTrainTest(Instances train_raw, Instances test_raw) throws Exception{ // Pre-requisite of train/test classification is that necessary params have been found in CV stage. // Therefore, check to see if CV has been carried out previously. If not, begin then CV automatically. File cvDir = new File(ICDM2013_Lines.OUTPUT_DIR_CV); if(!cvDir.exists()){ System.out.println("Cross-validation for "+dataName+" doesn't appear to have taken place. Starting cross-validation."); datasetCrossValidation(train_raw); } ICDM2013_Lines.initTrainTest(dataName); DerivativeFilter df = new DerivativeFilter(); Instances train_derivative = df.process(train_raw); df = new DerivativeFilter(); Instances test_derivative = df.process(test_raw); /***** RAW DATA ****/ System.out.printf("Euclidean 1NN:%33.3f%n",ICDM2013_Lines.trainTest_01_Euclidean_1NN(dataName, train_raw, test_raw)); System.out.printf("DTW Full Window 1NN:%27.3f%n",ICDM2013_Lines.trainTest_02_DTW_fullWindow_1NN(dataName, train_raw, test_raw)); System.out.printf("DTW Best Window 1NN:%27.3f%n",ICDM2013_Lines.trainTest_03_DTW_optimalWindow_1NN(dataName, train_raw, test_raw)); System.out.printf("WDTW 1NN:%38.3f%n",ICDM2013_Lines.trainTest_04_WDTW_1NN(dataName, train_raw, test_raw)); System.out.printf("Euclidean kNN:%33.3f%n",ICDM2013_Lines.trainTest_05_Euclidean_kNN(dataName, train_raw, test_raw)); System.out.printf("DTW Full Window kNN:%27.3f%n",ICDM2013_Lines.trainTest_06_DTW_fullWindow_kNN(dataName, train_raw, test_raw)); System.out.printf("DTW Best Window kNN:%27.3f%n",ICDM2013_Lines.trainTest_11_DTW_bestWindow_kNN(dataName, train_raw, test_raw)); System.out.printf("WDTW kNN:%38.3f%n",ICDM2013_Lines.trainTest_12_WDTW_kNN(dataName, train_raw, test_raw)); /***** DERIVATIVE TRANSFORMED DATA ****/ System.out.printf("DDTW Full Window 1NN:%26.3f%n",ICDM2013_Lines.trainTest_21_DDTW_fullWindow_1NN(dataName, train_derivative, test_derivative)); System.out.printf("DDTW Best Window 1NN:%26.3f%n",ICDM2013_Lines.trainTest_22_DDTW_optimalWindow_1NN(dataName, train_derivative, test_derivative)); System.out.printf("WDDTW 1NN:%37.3f%n",ICDM2013_Lines.trainTest_23_WDDTW_1NN(dataName, train_derivative, test_derivative)); System.out.printf("DDTW Full Window kNN:%26.3f%n",ICDM2013_Lines.trainTest_24_DDTW_fullWindow_kNN(dataName, train_derivative, test_derivative)); System.out.printf("DDTW Best Window kNN:%26.3f%n",ICDM2013_Lines.trainTest_25_DDTW_bestWindow_kNN(dataName, train_derivative, test_derivative)); System.out.printf("WDDTW kNN:%37.3f%n",ICDM2013_Lines.trainTest_26_WDDTW_kNN(dataName, train_derivative, test_derivative)); /***** LCSS ******/ System.out.printf("LCSS 1NN:%38.3f%n",ICDM2013_Lines.trainTest_31_LCSS_1NN(dataName, train_raw, test_raw)); System.out.printf("LCSS kNN:%38.3f%n",ICDM2013_Lines.trainTest_32_LCSS_kNN(dataName, train_raw, test_raw)); } //Nasty hack to creat 100 test train data sets! public static void splitAllData(){ String path="C:/Users/ajb/Dropbox/"; Instances all=ClassifierTools.loadData(path+"TSC_Problems/Herring500/Herring500"); for(int fold=0;fold<all.numInstances();fold++){ Instances train=new Instances(all); train.delete(fold); Instances test=new Instances(all,0); test.add(all.instance(fold)); //Create fold directory File f = new File(path+"TSC_Problems/Herring500_"+(fold+1)); if(!f.isDirectory())//Test whether directory exists f.mkdir(); //Save train/test OutFile of=new OutFile(path+"TSC_Problems/Herring500_"+(fold+1)+"/Herring500_"+(fold+1)+"_TRAIN.arff"); of.writeLine("% Train split "+fold+"\n"+train.toString()); OutFile of2=new OutFile(path+"TSC_Problems/Herring500_"+(fold+1)+"/Herring500_"+(fold+1)+"_TEST.arff"); of2.writeLine("% Test split "+fold+"\n"+test.toString()); } } public static void main(String[] args){ int fold=Integer.parseInt(args[0]); String dataName = "Herring500_"+fold; System.out.println(" Running fold ="+fold); // then all cross-validation and train/test experiments will be carried out when the code is executed. // 2. If 'dataName' has previously been processed and the results are stored under the paths listed in the 'OUTPUT_DIR' fields, // summary methods will be read in to parse the existing results. This is much faster on all datasets, as classification will // not need to be repeated unecessarily. try{ // Part 1: Cross-Validation // If CV hasn't been carried out, perform CV (ASSUMPTION: if cv folder exists, cv has been carried out FULLY) // Else, print CV results System.out.println("Cross-Validation Results"); System.out.println("----------------------------------------"); File cvResultsDir = new File(ICDM2013_Lines.OUTPUT_DIR_CV+"/"+dataName); if(cvResultsDir.exists()){ ICDM2013_Lines.printPreCalculatedCvResults(dataName); }else{ ICDM2013_Lines.datasetCrossValidation(dataName); } System.out.println(); // Part 2: Train/Test // If train/test hasn't been carried out, perform Train/Test (ASSUMPTION: if results dir exists, Train/Test has been fully carried out) // Else, print Train/Test results System.out.println("Train/Test Results"); System.out.println("----------------------------------------"); File trainTestResultsDir = new File(ICDM2013_Lines.OUTPUT_DIR_TRAIN_TEST+"/"+dataName); if(trainTestResultsDir.exists()){ ICDM2013_Lines.printPrecalculatedTrainTestResults(dataName); }else{ ICDM2013_Lines.datasetTrainTest(dataName); } System.out.println(); // Part 3: Ensembles // CV and Train/Test results must be in place to reach this statement under the intended conditions when this code was released. If code has // been modified, please ensure CV and train/test classification has been carried out before running ensembles (CV needed for weighting, // Train/Test for final classification) // Note: in method below, ensembling is carried out 10 times for each strategy and the average for each is reported. This is justified because // it is not unlikely that classifiers of very similar natures (i.e. all time-domain NN) may have very similar CV performance on some datasets, // therefore leading to slightly different ensembles when ties are settled randomly. Since ensembling takes place on pre-calculated results, it is very // time efficient and allows us to average over multilpe runs to smooth the results to obtain consistent results from multiple runs. System.out.println("Ensemble Results"); System.out.println("----------------------------------------"); ICDM2013_Lines.print10RunEnsembles(dataName); }catch(Exception e){ e.printStackTrace(); } /* Instances all=ClassifierTools.loadData("TSC_Problems/Herring/Herring500"); Instances train=new Instances(all); Instances test=new Instances(all,0); test.add(all.instance(fold)); train.delete(fold); // double acc=icdmEnsemble(train,test); double pred=icdmEnsemble(train,test); OutFile of=new OutFile("Results/Herring/fold"+(fold+1)); of.writeLine(fold+","+pred+","+test.instance(0).classValue()); */ System.exit(0); // CSSClassifier(100); Otoliths o= new Otoliths(); o.rawOutlinesSingleClassifier(false); o.rawOutlinesSingleClassifier(true); o.powerSpectrumSingleClassifier(false); o.powerSpectrumSingleClassifier(true); // basicDataTransforms("NB",10); // basicDataTransforms("SVMQ",10); // basicDataTransforms("NNDTW",10); } }