package development; import statistics.simulators.DataSimulator; import statistics.simulators.ShapeletModel; import java.io.BufferedWriter; import java.io.File; import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.Random; import java.util.Scanner; import java.util.logging.Level; import java.util.logging.Logger; import weka.classifiers.Classifier; import weka.classifiers.trees.J48; import weka.core.Instances; import weka.core.shapelet.QualityMeasures; import weka.core.shapelet.Shapelet; import weka.filters.timeseries.shapelet_transforms.ApproximateShapeletTransform; import weka.filters.timeseries.shapelet_transforms.FullShapeletTransform; import weka.filters.timeseries.shapelet_transforms.ShapeletTransformDistCaching; import weka.filters.timeseries.shapelet_transforms.ShapeletTransform; /** * * @author Edgaras */ public class MCompProjectExperiments { public static String dropboxPath="C:\\Users\\Edgaras\\Dropbox\\TSC Problems"; // There are two types of dataset assessment - LOOCV or Train/Test split private enum AssesmentType{LOOCV, TRAIN_TEST}; public static String[] fileNames={ //Number of train,test cases,length,classes,total num of datapoints "SonyAIBORobotSurface", //20,601,70,2,1400 "ItalyPowerDemand", //67,1029,24,2,1608 "MoteStrain", //20,1252,84,2,1680 "TwoLeadECG", //23,1139,82,2,1886 "ECGFiveDays", //23,861,136,2,3128 "DiatomSizeReduction", //16,306,345,4,5520 "GunPoint", //50,150,150,2,7500 "Coffee", //28,28,286,2,8008 "FaceFour", //24,88,350,4,8400 "Symbols", //25,995,398,6,9950 "Beef", //30,30,470,5,14100 "SyntheticControl", //300,300,60,6,18000 "MPEG7Shapes/beetle-fly", //40,,512,2,20480 "MPEG7Shapes/bird-chicken", //40,,512,2,20480 "Lighting7", //70,73,319,7,22330 "Trace", //100,100,275,4,27500 "otoliths/Herrings", //64,64,512,2,32768 "MedicalImages", //381,760,99,10,37719 "otoliths/Herring500", //100,,500,2,50000 "SyntheticData", //100,1000,500,2,50000 "Adiac", //390,391,176,37,68640 "ChlorineConcentration", //467,3840,166,3,77522 "Bones/DP_Little", //400,645,250,3,100000 "Bones/DP_Middle", //400,645,250,3,100000 "Bones/DP_Thumb", //400,645,250,3,100000 "Bones/MP_Little", //400,645,250,3,100000 "Bones/MP_Middle", //400,645,250,3,100000 "Bones/PP_Little", //400,645,250,3,100000 "Bones/PP_Middle", //400,645,250,3,100000 "Bones/PP_Thumb", //400,645,250,3,100000 "MPEG7Shapes/ShapesAll", //600,600,512,60,307200 }; // An array containing the assesment type for each of the datasets. private static MCompProjectExperiments.AssesmentType[] assesmentTypes = { MCompProjectExperiments.AssesmentType.TRAIN_TEST, //SonyAIBORobotSurface MCompProjectExperiments.AssesmentType.TRAIN_TEST, //ItalyPowerDemand MCompProjectExperiments.AssesmentType.TRAIN_TEST, //MoteStrain MCompProjectExperiments.AssesmentType.TRAIN_TEST, //TwoLeadECG MCompProjectExperiments.AssesmentType.TRAIN_TEST, //ECGFiveDays MCompProjectExperiments.AssesmentType.TRAIN_TEST, //DiatomSizeReduction MCompProjectExperiments.AssesmentType.TRAIN_TEST, //GunPoint MCompProjectExperiments.AssesmentType.TRAIN_TEST, //Coffee MCompProjectExperiments.AssesmentType.TRAIN_TEST, //FaceFour MCompProjectExperiments.AssesmentType.TRAIN_TEST, //Symbols MCompProjectExperiments.AssesmentType.TRAIN_TEST, //Beef MCompProjectExperiments.AssesmentType.TRAIN_TEST, //SyntheticControl MCompProjectExperiments.AssesmentType.LOOCV, //MPEG7Shapes/beetle-fly MCompProjectExperiments.AssesmentType.LOOCV, //MPEG7Shapes/bird-chicken MCompProjectExperiments.AssesmentType.TRAIN_TEST, //Lighting7 MCompProjectExperiments.AssesmentType.TRAIN_TEST, //Trace MCompProjectExperiments.AssesmentType.TRAIN_TEST, //otoliths/Herrings MCompProjectExperiments.AssesmentType.TRAIN_TEST, //MedicalImages MCompProjectExperiments.AssesmentType.LOOCV, //otoliths/Herring500 MCompProjectExperiments.AssesmentType.TRAIN_TEST, //SyntheticData MCompProjectExperiments.AssesmentType.TRAIN_TEST, //Adiac MCompProjectExperiments.AssesmentType.TRAIN_TEST, //ChlorineConcentration MCompProjectExperiments.AssesmentType.TRAIN_TEST, //Bones/DP_Little MCompProjectExperiments.AssesmentType.TRAIN_TEST, //Bones/DP_Middle MCompProjectExperiments.AssesmentType.TRAIN_TEST, //Bones/DP_Thumb MCompProjectExperiments.AssesmentType.TRAIN_TEST, //Bones/MP_Little MCompProjectExperiments.AssesmentType.TRAIN_TEST, //Bones/MP_Middle MCompProjectExperiments.AssesmentType.TRAIN_TEST, //Bones/PP_Little MCompProjectExperiments.AssesmentType.TRAIN_TEST, //Bones/PP_Middle MCompProjectExperiments.AssesmentType.TRAIN_TEST, //Bones/PP_Thumb MCompProjectExperiments.AssesmentType.TRAIN_TEST //MPEG7Shapes/ShapesAll }; // An array containing the shapelet min-max interval for each of the datasets. private static int[][] shapeletMinMax = { {15, 36}, // SonyAIBORobotSurface {7, 14}, // ItalyPowerDemand {16, 31}, // MoteStrain {7, 13}, // TwoLeadECG {24, 76}, // ECGFiveDays {7,16}, // DiatomSizeReduction {24, 55}, // GunPoint {18,30}, // Coffee {20, 120}, // FaceFour {52, 155}, // Symbols {8, 30}, // Beef {20, 56}, // SyntheticControl {30, 101}, // MPEG7Shapes/beetle-fly {30, 101}, // MPEG7Shapes/bird-chicken {20, 80}, // Lighting7 {62, 232}, // Trace {30, 101}, // otoliths/Herrings {9, 35}, // MedicalImages {30, 101}, // otliths/Herring500 {25, 35}, // SyntheticData {3, 10}, // Adiac {7, 20}, // ChlorineConcentration {9, 36}, // Bones/DP_Little {15, 43}, // Bones/DP_Middle {11, 47}, // Bones/DP_Thumb {15, 41}, // Bones/MP_Little {20, 53}, // Bones/MP_Middle {13, 38}, // Bones/PP_Little {14, 34}, // Bones/PP_Middle {14, 41}, // Bones/PP_Thumb {30, 110} // MPEG7Shapes/ShapesAll }; // Variables for holding data private static Instances[] instancesTrain; private static Instances[] instancesTest; // SYNTHETIC DATA PARMAETERS private static final int STARTING_LENGTH = 50; private static final int MAX_LENGTH = 500; private static final int STEP_SIZE = 50; private static final int NUM_OF_CASES = 100; private static final int SHAPELET_LENGTH = 30; // APPROXIMATION PARAMETERS // Note: PERCENT_END - PERCENT_START % PERCENT_INCREMENT == 0 private static final int PERCENT_START = 45; private static final int PERCENT_END = 95; private static final int PERCENT_INCREMENT = 10; private static final int UCR_NUM_FILES = 31; /** * @param args the command line arguments */ public static void main(String[] args) { loadData(); //Experiment selection Scanner scan = new Scanner(System.in); int experimentIndex = 0; boolean isValidInput = false; System.out.println("Select experiment: " //Preparatory experiments + "\n \t 1. Estimate min/max (Note: Only used once and then values are hardcoded).\n" //Accuracy experiments + "\n \t 2. Compare C4.5 accuracy of different subsequence distance methods using UCR data." + "\n \t 3. Compare C4.5 accuracy of transform (DistCaching) with and without candidate IG pruning using UCR data." + "\n \t 4. Compare C4.5 accuracy of transform (DistCaching) with and without candidate MM pruning using UCR data." + "\n \t 5. Compare C4.5 accuracy of transform (DistCaching) with and without candidate F-Stat pruning using UCR data." + "\n \t 6. Compare C4.5 accuracy of transform (DistCaching) with and without candidate KW pruning using UCR data." + "\n \t 7. Compare C4.5 accuracy of transform (DistCaching) with and without candidate IG pruning using synthetic data." + "\n \t 8. Compare C4.5 accuracy of transform (DistCaching) with and without candidate MM pruning using synthetic data." + "\n \t 9. Compare C4.5 accuracy of transform (DistCaching) with and without candidate F-Stat pruning using synthetic data." + "\n \t 10. Compare C4.5 accuracy of transform (DistCaching) with and without candidate KW pruning using synthetic data.\n" //Timing experiments + "\n \t 11. Compare speed of different subsequence distance methods using UCR data." + "\n \t 12. Compare speed of different subsequence distance methods using synthetic data." + "\n \t 13. Compare speed of transform (DistCaching) with and without candidate IG pruning using UCR data." + "\n \t 14. Compare speed of transform (DistCaching) with and without candidate MM pruning using UCR data." + "\n \t 15. Compare speed of transform (DistCaching) with and without candidate F-Stat pruning using UCR data." + "\n \t 16. Compare speed of transform (DistCaching) with and without candidate KW pruning using UCR data." + "\n \t 17. Compare speed of transform (DistCaching) with and without candidate IG pruning using synthetic data." + "\n \t 18. Compare speed of transform (DistCaching) with and without candidate MM pruning using synthetic data." + "\n \t 19. Compare speed of transform (DistCaching) with and without candidate F-Stat pruning using synthetic data." + "\n \t 20. Compare speed of transform (DistCaching) with and without candidate KW pruning using synthetic data.\n" //Operation count experiments + "\n \t 21. Compare fundamental operation count of base transform vs transform with online normalization and reordering.\n" //Approximate transform experiments + "\n \t 22. Compare C4.5 accuracy of exact and approximate transforms using UCR data." + "\n \t 23. Compare C4.5 accuracy of exact and approximate transforms using synthetic data." + "\n \t 24. Compare speed of exact and approximate transforms using UCR data." + "\n \t 25. Compare speed of exact and approximate transforms using synthetic data"); while(!isValidInput){ try{ int in = scan.nextInt(); if(in > 0 && in < 26){ experimentIndex = in; isValidInput = true; }else{ throw new IOException(); } }catch(Exception e){ scan = new Scanner(System.in); System.out.println("Invalid experiment selection."); } } switch (experimentIndex){ //Preparatory experiments case 1: estimateMinMaxExperiment(); break; //Accuracy experiments case 2: exactTransformAccuracyExperiment(); break; case 3: candidatePruningAccuracyExperiment(QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN, false); break; case 4: candidatePruningAccuracyExperiment(QualityMeasures.ShapeletQualityChoice.MOODS_MEDIAN, false); break; case 5: candidatePruningAccuracyExperiment(QualityMeasures.ShapeletQualityChoice.F_STAT, false); break; case 6: candidatePruningAccuracyExperiment(QualityMeasures.ShapeletQualityChoice.KRUSKALL_WALLIS, false); break; case 7: candidatePruningAccuracyExperiment(QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN, true); break; case 8: candidatePruningAccuracyExperiment(QualityMeasures.ShapeletQualityChoice.MOODS_MEDIAN, true); break; case 9: candidatePruningAccuracyExperiment(QualityMeasures.ShapeletQualityChoice.F_STAT, true); break; case 10: candidatePruningAccuracyExperiment(QualityMeasures.ShapeletQualityChoice.KRUSKALL_WALLIS, true); break; //Timing experiments case 11: exactTransformTimingExperiment(false); break; case 12: exactTransformTimingExperiment(true); break; case 13: candidatePruningTimingExperiment(QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN, false); break; case 14: candidatePruningTimingExperiment(QualityMeasures.ShapeletQualityChoice.MOODS_MEDIAN, false); break; case 15: candidatePruningTimingExperiment(QualityMeasures.ShapeletQualityChoice.F_STAT, false); break; case 16: candidatePruningTimingExperiment(QualityMeasures.ShapeletQualityChoice.KRUSKALL_WALLIS, false); break; case 17: candidatePruningTimingExperiment(QualityMeasures.ShapeletQualityChoice.INFORMATION_GAIN, true); break; case 18: candidatePruningTimingExperiment(QualityMeasures.ShapeletQualityChoice.MOODS_MEDIAN, true); break; case 19: candidatePruningTimingExperiment(QualityMeasures.ShapeletQualityChoice.F_STAT, true); break; case 20: candidatePruningTimingExperiment(QualityMeasures.ShapeletQualityChoice.KRUSKALL_WALLIS, true); break; //Operation count experiments case 21: opCountForBaseAndOnlineTransform(); break; //Approximate transform experiments case 22: approxTransformAccuracyExperiment(false); break; case 23: approxTransformAccuracyExperiment(true); break; case 24: approxTransformTimingExperiment(false); break; case 25: approxTransformTimingExperiment(true); break; default: System.out.println("Unknow experiment identifier."); } } //################### Experiment Functions ################################# // Method to estimate min/max (Note: Only used once and then values are hardcoded). private static void estimateMinMaxExperiment(){ //Prepare output file String fileName = "estimated_min_max.csv"; String content = "Dataset, Min, Max"; writeToFile(fileName, content, false); //Find min/max for(int i = 0; i < instancesTrain.length; i++){ System.out.println("Processing dataset " + (i+1) +" out of " + instancesTrain.length); int[] minMax = estimateMinAndMax(instancesTrain[i]); content = fileNames[i] + ", " + minMax[0] + ", " + minMax[1]; writeToFile(fileName, content, true); } } //Method to check C4.5 accuracy of different subsequence distance methods. private static void exactTransformAccuracyExperiment(){ Classifier classifier = new J48(); //Prepare output file String fileName = "subsequence_distance_UCR_accuracy.csv"; String content = "Dataset, C4.5 using Base transform, C4.5 using online z-norm + reordering, C4.5 using stat caching"; writeToFile(fileName, content, false); // Record filter times to find single shapelet for(int i = 0; i < instancesTrain.length; i++){ System.out.println("Processing dataset " + (i+1) +" out of " + instancesTrain.length); FullShapeletTransform[] transforms = initExactTransformExperimentTransforms(); ucrDataAccuracy(classifier, transforms, i, fileName); } } //Method to check C4.5 accuracy of transforms with and without candidate pruning private static void candidatePruningAccuracyExperiment(QualityMeasures.ShapeletQualityChoice qualityChoice, boolean useSyntheticData){ Classifier classifier = new J48(); //Prepare output file String fileName; if(useSyntheticData){ fileName = "candidate_pruning_accuracy_synthetic_"+qualityChoice+".csv"; }else{ fileName = "candidate_pruning_accuracy_UCR_"+qualityChoice+".csv"; } String content = "Dataset, C4.5 using transform without candidate pruning, C4.5 using transform with candidate pruning"; writeToFile(fileName, content, false); FullShapeletTransform[] transforms; if(useSyntheticData){ for(int seriesLength = STARTING_LENGTH; seriesLength <= MAX_LENGTH; seriesLength += STEP_SIZE){ System.out.println("Processing length " + seriesLength+ " out of " + MAX_LENGTH); transforms = initCandidatePruningExperimentTransforms(qualityChoice); Instances[] syntheticData = generateData(NUM_OF_CASES, SHAPELET_LENGTH, seriesLength); syntheticDataAccuracy(classifier, transforms, syntheticData, seriesLength, fileName); } }else{ for(int i = 0; i < instancesTrain.length; i++){ System.out.println("Processing dataset " + (i+1) +" out of " + instancesTrain.length); transforms = initCandidatePruningExperimentTransforms(qualityChoice); ucrDataAccuracy(classifier, transforms, i, fileName); } } } //Method to perform timing experiment on base and optimized shapelet transforms. //The time taken to find a single best shapelet from the dataset is recorded. private static void exactTransformTimingExperiment(boolean useSyntheticData){ // Initialise transforms required for this experiment FullShapeletTransform[] transforms = initExactTransformExperimentTransforms(); //Prepare output file String fileName; if(useSyntheticData){ fileName = "subsequence_distance_synthetic_timing.csv"; }else{ fileName = "subsequence_distance_UCR_timing.csv"; } //Prepare output file if one does not exist //if(!isFileExists(fileName)){ String content = "Dataset, Base, Online z-norm + reordering, Stat caching"; writeToFile(fileName, content, false); //} if(useSyntheticData){ // Record transform times to find single shapelet for(int seriesLength = STARTING_LENGTH; seriesLength <= MAX_LENGTH; seriesLength +=STEP_SIZE){ System.out.println("Processing length " + seriesLength+ " out of " + MAX_LENGTH); Instances[] syntheticData = generateData(NUM_OF_CASES, SHAPELET_LENGTH, seriesLength); runTiming(transforms, syntheticData[0], String.valueOf(seriesLength), SHAPELET_LENGTH, SHAPELET_LENGTH, fileName); } }else{ // Record filter times to find single shapelet for(int i = 0; i < instancesTrain.length; i++){ System.out.println("Processing dataset " + (i+1) +" out of " + instancesTrain.length); runTiming(transforms, instancesTrain[i], fileNames[i], shapeletMinMax[i][0], shapeletMinMax[i][1], fileName); } } } //Method to perform timing experiment on transform with and withou candidate pruning. //The time taken to find a single best shapelet from the dataset is recorded. private static void candidatePruningTimingExperiment(QualityMeasures.ShapeletQualityChoice qualityChoice, boolean useSyntheticData){ // Initialise transforms required for this experiment FullShapeletTransform[] transforms = initCandidatePruningExperimentTransforms(qualityChoice); //Prepare output file String fileName; if(useSyntheticData){ fileName = "candidate_pruning_timing_synthetic_"+qualityChoice+".csv"; }else{ fileName = "candidate_pruning_timing_UCR_"+qualityChoice+".csv"; } String content = "Dataset, Dist Without pruning, With pruning"; writeToFile(fileName, content, false); if(useSyntheticData){ // Record transform times to find single shapelet for(int seriesLength = STARTING_LENGTH; seriesLength <= MAX_LENGTH; seriesLength +=STEP_SIZE){ System.out.println("Processing length " + seriesLength+ " out of " + MAX_LENGTH); Instances[] syntheticData = generateData(NUM_OF_CASES, SHAPELET_LENGTH, seriesLength); runTiming(transforms, syntheticData[0], String.valueOf(seriesLength), SHAPELET_LENGTH, SHAPELET_LENGTH, fileName); } }else{ for(int i = 0; i < instancesTrain.length; i++){ System.out.println("Processing dataset " + (i+1) +" out of " + instancesTrain.length); runTiming(transforms, instancesTrain[i], fileNames[i], shapeletMinMax[i][0], shapeletMinMax[i][1], fileName); } } } //Method to perform fundamental operation count experiment on base and //optimised transform which performs online normalisation and reordering. private static void opCountForBaseAndOnlineTransform(){ // Initialise filters required for this experiment FullShapeletTransform[] transforms = new FullShapeletTransform[2]; transforms[0] = new FullShapeletTransform(); transforms[1] = new ShapeletTransform(); transforms[0].turnOffLog(); transforms[0].supressOutput(); transforms[1].turnOffLog(); transforms[1].supressOutput(); //Prepare output file String fileName = "subseq_dist_op_count_base_and_online_transform.csv"; String content = "Dataset, Base, Online + reordering"; writeToFile(fileName, content, false); // Record filter times to find single shapelet StringBuilder sb; for(int seriesLength = STARTING_LENGTH; seriesLength <= MAX_LENGTH; seriesLength += STEP_SIZE){ System.out.println("Processing length " + seriesLength); sb = new StringBuilder(); sb.append(seriesLength); sb.append(", "); for(int j = 0; j < transforms.length; j++){ try{ Instances[] syntheticData = generateData(NUM_OF_CASES, SHAPELET_LENGTH, seriesLength); long count = transforms[j].opCountForSingleShapelet(syntheticData[0], SHAPELET_LENGTH, SHAPELET_LENGTH); if(count < 0){ System.out.println("Overflow!"); } sb.append(count); }catch(Exception ex){ Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } if(j != transforms.length-1){ sb.append(", "); } } writeToFile(fileName, sb.toString(), true); } } //Method to check C4.5 accuracy of base and approximate shapelet transforms. private static void approxTransformAccuracyExperiment(boolean useSyntheticData){ Classifier classifier = new J48(); //Prepare output file String fileName; if(useSyntheticData){ fileName = "approx_transform_accuracies_synthetic.csv"; }else{ fileName = "approx_transform_accuracies_UCR.csv"; } initApproxTransformExperimentFile(fileName); //Run experiment FullShapeletTransform[] transforms; if(useSyntheticData){ for(int seriesLength = STARTING_LENGTH; seriesLength <= MAX_LENGTH; seriesLength += STEP_SIZE){ System.out.println("Processing length " + seriesLength+ " out of " + MAX_LENGTH); //Initialise filters required for this experiment transforms = initApproxTransformExperimentTransforms(); Instances[] syntheticData = generateData(NUM_OF_CASES, SHAPELET_LENGTH, seriesLength); syntheticDataAccuracy(classifier, transforms, syntheticData, seriesLength, fileName); } }else{ for(int i = 15; i < UCR_NUM_FILES; i++){ System.out.println("Processing dataset " + (i+1) +" out of " + instancesTrain.length); //Initialise filters required for this experiment transforms = initApproxTransformExperimentTransforms(); //Write accuraries ucrDataAccuracy(classifier, transforms, i, fileName); } } } //Method to perform timing experiment on base and approximate shapelet transforms. //The time taken to find a single best shapelet from the dataset is recorded. private static void approxTransformTimingExperiment(boolean useSyntheticData){ // Initialise filters required for this experiment FullShapeletTransform[] transforms = initApproxTransformExperimentTransforms(); //Prepare output file String fileName; if(useSyntheticData){ fileName = "approx_transform_timing_synthetic.csv"; }else{ fileName = "approx_transform_timing_UCR.csv"; } initApproxTransformExperimentFile(fileName); if(useSyntheticData){ for(int seriesLength = STARTING_LENGTH; seriesLength <= MAX_LENGTH; seriesLength +=STEP_SIZE){ System.out.println("Processing length " + seriesLength+ " out of " + MAX_LENGTH); Instances[] syntheticData = generateData(NUM_OF_CASES, SHAPELET_LENGTH, seriesLength); runTiming(transforms, syntheticData[0], String.valueOf(seriesLength), SHAPELET_LENGTH, SHAPELET_LENGTH, fileName); } }else{ for(int i = 22; i < UCR_NUM_FILES; i++){ System.out.println("Processing dataset " + (i+1) +" out of " + instancesTrain.length); runTiming(transforms, instancesTrain[i], fileNames[i], shapeletMinMax[i][0], shapeletMinMax[i][1], fileName); } } } //################### End of Experiment Functions ########################## //################### Helper Functions ##################################### //Class implementing comparator which compares shapelets according to their length public static class ShapeletLengthComparator implements Comparator{ @Override public int compare(Object shapelet1, Object shapelet2){ int shapelet1Length = ((Shapelet)shapelet1).getContent().length; int shapelet2Lenght = ((Shapelet)shapelet2).getContent().length; if(shapelet1Length > shapelet2Lenght) { return 1; }else if(shapelet1Length < shapelet2Lenght) { return -1; }else { return 0; } } } //Method to prepare output file for approximate transform experiments private static void initApproxTransformExperimentFile(String fileName){ //Prepare output file StringBuilder sb = new StringBuilder(); sb.append("Dataset, "); sb.append("Base, "); for(int percentage = PERCENT_END; percentage >= PERCENT_START; percentage -= PERCENT_INCREMENT){ sb.append("Approx_"); sb.append(percentage); if(percentage > PERCENT_START){ sb.append(", "); } } if(!isFileExists(fileName)){ String content = "Dataset, Base, Online z-norm + reordering, Stat caching"; writeToFile(fileName, sb.toString(), false); } } //Method to initialise transforms for exact transform experiments private static FullShapeletTransform[] initExactTransformExperimentTransforms(){ FullShapeletTransform[] transforms = new FullShapeletTransform[3]; transforms[0] = new FullShapeletTransform(); transforms[1] = new ShapeletTransform(); transforms[2] = new ShapeletTransformDistCaching(); transforms[0].turnOffLog(); transforms[0].supressOutput(); transforms[1].turnOffLog(); transforms[1].supressOutput(); transforms[2].turnOffLog(); transforms[2].supressOutput(); return transforms; } //Method to initialise transforms for candidate pruning experiments private static FullShapeletTransform[] initCandidatePruningExperimentTransforms(QualityMeasures.ShapeletQualityChoice qualityChoice){ // Initialise transforms required for this experiment FullShapeletTransform[] transforms = new FullShapeletTransform[2]; transforms[0] = new ShapeletTransformDistCaching(); transforms[1] = new ShapeletTransformDistCaching(); transforms[0].turnOffLog(); transforms[0].supressOutput(); transforms[1].turnOffLog(); transforms[1].supressOutput(); transforms[1].setQualityMeasure(qualityChoice); transforms[1].useCandidatePruning(); return transforms; } //Method to initialise transforms for approximate transform experiments private static FullShapeletTransform[] initApproxTransformExperimentTransforms(){ if((PERCENT_END - PERCENT_START) % PERCENT_INCREMENT != 0){ System.err.println("Incorrect approximation parameters"); System.exit(0); } int numOfTransforms = ((PERCENT_END - PERCENT_START) / PERCENT_INCREMENT) + 2; FullShapeletTransform[] transforms = new FullShapeletTransform[numOfTransforms]; transforms[0] = new ShapeletTransformDistCaching(); transforms[0].turnOffLog(); transforms[0].supressOutput(); int transformIndex = 1; for(int percentage = PERCENT_END; percentage >= PERCENT_START; percentage -= PERCENT_INCREMENT){ try { //Initialise approx transforms required for the experiment ApproximateShapeletTransform ast = new ApproximateShapeletTransform(); ast.setSampleLevels(percentage, percentage); transforms[transformIndex] = ast; transforms[transformIndex].turnOffLog(); transforms[transformIndex].supressOutput(); transformIndex++; } catch (IOException ex) { Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } } return transforms; } // Method to load the datasets. private static void loadData(){ instancesTrain = new Instances[fileNames.length]; instancesTest = new Instances[fileNames.length]; //Load all the datasets and set class index for loaded instances for(int i=0; i<fileNames.length; i++){ String dir; String fileName; String[] splits = null; if(fileNames[i].contains("/")){ splits = fileNames[i].split("/"); } if(splits != null){ dir = splits[0]; fileName = splits[1]; }else{ dir = fileNames[i]; fileName = fileNames[i]; } // Load test/train splits if(assesmentTypes[i] == MCompProjectExperiments.AssesmentType.TRAIN_TEST){ instancesTrain[i] = FullShapeletTransform.loadData(dropboxPath+"\\"+dir+"\\"+fileName+"_TRAIN.arff"); instancesTest[i] = FullShapeletTransform.loadData(dropboxPath+"\\"+dir+"\\"+fileName+"_TEST.arff"); }else if(assesmentTypes[i] == MCompProjectExperiments.AssesmentType.LOOCV){ instancesTrain[i] = FullShapeletTransform.loadData(dropboxPath+"\\"+dir+"\\"+fileName+".arff"); instancesTest[i] = null; } // Set class indices instancesTrain[i].setClassIndex(instancesTrain[i].numAttributes() - 1); if(assesmentTypes[i] == MCompProjectExperiments.AssesmentType.TRAIN_TEST){ instancesTest[i].setClassIndex(instancesTest[i].numAttributes() - 1); } } } // Method to estimate min/max shapelet lenght for a given data private static int[] estimateMinAndMax(Instances data){ ArrayList<Shapelet> shapelets = new ArrayList<Shapelet>(); FullShapeletTransform st = new ShapeletTransformDistCaching(); st.supressOutput(); st.turnOffLog(); Instances randData = new Instances(data); Instances randSubset; for(int i = 0; i < 10; i++){ randData.randomize(new Random()); randSubset = new Instances(randData, 0, 10); try{ shapelets.addAll(st.findBestKShapeletsCache(10, randSubset, 1, randSubset.numAttributes()-1)); }catch(Exception e){ Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, e); } } /* //So instead I just select 100 best shapelet from the training data //which is an overkill but will yield good shapelet lengths try{ shapelets.addAll(st.findBestKShapeletsCache(100, data, 1, data.numAttributes()-1)); }catch(Exception ex){ Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } */ Collections.sort(shapelets, new ShapeletLengthComparator()); int min = shapelets.get(24).getContent().length; int max = shapelets.get(74).getContent().length; int[] parEstimates = {min, max}; return parEstimates; } //Method to perform accuracy test for all transforms on a given synthetic dataset private static void syntheticDataAccuracy(Classifier classifier, FullShapeletTransform[] transforms, Instances[] syntheticData, int seriesLength, String fileName){ StringBuilder sb = new StringBuilder(); sb.append(seriesLength); sb.append(", "); for(int j = 0; j < transforms.length; j++){ transforms[j].setNumberOfShapelets((syntheticData[0].numAttributes()-1)/2); transforms[j].setShapeletMinAndMax(SHAPELET_LENGTH, SHAPELET_LENGTH); try{ Instances tempTrain = instancesTrain[0]; Instances tempTest = instancesTest[0]; instancesTrain[0] = syntheticData[0]; instancesTest[0] = syntheticData[1]; sb.append(classifierAccuracy(classifier, 0, transforms[j], false, true)); instancesTrain[0] = tempTrain; instancesTest[0] = tempTest; }catch(Exception ex){ Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } if(j != transforms.length-1){ sb.append(", "); } } writeToFile(fileName, sb.toString(), true); } //Method to perform accuracy test for all transforms on a given UCR dataset private static void ucrDataAccuracy(Classifier classifier, FullShapeletTransform[] transforms, int dataIndex, String fileName){ //Get accuracies StringBuilder sb = new StringBuilder(); sb.append(fileNames[dataIndex]); sb.append(", "); for(int j = 0; j < transforms.length; j++){ transforms[j].setNumberOfShapelets((instancesTrain[dataIndex].numAttributes()-1)/2); transforms[j].setShapeletMinAndMax(shapeletMinMax[dataIndex][0], shapeletMinMax[dataIndex][1]); try{ sb.append(classifierAccuracy(classifier, dataIndex, transforms[j], false, true)); }catch(Exception ex){ Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } if(j != transforms.length-1){ sb.append(", "); } } writeToFile(fileName, sb.toString(), true); } private static void runTiming(FullShapeletTransform[] transforms, Instances data, String dataSetName,int minShapeletLength, int maxShapeletLength, String fileName){ StringBuilder sb = new StringBuilder(); sb.append(dataSetName); sb.append(", "); for(int j = 0; j < transforms.length; j++){ try{ double time = transforms[j].timingForSingleShapelet(data, minShapeletLength, maxShapeletLength); sb.append(time); }catch(Exception ex){ Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } if(j != transforms.length-1){ sb.append(", "); } } writeToFile(fileName, sb.toString(), true); } // Method to validate a given classifier private static double classifierAccuracy(Classifier classifier, int dataIndex, FullShapeletTransform transform, boolean computeErrorRate, boolean usePercentage){ double accuracy = 0.0; //Generate accuray if(assesmentTypes[dataIndex] == AssesmentType.TRAIN_TEST){ accuracy = classifierAccuracyTrainTest(classifier, dataIndex, transform); }else if(assesmentTypes[dataIndex] == AssesmentType.LOOCV){ accuracy = classifierAccuracyLOOCV(classifier, dataIndex, transform); } if(computeErrorRate){ accuracy = 1 - accuracy; } if(usePercentage){ accuracy *= 100; } return accuracy; } //Method to perform simple train/test split validation using given classifier private static double classifierAccuracyTrainTest(Classifier classifier, int dataIndex, FullShapeletTransform transform){ double accuracy = 0.0; Instances trainData = null, testData = null; if(transform != null){ //Transform data try{ trainData = transform.process(instancesTrain[dataIndex]); testData = transform.process(instancesTest[dataIndex]); }catch(Exception ex){ Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } }else{ trainData = instancesTrain[dataIndex]; testData = instancesTest[dataIndex]; } try { classifier.buildClassifier(trainData); } catch (Exception ex) { Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } //Classify test instancs while recording accuracy for(int j = 0; j < testData.numInstances(); j++){ double classifierPrediction = 0.0; try{ classifierPrediction = classifier.classifyInstance(testData.instance(j)); }catch(Exception ex){ Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } double actualClass = testData.instance(j).classValue(); if(classifierPrediction == actualClass) { accuracy++; } // Compute average accuracy if it is the last test instance if(j == testData.numInstances() - 1){ accuracy /= testData.numInstances(); } } return accuracy; } //Method to perform leave one out cross validation using given classifier and private static double classifierAccuracyLOOCV(Classifier classifier, int dataIndex, FullShapeletTransform transform){ //Variables for holding folds Instances data = instancesTrain[dataIndex]; Instances trainFold; Instances testFold; double accuracy = 0.0; //Generate average accuracies for (int n = 0; n < data.numInstances(); n++) { System.out.println("Processing fold: " + n); //Generate folds trainFold = data.trainCV(data.numInstances(), n); testFold = data.testCV(data.numInstances(), n); if(transform != null){ //Transform data try{ trainFold = transform.process(trainFold); testFold = transform.process(testFold); }catch(Exception ex){ Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } } try { classifier.buildClassifier(trainFold); } catch (Exception ex) { Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } double classifierPrediction = 0.0; try{ classifierPrediction = classifier.classifyInstance(testFold.instance(0)); }catch(Exception ex){ Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } double actualClass = testFold.instance(0).classValue(); if(classifierPrediction == actualClass) { accuracy++; } // Compute average accuracy if it is the last test instance if(n == data.numInstances() - 1){ accuracy /= data.numInstances(); } } return accuracy; } //Method to write text into a file. private static void writeToFile(String filename, String text, boolean append) { BufferedWriter bufferedWriter = null; try { //Construct the BufferedWriter object bufferedWriter = new BufferedWriter(new FileWriter(filename, append)); //Start writing to the output stream bufferedWriter.write(text); bufferedWriter.newLine(); } catch (FileNotFoundException ex) { Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } finally { //Close the BufferedWriter try { if (bufferedWriter != null) { bufferedWriter.flush(); bufferedWriter.close(); } } catch (IOException ex) { Logger.getLogger(MCompProjectExperiments.class.getName()).log(Level.SEVERE, null, ex); } } } //Method to check if file with a given name exists. private static boolean isFileExists(String filename){ File f = new File(filename); if(f.isFile() && f.canWrite()) { return true; }else{ return false; } } //Method to generate synthetic data private static Instances[] generateData(int nosCases, int shapeletLength, int seriesLength){ ShapeletModel[] s=new ShapeletModel[2]; int[] casesPerClass={nosCases/2,nosCases/2}; //PARAMETER LIST: numShapelets, seriesLength, shapeletLength, maxStart double[] p1={1,seriesLength,shapeletLength}; double[] p2={1,seriesLength,shapeletLength}; //Create two ShapeleModels with different base Shapelets s[0]=new ShapeletModel(p1); ShapeletModel.ShapeType st=s[0].getShapeType(); s[1]=new ShapeletModel(p2); while(st==s[1].getShapeType()){ s[1]=new ShapeletModel(p2); } //System.out.println(" Shape 1= "+s[0]); //System.out.println(" Shape 2= "+s[1]); DataSimulator ds=new DataSimulator(s); Instances train=ds.generateDataSet(seriesLength,casesPerClass); Instances test=ds.generateDataSet(seriesLength,casesPerClass); Instances[] output = {train, test}; return output; } //################### End of Helper Functins ############################### }