/*
 * This class is a helper class to describe the structure of our shapelet code and
 * demonstrate how to use it.
 *
 * copyright Anthony Bagnall
 * @author Anthony Bagnall, Jason Lines, Jon Hills and Edgaras Baranauskas
 */
package examples;

/*
 * Package weka.core.shapelet.* contains the classes
 *   Shapelet: stores the actual shapelet, its location in the data set, the
 *       quality assessment and a reference to the quality measure used.
 *   BinaryShapelet: extends Shapelet to store the threshold used to measure
 *       quality.
 *   OrderLineObj: a simple class to store <distance,classValue> pairs for
 *       calculating the quality of a shapelet.
 *   QualityMeasures: a class to store shapelet quality measure implementations.
 *       This includes an abstract quality measure class, and implementations of
 *       each of the four shapelet quality measures.
 *   QualityBound: a class to store shapelet quality measure bounding
 *       implementations. This is used to determine whether an early abandonment
 *       is permissible for the four quality measures.
 */
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.logging.Level;
import java.util.logging.Logger;
import weka.core.shapelet.*;
/*
 * Package weka.filters.timeseries.shapelet_transforms.* contains
 *   FullShapeletTransform: enumerative search to find the best k shapelets.
 *   ShapeletTransformDistCaching: subclass of FullShapeletTransform that uses
 *       the distance caching algorithm described in Mueen11. This is the fastest
 *       exact approach, but is memory intensive.
 *   ShapeletTransform: subclass of FullShapeletTransform that uses distance
 *       online normalisation and early abandon described in ??. Not as fast,
 *       but does not require the extra memory.
 *   ClusteredShapeletTransform: contains a FullShapeletTransform, and does post
 *       transformation clustering.
 */
import weka.filters.timeseries.shapelet_transforms.*;
/*
 * Package weka.classifiers.trees.shapelet_trees.* contains
 *   ShapeletTreeClassifier: implementation of a shapelet tree to match the
 *       description in the original paper.
 *   4x tree classifiers based on the alternative distance measures in class
 *       QualityMeasures.
 */
import weka.classifiers.trees.shapelet_trees.*;
import weka.core.*;
import weka.filters.Filter;
import weka.filters.SimpleBatchFilter;

/**
 * Worked examples showing how to configure and run the shapelet transforms.
 *
 * <p>All methods are static. The examples share the {@link #st} field: the
 * transform examples assign it, and the clustering example reads it.
 */
public class ShapeletExamples {

    /** The transform most recently created by one of the transform examples. */
    public static FullShapeletTransform st;

    /**
     * Demonstrates the usage of the FullShapeletTransform family using a
     * {@code ShapeletTransform}, which is stored in {@link #st}.
     *
     * <p>If the transform throws, the error is printed and the JVM exits with
     * status 1.
     *
     * @param train training data; the series length is taken to be
     *              {@code train.numAttributes()-1}
     * @return the transformed set of instances
     */
    public static Instances basicTransformExample(Instances train) {
        st = new ShapeletTransform();
        configureSearch(st, train);

        // You can set the filter to output details of the shapelets or not.
        st.setLogOutputFile("ShapeletExampleLog.csv");
        // Alternatively, you can turn the logging off:
        // st.turnOffLog();

        /* Those are the basic options. Now you need to perform the transform.
         * FullShapeletTransform extends the weka SimpleBatchFilter, but we have
         * made the method process public to make usage easier.
         */
        Instances shapeletT = null;
        try {
            shapeletT = st.process(train);
        } catch (Exception ex) {
            abort("Error performing the shapelet transform", ex);
        }
        return shapeletT;
    }

    /**
     * Demonstrates ClusteredShapeletTransform, which contains a
     * FullShapeletTransform and clusters the shapelets after the transform.
     * You can either perform the transform outside of the
     * ClusteredShapeletTransform or leave it to do it internally.
     *
     * <p>This example wraps the shared {@link #st}, so one of the transform
     * examples must have been run first. If the clustering throws, the error is
     * printed and the JVM exits with status 1.
     *
     * @param train training data
     * @return the clustered, transformed set of instances
     * @throws IllegalStateException if {@link #st} has not been initialised
     */
    public static Instances clusteredShapeletTransformExample(Instances train) {
        if (st == null) {
            // Previously this surfaced as a bare NullPointerException further down.
            throw new IllegalStateException(
                    "No shapelet transform available: run basicTransformExample"
                    + " or approxDataTransformExample first");
        }

        // Cluster down to 10% of the (unclamped) number requested by the
        // transform examples: m*n/50 versus m*n/5.
        int nosShapelets = (train.numAttributes() - 1) * train.numInstances() / 50;
        ClusteredShapeletTransform cst = new ClusteredShapeletTransform(st, nosShapelets);
        System.out.println(" Clustering down to " + nosShapelets + " Shapelets");
        System.out.println(" From " + st.getNumberOfShapelets() + " Shapelets");

        Instances shapeletT = null;
        try {
            shapeletT = cst.process(train);
        } catch (Exception ex) {
            abort("Error performing the shapelet clustering", ex);
        }
        return shapeletT;
    }

    /**
     * Applies the fixed settings used by the timing experiments: a single
     * shapelet, lengths 15 to 36, the F-stat quality measure, and both console
     * output and logging switched off.
     *
     * @param s     the transform to configure
     * @param train training data (currently unused; the lengths are fixed)
     */
    public static void initializeShapelet(FullShapeletTransform s, Instances train) {
        // int nosShapelets=(train.numAttributes()-1)*train.numInstances()/5;
        s.setNumberOfShapelets(1);
        int minLength = 15;
        int maxLength = 36;
        // int maxLength=(train.numAttributes()-1)/10;
        s.setShapeletMinAndMax(minLength, maxLength);
        s.setQualityMeasure(QualityMeasures.ShapeletQualityChoice.F_STAT);
        s.supressOutput();
        s.turnOffLog();
    }

    /**
     * Times the three transform implementations (no optimisation, online
     * normalisation with early abandon, distance caching) on the same data and
     * prints the absolute times, the relative times and the speed-ups.
     *
     * <p>If any transform throws, the error is printed and the JVM exits with
     * status 1 without printing the table.
     *
     * @param train training data
     */
    public static void distanceOptimizations(Instances train) {
        FullShapeletTransform s1 = new FullShapeletTransform();
        initializeShapelet(s1, train);
        ShapeletTransform s2 = new ShapeletTransform();
        initializeShapelet(s2, train);
        ShapeletTransformDistCaching s3 = new ShapeletTransformDistCaching();
        initializeShapelet(s3, train);

        DecimalFormat df = new DecimalFormat("###.####");
        try {
            double time1 = secondsToProcess(s1, train);
            double time2 = secondsToProcess(s2, train);
            double time3 = secondsToProcess(s3, train);

            System.out.println("TIME (seconds)");
            System.out.println("No Optimization\t Online Norm/Early Abandon\t Distance caching");
            System.out.println(df.format(time1) + "\t\t\t" + df.format(time2) + "\t\t\t" + df.format(time3));
            System.out.println("TIME REDUCTION\t Online Norm/Early Abandon\t Distance caching");
            System.out.println("\t\t\t" + (int) (100.0 * time2 / time1) + "% \t\t\t" + (int) (100.0 * time3 / time1) + "%");
            System.out.println("SPEED UP\t Online Norm/Early Abandon\t Distance caching");
            System.out.println("\t\t\t" + df.format(time1 / time2) + "\t\t\t" + df.format(time1 / time3));
        } catch (Exception ex) {
            abort("Error performing the shapelet transform", ex);
        }
    }

    /**
     * Times the speed-up obtained from candidate pruning (early abandon) for
     * each quality measure, using the distance-caching transform with pruning
     * off and on, and prints one result table per measure.
     *
     * <p>If any transform throws, the error is printed and the JVM exits with
     * status 1.
     *
     * @param train training data
     */
    public static void shapeletEarlyAbandons(Instances train) {
        QualityMeasures.ShapeletQualityChoice[] choices =
                QualityMeasures.ShapeletQualityChoice.values();

        // One unpruned/pruned pair per quality measure. Sizing from the enum
        // keeps the arrays and the measure lookup in step with each other.
        FullShapeletTransform[] s = new FullShapeletTransform[choices.length];
        FullShapeletTransform[] pruned = new FullShapeletTransform[choices.length];
        for (int i = 0; i < choices.length; i++) {
            s[i] = new ShapeletTransformDistCaching();
            initializeShapelet(s[i], train);
            s[i].setCandidatePruning(false);
            // initializeShapelet selects F_STAT; override with the measure under test.
            s[i].setQualityMeasure(choices[i]);

            pruned[i] = new ShapeletTransformDistCaching();
            initializeShapelet(pruned[i], train);
            pruned[i].setCandidatePruning(true);
            pruned[i].setQualityMeasure(choices[i]);
        }

        DecimalFormat df = new DecimalFormat("###.####");
        try {
            for (int i = 0; i < s.length; i++) {
                double time1 = secondsToProcess(s[i], train);
                double time2 = secondsToProcess(pruned[i], train);
                System.out.println(" ********* QUALITY MEASURE =" + s[i].getQualityMeasure() + " **********");
                System.out.println(" NO ABANDON \t\t ABANDON\t\t ABANDON/(NO ABANDON)%\t\t SPEED UP ");
                System.out.println(df.format(time1) + "\t\t\t" + df.format(time2) + "\t\t\t"
                        + (int) (100.0 * (time2 / time1)) + "%" + "\t\t\t" + df.format(time1 / time2));
            }
        } catch (Exception ex) {
            abort("Error performing the shapelet transform", ex);
        }
    }

    /**
     * Demonstrates the usage of the ApproximateShapeletTransform, which is
     * stored in {@link #st}.
     *
     * <p>If setting the sample levels throws an IOException it is logged at
     * SEVERE level and the transform still runs. If the transform itself
     * throws, the error is printed and the JVM exits with status 1.
     *
     * @param train training data
     * @return the transformed set of instances
     */
    public static Instances approxDataTransformExample(Instances train) {
        ApproximateShapeletTransform approx = new ApproximateShapeletTransform();
        st = approx;

        // Parameters that are relevant to all types of transforms that extend
        // FullShapeletTransform:
        // 1. Number of shapelets to be stored
        // 2. Shapelet length range to be explored
        // 3. Quality measure
        configureSearch(st, train);
        // 4. Set the filter to output details of the shapelets or not
        st.setLogOutputFile("ApproximateTransformExampleLog.csv");

        /* Parameters that are specific to ApproximateShapeletTransform are:
         * 1. Dataset sampling level - specifies the percentage of instances to
         *    be used from the provided training data for the shapelet
         *    discovery, i.e. setting this parameter to 50 forces the transform
         *    to sample the training data to reduce it to 50% of the original
         *    size.
         *
         * 2. Series reduction level - specifies the percentage of how much each
         *    series should be reduced, i.e. setting this parameter to 50 forces
         *    the transform to approximate each series using PAA such that each
         *    series length is 50% of the original length.
         *
         * On average the higher the percentage the lower the accuracy is to be
         * expected. For example setting the levels to 50 - 50 on average should
         * reduce the processing time by ~30 times and reduce the accuracy by
         * ~15%.
         */
        try {
            // Parameter 1 - dataset sampling level, Parameter 2 - PAA approximation level
            approx.setSampleLevels(50, 50);
        } catch (IOException ex) {
            Logger.getLogger(ShapeletExamples.class.getName()).log(Level.SEVERE, null, ex);
        }

        // Now perform the transform exactly like using the ShapeletTransform.
        Instances shapeletT = null;
        try {
            shapeletT = st.process(train);
        } catch (Exception ex) {
            abort("Error performing the shapelet transform", ex);
        }
        return shapeletT;
    }

    /**
     * Loads the SonyAIBORobotSurface train and test ARFF files from the working
     * directory and runs the early-abandon timing example. The other examples
     * are left commented out in the body.
     *
     * <p>If either file cannot be loaded, the error is printed and the JVM
     * exits with status 1.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Instances train = null, test = null;
        try {
            train = loadArff("SonyAIBORobotSurface_TRAIN.arff");
            // The test split is loaded as before, although the example that is
            // currently enabled does not use it.
            test = loadArff("SonyAIBORobotSurface_TEST.arff");
        } catch (Exception e) {
            System.out.println("Unable to load data. Exception thrown =" + e);
            System.exit(1);
        }
        /*
        System.out.println("****************** PERFORMING BASIC TRANSFORM *******");
        Instances shapeletT=basicTransformExample(train);
        System.out.println(" Transformed data set ="+shapeletT);
        System.out.println("\n **************** CLUSTERING *******");
        shapeletT=clusteredShapeletTransformExample(train);
        System.out.println(" Clustered Transformed data set ="+shapeletT);
        System.out.println("\n ******Distance calculation optimizations *******");
        distanceOptimizations(train);
        */
        System.out.println("\n ******Shapelet Early Abandons *******");
        shapeletEarlyAbandons(train);
    }

    /**
     * Sets the parameters shared by the transform examples: number of
     * shapelets, shapelet length range and quality measure.
     */
    private static void configureSearch(FullShapeletTransform transform, Instances train) {
        /* The number of shapelets defaults to 100. We recommend setting it to a
         * large value, since there will be many duplicates and there is little
         * overhead in keeping a lot (although the shapelet early abandon
         * becomes less efficient).
         */
        // Let m=train.numAttributes()-1 (series length)
        // Let n=train.numInstances() (number of series)
        int nosShapelets = (train.numAttributes() - 1) * train.numInstances() / 5;
        if (nosShapelets < FullShapeletTransform.DEFAULT_NUMSHAPELETS) {
            nosShapelets = FullShapeletTransform.DEFAULT_NUMSHAPELETS;
        }
        transform.setNumberOfShapelets(nosShapelets);

        /* Two other key parameters are minShapeletLength and maxShapeletLength.
         * For each value between these two, a full search is performed, which
         * is order (m^2n^2), so clearly there is a time/accuracy trade off.
         * Defaults to min of 3 max of 30.
         */
        int minLength = 5;
        int maxLength = (train.numAttributes() - 1) / 10;
        if (maxLength < FullShapeletTransform.DEFAULT_MINSHAPELETLENGTH) {
            maxLength = FullShapeletTransform.DEFAULT_MINSHAPELETLENGTH;
        }
        if (maxLength < minLength) {
            // For short series the rule above can leave max below the fixed
            // minimum, which would hand the transform an inverted range.
            maxLength = minLength;
        }
        transform.setShapeletMinAndMax(minLength, maxLength);

        /* Next you need to set the quality measure. This defaults to IG, but we
         * recommend using the F stat. It is faster and (debatably) more
         * accurate.
         */
        transform.setQualityMeasure(QualityMeasures.ShapeletQualityChoice.F_STAT);
    }

    /** Runs the transform on the data and returns the elapsed wall time in seconds. */
    private static double secondsToProcess(FullShapeletTransform transform, Instances data)
            throws Exception {
        // nanoTime is meant for measuring elapsed time; currentTimeMillis tracks
        // the wall clock and can report 0 ms for a fast run.
        long start = System.nanoTime();
        transform.process(data);
        return (System.nanoTime() - start) / 1.0e9;
    }

    /** Reads an ARFF file, sets the last attribute as the class and closes the file. */
    private static Instances loadArff(String path) throws IOException {
        FileReader reader = new FileReader(path);
        try {
            Instances data = new Instances(reader);
            data.setClassIndex(data.numAttributes() - 1);
            return data;
        } finally {
            reader.close();
        }
    }

    /** Prints the message and exception, dumps the stack trace and exits with status 1. */
    private static void abort(String message, Exception ex) {
        System.out.println(message + ex);
        ex.printStackTrace();
        // Non-zero status so that callers and scripts can detect the failure.
        System.exit(1);
    }
}