/*
This class is a helper class to describe the structure of our shapelet code and
* demonstrate how to use it.
*copyright Anthony Bagnall
* @author Anthony Bagnall, Jason Lines, Jon Hills and Edgaras Baranauskas
*/
package examples;
/* Package weka.core.shapelet.* contains the classes
* Shapelet that stores the actual shapelet, its location
* in the data set, the quality assessment and a reference to the quality
* measure used
* BinaryShapelet that extends Shapelet to store the threshold used to
* measure quality
* OrderLineObj: A simple class to store <distance,classValue> pairs
* for calculating the quality of a shapelet
* QualityMeasures: A class to store shapelet quality measure
* implementations. This includes an abstract quality measure class,
* and implementations of each of the four shapelet quality measures
* QualityBound: A class to store shapelet quality measure bounding
* implementations. This is used to determine whether an early abandonment is
* permissible for the four quality measures.
*/
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.logging.Level;
import java.util.logging.Logger;
import weka.core.shapelet.*;
/* package weka.filters.timeseries.shapelet_transforms.* contains
* FullShapeletTransform: Enumerative search to find the best k shapelets.
* ShapeletTransformDistCaching: subclass of FullShapeletTransform that
* uses the distance caching algorithm described in Mueen11. This is the fastest
* exact approach, but is memory intensive.
* ShapeletTransform: subclass of FullShapeletTransform that uses
distance online normalisation and early abandon described in ??. Not as fast,
* but does not require the extra memory.
* ClusteredShapeletTransform: contains a FullShapeletTransform, and does post
* transformation clustering.
*
* */
import weka.filters.timeseries.shapelet_transforms.*;
/* package weka.classifiers.trees.shapelet_trees.* contains
* ShapeletTreeClassifier: implementation of a shapelet tree to match the
* description on the original paper.
* 4x tree classifiers based on the alternative distance measures in class
* QualityMeasures.
*/
import weka.classifiers.trees.shapelet_trees.*;
import weka.core.*;
import weka.filters.Filter;
import weka.filters.SimpleBatchFilter;
public class ShapeletExamples {
public static FullShapeletTransform st;
public static Instances basicTransformExample(Instances train){
/*Class to demonstrate the usage of the FullShapeletTransform. Returns the
* transformed set of instances
*/
st =new ShapeletTransform();
/*The number of shapelets defaults to 100. we recommend setting it to a large
value, since there will be many duplicates and there is little overhead in
* keeping a lot (although the shapelet early abandon becomes less efficient).
*
*/
//Let m=train.numAttributes()-1 (series length)
//Let n= train.numInstances() (number of series)
int nosShapelets=(train.numAttributes()-1)*train.numInstances()/5;
if(nosShapelets<FullShapeletTransform.DEFAULT_NUMSHAPELETS)
nosShapelets=FullShapeletTransform.DEFAULT_NUMSHAPELETS;
st.setNumberOfShapelets(nosShapelets);
/* Two other key parameters are minShapeletLength and maxShapeletLength. For
* each value between these two, a full search is performed, which is
* order (m^2n^2), so clearly there is a time/accuracy trade off. Defaults
* to min of 3 max of 30.
*/
int minLength=5;
int maxLength=(train.numAttributes()-1)/10;
if(maxLength<FullShapeletTransform.DEFAULT_MINSHAPELETLENGTH)
maxLength=FullShapeletTransform.DEFAULT_MINSHAPELETLENGTH;
st.setShapeletMinAndMax(minLength, maxLength);
/*Next you need to set the quality measure. This defaults to IG, but
* we recommend using the F stat. It is faster and (debatably) more accurate.
*/
st.setQualityMeasure(QualityMeasures.ShapeletQualityChoice.F_STAT);
// You can set the filter to output details of the shapelets or not
st.setLogOutputFile("ShapeletExampleLog.csv");
// Alternatively, you can turn the logging off
// st.turnOffLog();
/* Thats the basic options. Now you need to perform the transform.
* FullShapeletTransform extends the weka SimpleBatchFilter, but we have made
* the method process public to make usage easier.
*/
Instances shapeletT=null;
try {
shapeletT=st.process(train);
} catch (Exception ex) {
System.out.println("Error performing the shapelet transform"+ex);
ex.printStackTrace();
System.exit(0);
}
return shapeletT;
}
public static Instances clusteredShapeletTransformExample(Instances train){
/* The class ClusteredShapeletTransform contains a FullShapeletTransform and
* post transform clusters it. You can either perform the transform outside of
* the ClusteredShapeletTransform or leave it to do it internally.
*
*/
Instances shapeletT=null;
//Cluster down to 10% of the number.
int nosShapelets=(train.numAttributes()-1)*train.numInstances()/50;
ClusteredShapeletTransform cst = new ClusteredShapeletTransform(st,nosShapelets);
System.out.println(" Clustering down to "+nosShapelets+" Shapelets");
System.out.println(" From "+st.getNumberOfShapelets()+" Shapelets");
try {
shapeletT=cst.process(train);
} catch (Exception ex) {
System.out.println("Error performing the shapelet clustering"+ex);
ex.printStackTrace();
System.exit(0);
}
return shapeletT;
}
public static void initializeShapelet(FullShapeletTransform s,Instances train){
// int nosShapelets=(train.numAttributes()-1)*train.numInstances()/5;
s.setNumberOfShapelets(1);
int minLength=15;
int maxLength=36;
// int maxLength=(train.numAttributes()-1)/10;
s.setShapeletMinAndMax(minLength, maxLength);
s.setQualityMeasure(QualityMeasures.ShapeletQualityChoice.F_STAT);
s.supressOutput();
s.turnOffLog();
}
public static void distanceOptimizations(Instances train){
Instances shapeletT=null;
FullShapeletTransform s1=new FullShapeletTransform();
initializeShapelet(s1,train);
ShapeletTransform s2=new ShapeletTransform();
initializeShapelet(s2,train);
ShapeletTransformDistCaching s3=new ShapeletTransformDistCaching();
initializeShapelet(s3,train);
DecimalFormat df =new DecimalFormat("###.####");
long t1=0;
long t2=0;
double time1,time2,time3;
try {
t1=System.currentTimeMillis();
shapeletT=s1.process(train);
t2=System.currentTimeMillis();
time1=((t2-t1)/1000.0);
t1=System.currentTimeMillis();
shapeletT=s2.process(train);
t2=System.currentTimeMillis();
time2=((t2-t1)/1000.0);
t1=System.currentTimeMillis();
shapeletT=s3.process(train);
t2=System.currentTimeMillis();
time3=((t2-t1)/1000.0);
System.out.println("TIME (seconds)");
System.out.println("No Optimization\t Online Norm/Early Abandon\t Distance caching");
System.out.println(df.format(time1)+"\t\t\t"+df.format(time2)+"\t\t\t"+df.format(time3));
System.out.println("TIME REDUCTION\t Online Norm/Early Abandon\t Distance caching");
System.out.println("\t\t\t"+(int)(100.0*time2/time1)+"% \t\t\t"+(int)(100.0*time3/time1)+"%");
System.out.println("SPEED UP\t Online Norm/Early Abandon\t Distance caching");
System.out.println("\t\t\t"+df.format(time1/time2)+"\t\t\t"+df.format(time1/time3));
} catch (Exception ex) {
System.out.println("Error performing the shapelet transform"+ex);
ex.printStackTrace();
System.exit(0);
}
}
public static void shapeletEarlyAbandons(Instances train){
//Time the speed up from early abandon of the four distance measures.
//IG:
FullShapeletTransform[] s=new FullShapeletTransform[4];
FullShapeletTransform[] pruned=new FullShapeletTransform[4];
for(int i=0;i<s.length;i++){
s[i]=new ShapeletTransformDistCaching();
pruned[i]=new ShapeletTransformDistCaching();
}
for(FullShapeletTransform s1:s){
initializeShapelet(s1,train);
s1.setCandidatePruning(false);
}
for(FullShapeletTransform s1:pruned){
initializeShapelet(s1,train);
s1.setCandidatePruning(true);
}
QualityMeasures.ShapeletQualityChoice[] choices=QualityMeasures.ShapeletQualityChoice.values();
for(int i=0;i<s.length;i++){
s[i].setQualityMeasure(choices[i]);
pruned[i].setQualityMeasure(choices[i]);
}
long t1,t2;
double time1,time2;
DecimalFormat df =new DecimalFormat("###.####");
try {
for(int i=0;i<s.length;i++){
t1=System.currentTimeMillis();
s[i].process(train);
t2=System.currentTimeMillis();
time1=((t2-t1)/1000.0);
t1=System.currentTimeMillis();
pruned[i].process(train);
t2=System.currentTimeMillis();
time2=((t2-t1)/1000.0);
System.out.println(" ********* QUALITY MEASURE ="+s[i].getQualityMeasure()+" **********");
System.out.println(" NO ABANDON \t\t ABANDON\t\t ABANDON/(NO ABANDON)%\t\t SPEED UP ");
System.out.println(df.format(time1)+"\t\t\t"+df.format(time2)+"\t\t\t"+(int)(100.0*(time2/time1))+"%"+"\t\t\t"+df.format(time1/time2));
}
} catch (Exception ex) {
System.out.println("Error performing the shapelet transform"+ex);
ex.printStackTrace();
System.exit(0);
}
}
public static Instances approxDataTransformExample(Instances train){
/*Class to demonstrate the usage of the ApproximateShapeletTransform. Returns the
* transformed set of instances
*/
st = new ApproximateShapeletTransform();
//Parameters that are relevant to all types of transforms that extend FullShapeletTransform:
//1. Number of shapelets to be stored
int nosShapelets=(train.numAttributes()-1)*train.numInstances()/5;
if(nosShapelets<FullShapeletTransform.DEFAULT_NUMSHAPELETS)
nosShapelets=FullShapeletTransform.DEFAULT_NUMSHAPELETS;
st.setNumberOfShapelets(nosShapelets);
//2. Shapelet lenght range to be eplored
int minLength=5;
int maxLength=(train.numAttributes()-1)/10;
if(maxLength<FullShapeletTransform.DEFAULT_MINSHAPELETLENGTH)
maxLength=FullShapeletTransform.DEFAULT_MINSHAPELETLENGTH;
st.setShapeletMinAndMax(minLength, maxLength);
//3. Quality measure
st.setQualityMeasure(QualityMeasures.ShapeletQualityChoice.F_STAT);
//4. Set the filter to output details of the shapelets or not
st.setLogOutputFile("ApproximateTransformExampleLog.csv");
/* Parameters that are specific to ApproximateShapeletTransform are:
* 1. Dataset sampling level - specifies the percentage of instances to be used
* from the provided training data for the shapelet discovery, i.e. setting
* this parmeter to 50 forces the transform to sample the training data to
* reduce it to 50% of the original size.
*
* 2. Series reduction level - specifies the percentage of how much each
* series should be reduced, i.e. setting this parameter to 50 forces
* the trasform to approximate each series using PAA such that each series
* lenght is 50% of the original length.
*
* On average the higher the percentage the lower the accuracy is to be
* expected. For example setting the levels to 50 - 50 on averege
* should reduce the processing time by ~30 times and reduce the accuracy by
* ~15%
*/
try {
// Parameter 1 - datast sampling level, Parameter 2 - PAA approximation level
((ApproximateShapeletTransform)st).setSampleLevels(50, 50);
} catch (IOException ex) {
Logger.getLogger(ShapeletExamples.class.getName()).log(Level.SEVERE, null, ex);
}
// Now perform the transform exacty like using the ShapeletTransfomr.
Instances shapeletT=null;
try {
shapeletT=st.process(train);
} catch (Exception ex) {
System.out.println("Error performing the shapelet transform"+ex);
ex.printStackTrace();
System.exit(0);
}
return shapeletT;
}
public static void main(String[] args){
Instances train=null,test=null;
FileReader r;
try{
r= new FileReader("SonyAIBORobotSurface_TRAIN.arff");
train = new Instances(r);
train.setClassIndex(train.numAttributes()-1);
r= new FileReader("SonyAIBORobotSurface_TEST.arff");
test = new Instances(r);
test.setClassIndex(test.numAttributes()-1);
}
catch(Exception e)
{
System.out.println("Unable to load data. Exception thrown ="+e);
System.exit(0);
}
/* System.out.println("****************** PERFORMING BASIC TRANSFORM *******");
Instances shapeletT=basicTransformExample(train);
System.out.println(" Transformed data set ="+shapeletT);
System.out.println("\n **************** CLUSTERING *******");
shapeletT=clusteredShapeletTransformExample(train);
System.out.println(" Clustered Transformed data set ="+shapeletT);
System.out.println("\n ******Distance calculation optimizations *******");
distanceOptimizations(train);
*/ System.out.println("\n ******Shapelet Early Abandons *******");
shapeletEarlyAbandons(train);
}
}