/*
 * Class to generate benchmark test split accuracies for UCR/UEA benchmark
 * time series classification problems.
 *
 * oneNearestNeighbour: these results should tally with those on the UCR website.
 * kNearestNeighbour: compares 1NN to kNN with k set through cross validation.
 */
package development;
import fileIO.OutFile;
import java.text.DecimalFormat;
import statistics.simulators.DataSimulator;
import statistics.simulators.PolynomialModel;
import weka.attributeSelection.*;
import weka.classifiers.Classifier;
import utilities.ClassifierTools;
import weka.classifiers.lazy.IBk;
import weka.classifiers.lazy.kNN;
import weka.classifiers.meta.AdaBoostM1;
import weka.classifiers.meta.Bagging;
import weka.core.Instances;
import weka.filters.NormalizeCase;
/**
*
* @author ajb
*/
public class NN_Benchmarks {
static String[] files=TimeSeriesClassification.fileNamesTotalSizeSorted;
public static void filteredNearestNeighbour(String resultsPath){
DecimalFormat df = new DecimalFormat("###.###");
OutFile of = new OutFile(resultsPath);
System.out.println("************** EUCLIDEAN DISTANCE: All normalised/standardised*******************");
System.out.println("\t\t 1NN \t Cross Val kNN,");
of.writeLine("NNFilter, kNNFilter");
for(int i=0;i<files.length;i++)
{
try{
Instances test=utilities.ClassifierTools.loadData(TimeSeriesClassification.path+files[i]+"\\"+files[i]+"_TEST");
Instances train=utilities.ClassifierTools.loadData(TimeSeriesClassification.path+files[i]+"\\"+files[i]+"_TRAIN");
if(!(files[i].equals("ElectricDevices")|| files[i].equals("Herring"))){
NormalizeCase norm=new NormalizeCase();
norm.setNormType(NormalizeCase.NormType.STD_NORMAL);
test=norm.process(test);
train=norm.process(train);
}
//Filter to 50% of the data set with info gain
AttributeSelection as = new AttributeSelection();
Ranker r= new Ranker();
r.setNumToSelect((train.numAttributes()-1)/2);
as.setSearch(r);
as.setEvaluator(new InfoGainAttributeEval());
as.SelectAttributes(train);
Instances trainSmall=as.reduceDimensionality(train);
Instances testSmall=as.reduceDimensionality(test);
Classifier a=new IBk(1);    //Plain 1NN
kNN b= new kNN(100);        //kNN with k set by cross validation (max k=100)
b.setCrossValidate(true);
b.normalise(false);
a.buildClassifier(trainSmall);
b.buildClassifier(trainSmall);
double acc=utilities.ClassifierTools.accuracy(testSmall,a);
double acc2=utilities.ClassifierTools.accuracy(testSmall,b);
System.out.println(files[i]+"\t"+df.format((acc))+"\t"+df.format((acc2)));
of.writeLine(files[i]+","+df.format((acc))+","+df.format((acc2)));
}catch(Exception e){
System.out.println(" Error with file "+files[i]+" = "+e);
e.printStackTrace();
System.exit(0);
}
}
}
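/* A minimal sketch of the info gain filtering pattern used above, factored into a
 * helper for readability. It uses only the standard weka.attributeSelection API
 * already imported in this class; the name selectTopHalf is illustrative, not an
 * existing method of this codebase. */
    private static Instances[] selectTopHalf(Instances train, Instances test) throws Exception{
        AttributeSelection as = new AttributeSelection();
        Ranker r= new Ranker();
        r.setNumToSelect((train.numAttributes()-1)/2);  //Keep half the attributes, excluding the class
        as.setSearch(r);
        as.setEvaluator(new InfoGainAttributeEval());
        as.SelectAttributes(train);                     //Rank on the training data only, to avoid test set leakage
        return new Instances[]{as.reduceDimensionality(train),as.reduceDimensionality(test)};
    }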
public static void ensembleNearestNeighbour(String resultsPath){
DecimalFormat df = new DecimalFormat("###.###");
OutFile of = new OutFile(resultsPath);
System.out.println("************** EUCLIDEAN DISTANCE: All normalised/standardised*******************");
System.out.println("\t\t 1NN \t Cross Val kNN,");
of.writeLine("NNFilter, kNNFilter");
for(int i=0;i<files.length;i++)
{
try{
Instances test=utilities.ClassifierTools.loadData(TimeSeriesClassification.path+files[i]+"\\"+files[i]+"_TEST");
Instances train=utilities.ClassifierTools.loadData(TimeSeriesClassification.path+files[i]+"\\"+files[i]+"_TRAIN");
//Bagging with 20 base classifiers
int bagPercent=66;
Bagging a=new Bagging();
a.setClassifier(new kNN(1));
a.setNumIterations(20);
a.setBagSizePercent(bagPercent);
//Bagging with 50 base classifiers
Bagging b=new Bagging();
b.setClassifier(new kNN(1));
b.setNumIterations(50);
b.setBagSizePercent(66);
//Boosting with 20 base
AdaBoostM1 c=new AdaBoostM1();
c.setClassifier(new kNN(1));
c.setNumIterations(20);
c.setUseResampling(true);
//Boosting with 100 base
AdaBoostM1 d=new AdaBoostM1();
d.setClassifier(new kNN(1));
d.setNumIterations(100);
d.setUseResampling(true);
a.buildClassifier(train);
b.buildClassifier(train);
c.buildClassifier(train);
d.buildClassifier(train);
double acc=utilities.ClassifierTools.accuracy(test,a);
double acc2=utilities.ClassifierTools.accuracy(test,b);
double acc3=utilities.ClassifierTools.accuracy(test,c);
double acc4=utilities.ClassifierTools.accuracy(test,d);
System.out.println(files[i]+"\t"+df.format((acc))+"\t"+df.format((acc2))+"\t"+df.format((acc3))+"\t"+df.format((acc4)));
of.writeLine(files[i]+","+df.format((acc))+","+df.format((acc2))+","+df.format((acc3))+","+df.format((acc4)));
}catch(Exception e){
System.out.println(" Error with file "+files[i]+" = "+e);
e.printStackTrace();
System.exit(0);
}
}
}
public static void kNearestNeighbour(String resultsPath){
DecimalFormat df = new DecimalFormat("###.###");
OutFile of = new OutFile(resultsPath);
System.out.println("************** EUCLIDEAN DISTANCE: All normalised/standardised*******************");
System.out.println("\t\t 1NN \t Cross Val kNN,");
of.writeLine("Bk(1), Normalised/Standardised IBk(1)");
for(int i=0;i<files.length;i++)
{
try{
Instances test=utilities.ClassifierTools.loadData(TimeSeriesClassification.path+files[i]+"\\"+files[i]+"_TEST");
Instances train=utilities.ClassifierTools.loadData(TimeSeriesClassification.path+files[i]+"\\"+files[i]+"_TRAIN");
NormalizeCase norm=new NormalizeCase();
if((files[i].equals("ElectricDevices")))//Just standardise, file has a zero variance entry!
norm.setNormType(NormalizeCase.NormType.STD);
else
norm.setNormType(NormalizeCase.NormType.STD_NORMAL);
test=norm.process(test);
train=norm.process(train);
Classifier a=new IBk(1);
kNN b= new kNN(100);
b.setCrossValidate(true);
b.normalise(false);
a.buildClassifier(train);
b.buildClassifier(train);
double acc=utilities.ClassifierTools.accuracy(test,a);
double acc2=utilities.ClassifierTools.accuracy(test,b);
System.out.println(files[i]+"\t"+df.format((acc))+"\t"+df.format((acc2)));
of.writeLine(files[i]+","+df.format((acc))+","+df.format((acc2)));
}catch(Exception e){
System.out.println(" Error with file+"+files[i]+" ="+e);
e.printStackTrace();
System.exit(0);
}
}
}
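/* Hedged sketch, assuming the stock Weka IBk API: IBk can also select k itself by
 * leave-one-out cross validation, as an alternative to the custom kNN wrapper used
 * above. buildIBkCV is an illustrative name, not part of this class's interface. */
    private static IBk buildIBkCV(Instances train) throws Exception{
        IBk ibk=new IBk();
        ibk.setKNN(100);            //Upper bound on k
        ibk.setCrossValidate(true); //Choose the best k<=100 by leave-one-out on the training data
        ibk.buildClassifier(train);
        return ibk;
    }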
public static void oneNearestNeighbour(String resultsPath){
DecimalFormat df = new DecimalFormat("###.###");
OutFile of = new OutFile(resultsPath);
System.out.println("************** EUCLIDEAN DISTANCE: All normalised/standardised*******************");
System.out.println("\t\t IBk(1) \t Normalised/Standardised IBk(1)");
of.writeLine("IBk(1),Normalised/Standardised IBk(1)");
for(int i=0;i<files.length;i++)
{
try{
Instances test=utilities.ClassifierTools.loadData(TimeSeriesClassification.path+files[i]+"\\"+files[i]+"_TEST");
Instances train=utilities.ClassifierTools.loadData(TimeSeriesClassification.path+files[i]+"\\"+files[i]+"_TRAIN");
Classifier b=new IBk(1);
b.buildClassifier(train);
double acc=utilities.ClassifierTools.accuracy(test,b);
double acc2;
NormalizeCase norm=new NormalizeCase();
if((files[i].equals("ElectricDevices"))){//Just standardise, file has a zero variance entry!
norm.setNormType(NormalizeCase.NormType.STD);
Instances test3=norm.process(test);
Instances train3=norm.process(train);
b.buildClassifier(train3);
acc2=utilities.ClassifierTools.accuracy(test3,b);
}
else{
norm.setNormType(NormalizeCase.NormType.STD_NORMAL);
Instances test2=norm.process(test);
Instances train2=norm.process(train);
b.buildClassifier(train2);
acc2=utilities.ClassifierTools.accuracy(test2,b);
}
System.out.println(files[i]+"\t"+df.format((acc))+"\t"+df.format((acc2)));
of.writeLine(files[i]+","+df.format((acc))+","+df.format((acc2)));
}catch(Exception e){
System.out.println(" Error with file+"+files[i]+" ="+e);
e.printStackTrace();
System.exit(0);
}
}
}
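/* The normalise-then-rebuild pattern above recurs in several methods; this is a
 * minimal sketch of it as a helper, assuming NormalizeCase.process returns a
 * filtered copy of the data. standardise is an illustrative name. */
    private static Instances standardise(Instances data, NormalizeCase.NormType type) throws Exception{
        NormalizeCase norm=new NormalizeCase();
        norm.setNormType(type); //Use STD rather than STD_NORMAL for sets with zero variance series
        return norm.process(data);
    }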
//My wrapper kNN has an attribute filter, but I'm not sure it works! Check with
//a small problem.
public static void filterTest(){
Instances train=utilities.ClassifierTools.loadData(TimeSeriesClassification.path+files[58]+"\\"+files[58]+"_TRAIN");
Instances test=utilities.ClassifierTools.loadData(TimeSeriesClassification.path+files[58]+"\\"+files[58]+"_TEST");
AttributeSelection as = new AttributeSelection();
try{
Ranker r= new Ranker();
r.setNumToSelect((train.numAttributes()-1)/4);
as.setSearch(r);
as.setEvaluator(new InfoGainAttributeEval());
as.SelectAttributes(train);
System.out.println("total number ="+(train.numAttributes()-1)+" number selected ="+as.numberAttributesSelected());
int[] ranks=as.selectedAttributes();
Instances trainSmall=as.reduceDimensionality(train);
Instances testSmall=as.reduceDimensionality(test);
System.out.println("Atts in new train ="+(trainSmall.numAttributes()-1)+" in testnumber selected ="+(testSmall.numAttributes()-1));
System.out.println("TRAIN"+trainSmall);
}catch(Exception e){
System.out.println(" Error ="+e);
e.printStackTrace();
System.exit(0);
}
kNN c3=new kNN(1);
c3.setFilterAttributes(true);   //The wrapper's internal filter should mimic the manual reduction above
c3.setProportion(0.5);
try{
    c3.buildClassifier(train);
    System.out.println("Wrapper kNN accuracy ="+utilities.ClassifierTools.accuracy(test,c3));
}catch(Exception e){ e.printStackTrace(); }
}
public static void simulatedTest(String resultsPath){
//Runs a simulated polynomial experiment
int runs=30;
int nosCases=100;
int[] casesPerClass={nosCases/2,nosCases/2};
int nosClassifiers=6;
OutFile of= new OutFile(resultsPath);
of.writeLine("seriesLength,NN,kNN,NNFilter, kNNFilter,Bagging100,Boosting100");
double[] sum=new double[nosClassifiers];
double[] sumSq=new double[nosClassifiers];
int start=50, end=500, inc=50;
for(int seriesLength=start;seriesLength<=end;seriesLength+=inc){
PolynomialModel[] p=new PolynomialModel[2];
double[] powers={1,2,3}; //Cubic model
double[] coeff1={1.0,-2.0,0.1};
double[] coeff2={1.0,-2.01,0.11};   //Nearly identical to coeff1, so the classes are hard to separate
p[0]=new PolynomialModel(powers,coeff1);
p[1]=new PolynomialModel(powers,coeff2);
for(int r=0;r<runs;r++){
//Generate instances
try{
DataSimulator ds=new DataSimulator(p);
Instances train=ds.generateDataSet(seriesLength,casesPerClass);
Instances test=ds.generateDataSet(seriesLength,casesPerClass);
//Create classifiers
Classifier[] c =new Classifier[nosClassifiers];
c[0]=new kNN(1);
kNN b= new kNN(100);
b.setCrossValidate(true);
b.normalise(false);
c[1]=b;
//Filter to 50% of the data set with info gain
AttributeSelection as = new AttributeSelection();
Ranker ranker= new Ranker();
ranker.setNumToSelect((train.numAttributes()-1)/2);
as.setSearch(ranker);
as.setEvaluator(new InfoGainAttributeEval());
as.SelectAttributes(train);
Instances trainSmall=as.reduceDimensionality(train);
Instances testSmall=as.reduceDimensionality(test);
Classifier a=new IBk(1);
b= new kNN(100);
b.setCrossValidate(true);
b.normalise(false);
c[2]=a;
c[3]=b;
//Two ensembles
//Bagging with 100 base classifiers
int bagPercent=66;
Bagging bag=new Bagging();
bag.setClassifier(new kNN(1));
bag.setNumIterations(100);
bag.setBagSizePercent(bagPercent);
//Boosting with 100 base
AdaBoostM1 ada=new AdaBoostM1();
ada.setClassifier(new kNN(1));
ada.setNumIterations(100);
ada.setUseResampling(true);
c[4]=bag;
c[5]=ada;
//Train all classifiers
for(int j=0;j<c.length;j++){
if(j==2 || j==3) //Use small data sets
c[j].buildClassifier(trainSmall);
else
c[j].buildClassifier(train);
}
//Measure Accuracy
double[] acc = new double[nosClassifiers];
for(int j=0;j<c.length;j++){
if(j==2 || j==3) //Use small data sets
acc[j]=utilities.ClassifierTools.accuracy(testSmall,c[j]);
else
acc[j]=utilities.ClassifierTools.accuracy(test,c[j]);
}
//Update stats
System.out.print(" \t\t RUN :"+(r+1)+"\t");
for(int j=0;j<c.length;j++){
sum[j]+=acc[j];
sumSq[j]+=acc[j]*acc[j];
System.out.print(acc[j]+"\t");
}
System.out.print(" \n");
}catch(Exception e){
System.out.println(" Error with simulated run ="+r);
e.printStackTrace();
System.exit(0);
}
}
of.writeString(seriesLength+",");
System.out.println("Series length = "+seriesLength+" accuracy");
//Convert accumulators to mean and variance: Var(X)=E[X^2]-E[X]^2
for(int j=0;j<nosClassifiers;j++){
sum[j]/=runs;
sumSq[j]=sumSq[j]/runs-sum[j]*sum[j];
}
for(int j=0;j<nosClassifiers-1;j++){
System.out.println(sum[j]+" ("+sumSq[j]+") ");
of.writeString(sum[j]+",");
}
System.out.println(sum[nosClassifiers-1]+" ("+sumSq[nosClassifiers-1]+") ");
of.writeLine(sum[nosClassifiers-1]+"");
}
}
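/* The sum/sumSq accumulators in simulatedTest implement the identity
 * Var(X)=E[X^2]-E[X]^2. The same computation as a self-contained sketch;
 * meanVariance is an illustrative name. */
    private static double[] meanVariance(double[] x){
        double sum=0,sumSq=0;
        for(double v:x){
            sum+=v;
            sumSq+=v*v;
        }
        double mean=sum/x.length;
        return new double[]{mean,sumSq/x.length-mean*mean};  //{mean, population variance}
    }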
public static void main(String[] args){
simulatedTest("C:\\Users\\ajb\\Dropbox\\Results\\TimeDomain\\SimTes1.csv");
// filterTest();
// ensembleNearestNeighbour("C:\\Users\\ajb\\Dropbox\\Results\\TimeDomain\\NN_Ensembles.csv");
// filteredNearestNeighbour("C:\\Users\\ajb\\Dropbox\\Results\\TimeDomain\\NNFilters.csv");
// oneNearestNeighbour("C:\\Users\\ajb\\Dropbox\\Results\\TimeDomain\\OneNN.csv");
// kNearestNeighbour("C:\\Users\\ajb\\Dropbox\\Results\\TimeDomain\\kNN.csv");
}
}