package papers;
import fileIO.InFile;
import fileIO.OutFile;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Random;
import utilities.ClassifierTools;
import weka.attributeSelection.PrincipalComponents;
import weka.classifiers.Classifier;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.functions.SMO;
import weka.classifiers.functions.supportVector.PolyKernel;
import weka.classifiers.functions.supportVector.RBFKernel;
import weka.classifiers.lazy.DTW_kNN;
import weka.classifiers.lazy.kNN;
import weka.classifiers.meta.AdaBoostM1;
import weka.classifiers.meta.Bagging;
import weka.classifiers.meta.RotationForest;
import weka.classifiers.meta.timeseriesensembles.TransformEnsembles;
import weka.classifiers.trees.J48;
import weka.classifiers.trees.RandomForest;
import weka.core.Instances;
import weka.filters.NormalizeCase;
import weka.filters.timeseries.*;
import development.*;
import weka.classifiers.AbstractClassifier;
public class SDM2012_Bagnall {
/**
 * Prefix for every results file written by this class. It is concatenated directly with the
 * file name (no path separator is inserted), so the last path element acts as a file-name prefix.
 */
static String resultPath="C:\\Users\\ajb\\Dropbox\\Results\\Ensembles";
/**
 * Names of the data sets iterated over by timeDomain. Each name is used both as the
 * directory name and as the stem of the "_TRAIN" / "_TEST" files loaded from it.
 * Where present, the trailing comment on an entry lists:
 * number of train cases, number of test cases, series length, number of classes.
 */
public static String[] sdm2012fileNames={ //Number of train,test cases,length,classes
"Adiac",//390,391,176,37
"ARSim",
"Beef", //30,30,470,5
"CBF",//30,900,128,3
"Lighting2",//60,61,637,2
"Lighting7",//70,73,319,7
"ECG200",//100,100,96,2
"FaceFour",//24,88,350,4
"fiftywords",//450,455,270,50
"fish",//175,175,463,7
"GunPoint",//50,150,150,2
"OSULeaf", //200,242,427,6
"SwedishLeaf", //500,625,128,15
"SyntheticControl", //300,300,60,6
"Trace",//100,100,275,4
"TwoPatterns", //1000,4000,128,4
"wafer",//1000,6174,152,2
"yoga",//300,3000,426,2
"FaceAll",//560,1690,131,14
//FaceAll is index 18 (zero-based); the data sets listed after it have not been normalised.
//NOTE(review): normalise() also returns true for "Beef" (index 2) and does not cover
//HandOutlines or ElectricDevices - confirm which list is authoritative.
"Coffee", //28,28,286,2
"OliveOil",
"Earthquakes",
"HandOutlines",//1000,300,2790
"FordA",
"FordB",
"ElectricDevices",
};
/**
 * Reports whether the named data set should be standardised before use.
 *
 * @param fileName data set name (case-sensitive, must not be null)
 * @return true only for FordA, FordB, OliveOil, Beef, Coffee and Earthquakes
 */
public static boolean normalise(String fileName){
    final String[] needsStandardising={"FordA","FordB","OliveOil","Beef","Coffee","Earthquakes"};
    for(String candidate:needsStandardising){
        if(fileName.equals(candidate)){
            return true;
        }
    }
    return false;
}
/**
 * Builds ten 1-NN classifiers with attribute filtering switched on, one for each
 * proportion 1.0, 0.9, ..., 0.1 (the value is passed to {@code kNN.setProportion}).
 * <p>
 * A label of the form {@code "(1NN" + proportion + ")"} is appended to {@code names}
 * for every classifier, in the same order as the returned array.
 *
 * @param names list that receives one label per classifier; existing entries are kept
 * @return the classifiers, ordered from proportion 1.0 down to 0.1
 */
public static Classifier[] getFilters(ArrayList<String> names){
    ArrayList<Classifier> filtered=new ArrayList<Classifier>();
    // Count in whole tenths. Repeatedly subtracting 0.1 from a double accumulates rounding
    // error: the old loop produced values such as 0.7000000000000001 and ran an eleventh
    // time with a proportion of roughly 1.4e-16 instead of stopping after 0.1.
    for(int tenths=10;tenths>=1;tenths--){
        double proportion=tenths/10.0;
        kNN nn=new kNN(1);
        nn.setFilterAttributes(true);
        nn.setProportion(proportion);
        filtered.add(nn);
        names.add("(1NN"+proportion+")");
    }
    return filtered.toArray(new Classifier[filtered.size()]);
}
/**
 * Creates the set of stand-alone classifiers compared in the time domain and records a
 * short label for each one.
 * <p>
 * In order: 1-NN ("NN"), 1-NN DTW with maxR 0.1 and window optimisation off ("NNDTW"),
 * Naive Bayes ("NB"), J48 ("C45"), SMO with polynomial kernel of exponent 1 ("SVML") and
 * exponent 2 ("SVMQ"), SMO with RBF kernel ("SVMR"), random forests of 30 and 100 trees
 * ("RandF30", "RandF100") and a default rotation forest ("RotF30").
 *
 * @param names list that receives one label per classifier; existing entries are kept
 * @return the classifiers, in the same order as the labels appended to {@code names}
 */
public static Classifier[] setSingleClassifiers(ArrayList<String> names){
    ArrayList<Classifier> pool=new ArrayList<Classifier>();

    pool.add(new kNN(1));
    names.add("NN");

    DTW_kNN dtw=new DTW_kNN(1);
    dtw.setMaxR(0.1);
    dtw.optimiseWindow(false);
    pool.add(dtw);
    names.add("NNDTW");

    pool.add(new NaiveBayes());
    names.add("NB");

    pool.add(new J48());
    names.add("C45");

    // Linear support vector machine
    SMO linearSvm=new SMO();
    PolyKernel linearKernel=new PolyKernel();
    linearKernel.setExponent(1);
    linearSvm.setKernel(linearKernel);
    pool.add(linearSvm);
    names.add("SVML");

    // Quadratic support vector machine
    SMO quadraticSvm=new SMO();
    PolyKernel quadraticKernel=new PolyKernel();
    quadraticKernel.setExponent(2);
    quadraticSvm.setKernel(quadraticKernel);
    pool.add(quadraticSvm);
    names.add("SVMQ");

    // Radial basis function support vector machine
    SMO rbfSvm=new SMO();
    rbfSvm.setKernel(new RBFKernel());
    pool.add(rbfSvm);
    names.add("SVMR");

    RandomForest smallForest=new RandomForest();
    smallForest.setNumTrees(30);
    pool.add(smallForest);
    names.add("RandF30");

    RandomForest largeForest=new RandomForest();
    largeForest.setNumTrees(100);
    pool.add(largeForest);
    names.add("RandF100");

    pool.add(new RotationForest());
    names.add("RotF30");

    return pool.toArray(new Classifier[pool.size()]);
}
/**
 * Creates the nearest-neighbour variants compared in the time domain and records a label
 * for each one.
 * <p>
 * In order: plain 1-NN, kNN(50) with cross-validation enabled, 1-NN with attribute filtering
 * at proportion 0.5, two bagged 1-NN ensembles, and AdaBoostM1 over 1-NN with resampling.
 * The bagging labels are generated from the same values that configure the classifiers, so
 * the label always describes what is actually run (previously the second bagging label read
 * "50%,100" while the classifier used 50 iterations at 66%).
 * <p>
 * After building, the full content of {@code names} is echoed to standard output.
 *
 * @param names list that receives one label per classifier; existing entries are kept
 * @return the classifiers, in the same order as the labels appended to {@code names}
 */
public static Classifier[] setNNClassifiers(ArrayList<String> names){
    ArrayList<Classifier> sc2=new ArrayList<Classifier>();

    //1. Basic 1-NN Euclidean distance
    kNN c1=new kNN(1);
    sc2.add(c1);
    names.add("(1NN)");

    //2. k-NN with cross-validation switched on, constructed with k=50
    kNN c2=new kNN(50);
    c2.setCrossValidate(true);
    sc2.add(c2);
    names.add("(kNN)");

    //3. 1-NN Filtered 50%
    kNN c3=new kNN(1);
    c3.setFilterAttributes(true);
    c3.setProportion(0.5);
    sc2.add(c3);
    names.add("(1NN-50\\% Filter)");

    //4. Bagging: 20 base classifiers, bag size 50%
    int firstBagPercent=50;
    int firstBagIterations=20;
    Bagging firstBag=new Bagging();
    firstBag.setClassifier(new kNN(1));
    firstBag.setNumIterations(firstBagIterations);
    firstBag.setBagSizePercent(firstBagPercent);
    names.add("Bagging,"+firstBagPercent+"%,"+firstBagIterations+" 1NN");
    sc2.add(firstBag);

    //5. Bagging: 50 base classifiers, bag size 66%
    //NOTE(review): the old comment and label claimed 100 classifiers at 50%. The configuration
    //is left untouched here so results do not change - confirm which setting was intended.
    int secondBagPercent=66;
    int secondBagIterations=50;
    Bagging secondBag=new Bagging();
    secondBag.setClassifier(new kNN(1));
    secondBag.setNumIterations(secondBagIterations);
    secondBag.setBagSizePercent(secondBagPercent);
    names.add("Bagging,"+secondBagPercent+"%,"+secondBagIterations+" 1NN");
    sc2.add(secondBag);

    //6. Boosting with 20 base classifiers
    AdaBoostM1 boost=new AdaBoostM1();
    boost.setClassifier(new kNN(1));
    boost.setNumIterations(20);
    boost.setUseResampling(true);
    sc2.add(boost);
    names.add("Boosting 20 1NN");

    Classifier[] sc=sc2.toArray(new Classifier[sc2.size()]);
    System.out.print("Testing NN Classifiers: ");
    for(String s:names)
        System.out.print(s+",");
    return sc;
}
/**
 * Generates the results for Table 2: Classifier Comparison in the time domain (argument "SingleClassifiers")
 * and for Figure 8, NN Variants (argument "NN_EuclidClassifiers").
 * <p>
 * For every problem in {@link #sdm2012fileNames} the default train/test files are loaded,
 * standardised when {@link #normalise(String)} says so, and every classifier of the chosen
 * set is evaluated. The accuracies are written as one CSV row per problem to
 * {@code resultPath+"table"+tableName+".csv"}; errors are echoed to standard output.
 * Any other argument prints an error message and terminates the JVM.
 *
 * @param tableName either "SingleClassifiers" or "NN_EuclidClassifiers"
 */
public static void timeDomain(String tableName){
    OutFile of=new OutFile(resultPath+"table"+tableName+".csv");
    ClassifierTools.ResultsStats[] stats;
    ArrayList<String> names=new ArrayList<String>();
    Classifier[] sc=null;
    if(tableName.equals("SingleClassifiers")){
        System.out.println("SINGLE CLASSIFIERS");
        of.writeLine("SINGLE CLASSIFIERS, results in Time Domain");
        sc=setSingleClassifiers(names);
    }
    else if(tableName.equals("NN_EuclidClassifiers")){
        System.out.println("NN Euclidean CLASSIFIER VARIANTS");
        of.writeLine("NN CLASSIFIERS, results in Time Domain");
        sc=setNNClassifiers(names);
    }
    else{
        System.out.println("ERROR: Unknown classifier generation name");
        System.exit(0);
    }
    //Header row: two empty cells (problem name, folds) followed by the classifier labels
    of.writeString(",,");
    for(String s:names){
        of.writeString(s+",");
    }
    of.writeLine("\n");
    try{
        for(int i=0;i<sdm2012fileNames.length;i++)
        {
            //The loaded problem is the single source of truth for loading, normalising and
            //labelling. Indexing a different name array with the same i mislabels rows and can
            //run out of bounds when the two arrays differ.
            String problem=sdm2012fileNames[i];
            //Load default test train split
            Instances test=ClassifierTools.loadData(TimeSeriesClassification.path+problem+"\\"+problem+"_TEST");
            Instances train=ClassifierTools.loadData(TimeSeriesClassification.path+problem+"\\"+problem+"_TRAIN");
            //Normalise if necessary.
            if(normalise(problem)){
                System.out.println("Standardising "+problem);
                NormalizeCase nc=new NormalizeCase();
                train=nc.process(train);
                test=nc.process(test);
            }
            //Reinitialise the classifiers each time for safety's sake. The labels are already
            //in names, so a scratch list absorbs the duplicates the factories would append.
            ArrayList<String> scratchNames=new ArrayList<String>();
            if(tableName.equals("SingleClassifiers"))
                sc=setSingleClassifiers(scratchNames);
            else if(tableName.equals("NN_EuclidClassifiers"))
                sc=setNNClassifiers(scratchNames);
            //Set folds. If 1 then it does the test/train split defined by the two files
            int folds=setNosFolds(test,train);
            of.writeString("\n"+problem+","+folds+",");
            System.out.println("Train size = "+train.numInstances()+" Test size ="+test.numInstances()+" folds ="+folds);
            System.out.println(problem);
            System.out.println("************************************");
            //Returns an array of stats, only using accuracy at present.
            stats=ClassifierTools.evalClassifiers(test,train,folds,sc);
            for(int j=0;j<stats.length;j++){
                of.writeString(stats[j].accuracy+",");
                System.out.println("\t"+names.get(j)+" error ="+(1-stats[j].accuracy));
            }
        }
    }catch(Exception e){
        System.out.println("Exception = "+e);
        e.printStackTrace();
    }
}
/**
 * Generates the results for the first half of Table 3: Comparison of transforms.
 * <p>
 * For every problem in {@code TimeSeriesClassification.fileNames} a copy of the base
 * classifier is evaluated on four representations of the data: the time domain, the power
 * spectrum, the autocorrelation function (ACF) and the principal components. One CSV row of
 * accuracies per problem is written to {@code resultPath+baseClassifier+"BasicDataTransforms.csv"}.
 * An unknown classifier name, or any exception, terminates the JVM.
 *
 * @param baseClassifier one of "1NN", "DTW", "RotationForest" or "RandomForest"
 */
public static void basicDataTransforms(String baseClassifier){
    DecimalFormat dc= new DecimalFormat("###.###");
    OutFile of=new OutFile(resultPath+baseClassifier+"BasicDataTransforms.csv");
    ClassifierTools.ResultsStats[] stats;
    System.out.println("NEAREST NEIGHBOUR CLASSIFIERS");
    of.writeLine("NEAREST NEIGHBOUR CLASSIFIERS, 10 fold cross validation results");
    of.writeLine(",TimeDomain,PowerSpectrumDomain,ACFDomain,PCADomain");
    of.writeLine(",1-NN");
    String[] files=TimeSeriesClassification.fileNames;
    Classifier base=null;
    if(baseClassifier.equals("1NN"))
        base=new kNN(1);
    else if(baseClassifier.equals("DTW"))
        base=new DTW_kNN(1);
    else if(baseClassifier.equals("RotationForest"))
        base=new RotationForest();
    else if(baseClassifier.equals("RandomForest")){
        base=new RandomForest();
        ((RandomForest)base).setNumTrees(30);
    }
    else{
        System.out.println("Classifier Not Included, exiting");
        System.exit(0);
    }
    try{
        for(int i=0;i<files.length;i++)
        {
            Instances test=ClassifierTools.loadData(TimeSeriesClassification.path+files[i]+"\\"+files[i]+"_TEST");
            Instances train=ClassifierTools.loadData(TimeSeriesClassification.path+files[i]+"\\"+files[i]+"_TRAIN");
            of.writeString("\n"+TimeSeriesClassification.fileNames[i]+",");
            System.out.println("\n"+TimeSeriesClassification.fileNames[i]+",");
            //Set folds
            int folds=setNosFolds(test,train);
            Classifier[] sc= new Classifier[1];
            Instances timeTrain, timeTest;
            if(normalise(TimeSeriesClassification.fileNames[i])){
                System.out.println("Standardising "+TimeSeriesClassification.fileNames[i]);
                NormalizeCase nc=new NormalizeCase();
                //Keep the value returned by process(), as timeDomain does; discarding it left
                //the time-domain data unstandardised. A copy is passed in so the raw train/test
                //sets used by the transforms below stay untouched whatever process() does.
                timeTrain=nc.process(new Instances(train));
                timeTest=nc.process(new Instances(test));
            }
            else{
                timeTrain=train;
                timeTest=test;
            }
            System.out.println("******************Time Domain******************");
            sc[0]=AbstractClassifier.makeCopy(base);
            stats=ClassifierTools.evalClassifiers(timeTest,timeTrain,folds,sc);
            for(int j=0;j<stats.length;j++){
                of.writeString(stats[j].accuracy+",");
                System.out.print("\n \t TIME: "+dc.format(stats[j].accuracy)+",");
            }
            System.out.println("******************Power Spectrum Domain******************");
            PowerSpectrum ps=new PowerSpectrum();
            Instances psTrain=ps.process(train);
            Instances psTest=ps.process(test);
            //Drop the first spectral term
            psTrain.deleteAttributeAt(0);
            psTest.deleteAttributeAt(0);
            /* Delete the duplicate half of the spectrum: repeatedly remove the attribute just
               before the last one (presumably the class attribute is last - TODO confirm). */
            int atts=(psTrain.numAttributes()-1)/2-2;
            for(int j=0;j<atts;j++){
                psTrain.deleteAttributeAt(psTrain.numAttributes()-2);
                psTest.deleteAttributeAt(psTest.numAttributes()-2);
            }
            sc[0]=AbstractClassifier.makeCopy(base);
            stats=ClassifierTools.evalClassifiers(psTest,psTrain,folds,sc);
            for(int j=0;j<stats.length;j++){
                of.writeString(stats[j].accuracy+",");
                System.out.print("\n\t SPECTRUM: "+dc.format(stats[j].accuracy)+",");
            }
            System.out.println("\n******************ACF Domain******************");
            ACF acf=new ACF();
            //Maximum lag: attribute count minus (truncated) 10% of the attribute count
            acf.setMaxLag(train.numAttributes()-(int)(train.numAttributes()*.1));
            sc[0]=AbstractClassifier.makeCopy(base);
            Instances acfTrain=acf.process(train);
            Instances acfTest=acf.process(test);
            stats=ClassifierTools.evalClassifiers(acfTest,acfTrain,folds,sc);
            for(int j=0;j<stats.length;j++){
                of.writeString(stats[j].accuracy+",");
                System.out.print("\n\t ACF: "+dc.format(stats[j].accuracy)+",");
            }
            System.out.println("\n******************PCA Domain******************");
            PrincipalComponents pca=new PrincipalComponents ();
            sc[0]=AbstractClassifier.makeCopy(base);
            //The components are derived from the training data only, then applied to both sets
            pca.buildEvaluator(train);
            Instances pcaTrain=pca.transformedData(train);
            Instances pcaTest=pca.transformedData(test);
            stats=ClassifierTools.evalClassifiers(pcaTest,pcaTrain,folds,sc);
            for(int j=0;j<stats.length;j++){
                of.writeString(stats[j].accuracy+",");
                System.out.print("\n\t Pca: "+dc.format(stats[j].accuracy)+",");
            }
        }
    }catch(Exception e){System.out.println("Exception ="+e);e.printStackTrace();System.exit(0);}
}
/**
 * Generates the results for the second half of Table 3: Comparison of ensembles.
 * This is also used to generate pairwise comparisons for Figure 10.
 * <p>
 * For every problem in {@code TimeSeriesClassification.fileNames} a {@code TransformEnsembles}
 * built on a copy of the base classifier is evaluated with four weighting schemes
 * (EQUAL, BEST, CV, STEP). Accuracies go to {@code resultPath+baseClassifier+"EnsembleTransforms.csv"};
 * the weights and CV string reported by the ensemble after the final (STEP) run go to
 * {@code resultPath+baseClassifier+"EnsembleWeights.csv"}.
 * An unknown classifier name, or any exception, terminates the JVM.
 *
 * @param baseClassifier one of "1NN", "DTW", "RotationForest", "RandomForest", "C4.5", "NB",
 *                       "SVMO" (polynomial kernel, exponent 2) or "SVML" (exponent 1)
 */
public static void ensembleTransforms(String baseClassifier){
    DecimalFormat dc= new DecimalFormat("###.###");
    OutFile of=new OutFile(resultPath+baseClassifier+"EnsembleTransforms.csv");
    OutFile of2=new OutFile(resultPath+baseClassifier+"EnsembleWeights.csv");
    ClassifierTools.ResultsStats[] stats;
    System.out.println("ENSEMBLECLASSIFIERS");
    of.writeLine(baseClassifier+",CombinedEqual,CombinedBest,CombinedWeighted,CombinedStep");
    String[] files=TimeSeriesClassification.fileNames;
    Classifier base=null;
    if(baseClassifier.equals("1NN"))
        base=new kNN(1);
    else if(baseClassifier.equals("DTW"))
        base=new DTW_kNN(1);
    else if(baseClassifier.equals("RotationForest"))
        base=new RotationForest();
    else if(baseClassifier.equals("RandomForest")){
        base=new RandomForest();
        ((RandomForest)base).setNumTrees(30);
    }
    else if(baseClassifier.equals("C4.5")){
        base=new J48();
    }
    else if(baseClassifier.equals("NB")){
        base=new NaiveBayes();
    }
    else if(baseClassifier.equals("SVMO")){
        base=new SMO();
        PolyKernel kernel = new PolyKernel();
        kernel.setExponent(2);
        ((SMO)base).setKernel(kernel);
    }
    else if(baseClassifier.equals("SVML")){
        base=new SMO();
        PolyKernel kernel = new PolyKernel();
        kernel.setExponent(1);
        ((SMO)base).setKernel(kernel);
    }
    else{
        System.out.println("Classifier Not Included, exiting");
        System.exit(0);
    }
    try{
        for(int i=0;i<files.length;i++)
        {
            Instances test=ClassifierTools.loadData(TimeSeriesClassification.path+files[i]+"\\"+files[i]+"_TEST");
            Instances train=ClassifierTools.loadData(TimeSeriesClassification.path+files[i]+"\\"+files[i]+"_TRAIN");
            of.writeString("\n"+TimeSeriesClassification.fileNames[i]+",");
            System.out.println("\n"+TimeSeriesClassification.fileNames[i]+",");
            //Set folds
            int folds=setNosFolds(test,train);
            Classifier[] sc= new Classifier[1];
            Instances timeTrain, timeTest;
            if(normalise(TimeSeriesClassification.fileNames[i])){
                System.out.println("Standardising "+TimeSeriesClassification.fileNames[i]);
                NormalizeCase nc=new NormalizeCase();
                //Keep the value returned by process(), as timeDomain does, so these variables
                //really hold the standardised data.
                timeTrain=nc.process(new Instances(train));
                timeTest=nc.process(new Instances(test));
            }
            else{
                timeTrain=train;
                timeTest=test;
            }
            //NOTE(review): timeTrain/timeTest are never passed to the evaluations below, which
            //all run on the raw train/test sets. Confirm whether the ensemble is meant to see
            //the standardised data; left unchanged so published results are not altered.
            sc[0]=new TransformEnsembles();
            System.out.println("\n******************Combined Equal******************");
            ((TransformEnsembles)sc[0]).setBaseClassifier(AbstractClassifier.makeCopy(base));
            ((TransformEnsembles)sc[0]).setWeightType(TransformEnsembles.WeightType.EQUAL);
            stats=ClassifierTools.evalClassifiers(test,train,folds,sc);
            for(int j=0;j<stats.length;j++){
                of.writeString(stats[j].accuracy+",");
                System.out.print("\n\t ENSEMBLE_EQUAL: "+dc.format(stats[j].accuracy)+",");
            }
            System.out.println("\n******************Combined Best******************");
            ((TransformEnsembles)sc[0]).setBaseClassifier(AbstractClassifier.makeCopy(base));
            ((TransformEnsembles)sc[0]).setWeightType(TransformEnsembles.WeightType.BEST);
            ((TransformEnsembles)sc[0]).rebuildClassifier(false);
            ((TransformEnsembles)sc[0]).findWeights();
            stats=ClassifierTools.evalClassifiers(test,train,folds,sc);
            for(int j=0;j<stats.length;j++){
                of.writeString(stats[j].accuracy+",");
                System.out.print("\n\t ENSEMBLE_BEST: "+dc.format(stats[j].accuracy)+",");
            }
            System.out.println("\n******************Combined Weighted******************");
            ((TransformEnsembles)sc[0]).setBaseClassifier(AbstractClassifier.makeCopy(base));
            ((TransformEnsembles)sc[0]).setWeightType(TransformEnsembles.WeightType.CV);
            ((TransformEnsembles)sc[0]).rebuildClassifier(false);
            ((TransformEnsembles)sc[0]).findWeights();
            stats=ClassifierTools.evalClassifiers(test,train,folds,sc);
            for(int j=0;j<stats.length;j++){
                of.writeString(stats[j].accuracy+",");
                System.out.print("\n\t ENSEMBLE_WEIGHT: "+dc.format(stats[j].accuracy)+",");
            }
            System.out.println("\n******************Combined STEP******************");
            ((TransformEnsembles)sc[0]).setBaseClassifier(AbstractClassifier.makeCopy(base));
            ((TransformEnsembles)sc[0]).setWeightType(TransformEnsembles.WeightType.STEP);
            ((TransformEnsembles)sc[0]).rebuildClassifier(false);
            ((TransformEnsembles)sc[0]).findWeights();
            stats=ClassifierTools.evalClassifiers(test,train,folds,sc);
            for(int j=0;j<stats.length;j++){
                of.writeString(stats[j].accuracy+",");
                //Was a copy-pasted ENSEMBLE_WEIGHT, which made the STEP and CV lines
                //indistinguishable in the console log.
                System.out.print("\n\t ENSEMBLE_STEP: "+dc.format(stats[j].accuracy)+",");
            }
            String w=((TransformEnsembles)sc[0]).getWeights();
            String w2=((TransformEnsembles)sc[0]).getCV();
            of2.writeLine(TimeSeriesClassification.fileNames[i]+","+w+","+w2);
        }
    }catch(Exception e){System.out.println("Exception ="+e);e.printStackTrace();System.exit(0);}
}
/**
 * Outputs a LaTeX table with errors and ranks, plus a Matlab-formatted table of errors and a
 * separate names file.
 * <p>
 * Reads {@code str+"Acc.csv"} and {@code str+"Rank.csv"}. Both start with a header line; the
 * header of the accuracy file is split on commas, its first cell is the problem-name column
 * and the remaining cells are the classifier names. Every following record is a problem name
 * followed by one value per classifier.
 * <p>
 * Writes to {@code dest} a table of errors (1-accuracy) with the rank in brackets, entries of
 * rank below 2 in bold, a row of mean ranks, and a row of z statistics that compare every
 * classifier with the first one. The Q and F statistics computed from the mean ranks are put
 * in the caption. Also writes {@code str+"Error.csv"} (errors only) and {@code str+"Names.csv"}
 * (quoted classifier names inside braces).
 *
 * @param str  path prefix of the input files, also used for the Error/Names output files
 * @param dest path of the LaTeX table to write
 */
public static void formatRankTable(String str, String dest){
    InFile in=new InFile(str+"Acc.csv");
    InFile in2=new InFile(str+"Rank.csv");
    int lines=in.countLines();
    //Reopen so that reading starts again from the top after counting
    in=new InFile(str+"Acc.csv");
    String names=in.readLine();
    in2.readLine();
    String[] classifiers=names.split(",");
    //classifiers[0] is the header of the problem column, not a classifier
    int nosClassifiers=classifiers.length-1;
    //The header line is not a problem
    lines--;
    System.out.println("FILE PATH ="+str);
    System.out.println("nos problems ="+lines+" nos classifiers="+nosClassifiers);
    String[] problems=new String[lines];
    double[][] acc=new double[lines][nosClassifiers];
    double[][] ranks=new double[lines][nosClassifiers];
    for(int i=0;i<lines;i++){
        problems[i]=in.readString();
        System.out.print("Problem ="+problems[i]);
        //Skip the problem name in the rank file to keep both readers in step
        in2.readString();
        for(int j=0;j<nosClassifiers;j++){
            acc[i][j]=in.readDouble();
            ranks[i][j]=in2.readDouble();
            //Column j of the data belongs to classifiers[j+1]; printing classifiers[j]
            //shifted every label one place to the left.
            System.out.print(" "+classifiers[j+1]+" "+acc[i][j]+" ("+ranks[i][j]+")");
        }
        System.out.print(" \n");
    }
    //Header
    OutFile of=new OutFile(dest);
    of.writeLine("\\begin{table*}[!ht]\n \\scriptsize \n \\begin{tabular}{");
    for(int i=0;i<nosClassifiers;i++)
        of.writeString("c|");
    of.writeLine("c} \\hline \n Data Set\t&");
    for(int i=0;i<nosClassifiers-1;i++)
        of.writeString(classifiers[i+1]+"\t&");
    of.writeLine(classifiers[nosClassifiers]+"\\\\ \\hline");
    DecimalFormat df=new DecimalFormat("##.####");
    for(int i=0;i<problems.length;i++){
        of.writeString(problems[i]+"\t & ");
        for(int j=0;j<nosClassifiers;j++){
            //If top ranked put in bold
            if(ranks[i][j]<2.0)
                of.writeString("{\\bf ");
            of.writeString(df.format(1-acc[i][j]));
            if(ranks[i][j]*10 ==((int)ranks[i][j])*10) //whole integer
                of.writeString("("+(int)ranks[i][j]+")");
            else
                of.writeString("("+ranks[i][j]+")");
            if(ranks[i][j]<2.0)
                of.writeString("}");
            if(j==nosClassifiers-1){
                //End of row; the last problem row is closed with a rule
                if(i<problems.length-1)
                    of.writeString("\\\\ \n");
                else
                    of.writeString("\\\\ \\hline \n");
            }
            else
                of.writeString("\t & ");
        }
    }
    of.writeString("Mean Rank \t & ");
    //Find Average Ranks and relevant stats
    double[] meanRanks=new double[nosClassifiers];
    double rSS=0; //sum of squared mean ranks
    for(int i=0;i<problems.length;i++){
        for(int j=0;j<nosClassifiers;j++)
            meanRanks[j]+=ranks[i][j];
    }
    for(int j=0;j<nosClassifiers;j++){
        meanRanks[j]/=problems.length;
        rSS+=meanRanks[j]*meanRanks[j];
        if(j<nosClassifiers-1)
            of.writeString(df.format(meanRanks[j])+" \t & ");
        else
            of.writeString(df.format(meanRanks[j])+" \\\\ \\hline ");
    }
    //Pairwise test statistics using the first as the control.
    // Q= \frac{12n}{k(k+1)} \cdot \left[ \sum_{j=1}^k\bar{r}^2_j-\frac{k(k+1)^2}{4}\right]
    double n=problems.length;
    double k=nosClassifiers;
    double Q=12*n/(k*(k+1));
    Q*=(rSS-k*(k+1)*(k+1)/4);
    // F = (n-1)Q / (n(k-1)-Q)
    double F=(n-1)*Q;
    F/=n*(k-1)-Q;
    //zStat[0] stays 0: the control is not compared with itself
    double[] zStat=new double[nosClassifiers];
    for(int j=1;j<nosClassifiers;j++)
        zStat[j]=(meanRanks[0]-meanRanks[j])/Math.sqrt((k*(k+1))/(6*n));
    of.writeString("\n &");
    for(int j=0;j<nosClassifiers;j++){
        if(j<nosClassifiers-1)
            of.writeString(df.format(zStat[j])+" \t & ");
        else
            of.writeString(df.format(zStat[j])+" \\\\ \\hline ");
    }
    of.writeString("\n");
    //Footer
    of.writeLine("\\end{tabular} \n \\caption{Statistics: Q Stat="+df.format(Q)+" F Stat="+df.format(F)+" CD="+" }\n \\label{}\n \\end{table*}");
    //Matlab format for CD graph
    OutFile o2=new OutFile(str+"Error.csv");
    OutFile o3=new OutFile(str+"Names.csv");
    o3.writeString("{");
    for(int i=1;i<classifiers.length;i++)
        o3.writeString("'"+classifiers[i]+"' ");
    o3.writeString("}");
    for(int i=0;i<problems.length;i++){
        for(int j=0;j<nosClassifiers;j++){
            if(j<nosClassifiers-1)
                o2.writeString(df.format((1-acc[i][j]))+",");
            else
                o2.writeString(df.format((1-acc[i][j]))+"\n");
        }
    }
}
/**
 * Writes a one-line summary of every problem in {@code TimeSeriesClassification.fileNames}.
 * <p>
 * Each line holds: name, number of train cases, number of test cases, number of attributes
 * minus one, number of classes, the class proportions of the train set, two empty cells, and
 * the class proportions of the test set. Any exception terminates the JVM.
 *
 * @param path file the summary is written to
 */
public static void summariseData(String path)
{
    OutFile summary =new OutFile(path);
    try{
        String[] problems=TimeSeriesClassification.fileNames;
        for(int p=0;p<TimeSeriesClassification.fileNames.length;p++)
        {
            //Load default test train split
            String stem=TimeSeriesClassification.path+problems[p]+"\\"+problems[p];
            Instances test=ClassifierTools.loadData(stem+"_TEST");
            Instances train=ClassifierTools.loadData(stem+"_TRAIN");

            summary.writeString(problems[p]+","+train.numInstances()+","+test.numInstances());
            summary.writeString(","+(train.numAttributes()-1)+","+train.numClasses());

            //Class distribution of the training set
            double[] trainCounts=new double[train.numClasses()];
            for(int c=0;c<train.numInstances();c++){
                int label=(int)train.instance(c).classValue();
                trainCounts[label]++;
            }
            for(int c=0;c<train.numClasses();c++){
                double proportion=trainCounts[c]/train.numInstances();
                summary.writeString(","+proportion);
            }
            summary.writeString(",,");

            //Class distribution of the test set
            double[] testCounts=new double[test.numClasses()];
            for(int c=0;c<test.numInstances();c++){
                int label=(int)test.instance(c).classValue();
                testCounts[label]++;
            }
            for(int c=0;c<test.numClasses();c++){
                double proportion=testCounts[c]/test.numInstances();
                summary.writeString(","+proportion);
            }
            summary.writeString("\n");
        }
    }catch(Exception e){
        e.printStackTrace();
        System.exit(0);
    }
}
/**
 * Entry point. Currently runs the transform-ensemble experiment with a 1-NN base classifier.
 * The disabled calls inside the body record the other experiments that have been launched
 * from here; some of them refer to methods or signatures that are not defined in this class.
 *
 * @param args ignored
 */
public static void main(String[] args){
    ensembleTransforms("1NN");

    // ---- Disabled: formatting and data summaries ----
    // formatRankTable("C:\\Users\\ajb\\Dropbox\\Results\\RSC\\NonSubspace","C:\\Users\\ajb\\Dropbox\\Results\\RSC\\NonSubspaceTable.csv");
    // formatRankTable("C:\\Research\\Results\\TSC Results\\NNComparison","C:\\Research\\Results\\TSC Results\\NNComparisonLatex.csv");
    // formatRankTable("C:\\Research\\Results\\TSC Results\\EnsembleTransformComparison","C:\\Research\\Results\\TSC Results\\EnsembleTransformComparisonLatex.csv");
    // summariseData("C:\\Research\\Data\\Time Series Data\\Time Series Classification\\Summary.csv");

    // ---- Disabled: time domain comparisons ----
    // Table 1: Compare alternative classifiers on the raw data
    // timeDomain("SingleClassifiers");
    // Table 2: Compare alternative 1-NN Euclid classifiers on the raw data
    // timeDomain("NN_EuclidClassifiers", new kNN(1));
    // Table 2: Compare alternative 1-NN DTW classifiers on the raw data
    // timeDomain("NN_EuclidClassifiers", new DTW_kNN(1));

    // ---- Disabled: transforms and ensembles ----
    // dataTransforms("DTW");
    // basicDataTransforms("1NN");
    // ensembleTransforms("1NN");
    // ensembleTransforms("DTW");
    // ensembleTransforms("NB");
    // ensembleTransforms("C4.5");
    // ensembleTransforms("SVML");
    // ensembleTransforms("SVMO");
    // ensembleTransforms("RandomForest");
    // ensembleTransforms("RotationForest");

    // ---- Disabled: older experiments ----
    // basic_recreateEamonnResults();
    // table2_NN_Combinations();
    // table3_1NN_DataTransforms();
    // table3_1NN_Ensembles();
    // table4_My_Ensembles();
    // SMO_Variants();
    // testNormalisation();
    // testECG();
}
/**
 * Evaluates a default {@code TransformEnsembles} on every problem in
 * {@code TimeSeriesClassification.fileNames} and writes one accuracy per problem to
 * {@code resultPath+"NewEnsemblesComparison.csv"}.
 * <p>
 * With a single fold the ensemble is built on the train file and scored on the test file.
 * With more folds the two files are pooled, shuffled with a fixed seed and cross-validated;
 * the reported value is then element [0][0] of the array returned by
 * {@code ClassifierTools.crossValidation}. Any exception terminates the JVM.
 */
public static void table4_My_Ensembles(){
    OutFile results=new OutFile(resultPath+"NewEnsemblesComparison.csv");
    final int shuffleSeed=100;
    System.out.println("Ensemble on several transformations");
    try{
        for(int p=0;p<TimeSeriesClassification.fileNames.length;p++)
        {
            String problem=TimeSeriesClassification.fileNames[p];
            String stem=TimeSeriesClassification.path+problem+"\\"+problem;
            Instances test=ClassifierTools.loadData(stem+"_TEST");
            Instances train=ClassifierTools.loadData(stem+"_TRAIN");
            results.writeString("\n"+problem+",");
            System.out.println(problem+",");

            int folds=setNosFolds(test,train);
            TransformEnsembles ensemble=new TransformEnsembles();
            double testAccuracy;
            if(folds<=1){
                //Plain train/test split as defined by the two files
                ensemble.buildClassifier(train);
                testAccuracy=ClassifierTools.accuracy(test,ensemble);
            }
            else{
                //Pool both files, shuffle reproducibly, then cross-validate
                Instances pooled=new Instances(train);
                int testSize=test.numInstances();
                for(int t=0;t<testSize;t++){
                    pooled.add(test.instance(t));
                }
                pooled.randomize(new Random(shuffleSeed));
                double[][] cvOutput=ClassifierTools.crossValidation(ensemble,pooled,folds);
                testAccuracy=cvOutput[0][0];
            }
            System.out.println("\t : "+testAccuracy);
            results.writeString(testAccuracy+",");
        }
    }catch(Exception e){
        System.out.println("Exception = "+e);
        e.printStackTrace();
        System.exit(0);
    }
}
/**
 * Reports whether the named data set is on the list of problems flagged as too large.
 *
 * @param name data set name (case-sensitive, must not be null)
 * @return true only for FordA, FordB, HandOutlines, ElectricDevices, ARSim and Earthquakes
 */
public static boolean tooLarge(String name){
    final String[] largeProblems={"FordA","FordB","HandOutlines","ElectricDevices","ARSim","Earthquakes"};
    boolean found=false;
    for(int idx=0;idx<largeProblems.length&&!found;idx++){
        found=name.equals(largeProblems[idx]);
    }
    return found;
}
/**
 * Draws a random subset of a data set without replacement.
 * <p>
 * The data is copied, shuffled with an unseeded {@link Random} (so the result differs between
 * runs) and cut down to {@code (int)(prop*n)} instances, where n is the number of instances
 * in {@code data}. The argument itself is not modified.
 *
 * @param data the instances to sample from
 * @param prop proportion to keep, between 0 and 1 inclusive
 * @return the sampled copy, or null when {@code prop} is outside [0,1]
 */
public static Instances sample(Instances data, double prop){
    if(prop<0||prop>1) return null;
    Instances newD=new Instances(data);
    newD.randomize(new Random());
    int size=(int)(prop*newD.numInstances());
    // Remove everything from index size onwards. The previous loop started counting at
    // size+1 and therefore always left one instance too many (one instance even for prop=0).
    // Deleting from the end avoids shifting the remaining instances on every removal.
    for(int i=newD.numInstances()-1;i>=size;i--)
        newD.delete(i);
    return newD;
}
/**
 * Chooses the number of folds used for evaluation.
 * <p>
 * The starting value is hard-coded to 1 (use the train/test split given by the two files),
 * so the capping logic below never fires and the method currently always returns 1. The
 * capping only matters if the starting value is switched to one of the instance counts.
 *
 * @param test  the test set (currently unused)
 * @param train the train set (currently unused)
 * @return the number of folds
 */
public static int setNosFolds(Instances test, Instances train){
    //Set to 1 to reproduce test/train results in line with Keogh website
    //Alternatives that have been used: train.numInstances(), test.numInstances()+train.numInstances()
    int nosFolds=1;
    if(nosFolds>100){
        //Braces make explicit that the else belongs to the inner test
        if(nosFolds>1000){
            nosFolds=1;
        }
        else if(nosFolds>500){
            nosFolds=10;
        }
    }
    return nosFolds;
}
}