package papers;
/**
*
* @author Author 1 and Author 2
*/
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Random;
import java.util.Scanner;
import java.util.TreeMap;
import weka.classifiers.lazy.kNN;
import weka.core.elastic_distance_measures.BasicDTW;
import weka.core.elastic_distance_measures.SakoeChibaDTW;
import weka.core.elastic_distance_measures.WeightedDTW;
import weka.core.elastic_distance_measures.LCSSDistance;
import weka.filters.timeseries.DerivativeFilter;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.EuclideanDistance;
public class ICDM2013_Lines {
/* DATA_DIR is the location of the arff instance files. Each dataset should be included in this folder within a subfolder of the dataName,
 * which contains the training and test data in the form 'dataName/dataName_TRAIN.arff' and 'dataName/dataName_TEST.arff'
 *
 * OUTPUT_DIR is the location where experimental results will be written. The CV and TRAIN_TEST subfolders are used to keep the different
 * experiments separate and are relative to the overall OUTPUT_DIR String.
 */
public static final String DATA_DIR = "TSC Problems";
public static final String OUTPUT_DIR = "Results";
public static final String OUTPUT_DIR_CV = OUTPUT_DIR+"/cv";
public static final String OUTPUT_DIR_TRAIN_TEST = OUTPUT_DIR+"/trainTest";
//<editor-fold defaultstate="collapsed" desc="Initialisation Methods">
/**
 * Creates the cross-validation output directory structure for a dataset.
 * Refuses to run if cv results for this dataset already exist, to protect
 * against accidentally overwriting previous experimental output.
 *
 * @param dataName name of the dataset (used as the subfolder name)
 * @throws Exception if results already exist, or the directory cannot be created
 */
public static void initCv(String dataName) throws Exception{
    // mkdirs() creates OUTPUT_DIR and OUTPUT_DIR_CV in one call (no-op if they exist);
    // the original used mkdir() and ignored the return value
    new File(OUTPUT_DIR_CV).mkdirs();
    // if the dataset already has a dir in the cv dir, alert the user to the
    // possibility of overwriting results
    File datasetDir = new File(OUTPUT_DIR_CV+"/"+dataName);
    if(datasetDir.exists()){
        throw new Exception("WARNING! Cross-validation results already exist for dataset '"+dataName+"'. Please remove these (or rename), then re-run if you wish to continue.");
    }
    if(!datasetDir.mkdir()){
        // previously a failed mkdir was silent and surfaced later as a confusing FileWriter error
        throw new Exception("ERROR! Could not create output directory '"+datasetDir.getPath()+"'.");
    }
}
/**
 * Creates the train/test output directory structure for a dataset.
 *
 * @param dataName name of the dataset (used as the subfolder name)
 * @throws Exception declared for signature compatibility with callers
 */
public static void initTrainTest(String dataName) throws Exception{
    // mkdirs() also creates OUTPUT_DIR and OUTPUT_DIR_TRAIN_TEST if necessary.
    // The original used mkdir(), which silently fails when the parent
    // directory does not exist (e.g. if initCv was never called first).
    File datasetDir = new File(OUTPUT_DIR_TRAIN_TEST+"/"+dataName);
    datasetDir.mkdirs();
}
//</editor-fold>
//<editor-fold defaultstate="collapsed" desc="Cross-Validation Methods">
// generic cross-validation method
/**
 * Leave-one-out cross-validation of a kNN classifier over the given data.
 * For each instance, a classifier is trained on all remaining instances and
 * used to classify the held-out one.
 *
 * @param data           the full training data
 * @param k              number of neighbours for kNN
 * @param distanceMetric distance function to use (DTW variants subclass EuclideanDistance)
 * @param st             optional log; if non-null, "prediction,actual\n" is appended per instance
 * @return accuracy as a percentage in [0, 100]
 * @throws Exception if the LOOCV training set is built incorrectly, or the classifier fails
 */
public static double crossValidate(Instances data, int k, EuclideanDistance distanceMetric, StringBuilder st) throws Exception {
    final int n = data.numInstances();
    int numCorrect = 0;
    for (int held = 0; held < n; held++) {
        Instance heldOut = data.instance(held);
        double actual = heldOut.classValue();
        // training set is everything EXCEPT the held-out instance
        Instances loocvTrain = new Instances(data, n - 1);
        for (int idx = 0; idx < n; idx++) {
            if (idx != held) {
                loocvTrain.add(data.instance(idx));
            }
        }
        if (loocvTrain.numInstances() != n - 1) {
            throw new Exception("Incorrect initialisation of instances!");
        }
        // build a fresh classifier for this fold and classify the held-out instance
        kNN classifier = new kNN(k);
        classifier.setDistanceFunction(distanceMetric);
        classifier.buildClassifier(loocvTrain);
        double predicted = classifier.classifyInstance(heldOut);
        if (predicted == actual) {
            numCorrect++;
        }
        if (st != null) {
            st.append(predicted).append(",").append(actual).append("\n");
        }
    }
    return 100.0 / n * numCorrect;
}
// start of experiment-specific cross-validation methods
/**
 * LOOCV of 1NN with unnormalised Euclidean distance; writes accuracy and
 * per-instance predictions to the dataset's cv output directory.
 *
 * @return LOOCV accuracy (percentage)
 */
public static double cv_01_Euclidean_1NN(String dataName, Instances data) throws Exception{
    EuclideanDistance euclid = new EuclideanDistance();
    euclid.setDontNormalize(true);
    StringBuilder st = new StringBuilder();
    double euclidean_1nn = crossValidate(data, 1, euclid, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_01_Euclidean_1NN.txt")) {
        log.append(euclidean_1nn+"\n");
        log.append(st);
    }
    return euclidean_1nn;
}
/**
 * LOOCV of 1NN with full-window DTW; writes accuracy and per-instance
 * predictions to the dataset's cv output directory.
 *
 * @return LOOCV accuracy (percentage)
 */
public static double cv_02_DTW_fullWindow_1NN(String dataName, Instances data) throws Exception{
    BasicDTW fullWindowDtw = new BasicDTW();
    StringBuilder st = new StringBuilder();
    double dtw_fullWindow_1nn = crossValidate(data, 1, fullWindowDtw, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_02_DTW_fullWindow_1NN.txt")) {
        log.append(dtw_fullWindow_1nn+"\n");
        log.append(st);
    }
    return dtw_fullWindow_1nn;
}
/**
 * LOOCV of 1NN with Sakoe-Chiba DTW over window sizes r = 0, 0.01, ..., 1.
 *
 * @return {bestAccuracy, bestR}
 */
public static double[] cv_03_DTW_bestWindow_1NN(String dataName, Instances data) throws Exception{
    double bsfAcc = -1;
    double bsfR = -1;
    StringBuilder bsfSt = null;
    for(int window = 0; window <= 100; window++){
        StringBuilder thisSt = new StringBuilder();
        double r = (double)window/100; // integer loop counter avoids double imprecision
        double thisAcc = crossValidate(data, 1, new SakoeChibaDTW(r), thisSt);
        if(thisAcc > bsfAcc){ // strict '>' keeps the smallest window on ties
            bsfAcc = thisAcc;
            bsfR = r;
            bsfSt = thisSt;
        }
    }
    double[] dtw_cvWindow_1nn = {bsfAcc,bsfR};
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_03_DTW_bestWindow_1NN.txt")) {
        log.append(bsfAcc+","+bsfR+"\n");
        log.append(bsfSt);
    }
    return dtw_cvWindow_1nn;
}
/**
 * LOOCV of 1NN with Weighted DTW over weights g = 0, 0.01, ..., 1.
 *
 * @return {bestAccuracy, bestG}
 */
public static double[] cv_04_WDTW_1NN(String dataName, Instances data) throws Exception{
    double bsfAcc = -1;
    double bsfG = -1;
    StringBuilder bsfSt = null;
    for(int weight = 0; weight <= 100; weight++){
        StringBuilder thisSt = new StringBuilder();
        double g = (double)weight/100; // integer loop counter avoids double imprecision
        double thisAcc = crossValidate(data, 1, new WeightedDTW(g), thisSt);
        if(thisAcc > bsfAcc){ // strict '>' keeps the smallest g on ties
            bsfAcc = thisAcc;
            bsfG = g;
            bsfSt = thisSt;
        }
    }
    double[] wdtw_1nn = {bsfAcc,bsfG};
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_04_WDTW_1NN.txt")) {
        log.append(bsfAcc+","+bsfG+"\n");
        log.append(bsfSt);
    }
    return wdtw_1nn;
}
/**
 * LOOCV of kNN (k = 1..100) with unnormalised Euclidean distance.
 *
 * @return {bestAccuracy, bestK}
 */
public static double[] cv_05_Euclidean_kNN(String dataName, Instances data) throws Exception{
    int bsfK = -1;
    double bsfAcc = -1;
    StringBuilder bsfSt = null;
    for(int k = 1; k <= 100; k++){
        StringBuilder thisSt = new StringBuilder();
        EuclideanDistance euclid = new EuclideanDistance();
        euclid.setDontNormalize(true);
        double thisAcc = crossValidate(data, k, euclid, thisSt);
        if(thisAcc > bsfAcc){ // strict '>' keeps the smallest k on ties
            bsfAcc = thisAcc;
            bsfK = k;
            bsfSt = thisSt;
        }
    }
    double[] euclidean_knn = {bsfAcc,bsfK};
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_05_Euclidean_kNN.txt")) {
        log.append(bsfAcc+","+bsfK+"\n");
        log.append(bsfSt);
    }
    return euclidean_knn;
}
/**
 * LOOCV of kNN (k = 1..100) with full-window DTW.
 *
 * @return {bestAccuracy, bestK}
 */
public static double[] cv_06_DTW_fullWindow_kNN(String dataName, Instances data) throws Exception{
    int bsfK = -1;
    double bsfAcc = -1;
    StringBuilder bsfSt = null;
    for(int k = 1; k <= 100; k++){
        StringBuilder thisSt = new StringBuilder();
        BasicDTW dtw = new BasicDTW();
        double thisAcc = crossValidate(data, k, dtw, thisSt);
        if(thisAcc > bsfAcc){ // strict '>' keeps the smallest k on ties
            bsfAcc = thisAcc;
            bsfK = k;
            bsfSt = thisSt;
        }
    }
    double[] dtw_fullWindow_knn = {bsfAcc,bsfK};
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_06_DTW_fullWindow_kNN.txt")) {
        log.append(bsfAcc+","+bsfK+"\n");
        log.append(bsfSt);
    }
    return dtw_fullWindow_knn;
}
/**
 * LOOCV over the full grid of DTW window sizes (r = 0, 0.01, ..., 1) and
 * neighbourhood sizes (k = 1..100).
 *
 * @return {bestAccuracy, bestK, bestR}
 */
public static double[] cv_11_DTW_optimalWindow_kNN(String dataName, Instances data) throws Exception{
    int bsfK = -1;
    double bsfR = -1;
    double bsfAcc = -1;
    StringBuilder bsfSt = null;
    for(int window = 0; window <= 100; window++){
        double r = (double)window/100; // integer loop counter avoids double imprecision
        for(int k = 1; k <= 100; k++){
            StringBuilder thisSt = new StringBuilder();
            SakoeChibaDTW dtw = new SakoeChibaDTW(r);
            double thisAcc = crossValidate(data, k, dtw, thisSt);
            if(thisAcc > bsfAcc){ // strict '>' keeps the smallest (r, k) on ties
                bsfAcc = thisAcc;
                bsfK = k;
                bsfR = r;
                bsfSt = thisSt;
            }
        }
    }
    double [] dtw_rn_knn = {bsfAcc,bsfK,bsfR};
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_11_DTW_optimalWindow_kNN.txt")) {
        log.append(bsfAcc+","+bsfK+","+bsfR+"\n");
        log.append(bsfSt);
    }
    return dtw_rn_knn;
}
/**
 * LOOCV over the full grid of WDTW weights (g = 0, 0.01, ..., 1) and
 * neighbourhood sizes (k = 1..100).
 *
 * @return {bestAccuracy, bestK, bestG}
 */
public static double[] cv_12_WDTW_kNN(String dataName, Instances data) throws Exception{
    int bsfK = -1;
    double bsfG = -1;
    double bsfAcc = -1;
    StringBuilder bsfSt = null;
    for(int weight = 0; weight <= 100; weight++){
        double g = (double)weight/100; // integer loop counter avoids double imprecision
        for(int k = 1; k <= 100; k++){
            StringBuilder thisSt = new StringBuilder();
            WeightedDTW wdtw = new WeightedDTW(g);
            double thisAcc = crossValidate(data, k, wdtw, thisSt);
            if(thisAcc > bsfAcc){ // strict '>' keeps the smallest (g, k) on ties
                bsfAcc = thisAcc;
                bsfK = k;
                bsfG = g;
                bsfSt = thisSt;
            }
        }
    }
    double [] wdtw_knn = {bsfAcc,bsfK,bsfG};
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_12_WDTW_kNN.txt")) {
        log.append(bsfAcc+","+bsfK+","+bsfG+"\n");
        log.append(bsfSt);
    }
    return wdtw_knn;
}
/**
 * LOOCV of 1NN with full-window DTW on DERIVATIVE data (DDTW). The relation
 * name must contain "derivative" as a guard against passing raw data.
 *
 * @return LOOCV accuracy (percentage)
 */
public static double cv_21_DDTW_fullWindow_1NN(String dataName, Instances data) throws Exception{
    if(!data.relationName().contains("derivative")){
        throw new Exception("WARNING! Instances object does not include derivative in the relation name! If this is intentional, please either update relation name, or remove this exception from the code.");
    }
    BasicDTW fullWindowDtw = new BasicDTW();
    StringBuilder st = new StringBuilder();
    double cv_21_ddtw_fullWindow_1nn = crossValidate(data, 1, fullWindowDtw, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_21_DDTW_fullWindow_1NN.txt")) {
        log.append(cv_21_ddtw_fullWindow_1nn+"\n");
        log.append(st);
    }
    return cv_21_ddtw_fullWindow_1nn;
}
/**
 * LOOCV of 1NN with Sakoe-Chiba DTW on DERIVATIVE data (DDTW) over window
 * sizes r = 0, 0.01, ..., 1.
 *
 * @return {bestAccuracy, bestR}
 */
public static double[] cv_22_DDTW_bestWindow_1NN(String dataName, Instances data) throws Exception{
    if(!data.relationName().contains("derivative")){
        throw new Exception("WARNING! Instances object does not include derivative in the relation name! If this is intentional, please either update relation name, or remove this exception from the code.");
    }
    double bsfAcc = -1;
    double bsfR = -1;
    StringBuilder bsfSt = null;
    for(int window = 0; window <= 100; window++){
        StringBuilder thisSt = new StringBuilder();
        double r = (double)window/100; // integer loop counter avoids double imprecision
        double thisAcc = crossValidate(data, 1, new SakoeChibaDTW(r), thisSt);
        if(thisAcc > bsfAcc){ // strict '>' keeps the smallest window on ties
            bsfAcc = thisAcc;
            bsfR = r;
            bsfSt = thisSt;
        }
    }
    double[] ddtw_cvWindow_1nn = {bsfAcc,bsfR};
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_22_DDTW_bestWindow_1NN.txt")) {
        log.append(bsfAcc+","+bsfR+"\n");
        log.append(bsfSt);
    }
    return ddtw_cvWindow_1nn;
}
/**
 * LOOCV of 1NN with Weighted DTW on DERIVATIVE data (WDDTW) over weights
 * g = 0, 0.01, ..., 1.
 *
 * @return {bestAccuracy, bestG}
 */
public static double[] cv_23_WDDTW_1NN(String dataName, Instances data) throws Exception{
    if(!data.relationName().contains("derivative")){
        throw new Exception("WARNING! Instances object does not include derivative in the relation name! If this is intentional, please either update relation name, or remove this exception from the code.");
    }
    double bsfG = -1;
    double bsfAcc = -1;
    StringBuilder bsfSt = null;
    for(int weight = 0; weight <= 100; weight++){
        StringBuilder thisSt = new StringBuilder();
        double g = (double)weight/100; // integer loop counter avoids double imprecision
        WeightedDTW wdtw = new WeightedDTW(g);
        double thisAcc = crossValidate(data, 1, wdtw, thisSt);
        if(thisAcc > bsfAcc){ // strict '>' keeps the smallest g on ties
            bsfAcc = thisAcc;
            bsfG = g;
            bsfSt = thisSt;
        }
    }
    double [] wddtw_knn = {bsfAcc,bsfG};
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_23_WDDTW_1NN.txt")) {
        log.append(bsfAcc+","+bsfG+"\n");
        log.append(bsfSt);
    }
    return wddtw_knn;
}
/**
 * LOOCV of kNN (k = 1..100) with full-window DTW on DERIVATIVE data (DDTW).
 *
 * @return {bestAccuracy, bestK}
 */
public static double[] cv_24_DDTW_fullWindow_kNN(String dataName, Instances data) throws Exception{
    if(!data.relationName().contains("derivative")){
        throw new Exception("WARNING! Instances object does not include derivative in the relation name! If this is intentional, please either update relation name, or remove this exception from the code.");
    }
    int bsfK = -1;
    double bsfAcc = -1;
    StringBuilder bsfSt = null;
    for(int k = 1; k <= 100; k++){
        StringBuilder thisSt = new StringBuilder();
        BasicDTW dtw = new BasicDTW();
        double thisAcc = crossValidate(data, k, dtw, thisSt);
        if(thisAcc > bsfAcc){ // strict '>' keeps the smallest k on ties
            bsfAcc = thisAcc;
            bsfK = k;
            bsfSt = thisSt;
        }
    }
    double[] ddtw_fullWindow_knn = {bsfAcc,bsfK};
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_24_DDTW_fullWindow_kNN.txt")) {
        log.append(bsfAcc+","+bsfK+"\n");
        log.append(bsfSt);
    }
    return ddtw_fullWindow_knn;
}
/**
 * LOOCV over the full grid of DTW window sizes (r = 0, 0.01, ..., 1) and
 * neighbourhood sizes (k = 1..100) on DERIVATIVE data (DDTW).
 *
 * @return {bestAccuracy, bestK, bestR}
 */
public static double[] cv_25_DDTW_optimalWindow_kNN(String dataName, Instances data) throws Exception{
    if(!data.relationName().contains("derivative")){
        throw new Exception("WARNING! Instances object does not include derivative in the relation name! If this is intentional, please either update relation name, or remove this exception from the code.");
    }
    int bsfK = -1;
    double bsfR = -1;
    double bsfAcc = -1;
    StringBuilder bsfSt = null;
    for(int window = 0; window <= 100; window++){
        double r = (double)window/100; // integer loop counter avoids double imprecision
        for(int k = 1; k <= 100; k++){
            StringBuilder thisSt = new StringBuilder();
            SakoeChibaDTW dtw = new SakoeChibaDTW(r);
            double thisAcc = crossValidate(data, k, dtw, thisSt);
            if(thisAcc > bsfAcc){ // strict '>' keeps the smallest (r, k) on ties
                bsfAcc = thisAcc;
                bsfK = k;
                bsfR = r;
                bsfSt = thisSt;
            }
        }
    }
    double [] ddtw_rn_knn = {bsfAcc,bsfK,bsfR};
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_25_DDTW_optimalWindow_kNN.txt")) {
        log.append(bsfAcc+","+bsfK+","+bsfR+"\n");
        log.append(bsfSt);
    }
    return ddtw_rn_knn;
}
/**
 * LOOCV over the full grid of WDTW weights (g = 0, 0.01, ..., 1) and
 * neighbourhood sizes (k = 1..100) on DERIVATIVE data.
 *
 * NOTE(review): the method and output file say "WDTW" but the derivative guard
 * below shows this is the WDDTW experiment. Names are kept unchanged because
 * trainTest_26_WDDTW_kNN reads the "cv_26_WDTW_kNN.txt" file by that name.
 *
 * @return {bestAccuracy, bestK, bestG}
 */
public static double[] cv_26_WDTW_kNN(String dataName, Instances data) throws Exception{
    if(!data.relationName().contains("derivative")){
        throw new Exception("WARNING! Instances object does not include derivative in the relation name! If this is intentional, please either update relation name, or remove this exception from the code.");
    }
    int bsfK = -1;
    double bsfG = -1;
    double bsfAcc = -1;
    StringBuilder bsfSt = null;
    for(int weight = 0; weight <= 100; weight++){
        double g = (double)weight/100; // integer loop counter avoids double imprecision
        for(int k = 1; k <= 100; k++){
            StringBuilder thisSt = new StringBuilder();
            WeightedDTW wdtw = new WeightedDTW(g);
            double thisAcc = crossValidate(data, k, wdtw, thisSt);
            if(thisAcc > bsfAcc){ // strict '>' keeps the smallest (g, k) on ties
                bsfAcc = thisAcc;
                bsfK = k;
                bsfG = g;
                bsfSt = thisSt;
            }
        }
    }
    double [] wddtw_knn = {bsfAcc,bsfK,bsfG};
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_26_WDTW_kNN.txt")) {
        log.append(bsfAcc+","+bsfK+","+bsfG+"\n");
        log.append(bsfSt);
    }
    return wddtw_knn;
}
/**
 * LOOCV of 1NN with LCSS distance over a 10x10 grid of (delta, epsilon)
 * parameters: delta in [0, seriesLength/4], epsilon in [0.2*stdv, stdv].
 * On accuracy ties, smaller parameter values are preferred.
 *
 * @return {bestAccuracy, bestDelta, bestEpsilon}
 */
public static double[] cv_31_LCSS_1NN(String dataName, Instances data) throws Exception{
    // get the 10 delta and 10 epsilon readings between the ranges specified by <citation>
    int seriesLength = data.numAttributes()-1; // -1 to remove class value
    double dataStdv = LCSSDistance.stdv_p(data);
    int[] deltas = LCSSDistance.getInclusive10(0, seriesLength/4);
    double stdvFloor = dataStdv*0.2;
    double[] epsilons = LCSSDistance.getInclusive10(stdvFloor, dataStdv);
    int bsfDelta = -1;
    double bsfEpsilon = 1; // NOTE(review): original initial value (not -1) — preserved
    double bsfAcc = -1;
    StringBuilder bsfSt = null;
    for(int d = 0; d < deltas.length; d++){
        for(int e = 0; e < epsilons.length; e++){
            StringBuilder thisSt = new StringBuilder();
            int thisDelta = deltas[d];
            double thisEpsilon = epsilons[e];
            LCSSDistance lcss = new LCSSDistance(thisDelta, thisEpsilon);
            double thisAcc = crossValidate(data, 1, lcss, thisSt);
            // accept on strictly better accuracy, or on a tie when both params are
            // no larger than the current best (the original's duplicated if/else-if
            // branches performed identical assignments and are merged here)
            if(thisAcc > bsfAcc || (thisAcc == bsfAcc && thisDelta <= bsfDelta && thisEpsilon <= bsfEpsilon)){
                bsfAcc = thisAcc;
                bsfDelta = thisDelta;
                bsfEpsilon = thisEpsilon;
                bsfSt = thisSt;
            }
        }
    }
    double[] lcss_1nn = {bsfAcc,bsfDelta,bsfEpsilon};
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_31_LCSS_1NN.txt")) {
        log.append(bsfAcc+","+bsfDelta+","+bsfEpsilon+"\n");
        log.append(bsfSt);
    }
    return lcss_1nn; // accuracy, delta, epsilon
}
/*
 * Due to time constraints, we could not investigate all possible parameter options for
 * k = 1, 2, ..., 100. As a compromise, we investigate 100 k values using the best LCSS
 * parameters found in the LCSS 1NN cross-validation.
 */
/**
 * LOOCV of kNN (k = 1..100) with LCSS distance, using the (delta, epsilon)
 * pair found by the 1NN LCSS cross-validation (full grid over all three
 * parameters was infeasible, so k is tuned with the 1NN-best LCSS params).
 *
 * @return {bestAccuracy, delta, epsilon, bestK}
 */
public static double[] cv_32_LCSS_kNN(String dataName, Instances data, int delta, double epsilon) throws Exception{
    int bsfK = -1;
    double bsfAcc = -1;
    StringBuilder bsfSt = null;
    for(int k = 1; k <= 100; k++){
        StringBuilder thisSt = new StringBuilder();
        LCSSDistance lcss = new LCSSDistance(delta, epsilon);
        double thisAcc = crossValidate(data, k, lcss, thisSt);
        if(thisAcc > bsfAcc){ // strict '>' keeps the smallest k on ties
            bsfAcc = thisAcc;
            bsfK = k;
            bsfSt = thisSt;
        }
    }
    double[] lcss_knn = {bsfAcc,delta,epsilon,bsfK};
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_CV+"/"+dataName+"/cv_32_LCSS_kNN.txt")) {
        log.append(bsfAcc+","+delta+","+epsilon+","+bsfK+"\n");
        log.append(bsfSt);
    }
    return lcss_knn;
}
//</editor-fold>
//<editor-fold defaultstate="collapsed" desc="Train/Test Methods">
// generic train/test code
/**
 * Trains a kNN classifier on the training split and evaluates it on the test
 * split, appending "prediction,actual\n" per test instance to st.
 *
 * @param train          training instances
 * @param test           test instances
 * @param k              number of neighbours for kNN
 * @param distanceMetric distance function (DTW variants subclass EuclideanDistance)
 * @param st             log of per-instance predictions (must be non-null)
 * @return test accuracy as a percentage in [0, 100]
 * @throws Exception if building or applying the classifier fails
 */
public static double trainTest(Instances train, Instances test, int k, EuclideanDistance distanceMetric, StringBuilder st) throws Exception{
    kNN classifier = new kNN(k);
    classifier.setDistanceFunction(distanceMetric);
    classifier.buildClassifier(train);
    int numCorrect = 0;
    for (int i = 0; i < test.numInstances(); i++) {
        Instance current = test.instance(i);
        double actual = current.classValue();
        double predicted = classifier.classifyInstance(current);
        st.append(predicted).append(",").append(actual).append("\n");
        if (predicted == actual) {
            numCorrect++;
        }
    }
    return 100.0 / test.numInstances() * numCorrect;
}
// start of experiment-specific train/test code
/**
 * Train/test 1NN with unnormalised Euclidean distance; logs accuracy and
 * per-instance predictions.
 *
 * @return test accuracy (percentage)
 */
public static double trainTest_01_Euclidean_1NN(String dataName, Instances train, Instances test) throws Exception{
    StringBuilder st = new StringBuilder();
    EuclideanDistance euclid = new EuclideanDistance();
    euclid.setDontNormalize(true);
    double trainTest_01_euclidean_1nn = trainTest(train, test, 1, euclid, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_01_Euclidean_1NN.txt")) {
        log.append(trainTest_01_euclidean_1nn+"\n");
        log.append(st);
    }
    return trainTest_01_euclidean_1nn;
}
/**
 * Train/test 1NN with full-window DTW; logs accuracy and predictions.
 *
 * @return test accuracy (percentage)
 */
public static double trainTest_02_DTW_fullWindow_1NN(String dataName, Instances train, Instances test) throws Exception{
    StringBuilder st = new StringBuilder();
    BasicDTW dtw = new BasicDTW();
    double trainTest_02_dtw_fullwindow_1nn = trainTest(train, test, 1, dtw, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_02_DTW_fullWindow_1NN.txt")) {
        log.append(trainTest_02_dtw_fullwindow_1nn+"\n");
        log.append(st);
    }
    return trainTest_02_dtw_fullwindow_1nn;
}
/**
 * Train/test 1NN with DTW using the best window r found by cv_03.
 *
 * @return test accuracy (percentage)
 * @throws Exception if the cv results file does not exist yet
 */
public static double trainTest_03_DTW_optimalWindow_1NN(String dataName, Instances train, Instances test) throws Exception{
    File cvFile = new File(OUTPUT_DIR_CV+"/"+dataName+"/cv_03_DTW_bestWindow_1NN.txt");
    if(!cvFile.exists()){
        throw new Exception("ERROR! CV hasn't been carried out fully for "+cvFile.getName()+".");
    }
    // first line of the cv file is "accuracy,r"; close the Scanner (original leaked it)
    String header;
    try (Scanner scan = new Scanner(cvFile)) {
        scan.useDelimiter("\n");
        header = scan.next();
    }
    double bestR = Double.parseDouble(header.split(",")[1].trim());
    StringBuilder st = new StringBuilder();
    SakoeChibaDTW sdtw = new SakoeChibaDTW(bestR);
    double trainTest_03_DTW_optimalWindow_1NN = trainTest(train, test, 1, sdtw, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_03_DTW_optimalWindow_1NN.txt")) {
        log.append(trainTest_03_DTW_optimalWindow_1NN+","+bestR+"\n");
        log.append(st);
    }
    return trainTest_03_DTW_optimalWindow_1NN;
}
/**
 * Train/test 1NN with WDTW using the best weight g found by cv_04.
 *
 * @return test accuracy (percentage)
 * @throws Exception if the cv results file does not exist yet
 */
public static double trainTest_04_WDTW_1NN(String dataName, Instances train, Instances test) throws Exception{
    File cvFile = new File(OUTPUT_DIR_CV+"/"+dataName+"/cv_04_WDTW_1NN.txt");
    if(!cvFile.exists()){
        throw new Exception("ERROR! CV hasn't been carried out fully for "+cvFile.getName()+".");
    }
    // first line of the cv file is "accuracy,g"; close the Scanner (original leaked it)
    String header;
    try (Scanner scan = new Scanner(cvFile)) {
        scan.useDelimiter("\n");
        header = scan.next();
    }
    double bestG = Double.parseDouble(header.split(",")[1].trim());
    StringBuilder st = new StringBuilder();
    WeightedDTW wdtw = new WeightedDTW(bestG);
    double trainTest_04_WDTW_1NN = trainTest(train, test, 1, wdtw, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_04_WDTW_1NN.txt")) {
        log.append(trainTest_04_WDTW_1NN+","+bestG+"\n");
        log.append(st);
    }
    return trainTest_04_WDTW_1NN;
}
/**
 * Train/test kNN with unnormalised Euclidean distance using the best k found by cv_05.
 *
 * @return test accuracy (percentage)
 * @throws Exception if the cv results file does not exist yet
 */
public static double trainTest_05_Euclidean_kNN(String dataName, Instances train, Instances test) throws Exception{
    File cvFile = new File(OUTPUT_DIR_CV+"/"+dataName+"/cv_05_Euclidean_kNN.txt");
    if(!cvFile.exists()){
        throw new Exception("ERROR! CV hasn't been carried out fully for "+cvFile.getName()+".");
    }
    // first line of the cv file is "accuracy,k"; close the Scanner (original leaked it)
    String header;
    try (Scanner scan = new Scanner(cvFile)) {
        scan.useDelimiter("\n");
        header = scan.next();
    }
    int bestK = Integer.parseInt(header.split(",")[1].trim());
    StringBuilder st = new StringBuilder();
    EuclideanDistance euclid = new EuclideanDistance();
    euclid.setDontNormalize(true);
    double trainTest_05_Euclidean_kNN = trainTest(train, test, bestK, euclid, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_05_Euclidean_kNN.txt")) {
        log.append(trainTest_05_Euclidean_kNN+","+bestK+"\n");
        log.append(st);
    }
    return trainTest_05_Euclidean_kNN;
}
/**
 * Train/test kNN with full-window DTW using the best k found by cv_06.
 *
 * @return test accuracy (percentage)
 * @throws Exception if the cv results file does not exist yet
 */
public static double trainTest_06_DTW_fullWindow_kNN(String dataName, Instances train, Instances test) throws Exception{
    File cvFile = new File(OUTPUT_DIR_CV+"/"+dataName+"/cv_06_DTW_fullWindow_kNN.txt");
    if(!cvFile.exists()){
        throw new Exception("ERROR! CV hasn't been carried out fully for "+cvFile.getName()+".");
    }
    // first line of the cv file is "accuracy,k"; close the Scanner (original leaked it)
    String header;
    try (Scanner scan = new Scanner(cvFile)) {
        scan.useDelimiter("\n");
        header = scan.next();
    }
    int bestK = Integer.parseInt(header.split(",")[1].trim());
    StringBuilder st = new StringBuilder();
    BasicDTW dtw = new BasicDTW();
    double trainTest_06_DTW_fullWindow_kNN = trainTest(train, test, bestK, dtw, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_06_DTW_fullWindow_kNN.txt")) {
        log.append(trainTest_06_DTW_fullWindow_kNN+","+bestK+"\n");
        log.append(st);
    }
    return trainTest_06_DTW_fullWindow_kNN;
}
/**
 * Train/test kNN with DTW using the best (k, r) pair found by cv_11.
 *
 * @return test accuracy (percentage)
 * @throws Exception if the cv results file does not exist yet
 */
public static double trainTest_11_DTW_bestWindow_kNN(String dataName, Instances train, Instances test) throws Exception{
    File cvFile = new File(OUTPUT_DIR_CV+"/"+dataName+"/cv_11_DTW_optimalWindow_kNN.txt");
    if(!cvFile.exists()){
        throw new Exception("ERROR! CV hasn't been carried out fully for "+cvFile.getName()+".");
    }
    // first line of the cv file is "accuracy,k,r"; close the Scanner (original leaked it)
    String header;
    try (Scanner scan = new Scanner(cvFile)) {
        scan.useDelimiter("\n");
        header = scan.next();
    }
    String[] resultsAndParams = header.split(",");
    int bestK = Integer.parseInt(resultsAndParams[1].trim());
    double bestR = Double.parseDouble(resultsAndParams[2].trim());
    StringBuilder st = new StringBuilder();
    SakoeChibaDTW sdtw = new SakoeChibaDTW(bestR);
    double trainTest_11_DTW_bestWindow_kNN = trainTest(train, test, bestK, sdtw, st);
    // try-with-resources: the original FileWriter leaked if a write threw.
    // NOTE(review): bestR is used but never written to the log line; the existing
    // output format ("accuracy,k") is preserved here — confirm whether r should be logged too.
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_11_DTW_bestWindow_kNN.txt")) {
        log.append(trainTest_11_DTW_bestWindow_kNN+","+bestK+"\n");
        log.append(st);
    }
    return trainTest_11_DTW_bestWindow_kNN;
}
/**
 * Train/test kNN with WDTW using the best (k, g) pair found by cv_12.
 *
 * @return test accuracy (percentage)
 * @throws Exception if the cv results file does not exist yet
 */
public static double trainTest_12_WDTW_kNN(String dataName, Instances train, Instances test) throws Exception{
    File cvFile = new File(OUTPUT_DIR_CV+"/"+dataName+"/cv_12_WDTW_kNN.txt");
    if(!cvFile.exists()){
        throw new Exception("ERROR! CV hasn't been carried out fully for "+cvFile.getName()+".");
    }
    // first line of the cv file is "accuracy,k,g"; close the Scanner (original leaked it)
    String header;
    try (Scanner scan = new Scanner(cvFile)) {
        scan.useDelimiter("\n");
        header = scan.next();
    }
    String[] resultsAndParams = header.split(",");
    int bestK = Integer.parseInt(resultsAndParams[1].trim());
    double bestG = Double.parseDouble(resultsAndParams[2].trim());
    StringBuilder st = new StringBuilder();
    WeightedDTW wdtw = new WeightedDTW(bestG);
    double trainTest_12_WDTW_kNN = trainTest(train, test, bestK, wdtw, st);
    // try-with-resources: the original FileWriter leaked if a write threw.
    // NOTE(review): bestG is used but never written to the log line; existing format preserved.
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_12_WDTW_kNN.txt")) {
        log.append(trainTest_12_WDTW_kNN+","+bestK+"\n");
        log.append(st);
    }
    return trainTest_12_WDTW_kNN;
}
/**
 * Train/test 1NN with full-window DTW on DERIVATIVE data (DDTW). Both splits
 * must contain "derivative" in their relation names.
 *
 * @return test accuracy (percentage)
 */
public static double trainTest_21_DDTW_fullWindow_1NN(String dataName, Instances train, Instances test) throws Exception{
    if(!train.relationName().contains("derivative") || !test.relationName().contains("derivative")){
        throw new Exception("WARNING! Instances object does not include derivative in the relation name! If this is intentional, please either update relation name, or remove this exception from the code.");
    }
    StringBuilder st = new StringBuilder();
    BasicDTW dtw = new BasicDTW();
    double trainTest_21_DDTW_fullWindow_1NN = trainTest(train, test, 1, dtw, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_21_DDTW_fullWindow_1NN.txt")) {
        log.append(trainTest_21_DDTW_fullWindow_1NN+"\n");
        log.append(st);
    }
    return trainTest_21_DDTW_fullWindow_1NN;
}
/**
 * Train/test 1NN with DDTW using the best window r found by cv_22.
 *
 * @return test accuracy (percentage)
 * @throws Exception if the cv results file does not exist yet
 */
public static double trainTest_22_DDTW_optimalWindow_1NN(String dataName, Instances train, Instances test) throws Exception{
    File cvFile = new File(OUTPUT_DIR_CV+"/"+dataName+"/cv_22_DDTW_bestWindow_1NN.txt");
    if(!cvFile.exists()){
        throw new Exception("ERROR! CV hasn't been carried out fully for "+cvFile.getName()+".");
    }
    // first line of the cv file is "accuracy,r"; close the Scanner (original leaked it)
    String header;
    try (Scanner scan = new Scanner(cvFile)) {
        scan.useDelimiter("\n");
        header = scan.next();
    }
    double bestR = Double.parseDouble(header.split(",")[1].trim());
    StringBuilder st = new StringBuilder();
    SakoeChibaDTW sdtw = new SakoeChibaDTW(bestR);
    double trainTest_22_DDTW_optimalWindow_1NN = trainTest(train, test, 1, sdtw, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_22_DDTW_optimalWindow_1NN.txt")) {
        log.append(trainTest_22_DDTW_optimalWindow_1NN+","+bestR+"\n");
        log.append(st);
    }
    return trainTest_22_DDTW_optimalWindow_1NN;
}
/**
 * Train/test 1NN with WDDTW using the best weight g found by cv_23.
 *
 * @return test accuracy (percentage)
 * @throws Exception if the cv results file does not exist yet
 */
public static double trainTest_23_WDDTW_1NN(String dataName, Instances train, Instances test) throws Exception{
    File cvFile = new File(OUTPUT_DIR_CV+"/"+dataName+"/cv_23_WDDTW_1NN.txt");
    if(!cvFile.exists()){
        throw new Exception("ERROR! CV hasn't been carried out fully for "+cvFile.getName()+".");
    }
    // first line of the cv file is "accuracy,g"; close the Scanner (original leaked it)
    String header;
    try (Scanner scan = new Scanner(cvFile)) {
        scan.useDelimiter("\n");
        header = scan.next();
    }
    double bestG = Double.parseDouble(header.split(",")[1].trim());
    StringBuilder st = new StringBuilder();
    WeightedDTW wdtw = new WeightedDTW(bestG);
    double trainTest_23_WDDTW_1NN = trainTest(train, test, 1, wdtw, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_23_WDDTW_1NN.txt")) {
        log.append(trainTest_23_WDDTW_1NN+","+bestG+"\n");
        log.append(st);
    }
    return trainTest_23_WDDTW_1NN;
}
/**
 * Train/test kNN with full-window DDTW using the best k found by cv_24.
 *
 * @return test accuracy (percentage)
 * @throws Exception if the cv results file does not exist yet
 */
public static double trainTest_24_DDTW_fullWindow_kNN(String dataName, Instances train, Instances test) throws Exception{
    File cvFile = new File(OUTPUT_DIR_CV+"/"+dataName+"/cv_24_DDTW_fullWindow_kNN.txt");
    if(!cvFile.exists()){
        throw new Exception("ERROR! CV hasn't been carried out fully for "+cvFile.getName()+".");
    }
    // first line of the cv file is "accuracy,k"; close the Scanner (original leaked it)
    String header;
    try (Scanner scan = new Scanner(cvFile)) {
        scan.useDelimiter("\n");
        header = scan.next();
    }
    int bestK = Integer.parseInt(header.split(",")[1].trim());
    StringBuilder st = new StringBuilder();
    BasicDTW dtw = new BasicDTW();
    double trainTest_24_DDTW_fullWindow_kNN = trainTest(train, test, bestK, dtw, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_24_DDTW_fullWindow_kNN.txt")) {
        log.append(trainTest_24_DDTW_fullWindow_kNN+","+bestK+"\n");
        log.append(st);
    }
    return trainTest_24_DDTW_fullWindow_kNN;
}
/**
 * Train/test kNN with DDTW using the best (k, r) pair found by cv_25.
 *
 * @return test accuracy (percentage)
 * @throws Exception if the cv results file does not exist yet
 */
public static double trainTest_25_DDTW_bestWindow_kNN(String dataName, Instances train, Instances test) throws Exception{
    File cvFile = new File(OUTPUT_DIR_CV+"/"+dataName+"/cv_25_DDTW_optimalWindow_kNN.txt");
    if(!cvFile.exists()){
        throw new Exception("ERROR! CV hasn't been carried out fully for "+cvFile.getName()+".");
    }
    // first line of the cv file is "accuracy,k,r"; close the Scanner (original leaked it)
    String header;
    try (Scanner scan = new Scanner(cvFile)) {
        scan.useDelimiter("\n");
        header = scan.next();
    }
    String[] resultsAndParams = header.split(",");
    int bestK = Integer.parseInt(resultsAndParams[1].trim());
    double bestR = Double.parseDouble(resultsAndParams[2].trim());
    StringBuilder st = new StringBuilder();
    SakoeChibaDTW sdtw = new SakoeChibaDTW(bestR);
    double trainTest_25_DDTW_bestWindow_kNN = trainTest(train, test, bestK, sdtw, st);
    // try-with-resources: the original FileWriter leaked if a write threw.
    // NOTE(review): bestR is used but never written to the log line; existing format preserved.
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_25_DDTW_bestWindow_kNN.txt")) {
        log.append(trainTest_25_DDTW_bestWindow_kNN+","+bestK+"\n");
        log.append(st);
    }
    return trainTest_25_DDTW_bestWindow_kNN;
}
/**
 * Train/test kNN with WDDTW using the best (k, g) pair found by the cv_26
 * experiment (whose output file is named "cv_26_WDTW_kNN.txt" for historical reasons).
 *
 * @return test accuracy (percentage)
 * @throws Exception if the cv results file does not exist yet
 */
public static double trainTest_26_WDDTW_kNN(String dataName, Instances train, Instances test) throws Exception{
    File cvFile = new File(OUTPUT_DIR_CV+"/"+dataName+"/cv_26_WDTW_kNN.txt");
    if(!cvFile.exists()){
        throw new Exception("ERROR! CV hasn't been carried out fully for "+cvFile.getName()+".");
    }
    // first line of the cv file is "accuracy,k,g"; close the Scanner (original leaked it)
    String header;
    try (Scanner scan = new Scanner(cvFile)) {
        scan.useDelimiter("\n");
        header = scan.next();
    }
    String[] resultsAndParams = header.split(",");
    int bestK = Integer.parseInt(resultsAndParams[1].trim());
    double bestG = Double.parseDouble(resultsAndParams[2].trim());
    StringBuilder st = new StringBuilder();
    WeightedDTW wdtw = new WeightedDTW(bestG);
    double trainTest_26_WDDTW_kNN = trainTest(train, test, bestK, wdtw, st);
    // try-with-resources: the original FileWriter leaked if a write threw.
    // NOTE(review): bestG is used but never written to the log line; existing format preserved.
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_26_WDDTW_kNN.txt")) {
        log.append(trainTest_26_WDDTW_kNN+","+bestK+"\n");
        log.append(st);
    }
    return trainTest_26_WDDTW_kNN;
}
/**
 * Train/test 1NN with LCSS using the best (delta, epsilon) pair found by cv_31.
 *
 * @return test accuracy (percentage)
 * @throws Exception if the cv results file does not exist yet
 */
public static double trainTest_31_LCSS_1NN(String dataName, Instances train, Instances test) throws Exception{
    File cvFile = new File(OUTPUT_DIR_CV+"/"+dataName+"/cv_31_LCSS_1NN.txt");
    if(!cvFile.exists()){
        throw new Exception("ERROR! CV hasn't been carried out fully for "+cvFile.getName()+".");
    }
    // first line of the cv file is "accuracy,delta,epsilon"; close the Scanner (original leaked it)
    String header;
    try (Scanner scan = new Scanner(cvFile)) {
        scan.useDelimiter("\n");
        header = scan.next();
    }
    String[] resultsAndParams = header.split(",");
    int bestDelta = Integer.parseInt(resultsAndParams[1].trim());
    double bestEpsilon = Double.parseDouble(resultsAndParams[2].trim());
    StringBuilder st = new StringBuilder();
    LCSSDistance lcss = new LCSSDistance(bestDelta, bestEpsilon);
    double trainTest_31_LCSS_1NN = trainTest(train, test, 1, lcss, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_31_LCSS_1NN.txt")) {
        log.append(trainTest_31_LCSS_1NN+","+bestDelta+","+bestEpsilon+"\n");
        log.append(st);
    }
    return trainTest_31_LCSS_1NN;
}
/**
 * Train/test kNN with LCSS using the best (delta, epsilon, k) found by cv_32.
 *
 * @return test accuracy (percentage)
 * @throws Exception if the cv results file does not exist yet
 */
public static double trainTest_32_LCSS_kNN(String dataName, Instances train, Instances test) throws Exception{
    File cvFile = new File(OUTPUT_DIR_CV+"/"+dataName+"/cv_32_LCSS_kNN.txt");
    if(!cvFile.exists()){
        throw new Exception("ERROR! CV hasn't been carried out fully for "+cvFile.getName()+".");
    }
    // first line of the cv file is "accuracy,delta,epsilon,k"; close the Scanner (original leaked it)
    String header;
    try (Scanner scan = new Scanner(cvFile)) {
        scan.useDelimiter("\n");
        header = scan.next();
    }
    String[] resultsAndParams = header.split(",");
    int bestDelta = Integer.parseInt(resultsAndParams[1].trim());
    double bestEpsilon = Double.parseDouble(resultsAndParams[2].trim());
    int bestK = Integer.parseInt(resultsAndParams[3].trim());
    StringBuilder st = new StringBuilder();
    LCSSDistance lcss = new LCSSDistance(bestDelta, bestEpsilon);
    double trainTest_32_LCSS_kNN = trainTest(train, test, bestK, lcss, st);
    // try-with-resources: the original FileWriter leaked if a write threw
    try (FileWriter log = new FileWriter(OUTPUT_DIR_TRAIN_TEST+"/"+dataName+"/trainTest_32_LCSS_kNN.txt")) {
        log.append(trainTest_32_LCSS_kNN+","+bestDelta+","+bestEpsilon+","+bestK+"\n");
        log.append(st);
    }
    return trainTest_32_LCSS_kNN;
}
//</editor-fold>
//<editor-fold defaultstate="collapsed" desc="Complete Dataset CV and Train/Test Methods">
public static void datasetCrossValidation(String dataName) throws Exception{
    // Runs the full cross-validation stage for one dataset: every
    // classifier/distance-measure combination in the study is cross-validated
    // on the training split, results are written under OUTPUT_DIR_CV, and a
    // summary table is printed. initCv throws if results already exist, so
    // previous experiments are never silently overwritten.
    initCv(dataName);
    Instances train_raw = loadData(DATA_DIR+"/"+dataName+"/"+dataName+"_TRAIN.arff");
    // derivative-transformed training data for the DDTW/WDDTW classifiers
    // (could be stored locally and read back in to save time on large datasets)
    DerivativeFilter df = new DerivativeFilter();
    Instances train_derivative = df.process(train_raw);
    double cv_01_euclidean_1nn = cv_01_Euclidean_1NN(dataName,train_raw); // 01 Euclidean 1NN
    double cv_02_dtw_fullWindow_1nn = cv_02_DTW_fullWindow_1NN(dataName,train_raw); // 02 DTW full window 1NN
    double[] cv_03_dtw_cvWindow_1nn = cv_03_DTW_bestWindow_1NN(dataName,train_raw); // 03 DTW variable window 1NN (r tried from 0% to 100% in increments of 1%)
    double[] cv_04_wdtw_1nn = cv_04_WDTW_1NN(dataName,train_raw); // 04 Weighted DTW 1NN (weight g tried from 0 to 1 in increments of 0.01)
    double[] cv_05_euclidean_knn = cv_05_Euclidean_kNN(dataName,train_raw); // 05 Euclidean kNN (k = 1, 2, ..., 100)
    double[] cv_06_dtw_fullWindow_knn = cv_06_DTW_fullWindow_kNN(dataName,train_raw); // 06 DTW full window kNN (k = 1, 2, ..., 100)
    double[] cv_11_dtw_optimalWindow_knn = cv_11_DTW_optimalWindow_kNN(dataName,train_raw); // 11 DTW optimal window kNN (r = 0, 0.01, ..., 1; k = 1, 2, ..., 100)
    double[] cv_12_wdtw_knn = cv_12_WDTW_kNN(dataName,train_raw); // 12 WDTW kNN (g = 0, 0.01, ..., 1; k = 1, 2, ..., 100)
    double cv_21_ddtw_fullWindow_1nn = cv_21_DDTW_fullWindow_1NN(dataName,train_derivative); // 21 Derivative DTW full window 1NN
    double[] cv_22_ddtw_cvWindow_1nn = cv_22_DDTW_bestWindow_1NN(dataName,train_derivative); // 22 Derivative DTW variable window 1NN (r = 0%..100%, increments of 1%)
    double[] cv_23_wdtw_1nn = cv_23_WDDTW_1NN(dataName,train_derivative); // 23 Weighted Derivative DTW 1NN (g = 0..1, increments of 0.01)
    double[] cv_24_ddtw_fullWindow_knn = cv_24_DDTW_fullWindow_kNN(dataName,train_derivative); // 24 Derivative DTW full window kNN (k = 1..100)
    double[] cv_25_ddtw_optimalWindow_knn = cv_25_DDTW_optimalWindow_kNN(dataName,train_derivative); // 25 Derivative DTW variable window kNN
    double[] cv_26_wddtw_knn = cv_26_WDTW_kNN(dataName,train_derivative); // 26 WDDTW kNN -- NOTE(review): helper is named cv_26_WDTW_kNN, but since it is fed derivative data this is the WDDTW case
    double[] cv_31_lcss_1nn = cv_31_LCSS_1NN(dataName,train_raw); // 31 LCSS 1NN; result layout {accuracy, delta, epsilon} (see usages below)
    double[] cv_32_lcss_knn = cv_32_LCSS_kNN(dataName,train_raw,(int)cv_31_lcss_1nn[1],cv_31_lcss_1nn[2]); // 32 LCSS kNN, reusing delta/epsilon found by the 1NN search
    // print summary table (format widths chosen so accuracies line up in one column)
    /***** RAW DATA ****/
    System.out.printf("Euclidean 1NN:%33.3f%n",cv_01_euclidean_1nn);
    System.out.printf("DTW Full Window 1NN:%27.3f%n",cv_02_dtw_fullWindow_1nn);
    System.out.printf("DTW Optimal Window 1NN (r=%1.2f):%15.3f%n",cv_03_dtw_cvWindow_1nn[1],cv_03_dtw_cvWindow_1nn[0]);
    System.out.printf("WDTW 1NN (g=%1.2f):%29.3f%n",cv_04_wdtw_1nn[1],cv_04_wdtw_1nn[0]);
    System.out.printf("Euclidean kNN (k=%3.0f):%25.3f%n",cv_05_euclidean_knn[1],cv_05_euclidean_knn[0]);
    System.out.printf("DTW Full Window kNN (k=%3.0f):%19.3f%n",cv_06_dtw_fullWindow_knn[1],cv_06_dtw_fullWindow_knn[0]);
    System.out.printf("DTW Optimal Window kNN (k=%3.0f, r=%1.2f):%9.3f%n",cv_11_dtw_optimalWindow_knn[1],cv_11_dtw_optimalWindow_knn[2],cv_11_dtw_optimalWindow_knn[0]);
    System.out.printf("WDTW kNN (k=%3.0f, g=%1.2f):%22.3f%n",cv_12_wdtw_knn[1],cv_12_wdtw_knn[2],cv_12_wdtw_knn[0]);
    /***** DERIVATIVE TRANSFORMED DATA ****/
    System.out.printf("DDTW Full Window 1NN:%26.3f%n",cv_21_ddtw_fullWindow_1nn);
    System.out.printf("DDTW Optimal Window 1NN (r=%1.2f):%14.3f%n",cv_22_ddtw_cvWindow_1nn[1],cv_22_ddtw_cvWindow_1nn[0]);
    System.out.printf("WDDTW 1NN (g=%1.2f):%28.3f%n",cv_23_wdtw_1nn[1],cv_23_wdtw_1nn[0]);
    System.out.printf("DDTW Full Window kNN (k=%3.0f):%18.3f%n",cv_24_ddtw_fullWindow_knn[1],cv_24_ddtw_fullWindow_knn[0]);
    System.out.printf("DDTW Optimal Window kNN (k=%3.0f, r=%1.2f):%7.3f%n",cv_25_ddtw_optimalWindow_knn[1],cv_25_ddtw_optimalWindow_knn[2],cv_25_ddtw_optimalWindow_knn[0]);
    System.out.printf("WDDTW kNN (k=%3.0f, g=%1.2f):%21.3f%n",cv_26_wddtw_knn[1],cv_26_wddtw_knn[2],cv_26_wddtw_knn[0]);
    /***** LCSS ******/
    System.out.printf("LCSS (d=%3.0f, e=%1.3f) 1NN:%21.3f%n",cv_31_lcss_1nn[1],cv_31_lcss_1nn[2],cv_31_lcss_1nn[0]);
    System.out.printf("LCSS (d=%3.0f, e=%1.3f) kNN (k=%3.0f):%13.3f%n",cv_32_lcss_knn[1],cv_32_lcss_knn[2],cv_32_lcss_knn[3],cv_32_lcss_knn[0]);
}
public static void datasetTrainTest(String dataName) throws Exception{
    // Runs train/test classification for every classifier using the best
    // parameters found during cross-validation, printing a summary table.
    // Pre-requisite: CV results must exist for this dataset; if they don't,
    // cross-validation is started automatically first.
    // BUG FIX: check the dataset-specific CV directory rather than the shared
    // parent CV directory -- previously, CV results for ANY dataset would
    // suppress the automatic CV run for this one, and the trainTest_* methods
    // would then fail with missing-cv-file exceptions.
    File cvDir = new File(OUTPUT_DIR_CV+"/"+dataName);
    if(!cvDir.exists()){
        System.out.println("Cross-validation for "+dataName+" doesn't appear to have taken place in '"+OUTPUT_DIR_CV+"'. Starting cross-validation.");
        datasetCrossValidation(dataName);
    }
    initTrainTest(dataName);
    Instances train_raw = loadData(DATA_DIR+"/"+dataName+"/"+dataName+"_TRAIN.arff");
    Instances test_raw = loadData(DATA_DIR+"/"+dataName+"/"+dataName+"_TEST.arff");
    // derivative-transformed copies for the DDTW/WDDTW classifiers
    DerivativeFilter df = new DerivativeFilter();
    Instances train_derivative = df.process(train_raw);
    df = new DerivativeFilter();
    Instances test_derivative = df.process(test_raw);
    /***** RAW DATA ****/
    System.out.printf("Euclidean 1NN:%33.3f%n",trainTest_01_Euclidean_1NN(dataName, train_raw, test_raw));
    System.out.printf("DTW Full Window 1NN:%27.3f%n",trainTest_02_DTW_fullWindow_1NN(dataName, train_raw, test_raw));
    System.out.printf("DTW Best Window 1NN:%27.3f%n",trainTest_03_DTW_optimalWindow_1NN(dataName, train_raw, test_raw));
    System.out.printf("WDTW 1NN:%38.3f%n",trainTest_04_WDTW_1NN(dataName, train_raw, test_raw));
    System.out.printf("Euclidean kNN:%33.3f%n",trainTest_05_Euclidean_kNN(dataName, train_raw, test_raw));
    System.out.printf("DTW Full Window kNN:%27.3f%n",trainTest_06_DTW_fullWindow_kNN(dataName, train_raw, test_raw));
    System.out.printf("DTW Best Window kNN:%27.3f%n",trainTest_11_DTW_bestWindow_kNN(dataName, train_raw, test_raw));
    System.out.printf("WDTW kNN:%38.3f%n",trainTest_12_WDTW_kNN(dataName, train_raw, test_raw));
    /***** DERIVATIVE TRANSFORMED DATA ****/
    System.out.printf("DDTW Full Window 1NN:%26.3f%n",trainTest_21_DDTW_fullWindow_1NN(dataName, train_derivative, test_derivative));
    System.out.printf("DDTW Best Window 1NN:%26.3f%n",trainTest_22_DDTW_optimalWindow_1NN(dataName, train_derivative, test_derivative));
    System.out.printf("WDDTW 1NN:%37.3f%n",trainTest_23_WDDTW_1NN(dataName, train_derivative, test_derivative));
    System.out.printf("DDTW Full Window kNN:%26.3f%n",trainTest_24_DDTW_fullWindow_kNN(dataName, train_derivative, test_derivative));
    System.out.printf("DDTW Best Window kNN:%26.3f%n",trainTest_25_DDTW_bestWindow_kNN(dataName, train_derivative, test_derivative));
    System.out.printf("WDDTW kNN:%37.3f%n",trainTest_26_WDDTW_kNN(dataName, train_derivative, test_derivative));
    /***** LCSS ******/
    System.out.printf("LCSS 1NN:%38.3f%n",trainTest_31_LCSS_1NN(dataName, train_raw, test_raw));
    System.out.printf("LCSS kNN:%38.3f%n",trainTest_32_LCSS_kNN(dataName, train_raw, test_raw));
}
//</editor-fold>
//<editor-fold defaultstate="collapsed" desc="Result Parsing for CV and Train/Test">
public static void printPreCalculatedCvResults(String dataset) throws Exception{
    // Parses the previously written CV result files of a dataset and prints
    // each classifier's headline accuracy, one per line.
    File[] cvResults = new File(OUTPUT_DIR_CV+"/"+dataset).listFiles();
    // listFiles() returns null when the directory is missing/unreadable --
    // fail with an informative message instead of a NullPointerException
    if(cvResults == null){
        throw new Exception("No CV results directory found for dataset '"+dataset+"' in '"+OUTPUT_DIR_CV+"'.");
    }
    for(int i = 0; i < cvResults.length; i++){
        // file name form "cv_XX_Name.txt": strip the "cv_XX_" prefix and the
        // extension, and turn underscores into spaces for display
        String classifierName = cvResults[i].getName().substring(6).replace(".txt", "").replaceAll("_", " ");
        // try-with-resources so each Scanner/file handle is closed (the
        // original leaked one handle per result file)
        try(Scanner scan = new Scanner(cvResults[i])){
            scan.useDelimiter("\n");
            // first line: accuracy[,best params...]
            String[] resultLineParts = scan.next().split(",");
            double accuracy = Double.parseDouble(resultLineParts[0]);
            System.out.printf(classifierName+"%"+(40-classifierName.length())+".3f\n",accuracy);
        }
    }
}
public static void printPrecalculatedTrainTestResults(String dataset) throws Exception{
    // Parses the previously written train/test result files of a dataset and
    // prints each classifier's headline accuracy, one per line.
    File[] testResults = new File(OUTPUT_DIR_TRAIN_TEST+"/"+dataset).listFiles();
    // listFiles() returns null when the directory is missing/unreadable --
    // fail with an informative message instead of a NullPointerException
    if(testResults == null){
        throw new Exception("No train/test results directory found for dataset '"+dataset+"' in '"+OUTPUT_DIR_TRAIN_TEST+"'.");
    }
    for(int i = 0; i < testResults.length; i++){
        // file name form "trainTest_XX_Name.txt": strip the "trainTest_XX_"
        // prefix and the extension, and turn underscores into spaces
        String classifierName = testResults[i].getName().substring(13).replace(".txt", "").replaceAll("_", " ");
        // try-with-resources so each Scanner/file handle is closed (the
        // original leaked one handle per result file)
        try(Scanner scan = new Scanner(testResults[i])){
            scan.useDelimiter("\n");
            // first line: accuracy[,best params...]
            String[] resultLineParts = scan.next().split(",");
            double accuracy = Double.parseDouble(resultLineParts[0]);
            System.out.printf(classifierName+"%"+(40-classifierName.length())+".3f\n",accuracy);
        }
    }
}
//</editor-fold>
//<editor-fold defaultstate="collapsed" desc="Ensemble Classification Methods">
public static double ensembleClassification_best(String dataName) throws Exception{
    // Ensemble strategy "best": classify with only the classifier that had the
    // highest CV accuracy. If several classifiers tie on CV accuracy, one of
    // the tied classifiers is picked at random for each individual
    // classification decision (rather than once up front).
    // Prerequisites: CV and train/test results must already exist on disk.
    // Step 1: CV accuracies for all classifiers
    double[] cvAccuracies = getCvAccuracies(dataName);
    // Step 2: find the best classifier(s); keep every tied id so the random
    // tie-break can happen per decision
    ArrayList<Integer> bestClassifiers = new ArrayList<Integer>();
    double bestAccuracy = -1;
    for(int i = 0; i < cvAccuracies.length;i++){
        if(cvAccuracies[i]>bestAccuracy){
            // strictly better than everything so far: reset the store
            bestClassifiers = new ArrayList<Integer>();
            bestClassifiers.add(i);
            bestAccuracy = cvAccuracies[i];
        }else if(cvAccuracies[i]==bestAccuracy){
            // ties the best so far: keep previous id(s) and add this one
            bestClassifiers.add(i);
        }
    }
    // BUG FIX: use the lowercase ".arff" extension -- every other loadData
    // call in this class uses lowercase, and "_TEST.ARFF" fails to resolve on
    // case-sensitive file systems
    Instances test = loadData(DATA_DIR+"/"+dataName+"/"+dataName+"_TEST.arff");
    // Step 3: test predictions for all classifiers, indexed [instance][classifier]
    double[][] predictions = getTestPredictions(dataName, test);
    // Step 4: actual class values for scoring
    double[] actualClassValues = getClassValues(test);
    boolean moreThanOneBest = bestClassifiers.size() > 1;
    Random r = new Random();
    double[] ensemblePredictions = new double[actualClassValues.length];
    for(int i = 0; i < actualClassValues.length; i++){
        if(moreThanOneBest){
            // random member of the tied-best subset for this decision
            ensemblePredictions[i] = predictions[i][bestClassifiers.get(r.nextInt(bestClassifiers.size()))];
        }else{
            ensemblePredictions[i] = predictions[i][bestClassifiers.get(0)];
        }
    }
    // score: percentage of correctly classified test instances
    int correct = 0;
    for(int i = 0; i < ensemblePredictions.length; i++){
        if(ensemblePredictions[i]==actualClassValues[i]){
            correct++;
        }
    }
    return 100.0/test.numInstances()*correct;
}
public static double ensembleClassification_equal(String dataName) throws Exception{
    // Majority-vote ensemble: every classifier contributes exactly one vote
    // per test instance; the most-voted class value wins, and ties between
    // equally voted classes are broken at random. CV accuracies are not
    // needed for this strategy. Returns the test accuracy as a percentage.
    Instances test = loadData(DATA_DIR+"/"+dataName+"/"+dataName+"_TEST.arff");
    double[] actualClassValues = getClassValues(test);
    double[][] predictions = getTestPredictions(dataName, test);
    Random rand = new Random();
    double[] ensemblePredictions = new double[actualClassValues.length];
    for(int inst = 0; inst < actualClassValues.length; inst++){
        // tally one vote per classifier for this instance
        TreeMap<Double, Integer> tally = new TreeMap<Double, Integer>();
        for(int clf = 0; clf < predictions[inst].length; clf++){
            Integer soFar = tally.get(predictions[inst][clf]);
            if(soFar == null){
                tally.put(predictions[inst][clf], 1);
            }else{
                tally.put(predictions[inst][clf], soFar + 1);
            }
        }
        // collect the class value(s) with the highest vote count
        int topCount = -1;
        ArrayList<Double> topClasses = new ArrayList<Double>();
        for(Double classVal : tally.keySet()){
            int count = tally.get(classVal);
            if(count > topCount){
                // new outright leader: discard previously stored classes
                topClasses = new ArrayList<Double>();
                topClasses.add(classVal);
                topCount = count;
            }else if(count == topCount){
                // ties the leader: keep both
                topClasses.add(classVal);
            }
        }
        // unique winner at index 0, otherwise pick one of the tied winners at random
        ensemblePredictions[inst] = (topClasses.size() == 1)
                ? topClasses.get(0)
                : topClasses.get(rand.nextInt(topClasses.size()));
    }
    // score: percentage of correctly classified test instances
    int correct = 0;
    for(int inst = 0; inst < ensemblePredictions.length; inst++){
        if(ensemblePredictions[inst] == actualClassValues[inst]){
            correct++;
        }
    }
    return 100.0/test.numInstances()*correct;
}
public static double ensembleClassification_proportional(String dataName) throws Exception{
    // Weighted-vote ensemble: each classifier receives a voting weight
    // proportional to its CV accuracy (weights sum to 100). Voting then works
    // like ensembleClassification_equal, except a classifier's vote counts
    // with its weight instead of 1. Ties between equally weighted classes are
    // broken at random. Returns the test accuracy as a percentage.
    // 1. get cv accuracies for all classifiers
    double[] cvAccuracies = getCvAccuracies(dataName);
    // 2. calculate weights: each classifier's share of the total CV accuracy
    double[] weights = new double[cvAccuracies.length];
    double sumOfAccuracies = 0;
    for(int i = 0; i < cvAccuracies.length;i++){
        sumOfAccuracies+=cvAccuracies[i];
    }
    for(int i = 0; i < cvAccuracies.length; i++){
        weights[i] = 100.0/sumOfAccuracies*cvAccuracies[i];
    }
    // 3. get test predictions ([instance][classifier]) and ground truth
    Instances test = loadData(DATA_DIR+"/"+dataName+"/"+dataName+"_TEST.arff");
    double[][] predictions = getTestPredictions(dataName, test);
    double[] actualClassValues = getClassValues(test);
    // 4. weighted vote per instance
    TreeMap<Double, Double> classVotes;
    ArrayList<Double> electedClasses;
    double vote;
    double bsfVote;
    double[] ensemblePredictions = new double[test.numInstances()];
    Random r = new Random();
    for(int i = 0;i < test.numInstances(); i++){ // for each instance i
        classVotes = new TreeMap<Double, Double>();
        for(int j = 0; j < predictions[i].length; j++){ // for each classifier j
            if(classVotes.containsKey(predictions[i][j])){ // accumulate this classifier's weight onto the class it predicted
                vote = classVotes.get(predictions[i][j]);
                vote += weights[j];
                classVotes.put(predictions[i][j], vote);
            }else{ // first vote for this class value
                classVotes.put(predictions[i][j], weights[j]);
            }
        }
        // extract the class value(s) with the highest accumulated weight
        bsfVote = -1;
        electedClasses = new ArrayList<Double>();
        for(Double d:classVotes.keySet()){
            if(classVotes.get(d) > bsfVote){ // new best so far: reset the store and add this class
                electedClasses = new ArrayList<Double>();
                electedClasses.add(d);
                bsfVote = classVotes.get(d);
            }else if(classVotes.get(d) == bsfVote){ // ties the best so far: keep previous value(s) and add this one
                electedClasses.add(d);
            }
        }
        // unique winner, else split the tie randomly among the top classes
        if(electedClasses.size()==1){
            ensemblePredictions[i] = electedClasses.get(0);
        }else{
            ensemblePredictions[i] = electedClasses.get(r.nextInt(electedClasses.size()));
        }
    }
    // score: percentage of correctly classified test instances
    int correct = 0;
    for(int i = 0; i < ensemblePredictions.length; i++){
        if(ensemblePredictions[i]==actualClassValues[i]){
            correct++;
        }
    }
    return 100.0/test.numInstances()*correct;
}
public static double ensembleClassification_significant(String dataName) throws Exception{
    // Weighted-vote ensemble like ensembleClassification_proportional, except
    // a classifier only receives a weight if McNemar's test (see mcNemars)
    // cannot distinguish its CV predictions from those of the best classifier.
    // Classifiers that are significantly different get weight 0, so they never
    // influence a vote. Returns the test accuracy as a percentage.
    // 1. get cv accuracies for all classifiers
    Instances train = loadData(DATA_DIR+"/"+dataName+"/"+dataName+"_TRAIN.arff");
    double[] cvAccuracies = getCvAccuracies(dataName);
    // 2. calculate weights; mcNemars returns a 1/0 inclusion flag per classifier
    double[] weights = new double[cvAccuracies.length];
    int[] mcNemarsInclusion = mcNemars(dataName, train);
    // sum of accuracies over the included classifiers only
    double sumOfAccuracies = 0;
    for(int i = 0; i < cvAccuracies.length;i++){
        if(mcNemarsInclusion[i]==1){
            sumOfAccuracies+=cvAccuracies[i];
        }
    }
    // weight = share of the total included accuracy; excluded classifiers get 0
    for(int i = 0; i < cvAccuracies.length; i++){
        if(mcNemarsInclusion[i]==1){
            weights[i] = 100.0/sumOfAccuracies*cvAccuracies[i];
        }else{
            weights[i]=0;
        }
    }
    // 3. get test predictions ([instance][classifier]) and ground truth
    Instances test = loadData(DATA_DIR+"/"+dataName+"/"+dataName+"_TEST.arff");
    double[][] predictions = getTestPredictions(dataName, test);
    double[] actualClassValues = getClassValues(test);
    // 4. weighted vote per instance
    TreeMap<Double, Double> classVotes;
    ArrayList<Double> electedClasses;
    double vote;
    double bsfVote;
    double[] ensemblePredictions = new double[test.numInstances()];
    Random r = new Random();
    for(int i = 0;i < test.numInstances(); i++){ // for each instance i
        classVotes = new TreeMap<Double, Double>();
        for(int j = 0; j < predictions[i].length; j++){ // for each classifier j
            if(classVotes.containsKey(predictions[i][j])){ // accumulate this classifier's weight onto the class it predicted
                vote = classVotes.get(predictions[i][j]);
                vote += weights[j];
                classVotes.put(predictions[i][j], vote);
            }else{ // first vote for this class value
                classVotes.put(predictions[i][j], weights[j]);
            }
        }
        // extract the class value(s) with the highest accumulated weight
        bsfVote = -1;
        electedClasses = new ArrayList<Double>();
        for(Double d:classVotes.keySet()){
            if(classVotes.get(d) > bsfVote){ // new best so far: reset the store and add this class
                electedClasses = new ArrayList<Double>();
                electedClasses.add(d);
                bsfVote = classVotes.get(d);
            }else if(classVotes.get(d) == bsfVote){ // ties the best so far: keep previous value(s) and add this one
                electedClasses.add(d);
            }
        }
        // unique winner, else split the tie randomly among the top classes
        if(electedClasses.size()==1){
            ensemblePredictions[i] = electedClasses.get(0);
        }else{
            ensemblePredictions[i] = electedClasses.get(r.nextInt(electedClasses.size()));
        }
    }
    // score: percentage of correctly classified test instances
    int correct = 0;
    for(int i = 0; i < ensemblePredictions.length; i++){
        if(ensemblePredictions[i]==actualClassValues[i]){
            correct++;
        }
    }
    return 100.0/test.numInstances()*correct;
}
public static int[] mcNemars(String dataName, Instances train)throws Exception{
    // For each classifier, decides via McNemar's test (with continuity
    // correction, alpha = 0.01) whether its CV predictions differ
    // significantly from those of the best classifier. Returns one flag per
    // classifier: 1 = include (not significantly different from the best),
    // 0 = exclude. Classifier order matches the listFiles() order used by
    // getCvAccuracies/getCVPredictions.
    //1. Get the cv predictions for each classifier ([instance][classifier])
    double[][] cvPredictions = getCVPredictions(dataName, train);
    //2. get cv accuracies
    double[] cvAccuracies = getCvAccuracies(dataName);
    //3. pick the best classifier to build the comparison around
    double bsfAccuracy = -1;
    ArrayList<Integer> bestClassifierIds = null;
    for(int i = 0; i < cvAccuracies.length;i++){
        if(cvAccuracies[i] > bsfAccuracy){ // new single-best classifier: reinitialise store and add this id
            bsfAccuracy = cvAccuracies[i];
            bestClassifierIds = new ArrayList<Integer>();
            bestClassifierIds.add(i);
        }else if(cvAccuracies[i] == bsfAccuracy){ // ties the best so far: keep previous id(s) and add this one
            bestClassifierIds.add(i);
        }
    }
    int bestClassifierId = -1;
    // split ties randomly (a single random pick here is fine: the test is
    // against one reference classifier)
    if(bestClassifierIds.size() > 1){
        Random r = new Random();
        bestClassifierId = bestClassifierIds.get(r.nextInt(bestClassifierIds.size()));
    }else{
        bestClassifierId = bestClassifierIds.get(0);
    }
    // column of CV predictions belonging to the chosen best classifier
    double[] bestClassifierPredictions = new double[train.numInstances()];
    for(int i = 0; i < train.numInstances();i++){
        bestClassifierPredictions[i] = cvPredictions[i][bestClassifierId];
    }
    int numClassifiers = cvAccuracies.length;
    double[] actualClassValues = getClassValues(train);
    int[] logicalOutput = new int[numClassifiers];
    for(int c = 0; c < numClassifiers; c++){
        if(c==bestClassifierId){
            logicalOutput[c] = 1; // the best classifier is always included
        }else if(cvAccuracies[c]==100){
            logicalOutput[c] = 1; // 100% accurate but not picked as best => best is also 100%, so every decision was identical
        }else{
            // build the 2x2 contingency table of per-instance outcomes
            // classifier a = best classifier, classifier b = classifier c
            int wrongByBoth = 0;
            int rightByAWrongByB = 0; // discordant: only the best classifier was right
            int wrongByaRightByB = 0; // discordant: only classifier c was right
            int rightByBoth = 0;
            double actualClass, a, b;
            for(int i = 0; i < train.numInstances();i++){
                actualClass = actualClassValues[i];
                a = bestClassifierPredictions[i];
                b = cvPredictions[i][c];
                if(a!=actualClass && b!=actualClass){
                    wrongByBoth++;
                }else if(a==actualClass && b!=actualClass){
                    rightByAWrongByB++;
                }else if(a!=actualClass&&b==actualClass){
                    wrongByaRightByB++;
                }else if(a==actualClass && b==actualClass){
                    rightByBoth++;
                }
            }
            // sanity check: the four cells must account for every training instance
            if(wrongByBoth+rightByAWrongByB+wrongByaRightByB+rightByBoth!=train.numInstances()){
                throw new Exception("Count of instances is incorrect. Please ensure inputs are correct");
            }
            if(rightByAWrongByB+wrongByaRightByB==0){
                logicalOutput[c] = 1; // no discordant pairs: equivalent to the best classifier, so include it
            }else{
                // McNemar's statistic with continuity correction:
                // chi = (|n01 - n10| - 1)^2 / (n01 + n10), df = 1
                double chiPart = (Math.abs(wrongByaRightByB-rightByAWrongByB)-1);
                double chi = (chiPart*chiPart)/(wrongByaRightByB+rightByAWrongByB);
                if(chi >= 6.634897){ // chi-squared critical value for alpha = 0.01, df = 1
                    logicalOutput[c] = 0; // significantly different from the best: exclude
                }else{
                    logicalOutput[c] = 1;
                }
            }
        }
    }
    return logicalOutput;
}
public static void print10RunEnsembles(String dataset) throws Exception{
    // Runs each ensemble strategy 10 times and prints the mean accuracy of
    // each. Averaging smooths out the random tie-breaking inside the ensemble
    // methods; since ensembling reads pre-calculated results, repeats are cheap.
    final int runs = 10;
    double bestTotal = 0;
    double equalTotal = 0;
    double propTotal = 0;
    double sigTotal = 0;
    for(int run = 0; run < runs; run++){
        bestTotal += ensembleClassification_best(dataset);
        equalTotal += ensembleClassification_equal(dataset);
        propTotal += ensembleClassification_proportional(dataset);
        sigTotal += ensembleClassification_significant(dataset);
    }
    // widths padded so the four averages line up in one column
    System.out.printf("Best%36.3f\n", bestTotal / runs);
    System.out.printf("Equal%35.3f\n", equalTotal / runs);
    System.out.printf("Proportional%28.3f\n", propTotal / runs);
    System.out.printf("Significant%29.3f\n", sigTotal / runs);
}
//</editor-fold>
//<editor-fold defaultstate="collapsed" desc="Utility Methods">
public static Instances loadData(String fileName){
    // Reads a weka ARFF file and sets the class attribute to the last
    // attribute (the convention used throughout this class). Best-effort:
    // on any failure the error is printed and null is returned, matching the
    // contract callers already rely on.
    Instances data = null;
    // try-with-resources: the FileReader is always closed (the original
    // leaked the reader on both success and failure paths)
    try(FileReader r = new FileReader(fileName)){
        data = new Instances(r);
        data.setClassIndex(data.numAttributes() - 1);
    } catch(Exception e){
        System.out.println(" Error =" + e + " in method loadData");
    }
    return data;
}
//</editor-fold>
//<editor-fold defaultstate="collapsed" desc="Helper Methods">
private static double[] getCvAccuracies(String dataName) throws Exception{
    // Reads the headline CV accuracy (first value on the first line) of each
    // classifier's CV result file for the given dataset.
    // NOTE(review): callers align these accuracies by index with the matrices
    // from getCVPredictions/getTestPredictions; all of them depend on
    // File.listFiles() returning the same order for the same directory.
    File cvFolder = new File(OUTPUT_DIR_CV+"/"+dataName);
    File[] cvFiles = cvFolder.listFiles();
    // listFiles() returns null when the directory is missing/unreadable
    if(cvFiles == null){
        throw new Exception("No CV results found for dataset '"+dataName+"' in '"+OUTPUT_DIR_CV+"'.");
    }
    // size by the actual number of result files rather than a hard-coded 16,
    // so the method keeps working if the set of classifiers changes
    double[] cvAccuracies = new double[cvFiles.length];
    for(int i = 0; i < cvFiles.length; i++){
        // try-with-resources: each Scanner/file handle is closed (the
        // original leaked one handle per file)
        try(Scanner scan = new Scanner(cvFiles[i])){
            scan.useDelimiter("\n");
            cvAccuracies[i] = Double.parseDouble(scan.next().split(",")[0].trim());
        }
    }
    return cvAccuracies;
}
private static double[][] getCVPredictions(String dataName, Instances train) throws Exception{
    // Reads every classifier's per-instance CV predictions into a matrix
    // indexed [instance][classifier]. Classifier order is File.listFiles()
    // order, matching getCvAccuracies on the same directory. Each result file
    // has a header line followed by "prediction,actualClass" lines.
    File cvFolder = new File(OUTPUT_DIR_CV+"/"+dataName);
    File[] cvFiles = cvFolder.listFiles();
    // listFiles() returns null when the directory is missing/unreadable
    if(cvFiles == null){
        throw new Exception("No CV results found for dataset '"+dataName+"' in '"+OUTPUT_DIR_CV+"'.");
    }
    double[][] predictions = new double[train.numInstances()][cvFiles.length];
    for(int c = 0; c < cvFiles.length; c++){
        // try-with-resources: the Scanner is released even when one of the
        // sanity checks below throws (the original leaked the handle)
        try(Scanner scan = new Scanner(cvFiles[c])){
            scan.useDelimiter("\n");
            scan.next(); // skip header line (accuracy and best params)
            int idx = 0;
            while(scan.hasNext()){
                String[] resultLineParts = scan.next().split(",");
                double prediction = Double.parseDouble(resultLineParts[0]);
                double classVal = Double.parseDouble(resultLineParts[1]);
                // sanity check: the stored class value must match the training data
                if(classVal!=train.instance(idx).classValue()){
                    throw new Exception("Class value mismatch! Found:"+classVal+", expected:"+train.instance(idx).classValue());
                }
                predictions[idx][c] = prediction;
                idx++;
            }
            // sanity check: every training instance must have a prediction
            if(idx!=train.numInstances()){
                throw new Exception("Incorrect number of instances! Found:"+idx+", expected: "+train.numInstances());
            }
        }
    }
    return predictions;
}
private static double[][] getTestPredictions(String dataName, Instances test) throws Exception{
    // Reads every classifier's per-instance train/test predictions into a
    // matrix indexed [instance][classifier]. Classifier order is
    // File.listFiles() order. Each result file has a header line followed by
    // "prediction,actualClass" lines.
    File testFolder = new File(OUTPUT_DIR_TRAIN_TEST+"/"+dataName);
    File[] testFiles = testFolder.listFiles();
    // listFiles() returns null when the directory is missing/unreadable
    if(testFiles == null){
        throw new Exception("No train/test results found for dataset '"+dataName+"' in '"+OUTPUT_DIR_TRAIN_TEST+"'.");
    }
    double[][] predictions = new double[test.numInstances()][testFiles.length];
    for(int c = 0; c < testFiles.length; c++){
        // try-with-resources: the Scanner is released even when one of the
        // sanity checks below throws (the original leaked the handle)
        try(Scanner scan = new Scanner(testFiles[c])){
            scan.useDelimiter("\n");
            scan.next(); // skip header line (accuracy and best params)
            int idx = 0;
            while(scan.hasNext()){
                String[] resultLineParts = scan.next().split(",");
                double prediction = Double.parseDouble(resultLineParts[0]);
                double classVal = Double.parseDouble(resultLineParts[1]);
                // sanity check: the stored class value must match the test data
                if(classVal!=test.instance(idx).classValue()){
                    throw new Exception("Class value mismatch! Found:"+classVal+", expected:"+test.instance(idx).classValue());
                }
                predictions[idx][c] = prediction;
                idx++;
            }
            // sanity check: every test instance must have a prediction
            if(idx!=test.numInstances()){
                throw new Exception("Incorrect number of instances! Found:"+idx+", expected: "+test.numInstances());
            }
        }
    }
    return predictions;
}
public static double[] getClassValues(Instances input){
    // Copies the class value of every instance into a primitive array,
    // preserving dataset order.
    final int total = input.numInstances();
    double[] classValues = new double[total];
    int idx = 0;
    while(idx < total){
        classValues[idx] = input.instance(idx).classValue();
        idx++;
    }
    return classValues;
}
//</editor-fold>
public static void main(String[] args) {
    // Entry point: carries out CV, train/test, and ensemble classification for
    // one dataset. Before running, ensure the static fields at the top of this
    // class point at the correct data and output locations.
    // Two modes are selected automatically from the results already on disk:
    // 1. If no experiments exist yet for 'dataName', all cross-validation and
    //    train/test experiments are carried out from scratch.
    String dataName = "ItalyPowerDemand";
    // 2. If 'dataName' was processed previously and results are stored under
    //    the OUTPUT_DIR paths, the stored results are parsed and summarised
    //    instead -- much faster, since no classification is repeated unnecessarily.
    try{
        // Part 1: Cross-Validation.
        // If CV hasn't been carried out, perform it (ASSUMPTION: if the cv
        // folder exists, CV was carried out FULLY). Otherwise print stored results.
        System.out.println("Cross-Validation Results");
        System.out.println("----------------------------------------");
        File cvResultsDir = new File(OUTPUT_DIR_CV+"/"+dataName);
        if(cvResultsDir.exists()){
            printPreCalculatedCvResults(dataName);
        }else{
            datasetCrossValidation(dataName);
        }
        System.out.println();
        // Part 2: Train/Test.
        // Same pattern: run if no results dir exists, otherwise print stored
        // results (ASSUMPTION: an existing dir means train/test completed fully).
        System.out.println("Train/Test Results");
        System.out.println("----------------------------------------");
        File trainTestResultsDir = new File(OUTPUT_DIR_TRAIN_TEST+"/"+dataName);
        if(trainTestResultsDir.exists()){
            printPrecalculatedTrainTestResults(dataName);
        }else{
            datasetTrainTest(dataName);
        }
        System.out.println();
        // Part 3: Ensembles.
        // CV and train/test results must both exist by this point (CV supplies
        // the weights, train/test the final predictions). Each ensemble
        // strategy is run 10 times and the average reported: classifiers with
        // very similar CV performance can produce slightly different ensembles
        // when ties are settled randomly, and since ensembling only reads
        // pre-calculated results, averaging over multiple runs is cheap and
        // gives consistent numbers between executions.
        System.out.println("Ensemble Results");
        System.out.println("----------------------------------------");
        print10RunEnsembles(dataName);
    }catch(Exception e){
        e.printStackTrace();
    }
}
}