/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package development;
import fileIO.InFile;
import fileIO.OutFile;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Random;
import java.util.logging.Level;
import java.util.logging.Logger;
import utilities.ClassifierTools;
import weka.classifiers.Classifier;
import weka.classifiers.bayes.BayesNet;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.functions.SMO;
import weka.classifiers.functions.supportVector.PolyKernel;
import weka.classifiers.lazy.kNN;
import weka.classifiers.meta.HeterogeneousEnsemble;
import weka.classifiers.meta.RotationForest;
import weka.classifiers.trees.J48;
import weka.classifiers.trees.RandomForest;
import weka.core.Instances;
import weka.filters.NormalizeCase;
import weka.filters.timeseries.shapelet_transforms.FullShapeletTransform;
import weka.filters.timeseries.shapelet_transforms.ShapeletTransform;
import weka.filters.timeseries.shapelet_transforms.ShapeletTransformDistCaching;
/**
*
* @author ajb
*/
public class CrossValidateShapelets extends Thread {
// Per-thread fold data: each CrossValidateShapelets instance transforms one CV fold.
Instances train;   // training split for this thread's fold
Instances test;    // test split for this thread's fold
int fold;          // zero-based fold index (output file names use fold+1)
String path;       // directory the transformed fold files are written to
public static String fileName;          // problem name shared by all worker threads
public static boolean useCluster=true;  // switches between cluster and desktop paths
//UCR ONES FIRST
// Indices into DataSets.fileNames of problems whose CV files must be (re)built.
// NOTE(review): 'missing' and 'incomplete' hold identical values — confirm intended.
static int[] missing={45,46,61,71,72,73,74,20,26,27,57};
static int[] incomplete={45,46,61,71,72,73,74,20,26,27,57};
/**
 * Stores the train/test split, fold index and output directory for this worker thread.
 *
 * @param tr training split for the fold
 * @param te test split for the fold
 * @param f zero-based fold index
 * @param path directory transformed folds are written to
 */
public CrossValidateShapelets(Instances tr, Instances te, int f,String path){
    this.path=path;
    this.fold=f;
    this.test=te;
    this.train=tr;
}
/**
 * Rebuilds the ten time-domain cross-validation folds for each problem indexed
 * by the incomplete/missing arrays: clears any previous ShapeletCV directory,
 * normalises and randomly reorders the training data, records the shuffle
 * ordering, then writes each fold's train/test split as ARFF files.
 * Cluster paths are hard coded.
 */
public static void formCV(){
//Delete any existing shapelet files for the incomplete
for(int i=0;i<incomplete.length;i++){
File f = new File("/gpfs/sys/ajb/TSC Problems/"+DataSets.fileNames[incomplete[i]]+"/ShapeletCV/");
//Delete everything there
if(f.exists()){
try{
delete(f);
}catch(IOException e){
System.err.println(" Unable to delete directory ShapeletCV/ Continuing ");
}
}
//Recreate the directory
if(!f.exists()){
f.mkdir();
}
}
for(int i=0;i<missing.length;i++){
String clusterPath = "/gpfs/sys/ajb/TSC Problems/"+DataSets.fileNames[missing[i]]+"/";
String dropboxPath= "C:/Users/ajb/Dropbox/TSC Problems/"+DataSets.fileNames[missing[i]]+"/";
// String path=dropboxPath;
String path=clusterPath;
Instances train = ClassifierTools.loadData(path+DataSets.fileNames[missing[i]]+"_TRAIN");
System.out.println("Processing : "+DataSets.fileNames[missing[i]]);
NormalizeCase nc = new NormalizeCase();
try{
train=nc.process(train);
}catch(Exception e){
System.out.println(" Unable to normalise for some unknown reason "+e+" but continuing...");
}
//Randomize the data. Need to save the mapping somewhere.
int[] positions=new int[train.numInstances()];
train=randomise(train,positions);
// The permutation is saved twice: once inside ShapeletCV/ and once in the
// problem directory itself.
OutFile of = new OutFile(path+"ShapeletCV/InstancePositions.csv");
for(int j=0;j<positions.length;j++)
of.writeLine(positions[j]+",");
of = new OutFile(path+"InstancePositions.csv");
for(int j=0;j<positions.length;j++)
of.writeLine(positions[j]+",");
//Split into time domain folds
int folds=10;
Instances[] trainFolds=new Instances[folds];
Instances[] testFolds=new Instances[folds];
splitTrainData(train,trainFolds,testFolds,folds);
//Save folds to file
// NOTE(review): the fold files go to the problem directory, not ShapeletCV/;
// doSingleTransform loads them from the same place, so this looks deliberate.
for(int j=1;j<=folds;j++){
OutFile of1 = new OutFile(path+DataSets.fileNames[missing[i]]+"_TRAIN"+(j)+".arff");
OutFile of2 = new OutFile(path+DataSets.fileNames[missing[i]]+"_TEST"+(j)+".arff");
of1.writeLine(trainFolds[j-1].toString());
of2.writeLine(testFolds[j-1].toString());
}
}
}
/**
 * Thread body: shapelet-transforms this fold's train and test sets and writes
 * both as ARFF files named by problem and (one-based) fold number.
 */
public void run(){
    //Perform cached on online
    FullShapeletTransform transform=new ShapeletTransformDistCaching();
    transform.useCandidatePruning(10);
    transform.supressOutput();
    int nosShapelets=Math.max(train.numAttributes(), train.numInstances());
    transform.setNumberOfShapelets(nosShapelets);
    try {
        Instances transformedTrain=transform.process(train);
        Instances transformedTest=transform.process(test);
        OutFile trainOut = new OutFile(path+fileName+"_TRAIN"+(fold+1)+".arff");
        OutFile testOut = new OutFile(path+fileName+"_TEST"+(fold+1)+".arff");
        trainOut.writeLine(transformedTrain.toString());
        testOut.writeLine(transformedTest.toString());
    } catch (Exception ex) {
        Logger.getLogger(CrossValidateShapelets.class.getName()).log(Level.SEVERE, null, ex);
    }
}
/**
 * Splits train into the given number of contiguous cross-validation folds.
 * For fold i, testFolds[i] receives a contiguous slice of the data and
 * trainFolds[i] receives everything else. Fold sizes are balanced so no fold
 * ends up more than one case larger than the others.
 *
 * @param train data to split (not modified; instances are shared, not deep-copied)
 * @param trainFolds output array of length folds, filled with each fold's train split
 * @param testFolds output array of length folds, filled with each fold's test split
 * @param folds number of folds
 */
public static void splitTrainData(Instances train,Instances[] trainFolds,Instances[] testFolds,int folds){
int size=train.numInstances();
int foldSize=size/folds;
// Start every fold at the base size; any remainder initially lands in the last fold.
int[] foldCV = new int[folds];
for(int i=0;i<foldCV.length;i++)
foldCV[i]=foldSize;
if(size%folds!=0) //Adjust the last fold size accordingly
foldCV[folds-1]=size -foldSize*(folds-1);
int diff=foldCV[folds-1]-foldSize;
int c=0;
// Rebalance: move one case at a time from the oversized last fold to the
// earlier folds in round-robin order until the last fold is no larger.
while(diff>0){ //Reassign elements to other folds
foldCV[c%(folds-1)]++;
foldCV[folds-1]--;
diff=foldCV[folds-1]-foldCV[c%(folds-1)];
c++;
}
Instances copy = new Instances(train);
int start=0;
// Fold i tests on positions [start, start+foldCV[i]) and trains on the rest.
for(int i=0;i<folds;i++){
trainFolds[i]= new Instances(copy,0);
testFolds[i]= new Instances(copy,0);
for(int j=0;j<train.numInstances();j++){
if(j<start || j>=start+foldCV[i])
trainFolds[i].add(train.instance(j));
else
testFolds[i].add(train.instance(j));
}
start+=foldCV[i];
}
}
/**
 * Returns a copy of train with the instances in a random order, recording the
 * permutation in pos: the instance at position i of the returned data is
 * train.instance(pos[i]).
 *
 * @param train data to shuffle (not modified)
 * @param pos output array of length train.numInstances(); filled with the permutation
 * @return a new Instances holding the same cases in shuffled order
 */
public static Instances randomise(Instances train, int[] pos){
    //Generate a random permutation into pos
    Random r = new Random();
    for(int i=0;i<pos.length;i++)
        pos[i]=i;
    // Fisher-Yates shuffle: uniform over all permutations. The previous version
    // performed a fixed number of random transpositions, which is not uniform.
    for(int i=pos.length-1;i>0;i--){
        int j=r.nextInt(i+1);
        int temp=pos[i];
        pos[i]=pos[j];
        pos[j]=temp;
    }
    Instances newD=new Instances(train,0);
    for(int i=0;i<pos.length;i++)
        newD.add(train.instance(pos[i]));
    return newD;
}
/**
 * Builds the ten shapelet-transformed CV folds for one problem, running one
 * transform thread per fold, then waits for all of them. Skips the work when
 * all ten train/test fold files already exist. The random instance ordering is
 * saved to InstancePositions.csv so fold predictions can later be mapped back
 * to the original order.
 *
 * @param file problem name (directory and file prefix)
 */
public static void singleRunThreaded(String file){
// String file ="ItalyPowerDemand";
String clusterPath = "/gpfs/sys/ajb/TSC Problems/"+file+"/";
String desktopPath="C:/Users/ajb/Dropbox/TSC Problems/"+file+"/";
String path=desktopPath;
if(useCluster)
path=clusterPath;
String filePath=path+"ShapeletCV/";
int count=0;
//Create directory if it isn't there already
File dir = new File(filePath);
if(!dir.exists()){
dir.mkdir();
}
else{ //Comment out to allow overwriting
boolean present=true;
for(int i=1;i<=10&& present;i++){
File cv=new File(filePath+file+"_TRAIN"+i+".arff");
File cv2=new File(filePath+file+"_TEST"+i+".arff");
if(cv.exists()&&cv2.exists()) {
//CV files already there
count++;
}
else
present=false;
}
if(count==10)//Exit now: every fold already transformed
return;
}
CrossValidateShapelets.fileName=file;
Instances train = ClassifierTools.loadData(path+file+"_TRAIN");
NormalizeCase nc = new NormalizeCase();
try{
train=nc.process(train);
}catch(Exception e){
System.out.println(" Unable to normalise for some unknown reason "+e+" but continuing...");
}
//Randomize the data. Need to save the mapping somewhere.
int[] positions=new int[train.numInstances()];
train=randomise(train,positions);
OutFile of = new OutFile(filePath+"InstancePositions.csv");
for(int i=0;i<positions.length;i++)
of.writeLine(positions[i]+",");
//Split data into folds
int folds=10;
Instances[] trainFolds=new Instances[folds];
Instances[] testFolds=new Instances[folds];
splitTrainData(train,trainFolds,testFolds,folds);
// One worker thread per fold; each writes its own transformed output files.
CrossValidateShapelets[] ct= new CrossValidateShapelets[folds];
for(int i=0;i<folds;i++){
ct[i]=new CrossValidateShapelets(trainFolds[i],testFolds[i],i,filePath);
}
for(int i=0;i<folds;i++){ //Only start the threads where file is not there
ct[i].start();
}
try {
// Block until every fold has finished before returning.
for(int i=0;i<folds;i++)
ct[i].join();
} catch (InterruptedException ex) {
Logger.getLogger(CrossValidateShapelets.class.getName()).log(Level.SEVERE, null, ex);
}
}
/**
 * Counts how many of the ten shapelet-transformed training fold files already
 * exist on the cluster for the given problem.
 *
 * @param file problem name (directory and file prefix)
 * @return number of existing _TRAINj.arff files, in 0..10
 */
public static int countFiles(String file){
    // Removed the unused 'boolean b' local and folded the path construction.
    String filePath= "/gpfs/sys/ajb/TSC Problems/"+file+"/ShapeletCV/";
    int count=0;
    for(int j=1;j<=10;j++){
        File cv=new File(filePath+file+"_TRAIN"+j+".arff");
        if(cv.exists()) //CV files already there
            count++;
    }
    return count;
}
/**
 * Reports which problems have all ten shapelet CV fold files present on the
 * cluster: prints a PROBLEM line for each incomplete problem and finally the
 * total number of fully completed problems.
 */
public static void checkTransforms(){
    int totalCount=0;
    for(int i=0;i<DataSets.fileNames.length;i++){
        // Removed the unused 'boolean b' local and folded the path construction.
        String filePath= "/gpfs/sys/ajb/TSC Problems/"+DataSets.fileNames[i]+"/ShapeletCV/";
        int count=0;
        for(int j=1;j<=10;j++){
            File cv=new File(filePath+DataSets.fileNames[i]+"_TRAIN"+j+".arff");
            if(cv.exists()) //CV files already there
                count++;
        }
        if(count==10)
            totalCount++;
        else
            System.out.println("PROBLEM"+DataSets.fileNames[i]+" IN POSITION "+i+" ONLY "+count+" CV FILES COMPLETED");
    }
    System.out.println("TOTAL COMPLETED = "+totalCount);
}
/**
 * Classifies one shapelet-transformed CV fold of a problem with a CV-weighted
 * heterogeneous ensemble and writes the ensemble members' CV accuracies plus
 * the per-instance predictions to <file>Predictions<fold>.csv.
 *
 * @param file problem name
 * @param fold one-based fold number (matches the file naming)
 * @return 2 x numTestInstances array: row 0 actual class, row 1 predicted class;
 *         null when input files are missing or predictions already exist
 */
public static int[][] classifyFold(String file, int fold){
String clusterPath = "/gpfs/sys/ajb/TSC Problems/"+file+"/";
String desktopPath="C:/Users/ajb/Dropbox/TSC Problems/"+file+"/";
String path=desktopPath;
if(useCluster)
path=clusterPath;
String filePath=path+"ShapeletCV/";
//Check training and test files exist, terminate if not
File tr=new File(filePath+file+"_TRAIN"+fold+".arff");
File ts=new File(filePath+file+"_TEST"+fold+".arff");
if(!tr.exists() || !ts.exists()){
System.err.println(" ERROR CLASSIFYING "+file+" fold "+fold+" file does not exist");
return null;
}
//Check whether predictions already exist; terminate if this fold is done.
// NOTE(review): the runtime message below contains the typo "Classificastion";
// left untouched here since it is program output, not a comment.
File r=new File(filePath+file+"Predictions"+fold+".csv");
if(r.exists()){
System.err.println(file+" fold "+fold+" Classificastion already done");
return null;
}
Instances train = ClassifierTools.loadData(filePath+file+"_TRAIN"+fold);
Instances test = ClassifierTools.loadData(filePath+file+"_TEST"+fold);
ArrayList<String> names= new ArrayList<>();
ArrayList<Classifier> c=setSingleClassifiers(names);
HeterogeneousEnsemble hc = new HeterogeneousEnsemble(c);
hc.useCVWeighting(true);
int[][] preds=new int[2][test.numInstances()];
try {
hc.buildClassifier(train);
for(int i=0;i<test.numInstances();i++){
preds[0][i]=(int)test.instance(i).classValue();
preds[1][i]=(int)hc.classifyInstance(test.instance(i));
}
} catch (Exception ex) {
// NOTE(review): on failure preds stays zero-filled yet the results file is
// still written below — consider returning null here instead.
Logger.getLogger(CrossValidateShapelets.class.getName()).log(Level.SEVERE, null, ex);
}
//Save results to the appropriate file
double[] cvAccs=hc.getWeights();
OutFile results=new OutFile(filePath+file+"Predictions"+fold+".csv");
for(int i=0;i<cvAccs.length;i++)
results.writeString(cvAccs[i]+",");
results.writeString("\n Actual,Predicted\n");
int correct=0;
for(int i=0;i<preds[0].length;i++){
results.writeString(preds[0][i]+","+preds[1][i]+"\n");
if(preds[0][i]==preds[1][i])
correct++;
}
System.out.println(" Fold ="+fold+" correct ="+correct+" acc = "+((double)correct)/preds[0].length);
return preds;
}
/**
 * For every problem with a complete set of ten fold prediction files: collates
 * the per-fold predictions, restores the original instance order using
 * InstancePositions.csv, writes the combined predictions and per-fold CV
 * accuracies to the shapeletCV results directory, and appends the train CV
 * accuracy for the problem to TrainCV.csv.
 */
public static void combineandInvertFolds(){
    OutFile all=new OutFile("/gpfs/sys/ajb/shapeletCV/TrainCV.csv");
    fileLoop:for(int i=0;i<DataSets.fileNames.length;i++)
    {
        all.writeString("\n"+DataSets.fileNames[i]);
        //Check predictions exist, if not, ignore
        String path="/gpfs/sys/ajb/TSC Problems/";
        int count=0;
        for(int j=1;j<=10;j++){
            File f=new File(path+DataSets.fileNames[i]+"/ShapeletCV/"+DataSets.fileNames[i]+"Predictions"+j+".csv");
            if(f.exists())
                count++;
        }
        if(count<10){ //Skip this problem
            System.out.println(" Not enough Prediction files for problem "+DataSets.fileNames[i]+" num ="+count);
            continue fileLoop;
        }
        //Concatenate into a single file
        String str= "/gpfs/sys/ajb/shapeletCV/";
        OutFile of=new OutFile(str+DataSets.fileNames[i]+"Preds.csv");
        of.writeLine("actual,predicted");
        OutFile of2=new OutFile(str+DataSets.fileNames[i]+"CV_Accs.csv");
        ArrayList<int[]> preds=new ArrayList<>();
        int lines;
        InFile inF;
        for(int j=1;j<=10;j++){
            // First open counts the data lines (total minus the accuracy row and
            // the header row); the file is then reopened to read from the start.
            inF=new InFile(path+DataSets.fileNames[i]+"/ShapeletCV/"+DataSets.fileNames[i]+"Predictions"+j+".csv");
            lines =inF.countLines()-2;
            System.out.println(" Number of lines ="+lines);
            inF=new InFile(path+DataSets.fileNames[i]+"/ShapeletCV/"+DataSets.fileNames[i]+"Predictions"+j+".csv");
            of2.writeLine(inF.readLine()); //First row: ensemble members' CV accuracies
            inF.readLine(); //Skip the Actual,Predicted header
            for(int k=0;k<lines;k++){
                int[] d=new int[2]; //d[0]=actual class, d[1]=predicted class
                d[0]=inF.readInt();
                d[1]=inF.readInt();
                preds.add(d);
            }
        }
        //Load ordering
        int[] orders=new int[preds.size()];
        inF=new InFile(path+DataSets.fileNames[i]+"/ShapeletCV/InstancePositions.csv");
        lines=inF.countLines();
        if(lines!=preds.size()){ //ERROR
            System.err.println(" BIG ERROR: reording number does not equal the number of cases in the file!!! Problem ="+DataSets.fileNames[i]);
            System.err.println(" \t\t in recorded positions there are" +lines+" in the combo results there are "+preds.size());
            continue fileLoop;
        }
        inF=new InFile(path+DataSets.fileNames[i]+"/ShapeletCV/InstancePositions.csv");
        for(int k=0;k<lines;k++)
            orders[k]=inF.readInt();
        //Reorder into original //Work out Cv Train Accuracy
        int[][] results=new int[lines][];
        int correct=0;
        for(int k=0;k<lines;k++){
            results[orders[k]]=preds.get(k);
            if(results[orders[k]][0]==results[orders[k]][1])
                correct++;
        }
        //Print to file
        //BUG FIX: previously wrote results[k][0] in BOTH columns, so the
        //"predicted" column silently duplicated the actual class values.
        for(int k=0;k<lines;k++)
            of.writeLine(results[k][0]+","+results[k][1]);
        all.writeString(","+((double)correct)/lines);
    }
}
/**
 * Builds the pool of standard classifiers used by the heterogeneous ensemble,
 * recording a display name for each in names (kept in step with the returned list).
 *
 * @param names output list; one name is appended per classifier added
 * @return list of configured, untrained classifiers
 */
public static ArrayList<Classifier> setSingleClassifiers(ArrayList<String> names){
    ArrayList<Classifier> sc=new ArrayList<>();
    kNN n= new kNN(50);
    n.setCrossValidate(true); //select k by cross validation
    sc.add(n);
    names.add("kNN");
    sc.add(new J48());
    names.add("C45");
    sc.add(new NaiveBayes());
    names.add("NB");
    BayesNet bn = new BayesNet();
    sc.add(bn);
    names.add("BayesNet");
    RandomForest rf = new RandomForest();
    rf.setNumTrees(200);
    sc.add(rf);
    names.add("RandForest");
    RotationForest rot = new RotationForest();
    rot.setNumIterations(30);
    //BUG FIX: previously added rf a second time here, so the configured
    //rotation forest was never actually in the ensemble.
    sc.add(rot);
    names.add("RotForest");
    SMO svmL = new SMO();
    PolyKernel kernel = new PolyKernel();
    kernel.setExponent(1); //linear kernel
    svmL.setKernel(kernel);
    sc.add(svmL);
    names.add("SVML");
    kernel = new PolyKernel();
    kernel.setExponent(2); //quadratic kernel
    SMO svmQ = new SMO();
    svmQ.setKernel(kernel);
    sc.add(svmQ);
    names.add("SVMQ");
    return sc;
}
/**
 * Entry point for the transform stage. With no arguments runs a single
 * hard-coded desktop problem; otherwise args[0] (one-based) selects the
 * problem to transform on the cluster.
 */
public static void doTransform(String[] args){
    if(args.length>0){ //Cluster run: args[0] is the 1-based problem index
        useCluster=true;
        int problemNum=Integer.parseInt(args[0])-1;
        System.out.println(" Transforming ="+DataSets.fileNames[problemNum]);
        singleRunThreaded(DataSets.fileNames[problemNum]);
    }
    else{ //Desktop run on a single fixed problem
        useCluster=false;
        System.out.println(" ON DESKTOP");
        System.out.println(" Transforming :"+DataSets.fileNames[34]);
        singleRunThreaded("ItalyPowerDemand");
    }
}
/**
 * Entry point for the classification stage. With no arguments classifies all
 * ten folds of a single desktop problem; otherwise args[0] (one-based) encodes
 * both problem (n/10) and fold (n%10) for a single cluster job.
 */
public static void classifyProblem(String[] args){
    if(args.length>0){
        useCluster=true;
        int n=Integer.parseInt(args[0])-1;
        //Results saved to individual files
        classifyFold(DataSets.fileNames[n/10],(n%10)+1);
    }
    else{
        useCluster=false;
        System.out.println(" ON DESKTOP");
        for(int fold=1;fold<=10;fold++)
            classifyFold("ItalyPowerDemand",fold);
    }
}
/**
 * Deletes every problem's ShapeletCV directory (always from the cluster:
 * useCluster is forced on).
 */
public static void purge(){ //Delete all CV files from the cluster
    useCluster=true;
    for(String problem:DataSets.fileNames)
    {
        String clusterPath = "/gpfs/sys/ajb/TSC Problems/"+problem+"/";
        String desktopPath="C:/Users/ajb/Dropbox/TSC Problems/"+problem+"/";
        String path=useCluster ? clusterPath : desktopPath;
        File dir = new File(path+"ShapeletCV/");
        if(dir.exists()){
            try{
                delete(dir);
            }catch(IOException e){
                System.err.println(" Unable to delete directory "+path+"ShapeletCV/ Continuing ");
            }
        }
    }
}
/**
 * Recursively deletes a file or directory tree.
 *
 * @param file file or directory to remove
 * @throws IOException if any file or directory cannot be deleted
 */
public static void delete(File file) throws IOException{
    if(file.isDirectory()){
        //Delete the contents first (list() may be null on I/O error)
        String[] contents = file.list();
        if(contents!=null){
            for (String temp : contents) {
                delete(new File(file, temp)); //recursive delete
            }
        }
        //BUG FIX: File.delete() returns false on failure rather than throwing;
        //the old code ignored the result and printed success unconditionally.
        if(!file.delete())
            throw new IOException("Unable to delete directory: "+file.getAbsolutePath());
        System.out.println("Directory is deleted : "
            + file.getAbsolutePath());
    }else{ //Base case
        //if file, then delete it
        if(!file.delete())
            throw new IOException("Unable to delete file: "+file.getAbsolutePath());
        System.out.println("File is deleted : " + file.getAbsolutePath());
    }
}
/**
 * Cluster entry point for redoing a single fold of one incomplete problem.
 * args[0] (one-based) encodes the index into the incomplete array (n/10) and
 * the zero-based fold (n%10); out-of-range indices are silently ignored.
 */
public static void transformIncomplete(String[] args){
    int n=Integer.parseInt(args[0])-1;
    int problemIdx=n/10;
    int foldNum=n%10;
    if(problemIdx>=incomplete.length) //Beyond the incomplete list: nothing to do
        return;
    doSingleTransform(incomplete[problemIdx],foldNum);
}
/**
 * Shapelet-transforms one pre-built CV fold of a problem and writes the
 * transformed train/test sets into ShapeletCV/. Skips folds whose output
 * already exists. Single-job counterpart of the threaded run() method.
 *
 * @param problemNum index into DataSets.fileNames
 * @param foldNum zero-based fold index (file names use foldNum+1)
 */
public static void doSingleTransform(int problemNum,int foldNum){
String fileName=DataSets.fileNames[problemNum];
String clusterPath = "/gpfs/sys/ajb/TSC Problems/"+fileName+"/";
String path=clusterPath;
String shapeletPath=path+"ShapeletCV/";
File f1= new File(shapeletPath+fileName+"_TRAIN"+(foldNum+1)+".arff");
File f2 = new File(shapeletPath+fileName+"_TEST"+(foldNum+1)+".arff");
if(f1.exists() && f2.exists()){
System.out.println(" Transform "+foldNum+" problem "+fileName+" already exists");
return;
}
// Fold files are read from the problem directory (as written by formCV) and
// the transformed versions are written into ShapeletCV/.
Instances train = ClassifierTools.loadData(clusterPath+fileName+"_TRAIN"+(foldNum+1));
Instances test = ClassifierTools.loadData(clusterPath+fileName+"_TEST"+(foldNum+1));
FullShapeletTransform st=new ShapeletTransformDistCaching();
// if(train.numInstances()>=500 || train.numAttributes()>500)
// st = new ShapeletTransform();
st.supressOutput();
// Keep as many shapelets as the larger of attribute count and instance count.
st.setNumberOfShapelets(Math.max(train.numAttributes(), train.numInstances()));
try {
Instances sTrain=st.process(train);
Instances sTest=st.process(test);
OutFile of1 = new OutFile(shapeletPath+fileName+"_TRAIN"+(foldNum+1)+".arff");
OutFile of2 = new OutFile(shapeletPath+fileName+"_TEST"+(foldNum+1)+".arff");
of1.writeLine(sTrain.toString());
of2.writeLine(sTest.toString());
} catch (Exception ex) {
Logger.getLogger(CrossValidateShapelets.class.getName()).log(Level.SEVERE, null, ex);
}
}
/**
 * Estimates train accuracy for one problem by 10-fold stratified cross
 * validation of the heterogeneous ensemble on the shapelet-transformed train
 * data, writing "&lt;problem&gt;,&lt;accuracy&gt;" to the TrainCV results file.
 * Writes an accuracy of -1 and returns when the transformed data is missing.
 *
 * @param file problem name
 */
public static void shapeletTrainSingle(String file){
    String clusterPath = "/gpfs/sys/ajb/ShapeletTransformed/";
    String desktopPath="C:/Users/ajb/Dropbox/TSC Problems/ShapeletTransformed/";
    String path=desktopPath;
    if(useCluster)
        path=clusterPath;
    //Load
    OutFile of=new OutFile(path+"TrainCV/"+file+"_trainCVacc.csv");
    File f= new File(path+file+"Transformed_TRAIN");
    if(!f.exists()){
        of.writeLine(file+","+"-1");
        //BUG FIX: previously fell through and tried to load the file just
        //determined to be missing.
        return;
    }
    Instances train = ClassifierTools.loadData(path+file+"Transformed_TRAIN");
    //Get classifiers
    ArrayList<String> names= new ArrayList<>();
    ArrayList<Classifier> c=setSingleClassifiers(names);
    HeterogeneousEnsemble hc = new HeterogeneousEnsemble(c);
    hc.useCVWeighting(true);
    //Find Accuracy
    double acc=ClassifierTools.stratifiedCrossValidation(train, hc,10, 1);
    //Write to file
    of.writeLine(file+","+acc);
}
/**
 * Entry point for the train CV accuracy stage. With no arguments runs a single
 * hard-coded desktop problem; otherwise args[0] (one-based) selects the
 * problem to process on the cluster.
 */
public static void shapeletTrainCV(String[] args){
    if(args.length>0){ //Cluster run: args[0] is the 1-based problem index
        useCluster=true;
        int problemNum=Integer.parseInt(args[0])-1;
        System.out.println(" Transforming ="+DataSets.fileNames[problemNum]);
        shapeletTrainSingle(DataSets.fileNames[problemNum]);
    }
    else{ //Desktop run on a single fixed problem
        useCluster=false;
        System.out.println(" ON DESKTOP");
        System.out.println(" Transforming :"+DataSets.fileNames[34]);
        shapeletTrainSingle("ItalyPowerDemand");
    }
}
/**
 * Collates the per-problem train CV accuracy files into a single
 * allResults.csv, writing an empty entry (and a console warning) for any
 * problem whose results file is missing.
 */
public static void combineShapeletTrain(){
    String path="C:/Users/ajb/Dropbox/Results/ShapeletDomain/TrainCV/";
    OutFile combo=new OutFile(path+"allResults.csv");
    for(String s:DataSets.fileNames){
        String resultFile=path+s+"_trainCVacc.csv";
        if(new File(resultFile).exists()){
            InFile inf = new InFile(resultFile);
            String first=inf.readLine();
            combo.writeLine(first+","+inf.readLine());
        }
        else{
            combo.writeLine(s+",");
            System.out.println(resultFile+" DOES NOT EXIST");
        }
    }
}
/**
 * Entry point. The commented-out calls are the other stages of the experiment
 * pipeline; uncomment the one to run. Currently set to collate the train CV
 * accuracy result files.
 */
public static void main(String[] args){
// formCV();
// transformIncomplete(args);
// doTransform(args);
// classifyProblem(args);
// combineandInvertFolds();
// shapeletTrainCV(args);
combineShapeletTrain();
}
}