package development; import weka.core.spectral_distance_functions.LikelihoodRatioDistance; import java.util.*; import java.text.*; import statistics.simulators.SimulateAR; import weka.core.*; import weka.filters.*; import weka.filters.timeseries.*; import weka.classifiers.Classifier; import weka.classifiers.lazy.*; import fileIO.*; import utilities.ClassifierTools; import weka.classifiers.trees.*; import weka.classifiers.bayes.*; import weka.classifiers.functions.Logistic; /* * Experimental agenda * 1. Show DTW doesnt work on ARMA data * 2. Show ARMA doesnt work on TSDM? * 3. Show that RLE tends towards ARMA for sim data * 4. Show that RLE outperforms FFT? * 5. Evaluate alternative distance metrics for run lengths * 6. Run a model shift experiment */ public class RunLengthExperiments { public static String path="C:\\Research\\Data\\Time Series Classification\\"; /* Problems where ACF oputperforms 1-NN */ public static String[] fileNames={ //Number of train,test cases,length,classes "OSULeaf", //200,242,427,6 "SwedishLeaf", //500,625,128,15 "wafer",//1000,6174,152,2 //Index 18, after this the data has not been normalised. "Beef", //30,30,470,5 "Coffee", //28,28,286,2 "OliveOil", "FordA", "FordB", "SonyAIBORobotSurface", "StarLightCurves", "Symbols", "TwoLeadECG" }; /* ARMA MODELS * Model for Y=log(lynx )-2.9036 is y(t)=1.13y(t-1)-0.51y(t-2)+0.23y(t-3)-0.29y(t-4)+0.14y(t-5)-0.14y(t-6)+0.08y (t-7)-0.04y(y-8)+0.13y(t-9)+0.19y(t-10)-0.31y(t-11)+e * from H. Tong, "Some comments on the Canadian lynx data-with discussion" J. Roy. Statist. Soc. A, 140 (1977) pp. 
432-435; 448-468 */ public static double[][][] exampleModels={ { {1.3532,0.4188,-1.2153,0.3091,0.1877,-0.0876,0.0075,0.0004}, {1.0524,0.9042,-1.2193,0.0312,0.263,-0.0567,-0.0019} }, { {0.488, 3.2496, -1.6615, -4.5163, 2.4227, 3.5192, -1.9921, -1.69, 1.0227, 0.5156, -0.3423, -0.0984, 0.0756, 0.0108, -0.0109, -0.0005, 0.001, 0, -0.0001}, {0.2212, 3.1268, -0.7433, -4.2211, 1.0535, 3.2423, -0.8344, -1.5702, 0.4107, 0.5012, -0.1316, -0.1069, 0.0279, 0.0151, -0.0039, -0.0014, 0.0003, 0.0001, 0} }, { {-3.598, -4.1991, -0.3087, 3.2337, 2.5102, 0.3777, -0.3093, -0.1241, -0.0009, 0.0057, 0.0008}, {-2.8419, -2.1235, 0.9762, 1.9624, 0.6231, -0.1995, -0.1281, -0.0056, 0.0055, 0.0009} }, //Model 2 0.605 0.84 { {-1.1084,-0.097,0.1579,0.0478,0.0044,0.0001}, {-1.0993,-0.0436,0.1601,0.0426,0.0036,0.0001} } }; /** * Evaluate: Given a classifier, each of the Full, Clipped and Histogram evaluated on //Benchmark 1. Euclidean 2. DTW //Full model 3. Durban Levinsen Recursions //compressed 4. Clipped+Durban Levinsen Recursions //compressed 4. Histograms+Euclidean distance //compressed 5. Histograms+ Gower metric //compressed 5. FFT+ euclidean //compressed 5. FFT+ likelihood */ /** * Experiment 1: Pure test of concept: show that DTW no use for ARMA data * Start with AR models of fixed length, n=500, with 1000 cases. * model 1: Ar1=0.5. 
Vary parameter difference with model 2 from 0.5 to 0 * */ public static void Experiment1_AR1_Test(int nosFiles,OutFile of){ of.writeLine("ModelNos,EuclideanRAW,DTW_RAW,EuclideanARMA"); Instances test,train,testARMA=null,trainARMA=null; for(int i=1;i<=nosFiles;i++){ //Load up the data System.out.println("Model"+i+","); String str=""; of.writeString("Model"+i+","); test=ClassifierTools.loadData( SimulateAR.path+"AR1\\trainModel"+i); train=ClassifierTools.loadData( SimulateAR.path+"AR1\\testModel"+i); //Train 1-NN, 1-NN DTW, on raw data and fitted ARMA data int nosClassifiers=2; Classifier[] all=new Classifier[nosClassifiers]; NormalizableDistance df =new EuclideanDistance(); df.setDontNormalize(true); all[0] = new kNN(df); all[1]=new kNN(new DTW_DistanceEfficient()); ARMA ar=new ARMA(); try{ trainARMA=ar.process(train); testARMA=ar.process(test); } catch(Exception e){ System.out.println("Error in transforming to arma"+e); System.exit(0); } str+=ClassifierTools.singleTrainTestSplitAccuracy(all[0],train,test)+","; str+=ClassifierTools.singleTrainTestSplitAccuracy(all[1],train,test)+","; str+=ClassifierTools.singleTrainTestSplitAccuracy(all[0],trainARMA,testARMA); System.out.println("RESULT ="+str); of.writeLine(str); } } /** * Experiment 2: Show histogram tends towards optimal as n increases for fixed AR1 * * Generate on the fly */ public static void Experiment2_AR1_Classification(OutFile of){ int startN=100; int endN=5000; int increment=200; int nosCases=100; int reps=10; // double[][] paras={{0.5,},{0.7}}; double[][] paras={{1.3532,0.4188,-1.2153,0.3091,0.1877,-0.0876,0.0075,0.0004}, {1.0524,0.9042,-1.2193,0.0312,0.263,-0.0567,-0.0019} }; IB1_Classification(paras,startN,endN,increment,nosCases,reps,of); } /* Experiment 3: repeat 2 with more complex models, then introduce Jan's distance metric * * */ public static void Experiment3_RandomAR_Classification(String fileName){ int startN=100; int endN=1000; int increment=100; int nosCases=50; int reps=1; int modelReps=10; 
double[][] paras; OutFile of; for(int i=0;i<modelReps;i++){ of=new OutFile(fileName+i+".csv"); //Generate two random AR models //Random length between 4 and 15 int nosParas=4+(int)(Math.random()*10); paras=new double[2][nosParas]; for(int j=0;j<nosParas;j++){ paras[0][j]=-0.8+1.9*Math.random(); paras[1][j]=paras[0][j]-0.2+0.4*Math.random(); if(paras[1][j]<=-1 || paras[1][j]>=1) paras[1][j]=-0.8+1.9*Math.random(); } paras[0]=SimulateAR.findCoefficients(paras[0]); paras[1]=SimulateAR.findCoefficients(paras[1]); for(int j=0;j<nosParas;j++) of.writeString(paras[0][j]+","); of.writeString("\n"); for(int j=0;j<nosParas;j++) of.writeString(paras[1][j]+","); of.writeString("\n"); System.out.println("ARMA Model ="+nosParas); System.out.print("\n"); DecimalFormat dc=new DecimalFormat("##.####"); for(int j=0;j<nosParas;j++) System.out.print(dc.format(paras[0][j])+","); System.out.print("\n"); for(int j=0;j<nosParas;j++) System.out.print(dc.format(paras[1][j])+","); System.out.print("\n"); //Random parameters between -0.5 and 0.5 //Measure histogram accuracy IB1_Classification(paras,startN,endN,increment,nosCases,reps,of); } } public static void Experiment4_TSDM_Problems(String fileName){ //Run ARMA, RL and DTW on standard TSDM problems Instances train,test; Instances armaTrain,armaTest; Instances histoTrain,histoTest; // Instances fftTest,fftTrain; OutFile of =new OutFile(fileName); of.writeLine("file,euclid,dtw,arma,histo"); double euclidAcc=0,dtwAcc=0,histAcc=0,armaAcc=0; for(int i=0;i<fileNames.length;i++){ System.out.println(" Running data set "+fileNames[i]); //1. Load test/train split String base=path+fileNames[i]+"\\"+fileNames[i]; test=ClassifierTools.loadData(base+"_TEST"); train=ClassifierTools.loadData(base+"_TRAIN"); //2. Transform FFT, ARMA and Run Lengths ARMA ar=new ARMA(); RunLength rl=new RunLength(); int n=train.numAttributes()-1; rl.noGlobalMean(); rl.setMaxRL(n/2); ar.setMaxLag(n/2); //3. 
Perform 4 accuracy measurements try{ armaTrain=ar.process(train); armaTest=ar.process(test); histoTrain=rl.process(train); histoTest=rl.process(test); euclidAcc=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),train,test); Classifier c=new kNN(new DTW_DistanceBasic()); dtwAcc=ClassifierTools.singleTrainTestSplitAccuracy(c,train,test); histAcc=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),histoTrain,histoTest); armaAcc=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),armaTrain,armaTest); }catch(Exception e){ System.out.println("Error in process e = "+e); e.printStackTrace(); System.exit(0); } //4. Write to file of.writeLine(fileNames[i]+","+euclidAcc+","+dtwAcc+","+armaAcc+","+histAcc); } } //Experiment 5: Show for a single model the relative accuracy of ARMA, RunLengths and FFT, DTW //1. Generate the data sets //2. For n=100 to 1000 // 2. Measure accuracy and store. public static void Experiment5_AR_NearestNeighbour_SingleSeriesComparison(String fileName){ //Generate a model int startN=100, endN=5100, increment=200; OutFile of=new OutFile(fileName); of.writeString("ARMA_Euclid,RL_Euclid,FFT,RL_Gower,RL_DTW\n"); System.out.print("ARMA,RL_Euclid,RL_Gower,RL_Likelihood,RL_DTW\n"); double[][] paras=exampleModels[0]; // {{0.5},{0.7}}; // {1.3532,0.4188,-1.2153,0.3091,0.1877,-0.0876,0.0075,0.0004} // {1.0524,0.9042,-1.2193,0.0312,0.263,-0.0567,-0.0019} /* Random r = new Random(); r.setSeed(RANDOMSEED); int nosParas=3; double[][] paras=new double[2][]; paras[0]=new double[nosParas]; paras[1]=new double[nosParas]; for(int j=0;j<nosParas;j++){ paras[0][j]=-0.5+1*r.nextDouble(); paras[1][j]=paras[0][j]-0.5+r.nextDouble(); if(paras[1][j]<=-1) paras[1][j]=-.95; if(paras[1][j]>=1) paras[1][j]=0.95; } */ for(int m=0;m<1;m++) { // paras=generateValidModel(10,20); for(int n=startN;n<=endN;n+=increment){ double[] acc=AR_NN_Classification(paras,n,100); of.writeString(m+","+n+","); System.out.print(m+","+n+","); for(int i=0;i<acc.length;i++){ 
of.writeString(acc[i]+","); System.out.print(acc[i]+","); } of.writeString("\n"); System.out.print("\n"); } } } public static void Experiment6_AR_NN_DistanceMetricComparison(String fileName) //This method generates the results for the section \subsection{Alternative Distance Measures for Run Lengths} //Rerun with alternative n, manually hacked! { //Generate a model int n=5000; int nosModels=200; OutFile of=new OutFile(fileName); of.writeString("ARMA_Euclid,RL_Euclid,FFT,RL_Gower,RL_DTW\n"); System.out.print("ARMA,RL_Euclid,RL_Gower,RL_Likelihood,RL_DTW\n"); double[][] paras; for(int m=0;m<nosModels;m++) { paras=generateValidModel(10,20); double[] acc=AR_NN_Classification(paras,n,100); of.writeString(m+","+n+","); System.out.print(m+","+n+","); for(int i=0;i<acc.length;i++){ of.writeString(acc[i]+","); System.out.print(acc[i]+","); } of.writeString("\n"); System.out.print("\n"); } } public static void Experiment7_AR_RLvsFFT_AFC(String fileName){ //Generate a model int startN=2000,endN=5000,inc=400; int nosModels=20; OutFile of=new OutFile(fileName); of.writeString("ARMA_Euclid,RL_DTW,FFT_Euclid,FFT_DTW,AFC_Euclid,AFC_DTW\n"); System.out.print("ARMA_Euclid,RL_DTW,FFT_Euclid,FFT_DTW,AFC_Euclid,AFC_DTW\n"); double[][] paras; for(int n=startN;n<endN;n+=inc){ double[] av=new double[8]; System.out.println("\n Running length = "+n+","); for(int m=0;m<nosModels;m++) { paras=generateValidModel(10,20); double[] acc=AR_TransformTest(paras,n,100); for(int i=0;i<acc.length;i++) av[i]+=acc[i]; for(int i=0;i<acc.length;i++) System.out.print(acc[i]+","); System.out.print("\n"); } of.writeString(nosModels+","+n+","); System.out.print(nosModels+","+n+","); for(int i=0;i<av.length;i++){ of.writeString(av[i]/nosModels+","); System.out.print(av[i]/nosModels+","); } of.writeString("\n"); System.out.print("\n"); } } public static void Experiment8_AlternativeClassifiers(String fileName){ //Generate a model int startN=2200,endN=5000,inc=400; int nosModels=30; int runs=30; OutFile of=new 
OutFile(fileName); of.writeString("ARMA_LDA,RL_LDA,FFT_LDA,ACF_LDA\n"); System.out.print("ARMA_LDA,RL_LDA,FFT_LDA,ACF_LDA\n"); double[][] paras; for(int n=startN;n<=endN;n+=inc){ double[] av=new double[15]; System.out.println("\n Running length = "+n+","); for(int m=0;m<nosModels;m++) { paras=generateValidModel(10,20); double[] acc; //1. Generate a random stationary model acc=AR_Mixed_Classification(paras,n,100); for(int j=0;j<av.length;j++) av[j]+=acc[j]; for(int i=0;i<acc.length;i++) System.out.print(acc[i]+","); System.out.print("\n"); } of.writeString(nosModels+","+n+","); System.out.print(nosModels+","+n+","); for(int i=0;i<av.length;i++){ of.writeString(av[i]/nosModels+","); System.out.print(av[i]/nosModels+","); } of.writeString("\n"); System.out.print("\n"); } } public static void Experiment9_VariableLength(String fileName){ //Fitting times int nosModels=20; double[][] paras; int smallN=500; int largeN=10000; int inc=500; OutFile of=new OutFile(fileName); for(int n=1000;n<largeN;n+=inc){ double[] av=new double[3]; System.out.println("\n Running length = "+n+","); for(int m=0;m<nosModels;m++) { paras=generateValidModel(10,20); double[] acc; //1. 
Generate a random stationary model acc=AR_FixedLength_Classification(paras,smallN,n,100); for(int j=0;j<av.length;j++) av[j]+=acc[j]; System.out.print("run ="+m+","); for(int i=0;i<acc.length;i++) System.out.print(acc[i]+","); System.out.print("\n"); } of.writeString(nosModels+","+n+","); System.out.print(nosModels+","+n+","); for(int i=0;i<av.length;i++){ of.writeString(av[i]/nosModels+","); System.out.print(av[i]/nosModels+","); } of.writeString("\n"); System.out.print("\n"); } } public static void Experiment10_Timing(String fileName){ int nosModels=1; double[][] paras; int startN=4500; int endN=10000; int inc=500; OutFile of=new OutFile(fileName); for(int n=startN;n<endN;n+=inc){ double[] av=new double[3]; System.out.println("\n Running length = "+n+","); for(int m=0;m<nosModels;m++) { paras=generateModel(10,20); double[] acc; //1. Generate a random stationary model acc=AR_TimingExperiment(paras,n,100); for(int j=0;j<av.length;j++) av[j]+=acc[j]/(double)1000000; System.out.print("run ="+m+","); for(int i=0;i<acc.length;i++) System.out.print(acc[i]/1000000+","); System.out.print("\n"); } of.writeString(nosModels+","+n+","); System.out.print(nosModels+","+n+","); for(int i=0;i<av.length;i++){ of.writeString(av[i]/nosModels+","); System.out.print(av[i]/nosModels+","); } of.writeString("\n"); System.out.print("\n"); } } public static void Experiment11_CrossPoints(String fileName){ int nosModels=10; double[][] paras; int startN=1000; int endN=5000; int inc=200; OutFile of=new OutFile(fileName); for(int n=startN;n<=endN;n+=inc){ System.out.println("\n Running base length = "+n+","); boolean beat=false; int longN=n+2*inc; while(!beat&& longN<5000){ double[] av=new double[3]; for(int m=0;m<nosModels;m++){ paras=generateValidModel(10,20); double[] acc; acc=AR_FixedLength_Classification(paras,n,longN,100); for(int j=0;j<av.length;j++) av[j]+=acc[j]; System.out.print("run ="+m+","+" n ="+n+" longN="+longN+","); for(int i=0;i<acc.length;i++) System.out.print(acc[i]+","); 
System.out.print("\n"); } if(av[1]>=av[0] && av[1]>=av[2]){ beat=true; of.writeString(nosModels+","+n+","+","+longN+","); System.out.print(nosModels+","+n+","+","+longN+","); for(int i=0;i<av.length;i++){ of.writeString(av[i]/nosModels+","); System.out.print(av[i]/nosModels+","); } of.writeString("\n"); System.out.print("\n"); } else longN+=inc; } } } public static int RANDOMSEED=7; public static void main(String[] args){ // Experiment1_AR1_Test(50, new OutFile("C:\\Research\\Results\\RunLengths\\Experiment1.csv")); // Experiment2_AR1_Classification(new OutFile("C:\\Research\\Results\\RunLengths\\Experiment2.csv")); // Experiment3_RandomAR_Classification("C:\\Research\\Results\\RunLengths\\Experiment3_"); Experiment4_TSDM_Problems("C:\\Research\\Results\\RunLengths\\Experiment4_TSDMProblems.csv"); // Experiment5_AR_NearestNeighbour_SingleSeriesComparison("C:\\Research\\Results\\RunLengths\\Experiment5_SingleModelComparison.csv"); // Experiment6_AR_NN_DistanceMetricComparison("C:\\Research\\Results\\RunLengths\\Experiment6_DistanceMetricComparison4.csv"); // Experiment7_AR_RLvsFFT_AFC("C:\\Research\\Results\\RunLengths\\Experiment7_RLvsFFT_AFC.csv"); // Experiment8_AlternativeClassifiers("C:\\Research\\Results\\RunLengths\\Experiment8_DifferentClassifiers.csv"); // Experiment7_AR_VaryingSize_MultipleSeriesComparison("C:\\Research\\Results\\RunLengths\\Experiment7_MultipleClassifierComparison2.csv"); // Experiment9_VariableLength("C:\\Research\\Results\\RunLengths\\Experiment9_DiffentN.csv"); // Experiment10_Timing("C:\\Research\\Results\\RunLengths\\Timings.csv"); // Experiment11_CrossPoints("C:\\Research\\Results\\RunLengths\\CrossPoints.csv"); // Experiment6_AIC_Lengths("C:\\Research\\Results\\RunLengths\\AIC_Lengths.csv"); // Experiment7_AIC_Effect("C:\\Research\\Results\\RunLengths\\AIC_Effect.csv"); // FFT_Test("C:\\Research\\Results\\RunLengths\\FFT_Test.csv"); // exampleSeries("C:\\Research\\Results\\RunLengths\\exampleSeries2.csv"); // 
generateValidModels("C:\\Research\\Results\\RunLengths\\ValidModels.csv"); f1HistogramsBasic(); } /** * This method randomly generates 100 stationary arma models for use in classification experiments. It * 1. Randomly generate model length between 3 and 10 * 2. Randomly generate first model parameters * 3. Perturb each parameter by 10% for second model * 4. Call SimulateAR.findCoefficients to find AR paras * 5. Generate a test samples of 100 and 1000 * 6. Test if zero mean (series mean between -0.5 and 0.5 * 7. Measure accuracy with ARMA fit. Must be above 60% for 100 and above 90% for 1000 * * @param fileName */ public static double[][] generateValidModel(int minLength, int maxLength){ double[][] paras=new double[2][]; Random r = new Random(); r.setSeed(RANDOMSEED); int[] cases={100,100}; boolean good=false; while(!good){ good=true; int nosParas1=(int)(minLength+maxLength*r.nextDouble()); int nosParas2=(int)(nosParas1+(2-4*r.nextDouble())); paras[0]=new double[nosParas1]; paras[1]=new double[nosParas2]; for(int j=0;j<nosParas1;j++){ paras[0][j]=-0.9+1.8*r.nextDouble(); } for(int j=0;j<nosParas2;j++){ if(j<nosParas1){ if(r.nextDouble()>0.5) paras[1][j]=paras[0][j]*(1+0.1*r.nextDouble()); else paras[1][j]=paras[0][j]*(1-0.1*r.nextDouble()); if(paras[1][j]<=-1) paras[1][j]=-.95; if(paras[1][j]>=1) paras[1][j]=0.95; } } // 4. Call SimulateAR.findCoefficients to find AR paras paras[0]=SimulateAR.findCoefficients(paras[0]); paras[1]=SimulateAR.findCoefficients(paras[1]); // 5. Generate a test samples of 500 and 1000 Instances smallTrain =SimulateAR.generateARDataSet(paras,400,cases); Instances smallTest =SimulateAR.generateARDataSet(paras,400,cases); Instances largeTrain =SimulateAR.generateARDataSet(paras,1000,cases); Instances largeTest =SimulateAR.generateARDataSet(paras,1000,cases); // 6. 
Test if zero mean (series mean between -0.5 and 0.5 if(zeroMeans(smallTrain)||zeroMeans(smallTest)||zeroMeans(largeTrain)||zeroMeans(largeTest)){ good=false; } ARMA ar =new ARMA(); double smallAcc=0,largeAcc=0; try{ Instances arTrain=ar.process(smallTrain); Instances arTest=ar.process(smallTest); smallAcc=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),arTrain,arTest); if(smallAcc<0.6||smallAcc>0.9){ good=false; } }catch(Exception e){ System.out.println("Exception in transformation! skipping"); good=false; } try{ Instances arTrain=ar.process(largeTrain); Instances arTest=ar.process(largeTest); largeAcc=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),arTrain,arTest); if(largeAcc<=smallAcc|| largeAcc==1.0){ System.out.println("Rejecting model for being too hard or easy on the LARGE set. Acc ="+largeAcc); good=false; } }catch(Exception e){ System.out.println("Exception in transformation! skipping"); good=false; } if(good) return paras; } return null; } public static double[][] generateModel(int minLength, int maxLength){ double[][] paras=new double[2][]; Random r = new Random(); r.setSeed(RANDOMSEED); int[] cases={100,100}; boolean good=false; while(!good){ good=true; int nosParas1=(int)(minLength+maxLength*r.nextDouble()); int nosParas2=(int)(nosParas1+(2-4*r.nextDouble())); paras[0]=new double[nosParas1]; paras[1]=new double[nosParas2]; for(int j=0;j<nosParas1;j++){ paras[0][j]=-0.9+1.8*r.nextDouble(); } for(int j=0;j<nosParas2;j++){ if(j<nosParas1){ if(r.nextDouble()>0.5) paras[1][j]=paras[0][j]*(1+0.1*r.nextDouble()); else paras[1][j]=paras[0][j]*(1-0.1*r.nextDouble()); if(paras[1][j]<=-1) paras[1][j]=-.95; if(paras[1][j]>=1) paras[1][j]=0.95; } } // 4. Call SimulateAR.findCoefficients to find AR paras paras[0]=SimulateAR.findCoefficients(paras[0]); paras[1]=SimulateAR.findCoefficients(paras[1]); // 5. 
Generate a test samples of 500 and 1000 return paras; } return null; } public static void generateValidModels(String fileName){ int modelCount=0; int nosParas1,nosParas2; double[][] paras=new double[2][]; Random r = new Random(); r.setSeed(RANDOMSEED); int[] cases={100,100}; boolean good=true; DecimalFormat dc = new DecimalFormat("###.####"); OutFile of = new OutFile(fileName); while(modelCount<100){ good=true; // 1. Randomly generate model length between 2 and 10 nosParas1=(int)(10+20*r.nextDouble()); nosParas2=(int)(nosParas1+(2-4*r.nextDouble())); // 2. Randomly generate first model parameters // 3. Perturb each parameter by 10% for second model paras[0]=new double[nosParas1]; paras[1]=new double[nosParas2]; for(int j=0;j<nosParas1;j++){ paras[0][j]=-0.9+1.8*r.nextDouble(); } for(int j=0;j<nosParas2;j++){ if(j<nosParas1){ if(r.nextDouble()>0.5) paras[1][j]=paras[0][j]*(1+0.1*r.nextDouble()); else paras[1][j]=paras[0][j]*(1-0.1*r.nextDouble()); if(paras[1][j]<=-1) paras[1][j]=-.95; if(paras[1][j]>=1) paras[1][j]=0.95; } } // 4. Call SimulateAR.findCoefficients to find AR paras paras[0]=SimulateAR.findCoefficients(paras[0]); paras[1]=SimulateAR.findCoefficients(paras[1]); // 5. Generate a test samples of 500 and 1000 Instances smallTrain =SimulateAR.generateARDataSet(paras,100,cases); Instances smallTest =SimulateAR.generateARDataSet(paras,100,cases); Instances largeTrain =SimulateAR.generateARDataSet(paras,1000,cases); Instances largeTest =SimulateAR.generateARDataSet(paras,1000,cases); // 6. Test if zero mean (series mean between -0.5 and 0.5 if(zeroMeans(smallTrain)||zeroMeans(smallTest)||zeroMeans(largeTrain)||zeroMeans(largeTest)){ System.out.println("Rejecting model for non zero means"); System.out.println("Small data are"+smallTrain); good=false; } // 7. Measure accuracy with ARMA fit. 
Must be above 60% for 100 and above 90% for 1000 ARMA ar =new ARMA(); double smallAcc=0,largeAcc=0; try{ Instances arTrain=ar.process(smallTrain); Instances arTest=ar.process(smallTest); smallAcc=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),arTrain,arTest); if(smallAcc<0.6||smallAcc>0.9){ System.out.println("Rejecting model for being too hard or easy on the small set. Acc ="+smallAcc); good=false; } }catch(Exception e){ System.out.println("Exception in transformation! skipping"); good=false; } try{ Instances arTrain=ar.process(largeTrain); Instances arTest=ar.process(largeTest); largeAcc=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),arTrain,arTest); if(largeAcc<=smallAcc|| largeAcc==1.0){ System.out.println("Rejecting model for being too hard or easy on the LARGE set. Acc ="+largeAcc); good=false; } }catch(Exception e){ System.out.println("Exception in transformation! skipping"); good=false; } if(good==true){ System.out.println("KEEPING MODEL >"); of.writeLine(smallAcc+","+largeAcc); of.writeString(paras[0].length+","); for(int i=0;i<paras[0].length;i++){ of.writeString(dc.format(paras[0][i])+","); System.out.print(dc.format(paras[0][i])+","); } of.writeString("\n"); System.out.print("\n"); of.writeString(paras[1].length+","); for(int i=0;i<paras[1].length;i++){ of.writeString(dc.format(paras[1][i])+","); System.out.print(dc.format(paras[1][i])+","); } of.writeLine("\n"); System.out.println("\nACC Small = "+smallAcc+" ACC Large ="+largeAcc); System.out.print("\n\n"); modelCount++; } } } public static boolean zeroMeans(Instances d){ for(int i=0;i<d.numInstances();i++){ double mean=0; int count=0; Instance inst=d.instance(i); for(int j=0;j<inst.numAttributes();j++){ if(j!=inst.classIndex()){ mean+=inst.value(j); count++; } mean/=count; if(mean<-0.5|| mean>0.5) return false; } } return true; } public static double[] AR_NN_Classification(double[][] paras, int n,int nosCases){ double[] acc=new double[5]; Instances train,test; Instances arTrain,arTest; 
Instances rlTrain,rlTest; Instances fftTrain,fftTest; int[] cases={nosCases,nosCases}; //1. Generate a random stationary model train=SimulateAR.generateARDataSet(paras,n,cases); test=SimulateAR.generateARDataSet(paras,n,cases); //2. Transform to ARMA, RunLengths and FFT. ARMA ar=new ARMA(); RunLength rl=new RunLength(); FFT fft=new FFT(); //Go for 10% compression rl.setMaxRL(n/10); ar.setMaxLag(n/10); ar.setUseAIC(true); fft.padSeries(true); try{ arTrain=ar.process(train); arTest=ar.process(test); rlTrain=rl.process(train); rlTest=rl.process(test); fftTrain=fft.process(train); fftTest=fft.process(test); fft.truncate(fftTrain,n/10); fft.truncate(fftTest,n/10); //3. Do test train accuracy with. Classifier dtw,gower; dtw=new kNN(new DTW_DistanceBasic()); gower=new kNN(new GowerDistance(rlTrain)); acc[0]=ClassifierTools.singleTrainTestSplitAccuracy(new kNN(new EuclideanDistance()),arTrain,arTest); acc[1]=ClassifierTools.singleTrainTestSplitAccuracy(new kNN(new EuclideanDistance()),rlTrain,rlTest); acc[2]=ClassifierTools.singleTrainTestSplitAccuracy(new kNN(new GowerDistance(rlTrain)),rlTrain,rlTest); acc[3]=ClassifierTools.singleTrainTestSplitAccuracy(new kNN(new LikelihoodRatioDistance()),rlTrain,rlTest); acc[4]=ClassifierTools.singleTrainTestSplitAccuracy(dtw,rlTrain,rlTest); }catch(Exception e){ System.out.println("Error w ="+e); System.exit(0); } return acc; } public static double[] AR_TransformTest(double[][] paras, int n,int nosCases){ double[] acc=new double[4]; Instances train,test; Instances arTrain,arTest; Instances rlTrain,rlTest; Instances fftTrain,fftTest; Instances acfTrain,acfTest; int[] cases={nosCases,nosCases}; //1. Generate a random stationary model train=SimulateAR.generateARDataSet(paras,n,cases); test=SimulateAR.generateARDataSet(paras,n,cases); //2. Transform to ARMA, RunLengths and FFT. 
ARMA ar=new ARMA(); RunLength rl=new RunLength(); FFT fft=new FFT(); ACF acf= new ACF(); //Go for 10% compression rl.setMaxRL(n/10); ar.setMaxLag(n/10); ar.setUseAIC(true); acf.setMaxLag(n/10); fft.padSeries(true); try{ arTrain=ar.process(train); arTest=ar.process(test); rlTrain=rl.process(train); rlTest=rl.process(test); fftTrain=fft.process(train); fftTest=fft.process(test); acfTrain=acf.process(train); acfTest=acf.process(test); fft.truncate(fftTrain,n/10); fft.truncate(fftTest,n/10); //3. Do test train accuracy with. acc[0]=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),arTrain,arTest); acc[1]=ClassifierTools.singleTrainTestSplitAccuracy(new kNN(new DTW_DistanceBasic()),rlTrain,rlTest); acc[2]=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),fftTrain,fftTest); acc[3]=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),acfTrain,acfTest); }catch(Exception e){ System.out.println("Error w ="+e); e.printStackTrace(); System.exit(0); } return acc; } //1-NN, C4.5, Naive Bayes, RandomForests with AR, RL and FFT at 10% public static double[] AR_Mixed_Classification(double[][] paras, int n,int nosCases){ double[] acc=new double[15]; Instances train,test; Instances arTrain,arTest; Instances rlTrain,rlTest; Instances fftTrain,fftTest; Instances acfTrain,acfTest; int[] cases={nosCases,nosCases}; //1. Generate a random stationary model train=SimulateAR.generateARDataSet(paras,n,cases); test=SimulateAR.generateARDataSet(paras,n,cases); //2. Transform to ARMA, RunLengths and FFT. 
ARMA ar=new ARMA(); RunLength rl=new RunLength(); FFT fft=new FFT(); ACF acf= new ACF(); //Go for 10% compression rl.setMaxRL(n/10); ar.setMaxLag(n/10); ar.setUseAIC(true); acf.setMaxLag(n/10); fft.padSeries(true); try{ arTrain=ar.process(train); arTest=ar.process(test); rlTrain=rl.process(train); rlTest=rl.process(test); fftTrain=fft.process(train); fftTest=fft.process(test); acfTrain=acf.process(train); acfTest=acf.process(test); fft.truncate(fftTrain,n/10); fft.truncate(fftTest,n/10); //3. Do test train accuracy with. Classifier dtw; dtw=new kNN(new DTW_DistanceBasic()); acc[0]=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),arTrain,arTest); acc[1]=ClassifierTools.singleTrainTestSplitAccuracy(new J48(),arTrain,arTest); acc[2]=ClassifierTools.singleTrainTestSplitAccuracy(new NaiveBayes(),arTrain,arTest); acc[3]=ClassifierTools.singleTrainTestSplitAccuracy(new RandomForest(),arTrain,arTest); acc[4]=ClassifierTools.singleTrainTestSplitAccuracy(new Logistic(),arTrain,arTest); acc[5]=ClassifierTools.singleTrainTestSplitAccuracy(dtw,rlTrain,rlTest); acc[6]=ClassifierTools.singleTrainTestSplitAccuracy(new J48(),rlTrain,rlTest); acc[7]=ClassifierTools.singleTrainTestSplitAccuracy(new NaiveBayes(),rlTrain,rlTest); acc[8]=ClassifierTools.singleTrainTestSplitAccuracy(new RandomForest(),rlTrain,rlTest); acc[9]=ClassifierTools.singleTrainTestSplitAccuracy(new Logistic(),rlTrain,rlTest); acc[10]=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),acfTrain,acfTest); acc[11]=ClassifierTools.singleTrainTestSplitAccuracy(new J48(),acfTrain,acfTest); acc[12]=ClassifierTools.singleTrainTestSplitAccuracy(new NaiveBayes(),acfTrain,acfTest); acc[13]=ClassifierTools.singleTrainTestSplitAccuracy(new RandomForest(),acfTrain,acfTest); acc[14]=ClassifierTools.singleTrainTestSplitAccuracy(new Logistic(),acfTrain,acfTest); }catch(Exception e){ System.out.println("Error w ="+e); e.printStackTrace(); System.exit(0); } return acc; } //1-NN, C4.5, Naive Bayes, RandomForests with AR, RL 
and FFT at 10% public static double[] AR_TimingExperiment(double[][] paras, int n, int nosCases){ double[] acc=new double[3]; Instances train,test; Instances arTrain,arTest; Instances rlTrain,rlTest; Instances fftTrain,fftTest; Instances acfTrain,acfTest; int reps=30; int[] cases={nosCases,nosCases}; //1. Generate a random stationary model train=SimulateAR.generateARDataSet(paras,n,cases); try{ //2. Transform to RL RunLength rl=new RunLength(); ARMA ar=new ARMA(); ACF acf= new ACF(); //Go for 10% compression ar.setMaxLag(n/10); ar.setUseAIC(true); acf.setMaxLag(n/10); rl.setMaxRL(n/10); long start=System.nanoTime(); for(int i=0;i<reps;i++) rlTrain=rl.process(train); start=System.nanoTime()-start; acc[0]+=(double)start/(double)reps; //ARMA and ACF start=System.nanoTime(); for(int i=0;i<reps;i++) arTrain=ar.process(train); start=System.nanoTime()-start; acc[1]+=(double)start/(double)reps; start=System.nanoTime(); for(int i=0;i<reps;i++) acfTrain=acf.process(train); start=System.nanoTime()-start; acc[2]+=(double)start/(double)reps; }catch(Exception e){ System.out.println("Error w ="+e); e.printStackTrace(); System.exit(0); } return acc; } // public static double[] AR_FixedLength_Classification(double[][] paras, int shortN,int longN, int nosCases){ double[] acc=new double[3]; Instances train,test; Instances arTrain,arTest; Instances rlTrain,rlTest; Instances fftTrain,fftTest; Instances acfTrain,acfTest; int[] cases={nosCases,nosCases}; //1. Generate a random stationary model train=SimulateAR.generateARDataSet(paras,longN,cases); test=SimulateAR.generateARDataSet(paras,longN,cases); try{ //2. Transform to RL RunLength rl=new RunLength(); rl.setMaxRL(shortN/10); rlTrain=rl.process(train); rlTest=rl.process(test); //3. 
Remove data down to short N for(int i=shortN;i<longN;i++){ train.deleteAttributeAt(shortN-1); test.deleteAttributeAt(shortN-1); } //ARMA and ACF ARMA ar=new ARMA(); ACF acf= new ACF(); //Go for 10% compression ar.setMaxLag(shortN/10); ar.setUseAIC(true); acf.setMaxLag(shortN/10); arTrain=ar.process(train); arTest=ar.process(test); acfTrain=acf.process(train); acfTest=acf.process(test); //3. Do test train accuracy with. Classifier dtw; dtw=new kNN(new DTW_DistanceBasic()); acc[0]=ClassifierTools.singleTrainTestSplitAccuracy(new Logistic(),arTrain,arTest); acc[1]=ClassifierTools.singleTrainTestSplitAccuracy(dtw,rlTrain,rlTest); acc[2]=ClassifierTools.singleTrainTestSplitAccuracy(new Logistic(),acfTrain,acfTest); }catch(Exception e){ System.out.println("Error w ="+e); e.printStackTrace(); System.exit(0); } return acc; } public static void IB1_Classification(double[][] paras, int startN, int endN, int increment, int nosCases, int reps, OutFile of){ double euclidAcc=0,histAcc=0, armaAcc=0; Instances train,test; Instances armaTrain,armaTest; Instances histoTrain,histoTest; int[] cases={nosCases,nosCases}; of.writeLine("n,euclid,histogram,arma"); for(int n=startN;n<=endN;n+=increment){ System.out.println(" Running with series length ="+n); histAcc=0; armaAcc=0; euclidAcc=0; of.writeString(n+","); for(int r=1;r<=reps;r++){ //1. Generate two class problem with nosCases in each class, each series length n train=SimulateAR.generateARDataSet(paras,n,cases); test=SimulateAR.generateARDataSet(paras,n,cases); //2. 
transform to ARMA and Histogram ARMA ar=new ARMA(); RunLength rl=new RunLength(); rl.noGlobalMean(); rl.setMaxRL(n/4); ar.setMaxLag(n/4); try{ armaTrain=ar.process(train); armaTest=ar.process(test); histoTrain=rl.process(train); histoTest=rl.process(test); euclidAcc+=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),train,test); histAcc+=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),histoTrain,histoTest); armaAcc+=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),armaTrain,armaTest); }catch(Exception e){ System.out.println("Error in process e = "+e); e.printStackTrace(); System.exit(0); } //3. Measure classification accuracy on both } // euclidAcc/=reps; histAcc/=reps; armaAcc/=reps; System.out.println("Euclid="+euclidAcc+" Histo ="+histAcc+" ARMA ="+armaAcc); of.writeLine(histAcc+","+armaAcc); } } //This is to create similar looking series that differ in AR structure and in histograms public static void exampleSeries(String fileName){ int n=512; int nosParas=3; Random r = new Random(); r.setSeed(RANDOMSEED); OutFile of=new OutFile(fileName); double[][] paras=new double[2][nosParas]; for(int j=0;j<nosParas;j++){ paras[0][j]=-0.5+1*r.nextDouble(); paras[1][j]=paras[0][j]-0.5+r.nextDouble(); if(paras[1][j]<=-1) paras[1][j]=-.95; if(paras[1][j]>=1) paras[1][j]=0.95; } paras[0]=SimulateAR.findCoefficients(paras[0]); paras[1]=SimulateAR.findCoefficients(paras[1]); for(int j=0;j<nosParas;j++) of.writeString("\n"); for(int j=0;j<nosParas;j++) of.writeString(paras[0][j]+","); of.writeString("\n"); for(int j=0;j<nosParas;j++) of.writeString(paras[1][j]+","); of.writeString("\n"); System.out.println("ARMA Model ="+nosParas); System.out.print("\n"); DecimalFormat dc=new DecimalFormat("##.####"); for(int j=0;j<nosParas;j++) System.out.print(dc.format(paras[0][j])+","); System.out.print("\n"); for(int j=0;j<nosParas;j++) System.out.print(dc.format(paras[1][j])+","); System.out.print("\n"); //Generate two series of length 500 int[] cases={2,2}; Instances 
train=SimulateAR.generateARDataSet(paras,n,cases); double[][] data=new double[train.numInstances()][]; for(int i=0;i<train.numInstances();i++) data[i]=train.instance(i).toDoubleArray(); for(int i=0;i<data.length;i++) of.writeString(dc.format(data[i][data[i].length-1])+","); of.writeString("\n"); for(int j=0;j<data[0].length-1;j++){ for(int i=0;i<data.length;i++) of.writeString(dc.format(data[i][j])+","); of.writeString("\n"); } //Fit ARMA ARMA ar= new ARMA(); ar.setUseAIC(true); //Fit Histogram RunLength rl=new RunLength(); rl.noGlobalMean(); rl.setMaxRL(n/4); ar.setMaxLag(n/4); try{ Instances arTrain=ar.process(train); data=new double[train.numInstances()][]; for(int i=0;i<arTrain.numInstances();i++) data[i]=arTrain.instance(i).toDoubleArray(); System.out.print("\n ARMA FIT =\n"); for(int i=0;i<arTrain.numInstances();i++){ for(int j=0;j<30;j++){ System.out.print(dc.format(data[i][j])+","); } System.out.print("\n"); } System.out.print("\n"); of.writeString("\n"); of.writeString("\n"); for(int i=0;i<data.length;i++) of.writeString(dc.format(data[i][data[i].length-1])+","); of.writeString("\n"); System.out.print("\n RUN LENGTH FIT =\n"); for(int j=0;j<data[0].length-1;j++){ for(int i=0;i<data.length;i++) of.writeString(dc.format(data[i][j])+","); of.writeString("\n"); } of.writeString("\n"); of.writeString("\n"); Instances histTrain=rl.process(train); data=new double[train.numInstances()][]; for(int i=0;i<train.numInstances();i++) data[i]=histTrain.instance(i).toDoubleArray(); for(int i=0;i<arTrain.numInstances();i++){ for(int j=0;j<data[i].length;j++){ System.out.print(dc.format(data[i][j])+","); } System.out.print("\n"); } for(int i=0;i<data.length;i++) of.writeString(dc.format(data[i][data[i].length-1])+","); of.writeString("\n"); for(int j=0;j<data[0].length-1;j++){ for(int i=0;i<data.length;i++) of.writeString(dc.format(data[i][j])+","); of.writeString("\n"); } }catch(Exception e){ System.out.println("Exception = "+e); System.exit(0); } } public static void 
runLengthTest(){ for(int i=0;i<fileNames.length;i++) { Instances train=ClassifierTools.loadData(path+fileNames[i]+"\\"+fileNames[i]+"_TRAIN"); Instances test=ClassifierTools.loadData(path+fileNames[i]+"\\"+fileNames[i]+"_TEST"); //Filter through RLE clipper RunLength rl=new RunLength(); rl.noGlobalMean(); Clipping clip=new Clipping(); try{ Classifier c=new IBk(); System.out.print("\n"+fileNames[i]+"\t"); double a = ClassifierTools.singleTrainTestSplitAccuracy(c, train, test); System.out.print(a+"\t"); c=new kNN(new DTW_DistanceBasic()); a = ClassifierTools.singleTrainTestSplitAccuracy(c, train, test); System.out.print(a+"\t"); Instances clipTrain=clip.process(train); Instances clipTest=clip.process(test); a = ClassifierTools.singleTrainTestSplitAccuracy(c, clipTrain, clipTest); System.out.print(a+"\t"); Instances rlTrain=rl.process(train); Instances rlTest=rl.process(test); a = ClassifierTools.singleTrainTestSplitAccuracy(c, rlTrain, rlTest); System.out.print(a+"\t"); }catch(Exception e){ System.exit(0); } } } //DEPRECIATED: NOT USED public static String SingleAR1_Experiment(String file){ String path=SimulateAR.path+"AR1\\"; String str=""; //Set up filters for data sets. Clipped data needs to be stored as reals to work with knn Clipping clip=new Clipping(); clip.setUseRealAttributes(true); ////Max run length set, RunLength rl=new RunLength(); rl.noGlobalMean(); rl.setMaxRL(10); //ARMA fitted model with DL recursions, no AIC stopping I THINK! check. Max length of model //25% of series length ARMA ar=new ARMA(); //FFT: keep only first 25% of terms? 
FFT fft =new FFT(); int nosDataSets=5; Instances[] train=new Instances[nosDataSets]; Instances[] test=new Instances[nosDataSets]; train[0]=ClassifierTools.loadData(path+file+"\\"+file+"_TRAIN"); test[0]=ClassifierTools.loadData(path+file+"\\"+file+"_TEST"); try{ train[1]=clip.process(train[0]); test[1]=clip.process(test[0]); // System.out.println("Clipped Test ="+test[1]); train[2]=rl.process(train[1]); test[2]=rl.process(test[1]); train[3]= ar.process(train[0]); test[3]= ar.process(test[0]); train[3]= fft.process(train[0]); test[3]= fft.process(test[0]); System.out.println("ARMA Test ="+train[3]); }catch(Exception e){ System.out.println("Exception in the filters ="+e); e.printStackTrace(); System.exit(0); } // System.out.println("Train clipped"+train[1]); // System.out.println("Train histo"+train[2]); //For each type of data (RAW, CLIPPED and RL) try the following classifiers //1. 1-NN Euclid //Raw data classifiers int nosClassifiers=2; Classifier[] all=new Classifier[nosClassifiers]; NormalizableDistance df =new EuclideanDistance(); df.setDontNormalize(true); all[0] = new kNN(df); all[1]=new kNN(new DTW_DistanceEfficient()); /* all[0] = new IBk(1); all[1]=new kNN(new DTW_DistanceEfficient()); all[3]=new DTW_kNN(); ((DTW_kNN)all[3]).optimiseWindow(true); */ for(int j=0;j<nosClassifiers;j++) for(int i=0;i<train.length;i++) str+=ClassifierTools.singleTrainTestSplitAccuracy(all[j],train[i],test[i])+","; //Raw data classifiers //Transform classifiers //2/ Clipped //2.1 Euclidean //2.2 DTW //2.3 ARMA Model using DL recursions // return str; } //FFT Test public static void FFT_Test(String fileName){ int startN=100; int endN=1000; int increment=200; int nosCases=2; int reps=10; // double[][] paras={{0.5},{0.7}}; double[][] paras={{1.3532,0.4188,-1.2153,0.3091,0.1877,-0.0876,0.0075,0.0004}, {1.0524,0.9042,-1.2193,0.0312,0.263,-0.0567,-0.0019} }; OutFile of=new OutFile(fileName); double fftAcc=0,histAcc=0, armaAcc=0; Instances train,test; Instances armaTrain,armaTest; 
Instances histoTrain,histoTest; Instances fftTrain,fftTest; int[] cases={nosCases,nosCases}; of.writeLine("n,euclid,histogram,arma"); for(int n=startN;n<=endN;n+=increment){ System.out.println(" Running with series length ="+n); histAcc=0; armaAcc=0; fftAcc=0; of.writeString(n+","); for(int r=1;r<=reps;r++){ //1. Generate two class problem with nosCases in each class, each series length n train=SimulateAR.generateARDataSet(paras,n,cases); test=SimulateAR.generateARDataSet(paras,n,cases); of.writeLine(train+"\n"); //2. transform to ARMA and Histogram ARMA ar=new ARMA(); RunLength rl=new RunLength(); FFT fft=new FFT(); rl.setMaxRL(n/4); ar.setMaxLag(n/4); fft.padSeries(true); try{ ar.setUseAIC(true); armaTrain=ar.process(train); armaTest=ar.process(test); histoTrain=rl.process(train); histoTest=rl.process(test); fftTrain=fft.process(train); fftTest=fft.process(test); of.writeLine(fftTrain+"\n"); fft.truncate(fftTrain,n/4); fft.truncate(fftTest,n/4); fftAcc+=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),fftTrain,fftTest); histAcc+=ClassifierTools.singleTrainTestSplitAccuracy(new J48(),histoTrain,histoTest); armaAcc+=ClassifierTools.singleTrainTestSplitAccuracy(new IB1(),armaTrain,armaTest); }catch(Exception e){ System.out.println("Error in process: e = "+e); e.printStackTrace(); System.exit(0); } //3. 
Measure classification accuracy on both } fftAcc/=reps; histAcc/=reps; armaAcc/=reps; System.out.println("FFT="+fftAcc+" Histo ="+histAcc+" ARMA ="+armaAcc); of.writeLine(histAcc+","+armaAcc); } } //Find an example public static void distanceMetricComparison(String file){ } public static void AIC_Lengths(String fileName){ //Generate a model int startN=100, endN=5000, increment=100; int[] cases ={200,1}; OutFile of=new OutFile(fileName); of.writeString("n,ARMA_Length\n"); System.out.print("n,ARMA_Length \n"); //Model2: 0.76 0.995 double[][] paras={ {0.4881,0.6105,-0.2979}, {0.8391}}; // double[][] paras={ {1.3532,0.4188,-1.2153,0.3091,0.1877,-0.0876,0.0075,0.0004}, // {1.0524,0.9042,-1.2193,0.0312,0.263,-0.0567,-0.0019} }; try{ for(int n=startN;n<endN;n+=increment) { //1. Generate data of.writeString(n+","); System.out.print(n+","); Instances train =SimulateAR.generateARDataSet(paras,n,cases); ARMA ar = new ARMA(); //2. Fit ARMA Instances arTrain=ar.process(train); //Find number of parameters for(int i=0;i<cases[0];i++) { Instance inst=arTrain.instance(i); int length=0; while(length<inst.numAttributes()&&inst.value(length)!=0) length++; of.writeString(length+","); System.out.print(length+","); } of.writeString("\n"); System.out.print("\n"); } }catch(Exception e){ System.out.println("Error in Experiment 6, exit "); System.exit(0); } } public static void AIC_Effect(String fileName){ //Measure the effect of AIC length. //1. Keep all //2. Use AIC //3. 
Fix to correct length (fix MAXLAG) int startN=5000, endN=6000, increment=500; int[] cases ={100,100}; int reps=2; OutFile of=new OutFile(fileName); of.writeString("n,ar_AIC,ar_All,ar_Correct\n"); System.out.print("n,ar_All,ar_AIC,ar_Correct\n"); double[][] paras= { {1.3532,0.4188,-1.2153,0.3091,0.1877,-0.0,0.00,0.0004}, {1.3532,0.4188,-1.2153,0.3091,0.0,-0.0,0.00,0.5004}}; // exampleModels[0]; DecimalFormat dc=new DecimalFormat("###.####"); try{ for(int f=0;f<1;f++){ // paras=exampleModels[f]; System.out.println("\n\n MODEL NUMBER "+f+"\n\n"); for(int n=startN;n<endN;n+=increment) { double a1=0,a2=0,a3=0; for(int i=0;i<reps;i++){ //1. Generate data Instances train =SimulateAR.generateARDataSet(paras,n,cases); Instances test =SimulateAR.generateARDataSet(paras,n,cases); //Model 1, use all the parameters ARMA ar = new ARMA(); ar.setUseAIC(false); if(n<200) ar.setMaxLag(10); else ar.setMaxLag(20); //Model 2, use AIC fit ARMA ar2 = new ARMA(); ar2.setUseAIC(true); //Model 3, use correct number ARMA ar3 = new ARMA(); ar3.setUseAIC(false); ar3.setMaxLag(8); //2. Fit ARMA Instances arTrain=ar.process(train); Instances ar2Train=ar2.process(train); Instances ar3Train=ar3.process(train); Instances arTest=ar.process(test); Instances ar2Test=ar2.process(test); Instances ar3Test=ar3.process(test); a1+=ClassifierTools.singleTrainTestSplitAccuracy(new J48(),arTrain,arTest); a2+=ClassifierTools.singleTrainTestSplitAccuracy(new J48(),ar2Train,ar2Test); a3+=ClassifierTools.singleTrainTestSplitAccuracy(new J48(),ar3Train,ar3Test); } of.writeLine(n+","+a1/reps+","+a2/reps+","+a3/reps); System.out.println(n+","+dc.format(a1/reps)+","+dc.format(a2/reps)+","+dc.format(a3/reps)); } } }catch(Exception e){ System.out.println("Error in Experiment 6, exit "); System.exit(0); } } //F1 Experiment: Generate sliding window histograms, recalculate histogram AND Spectrogram each time. 
// Can be massively optimised by online calculation
    /**
     * Slides a window of 8000 values one step at a time over the first 80000
     * values read from myF1.csv. For every step the run length histogram of
     * the window is recomputed and one line "d1,d2" is written to
     * basicdistancesF1.csv, where d1 is dist() between this window and the
     * previous one and d2 is dist() between their histograms.
     */
    public static void f1HistogramsBasic(){
        //Parameters: n =series length, w = window length, mrl = max run length
        int n=80000; //629291;
        int w=8000; //8000 == 1 second
        int mrl=200;
        double[] data=new double[n];

        //Load sound data into array
        InFile f=new InFile("C:\\Research\\Data\\F1\\myF1.csv");
        OutFile of=new OutFile("C:\\Research\\Data\\F1\\basicdistancesF1.csv");
        //NOTE(review): of2 is never written to in this method (the histogram dump
        //below is commented out). Kept because constructing it presumably still
        //creates histoF1.csv - confirm before removing.
        OutFile of2=new OutFile("C:\\Research\\Data\\F1\\histoF1.csv");
        for(int i=0;i<n;i++)
            data[i]=f.readDouble();

        //Histogram for the first window
        double[] oldWindow=new double[w];
        System.arraycopy(data,0,oldWindow,0,w);
        RunLength rl=new RunLength();
        int[] oldHisto=rl.processSingleSeries(oldWindow, mrl);

        //Each step:
        //1. Extract new series
        //2. Get histogram
        //3. Measure distance between previous and current series and histograms
        //4. Write to file
        //NOTE(review): the bound i<n-w stops one short of the last window that fits
        //(start index n-w). Left unchanged; confirm whether that is intended.
        for(int i=1;i<n-w;i++){
            //A fresh array each step: the previous window is still referenced by oldWindow
            double[] window=new double[w];
            System.arraycopy(data,i,window,0,w);
            //Histogram
            int[] histo=rl.processSingleSeries(window, mrl);
//          for(int j=0;j<histo.length;j++)
//              of.writeString(histo[j]+",");
//          of.writeString("\n");
            if(i%10000==0)
                System.out.println(" Finished step "+i);
            //Squared Euclidean distance between raw data
            double d1=dist(window,oldWindow);
            //Squared Euclidean distance between histograms
            double d2=dist(histo,oldHisto);
            of.writeLine(d1+","+d2);
            oldHisto=histo;
            oldWindow=window;
        }
    }

    /**
     * Sum of squared element-wise differences between a and b, i.e. the
     * squared Euclidean distance (no square root is taken).
     * Iterates over a.length, so b must be at least as long as a.
     *
     * @param a first series
     * @param b second series
     * @return sum over i of (a[i]-b[i])^2
     */
    public static double dist(double[] a, double[] b){
        double d=0;
        for(int i=0;i<a.length;i++){
            double diff=a[i]-b[i];
            d+=diff*diff;
        }
        return d;
    }

    /**
     * Sum of squared element-wise differences between two int arrays, i.e.
     * the squared Euclidean distance (no square root is taken).
     * Iterates over a.length, so b must be at least as long as a.
     *
     * @param a first histogram
     * @param b second histogram
     * @return sum over i of (a[i]-b[i])^2, accumulated as a double
     */
    public static double dist(int[] a, int[] b){
        double d=0;
        for(int i=0;i<a.length;i++)
            //The product is formed in int arithmetic, as before, then added to the double total
            d+=(a[i]-b[i])*(a[i]-b[i]);
        return d;
    }
}