/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package development;

import fileIO.InFile;
import fileIO.OutFile;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Random;
import tests.TwoSampleTests;
import utilities.ClassifierTools;
import weka.classifiers.Classifier;
import weka.classifiers.Evaluation;
import weka.classifiers.IteratedSingleClassifierEnhancer;
import weka.classifiers.SingleClassifierEnhancer;
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.functions.SMO;
import weka.classifiers.functions.supportVector.PolyKernel;
import weka.classifiers.functions.supportVector.RBFKernel;
import weka.classifiers.lazy.*;
import weka.classifiers.meta.*;
import weka.classifiers.trees.J48;
import weka.classifiers.trees.RandomForest;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.NormalizeCase;

/**
 * Comparison of built in ensembles with RSC. Require
 *   Bagging
 *   AdaBoost
 *   RandomCommittee
 *   RandomSubSpace
 *   Dagging
 *
 * missing: Multiboost
 *
 * Ensemble size: 25 or 100
 *
 * <p>NOTE(review): the list above is the original author's note. The code in
 * {@link #setEnsembleClassifiers(ArrayList)} currently builds Bagging,
 * AdaBoostM1, RandomSubSpace, RandomCommittee and MultiBoostAB (no Dagging).
 */
public class RSC_classification {

    /** Root directory holding one sub-directory per problem, each with "-train" and "-test" files. */
    public static String dataPath="C:\\Users\\ajb\\Dropbox\\UCR Classification Problems\\";

    /** Not read anywhere in this class. */
    public static String resultPath="";

    /** Directory all result CSV files of this class are written to. */
    private static final String RESULTS_DIR="C:\\Users\\ajb\\Dropbox\\Results\\RSC\\";

    /** Names of the problems to evaluate; each is used as both directory and file-name stem. */
    public static String[] fileNames={"abalone",
        "waveform",
        "satimage",
        "banana",
        "ringnorm",
        "twonorm", //image??
        "german",
        "wdbc",
        "yeast",
        "ionosphere",
        "sonar",
        "heart",
        "cancer",
        "wins",
        "ecoli"
        /* Problems currently excluded:
        "clouds",
        "concentric",
        "diabetes",
        "glass2",
        "haberman",
        "liver",
        "magic",
        "pendigitis",
        "phoneme",
        "segment",
        "thyroid",
        "vehicle",
        "vowel",
        */
    };

    /** Shared random source; reseeded by {@link #assessRSC_EnsembleClassifiers(String, int)}. */
    public static Random rand=new Random();

    /**
     * Creates one default-configured instance of each ensemble under test.
     *
     * @param names list that receives one display name per ensemble, appended
     *              in the same order as the returned array (existing entries
     *              are kept, so pass a fresh list to get exactly one name per
     *              classifier)
     * @return the ensembles: Bagging, AdaBoostM1, RandomSubSpace,
     *         RandomCommittee, MultiBoostAB
     */
    public static IteratedSingleClassifierEnhancer[] setEnsembleClassifiers(ArrayList<String> names){
        ArrayList<IteratedSingleClassifierEnhancer> ensembles=new ArrayList<>();

        names.add("Bagging");
        ensembles.add(new Bagging());

        names.add("Adaboost");
        ensembles.add(new AdaBoostM1());

        names.add("RandomSubSpace");
        ensembles.add(new RandomSubSpace());

        names.add("RandomCommittee");
        ensembles.add(new RandomCommittee());

        names.add("Multiboost");
        ensembles.add(new MultiBoostAB());

        return ensembles.toArray(new IteratedSingleClassifierEnhancer[ensembles.size()]);
    }

    /**
     * Creates the single (non-ensemble) lazy classifiers under test.
     *
     * @param names list that receives one display name per classifier,
     *              appended in the same order as the returned array
     * @return the classifiers: IBk(1) without cross-validation,
     *         RandomizedSphereCover(1), IBk(50) with cross-validation switched
     *         on, and a default RandomizedSphereCover with cross-validation
     *         switched on
     */
    public static Classifier[] setSingleLazyClassifiers(ArrayList<String> names){
        ArrayList<Classifier> classifiers=new ArrayList<>();

        IBk oneNN=new IBk(1);
        oneNN.setCrossValidate(false);
        classifiers.add(oneNN);
        names.add("OneNN");

        classifiers.add(new RandomizedSphereCover(1));
        names.add("OneRSC");

        IBk kNN=new IBk(50);
        kNN.setCrossValidate(true);
        classifiers.add(kNN);
        names.add("kNN");

        RandomizedSphereCover cvRSC=new RandomizedSphereCover();
        cvRSC.crossValidate(true);
        classifiers.add(cvRSC);
        // NOTE(review): this label duplicates the one used for
        // RandomizedSphereCover(1) above, so two columns share a header.
        // Left unchanged; confirm the intended name before renaming.
        names.add("OneRSC");

        /* Disabled lazy classifiers:
        c=new KStar();
        sc2.add(c);
        names.add("KStar");
        c=new LWL();
        sc2.add(c);
        names.add("LWL");
        */
        return classifiers.toArray(new Classifier[classifiers.size()]);
    }

    /**
     * Picks the RandomizedSphereCover constructor argument ("alpha") that
     * gives the highest 5-fold cross-validation accuracy on the training data.
     * Candidates 1 to 9 are tried; ties keep the smallest value. Progress is
     * printed to standard output. The shared {@link #rand} is passed to the
     * cross-validation. On any exception the error is printed and the JVM
     * exits with status 0.
     *
     * @param train training data used for the cross-validation
     * @return the best candidate found (1 if none scored above 0)
     */
    public static int estimateAlpha(Instances train){
        // NOTE(review): the loop bound is exclusive, so alpha == maxAlpha is
        // never evaluated; kept as in the original - confirm whether intended.
        int maxAlpha=10;
        int folds=5;
        int bestAlpha=1;
        double bestAcc=0;
        for(int alpha=1;alpha<maxAlpha;alpha++){
            Classifier c=new RandomizedSphereCover(alpha);
            try{
                Evaluation e=new Evaluation(train);
                e.crossValidateModel(c, train, folds, rand);
                double acc=e.correct()/(double)train.numInstances();
                System.out.println(" alpha ="+alpha+" acc ="+acc);
                if(acc>bestAcc){
                    bestAcc=acc;
                    bestAlpha=alpha;
                }
            }
            catch(Exception e){
                System.out.println(" Error ="+e);
                e.printStackTrace();
                System.exit(0);
            }
        }
        return bestAlpha;
    }

    /**
     * Returns a new data set holding every instance of {@code train} followed
     * by every instance of {@code test}. Neither argument is modified.
     */
    private static Instances mergeTrainAndTest(Instances train, Instances test){
        Instances all=new Instances(train);
        for(int j=0;j<test.numInstances();j++)
            all.add(test.instance(j));
        return all;
    }

    /**
     * Evaluates every ensemble from {@link #setEnsembleClassifiers(ArrayList)}
     * with RandomizedSphereCover as base classifier on every problem in
     * {@link #fileNames}.
     *
     * <p>For each problem the train and test files are merged and, for each of
     * 30 runs, reshuffled and resplit (the test split keeps the original test
     * set size). Alpha is estimated on the training split with
     * {@link #estimateAlpha(Instances)}, each ensemble is built and its test
     * accuracy recorded. The mean accuracy over the runs is written to one CSV
     * file and the variance (mean of squares minus squared mean) to a second.
     * {@link #rand} is reseeded with 100 at the start. On any exception the
     * error is printed and the JVM exits with status 0.
     *
     * @param fileName not used by this method
     * @param nosBase  number of iterations set on each ensemble; also part of
     *                 the output file names
     */
    public static void assessRSC_EnsembleClassifiers(String fileName, int nosBase){
        //Test 1: just do bagging and boosting with 1-RSC and 1-NN
        ArrayList<String> names=new ArrayList<>();
        // Built once here only to obtain the column names and the ensemble
        // count; fresh instances are created for every run below.
        int nosEnsembles=setEnsembleClassifiers(names).length;
        int runs=30;
        rand.setSeed(100);

        // NOTE(review): "Ensemblelassifiers" looks like a typo for
        // "EnsembleClassifiers"; the name is left unchanged so that existing
        // result files and any readers of them keep working.
        // NOTE(review): neither OutFile is explicitly closed here; confirm
        // whether OutFile needs closing/flushing.
        OutFile of =new OutFile(RESULTS_DIR+"AccEnsemblelassifiers"+nosBase+".csv");
        OutFile of2 =new OutFile(RESULTS_DIR+"SDEnsembleClassifiers"+nosBase+".csv");

        for(String s:names){
            of.writeString("RSC"+s+",");
            of2.writeString("RSC"+s+",");
        }
        /* Disabled header columns:
        for(String s:names){
            of.writeString("kNN"+s+",");
            of2.writeString("kNN"+s+",");
        }
        for(String s:names){
            of.writeString("Tree"+s+",");
            of2.writeString("Tree"+s+",");
        }
        */
        // Terminate the header line in BOTH files so the first problem row
        // starts on its own line.
        of.writeString("\n");
        of2.writeString("\n");

        try{
            for(int i=0;i<fileNames.length;i++)
            {
                of2.writeString(fileNames[i]+",");
                of.writeString(fileNames[i]+",");
                System.out.println(" Problem = "+fileNames[i]);

                double[] sum=new double[nosEnsembles];
                double[] sumsq=new double[nosEnsembles];
                Instances train=ClassifierTools.loadData(dataPath+fileNames[i]+"\\"+fileNames[i]+"-train");
                Instances test=ClassifierTools.loadData(dataPath+fileNames[i]+"\\"+fileNames[i]+"-test");
                int testSize=test.numInstances();
                Instances all=mergeTrainAndTest(train,test);

                for(int j=0;j<runs;j++){
                    //Form randomised test train split: the first testSize
                    //instances of the shuffled data become the test set.
                    all.randomize(rand);
                    train=new Instances(all);
                    test=new Instances(all,0);
                    for(int k=0;k<testSize;k++){
                        Instance temp=train.instance(0);
                        test.add(temp);
                        train.delete(0);
                    }

                    //Estimate alpha parameter
                    int alpha=estimateAlpha(train);
                    System.out.println(" Run ="+j+" best Alpha ="+alpha);

                    //Set the classifiers. A scratch list receives the names so
                    //the header list does not grow on every run.
                    IteratedSingleClassifierEnhancer[] c1=setEnsembleClassifiers(new ArrayList<String>());
                    for(IteratedSingleClassifierEnhancer s:c1){
                        s.setClassifier(new RandomizedSphereCover(alpha));
                        s.setNumIterations(nosBase);
                    }

                    //Build classifiers and evaluate test accuracy
                    for(int k=0;k<c1.length;k++){
                        c1[k].buildClassifier(train);
                        double acc=ClassifierTools.accuracy(test,c1[k]);
                        sum[k]+=acc;
                        sumsq[k]+=acc*acc;
                    }
                }

                //Store mean and variance over runs.
                for(int k=0;k<nosEnsembles;k++){
                    sum[k]/=runs;
                    sumsq[k]=sumsq[k]/runs-sum[k]*sum[k];
                    of.writeString(sum[k]+",");
                    of2.writeString(sumsq[k]+",");
                }
                of.writeString("\n");
                of2.writeString("\n");
            }
        }
        catch(Exception e){
            System.out.println(" Error in accuracy ="+e);
            e.printStackTrace();
            System.exit(0);
        }
        //Subspace
    }

    /**
     * Cross-validates every classifier from
     * {@link #setSingleLazyClassifiers(ArrayList)} on every problem in
     * {@link #fileNames}, using 10 folds on the merged and shuffled train and
     * test data.
     *
     * <p>Two CSV files are written, each with a header row of classifier names
     * followed by one row per problem. The first file receives the value this
     * method reports as "acc" ({@code a[0][0]} of
     * {@code ClassifierTools.crossValidationWithStats}), the second the value
     * it reports as "sd" ({@code a[1][0]}). On any exception the error is
     * printed and the JVM exits with status 0.
     *
     * @param fileName not used by this method
     */
    public static void assessSingleClassifiers(String fileName){
        ArrayList<String> names=new ArrayList<String>();
        Classifier[] c= setSingleLazyClassifiers(names);
        int folds=10;

        // NOTE(review): neither OutFile is explicitly closed here; confirm
        // whether OutFile needs closing/flushing.
        OutFile of =new OutFile(RESULTS_DIR+"CVAcc"+folds+"AccSingleClassifiers.csv");
        OutFile of2 =new OutFile(RESULTS_DIR+"CVAcc"+folds+"SingleClassifiers.csv");

        for(int i=0;i<names.size();i++){
            of.writeString(","+names.get(i));
            of2.writeString(","+names.get(i));
        }
        // Terminate the header line so the first problem row is not appended
        // to it.
        of.writeString("\n");
        of2.writeString("\n");

        try{
            for(int i=0;i<fileNames.length;i++)
            {
                of2.writeString(fileNames[i]+",");
                of.writeString(fileNames[i]+",");
                Instances train=ClassifierTools.loadData(dataPath+fileNames[i]+"\\"+fileNames[i]+"-train");
                Instances test=ClassifierTools.loadData(dataPath+fileNames[i]+"\\"+fileNames[i]+"-test");
                // Merge using the instance index (the original indexed the
                // test set with the problem index, repeating one instance).
                Instances all=mergeTrainAndTest(train,test);
                all.randomize(new Random());
                System.out.println(" Problem = "+fileNames[i]);

                for(int j=0;j<c.length;j++){
                    // acc[i][j]=ClassifierTools.singleTrainTestSplitAccuracy(c[j], train, test);
                    double[][] a=ClassifierTools.crossValidationWithStats(c[j], all, folds);
                    System.out.println("\t\t"+names.get(j)+" acc ="+a[0][0]+" sd ="+a[1][0]);
                    of.writeString(a[0][0]+",");
                    of2.writeString(a[1][0]+",");
                }
                of.writeString("\n");
                of2.writeString("\n");
            }
        }
        catch(Exception e){
            System.out.println(" Error in accuracy ="+e);
            e.printStackTrace();
            System.exit(0);
        }
    }

    /**
     * Sanity check of IBk: for every problem in {@link #fileNames}, merges and
     * shuffles the train and test data, then runs a 2-fold cross-validation of
     * IBk(1) and of a default IBk with cross-validation switched on, printing
     * the accuracy and both classifiers' {@code getKNN()} values. On any
     * exception the stack trace is printed and the JVM exits with status 0.
     */
    public static void testIBK(){
        for(int i=0;i<fileNames.length;i++){
            Instances train=ClassifierTools.loadData(dataPath+fileNames[i]+"\\"+fileNames[i]+"-train");
            Instances test=ClassifierTools.loadData(dataPath+fileNames[i]+"\\"+fileNames[i]+"-test");
            // Merge using the instance index (the original indexed the test
            // set with the problem index, repeating one instance).
            Instances all=mergeTrainAndTest(train,test);
            all.randomize(new Random());
            System.out.println(" Problem = "+fileNames[i]);

            IBk[] c =new IBk[2];
            c[0]=new IBk(1);
            c[0].setDebug(true);
            c[1]=new IBk();
            c[1].setDebug(true);
            c[1].setCrossValidate(true);
            int folds=2;
            try{
                for(int j=0;j<c.length;j++){
                    Evaluation e=new Evaluation(all);
                    e.crossValidateModel(c[j], all, folds, new Random());
                    System.out.println(" Acc = "+e.correct()/all.numInstances());
                    System.out.println(" IB1 k ="+c[0].getKNN()+" IBk k="+c[1].getKNN());
                }
            }catch(Exception ex){
                ex.printStackTrace();
                System.exit(0);
            }
        }
    }

    /**
     * Reads a results file and compares its first two classifier columns with
     * the tests provided by {@code TwoSampleTests}, printing the Student t
     * test statistic and the Mann-Whitney and robust rank sum values.
     *
     * <p>The file is read as one header line followed by rows consisting of a
     * label and 5 numeric values.
     *
     * @param file path of the results file
     */
    public static void findPairwiseStats(String file){
        // The file is opened twice: once to count the lines, once to read.
        InFile in=new InFile(file);
        int lines=in.countLines();
        in=new InFile(file);
        int nosClassifiers=5;

        in.readLine();      // skip the header row
        double[][] data=new double[nosClassifiers][lines-1];
        for(int i=0;i<lines-1;i++){
            in.readString();    // skip the row label
            for(int j=0;j<nosClassifiers;j++)
                data[j][i]=in.readDouble();
        }

        double[] a=data[0];
        double[] b=data[1];
        TwoSampleTests ts=new TwoSampleTests();
        ts.performTests(a, b);
        //T test
        double tSig=TwoSampleTests.studentT_TestStat(a,b);
        //Robust Rank Sum
        double ranks=TwoSampleTests.rrs_PValue(a,b);
        //Mann-Whitney
        double mw=TwoSampleTests.mw_PValue(a,b);
        System.out.println(" T test ="+tSig+" Mann-Whitney ="+mw+" RRS "+ranks);
    }

    /**
     * Runs the ensemble experiment with 25 and then 100 base classifiers.
     *
     * @param args ignored
     */
    public static void main(String[] args){
        assessRSC_EnsembleClassifiers("EnsembleTest25.csv",25);
        assessRSC_EnsembleClassifiers("EnsembleTest100.csv",100);
//        findPairwiseStats("C:\\Users\\ajb\\Dropbox\\Results\\RSC\\NonSubspaceAcc.csv");
//        testIBK();
//        assessSingleClassifiers("C:\\Users\\ajb\\Dropbox\\Results\\RSC\\RSC_Single_Lazy.csv");
//        assessEnsembleClassifiers("C:\\Users\\ajb\\Dropbox\\Results\\RSC\\RSC_Ensemble.csv");
    }
}