package utilities;
import development.TimeSeriesClassification;
import java.util.Random;
import weka.attributeSelection.ASEvaluation;
import weka.attributeSelection.ASSearch;
import weka.attributeSelection.AttributeEvaluator;
import weka.attributeSelection.AttributeSelection;
import weka.attributeSelection.ChiSquaredAttributeEval;
import weka.attributeSelection.GainRatioAttributeEval;
import weka.attributeSelection.InfoGainAttributeEval;
import weka.attributeSelection.Ranker;
import weka.classifiers.Classifier;
import weka.core.Instances;
import fileIO.OutFile;
import java.util.ArrayList;
import weka.attributeSelection.*;
import weka.classifiers.Evaluation;
import weka.classifiers.meta.AttributeSelectedClassifier;
import weka.classifiers.trees.J48;
public class AttributeSelectionTools {
private AttributeSelection attSel;
private ASEvaluation attEval;
private ASSearch attSearch;
private int numFolds=5;
public AttributeSelectionTools(){
attSel =new AttributeSelection();
attEval=new CfsSubsetEval();
attSearch= new GreedyStepwise();
attSel.setEvaluator(attEval);
attSel.setSearch(attSearch);
}
public void setEvaluation(ASEvaluation a){
attEval=a;
attSel.setEvaluator(attEval);
}
public void setSearch(ASSearch s){
attSearch=s;
attSel.setSearch(attSearch);
}
public static void main(String[] args){
attributeFilterTest();
}
/**
* Assesses the foldsx train accuracy of classifier c on data set train using
* this filter. Note that since the filter is supervised, we have to do the
* attribute selection separately on every fold.
*
* @param train
* @param c
* @param folds
* @return
*/
public double crossValidateAccuracy(Instances train, Classifier c, int folds){
double acc=0;
train.randomize(new Random());
for(int i=0;i<folds;i++){
Instances tempTrain = train.trainCV(folds, i);
Instances tempTest = train.testCV(folds, i);
tempTrain=filterTrainSet(tempTrain);
tempTest=filterTestSet(tempTest);
try{
c.buildClassifier(tempTrain);
}catch(Exception e){
System.out.println("Exception thrown in attributeScoring = "+e.toString());
e.printStackTrace();
System.exit(0);
}
acc+=ClassifierTools.accuracy(tempTest, c)*tempTest.numInstances();
}
acc/=train.numInstances();
return acc;
}
/**
* @param train
*/
public Instances filterTrainSet(Instances train){
try{
attSel.SelectAttributes(train);
return attSel.reduceDimensionality(train);
}catch(Exception e){
System.out.println("Exception thrown in attributeScoring = "+e.toString());
e.printStackTrace();
System.exit(0);
}
return null;
}
public Instances filterTestSet(Instances test){
try{
return attSel.reduceDimensionality(test);
}catch(Exception e){
System.out.println("Exception thrown in attributeScoring = "+e.toString());
e.printStackTrace();
System.exit(0);
}
return null;
}
public double testAccuracy(Instances test, Classifier c){
try{
Instances testReduced=attSel.reduceDimensionality(test);
return ClassifierTools.accuracy(testReduced, c);
} catch(Exception e){
System.out.println(" Exception in testAccuracy ="+e);
e.printStackTrace();
System.exit(0);
}
return 0;
}
/** Determines the attribute selection based on a classifier wrapper.
* @return double[] array,
* first row is the train set cross validation accuracy and deviation between folds,
* the rest are the test set predictions and actuals
* Cross validation set to
* LOOCV if train size <50
* 10x if 50<train size <200
* 5x otherwise
*/
public double[][] filteredAttributePredictions(Instances train, Instances test, Classifier c){
double[][] testPreds=new double[test.numInstances()+1][2];
try{
attSel.SelectAttributes(train);
// System.out.println(" Number Selected ="+indices.length);
// attSel.SelectAttributes(train);
Instances trainReduced=attSel.reduceDimensionality(train);
Instances testReduced=attSel.reduceDimensionality(test);
double[][] preds=ClassifierTools.crossValidationWithStats(c,trainReduced,numFolds);
testPreds[0][0]=preds[0][0];
testPreds[0][1]=preds[0][1];
for(int i=0;i<testReduced.numInstances();i++){
testPreds[i+1][0]=c.classifyInstance(testReduced.instance(i));
testPreds[i+1][1]=testReduced.instance(i).classValue();
}
}catch(Exception e)
{
System.out.println("Exception thrown in attributeScoring = "+e.toString());
e.printStackTrace();
System.exit(0);
}
return testPreds;
}
/**
This methods scores each attribute with IG, IGR and CHI and writes the results to file
Filters:
ChiSquaredAttributeEval, GainRatioAttributeEval, InfoGainAttributeEval,
Single attribute classifiers:
OneRAttributeEval, ReliefFAttributeEval, SVMAttributeEval,
Dont know what these are!
SymmetricalUncertAttributeEval, UnsupervisedAttributeEvaluator
NOTE: I have pulled in Chi Squared from an older weka version, might need testing ...
* @param tr: training instances to evaluate
* @param file: full path for the output
**/
public static void attributeScoring(Instances tr, String file)
{
try{
OutFile f=new OutFile(file);
double e1,e2,e3;
AttributeEvaluator as = new InfoGainAttributeEval();
AttributeEvaluator as2 = new GainRatioAttributeEval();
AttributeEvaluator as3 = new ChiSquaredAttributeEval();
f.writeLine("INDEX,NAME,IG,IGR,CHI");
// as.buildEvaluator(tr);
// as2.buildEvaluator(tr);
// as3.buildEvaluator(tr);
for(int i=0;i<tr.numAttributes();i++)
{
e1= as.evaluateAttribute(i);
e2= as2.evaluateAttribute(i);
e3= as3.evaluateAttribute(i);
f.writeLine(i+","+tr.attribute(i).name()+","+e1+","+e2+","+e3);
System.out.println(i+","+tr.attribute(i).toString()+e1+","+e2+","+e3);
}
}catch(Exception e)
{
System.out.println("Exception thrown in attributeScoring = "+e.toString());
}
}
/** This class takes a particular Evaluator, evaluates each attribute, then returns the ranked
* list. Note we dont return the filter scores, I'm assuming this is done already in attributeScoring
THIS NEEDS TESTING
* * @param tr: training instances to evaluate
* @param as: attribute scoring technique
*/
public static int[] simpleAttributeRanking(Instances tr,ASEvaluation as)
{
int size=tr.numInstances()-1;
int[] rankings;
AttributeSelection a;
Instances trFiltered=null;
ASSearch rank= new Ranker();
int[] att=null;
try{
as.buildEvaluator(tr);
System.out.println("Attribute Selector built");
rank.search(as,tr);
a=new AttributeSelection();
a.setEvaluator(as);
a.setSearch(rank);
System.out.println("Attribute Selector set in AttributeSelection");
System.out.println("Attribute Selector "+a);
System.out.println("Ranker "+rank);
System.out.println("ASEvaluator "+as);
a.SelectAttributes(tr);
att= a.selectedAttributes();
// for(int i=0;i<att.length;i++)
// System.out.print(att[i]+" ");
// System.out.println(a.toResultsString());
}
catch(Exception e)
{
System.out.println(" Error in simpleAttributeRanking "+e.toString());
}
return att;
}
/** This class takes the entire data set and does a forward selection of attributes with a fixed proportion
* of test train.
* NEEDS TESTING
* * @param allData: training instances to base selection on
* @param c: classifier to rank attributes with
* @param testingProportion: test/train prop
* @return array of doubles. Really not sure what this does!!
*/
public static double[] attributeForwardSelection(Instances allData, Classifier c, double testingProportion)
{
ASEvaluation as = new GainRatioAttributeEval();
//Split data into Test/Train
allData.randomize(new Random());
int size=allData.numInstances();
int testSize=(int)(testingProportion*size);
int trainSize=size-testSize;
Instances trainData=new Instances(allData,0,trainSize);
Instances testData=new Instances(allData,trainSize,testSize);
double[] accuracies= new double[allData.numAttributes()];
//Rank attributes on training data
int[] atts=simpleAttributeRanking(trainData,as);
int removalPos;
// Remove an attribute one at a time in reverse order of importance
// Construct classifier on train, evaluate on test and store
for(int i=0;i<accuracies.length;i++)
{
try{
c.buildClassifier(trainData);
accuracies[i]=ClassifierTools.accuracy(testData,c);
//Record accuracies
System.out.println("Nos Attributes train = "+trainData.numAttributes()+" test = "+testData.numAttributes()+" Accuracy = "+accuracies[i]);
//Remove attribute
removalPos=atts[atts.length-2-i];
trainData.deleteAttributeAt(removalPos);
testData.deleteAttributeAt(removalPos);
//Recalculate all the postions! What a pain, will be very inefficient with competition data
for(int j=0;j<atts.length;j++)
{
if(atts[j]>=removalPos)
atts[j]--;
}
}
catch(Exception e){System.out.println("Error in build classifier XXSS");}
}
return accuracies;
}
public static void attributeFilterTest(){
Instances test=ClassifierTools.loadData("C:\\Users\\ajb\\Dropbox\\TSC Problems\\PhalangesOutlinesCorrect\\PhalangesOutlinesCorrect_TRAIN");
Instances train=ClassifierTools.loadData("C:\\Users\\ajb\\Dropbox\\TSC Problems\\PhalangesOutlinesCorrect\\PhalangesOutlinesCorrect_TEST");
AttributeSelection attSelect =new AttributeSelection();
ASEvaluation eval=new CfsSubsetEval();
ASSearch search= new GreedyStepwise();
attSelect.setEvaluator(eval);
attSelect.setSearch(search);
ArrayList<String> names= new ArrayList<>();
Classifier[] c = TimeSeriesClassification.setDefaultSingleClassifiers(names);
double[] acc =new double[c.length];
double[] acc2 =new double[c.length];
try{
for(int i=0;i<c.length;i++){
c[i].buildClassifier(train);
acc[i]=ClassifierTools.accuracy(test,c[i]);
}
System.out.println(" Number of attributes BEFORE ="+(train.numAttributes()-1));
attSelect.SelectAttributes(train);
int[] indices = attSelect.selectedAttributes();
System.out.println(" Number Selected ="+indices.length);
train=attSelect.reduceDimensionality(train);
test=attSelect.reduceDimensionality(test);
System.out.println(" Number of attributes AFTER ="+(train.numAttributes()-1));
for(int i=0;i<c.length;i++){
c[i].buildClassifier(train);
acc2[i]=ClassifierTools.accuracy(test,c[i]);
System.out.println("Full = "+acc[i]+"\t Filtered ="+acc2[i]);
}
}catch(Exception e){
System.out.println("Error in filter test ="+e);
e.printStackTrace();
System.exit(0);
}
}
}