/* * copyright: Anthony Bagnall * * */ package weka.filters.timeseries; import java.io.FileReader; import java.util.*; import weka.core.*; import weka.filters.SimpleBatchFilter; public class Clipping extends SimpleBatchFilter { boolean useMean=true; boolean useRealAttributes=false; public void setUseRealAttributes(boolean f){useRealAttributes=f;} @Override protected Instances determineOutputFormat(Instances inputFormat) throws Exception { //Must convert all attributes to binary. Attribute a; FastVector fv=new FastVector(); if(!useRealAttributes){ fv.addElement("0"); fv.addElement("1"); } FastVector atts=new FastVector(); for(int i=0;i<inputFormat.numAttributes();i++) { // System.out.println(" Create Attribute "+i); if(i!=inputFormat.classIndex()){ if(!useRealAttributes) a=new Attribute("Clipped"+inputFormat.attribute(i).name(),fv); else a=new Attribute("Clipped"+inputFormat.attribute(i).name()); } else a=inputFormat.attribute(i); atts.addElement(a); // System.out.println(" Add Attribute "+i); // result.insertAttributeAt(a,i); } Instances result = new Instances("Clipped"+inputFormat.relationName(),atts,inputFormat.numInstances()); // System.out.println(" Output format ="+result); if(inputFormat.classIndex()>=0){ result.setClassIndex(result.numAttributes()-1); } return result; } @Override public String globalInfo() { return null; } //Means by CASE, not by attribute private double[] findMedians(Instances instances){ //USe quick select to find them return null; } private double[] findMeans(Instances instances){ double[] means=new double[instances.numInstances()]; int count=0; for(int i=0;i<instances.numInstances();i++){ count=0; for(int j=0;j<instances.numAttributes();j++){ if(j!=instances.classIndex()&& !instances.instance(i).isMissing(j)){ count++; means[i]+=instances.instance(i).value(j); } } if(count>0) means[i]/=count; // System.out.println(" Mean attribute "+j+" = "+means[j]); } return means; } @Override public Instances process(Instances instances) throws Exception { //find the average values, either mean or median double[] averages; if(useMean) averages=findMeans(instances); else averages=findMedians(instances); Instances result = determineOutputFormat(instances); Instance newInst; String val="0"; if(!useRealAttributes){ for(int i=0;i<instances.numInstances();i++) { newInst= new DenseInstance(result.numAttributes()); result.add(newInst); for(int j=0;j<instances.numAttributes();j++){ if(instances.instance(i).isMissing(j)) val="?"; else{ if(j!=instances.classIndex()){ if(instances.instance(i).value(j)<averages[i]) // Zero val="0"; else val="1"; result.instance(i).setValue(j,val); } else result.instance(i).setValue(j,instances.instance(i).stringValue(j)); } } } } else{ double x=0; for(int i=0;i<instances.numInstances();i++) { newInst= new DenseInstance(result.numAttributes()); result.add(newInst); for(int j=0;j<instances.numAttributes();j++){ if(instances.instance(i).isMissing(j)) x=-1; else{ if(j!=instances.classIndex()){ if(instances.instance(i).value(j)<averages[i]) // Zero x=0; else x=1; result.instance(i).setValue(j,x); } else result.instance(i).setValue(j,instances.instance(i).value(j)); } } } } return result; } public String getRevision() { return null; } public static void main(String[] args){ Clipping cp=new Clipping(); Instances data=null; String fileName="C:\\Research\\Data\\Time Series Data\\Time Series Classification\\TestData\\TimeSeries_Train.arff"; try{ FileReader r; r= new FileReader(fileName); data = new Instances(r); data.setClassIndex(data.numAttributes()-1); System.out.println(" Class type numeric ="+data.attribute(data.numAttributes()-1).isNumeric()); System.out.println(" Class type nominal ="+data.attribute(data.numAttributes()-1).isNominal()); Instances newInst=cp.process(data); System.out.println(newInst); }catch(Exception e) { System.out.println(" Error ="+e); StackTraceElement [] st=e.getStackTrace(); for(int i=st.length-1;i>=0;i--) System.out.println(st[i]); } } }