package weka.filters.timeseries;
//import fileIO.OutFile;
import java.text.DecimalFormat;
//import simulators.SimulateAR;
//import weka.classifiers.evaluation.ClassifierTools;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.filters.*;
/*
* * copyright: Anthony Bagnall
* 1. Check debugging:
* ACF vs ACvF. No point really.
* Truncate:
* Simple significance threshold
* Shifting significance threshold.Smoothing: e Blackman-Tukey method
*
* Other things to investigate:
*/
public class ACF extends SimpleBatchFilter {
/**
*
*/
private static final long serialVersionUID = 1L;
private boolean normalized=false; //Assumes zero mean and unit variance
int endTerms=10;
int maxLag=300;
int seriesLength;
int lag=maxLag;
int globalSignificantLag=maxLag;
double globalSigThreshold;
boolean useGlobalSigThreshold=true;
double[] sigThreshold;
int[] cutOffs;
boolean globalTruncate=true;
double alpha=0.1; // Significant threshold for the4 truncation
public void setMaxLag(int n){ maxLag=n;}
public void setNormalized(boolean flag){ normalized=flag;}
public void setGlobalSigThresh(boolean flag){ useGlobalSigThreshold=flag;}
protected Instances determineOutputFormat(Instances inputFormat)
throws Exception {
seriesLength=inputFormat.numAttributes();
if(inputFormat.classIndex()>=0)
seriesLength--;
//Check all attributes are real valued, otherwise throw exception
for(int i=0;i<inputFormat.numAttributes();i++)
if(inputFormat.classIndex()!=i)
if(!inputFormat.attribute(i).isNumeric())
throw new Exception("Non numeric attribute not allowed in ACF");
//Cannot include the final endTerms correlations, since they are based on too little data and hence unreliable.
if(maxLag>inputFormat.numAttributes()-endTerms)
maxLag=inputFormat.numAttributes()-endTerms;
//Set up instances size and format.
FastVector atts=new FastVector();
String name;
for(int i=0;i<maxLag;i++){
name = "ACF_"+i;
atts.addElement(new Attribute(name));
}
if(inputFormat.classIndex()>=0){ //Classification set, set class
//Get the class values as a fast vector
Attribute target =inputFormat.attribute(inputFormat.classIndex());
FastVector vals=new FastVector(target.numValues());
for(int i=0;i<target.numValues();i++)
vals.addElement(target.value(i));
atts.addElement(new Attribute(inputFormat.attribute(inputFormat.classIndex()).name(),vals));
}
Instances result = new Instances("ACF"+inputFormat.relationName(),atts,inputFormat.numInstances());
if(inputFormat.classIndex()>=0){
result.setClassIndex(result.numAttributes()-1);
}
return result;
}
@Override
public String globalInfo() {
return null;
}
@Override
public Instances process(Instances inst) throws Exception {
Instances output=determineOutputFormat(inst);
//For each data, first extract the relevan
seriesLength=inst.numAttributes();
int acfLength=output.numAttributes();
if(inst.classIndex()>=0){
seriesLength--;
acfLength--;
}
for(int i=0;i<inst.numInstances();i++){
//1. Get series:
double[] d=inst.instance(i).toDoubleArray();
//2. Remove target class
double[] temp;
int c=inst.classIndex();
if(c>=0){
temp=new double[d.length-1];
int count=0;
for(int k=0;k<d.length;k++){
if(k!=c){
temp[count]=d[k];
count++;
}
}
d=temp;
}
double[] autoCorr=fitAutoCorrelations(d);
//Extract out the terms and set the attributes
Instance newInst=null;
if(inst.classIndex()>=0)
newInst=new DenseInstance(acfLength+1);
else
newInst=new DenseInstance(acfLength);
for(int j=0;j<acfLength;j++){
if(autoCorr[j]<-1.0 || autoCorr[j]>1 || Double.isNaN(autoCorr[j])|| Double.isInfinite(autoCorr[j]))
newInst.setValue(j,0);
else
newInst.setValue(j,autoCorr[j]);
}
if(inst.classIndex()>=0)
newInst.setValue(output.classIndex(), inst.instance(i).classValue());
output.add(newInst);
}
return output;
}
public double[] fitAutoCorrelations(double[] data)
{
double[] a = new double[maxLag];
if(!normalized){
for(int i=1;i<=maxLag;i++){
double s1,s2,ss1,ss2,v1,v2;
a[i-1]=0;
s1=s2=ss1=ss2=0;
for(int j=0;j<data.length-i;j++){
s1+=data[j];
ss1+=data[j]*data[j];
s2+=data[j+i];
ss2+=data[j+i]*data[j+i];
}
s1/=data.length-i;
s2/=data.length-i;
for(int j=0;j<data.length-i;j++)
a[i-1]+=(data[j]-s1)*(data[j+i]-s2);
a[i-1]/=(data.length-i);
v1=ss1/(data.length-i)-s1*s1;
v2=ss2/(data.length-i)-s2*s2;
a[i-1]/=Math.sqrt(v1)*Math.sqrt(v2);
}
}
else{
for(int i=1;i<=maxLag;i++){
a[i-1]=0;
for(int j=0;j<data.length-i;j++)
a[i-1]+=data[j]*data[j+i];
a[i-1]/=data.length;
}
}
return a;
}
public static double[] fitAutoCorrelations(double[] data, int mLag)
{
double[] a = new double[mLag];
double s1,s2,ss1,ss2,v1,v2;
for(int i=1;i<=mLag;i++){
a[i-1]=0;
s1=s2=ss1=ss2=0;
for(int j=0;j<data.length-i;j++){
s1+=data[j];
ss1+=data[j]*data[j];
s2+=data[j+i];
ss2+=data[j+i]*data[j+i];
}
s1/=data.length-i;
s2/=data.length-i;
for(int j=0;j<data.length-i;j++)
a[i-1]+=(data[j]-s1)*(data[j+i]-s2);
a[i-1]/=(data.length-i);
v1=ss1/(data.length-i)-s1*s1;
v2=ss2/(data.length-i)-s2*s2;
a[i-1]/=Math.sqrt(v1)*Math.sqrt(v2);
}
return a;
}
public String getRevision() {
// TODO Auto-generated method stub
return null;
}
public int truncate(Instances d, boolean global){
globalTruncate=global;
return truncate(d);
}
/** Firstly, this method finds the first insignificant ACF term in every series
* If then does does one of two things
* if globalTruncate is true, it finds the max position of
**/
public int truncate(Instances d){
//Truncate 1: find the first insignificant term for each series, then find the highest, then remove all after this
int largestPos=0;
int[] c=findAllCutOffs(d);
if(globalTruncate){
for(int i=1;i<c.length;i++){
if(c[largestPos]<c[i])
largestPos=i;
}
//This is to stop zero attributes!
if(largestPos<d.numAttributes()-2)
largestPos++;
truncate(d,largestPos);
}
else{
for(int i=0;i<d.numInstances();i++){
zeroInstance(d.instance(i),c[i]);
}
}
return largestPos;
}
public void truncate(Instances d, int n){
int att=n;
while(att<d.numAttributes()){
if(att==d.classIndex())
att++;
else
d.deleteAttributeAt(att);
}
}
private void zeroInstance(Instance ins, int p){
for(int i=p;i<ins.numAttributes();i++){
if(i!=ins.classIndex())
ins.setValue(i, 0);
}
}
private int[] findAllCutOffs(Instances inst){
globalSigThreshold=2/Math.sqrt(seriesLength);
sigThreshold=new double[inst.numAttributes()-1];
cutOffs=new int[inst.numInstances()];
for(int i=0;i<cutOffs.length;i++)
cutOffs[i]=findSingleCutOff(inst.instance(i));
return cutOffs;
}
/* Assumes you pass an ACF data set! Will not work if the class variable is not the last. Two fiddly to do at the moment**/
private int findSingleCutOff(Instance inst){
/** Finds the threshold of the first non significant ACF term for all the series.
*/
double[] r=inst.toDoubleArray();
int count=0;
if(useGlobalSigThreshold){
for(int i=0;i<inst.numAttributes();i++){
if(i!=inst.classIndex()){
sigThreshold[count]=globalSigThreshold;
count++;
}
}
}
else{ ///DO NOT USE, I'm not sure of the logic of this, need to look up the paper
sigThreshold[0]=r[0]*r[0];
count=1;
for(int i=1;i<inst.numAttributes();i++){
if(i!=inst.classIndex()){
sigThreshold[count]=sigThreshold[count-1]+r[i]*r[i];
count++;
}
}
for(int i=0;i<sigThreshold.length;i++){
sigThreshold[i]=(1+sigThreshold[i])/seriesLength;
sigThreshold[i]=2/Math.sqrt(sigThreshold[i]);
}
}
for(int i=0;i<sigThreshold.length;i++)
if(Math.abs(r[i])<sigThreshold[i])
return i;
return sigThreshold.length-1;
}
/*
public static void testTransform(){
/**Debug code to test ACF generation:
Test File ACF: Four AR(1) series, first two \phi_0=0.5, seconde two \phi_0=-0.5
Instances test=ClassifierTools.loadData("C:\\Research\\Data\\TestData\\ACFTest");
DecimalFormat df=new DecimalFormat("##.####");
ACF acf=new ACF();
acf.setMaxLag(test.numAttributes()-10);
try{
Instances t2=acf.process(test);
System.out.println(" Number of attributes ="+t2.numAttributes());
Instance ins=t2.instance(0);
for(int i=0;i<ins.numAttributes()&&i<10;i++)
System.out.print(" "+df.format(ins.value(i)));
OutFile of=new OutFile("C:\\Research\\Data\\TestData\\ACTTestOutput.csv");
of.writeString(t2.toString());
}catch(Exception e){
System.out.println(" Exception in ACF harness="+e);
e.printStackTrace();
}
}
public static void testTrunctate(){
Instances test=ClassifierTools.loadData("C:\\Research\\Data\\TestData\\ACFTest");
DecimalFormat df=new DecimalFormat("##.####");
ACF acf=new ACF();
int[] cases={20,20};
int seriesLength=200;
acf.setMaxLag(test.numAttributes()-10);
try{
acf.setMaxLag(seriesLength-10);
Instances all=SimulateAR.generateARDataSet(20,20,seriesLength,cases,true);
System.out.println(" Number of attributes All ="+all.numAttributes());
Instances t2=acf.process(all);
System.out.println(" Number of attributes ="+t2.numAttributes());
acf.truncate(t2);
System.out.println(" Number of attributes ="+t2.numAttributes());
acf.useGlobalSigThreshold=true;
t2=acf.process(all);
acf.truncate(t2);
System.out.println(" Number of attributes ="+t2.numAttributes());
}catch(Exception e){
System.out.println(" Exception in ACF harness="+e);
e.printStackTrace();
}
}
*/
public static void main(String[] args){
// testTransform();
// testTrunctate();
}
}
/*
new double[acfLength];
//2. Find mean and variance
double mean=d[0];
double var=d[0]*d[0];
for(int j=1;j<seriesLength;j++){
mean+=d[j];
var+=d[j]*d[j];
}
mean/=seriesLength;
var=(mean*mean-var*seriesLength)/(seriesLength-1);
//Work out the auto-c for lags 1 to length
for(int lag=1;lag<maxLag;lag++)
{
autoCorr[lag-1]=0;
for(int j=0;j<acfLength;j++)
autoCorr[lag-1]+=(d[j]-mean)*(d[j+lag]-mean);
autoCorr[lag-1]/=(seriesLength-lag)*var;
}
*/