/*
* Created on Jan 29, 2006
*
* TODO To change the template for this generated file go to
* Window - Preferences - Java - Code Style - Code Templates
*/
package transformations;
import fileIO.*;
import weka.core.*;
import weka.classifiers.functions.*;
import weka.classifiers.*;
/**
 * Box-Cox power transformation of the response variable of a Weka data set.
 *
 * <p>For a value y and parameter lambda the transform is
 * {@code (y^lambda - 1) / lambda}, or {@code ln(y)} when lambda is zero.
 * {@link #transform(Instances)} searches the grid {@code MIN, MIN+INTERVAL, ..., MAX}
 * for the lambda whose fitted model has the smallest back-transformed squared
 * error, and then overwrites the response with the best transform.
 *
 * <p>The response column is the class attribute when one is set on the data,
 * otherwise the last attribute.
 *
 * <p>Progress and diagnostics are printed to standard output.
 *
 * @author ajb
 */
public class BoxCox extends Transformations{
    /** Bounds and step of the lambda grid searched by {@link #transform(Instances)}. */
    public static double MIN=-3,MAX=3,INTERVAL=0.25;

    /** Lambdas closer to zero than this are treated as exactly zero (log transform). */
    private static final double ZERO_LAMBDA_TOLERANCE=1e-12;

    /** When true the grid search also evaluates lambda = 0 (log transform); otherwise it is skipped. */
    boolean tryZero=false;
    /** Model fitted to the transformed response for every candidate lambda. */
    AbstractClassifier c;
    /** Smallest error found by the last search and the lambda that produced it. */
    double minError=Double.MAX_VALUE, bestLambda;
    /** Minimum response seen by the last search, or the shift applied when that minimum was not positive. */
    double gamma;
    /** Stored by {@link #setStrictlyPos(boolean)}; not read anywhere in this class. */
    boolean strictlyPositive=true;
    /** Shift added to the response by the last search; 0 when the response was already strictly positive. */
    private double offset=0;

    /**
     * Creates a transform that fits a {@code LinearRegression} for each candidate lambda.
     * Also sets the inherited {@code supervised} and {@code response} flags to true.
     */
    public BoxCox()
    {
        supervised=true;
        response=true;
        minError=Double.MAX_VALUE;
        bestLambda=MIN;
        c=new LinearRegression();
        // Weka expects every flag and every value as its own array element;
        // "-S 1" packed into one string is not recognised as the -S option.
        String[] options = {"-S","1","-C"};
        try{
            c.setOptions(options);
        }catch(Exception e){
            System.out.println(" Error Setting options in constructor");
        }
    }

    /**
     * Stores the strictly-positive flag.
     *
     * @param f new value of the flag
     */
    public void setStrictlyPos(boolean f){strictlyPositive=f;}

    /**
     * Creates a transform that fits the given model for each candidate lambda.
     *
     * @param c model used in place of the default linear regression
     */
    public BoxCox(AbstractClassifier c)
    {
        this();
        this.c=c;
    }

    /**
     * Performs a specific Box-Cox transform on the response variable, overwriting the original.
     *
     * @param data     data set whose response column is overwritten
     * @param lambda   power parameter; zero selects the log transform
     * @param response untransformed response values, one per instance, in instance order
     */
    static public void transformResponse(Instances data, double lambda, double[] response)
    {
        writeBoxCoxResponse(data,boxCoxResponseIndex(data),lambda,response);
    }

    /**
     * Transforms the response variable using the Box-Cox procedure.
     *
     * <p>A response that is not strictly positive is first shifted so that it is;
     * the shift is remembered and reused by {@link #staticTransform(Instances)},
     * {@link #invert(Instances)} and {@link #invertPredictedResponse(double[])}.
     * Every call restarts the search, so results of an earlier call do not leak in.
     *
     * @param data data set to fit on; its response column is overwritten
     * @return the same {@code data} object, with the best transform applied
     * @throws IllegalStateException if {@code INTERVAL} is not positive
     */
    public Instances transform(Instances data)
    {
        int responsePos=boxCoxResponseIndex(data);
        double[] original=data.attributeToDoubleArray(responsePos);
        if(original.length==0){
            // Nothing to fit on; keep any previously fitted state untouched.
            return data;
        }
        if(!(INTERVAL>0)){
            throw new IllegalStateException("INTERVAL must be positive but was "+INTERVAL);
        }
        // Start every search from scratch.
        minError=Double.MAX_VALUE;
        bestLambda=MIN;
        offset=0;

        //Check if strictly positive
        gamma=original[0];
        for(int i=1;i<original.length;i++)
        {
            if(original[i]<gamma){
                gamma=original[i];
            }
        }
        System.out.println(" Min value = "+gamma);
        if(gamma<=0)
        {
            gamma=-2*gamma+1;
            offset=gamma;
            System.out.println(" Data series is not strictly positive, rescaling by "+gamma);
            for(int i=0;i<original.length;i++){
                original[i]+=gamma;
            }
        }

        double[] predictions=new double[original.length];
        // Guard the divisor: a zero or negative value would produce infinite or
        // sign-flipped errors and break the minimum search.
        int divisor=Math.max(1,data.numInstances()-data.numAttributes());
        // Count grid points up front so rounding in repeated additions cannot
        // drop MAX or step over zero.
        long steps=(long)Math.floor((MAX-MIN)/INTERVAL+1e-9);
        for(long k=0;k<=steps;k++)
        {
            double lambda=MIN+k*INTERVAL;
            if(isZeroLambda(lambda))
            {
                if(!tryZero){
                    continue;
                }
                lambda=0;
            }
            //Transform response
            writeBoxCoxResponse(data,responsePos,lambda,original);
            //Fit model and get training predictions
            try{
                c.buildClassifier(data);
                for(int i=0;i<predictions.length;i++){
                    predictions[i]=c.classifyInstance(data.instance(i));
                }
            }
            catch(Exception e)
            {
                System.out.println(" Error building with lambda = "+lambda);
                // No valid predictions for this lambda, so it cannot be scored.
                continue;
            }
            //Assess quality of fit by SSE on the back-transformed scale
            double SSE=0;
            for(int i=0;i<predictions.length;i++)
            {
                double diff=backTransformPrediction(predictions[i],lambda)-original[i];
                SSE+=diff*diff;
            }
            //Check whether minimum, and store
            SSE/=divisor;
            System.out.println("lambda = "+lambda+"SSE ="+SSE);
            if(SSE<minError)
            {
                minError=SSE;
                bestLambda=lambda;
            }
        }
        System.out.println("Min lambda = "+bestLambda+" with MSE = "+minError);
        //Perform best transform
        writeBoxCoxResponse(data,responsePos,bestLambda,original);
        return data;
    }

    /**
     * Reverses the transform found by the last {@link #transform(Instances)} call,
     * including any shift that call applied, overwriting the response column.
     *
     * @param data data set whose response column holds transformed values
     * @return the same {@code data} object
     */
    public Instances invert(Instances data){
        int responsePos=boxCoxResponseIndex(data);
        double[] transformed=data.attributeToDoubleArray(responsePos);
        for(int i=0;i<data.numInstances();i++)
        {
            double v=boxCoxInverse(transformed[i],bestLambda)-offset;
            data.instance(i).setValue(responsePos,v);
        }
        return data;
    }

    /**
     * Transforms the response of {@code data} with the lambda and shift formed by
     * calling {@link #transform(Instances)} on another data set. No search is run.
     *
     * @param data data set whose response column is overwritten
     * @return the same {@code data} object
     */
    public Instances staticTransform(Instances data)
    {
        int responsePos=boxCoxResponseIndex(data);
        double[] original=data.attributeToDoubleArray(responsePos);
        for(int i=0;i<data.numInstances();i++)
        {
            double v=boxCoxForward(original[i]+offset,bestLambda);
            data.instance(i).setValue(responsePos,v);
        }
        return data;
    }

    /**
     * Maps predictions made on the transformed scale back to the original scale,
     * using the lambda and shift of the last {@link #transform(Instances)} call.
     *
     * @param d predictions on the transformed scale; overwritten in place
     * @return the same array {@code d}
     */
    public double[] invertPredictedResponse(double[] d)
    {
        for(int i=0;i<d.length;i++){
            d[i]=boxCoxInverse(d[i],bestLambda)-offset;
        }
        return d;
    }

    /** Returns the class index when one is set, otherwise the index of the last attribute. */
    private static int boxCoxResponseIndex(Instances data)
    {
        int classPos=data.classIndex();
        return classPos>=0 ? classPos : data.numAttributes()-1;
    }

    /** True when lambda is close enough to zero to need the log form of the transform. */
    private static boolean isZeroLambda(double lambda)
    {
        return Math.abs(lambda)<ZERO_LAMBDA_TOLERANCE;
    }

    /** Box-Cox transform of a single value. */
    private static double boxCoxForward(double y, double lambda)
    {
        if(isZeroLambda(lambda)){
            return Math.log(y);
        }
        return (Math.pow(y,lambda)-1)/lambda;
    }

    /** Inverse Box-Cox transform of a single value. */
    private static double boxCoxInverse(double v, double lambda)
    {
        if(isZeroLambda(lambda)){
            return Math.exp(v);
        }
        return Math.pow(v*lambda+1,1/lambda);
    }

    /** Inverse transform used while scoring a lambda: results with no real root are clamped to 0. */
    private static double backTransformPrediction(double prediction, double lambda)
    {
        if(isZeroLambda(lambda)){
            return Math.exp(prediction);
        }
        double base=prediction*lambda+1;
        if(base<=0){
            return 0;
        }
        if(lambda>0){
            return Math.pow(base,1.0/lambda);
        }
        return 1/Math.pow(base,-1.0/lambda);
    }

    /** Writes the Box-Cox transform of values[i] into column pos of instance i. */
    private static void writeBoxCoxResponse(Instances data, int pos, double lambda, double[] values)
    {
        for(int i=0;i<values.length;i++){
            data.instance(i).setValue(pos,boxCoxForward(values[i],lambda));
        }
    }

    /**
     * Prints the values returned by {@code Transformations.getNormalQuantiles(0.0,1.0)}
     * and writes them as CSV rows to {@code TestQuantiles.csv}.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        double[] quantiles = Transformations.getNormalQuantiles(0.0,1.0);
        for(int i=0;i<quantiles.length;i++){
            System.out.println("Quantile "+i+" = "+quantiles[i]);
        }
        OutFile of = new OutFile("TestQuantiles.csv");
        for(int i=0;i<quantiles.length;i++)
        {
            String row=i+","+(i+1)/(double)quantiles.length+","+quantiles[i];
            System.out.println(row);
            of.writeLine(row);
        }
    }
}