/* * Created on Jan 30, 2006 * * TODO To change the template for this generated file go to * Window - Preferences - Java - Code Style - Code Templates */ package transformations; import fileIO.OutFile; import java.io.FileReader; import weka.core.Instance; import weka.core.Instances; public class BoxTidwell { public static Instances transformRegressor(Instances data, int pos,int resultPos, double[] powers) { //1. Get values of the attribute of interest. //Confusingly, am working with attributes in rows not columns double[] temp=data.attributeToDoubleArray(pos); double[] originalData= new double[temp.length]; double[] logData= new double[temp.length]; for(int i=0;i<temp.length;i++) { originalData[i]=temp[i]; logData[i]=Math.log(temp[i]); } double[] y =data.attributeToDoubleArray(data.classIndex()); // I'm not sure if this is a memory copy or a reference copy, so be safe double[][] transposeFirst = new double[data.numAttributes()][data.numInstances()]; double[][] transposeSecond = new double[data.numAttributes()+1][data.numInstances()]; for(int j=0;j<data.numInstances();j++) { transposeFirst[0][j]=transposeSecond[0][j]=1; } for(int i=1;i<data.numAttributes();i++) { transposeFirst[i]=transposeSecond[i]=data.attributeToDoubleArray(i-1); } // Add one to pos cos of the ones pos=pos+1; // Second has an attribute at the end of data for transform int workingPos=data.numAttributes(); LinearModel l1,l2; double alpha=1, b1,b2; double min=0.1; boolean finished=false; int count=0; final int MaxIterations=10; // Initialise alpha to 1 //Find Base SSE //While not termination condition while(!finished) { // System.out.println(" Iteration = "+(count+1)+" alpha = "+alpha); //Create new attributes //1. Calculate x^alpha for(int j=0;j<originalData.length;j++) { transposeSecond[pos][j]=transposeFirst[pos][j]=Math.pow(originalData[j],alpha); } //2. Fit y=b1+ .. b_pos x^alpha (+ other terms)-> get b_pos l1=new LinearModel(transposeFirst,y); l1.fitModel(); //Not necessary: // l1.formTrainPredictions(); // l1.findTrainStatistics(); // System.out.println(l1+"\nVariance for L1 = "+l1.variance); b1=l1.paras[pos]; //3. Fit y=b*1+ .. b*_pos x^alpha +b*_workingPos x^alpha*log(x) (+ other terms)-> get b*2 //2. Calculate x^alpha*log(x) for(int j=0;j<originalData.length;j++) transposeSecond[workingPos][j]=transposeFirst[pos][j]*logData[j]; l2=new LinearModel(transposeSecond,y); l2.fitModel(); // Not necessary: // l2.formTrainPredictions(); // l2.findTrainStatistics(); // System.out.println(l2+"\nVariance for L2 = "+l2.variance); b2=l2.paras[workingPos]; alpha+=b2/b1; //Work out change term alpha = b*2/b1+alpha0 // System.out.println("New Alpha ="+alpha+" b1 = "+b1+" b2 = "+b2); //Update termination criteria: stop if small change: check notes count++; if(Math.abs(b2/b1)<min || count>=MaxIterations) finished=true; else if(Math.abs(alpha)>10) { alpha=1; finished=true; } } //Fix original powers[resultPos]=alpha; pos=pos-1; Instance inst; for(int i=0;i<data.numInstances();i++) { inst=data.instance(i); inst.setValue(pos,Math.pow(originalData[i],alpha)); } return data; } //First rows is all ones //Last row is for transformed attribute public static double transformRegressor(double[][] data, double[] response, int pos) { //1. Get values of the attribute of interest. double[] temp=data[pos]; double[] originalData= new double[temp.length]; double[] transformedData= new double[temp.length]; double[] logData = new double[originalData.length]; for(int i=0;i<originalData.length;i++) { originalData[i]=temp[i]; logData[i]=Math.log(originalData[i]); } double[] y =response; double[][] transposeFirst = new double[data.length][]; double[][] transposeSecond = new double[data.length+1][]; for(int j=0;j<data.length;j++) transposeFirst[j]=transposeSecond[j]=data[j]; transposeFirst[pos]=transformedData; transposeSecond[pos]=transformedData; transposeSecond[data.length]=logData; int workingPos=data.length; LinearModel l1,l2; double alpha=1, b1,b2; double min=0.1; boolean finished=false; int count=0; final int MaxIterations=10; // Initialise alpha to 1 //Find Base SSE //While not termination condition while(!finished) { //Create new attributes //1. Calculate x^alpha for(int j=0;j<originalData.length;j++) transformedData[j]=Math.pow(originalData[j],alpha); //2. Fit y=b1+ .. b_pos x^alpha (+ other terms)-> get b_pos l1=new LinearModel(transposeFirst,y); l1.fitModel(); //Not necessary: // l1.formTrainPredictions(); // l1.findTrainStatistics(); // System.out.println(l1+"\nVariance for L1 = "+l1.variance); b1=l1.paras[pos]; //3. Fit y=b*1+ .. b*_pos x^alpha +b*_workingPos x^alpha*log(x) (+ other terms)-> get b*2 //2. Calculate x^alpha*log(x) for(int j=0;j<originalData.length;j++) transposeSecond[workingPos][j]=originalData[j]*logData[j]; l2=new LinearModel(transposeSecond,y); l2.fitModel(); b2=l2.paras[workingPos]; alpha+=b2/b1; //Work out change term alpha = b*2/b1+alpha0 //Update termination criteria: stop if small change: check notes count++; if(Math.abs(b2/b1)<min || count>=MaxIterations) finished=true; else if(Math.abs(alpha)>10) { alpha=1; finished=true; } } //Fix original return alpha; } public static void main(String[] args) { Instances data=null; try{ FileReader r = new FileReader("C:/Research/Code/Archive Generator/src/weka/addOns/BoxTidwellTest2.arff"); data = new Instances(r); data.setClassIndex(data.numAttributes()-1); }catch(Exception e) { System.out.println("Error loading file "+e); } double[] powers=new double[data.numAttributes()-1]; // data=transformRegressor(data,0,powers); // data=transformRegressor(data,2,powers); // data=transformRegressor(data,1,powers); System.out.println(" Final powers ="); for(int i=0;i<powers.length;i++) System.out.println(i+" ="+powers[i]); OutFile r = new OutFile("C:/Research/Code/Archive Generator/src/weka/addOns/BoxTidwellResults2.arff"); r.writeLine(data.toString()); } }