/* Basic linear regression, including standardisation for residuals which isnt included in the Jama ridge regression * */ package transformations; import fileIO.OutFile; import java.io.FileReader; import weka.core.Instances; import weka.core.matrix.Matrix; public class LinearModel { double variance, standardisedError,SSE,SST,SSR, yBar; Matrix Xt,X,XtXinv; Matrix Y; Matrix B; double[] paras,y,H_Diagonal,predicted,residual,stdResidual; int n,m; //H is going to be big! X(XtX)-1Xt is nxn, so need to just generate diagonal terms // X(XtX)-1 is nxm, so can just work the diagonals with Xt public Matrix HatDiagonal; //ASSUMES FIRST ROW IS ALL ONES IF CONSTANT TERM TO BE INCLUDED //ATTRIBUTE FIRST Dirty hack public LinearModel(double[][] data,double[] response) { m=data.length; n=data[0].length; y = response; //This way round for consistency with other constructor Xt=new Matrix(data); // System.out.println("Xt = \n"+Xt); X = Xt.transpose(); // System.out.println("X = \n"+X); Y=new Matrix(y,y.length); } public LinearModel(Instances data) { //Form X and Y from Instances n=data.numInstances(); m=data.numAttributes(); //includes the constant term y = data.attributeToDoubleArray(data.classIndex()); Y=new Matrix(y,y.length); double[][] xt = new double[m][n]; for(int i=0;i<n;i++) xt[0][i]=1; for(int i=1;i<m;i++) xt[i]=data.attributeToDoubleArray(i-1); Xt=new Matrix(xt); X=Xt.transpose(); } public double[] getY(){return y;} public double[] getPredicted(){return predicted;} public double[] getResiduals(){return stdResidual;} public double getSSR(){return SSR;} public void fitModel() { //B = (XtX)-1XtY XtXinv=Xt.times(X); XtXinv=XtXinv.inverse(); Matrix temp= XtXinv.times(Xt),t2,t3; //B should be m x 1 B=temp.times(Y); paras=B.getColumnPackedCopy(); H_Diagonal=new double[n]; // (XtX)-1Xt is mxn, so can just work the diagonals with Xt double sum=0; for(int i=0;i<n;i++) { t2=X.getMatrix(i,i,0,m-1); t3=t2.transpose(); // System.out.println("Row mult t2 rows ="+t2.getRowDimension()+" columns = "+t2.getColumnDimension()); t3=XtXinv.times(t3); t3=t2.times(t3); H_Diagonal[i]=t3.get(0,0); sum+=H_Diagonal[i]; } } public double findInverseStats(double l, double[] untransformed) { formTrainPredictions(); predicted=YeoJohnson.invert(l,predicted); y=untransformed; findTrainStatistics(); return variance; } public double findStats() { formTrainPredictions(); findTrainStatistics(); return variance; } public double[] formTrainPredictions() { predicted=new double[n]; for(int i=0;i<n;i++) { //Find predicted predicted[i]=paras[0]; for(int j=1;j<paras.length;j++) predicted[i]+=paras[j]*X.get(i,j); } return predicted; } public void findTrainStatistics() { SSE=0; stdResidual=new double[n]; residual=new double[n]; yBar=0; for(int i=0;i<n;i++) { residual[i]=(y[i]-predicted[i]); SSE+=residual[i]*residual[i]; yBar+=y[i]; } yBar/=n; variance=SSE/(n-paras.length); SST=0; for(int i=0;i<n;i++) SST+=(y[i]-yBar)*(y[i]-yBar); SSR=SST-SSE; double s= Math.sqrt(variance); standardisedError=0; for(int i=0;i<n;i++) { stdResidual[i]=residual[i]/(s*(Math.sqrt(1-H_Diagonal[i]))); standardisedError+=stdResidual[i]*stdResidual[i]; } standardisedError/=(n-paras.length); } public double[] formTestPredictions(Instances testData) { //Form X matrix from testData int rows=testData.numInstances(); int cols=testData.numAttributes(); //includes the constant term predicted=new double[rows]; if(cols!=m) { System.out.println("Error: Mismatch in attribute lengths in form test Train ="+m+" Test ="+cols); System.exit(0); } double[][] xt = new double[cols][rows]; for(int i=0;i<rows;i++) xt[0][i]=1; for(int i=1;i<cols;i++) xt[i]=testData.attributeToDoubleArray(i-1); Matrix testX=new Matrix(xt); testX=testX.transpose(); for(int i=0;i<rows;i++) { //Find predicted predicted[i]=paras[0]; for(int j=1;j<paras.length;j++) predicted[i]+=paras[j]*testX.get(i,j); } return predicted; } public String toString() { String str="Paras : "; for(int i=0;i<paras.length;i++) str+=paras[i]+" "; return str; } public static void main(String[] args) { Instances data=null; try{ FileReader r = new FileReader("C:/Research/Code/Archive Generator/src/weka/addOns/RegressionTest2.arff"); data = new Instances(r); data.setClassIndex(data.numAttributes()-1); }catch(Exception e) { System.out.println("Error loading file "+e); } LinearModel lm = new LinearModel(data); lm.fitModel(); lm.formTrainPredictions(); lm.findTrainStatistics(); OutFile f = new OutFile("C:/Research/Code/Archive Generator/src/weka/addOns/TestResults.csv"); f.writeLine("Parameters"); for(int i=0;i<lm.paras.length;i++) f.writeString(lm.paras[i]+","); f.writeLine("Variance = "+lm.variance); f.writeLine("\nHatDiagonal, Actual, Predicted, StdResidual"); for(int i=0;i<lm.n;i++) f.writeLine(lm.H_Diagonal[i]+","+lm.y[i]+","+lm.predicted[i]+","+lm.stdResidual[i]); } public double getSSE() { throw new UnsupportedOperationException("Not yet implemented"); } }