// File2Histo.java // // This class take a directory of observation files, calculates either: // - the mutual information with the uniform distribution on inputs // - or the maximising input distribution of the FIRST file with all // the matrices // It then plots the distribution and calculates the mean and variance // // Tom Chothia T.Chothia@cs.bham.ac.uk 6/11/2008 // // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see <http://www.gnu.org/licenses/>. // // Copyright 2008 Tom Chothia package info; public class Files2Histo { // The program looks for the files fileNamesRoot_0,fileNamesRoot_1, ... , fileNamesRoot_noOfTests // //static String fileNamesRoot = "C:\\Documents and Settings\\tpc\\workspace\\Data\\Dist\\100000"; static String fileNamesRoot = "/home/scratch-staff/DCresults/nullCase/100000"; static int noOfTests = 99; static double[] Qhat; static double[] results = new double[noOfTests]; static double min = 0.095; static double max = 0.12000; static int segments = 75; static double total = 0; static double correction ; // if useBA = true then Blahut-Arimoto is used to find the maximising distribution // of the first file, and then use that for all the other matrices // if useBA = false then the uniform distribution is used. static boolean useBA = true; static int inputs; static int outputs; static int sampleSize; public static void main(String[] args) { // Read the first file and set key values ReadFile obsReader = new ReadFile(fileNamesRoot+"_0"); obsReader.readObservations(); sampleSize = obsReader.getObservations().getNoOfTests(); double[][] matrix = obsReader.getObservations().generateChannel().getMatrix(); inputs = matrix.length; outputs = matrix[0].length; correction = (((double)inputs-1)*((double)outputs -1))/(2*(double)sampleSize) *IT.log2(Math.E); // find the input distribution to be used with each matrix. double mi; if (useBA) { BlahutArimoto ba = new BlahutArimoto(obsReader.getObservations().generateChannel(),0.00001,5000); mi = ba.calculateCapacity(); System.out.println("\\hat Q(W_n) = "); Qhat = ba.getMaxInputDist(); IT.printPMF(Qhat); } else { Qhat = IT.unifromDist(inputs); mi = IT.mutualInformation(Qhat,matrix); System.out.println("Using uniform input"); IT.printPMF(Qhat); } // Print the first matrix System.out.println("\n"); obsReader.getObservations().generateChannel().printChannel(); System.out.println("\n"); //System.out.println("cap for "+fileNamesRoot+"_"+0+" is "+mi); System.out.print(mi); results[0] = mi; total = mi; for (int testIndex = 1;testIndex < noOfTests;testIndex++) { results[testIndex] = mi; obsReader = new ReadFile(fileNamesRoot+"_"+testIndex); obsReader.readObservations(); mi = IT.mutualInformation2(Qhat, obsReader.getObservations().generateChannel().getMatrix()); //System.out.println("cap for "+fileNamesRoot+"_"+testIndex+" is "+mi); if ((testIndex)%3==0) {System.out.print("\n");} else {System.out.print(" ");} System.out.print(mi); total = total+mi; } // calculate the high of each bar of the histogram int[] histoGraph = new int[segments]; int maxHigh = 0; double segwidth = (max -min)/(double)segments; System.out.println("\n"); for (int seg=0;seg<segments;seg++) { for (int i=0;i<noOfTests;i++) { if( results[i] > (min+seg*segwidth) && results[i] < (min+(seg+1)*segwidth) ) { histoGraph[seg]++; } } //System.out.println("seg"+seg+": "+histoGraph[seg]); maxHigh = Math.max(maxHigh, histoGraph[seg]); } // calculate mean and variance double mean = total/(double)noOfTests; double diffSqSum = 0; for (int i=0;i<noOfTests;i++){ double diff = results[i] - mean; double diffSq = diff*diff; diffSqSum = diffSqSum + diffSq; } double var = diffSqSum/noOfTests; // Print the histograph for (int row = maxHigh;row > 0;row--) { System.out.print("|"); for (int col = 0; col<segments;col++) { if (histoGraph[col]>=row) {System.out.print("*");} else {System.out.print(" ");} } System.out.print("\n"); } for (int seg = 0; seg<segments;seg++) { if ( (mean > (min+seg*segwidth) && mean < (min+(seg+1)*segwidth)) || (correction > (min+seg*segwidth) && correction < (min+(seg+1)*segwidth))) { System.out.print("|"); } else { System.out.print("_");} } System.out.print("\n"); for (int seg = 0; seg<segments;seg++) { if ( (mean > (min+seg*segwidth) && mean < (min+(seg+1)*segwidth)) || (correction > (min+seg*segwidth) && correction < (min+(seg+1)*segwidth))) { System.out.print("|"); } else { System.out.print(" ");} } System.out.print("\n"); for (int seg = 0; seg<segments;seg++) { if ( (mean > (min+seg*segwidth) && mean < (min+(seg+1)*segwidth)) && !(correction > (min+seg*segwidth) && correction < (min+(seg+1)*segwidth))) { System.out.print("M"); } else if ( !(mean > (min+seg*segwidth) && mean < (min+(seg+1)*segwidth)) && (correction > (min+seg*segwidth) && correction < (min+(seg+1)*segwidth))) { System.out.print("C"); } else if ( (mean > (min+seg*segwidth) && mean < (min+(seg+1)*segwidth)) && (correction > (min+seg*segwidth) && correction < (min+(seg+1)*segwidth))) { System.out.print("MC"); } else System.out.print(" "); } // Print the results System.out.println("\n"); System.out.println(inputs+" inputs, "+outputs+" outputs "+" and "+sampleSize+" samples\n"); System.out.println("observed mean = "+mean); System.out.println("observed variance = "+var); System.out.println("\ncorrection = log2(e).(inputs-1)(outputs-1)/2.sampleSize= "+correction); System.out.println("\nIf the value is null the the distribution should be chi-squared with:"); System.out.println(" mean = correction*log2(e) =" + correction); System.out.println(" variance = log2(e)^2.(inputs-1)(outputs-1)/2.(sampleSize^2) =" + (((double)inputs-1)*((double)outputs -1))/((double)2*Math.pow(sampleSize,2)) *Math.pow(IT.log2(Math.E),2)); System.out.println("\nIn the value is non-null the the distribution should be normal with:"); System.out.println(" mean = trueValue+correction"); System.out.println(" variance = " + IT.variance(sampleSize,Qhat, matrix)); } }