// IT.java // // This is a library of useful Information Theory definitions. Most are // complete standard, see for example "Elements of Information Theory" // by Cover and Thomas. // // Tom Chothia T.Chothia@cwi.nl 21/05/2008 // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see <http://www.gnu.org/licenses/>. // // Copyright 2008 Tom Chothia package info; //Probability Mass Functions are represented by an array of doubles // p(element i) = pmf[i] public class IT { // This method calculates the entropy of a PMF public static double entropy (double[] pmf) { double result = 0; for (int i= 0;i < pmf.length;i++) { // Entropy calculation requires that 0.log(0) = 0 if (pmf[i] != 0) {result = result + pmf[i] * log2(pmf[i]);} } return (-result); } public static double H (double[] pmf) { return entropy(pmf); } // N.B. the inputs to this function are a PMF and a Channel Matrix // that links that PMF with the other PMF i.e., X and the channel to Y // The inputs are NOT two pmfs i.e., H(Q,W) not H(X|Y) // W[input][output] = W(output|input) public static double conditionalEntropy (double[] pmf, double[][] matrix_W) { // This method returns H(X|Y) where X is given by pmf and matrix_W is the channel from X to Y // i.e. value returned equals: - Sigma_x Q(x).Sigma_y W(y|x).log ( Q(x).W(y|x)/(QW)(y) ) // = - Sigma_x.Sigma_y Q(x).W(y|x).log ( Q(x).W(y|x)/(QW)(y) ) // QW(y) = 0 => Q(x).W(y|x) = 0 therefore // if Q(x).W(y|x) = 0 we take W(y|x).log ( Q(x).W(y|x)/(QW)(y) ) = 0MIuniformInput double result = 0; //Sigma_x for (int i= 0;i < pmf.length;i++) { for (int j = 0; j<matrix_W[0].length;j++) { if (pmf[i] != 0 && matrix_W[i][j] != 0) { result = result + pmf[i] * matrix_W[i][j] * log2( (pmf[i] * matrix_W[i][j] ) / QW(j,pmf,matrix_W)); } } } return (-result); } public static double H (double[] Q, double[][] W) { return H(Q,W); } // N.B. the inputs to this function are a PMF and a Channel Matrix // that links that PMF with the other PMF i.e., X and the channel to Y // The inputs are NOT two pmfs i.e., I(Q,W) not I(X;Y) // This method returns I(Q,W) = I (X;Y) = H(X) - H(X|Y) // produces the same result as mutualInformation_I2 public static double mutualInformation (double[] Q, double[][] W) { return ( H(Q) - conditionalEntropy(Q,W)); } public static double I (double[] Q, double[][] W) { return mutualInformation (Q,W); } // This method returns I(Q,W) = I (X;Y) = Sigma_x Sigma_y Q(x).W(y|x)log(W(y|x)/QW(y)) // produces the same result as mutualInformation,I but it might be faster public static double mutualInformation2 (double[] Q, double[][] W) { double result = 0; for (int x =0;x<W.length;x++) { for(int y=0;y<(W[0]).length;y++) { result = result + Q[x]*W[x][y]*log2(W[x][y] / QW(y,Q,W)); } } return result; } public static double variance (int sampleSize, double[] Q, double[][] W) { // variance equals 1/N.Sigma_i Q(i).( // ( ( \Sigma_j W(j|i). (log( (Q(i).W(j|i)) / Sigma_{i'} Q(i').W(j|i')))^2) // - ( (\Sigma_j W(j|i).log( (Q(i).W(j|i)) / Sigma_{i'} Q(i').W(j|i')))^2) ) ) double result = 0; for (int x =0;x<W.length;x++) //Sigma_i { double firstPart = 0; // = ( \Sigma_j W(j|i). (log( (Q(i).W(j|i)) / Sigma_{i'} Q(i').W(j|i')))^2) double secondPart = 0; // =(\Sigma_j W(j|i).log( (Q(i).W(j|i)) / Sigma_{i'} Q(i').W(j|i')))^2 for(int y=0;y<(W[0]).length;y++) //Sigma_j { firstPart = firstPart + W[x][y]*Math.pow(log2( (Q[x]*W[x][y]) / QW(y,Q,W)),2); } for(int y=0;y<(W[0]).length;y++) //Sigma_j { secondPart = secondPart + W[x][y]*log2( (Q[x]*W[x][y]) / QW(y,Q,W)); } secondPart = Math.pow(secondPart, 2); result = result + Q[x]*(firstPart-secondPart); } return ((1/(double)sampleSize)*result); } public static double MIuniformInput (double[][] W) { return ( mutualInformation(unifromDist(W.length),W)); } // this method calculate the joint entropy of X and Y // where p(x,y) = p[x_index][y_index] // H( (X,Y) ) = - Sigma_x Sigma_y p(x,y).log (p(x,y)) public static double jointEntropy (double[][] p) { double result = 0; for (int x=0;x<p.length;x++) { for (int y=0;y<p[0].length;y++) { if (p[x][y] != 0) { result = result + p[x][y]*log2( p[x][y]); } } } return -result; } // this method calculates the relative entropy of two PMFs p and q // D(p||q) = Sigma_x p(x) log(p(x)/q(x)) // 0.log(0/q) = 0 and // p.log(p/0) = inf, (we throw an exception rather that return inf) public static double relativeEntropy (double[] p, double[] q) throws ArithmeticException { double result = 0; for (int x=0;x<p.length;x++) { if (p[x]!=0) { if (q[x]==0) { throw new ArithmeticException ("The Relative Entropy equals infinite"); } else { result = result + p[x] * log2(p[x]/q[x]); } } } return result; } public static double D (double[] p, double[] q) throws ArithmeticException { return relativeEntropy (p,q); } public static double KullbackLeibler (double[] p, double[] q) throws ArithmeticException { return relativeEntropy (p,q); } // this method finds the change of output elementIndex // using P_Y(y) = R(y) = QW(y) = Sigma_x W(y|x)Q(x) public static double outputProb (int outputIndex, double[] Q, double[][] W ) { double result = 0; for (int i=0;i<Q.length;i++) { result = result + W[i][outputIndex] * Q[i]; } return result; } public static double R (int outputIndex, double[] inputProbs_Q, double[][] matrix_W ) { return outputProb (outputIndex, inputProbs_Q, matrix_W); } public static double QW (int outputIndex, double[] inputProbs_Q, double[][] matrix_W ) { return outputProb (outputIndex, inputProbs_Q, matrix_W); } // Returns a uniform distribution of length "noOfElements" public static double[] unifromDist (int noOfElements) { double[] dist = new double[noOfElements]; for (int i=0;i<noOfElements;i++) { dist[i] = (1.0/(double)noOfElements); } return dist; } public static double log2 (double x) { if (x == 0) {//System.out.println("Taking a log of 0 to be 0"); return 0;} else { return Math.log(x)/Math.log(2);} } // prints the probability distribution probs with where // element a_i has prob[i], to 4 decimal places public static void printPMF(double[] probs) { // Require that names.length = probs.length > 1 System.out.printf(", a0:%6.5f",probs[0]); for (int i = 1;i < probs.length;i++) { System.out.printf(", a"+i+":%6.5f",probs[i]); } } // prints the probability distribution probs with where // element names[i] has prob[i] public static void printPMF(String[] names, double[] probs) { // Require that names.length = probs.length > 1 System.out.printf(names[0]+":%6.3f",probs[0]); for (int i = 1;i < names.length;i++) { System.out.printf(", "+names[i]+":%6.4f",probs[i]); } } }