/**
* Copyright Copyright 2010-14 Simon Andrews
*
* This file is part of BamQC.
*
* BamQC is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* BamQC is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with BamQC; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* Changelog:
* - Simon Andrews: Class creation.
*/
package uk.ac.babraham.BamQC.Statistics;
/**
* A Class to calculate the Pearson Correlation.
* @author Simon Andrews
*/
public class PearsonCorrelation {

    /** Message used whenever the two paired datasets differ in length. */
    private static final String LENGTH_MISMATCH = "Data sets must be the same length when calculating correlation";

    /**
     * Calculates the Pearson correlation between two datasets of long values.
     * <p>
     * The values are widened to double (not narrowed to float) before any
     * arithmetic, so large counts keep their low-order digits.
     *
     * @param data1 the first dataset
     * @param data2 the second dataset
     * @return the Pearson r-value in the range [-1, 1], or NaN if the datasets
     *         are empty or either of them has zero variance
     * @throws IllegalArgumentException if the two datasets don't have the same number of points in them.
     */
    public static float calculateCorrelation (long [] data1, long [] data2) {
        checkSameLength(data1.length, data2.length);
        return (float) pearson(toDoubles(data1, 0, data1.length), toDoubles(data2, 0, data2.length));
    }

    /**
     * Calculates the Pearson correlation between two datasets of long values
     * after shifting the second dataset relative to the first.
     * <p>
     * Element {@code i} of data1 is paired with element {@code i + offset} of
     * data2, so only the first {@code length - offset} points of data1 and the
     * last {@code length - offset} points of data2 take part.
     *
     * @param data1 the first dataset
     * @param data2 the second dataset
     * @param offset the number of positions by which data2 is shifted; must be
     *        between 0 and the dataset length (inclusive)
     * @return the Pearson r-value in the range [-1, 1], or NaN if no points
     *         remain after applying the offset or either compared range has
     *         zero variance
     * @throws IllegalArgumentException if the two datasets don't have the same
     *         number of points in them, or if offset is negative or larger
     *         than the dataset length.
     */
    public static float calculateCorrelation (long [] data1, long [] data2, int offset) {
        checkSameLength(data1.length, data2.length);
        if (offset < 0 || offset > data1.length) {
            throw new IllegalArgumentException("Offset " + offset + " must be between 0 and the data set length (" + data1.length + ")");
        }
        int points = data1.length - offset;
        return (float) pearson(toDoubles(data1, 0, points), toDoubles(data2, offset, points));
    }

    /**
     * Calculates the Pearson correlation between two datasets of float values.
     *
     * @param data1 the first dataset
     * @param data2 the second dataset
     * @return the Pearson r-value in the range [-1, 1], or NaN if the datasets
     *         are empty or either of them has zero variance
     * @throws IllegalArgumentException if the two datasets don't have the same number of points in them.
     */
    public static float calculateCorrelation (float [] data1, float [] data2) {
        checkSameLength(data1.length, data2.length);
        double [] d1 = new double[data1.length];
        double [] d2 = new double[data2.length];
        for (int i = 0; i < data1.length; i++) {
            d1[i] = data1[i];
            d2[i] = data2[i];
        }
        return (float) pearson(d1, d2);
    }

    /** Rejects paired datasets whose lengths differ. */
    private static void checkSameLength (int length1, int length2) {
        if (length1 != length2) {
            throw new IllegalArgumentException(LENGTH_MISMATCH);
        }
    }

    /** Copies {@code count} values starting at {@code start} into a new double array. */
    private static double [] toDoubles (long [] data, int start, int count) {
        double [] converted = new double[count];
        for (int i = 0; i < count; i++) {
            converted[i] = data[start + i];
        }
        return converted;
    }

    /**
     * Computes Pearson's r for two equal-length arrays using a two-pass,
     * mean-centred calculation in double precision.
     */
    private static double pearson (double [] x, double [] y) {
        int n = x.length;
        if (n == 0) {
            return Double.NaN;
        }

        // First pass: the means.
        double sumX = 0;
        double sumY = 0;
        for (int i = 0; i < n; i++) {
            sumX += x[i];
            sumY += y[i];
        }
        double meanX = sumX / n;
        double meanY = sumY / n;

        // Second pass: sums of products of deviations. Working on deviations
        // avoids subtracting two huge, nearly equal sums of squares, which is
        // where the single-pass formula loses all of its significant digits.
        double covariance = 0;
        double varianceX = 0;
        double varianceY = 0;
        for (int i = 0; i < n; i++) {
            double dx = x[i] - meanX;
            double dy = y[i] - meanY;
            covariance += dx * dy;
            varianceX += dx * dx;
            varianceY += dy * dy;
        }

        double denominator = Math.sqrt(varianceX * varianceY);
        if (denominator == 0) {
            // At least one dataset is constant, so the correlation is undefined.
            return Double.NaN;
        }

        // Rounding can push the ratio a hair outside [-1, 1]; clamp it back.
        // Math.min/Math.max propagate NaN, so NaN inputs still yield NaN.
        return Math.max(-1.0, Math.min(1.0, covariance / denominator));
    }
}