package ca.pfv.spmf.algorithms.clustering.distanceFunctions;
import ca.pfv.spmf.patterns.cluster.DoubleArray;
/* This file is copyright (c) 2008-2015 Philippe Fournier-Viger
*
* This file is part of the SPMF DATA MINING SOFTWARE
* (http://www.philippe-fournier-viger.com/spmf).
*
* SPMF is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* SPMF is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
* A PARTICULAR PURPOSE. See the GNU General Public License for more details.
* You should have received a copy of the GNU General Public License along with
* SPMF. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* This class implements the correlation distance function. It is a subclass of the
* DistanceFunction class which represents any distance function.
* The correlation distance function calculates the distance between
* two vectors of double and returns a value in [0,2].
* A result of 0 means that there is a positive linear relationship.
* A result of 2 means that there is a negative linear relationship.
* A result of 1 means that there is no linear relationship (but there might be
* a non linear relationship).
* Note that this implementation is different from the typical correlation
* similarity measure which returns a value in [-1,1] where 1 is positive and -1 is a negative
* relationship. The reason why we use
* [0,2] instead is that in clustering a distance measure should in general
* return a value of 0
* when two objects are similar and a high value when they are different.
* <br/><br/>
*
* @see DistanceFunction
* @author Philippe Fournier-Viger
*/
public class DistanceCorrelation extends DistanceFunction {
    /** The name of this distance function. */
    static final String NAME = "correlation";

    /**
     * Calculate the correlation distance between two vectors of doubles.
     * The distance is {@code 1 - r}, where {@code r} is the sample (n-1)
     * correlation coefficient of the two vectors, so the result lies in [0,2]:
     * <ul>
     * <li>0 means a positive linear relationship,</li>
     * <li>1 means no linear relationship (there may still be a non-linear one),</li>
     * <li>2 means a negative linear relationship.</li>
     * </ul>
     * Degenerate inputs for which the correlation is undefined (fewer than two
     * values, or at least one vector with zero variance) yield a distance of 0.
     *
     * @param vector1
     *            the first vector
     * @param vector2
     *            the second vector
     * @return the distance, in [0,2]
     * @throws IllegalArgumentException
     *             if the two vectors do not have the same number of values
     */
    public double calculateDistance(DoubleArray vector1, DoubleArray vector2) {
        final double[] first = vector1.data;
        final double[] second = vector2.data;

        // A correlation is only defined for paired observations; without this
        // check a shorter second vector fails with an index error in the loop
        // and a longer one silently produces a meaningless value.
        if (first.length != second.length) {
            throw new IllegalArgumentException("Vectors must have the same length: "
                    + first.length + " vs " + second.length);
        }

        final int count = first.length;
        // With fewer than two values the sample standard deviation divides by
        // zero and the result would be NaN. Treat it like the zero-variance
        // case handled below.
        if (count < 2) {
            return 0;
        }

        final double mean1 = calculateMean(vector1);
        final double mean2 = calculateMean(vector2);
        final double standardDeviation1 = calculateStdDeviation(vector1, mean1);
        final double standardDeviation2 = calculateStdDeviation(vector2, mean2);

        // Sum of the products of the deviations from the means
        // (the numerator of the correlation coefficient).
        double crossProductSum = 0;
        for (int i = 0; i < count; i++) {
            crossProductSum += (first[i] - mean1) * (second[i] - mean2);
        }

        final double bottom = standardDeviation1 * standardDeviation2 * (count - 1);
        // At least one vector is constant: the correlation is undefined,
        // and by convention of this class the distance is 0.
        if (bottom == 0) {
            return 0;
        }

        final double distance = 1 - crossProductSum / bottom;
        // Rounding errors can push the value slightly outside [0,2]
        // (e.g. a tiny negative distance); clamp it back into the range.
        return Math.max(0, Math.min(2, distance));
    }

    /**
     * Calculate the mean of the values of a vector.
     *
     * @param vector
     *            the vector of doubles
     * @return the mean (NaN if the vector contains no value)
     */
    private static double calculateMean(DoubleArray vector) {
        double sum = 0;
        for (double val : vector.data) {
            sum += val;
        }
        return sum / vector.data.length;
    }

    /**
     * Calculate the standard deviation of the values of a vector.
     * Note that it divides by n-1 instead of n, assuming that it is
     * the standard deviation of a sample rather than a population.
     * Consequently the result is NaN for a vector with a single value.
     *
     * @param vector
     *            the vector of doubles
     * @param mean
     *            the mean of the values of the vector
     * @return the standard deviation
     */
    private static double calculateStdDeviation(DoubleArray vector, double mean) {
        double squaredDeviationSum = 0;
        for (double val : vector.data) {
            final double difference = mean - val;
            squaredDeviationSum += difference * difference;
        }
        return Math.sqrt(squaredDeviationSum / (vector.data.length - 1));
    }

    /**
     * Small demonstration of the distance function on three pairs of vectors.
     *
     * @param args
     *            not used
     */
    public static void main(String[] args) {
        // Two constant vectors (zero variance): the result should be 0.
        DoubleArray array1 = new DoubleArray(new double[] {1, 1, 1, 1, 1});
        DoubleArray array2 = new DoubleArray(new double[] {1, 1, 1, 1, 1});
        System.out.println(new DistanceCorrelation().calculateDistance(array1, array2));

        // Positive linear relationship (array5 = 3 * array6): the result should be 0.
        DoubleArray array5 = new DoubleArray(new double[] {3, 6, 0, 3, 6});
        DoubleArray array6 = new DoubleArray(new double[] {1, 2, 0, 1, 2});
        System.out.println(new DistanceCorrelation().calculateDistance(array5, array6));

        // Negative linear relationship (array3 = -3 * array4): the result should be 2.
        DoubleArray array3 = new DoubleArray(new double[] {3, -6, 0, 3, -6});
        DoubleArray array4 = new DoubleArray(new double[] {-1, 2, 0, -1, 2});
        System.out.println(new DistanceCorrelation().calculateDistance(array3, array4));
    }

    /**
     * Get the name of this distance function.
     *
     * @return the name of this distance function
     */
    @Override
    public String getName() {
        return NAME;
    }
}