/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.tools.math.matrix;
import Jama.Matrix;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
/**
* This helper class can be used to calculate a covariance matrix from given
* matrices or example sets.
*
* @author Regina Fritsch, Ingo Mierswa
* @version $Id: CovarianceMatrix.java,v 1.4 2008/05/09 19:23:23 ingomierswa Exp $
*/
public class CovarianceMatrix {

    /**
     * Copies the values of all attributes of all examples into a dense double matrix
     * (one row per example, one column per attribute; this doubles the amount of used
     * memory) and invokes {@link #getCovarianceMatrix(double[][])}.
     *
     * @param exampleSet the example set whose attribute values are read
     * @return the covariance matrix of the attribute columns
     * @throws IllegalArgumentException if the resulting data matrix has zero rows or zero columns
     */
    public static Matrix getCovarianceMatrix(ExampleSet exampleSet) {
        double[][] data = new double[exampleSet.size()][exampleSet.getAttributes().size()];
        int r = 0;
        for (Example example : exampleSet) {
            int c = 0;
            for (Attribute attribute : exampleSet.getAttributes()) {
                data[r][c] = example.getValue(attribute);
                c++;
            }
            r++;
        }
        return getCovarianceMatrix(data);
    }

    /**
     * Returns the sample covariance matrix (normalized by {@code rows - 1}) of the columns
     * of the given double matrix. Rows are observations, columns are variables.
     *
     * <p>The given array is only read; it is never modified. For a matrix with exactly one
     * row the normalization divides zero by zero, so every entry of the result is
     * {@code NaN}.</p>
     *
     * @param data the data matrix; all rows must have the same, non-zero length
     * @return a symmetric matrix with one row and one column per column of {@code data}
     * @throws IllegalArgumentException if {@code data} has zero rows, zero columns, or rows
     *         of different lengths
     */
    public static Matrix getCovarianceMatrix(double[][] data) {
        // checks
        if (data.length == 0) {
            throw new IllegalArgumentException("Calculation of covariance matrices not possible for data sets with zero rows.");
        }
        int numberOfColumns = data[0].length;
        for (int r = 1; r < data.length; r++) {
            if (numberOfColumns != data[r].length) {
                throw new IllegalArgumentException("Calculation of covariance matrices not possible for data sets with different numbers of columns.");
            }
        }
        if (numberOfColumns == 0) {
            throw new IllegalArgumentException("Calculation of covariance matrices not possible for data sets with zero columns.");
        }

        // Column averages are kept in a separate array instead of being subtracted
        // from the data in place, so the caller's array stays untouched.
        double[] averages = new double[numberOfColumns];
        for (int c = 0; c < numberOfColumns; c++) {
            averages[c] = getAverageForColumn(data, c);
        }

        // fill the covariance matrix; it is symmetric, so only the upper triangle
        // is computed and each value is mirrored into the lower triangle
        double[][] covarianceMatrixEntries = new double[numberOfColumns][numberOfColumns];
        for (int i = 0; i < numberOfColumns; i++) {
            for (int j = i; j < numberOfColumns; j++) {
                double covariance = getCovariance(data, i, j, averages[i], averages[j]);
                covarianceMatrixEntries[i][j] = covariance;
                covarianceMatrixEntries[j][i] = covariance;
            }
        }
        return new Matrix(covarianceMatrixEntries);
    }

    /**
     * Returns the average for the column with the given index.
     *
     * @param data the data matrix (must have at least one row)
     * @param column the index of the column to average
     * @return the arithmetic mean of the column's values
     */
    private static double getAverageForColumn(double[][] data, int column) {
        double sum = 0.0d;
        for (int r = 0; r < data.length; r++) {
            sum += data[r][column];
        }
        return sum / data.length;
    }

    /**
     * Returns the sample covariance between the given columns. The column averages are
     * subtracted on the fly, so {@code data} is not modified.
     *
     * @param data the (uncentered) data matrix
     * @param x the index of the first column
     * @param y the index of the second column
     * @param averageX the average of column {@code x}
     * @param averageY the average of column {@code y}
     * @return the sum of products of the centered values divided by {@code rows - 1}
     */
    private static double getCovariance(double[][] data, int x, int y, double averageX, double averageY) {
        double cov = 0;
        for (int i = 0; i < data.length; i++) {
            cov += (data[i][x] - averageX) * (data[i][y] - averageY);
        }
        // floating-point division: a single row yields 0.0 / 0 = NaN rather than an exception
        return cov / (data.length - 1);
    }
}