/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.tools.math.matrix;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.ProcessStoppedException;
import Jama.Matrix;
/**
* This helper class can be used to calculate a covariance matrix from given matrices or example
* sets.
*
* @author Regina Fritsch, Ingo Mierswa
*/
public class CovarianceMatrix {

    /**
     * Transforms the example set into a double matrix (doubling the amount of used memory) and
     * calculates the covariance matrix of its columns. Delegates to
     * {@link #getCovarianceMatrix(ExampleSet, Operator)} with a {@code null} operator.
     *
     * @param exampleSet
     *            ExampleSet to construct the covariance matrix from
     * @return the covariance matrix of the attributes of the example set
     * @deprecated Please use {@link #getCovarianceMatrix(ExampleSet, Operator)} so that the
     *             calculation can be stopped if necessary.
     */
    @Deprecated
    public static Matrix getCovarianceMatrix(ExampleSet exampleSet) {
        try {
            return getCovarianceMatrix(exampleSet, null);
        } catch (ProcessStoppedException e) {
            // checkForStop() is only invoked on a non-null operator, so this cannot be reached
            throw new IllegalStateException("Calculation was stopped although no operator was given.", e);
        }
    }

    /**
     * Transforms the example set into a double matrix (doubling the amount of used memory) and
     * invokes {@link #getCovarianceMatrix(double[][], Operator)}.
     *
     * @param exampleSet
     *            ExampleSet to construct the covariance matrix from
     * @param op
     *            executing Operator which will be used to check for stop (can be null).
     * @return the covariance matrix of the attributes of the example set
     * @throws ProcessStoppedException
     *             if the given operator signals a stop via {@code checkForStop()}
     * @throws IllegalArgumentException
     *             if the example set contains no examples or no attributes
     */
    public static Matrix getCovarianceMatrix(ExampleSet exampleSet, Operator op) throws ProcessStoppedException {
        boolean checkForStop = op != null;
        double[][] data = new double[exampleSet.size()][exampleSet.getAttributes().size()];

        // the matrix is filled column by column: one attribute per column, one example per row
        int c = 0;
        for (Attribute attribute : exampleSet.getAttributes()) {
            int r = 0;
            for (Example example : exampleSet) {
                data[r][c] = example.getValue(attribute);
                r++;
            }
            c++;
            if (checkForStop) {
                op.checkForStop();
            }
        }
        return getCovarianceMatrix(data, op);
    }

    /**
     * Returns the covariance matrix from the given double matrix. Delegates to
     * {@link #getCovarianceMatrix(double[][], Operator)} with a {@code null} operator.
     *
     * @param data
     *            data to construct the covariance matrix from
     * @return the covariance matrix of the columns of {@code data}
     * @deprecated Please use {@link #getCovarianceMatrix(double[][], Operator)} so that the
     *             calculation can be stopped if necessary.
     */
    @Deprecated
    public static Matrix getCovarianceMatrix(double[][] data) {
        try {
            return getCovarianceMatrix(data, null);
        } catch (ProcessStoppedException e) {
            // checkForStop() is only invoked on a non-null operator, so this cannot be reached
            throw new IllegalStateException("Calculation was stopped although no operator was given.", e);
        }
    }

    /**
     * Returns the covariance matrix from the given double matrix. Rows are treated as
     * observations and columns as variables; each entry is the sum of the products of the
     * mean-centered column values divided by {@code data.length - 1}.
     * <p>
     * The given array is only read and never modified. If {@code data} has exactly one row the
     * divisor is zero and all entries of the result are {@code NaN}.
     *
     * @param data
     *            data to construct the covariance matrix from
     * @param op
     *            executing Operator which will be used to check for stop (can be null).
     * @return a symmetric matrix with one row and one column per column of {@code data}
     * @throws ProcessStoppedException
     *             if the given operator signals a stop via {@code checkForStop()}
     * @throws IllegalArgumentException
     *             if {@code data} has zero rows, zero columns or rows of different lengths
     */
    public static Matrix getCovarianceMatrix(double[][] data, Operator op) throws ProcessStoppedException {
        // checks
        if (data.length == 0) {
            throw new IllegalArgumentException(
                    "Calculation of covariance matrices not possible for data sets with zero rows.");
        }
        int numberOfColumns = data[0].length;
        if (numberOfColumns <= 0) {
            throw new IllegalArgumentException(
                    "Calculation of covariance matrices not possible for data sets with zero columns.");
        }
        for (int r = 1; r < data.length; r++) {
            if (numberOfColumns != data[r].length) {
                throw new IllegalArgumentException(
                        "Calculation of covariance matrices not possible for data sets with different numbers of columns.");
            }
        }

        // check whether a operator is executing this
        boolean checkForStop = op != null;

        // column averages are computed once; centering happens on the fly in getCovariance so
        // that the caller's array is left untouched and no copy of the data is needed
        double[] averages = new double[numberOfColumns];
        for (int c = 0; c < numberOfColumns; c++) {
            averages[c] = getAverageForColumn(data, c);
        }

        // create covariance matrix
        double[][] covarianceMatrixEntries = new double[numberOfColumns][numberOfColumns];

        // fill the covariance matrix; it is symmetric, so only the upper triangle is computed
        for (int i = 0; i < numberOfColumns; i++) {
            if (checkForStop) {
                op.checkForStop();
            }
            for (int j = i; j < numberOfColumns; j++) {
                double covariance = getCovariance(data, averages, i, j);
                covarianceMatrixEntries[i][j] = covariance;
                covarianceMatrixEntries[j][i] = covariance;
            }
        }
        return new Matrix(covarianceMatrixEntries);
    }

    /** Returns the average for the column with the given index. */
    private static double getAverageForColumn(double[][] data, int column) {
        double sum = 0.0d;
        for (int r = 0; r < data.length; r++) {
            sum += data[r][column];
        }
        return sum / data.length;
    }

    /**
     * Returns the covariance between the given columns, using the given column averages to center
     * the values. The sum of products is divided by the number of rows minus one.
     */
    private static double getCovariance(double[][] data, double[] averages, int x, int y) {
        double averageX = averages[x];
        double averageY = averages[y];
        double cov = 0;
        for (int i = 0; i < data.length; i++) {
            cov += (data[i][x] - averageX) * (data[i][y] - averageY);
        }
        return cov / (data.length - 1);
    }
}