/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.features.transformation;
import java.util.Iterator;
import java.util.List;
import Jama.Matrix;
import Jama.SingularValueDecomposition;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Statistics;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.math.MathFunctions;
/**
* This operator performs the independent componente analysis (ICA).
* Implementation of the FastICA-algorithm of Hyvaerinen und Oja. The operator
* outputs a <code>FastICAModel</code>. With the <code>ModelApplier</code>
* you can transform the features.
*
* @author Daniel Hakenjos, Ingo Mierswa
* @version $Id: FastICA.java,v 1.10 2008/08/21 13:17:07 ingomierswa Exp $
* @see FastICAModel
*/
public class FastICA extends Operator {
/** The parameter name for "Number components to be extracted (-1 number of attributes is used)." */
public static final String PARAMETER_NUMBER_OF_COMPONENTS = "number_of_components";
/** The parameter name for "If 'parallel' the components are extracted simultaneously, 'deflation' the components are extracted one at a time" */
public static final String PARAMETER_ALGORITHM_TYPE = "algorithm_type";
/** The parameter name for "The functional form of the G function used in the approximation to neg-entropy" */
public static final String PARAMETER_FUNCTION = "function";
/** The parameter name for "constant in range [1, 2] used in approximation to neg-entropy when fun="logcosh"" */
public static final String PARAMETER_ALPHA = "alpha";
/** The parameter name for "Indicates whether rows of the data matrix " */
public static final String PARAMETER_ROW_NORM = "row_norm";
/** The parameter name for "maximum number of iterations to perform" */
public static final String PARAMETER_MAX_ITERATION = "max_iteration";
/** The parameter name for "A positive scalar giving the tolerance at which " */
public static final String PARAMETER_TOLERANCE = "tolerance";
/** The parameter name for "Use the given random seed instead of global random numbers (-1: use global)" */
public static final String PARAMETER_LOCAL_RANDOM_SEED = "local_random_seed";
private static final Class[] INPUT_CLASSES = new Class[] { ExampleSet.class };
private static final Class[] OUTPUT_CLASSES = new Class[] { ExampleSet.class, Model.class };
private static final String[] ALGORITHM_TYPE = new String[] { "deflation", "parallel" };
private static final String[] FUNCTION = new String[] { "logcosh", "exp" };
private int algorithmType;
private int function;
private int numberOfComponents;
private double tolerance;
private double alpha;
private boolean rowNorm;
private int maxIteration;
private int numberOfSamples, numberOfAttributes;
private Attribute[] attributes;
private double[] means;
private double[][] data;
private double[][] wInit;
public FastICA(OperatorDescription description) {
super(description);
}
public IOObject[] apply() throws OperatorException {
// get the ExampleSet
ExampleSet set = getInput(ExampleSet.class);
set.recalculateAllAttributeStatistics();
numberOfSamples = set.size();
numberOfAttributes = set.getAttributes().size();
// all attributes numerical
attributes = new Attribute[numberOfAttributes];
means = new double[numberOfAttributes];
int i = 0;
Iterator<Attribute> atts = set.getAttributes().iterator();
while (atts.hasNext()) {
attributes[i] = atts.next();
if (!attributes[i].isNumerical()) {
throw new UserError(this, 104, new Object[] { "FastICA", attributes[i].getName() });
}
means[i] = set.getStatistics(attributes[i], Statistics.AVERAGE);
i++;
}
// get the parameter
algorithmType = getParameterAsInt(PARAMETER_ALGORITHM_TYPE);
function = getParameterAsInt(PARAMETER_FUNCTION);
tolerance = getParameterAsDouble(PARAMETER_TOLERANCE);
alpha = getParameterAsDouble(PARAMETER_ALPHA);
rowNorm = getParameterAsBoolean(PARAMETER_ROW_NORM);
maxIteration = getParameterAsInt(PARAMETER_MAX_ITERATION);
numberOfComponents = getParameterAsInt(PARAMETER_NUMBER_OF_COMPONENTS);
if (numberOfComponents < 1) {
numberOfComponents = numberOfAttributes;
}
if (numberOfComponents > numberOfAttributes) {
numberOfComponents = numberOfAttributes;
logWarning("The parameter 'number_of_components' is too large! Set to number of attributes.");
}
// get the centered data
data = new double[numberOfSamples][numberOfAttributes];
Iterator<Example> reader = set.iterator();
Example example;
for (int sample = 0; sample < numberOfSamples; sample++) {
example = reader.next();
for (int d = 0; d < numberOfAttributes; d++) {
data[sample][d] = example.getValue(attributes[d]) - means[d];
}
}
log("Initializing the weights...");
// init the weight matrix
// w.init <- matrix(rnorm(n.comp^2),n.comp,n.comp)
wInit = new double[numberOfComponents][numberOfComponents];
// init w randomly
RandomGenerator randomGenerator = RandomGenerator.getRandomGenerator(getParameterAsInt(PARAMETER_LOCAL_RANDOM_SEED));
for (i = 0; i < numberOfComponents; i++) {
for (int j = 0; j < numberOfComponents; j++) {
wInit[i][j] = randomGenerator.nextDouble() * 2 - 1;
}
}
// row normalization
// Scaling is done by dividing the rows of the data
// by their root-mean-square. The root-mean-square for a row
// is obtained by computing the
// square-root of the sum-of-squares of the values in the
// row divided by the number of values minus one.
if (rowNorm) {
log("Scaling the data now.");
double rms_row;
for (int row = 0; row < numberOfSamples; row++) {
// compute root mean square for the row
rms_row = 0.0d;
for (int d = 0; d < numberOfAttributes; d++) {
rms_row += data[row][d] * data[row][d];
}
rms_row = Math.sqrt(rms_row) / Math.max(1, numberOfAttributes - 1);
for (int d = 0; d < numberOfAttributes; d++) {
data[row][d] = data[row][d] / rms_row;
}
}
}
Matrix X = new Matrix(data);
X = X.transpose();
// Whitening
log("Whitening the data now.");
// V <- X %*% t(X)/n
// V nrow=nr_atts ncol=nr_atts
Matrix V = X.times(X.transpose().timesEquals(1.0d / numberOfSamples));
// s <- La.svd(V, method="dgesdd")
SingularValueDecomposition svd = V.svd();
// D <- diag(c(1/sqrt(s$d)))
Matrix D = svd.getS();
double[][] singularvalue = D.getArray();
for (i = 0; i < singularvalue.length; i++) {
singularvalue[i][i] = 1.0d / Math.sqrt(singularvalue[i][i]);
}
D = new Matrix(singularvalue);
// K <- D %*% t(s$u)
Matrix K = D.times(svd.getU().transpose());
// K <- matrix(K[1:n.comp, ], n.comp, p)
K = new Matrix(K.getArray(), numberOfComponents, numberOfAttributes);
// X1 <- K %*% X
Matrix X1 = K.times(X);
// end Whitening
Matrix a;
if (algorithmType == 0) {
a = deflation(X1);
} else {
a = parallel(X1);
}
log("Creating the model...");
// w <- a %*% K
Matrix w = a.times(K);
// S <- w %*% X
// Matrix S = w.times(X); // Unused. Shevek
// A <- t(w) %*% solve(w %*% t(w))
Matrix W2 = w.times(w.transpose());
Matrix A = w.transpose().times(W2.inverse());
/*
* return(list(X = t(X), K = t(K), W = t(a), A = t(A), S = t(S)))
*/
Matrix W;
X = X.transpose();
K = K.transpose();
W = a.transpose();
A = A.transpose();
// S = S.transpose(); // Unused. Shevek
// X pre-processed data matrix
// X(nr_samples,nr_atts)
// K pre-whitening matrix that projects data onto th first n.comp
// principal components.
// K(nr_atts,nr_components)
// W estimated un-mixing matrix
// W(nr_components,nr_components)
// A estimated mixing matrix
// A(nr_components,nr_atts)
// S estimated source matrix
// S(nr_samples,nr_components)
FastICAModel model = new FastICAModel(set, numberOfComponents, means, rowNorm, K, W, A);
return new IOObject[] { set, model };
}
private Matrix deflation(Matrix X) throws OperatorException {
log("Deflation FastICA using " + FUNCTION[function] + " approx. to neg-entropy function");
// X(nr_components,nr_samples)
// n <- nrow(X)
// p <- ncol(X)
// int n=X.getRowDimension();
// int p=X.getColumnDimension();
// W <- matrix(0, n.comp, n.comp)
Matrix W = new Matrix(numberOfComponents, numberOfComponents, 0.0d);
Matrix w, t, Wu, wx, gwx, xgwx, g_wx;
double k, rss, lim, value;
int iter;
int iterlog = 1;
while ((maxIteration / iterlog > 10) && (maxIteration / (iterlog * 10) >= 3)) {
iterlog *= 10;
}
for (int i = 0; i < numberOfComponents; i++) {
log("Component " + (i + 1));
// w <- matrix(w.init[i,], n.comp, 1)
w = new Matrix(wInit[i], wInit[i].length);
if (i > 0) {
// t <- w
// t[1:length(t)] <- 0
t = new Matrix(wInit[i].length, 1, 0.0d);
// for (u in 1:(i - 1)) {
for (int u = 1; u <= i; u++) {
// W[u, ]
// Wu(1,nr_components)
Wu = W.getMatrix(u - 1, u - 1, 0, numberOfComponents - 1);
// k <- sum(w * W[u, ])
// k(1,1)
k = w.transpose().times(Wu.transpose()).getArray()[0][0];
// t <- t + k * W[u, ]
t.plusEquals(Wu.times(k).transpose());
}
// w <- w - t
w.minusEquals(t);
}
// sqrt(sum(w^2))
rss = Math.sqrt(w.times(w.transpose()).getArray()[0][0]);
// w <- w/sqrt(sum(w^2))
// w(nr_components,1)
w.timesEquals(1.0d / rss);
// lim <- rep(1000, maxit)
lim = 1000.0d;
// it <- 1
iter = 1;
// while (lim[it] > tol && it < maxit) {
while (lim > tolerance && iter <= maxIteration) {
// wx <- t(w) %*% X
// wx(1,nr_samples)
wx = w.transpose().times(X);
double[][] wxarray = wx.getArray();
if (function == 0) {
// logcosh function
// gwx <- tanh(alpha * wx)
for (int j = 0; j < wxarray[0].length; j++) {
wxarray[0][j] = MathFunctions.tanh(alpha * wxarray[0][j]);
}
} else {
// exp function
// gwx <- wx * exp(-(wx^2)/2)
for (int j = 0; j < wxarray[0].length; j++) {
wxarray[0][j] = wxarray[0][j] * Math.exp(-0.5d * wxarray[0][j] * wxarray[0][j]);
}
}
// gwx(1,nr_samples)
gwx = new Matrix(wxarray);
// gwx <- matrix(gwx, n.comp, p, byrow = TRUE)
// gwx(nr_components,nr_samples)
// xgwx <- X * gwx
// gwx(nr_components,nr_samples)
double[][] gwxarray = gwx.getArray();
double[][] Xarray = X.getArray();
for (int row = 0; row < Xarray.length; row++) {
for (int col = 0; col < Xarray[0].length; col++) {
Xarray[row][col] = Xarray[row][col] * gwxarray[0][col];
}
}
// xgwx(nr_components,nr_samples)
xgwx = new Matrix(Xarray);
// v1 <- apply(xgwx, 1, FUN = mean)
// calculates the mean of each row
Matrix v1 = new Matrix(numberOfComponents, 1, 0.0d);
double mean;
for (int row = 0; row < numberOfComponents; row++) {
mean = 0;
for (int col = 0; col < numberOfSamples; col++) {
mean += xgwx.get(row, col);
}
mean = mean / numberOfSamples;
v1.set(row, 0, mean);
}
// g_wx(1,nr_samples)
g_wx = wx.copy();
mean = 0.0d;
if (function == 0) {
// logcosh function
// g.wx <- alpha * (1 - (tanh(alpha * wx))^2)
for (int j = 0; j < wxarray[0].length; j++) {
value = MathFunctions.tanh(alpha * g_wx.get(0, j));
value = alpha * (1.0d - value * value);
mean += value;
g_wx.set(0, j, value);
}
} else {
// exp function
// g.wx <- (1 - wx^2) * exp(-(wx^2)/2)
for (int j = 0; j < wxarray[0].length; j++) {
value = g_wx.get(0, j);
value = (1.0d - value * value) * Math.exp(-0.5d * value * value);
mean += value;
g_wx.set(0, j, value);
}
}
mean /= numberOfSamples;
// v2 <- mean(g.wx) * w
// v2(nr_components,1)
Matrix v2 = w.copy();
v2.timesEquals(mean);
// w1 <- v1 - v2
// w1 <- matrix(w1, n.comp, 1)
// w1(nr_components,1)
Matrix w1 = v1.minus(v2);
if (i > 0) {
// t <- w1
// t[1:length(t)] <- 0
t = new Matrix(w1.getRowDimension(), w1.getColumnDimension(), 0.0d);
// for (u in 1:(i - 1)) {
for (int u = 1; u <= i; u++) {
// W[u, ]
// Wu(1,nr_components)
Wu = W.getMatrix(u - 1, u - 1, 0, numberOfComponents - 1);
// k <- sum(w1 * W[u, ])
// k(1,1)
k = w1.transpose().times(Wu.transpose()).getArray()[0][0];
// t <- t + k * W[u, ]
t.plusEquals(Wu.times(k).transpose());
}
// w1 <- w1 - t
w1.minusEquals(t);
}
// w1 <- w1/sqrt(sum(w1^2))
rss = Math.sqrt(w1.transpose().times(w1).getArray()[0][0]);
w1.timesEquals(1.0d / rss);
// lim[it] <- Mod(Mod(sum((w1 * w))) - 1)
lim = Math.abs(Math.abs(w1.transpose().times(w).getArray()[0][0]) - 1.0d);
if ((iter % iterlog == 0) || (lim <= tolerance)) {
log("Iteration " + (iter) + ", tolerance = " + lim);
}
iter++;
// w <- matrix(w1, n.comp, 1)
w = w1.copy();
}
// W[i, ] <- w
for (int col = 0; col < numberOfComponents; col++) {
W.set(i, col, w.get(col, 0));
}
checkForStop();
}
return W;
}
private Matrix parallel(Matrix X) throws OperatorException {
log("Symmetric FastICA using " + FUNCTION[function] + " approx. to neg-entropy function");
// X(nr_components,nr_samples)
// n <- nrow(X)
// p <- ncol(X)
// int n=X.getRowDimension();
int p = X.getColumnDimension();
// W <- w.init
Matrix W = new Matrix(wInit);
// sW <- La.svd(W, method = "dgesdd")
SingularValueDecomposition svd = W.svd();
// diag(1/sW$d)
double[] svalues = svd.getSingularValues();
Matrix svaluesMatrix = new Matrix(svalues.length, svalues.length, 0.0d);
for (int i = 0; i < svalues.length; i++) {
svalues[i] = 1 / svalues[i];
svaluesMatrix.set(i, i, svalues[i]);
}
// W <- sW$u %*% diag(1/sW$d) %*% t(sW$u) %*% W
// W(nr_components,nr_components)
W = svd.getU().times(svaluesMatrix).times(svd.getU().transpose()).times(W);
// W1 <- W
Matrix W1 = W.copy();
// lim <- rep(1000, maxit)
double lim = 1000.0d;
int iter = 1;
int iterlog = 1;
while ((maxIteration / iterlog > 10) && (maxIteration / (iterlog * 10) >= 3)) {
iterlog *= 10;
}
Matrix wx, gwx, v1, g_wx, diagmean, v2;
double value, mean;
// while (lim[it] > tol && it < maxit) {
while (lim > tolerance && iter <= maxIteration) {
// wx <- W %*% X
// wx(nr_components,nr_samples)
wx = W.times(X);
// gwx(nr_components,nr_samples)
gwx = wx.copy();
if (function == 0) {
// logcosh function
// gwx <- tanh(alpha * wx)
for (int row = 0; row < numberOfComponents; row++) {
for (int col = 0; col < numberOfSamples; col++) {
value = gwx.get(row, col);
value = MathFunctions.tanh(alpha * value);
gwx.set(row, col, value);
}
}
} else {
// exp function
// gwx <- wx * exp(-(wx^2)/2)
for (int row = 0; row < numberOfComponents; row++) {
for (int col = 0; col < numberOfSamples; col++) {
value = gwx.get(row, col);
value = value * Math.exp(-0.5d * value * value);
gwx.set(row, col, value);
}
}
}
// v1 <- gwx %*% t(X)/p
// v1(nr_components,nr_components)
v1 = gwx.times(X.transpose()).times(p);
// g_wx(nr_components,nr_samples)
g_wx = gwx.copy();
// diag(apply(g.wx, 1, FUN = mean))
diagmean = new Matrix(numberOfComponents, numberOfComponents, 0.0d);
if (function == 0) {
// logcosh funtion
// g.wx <- alpha * (1 - (gwx)^2)
for (int row = 0; row < numberOfComponents; row++) {
mean = 0.0d;
for (int col = 0; col < numberOfSamples; col++) {
value = g_wx.get(row, col);
value = alpha * (1.0d - value * value);
g_wx.set(row, col, value);
mean += value;
}
mean = mean / numberOfSamples;
diagmean.set(row, row, mean);
}
} else {
// exp function
// g.wx <- (1 - wx^2) * exp(-(wx^2)/2)
g_wx = wx.copy();
for (int row = 0; row < numberOfComponents; row++) {
mean = 0.0d;
for (int col = 0; col < numberOfSamples; col++) {
value = g_wx.get(row, col);
value = (1.0d - value * value) * Math.exp(-0.5d * value * value);
g_wx.set(row, col, value);
mean += value;
}
mean = mean / numberOfSamples;
diagmean.set(row, row, mean);
}
}
// v2 <- diag(apply(g.wx, 1, FUN = mean)) %*% W
// v2(nr_components,nr_components)
v2 = diagmean.times(W);
// W1 <- v1 - v2
// W1(nr_components,nr_components)
W1 = v1.minus(v2);
// sW1 <- La.svd(W1, method = "dgesdd")
svd = W1.svd();
// diag(1/sW1$d)
svalues = svd.getSingularValues();
svaluesMatrix = new Matrix(svalues.length, svalues.length, 0.0d);
for (int i = 0; i < svalues.length; i++) {
svalues[i] = 1 / svalues[i];
svaluesMatrix.set(i, i, svalues[i]);
}
// W1 <- sW1$u %*% diag(1/sW1$d) %*% t(sW1$u) %*% W1
// W1(nr_components,nr_components)
W1 = svd.getU().times(svaluesMatrix).times(svd.getU().transpose()).times(W1);
// lim[it + 1] <- max(Mod(Mod(diag(W1 %*% t(W))) - 1))
double[][] diag = W1.times(W.transpose()).getArray();
value = Double.NEGATIVE_INFINITY;
for (int row = 0; row < numberOfComponents; row++) {
value = Math.max(value, Math.abs(Math.abs(diag[row][row]) - 1.0d));
}
lim = value;
// W <- W1
W = W1.copy();
if ((iter % iterlog == 0) || (lim <= tolerance)) {
log("Iteration " + (iter) + ", tolerance = " + lim);
}
iter++;
}
return W;
}
public Class<?>[] getInputClasses() {
return INPUT_CLASSES;
}
public Class<?>[] getOutputClasses() {
return OUTPUT_CLASSES;
}
public List<ParameterType> getParameterTypes() {
List<ParameterType> list = super.getParameterTypes();
ParameterType type = new ParameterTypeInt(PARAMETER_NUMBER_OF_COMPONENTS, "Number components to be extracted (-1 number of attributes is used).", -1, Integer.MAX_VALUE, -1);
type.setExpert(false);
list.add(type);
type = new ParameterTypeCategory(PARAMETER_ALGORITHM_TYPE, "If 'parallel' the components are extracted simultaneously, 'deflation' the components are extracted one at a time", ALGORITHM_TYPE, 0);
list.add(type);
type = new ParameterTypeCategory(PARAMETER_FUNCTION, "The functional form of the G function used in the approximation to neg-entropy", FUNCTION, 0);
list.add(type);
type = new ParameterTypeDouble(PARAMETER_ALPHA, "constant in range [1, 2] used in approximation to neg-entropy when fun=\"logcosh\"", 1.0d, 2.0d, 1.0d);
list.add(type);
type = new ParameterTypeBoolean(PARAMETER_ROW_NORM, "Indicates whether rows of the data matrix " + "should be standardized beforehand.", false);
list.add(type);
type = new ParameterTypeInt(PARAMETER_MAX_ITERATION, "maximum number of iterations to perform", 0, Integer.MAX_VALUE, 200);
list.add(type);
type = new ParameterTypeDouble(PARAMETER_TOLERANCE, "A positive scalar giving the tolerance at which " + "the un-mixing matrix is considered to have converged.", 0.0d, Double.POSITIVE_INFINITY, 1e-4);
list.add(type);
type = new ParameterTypeInt(PARAMETER_LOCAL_RANDOM_SEED, "Use the given random seed instead of global random numbers (-1: use global)", -1, Integer.MAX_VALUE, -1);
list.add(type);
return list;
}
}