/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.features.transformation;

import java.util.Iterator;
import java.util.List;

import Jama.Matrix;
import Jama.SingularValueDecomposition;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Statistics;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.RandomGenerator;
import com.rapidminer.tools.math.MathFunctions;

/**
 * This operator performs the independent component analysis (ICA).
 * Implementation of the FastICA algorithm of Hyvaerinen and Oja. The operator
 * outputs a <code>FastICAModel</code>. With the <code>ModelApplier</code>
 * you can transform the features.
 *
 * The R-style comments throughout the implementation quote the reference
 * implementation from the R package <code>fastICA</code>.
 *
 * @author Daniel Hakenjos, Ingo Mierswa
 * @version $Id: FastICA.java,v 1.10 2008/08/21 13:17:07 ingomierswa Exp $
 * @see FastICAModel
 */
public class FastICA extends Operator {

    /** The parameter name for &quot;Number components to be extracted (-1 number of attributes is used).&quot; */
    public static final String PARAMETER_NUMBER_OF_COMPONENTS = "number_of_components";

    /** The parameter name for &quot;If 'parallel' the components are extracted simultaneously, 'deflation' the components are extracted one at a time&quot; */
    public static final String PARAMETER_ALGORITHM_TYPE = "algorithm_type";

    /** The parameter name for &quot;The functional form of the G function used in the approximation to neg-entropy&quot; */
    public static final String PARAMETER_FUNCTION = "function";

    /** The parameter name for &quot;constant in range [1, 2] used in approximation to neg-entropy when fun=&quot;logcosh&quot;&quot; */
    public static final String PARAMETER_ALPHA = "alpha";

    /** The parameter name for &quot;Indicates whether rows of the data matrix &quot; */
    public static final String PARAMETER_ROW_NORM = "row_norm";

    /** The parameter name for &quot;maximum number of iterations to perform&quot; */
    public static final String PARAMETER_MAX_ITERATION = "max_iteration";

    /** The parameter name for &quot;A positive scalar giving the tolerance at which &quot; */
    public static final String PARAMETER_TOLERANCE = "tolerance";

    /** The parameter name for &quot;Use the given random seed instead of global random numbers (-1: use global)&quot; */
    public static final String PARAMETER_LOCAL_RANDOM_SEED = "local_random_seed";

    private static final Class<?>[] INPUT_CLASSES = new Class[] { ExampleSet.class };

    private static final Class<?>[] OUTPUT_CLASSES = new Class[] { ExampleSet.class, Model.class };

    /** Index 0 = deflation (one component at a time), index 1 = parallel (all at once). */
    private static final String[] ALGORITHM_TYPE = new String[] { "deflation", "parallel" };

    /** Index 0 = logcosh, index 1 = exp approximation to neg-entropy. */
    private static final String[] FUNCTION = new String[] { "logcosh", "exp" };

    private int algorithmType;

    private int function;

    private int numberOfComponents;

    private double tolerance;

    private double alpha;

    private boolean rowNorm;

    private int maxIteration;

    private int numberOfSamples, numberOfAttributes;

    private Attribute[] attributes;

    /** Per-attribute means, used for centering the data. */
    private double[] means;

    /** Centered (and optionally row-normalized) data, rows = examples. */
    private double[][] data;

    /** Random initial un-mixing matrix (numberOfComponents x numberOfComponents). */
    private double[][] wInit;

    public FastICA(OperatorDescription description) {
        super(description);
    }

    /**
     * Reads the example set, centers (and optionally row-normalizes) the data,
     * whitens it via SVD, runs the selected FastICA variant and returns the
     * unchanged example set together with a {@link FastICAModel}.
     */
    public IOObject[] apply() throws OperatorException {
        // get the ExampleSet
        ExampleSet set = getInput(ExampleSet.class);
        set.recalculateAllAttributeStatistics();
        numberOfSamples = set.size();
        numberOfAttributes = set.getAttributes().size();

        // all attributes must be numerical; remember their means for centering
        attributes = new Attribute[numberOfAttributes];
        means = new double[numberOfAttributes];
        int i = 0;
        Iterator<Attribute> atts = set.getAttributes().iterator();
        while (atts.hasNext()) {
            attributes[i] = atts.next();
            if (!attributes[i].isNumerical()) {
                throw new UserError(this, 104, new Object[] { "FastICA", attributes[i].getName() });
            }
            means[i] = set.getStatistics(attributes[i], Statistics.AVERAGE);
            i++;
        }

        // get the parameters
        algorithmType = getParameterAsInt(PARAMETER_ALGORITHM_TYPE);
        function = getParameterAsInt(PARAMETER_FUNCTION);
        tolerance = getParameterAsDouble(PARAMETER_TOLERANCE);
        alpha = getParameterAsDouble(PARAMETER_ALPHA);
        rowNorm = getParameterAsBoolean(PARAMETER_ROW_NORM);
        maxIteration = getParameterAsInt(PARAMETER_MAX_ITERATION);
        numberOfComponents = getParameterAsInt(PARAMETER_NUMBER_OF_COMPONENTS);
        if (numberOfComponents < 1) {
            numberOfComponents = numberOfAttributes;
        }
        if (numberOfComponents > numberOfAttributes) {
            numberOfComponents = numberOfAttributes;
            logWarning("The parameter 'number_of_components' is too large! Set to number of attributes.");
        }

        // get the centered data
        data = new double[numberOfSamples][numberOfAttributes];
        Iterator<Example> reader = set.iterator();
        Example example;
        for (int sample = 0; sample < numberOfSamples; sample++) {
            example = reader.next();
            for (int d = 0; d < numberOfAttributes; d++) {
                data[sample][d] = example.getValue(attributes[d]) - means[d];
            }
        }

        log("Initializing the weights...");
        // init the weight matrix
        // w.init <- matrix(rnorm(n.comp^2),n.comp,n.comp)
        wInit = new double[numberOfComponents][numberOfComponents];
        // init w randomly
        RandomGenerator randomGenerator = RandomGenerator.getRandomGenerator(getParameterAsInt(PARAMETER_LOCAL_RANDOM_SEED));
        for (i = 0; i < numberOfComponents; i++) {
            for (int j = 0; j < numberOfComponents; j++) {
                wInit[i][j] = randomGenerator.nextDouble() * 2 - 1;
            }
        }

        // row normalization:
        // Scaling is done by dividing the rows of the data by their
        // root-mean-square. The root-mean-square for a row is obtained by
        // computing the square-root of the sum-of-squares of the values in
        // the row divided by the number of values minus one.
        if (rowNorm) {
            log("Scaling the data now.");
            double rms_row;
            for (int row = 0; row < numberOfSamples; row++) {
                // compute root mean square for the row
                rms_row = 0.0d;
                for (int d = 0; d < numberOfAttributes; d++) {
                    rms_row += data[row][d] * data[row][d];
                }
                // NOTE(fix): the divisor belongs under the square root
                // (sqrt(sum/(n-1)), as described above and as R's scale()
                // computes it), not outside: sqrt(sum)/(n-1) was wrong.
                rms_row = Math.sqrt(rms_row / Math.max(1, numberOfAttributes - 1));
                // guard against an all-zero (constant) row: leave it as zeros
                // instead of dividing by zero and producing NaNs
                if (rms_row > 0.0d) {
                    for (int d = 0; d < numberOfAttributes; d++) {
                        data[row][d] = data[row][d] / rms_row;
                    }
                }
            }
        }

        Matrix X = new Matrix(data);
        X = X.transpose();

        // Whitening
        log("Whitening the data now.");
        // V <- X %*% t(X)/n
        // V nrow=nr_atts ncol=nr_atts
        Matrix V = X.times(X.transpose().timesEquals(1.0d / numberOfSamples));
        // s <- La.svd(V, method="dgesdd")
        SingularValueDecomposition svd = V.svd();
        // D <- diag(c(1/sqrt(s$d)))
        // use a copy: getArray() would alias the SVD's internal S matrix
        double[][] singularvalue = svd.getS().getArrayCopy();
        for (i = 0; i < singularvalue.length; i++) {
            singularvalue[i][i] = 1.0d / Math.sqrt(singularvalue[i][i]);
        }
        Matrix D = new Matrix(singularvalue);
        // K <- D %*% t(s$u)
        Matrix K = D.times(svd.getU().transpose());
        // K <- matrix(K[1:n.comp, ], n.comp, p)
        // quick constructor: restricts the view to the first n.comp rows
        K = new Matrix(K.getArray(), numberOfComponents, numberOfAttributes);
        // X1 <- K %*% X
        Matrix X1 = K.times(X);
        // end Whitening

        Matrix a;
        if (algorithmType == 0) {
            a = deflation(X1);
        } else {
            a = parallel(X1);
        }

        log("Creating the model...");
        // w <- a %*% K
        Matrix w = a.times(K);
        // S <- w %*% X
        // Matrix S = w.times(X); // Unused. Shevek
        // A <- t(w) %*% solve(w %*% t(w))
        Matrix W2 = w.times(w.transpose());
        Matrix A = w.transpose().times(W2.inverse());

        /*
         * return(list(X = t(X), K = t(K), W = t(a), A = t(A), S = t(S)))
         */
        Matrix W;
        X = X.transpose();
        K = K.transpose();
        W = a.transpose();
        A = A.transpose();
        // S = S.transpose(); // Unused. Shevek

        // X pre-processed data matrix, X(nr_samples,nr_atts)
        // K pre-whitening matrix that projects data onto the first n.comp
        //   principal components, K(nr_atts,nr_components)
        // W estimated un-mixing matrix, W(nr_components,nr_components)
        // A estimated mixing matrix, A(nr_components,nr_atts)
        // S estimated source matrix, S(nr_samples,nr_components)
        FastICAModel model = new FastICAModel(set, numberOfComponents, means, rowNorm, K, W, A);
        return new IOObject[] { set, model };
    }

    /**
     * Extracts the components one at a time, orthogonalizing each new weight
     * vector against the previously extracted rows of W (deflation scheme).
     *
     * @param X whitened data, X(nr_components, nr_samples)
     * @return the estimated un-mixing matrix W(nr_components, nr_components)
     */
    private Matrix deflation(Matrix X) throws OperatorException {
        log("Deflation FastICA using " + FUNCTION[function] + " approx. to neg-entropy function");
        // n <- nrow(X); p <- ncol(X)
        // W <- matrix(0, n.comp, n.comp)
        Matrix W = new Matrix(numberOfComponents, numberOfComponents, 0.0d);

        Matrix w, t, Wu, wx, gwx, xgwx, g_wx;
        double k, rss, lim, value;
        int iter;

        // log only every iterlog-th iteration to keep the log readable
        int iterlog = 1;
        while ((maxIteration / iterlog > 10) && (maxIteration / (iterlog * 10) >= 3)) {
            iterlog *= 10;
        }

        for (int i = 0; i < numberOfComponents; i++) {
            log("Component " + (i + 1));
            // w <- matrix(w.init[i,], n.comp, 1)
            w = new Matrix(wInit[i], wInit[i].length);

            if (i > 0) {
                // Gram-Schmidt: remove projections onto previously found components
                // t <- w; t[1:length(t)] <- 0
                t = new Matrix(wInit[i].length, 1, 0.0d);
                // for (u in 1:(i - 1)) {
                for (int u = 1; u <= i; u++) {
                    // W[u, ], Wu(1,nr_components)
                    Wu = W.getMatrix(u - 1, u - 1, 0, numberOfComponents - 1);
                    // k <- sum(w * W[u, ])
                    k = w.transpose().times(Wu.transpose()).get(0, 0);
                    // t <- t + k * W[u, ]
                    t.plusEquals(Wu.times(k).transpose());
                }
                // w <- w - t
                w.minusEquals(t);
            }

            // w <- w/sqrt(sum(w^2))
            // NOTE(fix): sum(w^2) is the inner product w' %*% w; the previous
            // code took entry (0,0) of the OUTER product w %*% w', which is
            // just w[0]^2 and normalized by the wrong quantity.
            rss = Math.sqrt(w.transpose().times(w).get(0, 0));
            w.timesEquals(1.0d / rss);

            // lim <- rep(1000, maxit); it <- 1
            lim = 1000.0d;
            iter = 1;
            // while (lim[it] > tol && it < maxit) {
            while (lim > tolerance && iter <= maxIteration) {
                // wx <- t(w) %*% X, wx(1,nr_samples)
                wx = w.transpose().times(X);

                // gwx <- G(wx). NOTE(fix): work on a COPY -- getArray() aliases
                // wx's internal storage, and the in-place transform destroyed
                // the raw projections that the derivative step below needs.
                double[][] gwxarray = wx.getArrayCopy();
                if (function == 0) {
                    // logcosh function: gwx <- tanh(alpha * wx)
                    for (int j = 0; j < gwxarray[0].length; j++) {
                        gwxarray[0][j] = MathFunctions.tanh(alpha * gwxarray[0][j]);
                    }
                } else {
                    // exp function: gwx <- wx * exp(-(wx^2)/2)
                    for (int j = 0; j < gwxarray[0].length; j++) {
                        gwxarray[0][j] = gwxarray[0][j] * Math.exp(-0.5d * gwxarray[0][j] * gwxarray[0][j]);
                    }
                }
                // gwx(1,nr_samples)
                gwx = new Matrix(gwxarray);

                // xgwx <- X * gwx (each column of X scaled by gwx of that sample)
                // NOTE(fix): use getArrayCopy() -- the previous code multiplied
                // into X's internal array, permanently corrupting the whitened
                // data for all later iterations and components.
                double[][] xgwxArray = X.getArrayCopy();
                for (int row = 0; row < xgwxArray.length; row++) {
                    for (int col = 0; col < xgwxArray[0].length; col++) {
                        xgwxArray[row][col] = xgwxArray[row][col] * gwxarray[0][col];
                    }
                }
                // xgwx(nr_components,nr_samples)
                xgwx = new Matrix(xgwxArray);

                // v1 <- apply(xgwx, 1, FUN = mean): mean of each row
                Matrix v1 = new Matrix(numberOfComponents, 1, 0.0d);
                double mean;
                for (int row = 0; row < numberOfComponents; row++) {
                    mean = 0;
                    for (int col = 0; col < numberOfSamples; col++) {
                        mean += xgwx.get(row, col);
                    }
                    mean = mean / numberOfSamples;
                    v1.set(row, 0, mean);
                }

                // g_wx <- G'(wx), evaluated on the RAW projections wx
                // (intact because gwx was computed on a copy above)
                g_wx = wx.copy();
                mean = 0.0d;
                if (function == 0) {
                    // logcosh function: g.wx <- alpha * (1 - (tanh(alpha * wx))^2)
                    for (int j = 0; j < numberOfSamples; j++) {
                        value = MathFunctions.tanh(alpha * g_wx.get(0, j));
                        value = alpha * (1.0d - value * value);
                        mean += value;
                        g_wx.set(0, j, value);
                    }
                } else {
                    // exp function: g.wx <- (1 - wx^2) * exp(-(wx^2)/2)
                    for (int j = 0; j < numberOfSamples; j++) {
                        value = g_wx.get(0, j);
                        value = (1.0d - value * value) * Math.exp(-0.5d * value * value);
                        mean += value;
                        g_wx.set(0, j, value);
                    }
                }
                mean /= numberOfSamples;

                // v2 <- mean(g.wx) * w, v2(nr_components,1)
                Matrix v2 = w.copy();
                v2.timesEquals(mean);

                // w1 <- v1 - v2; w1 <- matrix(w1, n.comp, 1)
                Matrix w1 = v1.minus(v2);

                if (i > 0) {
                    // orthogonalize the update against previous components
                    // t <- w1; t[1:length(t)] <- 0
                    t = new Matrix(w1.getRowDimension(), w1.getColumnDimension(), 0.0d);
                    // for (u in 1:(i - 1)) {
                    for (int u = 1; u <= i; u++) {
                        // W[u, ], Wu(1,nr_components)
                        Wu = W.getMatrix(u - 1, u - 1, 0, numberOfComponents - 1);
                        // k <- sum(w1 * W[u, ])
                        k = w1.transpose().times(Wu.transpose()).get(0, 0);
                        // t <- t + k * W[u, ]
                        t.plusEquals(Wu.times(k).transpose());
                    }
                    // w1 <- w1 - t
                    w1.minusEquals(t);
                }

                // w1 <- w1/sqrt(sum(w1^2))
                rss = Math.sqrt(w1.transpose().times(w1).get(0, 0));
                w1.timesEquals(1.0d / rss);

                // lim[it] <- Mod(Mod(sum((w1 * w))) - 1)
                lim = Math.abs(Math.abs(w1.transpose().times(w).get(0, 0)) - 1.0d);
                if ((iter % iterlog == 0) || (lim <= tolerance)) {
                    log("Iteration " + (iter) + ", tolerance = " + lim);
                }
                iter++;
                // w <- matrix(w1, n.comp, 1)
                w = w1.copy();
            }

            // W[i, ] <- w
            for (int col = 0; col < numberOfComponents; col++) {
                W.set(i, col, w.get(col, 0));
            }
            checkForStop();
        }
        return W;
    }

    /**
     * Extracts all components simultaneously, using a symmetric
     * decorrelation (SVD-based) of the whole un-mixing matrix per iteration.
     *
     * @param X whitened data, X(nr_components, nr_samples)
     * @return the estimated un-mixing matrix W(nr_components, nr_components)
     */
    private Matrix parallel(Matrix X) throws OperatorException {
        log("Symmetric FastICA using " + FUNCTION[function] + " approx. to neg-entropy function");
        // n <- nrow(X); p <- ncol(X)
        int p = X.getColumnDimension();

        // W <- w.init
        Matrix W = new Matrix(wInit);
        // sW <- La.svd(W, method = "dgesdd")
        SingularValueDecomposition svd = W.svd();
        // diag(1/sW$d)
        double[] svalues = svd.getSingularValues();
        Matrix svaluesMatrix = new Matrix(svalues.length, svalues.length, 0.0d);
        for (int i = 0; i < svalues.length; i++) {
            svalues[i] = 1 / svalues[i];
            svaluesMatrix.set(i, i, svalues[i]);
        }
        // W <- sW$u %*% diag(1/sW$d) %*% t(sW$u) %*% W   (symmetric decorrelation)
        // W(nr_components,nr_components)
        W = svd.getU().times(svaluesMatrix).times(svd.getU().transpose()).times(W);

        // W1 <- W
        Matrix W1 = W.copy();

        // lim <- rep(1000, maxit)
        double lim = 1000.0d;
        int iter = 1;

        // log only every iterlog-th iteration to keep the log readable
        int iterlog = 1;
        while ((maxIteration / iterlog > 10) && (maxIteration / (iterlog * 10) >= 3)) {
            iterlog *= 10;
        }

        Matrix wx, gwx, v1, g_wx, diagmean, v2;
        double value, mean;
        // while (lim[it] > tol && it < maxit) {
        while (lim > tolerance && iter <= maxIteration) {
            // wx <- W %*% X, wx(nr_components,nr_samples)
            wx = W.times(X);
            // gwx(nr_components,nr_samples)
            gwx = wx.copy();
            if (function == 0) {
                // logcosh function: gwx <- tanh(alpha * wx)
                for (int row = 0; row < numberOfComponents; row++) {
                    for (int col = 0; col < numberOfSamples; col++) {
                        value = gwx.get(row, col);
                        value = MathFunctions.tanh(alpha * value);
                        gwx.set(row, col, value);
                    }
                }
            } else {
                // exp function: gwx <- wx * exp(-(wx^2)/2)
                for (int row = 0; row < numberOfComponents; row++) {
                    for (int col = 0; col < numberOfSamples; col++) {
                        value = gwx.get(row, col);
                        value = value * Math.exp(-0.5d * value * value);
                        gwx.set(row, col, value);
                    }
                }
            }

            // v1 <- gwx %*% t(X)/p
            // NOTE(fix): the reference divides by p (sample count); the
            // previous code multiplied by p instead.
            // v1(nr_components,nr_components)
            v1 = gwx.times(X.transpose()).timesEquals(1.0d / p);

            // g_wx(nr_components,nr_samples)
            g_wx = gwx.copy();
            // diag(apply(g.wx, 1, FUN = mean))
            diagmean = new Matrix(numberOfComponents, numberOfComponents, 0.0d);
            if (function == 0) {
                // logcosh function: g.wx <- alpha * (1 - (gwx)^2)
                // (gwx already holds tanh(alpha*wx))
                for (int row = 0; row < numberOfComponents; row++) {
                    mean = 0.0d;
                    for (int col = 0; col < numberOfSamples; col++) {
                        value = g_wx.get(row, col);
                        value = alpha * (1.0d - value * value);
                        g_wx.set(row, col, value);
                        mean += value;
                    }
                    mean = mean / numberOfSamples;
                    diagmean.set(row, row, mean);
                }
            } else {
                // exp function: g.wx <- (1 - wx^2) * exp(-(wx^2)/2)
                // evaluated on the raw projections wx
                g_wx = wx.copy();
                for (int row = 0; row < numberOfComponents; row++) {
                    mean = 0.0d;
                    for (int col = 0; col < numberOfSamples; col++) {
                        value = g_wx.get(row, col);
                        value = (1.0d - value * value) * Math.exp(-0.5d * value * value);
                        g_wx.set(row, col, value);
                        mean += value;
                    }
                    mean = mean / numberOfSamples;
                    diagmean.set(row, row, mean);
                }
            }

            // v2 <- diag(apply(g.wx, 1, FUN = mean)) %*% W
            // v2(nr_components,nr_components)
            v2 = diagmean.times(W);

            // W1 <- v1 - v2, W1(nr_components,nr_components)
            W1 = v1.minus(v2);

            // sW1 <- La.svd(W1, method = "dgesdd")
            svd = W1.svd();
            // diag(1/sW1$d)
            svalues = svd.getSingularValues();
            svaluesMatrix = new Matrix(svalues.length, svalues.length, 0.0d);
            for (int i = 0; i < svalues.length; i++) {
                svalues[i] = 1 / svalues[i];
                svaluesMatrix.set(i, i, svalues[i]);
            }
            // W1 <- sW1$u %*% diag(1/sW1$d) %*% t(sW1$u) %*% W1
            // W1(nr_components,nr_components)
            W1 = svd.getU().times(svaluesMatrix).times(svd.getU().transpose()).times(W1);

            // lim[it + 1] <- max(Mod(Mod(diag(W1 %*% t(W))) - 1))
            double[][] diag = W1.times(W.transpose()).getArray();
            value = Double.NEGATIVE_INFINITY;
            for (int row = 0; row < numberOfComponents; row++) {
                value = Math.max(value, Math.abs(Math.abs(diag[row][row]) - 1.0d));
            }
            lim = value;

            // W <- W1
            W = W1.copy();
            if ((iter % iterlog == 0) || (lim <= tolerance)) {
                log("Iteration " + (iter) + ", tolerance = " + lim);
            }
            iter++;
        }
        return W;
    }

    public Class<?>[] getInputClasses() {
        return INPUT_CLASSES;
    }

    public Class<?>[] getOutputClasses() {
        return OUTPUT_CLASSES;
    }

    public List<ParameterType> getParameterTypes() {
        List<ParameterType> list = super.getParameterTypes();
        ParameterType type = new ParameterTypeInt(PARAMETER_NUMBER_OF_COMPONENTS, "Number components to be extracted (-1 number of attributes is used).", -1, Integer.MAX_VALUE, -1);
        type.setExpert(false);
        list.add(type);
        type = new ParameterTypeCategory(PARAMETER_ALGORITHM_TYPE, "If 'parallel' the components are extracted simultaneously, 'deflation' the components are extracted one at a time", ALGORITHM_TYPE, 0);
        list.add(type);
        type = new ParameterTypeCategory(PARAMETER_FUNCTION, "The functional form of the G function used in the approximation to neg-entropy", FUNCTION, 0);
        list.add(type);
        type = new ParameterTypeDouble(PARAMETER_ALPHA, "constant in range [1, 2] used in approximation to neg-entropy when fun=\"logcosh\"", 1.0d, 2.0d, 1.0d);
        list.add(type);
        type = new ParameterTypeBoolean(PARAMETER_ROW_NORM, "Indicates whether rows of the data matrix " + "should be standardized beforehand.", false);
        list.add(type);
        type = new ParameterTypeInt(PARAMETER_MAX_ITERATION, "maximum number of iterations to perform", 0, Integer.MAX_VALUE, 200);
        list.add(type);
        type = new ParameterTypeDouble(PARAMETER_TOLERANCE, "A positive scalar giving the tolerance at which " + "the un-mixing matrix is considered to have converged.", 0.0d, Double.POSITIVE_INFINITY, 1e-4);
        list.add(type);
        type = new ParameterTypeInt(PARAMETER_LOCAL_RANDOM_SEED, "Use the given random seed instead of global random numbers (-1: use global)", -1, Integer.MAX_VALUE, -1);
        list.add(type);
        return list;
    }
}