/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.features.transformation;

import java.util.List;

import Jama.EigenvalueDecomposition;
import Jama.Matrix;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.math.matrix.CovarianceMatrix;

/**
 * This operator performs a principal components analysis (PCA) using the
 * covariance matrix of the input example set. The user can either specify the
 * amount of variance of the original data that should be covered when choosing
 * the number of principal components, or specify the number of principal
 * components manually. The operator outputs a <code>PCAModel</code>; with the
 * <code>ModelApplier</code> this model can be used to transform the features.
 *
 * @author Ingo Mierswa
 * @version $Id: PCA.java,v 1.10 2008/07/07 07:06:44 ingomierswa Exp $
 * @see PCAModel
 */
public class PCA extends Operator {

	/** The parameter name for "Keep all components with a cumulative variance smaller than the given threshold." */
	public static final String PARAMETER_VARIANCE_THRESHOLD = "variance_threshold";

	/** The parameter name for "Keep this number of components. If '-1' then keep all components." */
	public static final String PARAMETER_NUMBER_OF_COMPONENTS = "number_of_components";

	/** The parameter name for the category selecting which kind of dimensionality reduction is applied. */
	public static final String PARAMETER_REDUCTION_TYPE = "dimensionality_reduction";

	/** Display names of the reduction methods; the <code>REDUCTION_*</code> constants match the positions in this array. */
	public static final String[] REDUCTION_METHODS = new String[] {
		"none",
		"keep variance",
		"fixed number"
	};

	/** Reduction type "none": the model is configured with as many components as the example set has attributes. */
	public static final int REDUCTION_NONE = 0;

	/** Reduction type "keep variance": the model is configured with the variance threshold parameter. */
	public static final int REDUCTION_VARIANCE = 1;

	/** Reduction type "fixed number": the model is configured with the number of components parameter. */
	public static final int REDUCTION_FIXED = 2;

	/**
	 * Creates the operator from the given description.
	 *
	 * @param description the operator description which is handed to the super class
	 */
	public PCA(OperatorDescription description) {
		super(description);
	}

	/**
	 * Builds a {@link PCAModel} from the eigenvalue decomposition of the
	 * covariance matrix of the input example set and configures it according
	 * to the selected reduction type.
	 *
	 * @return an array holding the input example set followed by the created model
	 * @throws OperatorException a {@link UserError} with code 104 is thrown as soon
	 *         as an attribute is found which is not numerical
	 */
	public IOObject[] apply() throws OperatorException {
		// 1) fetch the data, refresh its statistics and make sure that
		//    only numerical attributes are part of the example set
		ExampleSet data = getInput(ExampleSet.class);
		data.recalculateAllAttributeStatistics();
		for (Attribute current : data.getAttributes()) {
			if (current.isNumerical()) {
				continue;
			}
			throw new UserError(this, 104, "PCA", current.getName());
		}

		// 2) covariance matrix of the example set
		log("Creating the covariance matrix...");
		Matrix covariances = CovarianceMatrix.getCovarianceMatrix(data);

		// 3) eigenvalues and eigenvectors of the covariance matrix
		log("Performing the eigenvalue decomposition...");
		EigenvalueDecomposition decomposition = covariances.eig();

		// 4) build the model from the decomposition
		double[] realEigenvalues = decomposition.getRealEigenvalues();
		double[][] eigenvectorValues = decomposition.getV().getArray();
		PCAModel pcaModel = new PCAModel(data, realEigenvalues, eigenvectorValues);

		// 5) transfer the reduction settings; any other reduction type
		//    leaves the model untouched
		int selectedReduction = getParameterAsInt(PARAMETER_REDUCTION_TYPE);
		if (selectedReduction == REDUCTION_NONE) {
			pcaModel.setNumberOfComponents(data.getAttributes().size());
		} else if (selectedReduction == REDUCTION_VARIANCE) {
			pcaModel.setVarianceThreshold(getParameterAsDouble(PARAMETER_VARIANCE_THRESHOLD));
		} else if (selectedReduction == REDUCTION_FIXED) {
			pcaModel.setNumberOfComponents(getParameterAsInt(PARAMETER_NUMBER_OF_COMPONENTS));
		}

		return new IOObject[] { data, pcaModel };
	}

	/**
	 * Returns the input classes of this operator.
	 *
	 * @return an array containing only {@link ExampleSet}
	 */
	public Class<?>[] getInputClasses() {
		Class<?>[] inputs = new Class[] { ExampleSet.class };
		return inputs;
	}

	/**
	 * Returns the output classes of this operator.
	 *
	 * @return an array containing {@link ExampleSet} followed by {@link Model}
	 */
	public Class<?>[] getOutputClasses() {
		Class<?>[] outputs = new Class[] { ExampleSet.class, Model.class };
		return outputs;
	}

	/**
	 * Appends the parameters of this operator to the parameter list of the
	 * super class: the variance threshold (marked as non-expert), the reduction
	 * type and the number of components, in this order.
	 *
	 * @return the parameter list of the super class extended by the PCA parameters
	 */
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();

		// range [0, 1] with default 0.95
		ParameterType varianceThreshold = new ParameterTypeDouble(PARAMETER_VARIANCE_THRESHOLD, "Keep the all components with a cumulative variance smaller than the given threshold.", 0, 1, 0.95);
		varianceThreshold.setExpert(false);
		types.add(varianceThreshold);

		// the default selection is "keep variance"
		ParameterType reductionType = new ParameterTypeCategory(PARAMETER_REDUCTION_TYPE, "Indicates which type of dimensionality reduction should be applied", REDUCTION_METHODS, REDUCTION_VARIANCE);
		types.add(reductionType);

		// range [-1, Integer.MAX_VALUE] with default -1
		ParameterType numberOfComponents = new ParameterTypeInt(PARAMETER_NUMBER_OF_COMPONENTS, "Keep this number of components. If \'-1\' then keep all components.'", -1, Integer.MAX_VALUE, -1);
		types.add(numberOfComponents);

		return types;
	}
}