/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 *
 * Complete list of developers available at our web site:
 *
 * http://rapidminer.com
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.features.transformation;

import java.util.Iterator;
import java.util.List;
import java.util.logging.Level;

import Jama.EigenvalueDecomposition;
import Jama.Matrix;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Tools;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetPassThroughRule;
import com.rapidminer.operator.ports.metadata.ExampleSetPrecondition;
import com.rapidminer.operator.ports.metadata.GenerateModelTransformationRule;
import com.rapidminer.operator.ports.metadata.MDReal;
import com.rapidminer.operator.ports.metadata.PassThroughRule;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.math.matrix.CovarianceMatrix;


/**
 * Operator that runs a principal components analysis (PCA) on the covariance matrix of the input
 * example set. Depending on the selected reduction type, either all components are kept, the
 * components are selected through a cumulative variance threshold, or a fixed number of components
 * is kept. The operator delivers a <code>PCAModel</code>; the feature transformation itself can be
 * performed with the <code>ModelApplier</code>.
 *
 * @author Ingo Mierswa
 * @see PCAModel
 */
public class PCA extends Operator {

	/**
	 * Parameter key for the cumulative variance threshold. The parameter is only shown when the
	 * reduction type is {@link #REDUCTION_VARIANCE}.
	 */
	public static final String PARAMETER_VARIANCE_THRESHOLD = "variance_threshold";

	/**
	 * Parameter key for the number of components to keep. The parameter is only shown when the
	 * reduction type is {@link #REDUCTION_FIXED} and is registered with a minimum value of 1.
	 * NOTE(review): older documentation mentioned '-1' as "keep all components"; the registered
	 * range does not allow that value - confirm which one is intended.
	 */
	public static final String PARAMETER_NUMBER_OF_COMPONENTS = "number_of_components";

	/** Parameter key selecting the kind of dimensionality reduction. */
	public static final String PARAMETER_REDUCTION_TYPE = "dimensionality_reduction";

	/** Display names of the reduction types; the array index matches the REDUCTION_* constants. */
	public static final String[] REDUCTION_METHODS = { "none", "keep variance", "fixed number" };

	/** Index of "none" in {@link #REDUCTION_METHODS}. */
	public static final int REDUCTION_NONE = 0;

	/** Index of "keep variance" in {@link #REDUCTION_METHODS}. */
	public static final int REDUCTION_VARIANCE = 1;

	/** Index of "fixed number" in {@link #REDUCTION_METHODS}. */
	public static final int REDUCTION_FIXED = 2;

	private InputPort exampleSetInput = getInputPorts().createPort("example set input");
	private OutputPort exampleSetOutput = getOutputPorts().createPort("example set output");
	private OutputPort originalOutput = getOutputPorts().createPort("original");
	private OutputPort modelOutput = getOutputPorts().createPort("preprocessing model");

	/**
	 * Creates the operator and registers its meta data rules: a numerical precondition on the
	 * input, the generated {@link PCAModel}, the predicted "pc_i" attributes on the example set
	 * output and a plain pass-through to the original output.
	 *
	 * @param description
	 *            the operator description handed to the super class
	 */
	public PCA(OperatorDescription description) {
		super(description);

		exampleSetInput.addPrecondition(new ExampleSetPrecondition(exampleSetInput, Ontology.NUMERICAL));

		getTransformer().addRule(new GenerateModelTransformationRule(exampleSetInput, modelOutput, PCAModel.class));
		getTransformer().addRule(new ExampleSetPassThroughRule(exampleSetInput, exampleSetOutput, SetRelation.EQUAL) {

			/**
			 * Replaces the regular attributes of the incoming meta data by real-valued
			 * attributes "pc_1" ... "pc_k" with mean 0, where k depends on the reduction type.
			 */
			@Override
			public ExampleSetMetaData modifyExampleSet(ExampleSetMetaData metaData) throws UndefinedParameterError {
				final int regularCount = metaData.getNumberOfRegularAttributes();
				// without a fixed number, the count of regular attributes is used
				int componentCount = regularCount;

				switch (getParameterAsInt(PARAMETER_REDUCTION_TYPE)) {
					case REDUCTION_FIXED: {
						int requested = getParameterAsInt(PARAMETER_NUMBER_OF_COMPONENTS);
						if (requested > regularCount) {
							// more components requested than regular attributes: warn and clamp
							LogService.getRoot().log(Level.WARNING,
									"com.rapidminer.operator.features.transformation.PCA.less_attributes",
									new Object[] { requested, regularCount });
							requested = regularCount;
						}
						componentCount = requested;
						metaData.attributesAreKnown();
						break;
					}
					case REDUCTION_VARIANCE:
						// component count is not fixed here, so the attributes are marked as a subset
						metaData.attributesAreSubset();
						break;
					default:
						// no reduction: set relation stays untouched
						break;
				}

				metaData.clearRegular();
				for (int index = 1; index <= componentCount; index++) {
					AttributeMetaData componentMetaData = new AttributeMetaData("pc_" + index, Ontology.REAL);
					componentMetaData.setMean(new MDReal(0.0));
					metaData.addAttribute(componentMetaData);
				}
				return metaData;
			}
		});
		getTransformer().addRule(new PassThroughRule(exampleSetInput, originalOutput, false));
	}

	/**
	 * Helper method for anonymous operators: feeds the given example set into the input port,
	 * executes {@link #doWork()} and returns the model found at the model output port.
	 *
	 * @param exampleSet
	 *            the data the PCA is computed on
	 * @return the model delivered to the model output port
	 * @throws OperatorException
	 *             if {@link #doWork()} fails
	 */
	public Model doWork(ExampleSet exampleSet) throws OperatorException {
		exampleSetInput.receive(exampleSet);
		doWork();
		return modelOutput.getData(Model.class);
	}

	/**
	 * Validates the input, builds the covariance matrix, performs the eigenvalue decomposition and
	 * delivers the resulting {@link PCAModel}, the original example set and - only if that port is
	 * connected - the result of applying the model to the example set.
	 *
	 * @throws OperatorException
	 *             if the input checks fail; a {@link UserError} with key "pca_attribute_names" is
	 *             thrown if any attribute name already starts with "pc_"
	 */
	@Override
	public void doWork() throws OperatorException {
		ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class);

		// input checks: no missing values, numerical attributes only
		Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);
		Tools.onlyNumericalAttributes(exampleSet, "PCA");

		// names starting with "pc_" are rejected for every attribute, not only the regular ones
		for (Iterator<Attribute> attributes = exampleSet.getAttributes().allAttributes(); attributes.hasNext();) {
			String attributeName = attributes.next().getName();
			if (attributeName.startsWith("pc_")) {
				throw new UserError(this, "pca_attribute_names", attributeName);
			}
		}

		// covariance matrix of the example set
		log("Creating the covariance matrix...");
		Matrix covarianceMatrix = CovarianceMatrix.getCovarianceMatrix(exampleSet, this);

		// eigenvalues and eigenvectors of the covariance matrix
		log("Performing the eigenvalue decomposition...");
		EigenvalueDecomposition decomposition = covarianceMatrix.eig();
		checkForStop();

		// build the model from the decomposition
		double[] eigenvalues = decomposition.getRealEigenvalues();
		double[][] eigenvectors = decomposition.getV().getArray();
		PCAModel model = new PCAModel(exampleSet, eigenvalues, eigenvectors);

		// configure how many components the model keeps
		int reductionType = getParameterAsInt(PARAMETER_REDUCTION_TYPE);
		if (reductionType == REDUCTION_NONE) {
			model.setNumberOfComponents(exampleSet.getAttributes().size());
		} else if (reductionType == REDUCTION_VARIANCE) {
			model.setVarianceThreshold(getParameterAsDouble(PARAMETER_VARIANCE_THRESHOLD));
		} else if (reductionType == REDUCTION_FIXED) {
			int available = exampleSet.getAttributes().size();
			int requested = getParameterAsInt(PARAMETER_NUMBER_OF_COMPONENTS);
			model.setNumberOfComponents(Math.min(available, requested));
		}

		modelOutput.deliver(model);
		originalOutput.deliver(exampleSet);
		// NOTE(review): the same exampleSet instance is delivered as "original" and handed to
		// model.apply(...) - confirm that apply does not modify it in place.
		if (exampleSetOutput.isConnected()) {
			exampleSetOutput.deliver(model.apply(exampleSet));
		}
	}

	/**
	 * Adds the reduction type, the variance threshold and the number of components to the
	 * parameters of the super class. The latter two depend on the selected reduction type.
	 *
	 * @return the parameter types of the super class followed by the three PCA parameters
	 */
	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> parameterTypes = super.getParameterTypes();

		ParameterType reductionType = new ParameterTypeCategory(PARAMETER_REDUCTION_TYPE,
				"Indicates which type of dimensionality reduction should be applied", REDUCTION_METHODS,
				REDUCTION_VARIANCE);
		reductionType.setExpert(false);
		parameterTypes.add(reductionType);

		ParameterType varianceThreshold = new ParameterTypeDouble(PARAMETER_VARIANCE_THRESHOLD,
				"Keep the all components with a cumulative variance smaller than the given threshold.", 0, 1, 0.95);
		varianceThreshold.setExpert(false);
		varianceThreshold.registerDependencyCondition(
				new EqualTypeCondition(this, PARAMETER_REDUCTION_TYPE, REDUCTION_METHODS, true, REDUCTION_VARIANCE));
		parameterTypes.add(varianceThreshold);

		ParameterType numberOfComponents = new ParameterTypeInt(PARAMETER_NUMBER_OF_COMPONENTS,
				"Keep this number of components.", 1, Integer.MAX_VALUE, 1);
		numberOfComponents.setExpert(false);
		numberOfComponents.registerDependencyCondition(
				new EqualTypeCondition(this, PARAMETER_REDUCTION_TYPE, REDUCTION_METHODS, true, REDUCTION_FIXED));
		parameterTypes.add(numberOfComponents);

		return parameterTypes;
	}
}