/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.visualization.dependencies; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.example.AttributeWeights; import com.rapidminer.example.ExampleSet; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.ports.InputPort; import com.rapidminer.operator.ports.OutputPort; import com.rapidminer.operator.ports.metadata.GenerateNewMDRule; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.tools.math.MathFunctions; /** * <p>This operator calculates the correlation matrix between all attributes of the * input example set. Furthermore, attribute weights based on the correlations * can be returned. This allows the de-selection of highly correlated attributes * with the help of an * {@link com.rapidminer.operator.features.selection.AttributeWeightSelection} * operator. If no weights should be created, this operator produces simply a * correlation matrix which up to now cannot be used by other operators but can * be displayed to the user in the result tab.</p> * * <p>Please note that this simple implementation * performs a data scan for each attribute combination and might therefore take * some time for non-memory example tables.</p> * * @author Ingo Mierswa */ public class CorrelationMatrixOperator extends Operator { public static final String PARAMETER_CREATE_WEIGHTS = "create_weights"; public static final String PARAMETER_NORMALIZE_WEIGHTS = "normalize_weights"; public static final String PARAMETER_SQUARED_CORRELATION = "squared_correlation"; private InputPort exampleSetInput = getInputPorts().createPort("example set", ExampleSet.class); private OutputPort exampleSetOutput = getOutputPorts().createPort("example set"); private OutputPort matrixOutput = getOutputPorts().createPort("matrix"); private OutputPort weightsOutput = getOutputPorts().createPort("weights"); public CorrelationMatrixOperator(OperatorDescription description) { super(description); getTransformer().addPassThroughRule(exampleSetInput, exampleSetOutput); getTransformer().addRule(new GenerateNewMDRule(matrixOutput, NumericalMatrix.class)); getTransformer().addRule(new GenerateNewMDRule(weightsOutput, AttributeWeights.class)); } @Override public void doWork() throws OperatorException { ExampleSet exampleSet = exampleSetInput.getData(); NumericalMatrix matrix = new NumericalMatrix("Correlation", exampleSet, true); int numberOfAttributes = exampleSet.getAttributes().size(); boolean squared = getParameterAsBoolean(PARAMETER_SQUARED_CORRELATION); boolean createWeights = getParameterAsBoolean(PARAMETER_CREATE_WEIGHTS); boolean normalizeWeights = getParameterAsBoolean(PARAMETER_NORMALIZE_WEIGHTS); int k = 0; for (Attribute firstAttribute : exampleSet.getAttributes()) { int l = 0; for (Attribute secondAttribute : exampleSet.getAttributes()) { matrix.setValue(k, l, MathFunctions.correlation(exampleSet, firstAttribute, secondAttribute, squared || createWeights)); checkForStop(); l++; } k++; } AttributeWeights weights = new AttributeWeights(); // use squared correlations for weights --> learning schemes should // be able to use both positively and negatively high correlated // values int i = 0; for (Attribute attribute : exampleSet.getAttributes()) { double sum = 0.0d; for (int j = 0; j < numberOfAttributes; j++) { sum += (1.0d - matrix.getValue(i, j)); // actually the // squared value } weights.setWeight(attribute.getName(), sum / numberOfAttributes); i++; } if (normalizeWeights) { weights.normalize(); } exampleSetOutput.deliver(exampleSet); weightsOutput.deliver(weights); matrixOutput.deliver(matrix); } @Override public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); ParameterType type = new ParameterTypeBoolean(PARAMETER_CREATE_WEIGHTS, "Indicates if attribute weights based on correlation should be calculated or if the complete matrix should be returned.", false); type.setExpert(false); type.setHidden(true); types.add(type); types.add(new ParameterTypeBoolean(PARAMETER_NORMALIZE_WEIGHTS, "Indicates if the attributes weights should be normalized.", true, false)); types.add(new ParameterTypeBoolean(PARAMETER_SQUARED_CORRELATION, "Indicates if the squared correlation should be calculated.", false, false)); return types; } }