/** * Copyright (C) 2001-2017 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. * If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.visualization.dependencies; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.example.AttributeWeights; import com.rapidminer.example.ExampleSet; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.ports.InputPort; import com.rapidminer.operator.ports.OutputPort; import com.rapidminer.operator.ports.metadata.GenerateNewMDRule; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.tools.math.MathFunctions; /** * <p> * This operator calculates the correlation matrix between all attributes of the input example set. * Furthermore, attribute weights based on the correlations can be returned. This allows the * de-selection of highly correlated attributes with the help of an * {@link com.rapidminer.operator.features.selection.AttributeWeightSelection} operator. If no * weights should be created, this operator produces simply a correlation matrix which up to now * cannot be used by other operators but can be displayed to the user in the result tab. * </p> * * <p> * Please note that this simple implementation performs a data scan for each attribute combination * and might therefore take some time for non-memory example tables. * </p> * * @author Ingo Mierswa */ public class CorrelationMatrixOperator extends Operator { public static final String PARAMETER_CREATE_WEIGHTS = "create_weights"; public static final String PARAMETER_NORMALIZE_WEIGHTS = "normalize_weights"; public static final String PARAMETER_SQUARED_CORRELATION = "squared_correlation"; private InputPort exampleSetInput = getInputPorts().createPort("example set", ExampleSet.class); private OutputPort exampleSetOutput = getOutputPorts().createPort("example set"); private OutputPort matrixOutput = getOutputPorts().createPort("matrix"); private OutputPort weightsOutput = getOutputPorts().createPort("weights"); public CorrelationMatrixOperator(OperatorDescription description) { super(description); getTransformer().addPassThroughRule(exampleSetInput, exampleSetOutput); getTransformer().addRule(new GenerateNewMDRule(matrixOutput, NumericalMatrix.class)); getTransformer().addRule(new GenerateNewMDRule(weightsOutput, AttributeWeights.class)); } @Override public void doWork() throws OperatorException { ExampleSet exampleSet = exampleSetInput.getData(ExampleSet.class); NumericalMatrix matrix = new NumericalMatrix("Correlation", exampleSet, true); int numberOfAttributes = exampleSet.getAttributes().size(); boolean squared = getParameterAsBoolean(PARAMETER_SQUARED_CORRELATION); boolean createWeights = getParameterAsBoolean(PARAMETER_CREATE_WEIGHTS); boolean normalizeWeights = getParameterAsBoolean(PARAMETER_NORMALIZE_WEIGHTS); int k = 0; long progressCounter = 0; getProgress().setTotal(100); long batch = Math.max(1L, exampleSet.getAttributes().size() * (long) exampleSet.getAttributes().size() / 100); Attribute[] regularAttributes = exampleSet.getAttributes().createRegularAttributeArray(); for (Attribute firstAttribute : regularAttributes) { int l = 0; for (Attribute secondAttribute : regularAttributes) { matrix.setValue(k, l, MathFunctions.correlation(exampleSet, firstAttribute, secondAttribute, squared || createWeights)); l++; if (++progressCounter % batch == 0 || progressCounter % 1000 == 0) { getProgress().setCompleted((int) (progressCounter * 100 / (exampleSet.getAttributes().size() * (long) exampleSet.getAttributes().size()))); } } k++; } AttributeWeights weights = new AttributeWeights(); // use squared correlations for weights --> learning schemes should // be able to use both positively and negatively high correlated // values int i = 0; for (Attribute attribute : regularAttributes) { double sum = 0.0d; for (int j = 0; j < numberOfAttributes; j++) { sum += 1.0d - matrix.getValue(i, j); // actually the // squared value } weights.setWeight(attribute.getName(), sum / numberOfAttributes); i++; } if (normalizeWeights) { weights.normalize(); } exampleSetOutput.deliver(exampleSet); weightsOutput.deliver(weights); matrixOutput.deliver(matrix); } @Override public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); ParameterType type = new ParameterTypeBoolean(PARAMETER_CREATE_WEIGHTS, "Indicates if attribute weights based on correlation should be calculated or if the complete matrix should be returned.", false); type.setExpert(false); type.setHidden(true); types.add(type); types.add(new ParameterTypeBoolean(PARAMETER_NORMALIZE_WEIGHTS, "Indicates if the attributes weights should be normalized.", true, false)); types.add(new ParameterTypeBoolean(PARAMETER_SQUARED_CORRELATION, "Indicates if the squared correlation should be calculated.", false, false)); return types; } }