/** * Copyright (C) 2001-2017 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. * If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.visualization.dependencies; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.set.NonSpecialAttributesExampleSet; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorCreationException; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.ProcessSetupError.Severity; import com.rapidminer.operator.UserError; import com.rapidminer.operator.ports.InputPort; import com.rapidminer.operator.ports.OutputPort; import com.rapidminer.operator.ports.metadata.AttributeMetaData; import com.rapidminer.operator.ports.metadata.ExampleSetMetaData; import com.rapidminer.operator.ports.metadata.ExampleSetPrecondition; import com.rapidminer.operator.ports.metadata.GenerateNewMDRule; import com.rapidminer.operator.ports.metadata.PassThroughRule; import com.rapidminer.operator.preprocessing.transformation.GroupedANOVAOperator; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.parameter.ParameterTypeDouble; import com.rapidminer.parameter.UndefinedParameterError; import com.rapidminer.tools.OperatorService; import com.rapidminer.tools.math.SignificanceTestResult; /** * <p> * This operator calculates the significance of difference for the values for all numerical * attributes depending on the groups defined by all nominal attributes. Please refer to the * operator {@link GroupedANOVAOperator} for details of the calculation. * </p> * * @author Ingo Mierswa */ public class ANOVAMatrixOperator extends Operator { private InputPort exampleSetInput = getInputPorts().createPort("example set", new ExampleSetMetaData()); private OutputPort exampleSetOutput = getOutputPorts().createPort("example set"); private OutputPort anovaOutput = getOutputPorts().createPort("anova"); public ANOVAMatrixOperator(OperatorDescription description) { super(description); exampleSetInput.addPrecondition(new ExampleSetPrecondition(exampleSetInput) { @Override public void makeAdditionalChecks(ExampleSetMetaData emd) throws UndefinedParameterError { int numberOfNominals = 0; for (AttributeMetaData amd : emd.getAllAttributes()) { if (amd.isNominal()) { numberOfNominals++; } } if (numberOfNominals == 0) { createError(Severity.WARNING, "exampleset.must_contain_nominal_attribute"); } } }); getTransformer().addRule(new PassThroughRule(exampleSetInput, exampleSetOutput, false)); getTransformer().addRule(new GenerateNewMDRule(anovaOutput, ANOVAMatrix.class)); } @Override public void doWork() throws OperatorException { ExampleSet inputSet = exampleSetInput.getData(ExampleSet.class); ExampleSet exampleSet = NonSpecialAttributesExampleSet.create(inputSet); // determine anova and grouping attributes List<String> nominalAttributes = new ArrayList<String>(); List<String> numericalAttributes = new ArrayList<String>(); Iterator<Attribute> a = exampleSet.getAttributes().allAttributes(); while (a.hasNext()) { Attribute attribute = a.next(); if (attribute.isNominal()) { nominalAttributes.add(attribute.getName()); } else if (attribute.isNumerical()) { numericalAttributes.add(attribute.getName()); } } // init "inner" operator GroupedANOVAOperator groupedAnovaOperator = null; try { groupedAnovaOperator = OperatorService.createOperator(GroupedANOVAOperator.class); } catch (OperatorCreationException e) { throw new UserError(this, 109, GroupedANOVAOperator.class.getName()); } double significanceLevel = getParameterAsDouble(GroupedANOVAOperator.PARAMETER_SIGNIFICANCE_LEVEL); groupedAnovaOperator.setParameter(GroupedANOVAOperator.PARAMETER_SIGNIFICANCE_LEVEL, significanceLevel + ""); groupedAnovaOperator.setParameter(GroupedANOVAOperator.PARAMETER_ONLY_DISTINCT, getParameterAsBoolean(GroupedANOVAOperator.PARAMETER_ONLY_DISTINCT) + ""); // calculate all values double[][] probabilities = new double[numericalAttributes.size()][nominalAttributes.size()]; for (int numericalCounter = 0; numericalCounter < probabilities.length; numericalCounter++) { String numericalAttributeName = numericalAttributes.get(numericalCounter); for (int nominalCounter = 0; nominalCounter < probabilities[numericalCounter].length; nominalCounter++) { String nominalAttributeName = nominalAttributes.get(nominalCounter); groupedAnovaOperator.setParameter(GroupedANOVAOperator.PARAMETER_ANOVA_ATTRIBUTE, numericalAttributeName); groupedAnovaOperator.setParameter(GroupedANOVAOperator.PARAMETER_GROUP_BY_ATTRIBUTE, nominalAttributeName); try { SignificanceTestResult testResult = groupedAnovaOperator.apply((ExampleSet) exampleSet.clone()); probabilities[numericalCounter][nominalCounter] = testResult.getProbability(); } catch (UserError e) { e.setOperator(this); throw e; } } } // create and return result exampleSetOutput.deliver(exampleSet); anovaOutput.deliver(new ANOVAMatrix(probabilities, numericalAttributes, nominalAttributes, significanceLevel)); } @Override public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); types.add(new ParameterTypeDouble(GroupedANOVAOperator.PARAMETER_SIGNIFICANCE_LEVEL, "The significance level for the ANOVA calculation.", 0.0d, 1.0d, 0.05d)); types.add(new ParameterTypeBoolean(GroupedANOVAOperator.PARAMETER_ONLY_DISTINCT, "Indicates if only rows with distinct values for the aggregation attribute should be used for the calculation of the aggregation function.", false)); return types; } }