/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.preprocessing; import java.util.Collection; import java.util.List; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.annotation.ResourceConsumptionEstimator; import com.rapidminer.operator.ports.metadata.AttributeMetaData; import com.rapidminer.operator.ports.metadata.ExampleSetMetaData; import com.rapidminer.operator.ports.metadata.SetRelation; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeAttribute; import com.rapidminer.parameter.ParameterTypeDouble; import com.rapidminer.parameter.ParameterTypeInt; import com.rapidminer.parameter.ParameterTypeList; import com.rapidminer.parameter.UndefinedParameterError; import com.rapidminer.tools.Ontology; import com.rapidminer.tools.OperatorResourceConsumptionHandler; import com.rapidminer.tools.RandomGenerator; import com.rapidminer.tools.math.container.Range; /** * This operator adds random attributes and white noise to the data. New random * attributes are simply filled with random data which is not correlated to the * label at all. Additionally, this operator might add noise to the label * attribute or to the regular attributes. In case of a numerical label the * given <code>label_noise</code> is the percentage of the label range which * defines the standard deviation of normal distributed noise which is added to * the label attribute. For nominal labels the parameter * <code>label_noise</code> defines the probability to randomly change the * nominal label value. In case of adding noise to regular attributes the * parameter <code>default_attribute_noise</code> simply defines the standard * deviation of normal distributed noise without using the attribute value * range. Using the parameter list it is possible to set different noise levels * for different attributes. However, it is not possible to add noise to nominal * attributes. * * @author Ingo Mierswa */ public class NoiseOperator extends PreprocessingOperator { /** The parameter name for "Adds this number of random attributes." */ public static final String PARAMETER_RANDOM_ATTRIBUTES = "random_attributes"; /** The parameter name for "Add this percentage of a numerical label range as a normal distributed noise or probability for a nominal label change." */ public static final String PARAMETER_LABEL_NOISE = "label_noise"; /** The parameter name for "The standard deviation of the default attribute noise." */ public static final String PARAMETER_DEFAULT_ATTRIBUTE_NOISE = "default_attribute_noise"; /** The parameter name for "List of noises for each attributes." */ public static final String PARAMETER_NOISE = "noise"; /** The parameter name for "Offset added to the values of each random attribute" */ public static final String PARAMETER_OFFSET = "offset"; /** The parameter name for "Linear factor multiplicated with the values of each random attribute" */ public static final String PARAMETER_LINEAR_FACTOR = "linear_factor"; public NoiseOperator(OperatorDescription description) { super(description); } @Override public PreprocessingModel createPreprocessingModel(ExampleSet exampleSet) throws OperatorException { exampleSet.recalculateAllAttributeStatistics(); String[] attributeNames = new String[getParameterAsInt(PARAMETER_RANDOM_ATTRIBUTES)]; for (int i = 0; i < getParameterAsInt(PARAMETER_RANDOM_ATTRIBUTES); i++) { attributeNames[i] = AttributeFactory.createName("random"); } return new NoiseModel(exampleSet, RandomGenerator.getRandomGenerator(this), getParameterList(PARAMETER_NOISE), getParameterAsDouble(PARAMETER_DEFAULT_ATTRIBUTE_NOISE), getParameterAsDouble(PARAMETER_LABEL_NOISE), getParameterAsDouble(PARAMETER_OFFSET), getParameterAsDouble(PARAMETER_LINEAR_FACTOR), attributeNames); } @Override protected ExampleSetMetaData modifyMetaData(ExampleSetMetaData exampleSetMetaData) throws UndefinedParameterError { AttributeMetaData label= exampleSetMetaData.getLabelMetaData(); if (label != null) { if (label.isNumerical() && getParameterAsDouble(PARAMETER_LABEL_NOISE) > 0) label.setValueSetRelation(SetRelation.SUPERSET); } double defaultNoise = getParameterAsDouble(PARAMETER_DEFAULT_ATTRIBUTE_NOISE); if (defaultNoise > 0) { for (AttributeMetaData amd : exampleSetMetaData.getAllAttributes()) { if (!amd.isSpecial()) { if (amd.isNumerical()) { amd.setValueSetRelation(SetRelation.SUPERSET); } } } } int numberOfRandomAttributes = getParameterAsInt(PARAMETER_RANDOM_ATTRIBUTES); for (int i = 0; i < numberOfRandomAttributes; i++) { AttributeMetaData amd = new AttributeMetaData("random" + ((i == 0) ? "" : i + ""), Ontology.REAL); amd.setValueRange(new Range(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY), SetRelation.SUBSET); exampleSetMetaData.addAttribute(amd); } return exampleSetMetaData; } /** * This method isn't used anymore, since the calling super function is overridden. */ @Override protected Collection<AttributeMetaData> modifyAttributeMetaData(ExampleSetMetaData emd, AttributeMetaData amd) { return null; } @Override public boolean isSupportingAttributeRoles() { return true; } /** This operator does not support view creation proper. Hence hide the parameter */ @Override public boolean isSupportingView() { return false; } @Override public Class<? extends PreprocessingModel> getPreprocessingModelClass() { return NoiseModel.class; } @Override public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); ParameterType type = new ParameterTypeInt(PARAMETER_RANDOM_ATTRIBUTES, "Adds this number of random attributes.", 0, Integer.MAX_VALUE, 0); type.setExpert(false); types.add(type); type = new ParameterTypeDouble(PARAMETER_LABEL_NOISE, "Add this percentage of a numerical label range as a normal distributed noise or probability for a nominal label change.", 0.0d, Double.POSITIVE_INFINITY, 0.05d); type.setExpert(false); types.add(type); types.add(new ParameterTypeDouble(PARAMETER_DEFAULT_ATTRIBUTE_NOISE, "The standard deviation of the default attribute noise.", 0.0d, Double.POSITIVE_INFINITY, 0.0d)); types.add(new ParameterTypeList(PARAMETER_NOISE, "List of noises for each attributes.", new ParameterTypeAttribute("attribute", "To this attribute noise is added.", getExampleSetInputPort()), new ParameterTypeDouble(PARAMETER_NOISE, "The strength of gaussian noise, which is added to this attribute.", 0.0d, Double.POSITIVE_INFINITY, 0.05d))); type = new ParameterTypeDouble(PARAMETER_OFFSET, "Offset added to the values of each random attribute", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, 0.0d); types.add(type); type = new ParameterTypeDouble(PARAMETER_LINEAR_FACTOR, "Linear factor multiplicated with the values of each random attribute", 0.0d, Double.POSITIVE_INFINITY, 1.0d); types.add(type); types.addAll(RandomGenerator.getRandomGeneratorParameters(this)); return types; } @Override protected int[] getFilterValueTypes() { return new int[] { Ontology.ATTRIBUTE_VALUE }; } @Override public ResourceConsumptionEstimator getResourceConsumptionEstimator() { return OperatorResourceConsumptionHandler.getResourceConsumptionEstimator(getInputPort(), NoiseOperator.class, attributeSelector); } }