/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.preprocessing.filter; import java.util.Arrays; import java.util.HashMap; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.example.ExampleSet; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.operator.preprocessing.PreprocessingModel; import com.rapidminer.operator.preprocessing.PreprocessingOperator; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeAttribute; import com.rapidminer.parameter.ParameterTypeCategory; import com.rapidminer.parameter.ParameterTypeList; import com.rapidminer.parameter.ParameterTypeStringCategory; import com.rapidminer.parameter.UndefinedParameterError; /** * Abstract superclass for all operators that replenish values, e.g. nan or * infinite values. * * @author Ingo Mierswa, Simon Fischer */ public abstract class ValueReplenishment extends PreprocessingOperator { /** The parameter name for "Function to apply to all columns that are not explicitly specified by parameter 'columns'." */ public static final String PARAMETER_DEFAULT = "default"; /** The parameter name for "List of replacement functions for each column." */ public static final String PARAMETER_COLUMNS = "columns"; public ValueReplenishment(OperatorDescription description) { super(description); } /** * Returns the value which should be replaced. */ public abstract double getReplacedValue(); /** Returns the value of the replenishment function with the given index. * @throws UndefinedParameterError * @throws UserError */ public abstract double getReplenishmentValue(int functionIndex, ExampleSet baseExampleSet, Attribute attribute) throws UndefinedParameterError, UserError; /** Returns an array of all replenishment functions. */ public abstract String[] getFunctionNames(); /** * Returns the index of the replenishment function which will be used for * attributes not listed in the parameter list "columns". */ public abstract int getDefaultFunction(); /** * Returns the index of the replenishment function which will be used for * attributes listed in the parameter list "columns". */ public abstract int getDefaultColumnFunction(); @Override public PreprocessingModel createPreprocessingModel(ExampleSet exampleSet) throws OperatorException { exampleSet.recalculateAllAttributeStatistics(); int defaultFunction = getParameterAsInt(PARAMETER_DEFAULT); List<String[]> functionList = getParameterList(PARAMETER_COLUMNS); double replacedValue = getReplacedValue(); HashMap<String, Double> numericalReplacementMap = new HashMap<String, Double>(); HashMap<String, String> nominalReplacementMap = new HashMap<String, String>(); List<String> functionNames = Arrays.asList(getFunctionNames()); for (Attribute attribute: exampleSet.getAttributes()) { String attributeName = attribute.getName(); int function = defaultFunction; for (String[] pair: functionList) { if (pair[0].equals(attributeName)) { function = functionNames.indexOf(pair[1]); if (function == -1) { throw new RuntimeException("Illegal replacement function: "+pair[1]); } } } if (attribute.isNominal()) { nominalReplacementMap.put(attributeName, attribute.getMapping().mapIndex((int) getReplenishmentValue(function, exampleSet, attribute))); } if (attribute.isNumerical()) { numericalReplacementMap.put(attributeName, getReplenishmentValue(function, exampleSet, attribute)); } } return new ValueReplenishmentModel(exampleSet, replacedValue, numericalReplacementMap, nominalReplacementMap); } @Override public Class<? extends PreprocessingModel> getPreprocessingModelClass() { return ValueReplenishmentModel.class; } @Override public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); String[] functionNames = getFunctionNames(); ParameterType type = new ParameterTypeCategory(PARAMETER_DEFAULT, "Function to apply to all columns that are not explicitly specified by parameter 'columns'.", functionNames, getDefaultFunction()); type.setExpert(false); types.add(type); ParameterTypeStringCategory categories = new ParameterTypeStringCategory("replace_with", "Selects the function, which is used to determine the replacement for the missing values of this attribute.", functionNames, getFunctionNames()[getDefaultColumnFunction()], false); categories.setEditable(false); types.add(new ParameterTypeList(PARAMETER_COLUMNS, "List of replacement functions for each column.", new ParameterTypeAttribute("attribute", "Specifies the attribute, which missing values are replaced.", getExampleSetInputPort()), categories)); return types; } }