/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.filter;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.preprocessing.PreprocessingModel;
import com.rapidminer.operator.preprocessing.PreprocessingOperator;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeAttribute;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeStringCategory;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.Ontology;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
/**
* Abstract superclass for all operators that replenish values, e.g. NaN or infinite values.
*
* @author Ingo Mierswa, Simon Fischer
*/
public abstract class ValueReplenishment extends PreprocessingOperator {

    /**
     * Key of the parameter holding the function that is applied to every column which has no
     * entry of its own in the {@code columns} list.
     */
    public static final String PARAMETER_DEFAULT = "default";

    /** Key of the list parameter that assigns a replacement function to individual columns. */
    public static final String PARAMETER_COLUMNS = "columns";

    public ValueReplenishment(OperatorDescription description) {
        super(description);
    }

    /**
     * @return the value that is to be replaced
     */
    public abstract double getReplacedValue();

    /**
     * Computes the replacement value produced by one of the replenishment functions.
     *
     * @param functionIndex
     *            index of the replenishment function to evaluate
     * @param baseExampleSet
     *            the example set handed to {@link #createPreprocessingModel(ExampleSet)}
     * @param attribute
     *            the attribute a replacement value is requested for
     * @return the replacement value for the given attribute
     * @throws UndefinedParameterError
     * @throws UserError
     */
    public abstract double getReplenishmentValue(int functionIndex, ExampleSet baseExampleSet, Attribute attribute)
            throws UndefinedParameterError, UserError;

    /**
     * @return the names of all replenishment functions
     */
    public abstract String[] getFunctionNames();

    /**
     * @return index of the replenishment function preselected for attributes that are not
     *         listed in the "columns" parameter
     */
    public abstract int getDefaultFunction();

    /**
     * @return index of the replenishment function preselected for attributes that are listed
     *         in the "columns" parameter
     */
    public abstract int getDefaultColumnFunction();

    /**
     * Builds a {@link ValueReplenishmentModel} holding one replacement per attribute.
     * <p>
     * For each attribute the replenishment function is taken from the "columns" list if the
     * attribute is named there, otherwise from the "default" parameter. Nominal attributes get a
     * string replacement (or {@code null} when the computed value is -1 or NaN); numerical and
     * date/time attributes get the computed double value.
     */
    @Override
    public PreprocessingModel createPreprocessingModel(ExampleSet exampleSet) throws OperatorException {
        exampleSet.recalculateAllAttributeStatistics();

        final int fallbackFunction = getParameterAsInt(PARAMETER_DEFAULT);
        final List<String[]> columnAssignments = getParameterList(PARAMETER_COLUMNS);
        final double valueToReplace = getReplacedValue();

        HashMap<String, Double> numericReplacements = new HashMap<String, Double>();
        HashMap<String, String> nominalReplacements = new HashMap<String, String>();
        final List<String> knownFunctions = Arrays.asList(getFunctionNames());

        for (Attribute attribute : exampleSet.getAttributes()) {
            final String name = attribute.getName();
            final int functionIndex = resolveFunctionIndex(name, fallbackFunction, columnAssignments, knownFunctions);
            final double replacement = getReplenishmentValue(functionIndex, exampleSet, attribute);

            if (attribute.isNominal()) {
                // -1 and NaN both mean that no mapped nominal value is available
                final boolean hasMappedValue = (replacement != -1) && !Double.isNaN(replacement);
                final String label = hasMappedValue ? attribute.getMapping().mapIndex((int) replacement) : null;
                nominalReplacements.put(name, label);
            }

            // deliberately not an else-branch: evaluated independently of the nominal check
            if (attribute.isNumerical()
                    || Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) {
                numericReplacements.put(name, replacement);
            }
        }

        return new ValueReplenishmentModel(exampleSet, valueToReplace, numericReplacements, nominalReplacements);
    }

    @Override
    public Class<? extends PreprocessingModel> getPreprocessingModelClass() {
        return ValueReplenishmentModel.class;
    }

    /**
     * Extends the inherited parameter types by the "default" function category and the
     * "columns" list, which pairs an attribute with a replacement function.
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        String[] functionNames = getFunctionNames();

        ParameterType defaultFunctionType = new ParameterTypeCategory(PARAMETER_DEFAULT,
                "Function to apply to all columns that are not explicitly specified by parameter 'columns'.",
                functionNames, getDefaultFunction());
        defaultFunctionType.setExpert(false);
        types.add(defaultFunctionType);

        String preselectedColumnFunction = getFunctionNames()[getDefaultColumnFunction()];
        ParameterTypeStringCategory columnFunctionType = new ParameterTypeStringCategory("replace_with",
                "Selects the function, which is used to determine the replacement for the missing values of this attribute.",
                functionNames, preselectedColumnFunction, false);
        columnFunctionType.setEditable(false);

        ParameterTypeAttribute columnSelector = new ParameterTypeAttribute("attribute",
                "Specifies the attribute, which missing values are replaced.", getExampleSetInputPort());
        types.add(new ParameterTypeList(PARAMETER_COLUMNS, "List of replacement functions for each column.",
                columnSelector, columnFunctionType));

        return types;
    }

    /**
     * Determines the function index for one attribute: scans every (attribute name, function
     * name) pair and lets the last pair naming the attribute win; without any such pair the
     * fallback index is returned.
     *
     * @throws RuntimeException
     *             if a pair naming the attribute refers to a function name that is not contained
     *             in {@code knownFunctions}
     */
    private int resolveFunctionIndex(String attributeName, int fallbackFunction, List<String[]> columnAssignments,
            List<String> knownFunctions) {
        int resolved = fallbackFunction;
        for (String[] assignment : columnAssignments) {
            if (!assignment[0].equals(attributeName)) {
                continue;
            }
            resolved = knownFunctions.indexOf(assignment[1]);
            if (resolved == -1) {
                throw new RuntimeException("Illegal replacement function: " + assignment[1]);
            }
        }
        return resolved;
    }
}