/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.filter;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.preprocessing.PreprocessingModel;
import com.rapidminer.operator.preprocessing.PreprocessingOperator;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeAttribute;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeStringCategory;
import com.rapidminer.parameter.UndefinedParameterError;
/**
 * Abstract base class for operators that replenish values, i.e. operators
 * which substitute one particular value (e.g. NaN or an infinite value) by a
 * replacement that is determined separately for each attribute.
 * <p>
 * Subclasses name the available replenishment functions, state which value is
 * to be replaced and compute the replacement for a given function index. This
 * class contributes the parameters {@link #PARAMETER_DEFAULT} and
 * {@link #PARAMETER_COLUMNS} and builds the {@link ValueReplenishmentModel}.
 *
 * @author Ingo Mierswa, Simon Fischer
 */
public abstract class ValueReplenishment extends PreprocessingOperator {

    /** The parameter name for "Function to apply to all columns that are not explicitly specified by parameter 'columns'." */
    public static final String PARAMETER_DEFAULT = "default";

    /** The parameter name for "List of replacement functions for each column." */
    public static final String PARAMETER_COLUMNS = "columns";

    /**
     * Creates the operator and hands the description on to the superclass.
     *
     * @param description the operator description passed to the super constructor
     */
    public ValueReplenishment(OperatorDescription description) {
        super(description);
    }

    /**
     * Returns the value which should be replaced.
     *
     * @return the value that is handed to the created model as the value to replace
     */
    public abstract double getReplacedValue();

    /**
     * Returns the value of the replenishment function with the given index.
     * For nominal attributes the result is cast to {@code int} by
     * {@link #createPreprocessingModel(ExampleSet)} and used as an index into
     * the attribute's nominal mapping.
     *
     * @param functionIndex  index of the function within {@link #getFunctionNames()}
     * @param baseExampleSet the example set the model is created from
     * @param attribute      the attribute whose replacement is requested
     * @return the replacement value for the given attribute
     * @throws UndefinedParameterError declared for implementations; conditions are up to the subclass
     * @throws UserError               declared for implementations; conditions are up to the subclass
     */
    public abstract double getReplenishmentValue(int functionIndex, ExampleSet baseExampleSet, Attribute attribute)
            throws UndefinedParameterError, UserError;

    /**
     * Returns an array of all replenishment functions.
     *
     * @return the function names; the position in the array is the function index
     */
    public abstract String[] getFunctionNames();

    /**
     * Returns the index of the replenishment function which will be used for
     * attributes not listed in the parameter list "columns".
     *
     * @return the index used as default value of parameter {@link #PARAMETER_DEFAULT}
     */
    public abstract int getDefaultFunction();

    /**
     * Returns the index of the replenishment function which will be used for
     * attributes listed in the parameter list "columns".
     *
     * @return the index whose function name is the default entry of the "replace_with" category
     */
    public abstract int getDefaultColumnFunction();

    /**
     * Builds the replenishment model for the given example set.
     * <p>
     * All attribute statistics are recalculated first. Then, for every
     * attribute, the replenishment function is chosen (the entry of
     * {@link #PARAMETER_COLUMNS} matching the attribute name, otherwise the
     * function selected by {@link #PARAMETER_DEFAULT}) and its replacement is
     * stored under the attribute name: nominal attributes store the nominal
     * string obtained from their mapping, numerical attributes store the
     * double value. Attributes that are neither nominal nor numerical get no
     * entry.
     *
     * @param exampleSet the example set to derive the replacements from
     * @return a {@link ValueReplenishmentModel} holding the replaced value and both replacement maps
     * @throws OperatorException propagated from reading the parameters or from
     *                           {@link #getReplenishmentValue(int, ExampleSet, Attribute)}
     * @throws RuntimeException  if a matching "columns" entry names a function
     *                           that is not contained in {@link #getFunctionNames()}
     */
    @Override
    public PreprocessingModel createPreprocessingModel(ExampleSet exampleSet) throws OperatorException {
        exampleSet.recalculateAllAttributeStatistics();

        // Keep the order of these reads: each of them may fail on its own.
        int fallbackFunction = getParameterAsInt(PARAMETER_DEFAULT);
        List<String[]> columnFunctions = getParameterList(PARAMETER_COLUMNS);
        double valueToReplace = getReplacedValue();
        List<String> knownFunctions = Arrays.asList(getFunctionNames());

        HashMap<String, String> nominalReplacements = new HashMap<String, String>();
        HashMap<String, Double> numericalReplacements = new HashMap<String, Double>();

        for (Attribute current : exampleSet.getAttributes()) {
            String name = current.getName();
            int functionIndex = lookUpFunctionIndex(name, fallbackFunction, columnFunctions, knownFunctions);

            // The two checks are deliberately independent of each other, so
            // the replenishment value is only requested for matching types.
            if (current.isNominal()) {
                // The replenishment value is interpreted as an index into the nominal mapping.
                String replacement = current.getMapping().mapIndex(
                        (int) getReplenishmentValue(functionIndex, exampleSet, current));
                nominalReplacements.put(name, replacement);
            }
            if (current.isNumerical()) {
                Double replacement = Double.valueOf(getReplenishmentValue(functionIndex, exampleSet, current));
                numericalReplacements.put(name, replacement);
            }
        }

        return new ValueReplenishmentModel(exampleSet, valueToReplace, numericalReplacements, nominalReplacements);
    }

    /**
     * Determines the function index for one attribute: the whole "columns"
     * list is scanned and the last entry whose key equals the attribute name
     * decides; without any match the fallback index is returned.
     */
    private int lookUpFunctionIndex(String attributeName, int fallbackFunction, List<String[]> columnFunctions,
            List<String> knownFunctions) {
        int chosen = fallbackFunction;
        for (String[] entry : columnFunctions) {
            if (!entry[0].equals(attributeName)) {
                continue;
            }
            chosen = knownFunctions.indexOf(entry[1]);
            if (chosen < 0) {
                throw new RuntimeException("Illegal replacement function: " + entry[1]);
            }
        }
        return chosen;
    }

    /**
     * Returns the class of the models created by this operator.
     *
     * @return {@code ValueReplenishmentModel.class}
     */
    @Override
    public Class<? extends PreprocessingModel> getPreprocessingModelClass() {
        return ValueReplenishmentModel.class;
    }

    /**
     * Extends the parameter types of the superclass by the non-expert category
     * parameter {@link #PARAMETER_DEFAULT} and by the list parameter
     * {@link #PARAMETER_COLUMNS}, whose entries pair an attribute with a
     * non-editable "replace_with" function name.
     *
     * @return the list obtained from the superclass with both parameters appended
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameterTypes = super.getParameterTypes();
        String[] functionNames = getFunctionNames();

        // Function applied to every attribute without an explicit entry.
        ParameterType defaultFunctionType = new ParameterTypeCategory(
                PARAMETER_DEFAULT,
                "Function to apply to all columns that are not explicitly specified by parameter 'columns'.",
                functionNames,
                getDefaultFunction());
        defaultFunctionType.setExpert(false);
        parameterTypes.add(defaultFunctionType);

        // Value type of the list: the function name, restricted to the known names.
        ParameterTypeStringCategory replaceWith = new ParameterTypeStringCategory(
                "replace_with",
                "Selects the function, which is used to determine the replacement for the missing values of this attribute.",
                functionNames,
                getFunctionNames()[getDefaultColumnFunction()],
                false);
        replaceWith.setEditable(false);

        // Key type of the list: the attribute the function is applied to.
        ParameterTypeAttribute attributeSelector = new ParameterTypeAttribute(
                "attribute",
                "Specifies the attribute, which missing values are replaced.",
                getExampleSetInputPort());

        parameterTypes.add(new ParameterTypeList(
                PARAMETER_COLUMNS,
                "List of replacement functions for each column.",
                attributeSelector,
                replaceWith));
        return parameterTypes;
    }
}