/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.filter;
import java.util.Iterator;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
/**
 * Abstract superclass for all operators that replenish values, e.g. NaN or
 * infinite values.
 * <p>
 * Subclasses decide which values get replaced
 * ({@link #replenishValue(double)}) and what they are replaced with
 * ({@link #getReplenishmentValue(int, ExampleSet, Attribute, double, String)}).
 * This class contributes the parameter definitions and the pass over the
 * example set in {@link #apply()}.
 *
 * @author Ingo Mierswa, Simon Fischer
 * @version $Id: ValueReplenishment.java,v 1.11 2006/04/05 08:57:27 ingomierswa
 *          Exp $
 */
public abstract class ValueReplenishment extends Operator {

    /** The parameter name for "Function to apply to all columns that are not explicitly specified by parameter 'columns'." */
    public static final String PARAMETER_DEFAULT = "default";

    /** The parameter name for "List of replacement functions for each column." */
    public static final String PARAMETER_COLUMNS = "columns";

    /** The parameter name for "This value is used for some of the replenishment types." */
    public static final String PARAMETER_REPLENISHMENT_VALUE = "replenishment_value";

    /**
     * Creates the operator.
     *
     * @param description the operator description handed on to {@link Operator}
     */
    public ValueReplenishment(OperatorDescription description) {
        super(description);
    }

    /**
     * Returns true iff the value should be replenished.
     *
     * @param currentValue the value currently stored for an attribute of an example
     * @return true if {@link #apply()} should overwrite this value
     */
    public abstract boolean replenishValue(double currentValue);

    /**
     * Returns the value of the replenishment function with the given index.
     *
     * @param functionIndex  index of the replenishment function selected for the attribute
     * @param baseExampleSet the example set being processed; {@link #apply()}
     *                       recalculates all attribute statistics on it before the first call
     * @param attribute      the attribute whose value is replaced
     * @param currentValue   the value that is about to be replaced
     * @param valueString    the content of the parameter
     *                       {@link #PARAMETER_REPLENISHMENT_VALUE} as returned by
     *                       {@code getParameterAsString}
     * @return the value written back to the example
     */
    public abstract double getReplenishmentValue(int functionIndex, ExampleSet baseExampleSet, Attribute attribute, double currentValue, String valueString);

    /**
     * Returns an array of all replenishment functions.
     *
     * @return the function names offered as categories of the parameters
     *         {@link #PARAMETER_DEFAULT} and {@link #PARAMETER_COLUMNS}
     */
    public abstract String[] getFunctionNames();

    /**
     * Returns the index of the replenishment function which will be used for
     * attributes not listed in the parameter list "columns".
     *
     * @return the index passed as default of the parameter {@link #PARAMETER_DEFAULT}
     */
    public abstract int getDefaultFunction();

    /**
     * Returns the index of the replenishment function which will be used for
     * attributes listed in the parameter list "columns".
     *
     * @return the index passed as default of the category parameter nested
     *         in the list parameter {@link #PARAMETER_COLUMNS}
     */
    public abstract int getDefaultColumnFunction();

    /**
     * Iterates over all examples and all attributes and makes callbacks to
     * {@link #getReplenishmentValue(int, ExampleSet, Attribute, double, String)} if
     * {@link #replenishValue(double)} returns true. The input example set is
     * modified in place and returned as the only result.
     *
     * @return an array containing only the processed input example set
     * @throws OperatorException if the input or a parameter cannot be
     *                           retrieved, or if {@code checkForStop()} signals it
     */
    public IOObject[] apply() throws OperatorException {
        ExampleSet eSet = getInput(ExampleSet.class);
        // Recalculated once, before any value is overwritten.
        eSet.recalculateAllAttributeStatistics();

        int[] replenishmentFunctions = resolveReplenishmentFunctions(eSet);
        // Looked up once instead of once per replaced value.
        String replenishmentValue = getParameterAsString(PARAMETER_REPLENISHMENT_VALUE);

        Iterator<Example> reader = eSet.iterator();
        while (reader.hasNext()) {
            Example example = reader.next();
            // j follows the iteration order of the attributes and therefore
            // matches the layout of replenishmentFunctions.
            int j = 0;
            for (Attribute attribute : eSet.getAttributes()) {
                double value = example.getValue(attribute);
                if (replenishValue(value)) {
                    example.setValue(attribute, getReplenishmentValue(replenishmentFunctions[j], eSet, attribute, value, replenishmentValue));
                }
                j++;
            }
            checkForStop();
        }
        return new IOObject[] { eSet };
    }

    /**
     * Determines the replenishment function index for each attribute of the
     * given example set: the value of {@link #PARAMETER_DEFAULT}, overridden
     * by the entries of {@link #PARAMETER_COLUMNS} whose key equals the
     * attribute name. Keys matching no attribute are ignored.
     *
     * @param eSet the example set whose attributes are looked up
     * @return one function index per attribute, in attribute iteration order
     * @throws OperatorException if a parameter cannot be retrieved
     */
    private int[] resolveReplenishmentFunctions(ExampleSet eSet) throws OperatorException {
        int[] functions = new int[eSet.getAttributes().size()];
        if (functions.length > 0) {
            // Read the default once; it is the same for every attribute.
            int defaultFunction = getParameterAsInt(PARAMETER_DEFAULT);
            for (int j = 0; j < functions.length; j++) {
                functions[j] = defaultFunction;
            }
        }

        for (Object entry : getParameterList(PARAMETER_COLUMNS)) {
            // Each list entry is a pair: [0] attribute name, [1] function index.
            Object[] pair = (Object[]) entry;
            String name = (String) pair[0];
            int functionIndex = ((Integer) pair[1]).intValue();
            int j = 0;
            for (Attribute attribute : eSet.getAttributes()) {
                if (attribute.getName().equals(name)) {
                    functions[j] = functionIndex;
                }
                j++;
            }
        }
        return functions;
    }

    /**
     * Returns the classes of the objects delivered by this operator.
     *
     * @return an array containing only {@code ExampleSet.class}
     */
    public Class<?>[] getOutputClasses() {
        return new Class[] { ExampleSet.class };
    }

    /**
     * Returns the classes of the objects consumed by this operator.
     *
     * @return an array containing only {@code ExampleSet.class}
     */
    public Class<?>[] getInputClasses() {
        return new Class[] { ExampleSet.class };
    }

    /**
     * Extends the parameter types of the superclass by the default function,
     * the per-column function list and the replenishment value.
     *
     * @return the list obtained from the superclass with the three parameter
     *         types of this class appended
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        ParameterType type = new ParameterTypeCategory(PARAMETER_DEFAULT, "Function to apply to all columns that are not explicitly specified by parameter 'columns'.", getFunctionNames(), getDefaultFunction());
        type.setExpert(false);
        types.add(type);
        types.add(new ParameterTypeList(PARAMETER_COLUMNS, "List of replacement functions for each column.", new ParameterTypeCategory("replace_with", "The key is the attribute name. The value is the name of function used to replace the missing value.", getFunctionNames(), getDefaultColumnFunction())));
        types.add(new ParameterTypeString(PARAMETER_REPLENISHMENT_VALUE, "This value is used for some of the replenishment types.", true));
        return types;
    }
}