/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.filter;
import java.util.List;
import java.util.Map;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.AbstractExampleSetProcessing;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ProcessSetupError.Severity;
import com.rapidminer.operator.annotation.ResourceConsumptionEstimator;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MDInteger;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.SimpleMetaDataError;
import com.rapidminer.operator.tools.AttributeSubsetSelector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeExpression;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.OperatorResourceConsumptionHandler;
import com.rapidminer.tools.math.function.ExpressionParser;
/**
 * Allows the declaration of a missing value (numeric, nominal, or described by an expression) on a
 * selected subset of attributes. Every matching value is replaced by Double.NaN, so subsequent
 * operators will treat it as a missing value.
 *
 * @author Marco Boeck
 */
public class DeclareMissingValueOperator extends AbstractExampleSetProcessing {

    /** parameter to set the missing value for numeric type */
    public static final String PARAMETER_MISSING_VALUE_NUMERIC = "numeric_value";

    /** parameter to set the missing value for nominal type */
    public static final String PARAMETER_MISSING_VALUE_NOMINAL = "nominal_value";

    /** parameter to set the expression */
    public static final String PARAMETER_MISSING_VALUE_EXPRESSION = "expression_value";

    /** parameter to set the missing value type (numeric, nominal or expression) */
    public static final String PARAMETER_MODE = "mode";

    /** constant for {@link #PARAMETER_MODE}: compare against a numeric value */
    private static final String NUMERIC = "numeric";

    /** constant for {@link #PARAMETER_MODE}: compare against a nominal value */
    private static final String NOMINAL = "nominal";

    /** constant for {@link #PARAMETER_MODE}: evaluate a boolean expression per example */
    private static final String EXPRESSION = "expression";

    /** value types to choose from in {@link #PARAMETER_MODE}; the index order is used by the parameter conditions */
    private static final String[] VALUE_TYPES = new String[] { NUMERIC, NOMINAL, EXPRESSION };

    /** Subset Selector for parameter use */
    private final AttributeSubsetSelector subsetSelector = new AttributeSubsetSelector(this, getExampleSetInputPort());

    /**
     * The ExpressionParser instance. Kept per operator instance: a static field that is reassigned by
     * every constructor call would be shared (and overwritten) between all operators of this type.
     */
    private final ExpressionParser expParser;

    /**
     * Creates the operator and prepares an expression parser that accepts undeclared variables.
     *
     * @param description the operator description handed to the super class
     */
    public DeclareMissingValueOperator(OperatorDescription description) {
        super(description);
        expParser = new ExpressionParser(true);
        expParser.getParser().setAllowUndeclared(true);
    }

    /**
     * Marks the number of missing values as increased by an unknown amount for every attribute of the
     * selected subset whose value type fits the current mode. Adds an error to the input port if the
     * subset is empty or contains no attribute of the required type.
     *
     * @param metaData the incoming meta data, modified in place
     * @return the same {@code metaData} instance
     * @throws UndefinedParameterError if a required parameter cannot be read
     */
    @Override
    protected MetaData modifyMetaData(ExampleSetMetaData metaData) throws UndefinedParameterError {
        boolean anyValueDeclared = isParameterSet(PARAMETER_MISSING_VALUE_NOMINAL)
                || isParameterSet(PARAMETER_MISSING_VALUE_NUMERIC)
                || isParameterSet(PARAMETER_MISSING_VALUE_EXPRESSION);
        if (!anyValueDeclared) {
            return metaData;
        }

        ExampleSetMetaData subset = subsetSelector.getMetaDataSubset(metaData, false);
        if (subset == null) {
            return metaData;
        }

        String mode = getParameterAsString(PARAMETER_MODE);
        boolean parameterAttributeTypeExistsInSubset = false;
        for (AttributeMetaData amd : subset.getAllAttributes()) {
            if (!isAffectedValueType(mode, amd.getValueType())) {
                // attributes of another type are not touched by this mode, so their
                // missing value count must stay as it is
                continue;
            }
            parameterAttributeTypeExistsInSubset = true;
            AttributeMetaData originalAMD = metaData.getAttributeByName(amd.getName());
            MDInteger missingValueNumber = originalAMD.getNumberOfMissingValues();
            missingValueNumber.increaseByUnknownAmount();
        }

        if (!parameterAttributeTypeExistsInSubset) {
            if (subset.getAllAttributes().isEmpty()) {
                getInputPort().addError(new SimpleMetaDataError(Severity.ERROR, getInputPort(), "attribute_selection_empty"));
            } else {
                if (NUMERIC.equals(mode)) {
                    getInputPort().addError(new SimpleMetaDataError(Severity.ERROR, getInputPort(), "exampleset.must_contain_numerical_attribute"));
                }
                if (NOMINAL.equals(mode)) {
                    getInputPort().addError(new SimpleMetaDataError(Severity.ERROR, getInputPort(), "exampleset.must_contain_nominal_attribute"));
                }
            }
        }
        return metaData;
    }

    /**
     * Tells whether an attribute of the given value type counts as a target of the given mode
     * during the meta data check.
     *
     * @param mode one of the entries of {@link #VALUE_TYPES}
     * @param valueType the {@link Ontology} value type constant of the attribute
     * @return {@code true} if the mode applies to the value type
     */
    private static boolean isAffectedValueType(String mode, int valueType) {
        if (NUMERIC.equals(mode)) {
            switch (valueType) {
                case Ontology.NUMERICAL:
                case Ontology.INTEGER:
                case Ontology.REAL:
                    return true;
                default:
                    return false;
            }
        }
        if (NOMINAL.equals(mode)) {
            switch (valueType) {
                case Ontology.NOMINAL:
                case Ontology.STRING:
                case Ontology.BINOMINAL:
                case Ontology.POLYNOMINAL:
                case Ontology.FILE_PATH:
                // NOTE(review): DATE_TIME is kept from the original type list although it is
                // not a nominal type -- confirm whether it belongs here
                case Ontology.DATE_TIME:
                    return true;
                default:
                    return false;
            }
        }
        // expression can be on all types so always true
        return EXPRESSION.equals(mode);
    }

    /**
     * Replaces the declared missing value by Double.NaN in all attributes of the selected subset.
     * The data is changed in place (see {@link #writesIntoExistingData()}).
     *
     * @param exampleSet the example set to process
     * @return the given {@code exampleSet}
     * @throws OperatorException if the expression cannot be parsed or a parameter cannot be read
     */
    @Override
    public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
        ExampleSet subset = subsetSelector.getSubset(exampleSet, false);
        Attributes attributes = subset.getAttributes();
        String mode = getParameterAsString(PARAMETER_MODE);

        if (EXPRESSION.equals(mode)) {
            declareByExpression(exampleSet, subset, attributes);
        } else if (NUMERIC.equals(mode)) {
            declareNumeric(subset, attributes);
        } else if (NOMINAL.equals(mode)) {
            declareNominal(subset, attributes);
        }
        return exampleSet;
    }

    /**
     * EXPRESSION mode: sets all subset attributes of an example to missing if the expression
     * evaluates to {@code true} for that example. Results that are not a Boolean are ignored.
     */
    private void declareByExpression(ExampleSet exampleSet, ExampleSet subset, Attributes attributes) throws OperatorException {
        // parse expression
        expParser.getParser().parseExpression(getParameterAsString(PARAMETER_MISSING_VALUE_EXPRESSION));
        // error after parsing?
        if (expParser.getParser().hasError()) {
            throw new OperatorException(expParser.getParser().getErrorInfo());
        }
        // let the parser know the attributes (of the complete example set, not only the subset)
        Map<String, Attribute> name2attributes = ExpressionParser.deriveVariablesFromExampleSet(expParser.getParser(), exampleSet);

        for (Example example : subset) {
            // assign values to the variables
            ExpressionParser.assignVariableValuesFromExample(expParser.getParser(), example, name2attributes);
            // the variables are assigned once per example, so one evaluation per example is enough
            Object result = expParser.getParser().getValueAsObject();
            if (!Boolean.TRUE.equals(result)) {
                continue;
            }
            // change to missing on true evaluation
            for (Attribute attribute : attributes) {
                example.setValue(attribute, Double.NaN);
            }
        }
    }

    /**
     * NUMERIC mode: sets every value equal to the numeric parameter to missing. Nominal attributes
     * are skipped, because their raw double value is presumably an internal mapping index and not
     * a number the user could have meant.
     */
    private void declareNumeric(ExampleSet subset, Attributes attributes) throws UndefinedParameterError {
        // read the parameter once instead of once per cell
        double missingValue = getParameterAsDouble(PARAMETER_MISSING_VALUE_NUMERIC);
        for (Example example : subset) {
            for (Attribute attribute : attributes) {
                if (attribute.isNominal()) {
                    continue;
                }
                if (example.getValue(attribute) == missingValue) {
                    example.setValue(attribute, Double.NaN);
                }
            }
        }
    }

    /**
     * NOMINAL mode: sets every value equal to the nominal parameter to missing. Only nominal
     * attributes are inspected, since other attributes have no nominal value to compare
     * (NOTE(review): getNominalValue is expected to reject non-nominal attributes -- confirm).
     */
    private void declareNominal(ExampleSet subset, Attributes attributes) throws UndefinedParameterError {
        // read the parameter once instead of once per cell
        String missingValue = getParameterAsString(PARAMETER_MISSING_VALUE_NOMINAL);
        if (missingValue == null) {
            // no value declared, so nothing can match
            return;
        }
        for (Example example : subset) {
            for (Attribute attribute : attributes) {
                if (!attribute.isNominal()) {
                    continue;
                }
                if (missingValue.equals(example.getNominalValue(attribute))) {
                    example.setValue(attribute, Double.NaN);
                }
            }
        }
    }

    /**
     * Returns the parameters of the super class and of the subset selector, followed by the mode
     * and one value parameter per mode. Each value parameter depends on its mode via the index of
     * the mode in {@link #VALUE_TYPES}.
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> parameters = super.getParameterTypes();
        parameters.addAll(subsetSelector.getParameterTypes());

        ParameterType type = new ParameterTypeCategory(PARAMETER_MODE, "Select the value type of the missing value", VALUE_TYPES, 0);
        type.setExpert(false);
        parameters.add(type);

        // index 0 = NUMERIC
        type = new ParameterTypeDouble(PARAMETER_MISSING_VALUE_NUMERIC, "This parameter defines the missing numerical value", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY, true);
        type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_MODE, VALUE_TYPES, true, 0));
        type.setExpert(false);
        parameters.add(type);

        // index 1 = NOMINAL
        type = new ParameterTypeString(PARAMETER_MISSING_VALUE_NOMINAL, "This parameter defines the missing nominal value", true, false);
        type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_MODE, VALUE_TYPES, true, 1));
        type.setExpert(false);
        parameters.add(type);

        // index 2 = EXPRESSION
        type = new ParameterTypeExpression(PARAMETER_MISSING_VALUE_EXPRESSION, "This parameter defines the expression which if true equals the missing value", getInputPort(), true, false);
        type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_MODE, VALUE_TYPES, true, 2));
        type.setExpert(false);
        parameters.add(type);

        return parameters;
    }

    /** This operator changes values of the incoming data directly, see {@link #apply(ExampleSet)}. */
    @Override
    public boolean writesIntoExistingData() {
        return true;
    }

    /** Delegates to the {@link OperatorResourceConsumptionHandler} for this operator class. */
    @Override
    public ResourceConsumptionEstimator getResourceConsumptionEstimator() {
        return OperatorResourceConsumptionHandler.getResourceConsumptionEstimator(getInputPort(), DeclareMissingValueOperator.class, null);
    }
}