/*
* RapidMiner
*
* Copyright (C) 2001-2011 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.features.construction;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.AttributeValueFilter;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.SimpleProcessSetupError;
import com.rapidminer.operator.ProcessSetupError.Severity;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.NumberParser;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.math.container.Range;
/**
* Generates a new attribute and sets the attribute's values according to
* the fulfillment of the specified conditions. Sets the attribute value
* to the first value whose condition is matched.
*
* <p>The parameter string must have the form
* <code>attribute op value</code>, where attribute is a name of an
* attribute, value is a value the attribute can take and op is one of the
* binary logical operators similar to the ones known from Java, e.g. greater
* than or equals. Please note that you can define a logical OR of several conditions
* with || and a logical AND of two conditions with two ampersands. Please note also
* that for nominal attributes you can define a regular expression as the value for
* the equal and not equal checks.</p>
*
* @author Tobias Malbrecht
*/
public class ConditionedFeatureGeneration extends AbstractFeatureConstruction {

    /** Parameter key: name of the attribute that is generated. */
    public static final String PARAMETER_ATTRIBUTE_NAME = "attribute_name";

    /** Parameter key: value type of the generated attribute (category index, see {@link #getParameterTypes()}). */
    public static final String PARAMETER_VALUE_TYPE = "value_type";

    /** Parameter key: list of (result value, condition) pairs. */
    public static final String PARAMETER_VALUES = "values";

    /** Parameter key: the condition column inside the {@link #PARAMETER_VALUES} list. */
    public static final String PARAMETER_CONDITIONS = "conditions";

    /** Parameter key: value used when no condition matches. */
    public static final String PARAMETER_DEFAULT_VALUE = "default_value";

    /** Marker string which {@link #apply(ExampleSet)} translates into a missing value (NaN). */
    private static final String MISSING_VALUE_SYMBOL = "?";

    public ConditionedFeatureGeneration(OperatorDescription description) {
        super(description);
    }

    /**
     * Adds the meta data of the generated attribute to the given example set meta data.
     *
     * <p>For nominal value types the value set consists of the default value and all
     * result values; for all other value types a range over these values is built.
     * The missing value marker <code>?</code> is accepted everywhere (exactly as in
     * {@link #apply(ExampleSet)}) and contributes neither to the value set nor to the
     * range. Non-numerical values for a non-nominal attribute are reported as setup
     * errors.</p>
     *
     * @param metaData the incoming meta data, which is modified in place
     * @return the same meta data object, extended by the new attribute if all
     *         required parameters are defined
     */
    @Override
    protected MetaData modifyMetaData(ExampleSetMetaData metaData) {
        try {
            // the category parameter skips the first entry of Ontology.VALUE_TYPE_NAMES, hence the + 1
            AttributeMetaData amd = new AttributeMetaData(getParameterAsString(PARAMETER_ATTRIBUTE_NAME), getParameterAsInt(PARAMETER_VALUE_TYPE) + 1);
            List<String[]> valueConditionList = getParameterList(PARAMETER_VALUES);
            String defaultValue = getParameterAsString(PARAMETER_DEFAULT_VALUE);
            if (amd.isNominal()) {
                // collect every nominal value apply() can write: the default and all result values
                Set<String> values = new HashSet<String>();
                if (!isMissingValue(defaultValue)) {
                    values.add(defaultValue);
                }
                for (String[] pair : valueConditionList) {
                    if (!isMissingValue(pair[0])) {
                        values.add(pair[0]);
                    }
                }
                amd.setValueSet(values, SetRelation.EQUAL);
            } else {
                Range range = new Range();
                if (!isMissingValue(defaultValue)) {
                    try {
                        range.add(Double.parseDouble(defaultValue));
                    } catch (NumberFormatException e) {
                        addError(new SimpleProcessSetupError(Severity.ERROR, getPortOwner(), "parameter_must_be_numerical", PARAMETER_DEFAULT_VALUE));
                    }
                }
                // report an illegal result value only once, no matter how many entries are affected
                boolean threwError = false;
                for (String[] pair : valueConditionList) {
                    if (isMissingValue(pair[0])) {
                        continue;
                    }
                    try {
                        range.add(Double.parseDouble(pair[0]));
                    } catch (NumberFormatException e) {
                        if (!threwError) {
                            addError(new SimpleProcessSetupError(Severity.ERROR, getPortOwner(), "parameter_must_be_numerical", PARAMETER_VALUES));
                            threwError = true;
                        }
                    }
                }
                amd.setValueRange(range, SetRelation.EQUAL);
            }
            metaData.addAttribute(amd);
        } catch (UndefinedParameterError ignored) {
            // Best effort: as long as a required parameter is not set, the meta data
            // is passed through without the new attribute.
        }
        return metaData;
    }

    /**
     * Creates the new attribute and fills it for every example.
     *
     * <p>Each example first receives the default value and then the result value of
     * the first condition (in list order) that holds for it. The string <code>?</code>
     * stands for a missing value. If a default or result value of a non-nominal
     * attribute cannot be parsed as a number, an error is logged and the example set
     * is returned without the new attribute.</p>
     *
     * @param exampleSet the example set to extend
     * @return the given example set, extended by the generated attribute
     * @throws OperatorException if a parameter is undefined or a condition cannot be created
     */
    @Override
    public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
        // the category parameter skips the first entry of Ontology.VALUE_TYPE_NAMES, hence the + 1
        Attribute attribute = AttributeFactory.createAttribute(getParameterAsString(PARAMETER_ATTRIBUTE_NAME), getParameterAsInt(PARAMETER_VALUE_TYPE) + 1);

        double mappedDefaultValue = Double.NaN;
        String defaultValue = getParameterAsString(PARAMETER_DEFAULT_VALUE);
        if (!isMissingValue(defaultValue)) {
            if (attribute.isNominal()) {
                mappedDefaultValue = attribute.getMapping().mapString(defaultValue);
            } else {
                try {
                    mappedDefaultValue = NumberParser.parseDouble(defaultValue);
                } catch (NumberFormatException e) {
                    logError("default value has to be ? or numerical for numerical attributes: no feature is generated");
                    return exampleSet;
                }
            }
        }

        List<String[]> valueConditionList = getParameterList(PARAMETER_VALUES);
        int numberOfValueConditions = valueConditionList.size();
        double[] mappedValues = new double[numberOfValueConditions];
        AttributeValueFilter[] filters = new AttributeValueFilter[numberOfValueConditions];
        int index = 0;
        for (String[] pair : valueConditionList) {
            String resultValue = pair[0];
            if (isMissingValue(resultValue)) {
                mappedValues[index] = Double.NaN;
            } else if (attribute.isNominal()) {
                mappedValues[index] = attribute.getMapping().mapString(resultValue);
            } else {
                try {
                    mappedValues[index] = Double.parseDouble(resultValue);
                } catch (NumberFormatException e) {
                    logError("values have to be numerical for numerical attributes: no feature is generated");
                    return exampleSet;
                }
            }
            filters[index] = new AttributeValueFilter(exampleSet, pair[1]);
            index++;
        }

        // the example set is only touched after all values and conditions were processed successfully
        exampleSet.getExampleTable().addAttribute(attribute);
        exampleSet.getAttributes().addRegular(attribute);

        for (Example example : exampleSet) {
            example.setValue(attribute, mappedDefaultValue);
            for (int i = 0; i < numberOfValueConditions; i++) {
                if (filters[i].conditionOk(example)) {
                    // first matching condition wins
                    example.setValue(attribute, mappedValues[i]);
                    break;
                }
            }
        }

        exampleSet.recalculateAllAttributeStatistics();
        return exampleSet;
    }

    /**
     * Returns the parameters of the super class followed by the attribute name, the
     * value type, the list of value/condition pairs and the default value.
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        ParameterType type = new ParameterTypeString(PARAMETER_ATTRIBUTE_NAME, "The name of the generated attribute.");
        type.setExpert(false);
        types.add(type);

        // offer all value types except the first entry of Ontology.VALUE_TYPE_NAMES;
        // readers of this parameter compensate for the shift by adding 1
        String[] valueTypes = new String[Ontology.VALUE_TYPE_NAMES.length - 1];
        System.arraycopy(Ontology.VALUE_TYPE_NAMES, 1, valueTypes, 0, valueTypes.length);
        type = new ParameterTypeCategory(PARAMETER_VALUE_TYPE, "Value type of the created attribute.", valueTypes, 0);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeList(PARAMETER_VALUES, "Values and conditions.",
                new ParameterTypeString("result_value", "The value of the attribute if the condition matches."),
                new ParameterTypeString(PARAMETER_CONDITIONS, "Value condition.", false));
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeString(PARAMETER_DEFAULT_VALUE, "Default value.", MISSING_VALUE_SYMBOL);
        type.setExpert(false);
        types.add(type);

        return types;
    }

    /** Returns true if the given parameter string denotes a missing value, i.e. is null or equals <code>?</code>. */
    private static boolean isMissingValue(String value) {
        return value == null || MISSING_VALUE_SYMBOL.equals(value);
    }
}