/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.filter;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.preprocessing.GuessValueTypes;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.Ontology;
/**
* <p>This operator transforms nominal attributes into numerical ones. In contrast to
* the NominalToNumeric operator, this operator directly parses numbers from
* values which were wrongly encoded as nominal values. Please note that this operator
* will first check the stored nominal mappings for all attributes. If (old) mappings
* which actually are nominal are still stored (without the corresponding data being part of
* the example set), the attribute will not be converted. Please use the operator
* {@link GuessValueTypes} in these cases.</p>
*
* @author Regina Fritsch, Ingo Mierswa
* @version $Id: NominalNumbers2Numerical.java,v 1.9 2008/07/07 07:06:40 ingomierswa Exp $
*/
public class NominalNumbers2Numerical extends Operator {

    /** The parameter name for "Character that is used as decimal point." */
    public static final String PARAMETER_DECIMAL_POINT_CHARACTER = "decimal_point_character";

    /** Default value of the decimal point parameter; also used if the parameter value is empty. */
    private static final String DEFAULT_DECIMAL_POINT = ".";

    public NominalNumbers2Numerical(OperatorDescription description) {
        super(description);
    }

    /**
     * Replaces every nominal attribute whose mapped values can all be parsed as numbers by a
     * new numerical attribute carrying the parsed values. The replacement attribute gets the
     * name of the attribute it replaces and is added as a regular attribute.
     *
     * @return an array containing only the (modified) input example set
     * @throws OperatorException if the input or the parameter cannot be retrieved
     */
    public IOObject[] apply() throws OperatorException {
        ExampleSet exampleSet = getInput(ExampleSet.class);
        char decimalPointCharacter = getDecimalPointCharacter();

        List<Attribute> newAttributes = new LinkedList<Attribute>();

        // An explicit iterator is used so that converted attributes can be removed while
        // iterating without causing a "concurrent modification".
        Iterator<Attribute> a = exampleSet.getAttributes().iterator();
        while (a.hasNext()) {
            Attribute attribute = a.next();
            if (!attribute.isNominal() || !hasOnlyNumberValues(attribute, decimalPointCharacter)) {
                continue;
            }

            // new attribute
            Attribute newAttribute = AttributeFactory.createAttribute(Ontology.NUMERICAL);
            exampleSet.getExampleTable().addAttribute(newAttribute);
            newAttributes.add(newAttribute);

            // copy data
            copyParsedValues(exampleSet, attribute, newAttribute, decimalPointCharacter);

            // delete attribute and rename the new attribute
            exampleSet.getExampleTable().removeAttribute(attribute);
            a.remove();
            newAttribute.setName(attribute.getName());
        }

        // The new attributes are registered only after the iteration has finished, so the
        // attribute collection is never extended while it is being iterated.
        for (Attribute attribute : newAttributes) {
            exampleSet.getAttributes().addRegular(attribute);
        }

        return new IOObject[] { exampleSet };
    }

    /**
     * Returns the first character of the decimal point parameter. If the parameter value is
     * null or empty, the first character of the default value is used instead of failing with
     * an index or null pointer error.
     */
    private char getDecimalPointCharacter() throws OperatorException {
        String parameterValue = getParameterAsString(PARAMETER_DECIMAL_POINT_CHARACTER);
        if (parameterValue == null || parameterValue.length() == 0) {
            return DEFAULT_DECIMAL_POINT.charAt(0);
        }
        return parameterValue.charAt(0);
    }

    /**
     * Checks whether every value stored in the nominal mapping of the given attribute can be
     * parsed as a number. All mapped values are checked, whether or not they occur in the data.
     * An attribute whose mapping contains no values at all is reported as numerical.
     */
    private static boolean hasOnlyNumberValues(Attribute attribute, char decimalPointCharacter) {
        for (String value : attribute.getMapping().getValues()) {
            if (!isParsableNumber(value, decimalPointCharacter)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if the given string can be parsed by {@link Double#parseDouble(String)}
     * after the decimal point character has been replaced by '.'. A null value is not a number.
     */
    private static boolean isParsableNumber(String value, char decimalPointCharacter) {
        if (value == null) {
            return false;
        }
        try {
            Double.parseDouble(value.replace(decimalPointCharacter, '.'));
            return true;
        } catch (NumberFormatException e) {
            // expected outcome for values which are really nominal
            return false;
        }
    }

    /**
     * Writes the parsed value of the source attribute into the target attribute for every
     * example. Missing values (NaN) stay missing.
     */
    private static void copyParsedValues(ExampleSet exampleSet, Attribute source, Attribute target, char decimalPointCharacter) {
        for (Example e : exampleSet) {
            double oldValue = e.getValue(source);
            if (Double.isNaN(oldValue)) {
                e.setValue(target, Double.NaN);
            } else {
                String value = e.getValueAsString(source);
                String replaceValue = value.replace(decimalPointCharacter, '.');
                e.setValue(target, Double.parseDouble(replaceValue));
            }
        }
    }

    /** This operator expects a single {@link ExampleSet} as input. */
    public Class<?>[] getInputClasses() {
        return new Class[] { ExampleSet.class };
    }

    /** This operator delivers a single {@link ExampleSet} as output. */
    public Class<?>[] getOutputClasses() {
        return new Class[] { ExampleSet.class };
    }

    /** Adds the decimal point character parameter to the parameters of the super class. */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.add(new ParameterTypeString(PARAMETER_DECIMAL_POINT_CHARACTER, "Character that is used as decimal point.", DEFAULT_DECIMAL_POINT));
        return types;
    }
}