/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.preprocessing.filter;

import java.util.Iterator;
import java.util.LinkedList;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeRole;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.SimpleAttributes;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.ViewAttribute;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.preprocessing.PreprocessingModel;
import com.rapidminer.operator.preprocessing.PreprocessingOperator;
import com.rapidminer.tools.Ontology;

/**
 * This operator maps all non-numeric attributes to real valued attributes.
 * Nothing is done for numeric attributes, binary attributes are mapped to 0 and
 * 1.
 *
 * For nominal attributes one of the following calculations will be done:
 * <ul>
 * <li>Dichotomization, i.e. one new attribute for each value of the nominal
 * attribute. The new attribute which corresponds to the actual nominal value
 * gets value 1 and all other attributes get value 0.</li>
 * <li>Alternatively the values of nominal attributes can be seen as equally
 * ranked, therefore the nominal attribute will simply be turned into a real
 * valued attribute, the old values result in equidistant real values.</li>
 * </ul>
 *
 * At this moment the same applies for ordinal attributes, in a future release
 * more appropriate values based on the ranking between the ordinal values may
 * be included.
 *
 * NOTE(review): the code in this file only implements the second variant: each
 * non-numerical attribute is replaced by one numerical attribute of the same
 * name that carries the value returned by {@link Example#getValue(Attribute)}.
 * No dichotomization is performed here - confirm whether the description above
 * is still intended.
 *
 * @author Ingo Mierswa, Sebastian Land
 * @version $Id: NominalToNumeric.java,v 1.15 2006/04/05 08:57:27
 *          ingomierswa Exp $
 */
public class NominalToNumeric extends PreprocessingOperator {

    /**
     * Preprocessing model that swaps every non-numerical attribute for a
     * numerical attribute of the same name.
     */
    private static class NominalToNumericModel extends PreprocessingModel {

        private static final long serialVersionUID = -4203775081616082145L;

        /**
         * Creates the model by handing the example set to the super class.
         *
         * @param exampleSet the example set passed on to {@link PreprocessingModel}
         */
        protected NominalToNumericModel(ExampleSet exampleSet) {
            super(exampleSet);
        }

        /**
         * Replaces each non-numerical attribute obtained by iterating
         * {@code exampleSet.getAttributes()} with a newly created
         * {@link Ontology#NUMERICAL} attribute of the same name and copies the
         * value of every example from the old attribute to the new one.
         *
         * @param exampleSet the example set to transform; it is modified and returned
         * @return the same {@code exampleSet} instance after the replacement
         * @throws OperatorException declared by the signature; not thrown directly
         *             by the statements in this method
         */
        public ExampleSet applyOnData(ExampleSet exampleSet) throws OperatorException {
            // Select the attributes to transform and create one numerical
            // counterpart per selected attribute. Both lists are filled in the
            // same pass, so position i of one list belongs to position i of the
            // other.
            LinkedList<Attribute> nominalAttributes = new LinkedList<Attribute>();
            LinkedList<Attribute> transformedAttributes = new LinkedList<Attribute>();
            for (Attribute attribute : exampleSet.getAttributes()) {
                if (!attribute.isNumerical()) {
                    nominalAttributes.add(attribute);
                    transformedAttributes.add(
                            AttributeFactory.createAttribute(attribute.getName(), Ontology.NUMERICAL));
                }
            }

            // The new attributes must exist in the example table before values
            // can be written to them.
            exampleSet.getExampleTable().addAttributes(transformedAttributes);

            // Copy the values example by example, walking both lists in lock step.
            for (Example example : exampleSet) {
                Iterator<Attribute> target = transformedAttributes.iterator();
                for (Attribute attribute : nominalAttributes) {
                    example.setValue(target.next(), example.getValue(attribute));
                }
            }

            // Swap old for new attributes. This walks the two collected lists
            // instead of iterating the Attributes object that is being modified:
            // the pairing is explicit and does not depend on how
            // Attributes.replace(...) interacts with a live iterator.
            Attributes attributes = exampleSet.getAttributes();
            Iterator<Attribute> replacement = transformedAttributes.iterator();
            for (Attribute attribute : nominalAttributes) {
                attributes.replace(attribute, replacement.next());
            }

            return exampleSet;
        }

        /**
         * Builds the attribute set exposed when this model is used as a view:
         * special attribute roles are taken over unchanged, numerical regular
         * attributes keep their role, and every non-numerical regular attribute
         * is wrapped in a {@link ViewAttribute} of type {@link Ontology#INTEGER}.
         *
         * NOTE(review): {@link #applyOnData(ExampleSet)} creates
         * {@link Ontology#NUMERICAL} attributes while this method uses
         * {@link Ontology#INTEGER} - confirm the difference is intended.
         *
         * @param parentSet the example set whose attributes are inspected
         * @return a new {@link SimpleAttributes} instance as described above
         */
        public Attributes getTargetAttributes(ExampleSet parentSet) {
            SimpleAttributes attributes = new SimpleAttributes();

            // add special attributes to new attributes
            Iterator<AttributeRole> specialRoles = parentSet.getAttributes().specialAttributes();
            while (specialRoles.hasNext()) {
                attributes.add(specialRoles.next());
            }

            // add regular attributes; special roles were already handled above
            Iterator<AttributeRole> i = parentSet.getAttributes().allAttributeRoles();
            while (i.hasNext()) {
                AttributeRole attributeRole = i.next();
                if (!attributeRole.isSpecial()) {
                    Attribute attribute = attributeRole.getAttribute();
                    if (!attribute.isNumerical()) {
                        attributes.addRegular(
                                new ViewAttribute(this, attribute, attribute.getName(), Ontology.INTEGER, null));
                    } else {
                        attributes.add(attributeRole);
                    }
                }
            }
            return attributes;
        }

        /**
         * Returns the given value unchanged.
         *
         * @param targetAttribute the attribute the value is requested for (unused)
         * @param value the value of the underlying attribute
         * @return {@code value}
         */
        public double getValue(Attribute targetAttribute, double value) {
            return value;
        }
    }

    /**
     * Creates the operator.
     *
     * @param description the operator description passed on to the super class
     */
    public NominalToNumeric(OperatorDescription description) {
        super(description);
    }

    /**
     * Creates the model that performs the transformation.
     *
     * @param exampleSet the example set handed to the model constructor
     * @return a new {@code NominalToNumericModel}
     * @throws OperatorException declared by the signature; not thrown directly here
     */
    public Model createPreprocessingModel(ExampleSet exampleSet) throws OperatorException {
        return new NominalToNumericModel(exampleSet);
    }

    /**
     * @return an array containing only {@code ExampleSet.class}
     */
    public Class<?>[] getInputClasses() {
        return new Class[] { ExampleSet.class };
    }

    /**
     * @return an array containing only {@code ExampleSet.class}
     */
    public Class<?>[] getOutputClasses() {
        return new Class[] { ExampleSet.class };
    }
}