/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.preprocessing.filter; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Set; import com.rapidminer.example.Attribute; import com.rapidminer.example.AttributeRole; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.set.NonSpecialAttributesExampleSet; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.operator.ports.metadata.AttributeMetaData; import com.rapidminer.operator.ports.metadata.ExampleSetMetaData; import com.rapidminer.operator.ports.metadata.MetaData; import com.rapidminer.operator.ports.metadata.MetaDataInfo; import com.rapidminer.operator.preprocessing.AbstractDataProcessing; import com.rapidminer.operator.tools.AttributeSubsetSelector; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.UndefinedParameterError; /** * This class is for preprocessing operators, which can be restricted to use only a * subset of the attributes. The MetaData is changed accordingly in a way equivalent to * surrounding the operator with an AttributeSubsetPreprocessing operator. * Subclasses must overwrite the methods {@link #applyOnFiltered(ExampleSet)} and * {@link #applyOnFilteredMetaData(ExampleSetMetaData)} in order to provide their * functionality and the correct meta data handling. * * @author Sebastian Land * */ public abstract class AbstractFilteredDataProcessing extends AbstractDataProcessing { private final AttributeSubsetSelector attributeSelector = new AttributeSubsetSelector(this, getExampleSetInputPort(), getFilterValueTypes());; public AbstractFilteredDataProcessing(OperatorDescription description) { super(description); } @Override protected final MetaData modifyMetaData(ExampleSetMetaData inputMetaData) { ExampleSetMetaData workingMetaData = inputMetaData.clone(); ExampleSetMetaData subsetAmd = attributeSelector.getMetaDataSubset(workingMetaData, false); // storing unused attributes and saving roles List<AttributeMetaData> unusedAttributes = new LinkedList<AttributeMetaData>(); Iterator<AttributeMetaData> iterator = workingMetaData.getAllAttributes().iterator(); while (iterator.hasNext()) { AttributeMetaData amd = iterator.next(); if (!(subsetAmd.containsAttributeName(amd.getName()) == MetaDataInfo.YES)) { unusedAttributes.add(amd); iterator.remove(); } else if (amd.isSpecial()) { amd.setRegular(); } } // retrieving result ExampleSetMetaData resultMetaData = workingMetaData; try { resultMetaData = applyOnFilteredMetaData(workingMetaData); } catch (UndefinedParameterError e) { } // merge result with unusedAttributes: restore special types from original input Iterator<AttributeMetaData> r = resultMetaData.getAllAttributes().iterator(); while (r.hasNext()) { AttributeMetaData newMetaData = r.next(); AttributeMetaData oldMetaData = inputMetaData.getAttributeByName(newMetaData.getName()); if (oldMetaData != null) { if (oldMetaData.isSpecial()) { String specialName = oldMetaData.getRole(); newMetaData.setRole(specialName); } } } // add unused attributes again resultMetaData.addAllAttributes(unusedAttributes); return resultMetaData; } @Override /** * This method filters the attributes according to the AttributeSubsetSelector and * then applies the operation of the subclass on this data. Finally the changed data is merged * back into the exampleSet. This is done in the AttributeSubsetPreprocessing way and somehow doubles the * code. */ public final ExampleSet apply(ExampleSet exampleSet) throws OperatorException { ExampleSet workingExampleSet = (ExampleSet) exampleSet.clone(); Set<Attribute> selectedAttributes = attributeSelector.getAttributeSubset(workingExampleSet, false); List<Attribute> unusedAttributes = new LinkedList<Attribute>(); Iterator<Attribute> iterator = workingExampleSet.getAttributes().allAttributes(); while (iterator.hasNext()) { Attribute attribute = iterator.next(); if (!selectedAttributes.contains(attribute)) { unusedAttributes.add(attribute); iterator.remove(); } } // converting special to normal workingExampleSet = new NonSpecialAttributesExampleSet(workingExampleSet); // applying filtering ExampleSet resultSet = applyOnFiltered(workingExampleSet); // transform special attributes back Iterator<AttributeRole> r = resultSet.getAttributes().allAttributeRoles(); while (r.hasNext()) { AttributeRole newRole = r.next(); AttributeRole oldRole = exampleSet.getAttributes().getRole(newRole.getAttribute().getName()); if (oldRole != null) { if (oldRole.isSpecial()) { String specialName = oldRole.getSpecialName(); newRole.setSpecial(specialName); } } } // add old attributes if desired if (resultSet.size() != exampleSet.size()) { throw new UserError(this, 127, "changing the size of the example set is not allowed if the non-processed attributes should be kept."); } if (resultSet.getExampleTable().equals(exampleSet.getExampleTable())) { for (Attribute attribute : unusedAttributes) { AttributeRole role = exampleSet.getAttributes().getRole(attribute); resultSet.getAttributes().add(role); } } else { getLogger().warning("Underlying example table has changed: data copy into new table is necessary in order to keep non-processed attributes."); for (Attribute oldAttribute : unusedAttributes) { AttributeRole oldRole = exampleSet.getAttributes().getRole(oldAttribute); // create and add copy of attribute Attribute newAttribute = (Attribute)oldAttribute.clone(); resultSet.getExampleTable().addAttribute(newAttribute); AttributeRole newRole = new AttributeRole(newAttribute); if (oldRole.isSpecial()) newRole.setSpecial(oldRole.getSpecialName()); resultSet.getAttributes().add(newRole); // copy data for the new attribute Iterator<Example> oldIterator = exampleSet.iterator(); Iterator<Example> newIterator = resultSet.iterator(); while (oldIterator.hasNext()) { Example oldExample = oldIterator.next(); Example newExample = newIterator.next(); newExample.setValue(newAttribute, oldExample.getValue(oldAttribute)); } } } return resultSet; } /** * Subclasses have to implement this method in order to operate only on the * selected attributes. The results are merged back into the original example set. */ public abstract ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException; /** * This method has to be implemented in order to specify the changes of the meta data * caused by the application of this operator. */ public abstract ExampleSetMetaData applyOnFilteredMetaData(ExampleSetMetaData emd) throws UndefinedParameterError; /** * Defines the value types of the attributes which are processed or * affected by this operator. Has to be overridden to restrict * the attributes which can be chosen by an {@link AttributeSubsetSelector}. * @return array of value types */ protected abstract int[] getFilterValueTypes(); @Override public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); types.addAll(attributeSelector.getParameterTypes()); return types; } }