/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.preprocessing.filter;

import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ProcessSetupError.Severity;
import com.rapidminer.operator.SimpleProcessSetupError;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.annotation.ResourceConsumptionEstimator;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.preprocessing.AbstractDataProcessing;
import com.rapidminer.operator.tools.AttributeSubsetSelector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeRegexp;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.OperatorResourceConsumptionHandler;

/**
 * <p>
 * This operator replaces parts of the attribute names (like whitespace, parentheses, or other
 * unwanted characters) by a specified replacement. The replace_what parameter is a regular
 * expression (please refer to the annex of the RapidMiner tutorial for a description). The
 * replace_by parameter is an arbitrary string; empty strings are also allowed. Capturing groups of
 * the regular expression can be accessed in the replacement with $1, $2, $3...
 * </p>
 *
 * @author Ingo Mierswa, Tobias Malbrecht
 */
public class ChangeAttributeNamesReplace extends AbstractDataProcessing {

    /** Key of the parameter holding the regular expression that selects the parts to replace. */
    public static final String PARAMETER_REPLACE_WHAT = "replace_what";

    /** Key of the parameter holding the replacement string (an unset value is treated as ""). */
    public static final String PARAMETER_REPLACE_BY = "replace_by";

    /** Selects the attributes whose names are rewritten; it also contributes its own parameters. */
    private final AttributeSubsetSelector attributeSelector = new AttributeSubsetSelector(this, getExampleSetInputPort());

    /**
     * Creates the operator.
     *
     * @param description
     *            the operator description, handed to the superclass constructor
     */
    public ChangeAttributeNamesReplace(OperatorDescription description) {
        super(description);
    }

    /**
     * Applies the configured renaming to the meta data of the selected attributes.
     *
     * <p>
     * This is best effort: if a parameter is undefined or the regular expression does not compile,
     * the meta data is returned without (further) renaming. If the replacement string cannot be
     * applied to the pattern, a "capturing_group_undefined" setup error is registered.
     * </p>
     *
     * @param exampleSetMetaData
     *            the meta data to modify; it is changed in place
     * @return the same {@code exampleSetMetaData} instance
     */
    @Override
    protected MetaData modifyMetaData(ExampleSetMetaData exampleSetMetaData) {
        try {
            ExampleSetMetaData subsetMetaData = attributeSelector.getMetaDataSubset(exampleSetMetaData, false);
            Pattern replaceWhatPattern = Pattern.compile(getParameterAsString(PARAMETER_REPLACE_WHAT));
            String replaceByString = isParameterSet(PARAMETER_REPLACE_BY) ? getParameterAsString(PARAMETER_REPLACE_BY) : "";
            for (AttributeMetaData attributeMetaData : subsetMetaData.getAllAttributes()) {
                String name = attributeMetaData.getName();
                String newName = replaceMatches(replaceWhatPattern, name, replaceByString);
                // Rename on the original meta data object; the subset is only used to pick the names.
                exampleSetMetaData.getAttributeByName(name).setName(newName);
            }
        } catch (UndefinedParameterError ignored) {
            // Deliberately ignored: without the parameters no renaming can be predicted, so the
            // meta data is passed on as it is.
        } catch (PatternSyntaxException ignored) {
            // Deliberately ignored: an expression that does not compile (e.g. one that is still
            // being typed) must not abort meta data propagation. apply() still fails on it.
            // NOTE(review): consider registering a dedicated setup error here if a suitable
            // message key exists in the project resources.
        } catch (IndexOutOfBoundsException e) {
            addError(new SimpleProcessSetupError(Severity.ERROR, getPortOwner(), "capturing_group_undefined", PARAMETER_REPLACE_BY, PARAMETER_REPLACE_WHAT));
        }
        return exampleSetMetaData;
    }

    /**
     * Renames every selected attribute by replacing all matches of the replace_what expression in
     * its name with the replace_by string.
     *
     * @param exampleSet
     *            the example set whose attributes are renamed in place
     * @return the same {@code exampleSet} instance
     * @throws OperatorException
     *             a {@link UserError} with code 215 if the replacement string cannot be applied to
     *             the pattern (for instance because it references an undefined capturing group);
     *             other operator exceptions may come from parameter access or attribute selection
     */
    @Override
    public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
        Set<Attribute> attributeSubset = attributeSelector.getAttributeSubset(exampleSet, false);
        // NOTE(review): an invalid expression still surfaces as an unchecked
        // PatternSyntaxException here; mapping it to a UserError needs an error code that is
        // not visible from this file.
        Pattern replaceWhatPattern = Pattern.compile(getParameterAsString(PARAMETER_REPLACE_WHAT));
        String replaceByString = isParameterSet(PARAMETER_REPLACE_BY) ? getParameterAsString(PARAMETER_REPLACE_BY) : "";

        try {
            for (Attribute attribute : attributeSubset) {
                attribute.setName(replaceMatches(replaceWhatPattern, attribute.getName(), replaceByString));
            }
        } catch (IndexOutOfBoundsException e) {
            throw new UserError(this, 215, replaceByString, PARAMETER_REPLACE_WHAT);
        }

        return exampleSet;
    }

    /**
     * Replaces all matches of {@code pattern} in {@code name} by {@code replacement}.
     *
     * <p>
     * {@link java.util.regex.Matcher#replaceAll(String)} reports an unusable replacement string in
     * two ways: {@link IndexOutOfBoundsException} for a numbered group that does not exist, and
     * {@link IllegalArgumentException} for malformed references (such as {@code $} not followed by
     * a group, a dangling backslash, or an unknown named group). Both are funnelled into
     * {@link IndexOutOfBoundsException} so that callers need a single handler. The conversion is
     * limited to the {@code replaceAll} call on purpose, so that an
     * {@link IllegalArgumentException} raised by unrelated code is never mistaken for a
     * replacement problem.
     * </p>
     *
     * @param pattern
     *            the compiled replace_what expression
     * @param name
     *            the current attribute name
     * @param replacement
     *            the replace_by string, may contain group references
     * @return the name after replacement
     * @throws IndexOutOfBoundsException
     *             if the replacement string cannot be applied to the pattern
     */
    private static String replaceMatches(Pattern pattern, String name, String replacement) {
        try {
            return pattern.matcher(name).replaceAll(replacement);
        } catch (IllegalArgumentException e) {
            IndexOutOfBoundsException normalized = new IndexOutOfBoundsException(e.getMessage());
            normalized.initCause(e);
            throw normalized;
        }
    }

    /**
     * Returns the parameters of the superclass, followed by those of the attribute selector, the
     * replace_what expression and the replace_by string.
     *
     * @return the list of parameter types of this operator
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.addAll(attributeSelector.getParameterTypes());

        ParameterType type = new ParameterTypeRegexp(PARAMETER_REPLACE_WHAT, "A regular expression defining what should be replaced in the attribute names.", "\\W");
        type.setShowRange(false);
        type.setExpert(false);
        types.add(type);

        types.add(new ParameterTypeString(PARAMETER_REPLACE_BY, "This string is used as replacement for all parts of the matching attributes where the parameter '" + PARAMETER_REPLACE_WHAT + "' matches.", true, false));
        return types;
    }

    /**
     * Always returns {@code false}; within this class only attribute names are changed, no data
     * values are written.
     *
     * @return {@code false}
     */
    @Override
    public boolean writesIntoExistingData() {
        return false;
    }

    /**
     * Obtains the resource consumption estimator for this operator class from the
     * {@link OperatorResourceConsumptionHandler}, based on the input port and the attribute
     * selector.
     *
     * @return the estimator supplied by the handler
     */
    @Override
    public ResourceConsumptionEstimator getResourceConsumptionEstimator() {
        return OperatorResourceConsumptionHandler.getResourceConsumptionEstimator(getInputPort(), ChangeAttributeNamesReplace.class, attributeSelector);
    }
}