/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.preprocessing.filter; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import com.rapidminer.example.Attribute; import com.rapidminer.example.AttributeRole; import com.rapidminer.example.Example; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.operator.IOObject; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.parameter.ParameterTypeList; import com.rapidminer.parameter.ParameterTypeString; import com.rapidminer.parameter.conditions.BooleanParameterCondition; /** * This operator takes an <code>ExampleSet</code> as input and maps the * values of certain attributes to other values. The operator can replace * nominal values (e.g. replace the value "green" by the value * "green_color") as well as numerical values (e.g. replace the * all values "3" by "-1"). A single mapping can be * specified using the parameters <tt>replace_what</tt> and * <tt>replace_by</tt>. Multiple mappings can be specified in the parameter * list <tt>value_mappings</tt>.<br><br> * * Additionally, the operator allows to define (and consider) a default * mapping. If <tt>add_default_mapping</tt> is set to true and <tt>default_value</tt> * is properly set, all values that occur in the example set but are not listed * in the value mappings list are replaced by the default value. This may be * helpful in cases where only some values should be mapped explicitly and * many unimportant values should be mapped to a default value (e.g. "other").<br><br> * * <p> If the parameter <tt>consider_regular_expressions</tt> is enabled, the * values are replaced by the new values if the original values match the given * regular expressions. The value corresponding to the first matching regular * expression in the mappings list is taken as replacement.</p> * * <p> This operator supports regular expressions for the attribute names, * i.e. the value mapping is applied on all attributes for which the name * fulfills the pattern defined by the name expression.</p> * * @author Tobias Malbrecht * @version $Id: AttributeValueMapper.java,v 1.11 2008/09/08 19:35:53 tobiasmalbrecht Exp $ */ public class AttributeValueMapper extends Operator { /** The parameter name for "The specified values will be merged in all attributes specified by the given regular expression." */ public static final String PARAMETER_ATTRIBUTES = "attributes"; /** The parameter name for "Filter also special attributes (label, id...)" */ public static final String PARAMETER_APPLY_TO_SPECIAL_FEATURES = "apply_to_special_features"; /** The parameter name for "The first value which should be merged." */ public static final String PARAMETER_VALUE_MAPPINGS = "value_mappings"; /** The parameter name for "The second value which should be merged." */ public static final String PARAMETER_OLD_VALUES = "old_values"; /** The parameter name for "All occurrences of this value will be replaced." */ public static final String PARAMETER_REPLACE_WHAT = "replace_what"; /** The parameter name for "The new attribute value to use." */ public static final String PARAMETER_REPLACE_BY = "replace_by"; /** The parameter name for "Enables matching based on regular expressions; original values may be specified as regular expressions." */ public static final String PARAMETER_CONSIDER_REGULAR_EXPRESSIONS = "consider_regular_expressions"; /** The parameter name for "If set to true, all original values which are not listed in the value mappings list are mapped to the default value." */ public static final String PARAMETER_ADD_DEFAULT_MAPPING = "add_default_mapping"; /** The parameter name for "The default value all original values are mapped to, if add_default_mapping is set to true." */ public static final String PARAMETER_DEFAULT_VALUE = "default_value"; public AttributeValueMapper(OperatorDescription description) { super(description); } public IOObject[] apply() throws OperatorException { ExampleSet exampleSet = getInput(ExampleSet.class); String attributeNameRegex = getParameterAsString(PARAMETER_ATTRIBUTES); Pattern pattern = null; try { pattern = Pattern.compile(attributeNameRegex); } catch (PatternSyntaxException e) { throw new UserError(this, 206, attributeNameRegex, e.getMessage()); } boolean nominal = false; boolean first = true; ArrayList<Attribute> attributes = new ArrayList<Attribute>(); Iterator<Attribute> iterator = getParameterAsBoolean(PARAMETER_APPLY_TO_SPECIAL_FEATURES) ? exampleSet.getAttributes().allAttributes() : exampleSet.getAttributes().iterator(); while (iterator.hasNext()) { Attribute attribute = iterator.next(); Matcher matcher = pattern.matcher(attribute.getName()); if (matcher.matches()) { if (first) { nominal = attribute.isNominal(); first = false; } else { if (nominal != attribute.isNominal()) { throw new UserError(this, 126); } } attributes.add(attribute); } checkForStop(); } boolean useValueRegex = getParameterAsBoolean(PARAMETER_CONSIDER_REGULAR_EXPRESSIONS); List mappingParameterList = getParameterList(PARAMETER_VALUE_MAPPINGS); HashMap<String, String> mappings = new HashMap<String, String>(); HashMap<Pattern, String> patternMappings = new HashMap<Pattern, String>(); String replaceWhat = getParameterAsString(PARAMETER_REPLACE_WHAT); String replaceBy = getParameterAsString(PARAMETER_REPLACE_BY); if (replaceWhat != null && replaceBy != null && !replaceWhat.equals("") && !replaceBy.equals("")) { mappings.put(replaceWhat, replaceBy); if (useValueRegex) { try { Pattern valuePattern = Pattern.compile(replaceWhat); patternMappings.put(valuePattern, replaceBy); } catch (PatternSyntaxException e) { throw new UserError(this, 206, replaceWhat, e.getMessage()); } } } Iterator listIterator = mappingParameterList.iterator(); int j = 0; while (listIterator.hasNext()) { Object[] pair = (Object[]) listIterator.next(); replaceWhat = (String) pair[1]; replaceBy = (String) pair[0]; mappings.put(replaceWhat,replaceBy); if (useValueRegex) { try { Pattern valuePattern = Pattern.compile(replaceWhat); patternMappings.put(valuePattern, replaceBy); } catch (PatternSyntaxException e) { throw new UserError(this, 206, replaceWhat, e.getMessage()); } } j++; } boolean defaultMappingAdded = getParameterAsBoolean(PARAMETER_ADD_DEFAULT_MAPPING); String defaultValue = getParameterAsString(PARAMETER_DEFAULT_VALUE); if (defaultMappingAdded) { if (defaultValue == null || defaultValue.equals("")) { throw new UserError(this, 201, new Object[] { PARAMETER_ADD_DEFAULT_MAPPING, "true", PARAMETER_DEFAULT_VALUE }); } } if (attributes.size() > 0) { if (nominal) { for (Attribute attribute : attributes) { Attribute newAttribute = AttributeFactory.createAttribute("mapped" + attribute.getName(), attribute.getValueType()); exampleSet.getExampleTable().addAttribute(newAttribute); exampleSet.getAttributes().addRegular(newAttribute); for (Example example : exampleSet) { double value = example.getValue(attribute); String stringValue = null; if (Double.isNaN(value)) { stringValue = "?"; } else { stringValue = attribute.getMapping().mapIndex((int) value); } String mappedValue = (String) mappings.get(stringValue); if (useValueRegex) { for (java.util.Map.Entry<Pattern, String> entry : patternMappings.entrySet()) { Matcher matcher = entry.getKey().matcher(stringValue); if (matcher.matches()) { mappedValue = entry.getValue(); } } } if (mappedValue == null) { if (stringValue.equals("?")) { example.setValue(newAttribute, Double.NaN); } else { if (defaultMappingAdded) { if (defaultValue.equals("?")) { example.setValue(newAttribute, Double.NaN); } else { example.setValue(newAttribute, defaultValue); } } else { example.setValue(newAttribute, newAttribute.getMapping().mapString(stringValue)); } } } else { if (mappedValue.equals("?")) { example.setValue(newAttribute, Double.NaN); } else { example.setValue(newAttribute, newAttribute.getMapping().mapString(mappedValue)); } } checkForStop(); } AttributeRole role = exampleSet.getAttributes().getRole(attribute); exampleSet.getAttributes().remove(attribute); newAttribute.setName(attribute.getName()); if (role.isSpecial()) { exampleSet.getAttributes().setSpecialAttribute(newAttribute, role.getSpecialName()); } } } else { HashMap<Double, Double> numericalValueMapping = new HashMap<Double, Double>(); for (java.util.Map.Entry<String, String> entry : mappings.entrySet()) { double oldValue = Double.NaN; double newValue = Double.NaN; if (!entry.getKey().equals("?")) { oldValue = Double.valueOf(entry.getKey()); } if (!entry.getValue().equals("?")) { newValue = Double.valueOf(entry.getValue()); } numericalValueMapping.put(oldValue, newValue); } double numericalDefaultValue = Double.NaN; if (defaultMappingAdded && !defaultValue.equals("?")) { numericalDefaultValue = Double.valueOf(defaultValue); } for (Attribute attribute : attributes) { Attribute newAttribute = AttributeFactory.createAttribute("mapped" + attribute.getName(), attribute.getValueType()); exampleSet.getExampleTable().addAttribute(newAttribute); exampleSet.getAttributes().addRegular(newAttribute); for (Example example : exampleSet) { double value = example.getValue(attribute); Double mappedValue = numericalValueMapping.get(Double.valueOf(value)); if (mappedValue == null) { if (defaultMappingAdded) { example.setValue(newAttribute, numericalDefaultValue); } else { example.setValue(newAttribute, value); } } else { example.setValue(newAttribute, mappedValue); } checkForStop(); } AttributeRole role = exampleSet.getAttributes().getRole(attribute); exampleSet.getAttributes().remove(attribute); newAttribute.setName(attribute.getName()); if (role.isSpecial()) { exampleSet.getAttributes().setSpecialAttribute(newAttribute, role.getSpecialName()); } } } } return new IOObject[] { exampleSet }; } public Class<?>[] getInputClasses() { return new Class[] { ExampleSet.class }; } public Class<?>[] getOutputClasses() { return new Class[] { ExampleSet.class }; } public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); types.add(new ParameterTypeString(PARAMETER_ATTRIBUTES, "The specified values will be merged in all attributes specified by the given regular expression.", false)); types.add(new ParameterTypeBoolean(PARAMETER_APPLY_TO_SPECIAL_FEATURES, "Filter also special attributes (label, id...)", false)); ParameterType values = new ParameterTypeString(PARAMETER_OLD_VALUES, "The original values which should be replaced.", false); types.add(new ParameterTypeList(PARAMETER_VALUE_MAPPINGS, "The value mappings.", values)); types.add(new ParameterTypeString(PARAMETER_REPLACE_WHAT, "All occurrences of this value will be replaced.", true)); types.add(new ParameterTypeString(PARAMETER_REPLACE_BY, "The new attribute value to use.", true)); types.add(new ParameterTypeBoolean(PARAMETER_CONSIDER_REGULAR_EXPRESSIONS, "Enables matching based on regular expressions; original values may be specified as regular expressions.", false)); types.add(new ParameterTypeBoolean(PARAMETER_ADD_DEFAULT_MAPPING, "If set to true, all original values which are not listed in the value mappings list are mapped to the default value.", false)); ParameterType type = new ParameterTypeString(PARAMETER_DEFAULT_VALUE, "The default value all original values are mapped to, if add_default_mapping is set to true.", true); type.registerDependencyCondition(new BooleanParameterCondition(this, PARAMETER_ADD_DEFAULT_MAPPING, true)); types.add(type); return types; } }