/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.meta;

import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorChain;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ValueDouble;
import com.rapidminer.operator.ValueString;
import com.rapidminer.operator.condition.InnerOperatorCondition;
import com.rapidminer.operator.condition.LastInnerOperatorCondition;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeString;

/**
 * <p>This operator takes an input data set and applies its inner operators
 * once for every (accepted) feature of the input data. Inner operators
 * can access the name of the current feature via a macro, whose name can be
 * specified with the parameter <code>iteration_macro</code>.</p>
 *
 * <p>The user can specify with a parameter if this loop should iterate over
 * all features or only over features with a specific value type, i.e. only
 * over numerical or over nominal features. A regular expression can also be
 * specified which is used as a filter, i.e. the inner operators are only
 * applied for features whose complete name matches the filter expression.</p>
 *
 * <p>If <code>invert_selection</code> is set, each specified filter is negated
 * individually. Filters which are not specified (empty regular expression,
 * type filter &quot;none&quot;) accept every feature regardless of the inversion.
 * Note that, as a consequence, with both filters specified and inversion
 * switched on a feature is only visited if it fails <em>both</em> filters.</p>
 *
 * <p>Every iteration works on a clone of the input example set. The results of
 * the inner operators are discarded and the input example set object is
 * returned as the result of this operator.</p>
 *
 * @author Ingo Mierswa, Tobias Malbrecht
 * @version $Id: FeatureIterator.java,v 1.5 2008/08/20 16:50:50 ingomierswa Exp $
 */
public class FeatureIterator extends OperatorChain {

    /** Name of the parameter holding the regular expression used as feature name filter. */
    public static final String PARAMETER_FILTER = "filter";

    /** Name of the parameter indicating if the filter settings should be inverted. */
    public static final String PARAMETER_INVERT_SELECTION = "invert_selection";

    /** Name of the parameter selecting the value type filter (index into {@link #TYPE_FILTERS}). */
    public static final String PARAMETER_TYPE_FILTER = "type_filter";

    /** Name of the parameter holding the name of the macro which receives the current feature name. */
    public static final String PARAMETER_ITERATION_MACRO = "iteration_macro";

    /** Display names of the value type filters; the <code>TYPE_FILTER_*</code> constants index into this array. */
    public static final String[] TYPE_FILTERS = new String[] { "none", "nominal", "numerical" };

    /** Type filter index: no value type restriction. */
    public static final int TYPE_FILTER_NONE = 0;

    /** Type filter index: nominal features. */
    public static final int TYPE_FILTER_NOMINAL = 1;

    /** Type filter index: numerical features. */
    public static final int TYPE_FILTER_NUMERICAL = 2;

    /** Default name of the macro which holds the name of the current feature. */
    public static final String DEFAULT_ITERATION_MACRO_NAME = "loop_feature";

    /** Number of inner operator applications completed so far in the current run. */
    private int iteration;

    /** Name of the feature handled by the current (or most recently started) iteration, null if there is none. */
    private String currentName = null;

    /**
     * Creates the operator and registers the loggable values <code>iteration</code>
     * and <code>feature_name</code>.
     *
     * @param description the operator description, passed on to the super class
     */
    public FeatureIterator(OperatorDescription description) {
        super(description);
        addValue(new ValueDouble("iteration", "The number of the current iteration / loop.") {
            public double getDoubleValue() {
                return iteration;
            }
        });
        // Fixed description: this value delivers the feature's name, not a number.
        addValue(new ValueString("feature_name", "The name of the current feature.") {
            public String getStringValue() {
                return currentName;
            }
        });
    }

    /**
     * Loops over all attributes delivered by the input example set which pass the
     * configured filters. For each of them the iteration macro is set to the
     * attribute name and the inner operators are applied on a clone of the input.
     * The iteration macro is removed again when the loop ends, also if an inner
     * operator fails.
     *
     * @return an array containing only the input example set object
     * @throws OperatorException if the input cannot be retrieved or an inner operator fails
     */
    public IOObject[] apply() throws OperatorException {
        ExampleSet exampleSet = getInput(ExampleSet.class);
        String iterationMacroName = getParameterAsString(PARAMETER_ITERATION_MACRO);

        // init filters; an empty or missing expression means "no name filter"
        String filterRegExp = getParameterAsString(PARAMETER_FILTER);
        Pattern pattern = null;
        if ((filterRegExp != null) && (filterRegExp.length() > 0)) {
            pattern = Pattern.compile(filterRegExp);
        }
        int typeFilter = getParameterAsInt(PARAMETER_TYPE_FILTER);
        boolean invertSelection = getParameterAsBoolean(PARAMETER_INVERT_SELECTION);

        // reset the loggable state so that values from a previous run are not reported
        iteration = 0;
        currentName = null;

        // filter and loop
        try {
            for (Attribute attribute : exampleSet.getAttributes()) {
                if (acceptPattern(attribute, pattern, invertSelection)
                        && acceptType(attribute, typeFilter, invertSelection)) {
                    String name = attribute.getName();
                    getProcess().getMacroHandler().addMacro(iterationMacroName, name);
                    currentName = name;
                    applyInnerOperators(exampleSet);
                    iteration++;
                }
            }
        } finally {
            // always clean up, otherwise a failing inner operator would leave the macro defined
            getProcess().getMacroHandler().removeMacro(iterationMacroName);
        }

        return new IOObject[] { exampleSet };
    }

    /**
     * Checks the attribute name against the name filter.
     *
     * @param attribute       the attribute to check
     * @param pattern         the compiled filter expression, or null if no name filter is specified
     * @param invertSelection if true, the result of the match is negated
     * @return true if no pattern is given (regardless of the inversion); otherwise
     *         whether the complete attribute name matches the pattern, negated if
     *         <code>invertSelection</code> is set
     */
    private boolean acceptPattern(Attribute attribute, Pattern pattern, boolean invertSelection) {
        if (pattern == null) {
            return true;
        }
        Matcher matcher = pattern.matcher(attribute.getName());
        // "matches XOR invert": plain result without inversion, negated result with inversion
        return matcher.matches() != invertSelection;
    }

    /**
     * Checks the attribute against the value type filter.
     *
     * @param attribute       the attribute to check
     * @param typeFilter      one of the <code>TYPE_FILTER_*</code> constants
     * @param invertSelection if true, the result of the type check is negated
     * @return true for {@link #TYPE_FILTER_NONE} and any unknown filter value (regardless
     *         of the inversion); otherwise whether the attribute has the requested
     *         value type, negated if <code>invertSelection</code> is set
     */
    private boolean acceptType(Attribute attribute, int typeFilter, boolean invertSelection) {
        boolean hasRequestedType;
        switch (typeFilter) {
            case TYPE_FILTER_NUMERICAL:
                hasRequestedType = attribute.isNumerical();
                break;
            case TYPE_FILTER_NOMINAL:
                hasRequestedType = attribute.isNominal();
                break;
            default:
                // no type restriction: accept everything, inversion does not apply
                return true;
        }
        return hasRequestedType != invertSelection;
    }

    /**
     * Applies all inner operators in order, starting with a clone of the given
     * example set and feeding the output of each operator into the next one.
     * The output of the last inner operator is not used.
     *
     * @param exampleSet the example set whose clone is handed to the first inner operator
     * @throws OperatorException if one of the inner operators fails
     */
    private void applyInnerOperators(ExampleSet exampleSet) throws OperatorException {
        IOContainer input = new IOContainer((ExampleSet) exampleSet.clone());
        for (int i = 0; i < getNumberOfOperators(); i++) {
            input = getOperator(i).apply(input);
        }
    }

    /**
     * Returns the condition for the inner operators, built from
     * <code>ExampleSet</code> as first and an empty class array as second argument.
     * NOTE(review): presumably this means the inner chain is offered an example set
     * and does not have to deliver anything -- confirm against
     * <code>LastInnerOperatorCondition</code>.
     */
    public InnerOperatorCondition getInnerOperatorCondition() {
        return new LastInnerOperatorCondition(new Class[] { ExampleSet.class }, new Class[0]);
    }

    /** Returns <code>Integer.MAX_VALUE</code>. */
    public int getMaxNumberOfInnerOperators() {
        return Integer.MAX_VALUE;
    }

    /** Returns 1. */
    public int getMinNumberOfInnerOperators() {
        return 1;
    }

    /** Returns an array containing only <code>ExampleSet.class</code>. */
    public Class<?>[] getInputClasses() {
        return new Class<?>[] { ExampleSet.class };
    }

    /** Returns an array containing only <code>ExampleSet.class</code>. */
    public Class<?>[] getOutputClasses() {
        return new Class<?>[] { ExampleSet.class };
    }

    /**
     * Adds the parameters of this operator (name filter, type filter, selection
     * inversion and iteration macro name) to the parameter types of the super class.
     *
     * @return the list delivered by the super class, extended by this operator's parameters
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();
        types.add(new ParameterTypeString(PARAMETER_FILTER, "A regular expression which can be used to filter the features in this loop, i.e. the inner operators are only applied to features which name fulfills the filter expression.", true));
        types.add(new ParameterTypeCategory(PARAMETER_TYPE_FILTER, "Indicates if a value type filter should be applied for this loop.", TYPE_FILTERS, TYPE_FILTER_NONE));
        types.add(new ParameterTypeBoolean(PARAMETER_INVERT_SELECTION, "Indicates if the filter settings should be inverted, i.e. the loop will run over all features not fulfilling the specified criteria.", false));
        types.add(new ParameterTypeString(PARAMETER_ITERATION_MACRO, "The name of the macro which holds the name of the current feature in each iteration.", DEFAULT_ITERATION_MACRO_NAME));
        return types;
    }
}