/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 *
 * Complete list of developers available at our web site:
 *
 * http://rapidminer.com
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.preprocessing.filter;

import java.util.List;

import com.rapidminer.example.AttributeTypeException;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.Condition;
import com.rapidminer.example.set.ConditionCreationException;
import com.rapidminer.example.set.ConditionedExampleSet;
import com.rapidminer.example.set.CustomFilter;
import com.rapidminer.example.set.ExpressionFilter;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.OperatorVersion;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.annotation.ResourceConsumptionEstimator;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MDInteger;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.PassThroughRule;
import com.rapidminer.operator.preprocessing.AbstractDataProcessing;
import com.rapidminer.operator.tools.ExpressionEvaluationException;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeExpression;
import com.rapidminer.parameter.ParameterTypeFilter;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.ParameterTypeStringCategory;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.EqualStringCondition;
import com.rapidminer.tools.OperatorResourceConsumptionHandler;
import com.rapidminer.tools.expression.ExpressionException;
import com.rapidminer.tools.expression.internal.ExpressionParserUtils;


/**
 * <p>
 * This operator takes an {@link ExampleSet} as input and returns a new {@link ExampleSet} including
 * only the {@link Example}s that fulfill a condition.
 * </p>
 *
 * <p>
 * By specifying an implementation of {@link com.rapidminer.example.set.Condition} and a parameter
 * string, arbitrary filters can be applied. Users can implement their own conditions by writing a
 * subclass of the above class and implementing a two argument constructor taking an
 * {@link ExampleSet} and a parameter string. This parameter string is specified by the parameter
 * <code>parameter_string</code>. Instead of using one of the predefined conditions users can define
 * their own implementation with the fully qualified class name.
 * </p>
 *
 * <p>
 * For "attribute_value_condition" the parameter string must have the form
 * <code>attribute op value</code>, where attribute is a name of an attribute, value is a value the
 * attribute can take and op is one of the binary logical operators similar to the ones known from
 * Java, e.g. greater than or equals. Please note you can define a logical OR of several conditions
 * with || and a logical AND of two conditions with two ampersands (&amp;&amp;) - or simply by
 * applying several ExampleFilter operators in a row. Please note also that for nominal attributes
 * you can define a regular expression for value of the possible equal and not equal checks.
 * </p>
 *
 * <p>
 * For "unknown_attributes" the parameter string must be empty. This filter removes all
 * examples containing attributes that have missing or illegal values. For "unknown_label"
 * the parameter string must also be empty. This filter removes all examples with an unknown label
 * value.
 * </p>
 *
 * @author Ingo Mierswa, Simon Fischer
 */
public class ExampleFilter extends AbstractDataProcessing {

	/** The parameter name for "Implementation of the condition." */
	public static final String PARAMETER_CONDITION_CLASS = "condition_class";

	/**
	 * The parameter name for "Parameter string for the condition, e.g. 'attribute=value' for
	 * the AttributeValueFilter."
	 */
	public static final String PARAMETER_PARAMETER_STRING = "parameter_string";

	/**
	 * The parameter name for "Parameter string for the expression, e.g. 'attribute1 ==
	 * attribute2'."
	 */
	public static final String PARAMETER_PARAMETER_EXPRESSION = "parameter_expression";

	/** The parameter name for "Defines the list of filters to apply." */
	public static final String PARAMETER_FILTER = "filters";

	/**
	 * The parameter name for "Indicates if only examples should be accepted which would
	 * normally be filtered."
	 */
	public static final String PARAMETER_INVERT_FILTER = "invert_filter";

	/** The hidden parameter for "The list of filters." */
	public static final String PARAMETER_FILTERS_LIST = "filters_list";

	/** The key parameter for the hidden {@value #PARAMETER_FILTERS_LIST} parameter */
	public static final String PARAMETER_FILTERS_ENTRY_KEY = "filters_entry_key";

	/** The value parameter for the hidden {@value #PARAMETER_FILTERS_LIST} parameter */
	public static final String PARAMETER_FILTERS_ENTRY_VALUE = "filters_entry_value";

	/** The hidden parameter for "Logic operator for filters." */
	public static final String PARAMETER_FILTERS_LOGIC_AND = "filters_logic_and";

	/** The hidden parameter for "Check meta data for comparators." */
	public static final String PARAMETER_FILTERS_CHECK_METADATA = "filters_check_metadata";

	/** Second output port; receives the complement of the filtered result when connected. */
	private final OutputPort unmatchedOutput = getOutputPorts().createPort("unmatched example set");

	/**
	 * Creates the operator and registers a meta data rule that passes the input meta data on to the
	 * unmatched output port after adjusting it for that port.
	 *
	 * @param description
	 *            the operator description handed to the super class
	 */
	public ExampleFilter(final OperatorDescription description) {
		super(description);
		getTransformer().addRule(new PassThroughRule(getInputPort(), unmatchedOutput, false) {

			@Override
			public MetaData modifyMetaData(MetaData metaData) {
				if (metaData instanceof ExampleSetMetaData) {
					// the unmatched port receives the complement of the main output
					return adjustMetaData((ExampleSetMetaData) metaData, true);
				} else {
					return metaData;
				}
			}
		});
	}

	/**
	 * Adjusts the meta data delivered at the main output port: the number of examples is reduced by
	 * an unknown amount and, if the port is known to receive only examples without missing values,
	 * the number of missing values of every attribute is set to 0.
	 *
	 * @param emd
	 *            the meta data to adjust (modified in place)
	 * @return the same, adjusted meta data object
	 */
	@Override
	public ExampleSetMetaData modifyMetaData(final ExampleSetMetaData emd) {
		return adjustMetaData(emd, false);
	}

	/**
	 * Shared meta data adjustment for both output ports.
	 *
	 * @param emd
	 *            the meta data to adjust (modified in place)
	 * @param forUnmatchedPort
	 *            {@code true} if the meta data is delivered to the unmatched output port,
	 *            {@code false} for the main output port
	 * @return the same, adjusted meta data object
	 */
	private ExampleSetMetaData adjustMetaData(final ExampleSetMetaData emd, final boolean forUnmatchedPort) {
		emd.getNumberOfExamples().reduceByUnknownAmount();
		try {
			String noMissingAttributes = ConditionedExampleSet.KNOWN_CONDITION_NAMES[ConditionedExampleSet.CONDITION_NO_MISSING_ATTRIBUTES];
			boolean noMissingSelected = noMissingAttributes.equals(getParameterAsString(PARAMETER_CONDITION_CLASS));
			// The main port gets the examples fulfilling the condition unless the filter is
			// inverted; the unmatched port gets them only if the filter is inverted. Only the port
			// that receives the fulfilling examples is guaranteed to be free of missing values.
			boolean portReceivesMatches = getParameterAsBoolean(PARAMETER_INVERT_FILTER) == forUnmatchedPort;
			if (noMissingSelected && portReceivesMatches) {
				for (AttributeMetaData amd : emd.getAllAttributes()) {
					amd.setNumberOfMissingValues(new MDInteger(0));
				}
			}
		} catch (UndefinedParameterError e) {
			// Deliberately ignored: meta data propagation is best effort. Without a defined
			// condition class no statement about missing values can be made.
		}
		return emd;
	}

	/**
	 * Filters the input set with the configured condition. If the unmatched output port is
	 * connected, the complementary example set is delivered there.
	 *
	 * @param inputSet
	 *            the example set to filter
	 * @return a {@link ConditionedExampleSet} view on the input restricted by the condition
	 * @throws OperatorException
	 *             if a required parameter is undefined, the condition cannot be created, or the
	 *             condition cannot be evaluated on the data
	 */
	@Override
	public ExampleSet apply(final ExampleSet inputSet) throws OperatorException {
		getLogger().fine(getName() + ": input set has " + inputSet.size() + " examples.");

		String className = getParameterAsString(PARAMETER_CONDITION_CLASS);
		String parameter = getParameterAsString(PARAMETER_PARAMETER_STRING);
		getLogger().fine("Creating condition '" + className + "' with parameter '" + parameter + "'");
		Condition condition = createCondition(inputSet, className, parameter);

		try {
			boolean invertFilter = getParameterAsBoolean(PARAMETER_INVERT_FILTER);
			ExampleSet result = new ConditionedExampleSet(inputSet, condition, invertFilter, getProgress());
			if (unmatchedOutput.isConnected()) {
				ExampleSet unmatchedResult = new ConditionedExampleSet(inputSet, condition, !invertFilter);
				unmatchedOutput.deliver(unmatchedResult);
			}
			return result;
		} catch (AttributeTypeException e) {
			throw new UserError(this, e, "filter_wrong_type", e.getMessage());
		} catch (ExpressionEvaluationException e) {
			throw new UserError(this, e, 904, className, e.getMessage());
		}
	}

	/**
	 * Instantiates the condition selected by the condition class parameter.
	 *
	 * @param inputSet
	 *            the example set the condition is created for
	 * @param className
	 *            the value of {@link #PARAMETER_CONDITION_CLASS}
	 * @param parameter
	 *            the value of {@link #PARAMETER_PARAMETER_STRING}, used for all conditions except
	 *            custom filters and expressions
	 * @return the created condition
	 * @throws OperatorException
	 *             if a required parameter is undefined or the condition cannot be created
	 */
	private Condition createCondition(final ExampleSet inputSet, final String className, final String parameter)
			throws OperatorException {
		String customFilterName = ConditionedExampleSet.KNOWN_CONDITION_NAMES[ConditionedExampleSet.CONDITION_CUSTOM_FILTER];
		String expressionName = ConditionedExampleSet.KNOWN_CONDITION_NAMES[ConditionedExampleSet.CONDITION_EXPRESSION];
		try {
			if (customFilterName.equals(className)) {
				// special handling for custom_filters, as they cannot be instantiated via a simple
				// string parameter
				// this is necessary as operator.getParameterList() replaces '%{test}' by 'test'
				String rawParameterString = getParameters().getParameterAsSpecified(PARAMETER_FILTERS_LIST);
				if (rawParameterString == null) {
					throw new UndefinedParameterError(PARAMETER_FILTER, this);
				}
				List<String[]> operatorFilterList = ParameterTypeList.transformString2List(rawParameterString);
				return new CustomFilter(inputSet, operatorFilterList,
						getParameterAsBoolean(PARAMETER_FILTERS_LOGIC_AND), getProcess().getMacroHandler());
			}
			if (expressionName.equals(className)) {
				// special handling for expression, as it is configured via a different parameter
				String expression = getParameterAsString(PARAMETER_PARAMETER_EXPRESSION);
				if (expression == null || expression.isEmpty()) {
					throw new UndefinedParameterError(PARAMETER_PARAMETER_EXPRESSION, this);
				}
				try {
					return new ExpressionFilter(inputSet, expression, this);
				} catch (ExpressionException e) {
					throw new UserError(this, "cannot_parse_expression", expression, e.getShortMessage());
				}
			}
			return ConditionedExampleSet.createCondition(className, inputSet, parameter);
		} catch (ConditionCreationException e) {
			throw new UserError(this, e, 904, className, e.getMessage());
		} catch (AttributeTypeException e) {
			throw new UserError(this, e, "filter_wrong_type", e.getMessage());
		} catch (IllegalArgumentException e) {
			throw new UserError(this, e, 904, className, e.getMessage());
		}
	}

	/**
	 * Declares the visible filter parameters and the hidden parameters that store the state of the
	 * filter dialog.
	 *
	 * @return the parameter types of the super class followed by the ones of this operator
	 */
	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();

		// NOTE(review): this name replaces the former hard-coded KNOWN_CONDITION_NAMES[8] in the
		// hidden-parameter conditions below - confirm CONDITION_CUSTOM_FILTER == 8.
		String customFilterName = ConditionedExampleSet.KNOWN_CONDITION_NAMES[ConditionedExampleSet.CONDITION_CUSTOM_FILTER];

		ParameterType type = new ParameterTypeFilter(PARAMETER_FILTER, "Defines the list of filters to apply.",
				getInputPort(), true);
		type.registerDependencyCondition(
				new EqualStringCondition(this, PARAMETER_CONDITION_CLASS, false, customFilterName));
		type.setExpert(false);
		types.add(type);

		type = new ParameterTypeString(PARAMETER_PARAMETER_STRING,
				"Parameter string for the condition, e.g. 'attribute=value' for the AttributeValueFilter.", true);
		type.registerDependencyCondition(new EqualStringCondition(this, PARAMETER_CONDITION_CLASS, true,
				ConditionedExampleSet.KNOWN_CONDITION_NAMES[ConditionedExampleSet.CONDITION_ATTRIBUTE_VALUE_FILTER]));
		type.setExpert(false);
		types.add(type);

		type = new ParameterTypeExpression(PARAMETER_PARAMETER_EXPRESSION,
				"Parameter string for the expression, e.g. 'attribute1 == attribute2'.", getInputPort(), true);
		type.registerDependencyCondition(new EqualStringCondition(this, PARAMETER_CONDITION_CLASS, true,
				ConditionedExampleSet.KNOWN_CONDITION_NAMES[ConditionedExampleSet.CONDITION_EXPRESSION]));
		type.setExpert(false);
		types.add(type);

		type = new ParameterTypeStringCategory(PARAMETER_CONDITION_CLASS, "Implementation of the condition.",
				ConditionedExampleSet.KNOWN_CONDITION_NAMES, customFilterName, false);
		// confusing, only show for experts, default custom filters are fine for new users
		type.setExpert(true);
		types.add(type);

		type = new ParameterTypeBoolean(PARAMETER_INVERT_FILTER,
				"Indicates if only examples should be accepted which would normally be filtered.", false);
		type.setExpert(false);
		types.add(type);

		// hidden parameter, only used to store the filters set via the ParameterTypeFilter dialog
		// above
		type = new ParameterTypeList(PARAMETER_FILTERS_LIST, "The list of filters.",
				new ParameterTypeString(PARAMETER_FILTERS_ENTRY_KEY, "A key entry of the filters list."),
				new ParameterTypeString(PARAMETER_FILTERS_ENTRY_VALUE, "A value entry of the filters list."), false);
		type.setHidden(true);
		type.registerDependencyCondition(
				new EqualStringCondition(this, PARAMETER_CONDITION_CLASS, true, customFilterName));
		types.add(type);

		// hidden parameter, only used to store if the filters from the ParameterTypeFilter dialog
		// above should be ANDed or ORed
		type = new ParameterTypeBoolean(PARAMETER_FILTERS_LOGIC_AND, "Logic operator for filters.", true, false);
		type.setHidden(true);
		type.registerDependencyCondition(
				new EqualStringCondition(this, PARAMETER_CONDITION_CLASS, true, customFilterName));
		types.add(type);

		// hidden parameter, only used to store if the meta data should be checked in the
		// ParameterTypeFilter dialog
		type = new ParameterTypeBoolean(PARAMETER_FILTERS_CHECK_METADATA, "Check meta data for comparators.", true,
				false);
		type.setHidden(true);
		type.registerDependencyCondition(
				new EqualStringCondition(this, PARAMETER_CONDITION_CLASS, true, customFilterName));
		types.add(type);

		return types;
	}

	/**
	 * @return always {@code false}
	 */
	@Override
	public boolean writesIntoExistingData() {
		return false;
	}

	/**
	 * @return the estimator looked up for this operator class and its input port
	 */
	@Override
	public ResourceConsumptionEstimator getResourceConsumptionEstimator() {
		return OperatorResourceConsumptionHandler.getResourceConsumptionEstimator(getInputPort(), ExampleFilter.class,
				null);
	}

	/**
	 * @return the incompatible version changes of the super class, extended by the expression
	 *         parser change
	 */
	@Override
	public OperatorVersion[] getIncompatibleVersionChanges() {
		return ExpressionParserUtils.addIncompatibleExpressionParserChange(super.getIncompatibleVersionChanges());
	}
}