/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 *
 * Complete list of developers available at our web site:
 *
 * http://rapidminer.com
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.preprocessing.filter.attributes;

import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.lang.StringEscapeUtils;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.set.ConditionCreationException;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.InputPort;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.MetaDataInfo;
import com.rapidminer.parameter.ParameterHandler;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.I18N;


/**
 * This class implements a condition for the AttributeFilter operator. It provides the possibility
 * to check if all values of a numerical attribute match a condition. A condition is specified by
 * one of the relational operators {@code !=} (or {@code <>}), {@code =}, {@code <}, {@code <=},
 * {@code >}, {@code >=} followed by a value. For example, {@code "> 6.5"} keeps all attributes
 * having only values greater than 6.5.
 * <p>
 * Single conditions may be combined by {@code ||} or {@code &&}, but the two must not be mixed.
 * Example: {@code "> 6.5 && < 11"} keeps all attributes containing only values between 6.5 and 11.
 * Whitespace (that does not disrupt relational operators, {@code &&} or {@code ||}) is ignored, so
 * {@code ">6.5&&<11"} and {@code " > 6.5 && < 11 "} are equivalent.
 * <p>
 * Missing values (NaN) always satisfy every condition, and non-numerical attributes are never
 * removed by this filter.
 *
 * @author Sebastian Land, Ingo Mierswa, Marcel Seifert
 */
public class NumericValueAttributeFilter extends AbstractAttributeFilterCondition {

    /** Key of the parameter holding the condition string. */
    public static final String PARAMETER_NUMERIC_CONDITION = "numeric_condition";

    /** Regular expression matching exactly the two supported separators, "||" and "&&". */
    private static final String CONDITION_SEPARATOR_REGEX = "\\|\\||&&";

    /**
     * The supported relational operators.
     * <p>
     * The declaration order is significant: parsing tries the constants in this order, so every
     * two-character symbol must be declared before the one-character symbol that is its prefix
     * (e.g. {@code <=} before {@code <}).
     */
    private enum Relation {

        NOT_EQUAL("<>"),
        NOT_EQUAL_ALTERNATIVE("!="),
        LESS_OR_EQUAL("<="),
        LESS("<"),
        GREATER_OR_EQUAL(">="),
        GREATER(">"),
        EQUAL("=");

        /** Textual form of the operator as written in the condition string. */
        private final String symbol;

        Relation(String symbol) {
            this.symbol = symbol;
        }

        /**
         * Evaluates {@code value <relation> reference}.
         *
         * @param value
         *            the left-hand side (the data value)
         * @param reference
         *            the right-hand side (the value given in the condition string)
         * @return {@code true} if the relation holds
         */
        boolean holds(double value, double reference) {
            switch (this) {
                case EQUAL:
                    return value == reference;
                case NOT_EQUAL:
                case NOT_EQUAL_ALTERNATIVE:
                    return value != reference;
                case LESS_OR_EQUAL:
                    return value <= reference;
                case LESS:
                    return value < reference;
                case GREATER_OR_EQUAL:
                    return value >= reference;
                case GREATER:
                    return value > reference;
                default:
                    throw new AssertionError("Unhandled relation: " + this);
            }
        }
    }

    /** A single parsed comparison, e.g. "&gt;= 5". */
    private static final class Condition {

        private final Relation relation;
        private final double reference;

        Condition(Relation relation, double reference) {
            this.relation = relation;
            this.reference = reference;
        }

        /**
         * Checks a data value against this condition.
         *
         * @param value
         *            the value to test
         * @return {@code true} if the value is missing (NaN) or satisfies the comparison
         */
        boolean check(double value) {
            // missing values never cause an attribute to be removed
            if (Double.isNaN(value)) {
                return true;
            }
            return relation.holds(value, reference);
        }
    }

    /** The parsed sub-conditions; set by {@link #init(ParameterHandler)}. */
    private List<Condition> conditions;

    /** {@code true} if the sub-conditions are combined by "&&", {@code false} for "||". */
    private boolean conjunctiveMode;

    /**
     * Reads the condition string from the parameter {@link #PARAMETER_NUMERIC_CONDITION} and parses
     * it into its sub-conditions. The state of this filter is only replaced if the whole string
     * could be parsed.
     *
     * @param parameterHandler
     *            the handler providing the parameter; if it is an {@link Operator}, it is attached
     *            to any {@link UserError} thrown
     * @throws UserError
     *             if the condition string is missing or empty, mixes "&&" and "||", contains a
     *             sub-condition without a leading relational operator, or contains a value that is
     *             not a parsable number
     */
    @Override
    public void init(ParameterHandler parameterHandler) throws UserError, ConditionCreationException {
        String conditionString = parameterHandler.getParameterAsString(PARAMETER_NUMERIC_CONDITION);
        Operator operator = parameterHandler instanceof Operator ? (Operator) parameterHandler : null;

        if (conditionString == null || conditionString.isEmpty()) {
            throw new UserError(operator, "cannot_parse_expression", StringEscapeUtils.escapeHtml(conditionString),
                    I18N.getErrorMessage("numeric_value_filter.no_parameter_string"));
        }

        // "and" and "or" must not be combined within one condition string
        boolean containsAnd = conditionString.contains("&&");
        if (containsAnd && conditionString.contains("||")) {
            throw new UserError(operator, "cannot_parse_expression", StringEscapeUtils.escapeHtml(conditionString),
                    I18N.getErrorMessage("numeric_value_filter.and_combined_with_or"));
        }

        // Split only on exactly "||" or "&&". A malformed separator such as "|&" stays inside its
        // sub-condition and is then rejected as an invalid number instead of being silently
        // interpreted as "or".
        List<Condition> parsedConditions = new ArrayList<>();
        for (String conditionSubString : conditionString.split(CONDITION_SEPARATOR_REGEX)) {
            parsedConditions.add(parseCondition(conditionSubString, conditionString, operator));
        }

        this.conjunctiveMode = containsAnd;
        this.conditions = parsedConditions;
    }

    /**
     * Parses one sub-condition of the form "&lt;operator&gt; &lt;number&gt;".
     *
     * @param conditionSubString
     *            the sub-condition, possibly surrounded by whitespace
     * @param conditionString
     *            the complete condition string, used for error reporting only
     * @param operator
     *            the operator to attach to a thrown error, may be {@code null}
     * @return the parsed condition
     * @throws UserError
     *             if no relational operator leads the sub-condition or the value is not a number
     */
    private static Condition parseCondition(String conditionSubString, String conditionString, Operator operator)
            throws UserError {
        String trimmed = conditionSubString.trim();
        for (Relation relation : Relation.values()) {
            if (!trimmed.startsWith(relation.symbol)) {
                continue;
            }
            String number = trimmed.substring(relation.symbol.length()).trim();
            try {
                return new Condition(relation, Double.parseDouble(number));
            } catch (NumberFormatException e) {
                // NOTE(review): the placeholder argument is kept as in the original code; the message
                // template is not visible here -- confirm whether the offending token should be
                // passed instead.
                throw new UserError(operator, "cannot_parse_expression",
                        StringEscapeUtils.escapeHtml(conditionString),
                        I18N.getErrorMessage("numeric_value_filter.invalid_syntax", new Object()));
            }
        }
        throw new UserError(operator, "cannot_parse_expression", StringEscapeUtils.escapeHtml(conditionString),
                I18N.getErrorMessage("numeric_value_filter.missing_relational_operator"));
    }

    @Override
    public MetaDataInfo isFilteredOutMetaData(AttributeMetaData attribute, ParameterHandler handler) {
        // TODO: If some infos over the value range are available: Use them to decide if possible
        return MetaDataInfo.UNKNOWN;
    }

    /**
     * @return always {@code true}, since the decision depends on the attribute's values
     */
    @Override
    public boolean isNeedingScan() {
        return true;
    }

    /**
     * Don't remove any attribute without checking values
     */
    @Override
    public ScanResult beforeScanCheck(Attribute attribute) throws UserError {
        return ScanResult.UNCHECKED;
    }

    /**
     * Checks the value of the given attribute in the given example against the parsed condition.
     * The result depends only on the current example; no state is carried over between calls.
     *
     * @param attribute
     *            the attribute to check
     * @param example
     *            the example providing the value
     * @return {@link ScanResult#REMOVE} if the attribute is numerical and the value violates the
     *         condition, {@link ScanResult#UNCHECKED} otherwise
     */
    @Override
    public ScanResult check(Attribute attribute, Example example) {
        if (!attribute.isNumerical()) {
            return ScanResult.UNCHECKED;
        }
        double checkValue = example.getValue(attribute);
        return satisfiesConditions(checkValue) ? ScanResult.UNCHECKED : ScanResult.REMOVE;
    }

    /**
     * Combines the results of all sub-conditions for one value: all of them must hold in
     * conjunctive mode, at least one of them in disjunctive mode.
     */
    private boolean satisfiesConditions(double value) {
        if (conjunctiveMode) {
            for (Condition condition : conditions) {
                if (!condition.check(value)) {
                    return false;
                }
            }
            return true;
        }
        for (Condition condition : conditions) {
            if (condition.check(value)) {
                return true;
            }
        }
        return false;
    }

    @Override
    public List<ParameterType> getParameterTypes(ParameterHandler operator, InputPort inPort, int... valueTypes) {
        List<ParameterType> types = new LinkedList<>();
        types.add(new ParameterTypeString(PARAMETER_NUMERIC_CONDITION,
                "Parameter string for the condition, e.g. '>= 5'", true, false));
        return types;
    }
}