/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.preprocessing.filter.attributes; import java.util.Collection; import java.util.LinkedList; import java.util.List; import java.util.regex.PatternSyntaxException; import com.rapidminer.example.Attribute; import com.rapidminer.example.set.ConditionCreationException; import com.rapidminer.operator.Operator; import com.rapidminer.operator.UserError; import com.rapidminer.operator.ports.InputPort; import com.rapidminer.operator.ports.metadata.AttributeMetaData; import com.rapidminer.operator.ports.metadata.ExampleSetMetaData; import com.rapidminer.operator.ports.metadata.MetaData; import com.rapidminer.operator.ports.metadata.MetaDataInfo; import com.rapidminer.parameter.ParameterHandler; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.parameter.ParameterTypeRegexp; import com.rapidminer.parameter.conditions.BooleanParameterCondition; import com.rapidminer.tools.Ontology; /** * This Attribute Filter removes every attribute, which name does not match the * Regular expression given by parameter. A data scan is not needed. * * @author Sebastian Land, Ingo Mierswa, Tobias Malbrecht */ public class RegexpAttributeFilter extends AbstractAttributeFilterCondition { public static final String PARAMETER_REGULAR_EXPRESSION = "regular_expression"; public static final String PARAMETER_ADD_EXCEPTION = "use_except_expression"; public static final String PARAMETER_EXCEPT_REGULAR_EXPRESSION = "except_regular_expression"; private String attributeNameRegexp; private String exceptRegexp = null; @Override public void init(ParameterHandler operator) throws UserError, ConditionCreationException { attributeNameRegexp = operator.getParameterAsString(PARAMETER_REGULAR_EXPRESSION); if ((attributeNameRegexp == null) || (attributeNameRegexp.length() == 0)) throw new UserError((operator instanceof Operator) ? (Operator) operator: null, 904, "The condition for attribute names needs a parameter string."); if (operator.isParameterSet(PARAMETER_EXCEPT_REGULAR_EXPRESSION) && operator.getParameterAsBoolean(PARAMETER_ADD_EXCEPTION)) exceptRegexp = operator.getParameterAsString(PARAMETER_EXCEPT_REGULAR_EXPRESSION); if ((exceptRegexp != null) && (exceptRegexp.length() == 0)) { exceptRegexp = null; } } public MetaDataInfo isFilteredOutMetaData(AttributeMetaData attribute, ParameterHandler handler) throws ConditionCreationException { try { if (attribute.getName().matches(attributeNameRegexp)) { if (exceptRegexp != null) { if (attribute.getName().matches(exceptRegexp)) return MetaDataInfo.YES; else return MetaDataInfo.NO; } return MetaDataInfo.NO; } else return MetaDataInfo.YES; } catch (PatternSyntaxException e) { return MetaDataInfo.UNKNOWN; } } public ScanResult beforeScanCheck(Attribute attribute) throws UserError { if (attribute.getName().matches(attributeNameRegexp)) { if (exceptRegexp != null) { if (attribute.getName().matches(exceptRegexp)) { return ScanResult.REMOVE; } } return ScanResult.KEEP; } else return ScanResult.REMOVE; } private boolean isOfAllowedType(int valueType, int[] allowedValueTypes) { boolean isAllowed = false; for (int type: allowedValueTypes) { isAllowed |= Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueType, type); } return isAllowed; } @Override public List<ParameterType> getParameterTypes(ParameterHandler operator, final InputPort inPort, final int...valueTypes) { List<ParameterType> types = new LinkedList<ParameterType>(); types.add(new ParameterTypeRegexp(PARAMETER_REGULAR_EXPRESSION, "A regular expression for the names of the attributes which should be kept.", true, false) { private static final long serialVersionUID = 8133149560984042644L; @Override public Collection<String> getPreviewList() { Collection<String> regExpPreviewList = new LinkedList<String>(); if (inPort == null) { return null; } MetaData metaData = inPort.getMetaData(); if (metaData instanceof ExampleSetMetaData) { ExampleSetMetaData emd = (ExampleSetMetaData) metaData; for (AttributeMetaData amd : emd.getAllAttributes()) { if (isOfAllowedType(amd.getValueType(), valueTypes)) { regExpPreviewList.add(amd.getName()); } } } return regExpPreviewList; } }); types.add(new ParameterTypeBoolean(PARAMETER_ADD_EXCEPTION, "If enabled, an exception to the specified regular expression might be specified. Attributes of matching this will be filtered out, although matching the first expression.", false, true)); ParameterType type = (new ParameterTypeRegexp(PARAMETER_EXCEPT_REGULAR_EXPRESSION, "A regular expression for the names of the attributes which should be filtered out although matching the above regular expression.", true, true) { private static final long serialVersionUID = 81331495609840426L; @Override public Collection<String> getPreviewList() { Collection<String> regExpPreviewList = new LinkedList<String>(); if (inPort == null) { return null; } MetaData metaData = inPort.getMetaData(); if (metaData instanceof ExampleSetMetaData) { ExampleSetMetaData emd = (ExampleSetMetaData) metaData; for (AttributeMetaData amd : emd.getAllAttributes()) { if (isOfAllowedType(amd.getValueType(), valueTypes)) { regExpPreviewList.add(amd.getName()); } } } return regExpPreviewList; } }); type.setExpert(true); type.registerDependencyCondition(new BooleanParameterCondition(operator, PARAMETER_ADD_EXCEPTION, true, true)); types.add(type); return types; } }