/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.filter.attributes;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.Condition;
import com.rapidminer.example.set.ConditionCreationException;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.Operator;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.ParameterTypeStringCategory;
import com.rapidminer.parameter.conditions.EqualStringCondition;
/**
* <p>This operator filters the attributes of an example set. A condition is selected via a parameter and only
* attributes fulfilling this condition are kept; all other attributes are removed from the example set.
* The condition may be inverted.
* The condition is tested over all attributes and, for every attribute, over all examples. For example, the
* numeric_value_filter with the parameter string "> 6" will keep all nominal attributes and all numeric attributes
* having a value greater than 6 in every example. A combination of conditions is possible: "> 6 ANDAND < 11" or "<= 5 || < 0".
* But ANDAND and || must not be mixed. Please note that ANDAND has to be replaced by two ampersands.</p>
* <p>The attribute_name_filter keeps all attributes whose names match the given regular expression.
* The nominal_value_filter keeps all numeric attributes and all nominal attributes containing at least one of the specified
* nominal values. "rainy ANDAND cloudy" would keep all attributes containing "rainy" at least once and "cloudy" at least once.
* "rainy || sunny" would keep all attributes containing "rainy" or "sunny" at least once. ANDAND and || are not
* allowed to be mixed. And again, ANDAND has to be replaced by two ampersands.</p>
*
* @author Sebastian Land, Ingo Mierswa
* @version $Id: AttributeFilter.java,v 1.8 2008/07/19 16:31:17 ingomierswa Exp $
*/
public class AttributeFilter extends Operator {

    /** The parameter name for "Implementation of the condition." */
    public static final String PARAMETER_CONDITION_NAME = "condition_class";

    /** The parameter name for "Parameter string for the condition, e.g. '>= 5' for the numerical value filter." */
    public static final String PARAMETER_PARAMETER_STRING = "parameter_string";

    /** The parameter name for "Indicates if only attributes should be accepted which would normally filtered." */
    public static final String PARAMETER_INVERT_FILTER = "invert_filter";

    /**
     * Short names of the built-in conditions. The entry at index i is resolved to the class name
     * stored at index i of {@link #CONDITION_IMPLEMENTATIONS}; the CONDITION_* constants below
     * are indices into this array.
     */
    public static final String[] CONDITION_NAMES = new String[] {
        "no_missing_values",
        "numeric_value_filter",
        "attribute_name_filter",
        "is_nominal",
        "is_numerical",
        "is_date"
    };

    /** Fully qualified class names of the built-in conditions, parallel to {@link #CONDITION_NAMES}. */
    private static final String[] CONDITION_IMPLEMENTATIONS = {
        NoMissingValuesAttributeFilter.class.getName(),
        NumericValueAttributeFilter.class.getName(),
        NameAttributeFilter.class.getName(),
        NominalAttributeFilter.class.getName(),
        NumericalAttributeFilter.class.getName(),
        DateAttributeFilter.class.getName()
    };

    /** Index of "no_missing_values" in {@link #CONDITION_NAMES}. */
    public static final int CONDITION_NO_MISSING_VALUES = 0;

    /** Index of "numeric_value_filter" in {@link #CONDITION_NAMES}. */
    public static final int CONDITION_NUMERIC_VALUE_FILTER = 1;

    /** Index of "attribute_name_filter" in {@link #CONDITION_NAMES}. */
    public static final int CONDITION_ATTRIBUTE_NAME_FILTER = 2;

    /** Index of "is_nominal" in {@link #CONDITION_NAMES}. */
    public static final int CONDITION_IS_NOMINAL = 3;

    /** Index of "is_numerical" in {@link #CONDITION_NAMES}. */
    public static final int CONDITION_IS_NUMERICAL = 4;

    /** Index of "is_date" in {@link #CONDITION_NAMES}. */
    public static final int CONDITION_IS_DATE = 5;

    /** Creates the operator from the given description. */
    public AttributeFilter(OperatorDescription description) {
        super(description);
    }

    /**
     * Removes attributes from the input example set according to the configured condition and
     * returns the (modified) input example set.
     *
     * <p>The filtering runs in two phases: first every attribute is offered to
     * {@link AttributeFilterCondition#beforeScanCheck} and removed if that call returns true.
     * If the condition reports that it needs a scan, each attribute that survived the first phase
     * is then checked against the examples and removed as soon as one check returns true.</p>
     *
     * @return an array containing only the input example set
     * @throws OperatorException a {@link UserError} (code 904) if the condition cannot be created
     *         or a {@link ConditionCreationException} is raised while filtering
     */
    public IOObject[] apply() throws OperatorException {
        ExampleSet exampleSet = getInput(ExampleSet.class);
        Attributes attributes = exampleSet.getAttributes();
        try {
            AttributeFilterCondition condition = createCondition(getParameterAsString(PARAMETER_CONDITION_NAME));
            String parameterString = getParameterAsString(PARAMETER_PARAMETER_STRING);
            boolean invert = getParameterAsBoolean(PARAMETER_INVERT_FILTER);

            // Phase 1: drop attributes that can be decided without looking at the data.
            // The explicit iterator is required because attributes are removed while iterating.
            List<Attribute> remainingAttributes = new LinkedList<Attribute>();
            Iterator<Attribute> iterator = attributes.iterator();
            while (iterator.hasNext()) {
                Attribute attribute = iterator.next();
                if (condition.beforeScanCheck(attribute, parameterString, invert)) {
                    iterator.remove();
                } else {
                    remainingAttributes.add(attribute);
                }
            }

            // Phase 2: check the surviving attributes against every example, if necessary.
            if (condition.isNeedingScan()) {
                condition.initScanCheck();
                // Iterating the separate list keeps removal from the example set's attributes safe.
                for (Attribute attribute : remainingAttributes) {
                    boolean remove = false;
                    for (Example example : exampleSet) {
                        if (condition.check(attribute, example)) {
                            // one matching example is enough, no need to look at the rest
                            remove = true;
                            break;
                        }
                    }
                    if (remove) {
                        exampleSet.getAttributes().remove(attribute);
                    }
                }
            }
        } catch (ConditionCreationException e) {
            throw new UserError(this, 904, getParameterAsString(PARAMETER_CONDITION_NAME), e.getMessage());
        }
        return new IOObject[] { exampleSet };
    }

    /**
     * Checks if the given name is the short name of a known condition (see
     * {@link #CONDITION_NAMES}) and creates it. If the name is not known, it is
     * interpreted as a class name. In both cases the class must be an implementation
     * of {@link AttributeFilterCondition} and is instantiated by calling its
     * no-argument constructor.
     *
     * @param name short name of a built-in condition or the name of a condition class
     * @return a new instance of the condition
     * @throws ConditionCreationException if the class cannot be found, has no accessible
     *         no-argument constructor, cannot be instantiated, or is not an
     *         {@link AttributeFilterCondition}
     */
    public static AttributeFilterCondition createCondition(String name) throws ConditionCreationException {
        String className = name;
        for (int i = 0; i < CONDITION_NAMES.length; i++) {
            if (CONDITION_NAMES[i].equals(name)) {
                className = CONDITION_IMPLEMENTATIONS[i];
                break;
            }
        }
        try {
            Class<?> clazz = com.rapidminer.tools.Tools.classForName(className);
            // Class.newInstance() is deprecated and rethrows checked constructor exceptions
            // undeclared; going through the Constructor wraps them in an InvocationTargetException,
            // whose cause is reported by the generic handler below.
            return (AttributeFilterCondition) clazz.getDeclaredConstructor().newInstance();
        } catch (ClassNotFoundException e) {
            throw new ConditionCreationException("Cannot find class '" + className + "'. Check your classpath.");
        } catch (NoSuchMethodException e) {
            throw new ConditionCreationException("'" + className + "' does not provide a no-argument constructor " + className + "()!");
        } catch (IllegalAccessException e) {
            throw new ConditionCreationException("'" + className + "' cannot access no-argument constructor " + className + "()!");
        } catch (InstantiationException e) {
            throw new ConditionCreationException(className + ": cannot create condition (" + e.getMessage() + ").");
        } catch (ClassCastException e) {
            throw new ConditionCreationException("'" + className + "' is not an implementation of " + AttributeFilterCondition.class.getName() + ".");
        } catch (Throwable e) {
            // Deliberately broad: any failure while loading or constructing the condition is
            // reported as a ConditionCreationException.
            throw new ConditionCreationException(className + ": cannot invoke condition (" + (e.getCause() != null ? e.getCause().getMessage() : e.getMessage()) + ").");
        }
    }

    /** This operator consumes one {@link ExampleSet}. */
    public Class<?>[] getInputClasses() {
        return new Class[] { ExampleSet.class };
    }

    /** This operator delivers one {@link ExampleSet}. */
    public Class<?>[] getOutputClasses() {
        return new Class[] { ExampleSet.class };
    }

    /**
     * Adds the condition name, the parameter string and the invert flag to the parameters
     * inherited from the super class. The parameter string is registered with a dependency on
     * the condition name being the numeric value filter or the attribute name filter.
     */
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        ParameterType type = new ParameterTypeStringCategory(PARAMETER_CONDITION_NAME, "Implementation of the condition.", CONDITION_NAMES, CONDITION_NAMES[0]);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeString(PARAMETER_PARAMETER_STRING, "Parameter string for the condition, e.g. '>= 5' for the numerical value filter.", true);
        type.registerDependencyCondition(new EqualStringCondition(this, PARAMETER_CONDITION_NAME, true, CONDITION_NAMES[CONDITION_NUMERIC_VALUE_FILTER], CONDITION_NAMES[CONDITION_ATTRIBUTE_NAME_FILTER]));
        type.setExpert(false);
        types.add(type);

        types.add(new ParameterTypeBoolean(PARAMETER_INVERT_FILTER, "Indicates if only attributes should be accepted which would normally filtered.", false));
        return types;
    }
}