/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.filter.attributes;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.annotation.ResourceConsumptionEstimator;
import com.rapidminer.operator.features.selection.AbstractFeatureSelection;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.tools.AttributeSubsetSelector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.tools.OperatorResourceConsumptionHandler;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
/**
* <p>
* This operator filters the attributes of an exampleSet. Different conditions may be selected as
* parameters, and only attributes fulfilling these conditions are kept. The rest will be removed
* from the exampleSet. The conditions may be inverted. The conditions are tested over all
* attributes and for every attribute over all examples. For example, the numeric_value_filter with
* the parameter string "&gt; 6" will keep all nominal attributes and all numeric
* attributes having a value greater than 6 in every example. A combination of conditions is
* possible: "&gt; 6 ANDAND &lt; 11" or "&lt;= 5 || &lt; 0". But ANDAND and || must not be
* mixed. Please note that ANDAND has to be replaced by two ampersands.
* </p>
*
* <p>
* The attribute_name_filter keeps all attributes whose names match the given regular expression.
* The nominal_value_filter keeps all numeric attributes and all nominal attributes containing at
* least one of the specified nominal values. "rainy ANDAND cloudy" would keep all attributes
* containing "rainy" at least once and "cloudy" at least once. "rainy ||
* sunny" would keep all attributes containing "rainy" at least once or "sunny" at least
* once. ANDAND and || are not allowed to be mixed. And again, ANDAND has to be
* replaced by two ampersands.
* </p>
*
* @author Sebastian Land, Ingo Mierswa
*/
public class AttributeFilter extends AbstractFeatureSelection {

	/**
	 * Selector bound to this operator and its example set input port. It supplies the attribute
	 * subset, the matching meta data subset and the parameter types used below.
	 */
	private final AttributeSubsetSelector attributeSelector = new AttributeSubsetSelector(this, getExampleSetInputPort());

	/**
	 * Creates the operator from the given description.
	 *
	 * @param description
	 *            the operator description, handed to the superclass constructor
	 */
	public AttributeFilter(OperatorDescription description) {
		super(description);
	}

	/**
	 * Drops from the given meta data every attribute whose name cannot be found in the meta data
	 * subset delivered by the attribute selector.
	 *
	 * @param metaData
	 *            the meta data to filter; it is modified in place
	 * @return the same {@code metaData} instance after the removal
	 */
	@Override
	protected MetaData modifyMetaData(ExampleSetMetaData metaData) {
		// NOTE(review): the meaning of the 'true' flag is defined by AttributeSubsetSelector
		ExampleSetMetaData keptMetaData = attributeSelector.getMetaDataSubset(metaData, true);
		for (Iterator<AttributeMetaData> candidates = metaData.getAllAttributes().iterator(); candidates.hasNext();) {
			String candidateName = candidates.next().getName();
			boolean selected = keptMetaData.getAttributeByName(candidateName) != null;
			if (!selected) {
				// removal goes through the iterator so the underlying collection stays consistent
				candidates.remove();
			}
		}
		return metaData;
	}

	/**
	 * Removes from the example set every attribute that is not contained in the subset delivered
	 * by the attribute selector.
	 *
	 * @param exampleSet
	 *            the example set to filter; its attributes are modified in place
	 * @return the same {@code exampleSet} instance after the removal
	 * @throws OperatorException
	 *             declared by the overridden method; may originate from the attribute selector
	 */
	@Override
	public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
		Attributes allOfThem = exampleSet.getAttributes();
		// NOTE(review): the meaning of the 'true' flag is defined by AttributeSubsetSelector
		Set<Attribute> keep = attributeSelector.getAttributeSubset(exampleSet, true);
		for (Iterator<Attribute> cursor = allOfThem.allAttributes(); cursor.hasNext();) {
			Attribute current = cursor.next();
			if (keep.contains(current)) {
				continue;
			}
			cursor.remove();
		}
		return exampleSet;
	}

	/**
	 * Returns the parameter types of the superclass followed by those of the attribute selector.
	 *
	 * @return the list obtained from the superclass with the selector's parameter types appended
	 */
	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> parameterTypes = super.getParameterTypes();
		parameterTypes.addAll(attributeSelector.getParameterTypes());
		return parameterTypes;
	}

	/**
	 * Looks up the resource consumption estimator for this operator class via the
	 * {@link OperatorResourceConsumptionHandler}, passing the input port and the attribute
	 * selector along.
	 *
	 * @return the estimator returned by the handler
	 */
	@Override
	public ResourceConsumptionEstimator getResourceConsumptionEstimator() {
		return OperatorResourceConsumptionHandler.getResourceConsumptionEstimator(
				getInputPort(),
				AttributeFilter.class,
				attributeSelector);
	}
}