/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing.filter;
import java.util.Arrays;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeRole;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.NonSpecialAttributesExampleSet;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.OperatorVersion;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.MetaDataInfo;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.operator.preprocessing.AbstractDataProcessing;
import com.rapidminer.operator.tools.AttributeSubsetSelector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.UndefinedParameterError;
/**
* This class is for preprocessing operators, which can be restricted to use only a subset of the
* attributes. The MetaData is changed accordingly in a way equivalent to surrounding the operator
* with an AttributeSubsetPreprocessing operator. Subclasses must overwrite the methods
* {@link #applyOnFiltered(ExampleSet)} and {@link #applyOnFilteredMetaData(ExampleSetMetaData)} in
* order to provide their functionality and the correct meta data handling.
*
* @author Sebastian Land
*
*/
public abstract class AbstractFilteredDataProcessing extends AbstractDataProcessing {
private final AttributeSubsetSelector attributeSelector = new AttributeSubsetSelector(this, getExampleSetInputPort(),
getFilterValueTypes());;
public AbstractFilteredDataProcessing(OperatorDescription description) {
super(description);
}
private static final OperatorVersion FAIL_ON_MISSING_ATTRIBUTES = new OperatorVersion(6, 0, 3);
@Override
public OperatorVersion[] getIncompatibleVersionChanges() {
OperatorVersion[] odlOne = super.getIncompatibleVersionChanges();
OperatorVersion[] newOne = Arrays.copyOf(odlOne, odlOne.length + 1);
newOne[odlOne.length] = FAIL_ON_MISSING_ATTRIBUTES;
return newOne;
}
@Override
protected final MetaData modifyMetaData(ExampleSetMetaData inputMetaData) {
ExampleSetMetaData workingMetaData = inputMetaData.clone();
ExampleSetMetaData subsetAmd = attributeSelector.getMetaDataSubset(workingMetaData, false);
// storing unused attributes and saving roles
List<AttributeMetaData> unusedAttributes = new LinkedList<>();
Iterator<AttributeMetaData> iterator = workingMetaData.getAllAttributes().iterator();
while (iterator.hasNext()) {
AttributeMetaData amd = iterator.next();
String name = amd.getName();
MetaDataInfo containsAttributeName = subsetAmd.containsAttributeName(name);
if (subsetAmd.getAttributeSetRelation() == SetRelation.SUBSET && containsAttributeName == MetaDataInfo.NO
|| subsetAmd.getAttributeSetRelation() != SetRelation.SUBSET
&& containsAttributeName != MetaDataInfo.YES) {
unusedAttributes.add(amd);
iterator.remove();
} else if (amd.isSpecial()) {
amd.setRegular();
}
}
// retrieving result
ExampleSetMetaData resultMetaData = workingMetaData;
try {
resultMetaData = applyOnFilteredMetaData(workingMetaData);
} catch (UndefinedParameterError e) {
}
// merge result with unusedAttributes: restore special types from original input
Iterator<AttributeMetaData> r = resultMetaData.getAllAttributes().iterator();
while (r.hasNext()) {
AttributeMetaData newMetaData = r.next();
AttributeMetaData oldMetaData = inputMetaData.getAttributeByName(newMetaData.getName());
if (oldMetaData != null) {
if (oldMetaData.isSpecial()) {
String specialName = oldMetaData.getRole();
newMetaData.setRole(specialName);
}
}
}
// add unused attributes again
resultMetaData.addAllAttributes(unusedAttributes);
return resultMetaData;
}
@Override
/**
* This method filters the attributes according to the AttributeSubsetSelector and
* then applies the operation of the subclass on this data. Finally the changed data is merged
* back into the exampleSet. This is done in the AttributeSubsetPreprocessing way and somehow doubles the
* code.
*/
public final ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
ExampleSet workingExampleSet = (ExampleSet) exampleSet.clone();
Set<Attribute> selectedAttributes = attributeSelector.getAttributeSubset(workingExampleSet, false, this
.getCompatibilityLevel().isAtMost(FAIL_ON_MISSING_ATTRIBUTES) ? false : true);
List<Attribute> unusedAttributes = new LinkedList<>();
Iterator<Attribute> iterator = workingExampleSet.getAttributes().allAttributes();
while (iterator.hasNext()) {
Attribute attribute = iterator.next();
if (!selectedAttributes.contains(attribute)) {
unusedAttributes.add(attribute);
iterator.remove();
}
}
// converting special to normal
workingExampleSet = NonSpecialAttributesExampleSet.create(workingExampleSet);
// applying filtering
ExampleSet resultSet = applyOnFiltered(workingExampleSet);
// transform special attributes back
Iterator<AttributeRole> r = resultSet.getAttributes().allAttributeRoles();
while (r.hasNext()) {
AttributeRole newRole = r.next();
AttributeRole oldRole = exampleSet.getAttributes().getRole(newRole.getAttribute().getName());
if (oldRole != null) {
if (oldRole.isSpecial()) {
String specialName = oldRole.getSpecialName();
newRole.setSpecial(specialName);
}
}
}
// add old attributes if desired
if (resultSet.size() != exampleSet.size()) {
throw new UserError(this, 127,
"changing the size of the example set is not allowed if the non-processed attributes should be kept.");
}
if (resultSet.getExampleTable().equals(exampleSet.getExampleTable())) {
for (Attribute attribute : unusedAttributes) {
AttributeRole role = exampleSet.getAttributes().getRole(attribute);
resultSet.getAttributes().add(role);
}
} else {
getLogger()
.warning(
"Underlying example table has changed: data copy into new table is necessary in order to keep non-processed attributes.");
for (Attribute oldAttribute : unusedAttributes) {
AttributeRole oldRole = exampleSet.getAttributes().getRole(oldAttribute);
// create and add copy of attribute
Attribute newAttribute = (Attribute) oldAttribute.clone();
resultSet.getExampleTable().addAttribute(newAttribute);
AttributeRole newRole = new AttributeRole(newAttribute);
if (oldRole.isSpecial()) {
newRole.setSpecial(oldRole.getSpecialName());
}
resultSet.getAttributes().add(newRole);
// copy data for the new attribute
Iterator<Example> oldIterator = exampleSet.iterator();
Iterator<Example> newIterator = resultSet.iterator();
while (oldIterator.hasNext()) {
Example oldExample = oldIterator.next();
Example newExample = newIterator.next();
newExample.setValue(newAttribute, oldExample.getValue(oldAttribute));
}
}
}
return resultSet;
}
/**
* Subclasses have to implement this method in order to operate only on the selected attributes.
* The results are merged back into the original example set.
*/
public abstract ExampleSet applyOnFiltered(ExampleSet exampleSet) throws OperatorException;
/**
* This method has to be implemented in order to specify the changes of the meta data caused by
* the application of this operator.
*/
public abstract ExampleSetMetaData applyOnFilteredMetaData(ExampleSetMetaData emd) throws UndefinedParameterError;
/**
* Defines the value types of the attributes which are processed or affected by this operator.
* Has to be overridden to restrict the attributes which can be chosen by an
* {@link AttributeSubsetSelector}.
*
* @return array of value types
*/
protected abstract int[] getFilterValueTypes();
@Override
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = super.getParameterTypes();
types.addAll(attributeSelector.getParameterTypes());
return types;
}
}