/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeRole;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.AttributeSelectionExampleSet;
import com.rapidminer.example.set.ConditionCreationException;
import com.rapidminer.example.set.NonSpecialAttributesExampleSet;
import com.rapidminer.operator.IOContainer;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorChain;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.condition.InnerOperatorCondition;
import com.rapidminer.operator.condition.LastInnerOperatorCondition;
import com.rapidminer.operator.preprocessing.filter.attributes.AttributeFilter;
import com.rapidminer.operator.preprocessing.filter.attributes.AttributeFilterCondition;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.ParameterTypeStringCategory;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.parameter.conditions.EqualStringCondition;
/**
* <p>This operator can be used to select one attribute (or a subset) by defining a
* regular expression for the attribute name and applies its inner operators to
* the resulting subset. Please note that this operator will also use special
* attributes which makes it necessary for all preprocessing steps which should
* be performed on special attributes (and are normally not performed on special
* attributes).</p>
*
* <p>This operator is also able to deliver the additional results of the inner
* operator if desired.</p>
*
* <p>Afterwards, the remaining original attributes are added
* to the resulting example set if the parameter "keep_subset_only" is set to
* false (default).</p>
*
* <p>Please note that this operator is very powerful and can be used to create
* new preprocessing schemes by combinating it with other preprocessing operators.
* Hoewever, there are two major restrictions (among some others): first, since the inner result
* will be combined with the rest of the input example set, the number of
* examples (data points) is not allowed to be changed inside of the subset preprocessing.
* Second, attribute role changes will not be delivered to the outside since internally all special
* attributes will be changed to regular for the inner operators and role changes can afterwards
* not be delivered.</p>
*
* @author Ingo Mierswa, Shevek
* @version $Id: AttributeSubsetPreprocessing.java,v 1.15 2008/07/19 16:31:17 ingomierswa Exp $
*/
public class AttributeSubsetPreprocessing extends OperatorChain {
/** The parameter name for "A regular expression which matches against all attribute names (including special attributes)." */
public static final String PARAMETER_ATTRIBUTE_NAME_REGEX = "attribute_name_regex";
/** The parameter name for "Indicates if the attributes which did not match the regular expression should be removed by this operator." */
public static final String PARAMETER_INVERT_SELECTION = "invert_selection";
/** The parameter name for "Indicates if special attributes like labels etc. should also be processed." */
public static final String PARAMETER_PROCESS_SPECIAL_ATTRIBUTES = "process_special_attributes";
/** The parameter name for "Indicates if the additional results (other than example set) of the inner operator should also be returned." */
public static final String PARAMETER_DELIVER_INNER_RESULTS = "deliver_inner_results";
/** The parameter name for "Indicates if the attributes which did not match the regular expression should be removed by this operator." */
public static final String PARAMETER_KEEP_SUBSET_ONLY = "keep_subset_only";
public AttributeSubsetPreprocessing(OperatorDescription description) {
super(description);
}
public IOObject[] apply() throws OperatorException {
ExampleSet inputSet = getInput(ExampleSet.class);
ExampleSet workingExampleSet = (ExampleSet)inputSet.clone();
if (getParameterAsBoolean(PARAMETER_PROCESS_SPECIAL_ATTRIBUTES))
workingExampleSet = new NonSpecialAttributesExampleSet(workingExampleSet);
// this list will be filled in the method createSubSetView(...)
List<Attribute> unusedAttributes = new LinkedList<Attribute>();
workingExampleSet = createSubSetView(workingExampleSet, unusedAttributes);
// perform inner operators
IOContainer input = new IOContainer(new IOObject[] { workingExampleSet });
for (int i = 0; i < getNumberOfOperators(); i++) {
input = getOperator(i).apply(input);
}
// retrieve transformed example set
ExampleSet resultSet = input.get(ExampleSet.class);
// transform special attributes back
Iterator<AttributeRole> r = resultSet.getAttributes().allAttributeRoles();
while (r.hasNext()) {
AttributeRole newRole = r.next();
AttributeRole oldRole = inputSet.getAttributes().getRole(newRole.getAttribute().getName());
if (oldRole != null) {
if (oldRole.isSpecial()) {
String specialName = oldRole.getSpecialName();
newRole.setSpecial(specialName);
}
}
}
// add old attributes if desired
if (!getParameterAsBoolean(PARAMETER_KEEP_SUBSET_ONLY)) {
if (resultSet.size() != inputSet.size()) {
throw new UserError(this, 127, "changing the size of the example set is not allowed if the non-processed attributes should be kept.");
}
if (resultSet.getExampleTable().equals(inputSet.getExampleTable())) {
for (Attribute attribute : unusedAttributes) {
AttributeRole role = inputSet.getAttributes().getRole(attribute);
resultSet.getAttributes().add(role);
}
} else {
logWarning("Underlying example table has changed: data copy into new table is necessary in order to keep non-processed attributes.");
for (Attribute oldAttribute : unusedAttributes) {
AttributeRole oldRole = inputSet.getAttributes().getRole(oldAttribute);
// create and add copy of attribute
Attribute newAttribute = (Attribute)oldAttribute.clone();
resultSet.getExampleTable().addAttribute(newAttribute);
AttributeRole newRole = new AttributeRole(newAttribute);
if (oldRole.isSpecial())
newRole.setSpecial(oldRole.getSpecialName());
resultSet.getAttributes().add(newRole);
// copy data for the new attribute
Iterator<Example> oldIterator = inputSet.iterator();
Iterator<Example> newIterator = resultSet.iterator();
while (oldIterator.hasNext()) {
Example oldExample = oldIterator.next();
Example newExample = newIterator.next();
newExample.setValue(newAttribute, oldExample.getValue(oldAttribute));
}
}
}
}
// add all other results if desired
List<IOObject> allResults = new LinkedList<IOObject>();
allResults.add(resultSet);
if (getParameterAsBoolean(PARAMETER_DELIVER_INNER_RESULTS)) {
for (IOObject current : input.getIOObjects()) {
if (!(current instanceof ExampleSet)) {
allResults.add(current);
}
}
}
// create and deliver final result
IOObject[] finalResult = new IOObject[allResults.size()];
allResults.toArray(finalResult);
return finalResult;
}
private ExampleSet createSubSetView(ExampleSet exampleSet, List<Attribute> unusedAttributes) throws UserError {
Attributes attributes = exampleSet.getAttributes();
boolean[] selectionMask = new boolean[exampleSet.getAttributes().size()];
// init and removing attributes not needed to checked per example
String conditionName = getParameterAsString(AttributeFilter.PARAMETER_CONDITION_NAME);
String parameterString = null;
if (AttributeFilter.CONDITION_NAMES[AttributeFilter.CONDITION_ATTRIBUTE_NAME_FILTER].equals(conditionName)) {
if (isParameterSet(PARAMETER_ATTRIBUTE_NAME_REGEX))
parameterString = getParameterAsString(PARAMETER_ATTRIBUTE_NAME_REGEX);
else
throw new UndefinedParameterError("Parameter '" + PARAMETER_ATTRIBUTE_NAME_REGEX + "' not defined.");
} else {
parameterString = getParameterAsString(AttributeFilter.PARAMETER_PARAMETER_STRING);
}
boolean invert = getParameterAsBoolean(PARAMETER_INVERT_SELECTION);
try {
AttributeFilterCondition condition = AttributeFilter.createCondition(getParameterAsString(AttributeFilter.PARAMETER_CONDITION_NAME));
int a = 0;
for (Attribute attribute : attributes) {
if (condition != null) {
if (condition.beforeScanCheck(attribute, parameterString, invert)) {
unusedAttributes.add(attribute);
selectionMask[a] = false;
} else {
selectionMask[a] = true;
}
} else {
selectionMask[a] = true;
}
a++;
}
// now checking for every example
if (condition.isNeedingScan()) {
condition.initScanCheck();
a = 0;
for (Attribute attribute : attributes) {
for (Example example: exampleSet) {
if (condition.check(attribute, example)) {
selectionMask[a] = false;
unusedAttributes.add(attribute);
break;
}
}
a++;
}
}
} catch (ConditionCreationException e) {
throw new UserError(this, 904, parameterString, e.getMessage());
}
return new AttributeSelectionExampleSet(exampleSet, selectionMask);
}
public InnerOperatorCondition getInnerOperatorCondition() {
return new LastInnerOperatorCondition(new Class[] { ExampleSet.class }, new Class[] { ExampleSet.class });
}
public int getMaxNumberOfInnerOperators() {
return Integer.MAX_VALUE;
}
public int getMinNumberOfInnerOperators() {
return 1;
}
public Class<?>[] getInputClasses() {
return new Class[] {ExampleSet.class };
}
public Class<?>[] getOutputClasses() {
Class[] innerResult = null;
try {
if (getParameterAsBoolean(PARAMETER_DELIVER_INNER_RESULTS)) {
if (getNumberOfOperators() > 0)
innerResult = getOperator(0).getOutputClasses();
}
} catch (NullPointerException e) {
// hack to allow parameter retrieval in getInputClasses before
// initialization has finished
// after init (i.e. during process runtime) this method of course
// works...
} catch (ArrayIndexOutOfBoundsException e) {
// dito
}
if (innerResult != null) {
List<Class> completeOutput = new LinkedList<Class>();
completeOutput.add(ExampleSet.class);
for (Class clazz : innerResult) {
if (!clazz.equals(ExampleSet.class))
completeOutput.add(clazz);
}
Class[] result = new Class[completeOutput.size()];
completeOutput.toArray(result);
return result;
} else {
return new Class[] { ExampleSet.class };
}
}
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = super.getParameterTypes();
ParameterType type = new ParameterTypeStringCategory(AttributeFilter.PARAMETER_CONDITION_NAME, "Implementation of the condition.", AttributeFilter.CONDITION_NAMES);
type.setExpert(false);
types.add(type);
type = new ParameterTypeString(AttributeFilter.PARAMETER_PARAMETER_STRING, "Parameter string for the condition, e.g. 'attribute=value' for the nominal value filter.", true);
type.registerDependencyCondition(new EqualStringCondition(this, AttributeFilter.PARAMETER_CONDITION_NAME, true, AttributeFilter.CONDITION_NAMES[AttributeFilter.CONDITION_NUMERIC_VALUE_FILTER]));
type.setExpert(false);
types.add(type);
type = new ParameterTypeString(PARAMETER_ATTRIBUTE_NAME_REGEX, "A regular expression which matches against all attribute names (including special attributes).", true);
type.registerDependencyCondition(new EqualStringCondition(this, AttributeFilter.PARAMETER_CONDITION_NAME, true, AttributeFilter.CONDITION_NAMES[AttributeFilter.CONDITION_ATTRIBUTE_NAME_FILTER]));
type.setExpert(false);
types.add(type);
type = new ParameterTypeBoolean(PARAMETER_INVERT_SELECTION, "Indicates if the specified attribute selection should be inverted.", false);
type.setExpert(false);
types.add(type);
type = new ParameterTypeBoolean(PARAMETER_PROCESS_SPECIAL_ATTRIBUTES, "Indicates if special attributes like labels etc. should also be processed.", false);
type.setExpert(false);
types.add(type);
types.add(new ParameterTypeBoolean(PARAMETER_KEEP_SUBSET_ONLY, "Indicates if the attributes which did not match the regular expression should be removed by this operator.", false));
types.add(new ParameterTypeBoolean(PARAMETER_DELIVER_INNER_RESULTS, "Indicates if the additional results (other than example set) of the inner operator should also be returned.", false));
return types;
}
}