/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.preprocessing;
import java.util.Collection;
import java.util.List;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.NonSpecialAttributesExampleSet;
import com.rapidminer.operator.AbstractModel;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.GenerateModelTransformationRule;
import com.rapidminer.operator.tools.AttributeSubsetSelector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.container.Pair;
/**
* Superclass for all preprocessing operators. Classes which extend this class must implement the
* method {@link #createPreprocessingModel(ExampleSet)}. The model created by this method can also
* be returned by this operator and will be combined with other models.
*
* @author Ingo Mierswa
*/
public abstract class PreprocessingOperator extends AbstractDataProcessing {

	/** Output port on which the created preprocessing model is delivered by {@link #apply(ExampleSet)}. */
	private final OutputPort modelOutput = getOutputPorts().createPort("preprocessing model");

	/**
	 * Selector used to restrict processing to a subset of the attributes.
	 *
	 * Note: this initializer calls the overridable {@link #getFilterValueTypes()} while the
	 * instance is still being constructed, i.e. before any subclass field initializer or
	 * constructor body has run. Implementations must therefore not rely on subclass instance state.
	 */
	protected final AttributeSubsetSelector attributeSelector = new AttributeSubsetSelector(this, getExampleSetInputPort(),
			getFilterValueTypes());

	/**
	 * The parameter name for "Indicates if the preprocessing model should also be
	 * returned"
	 */
	public static final String PARAMETER_RETURN_PREPROCESSING_MODEL = "return_preprocessing_model";

	/**
	 * Indicates if this operator should create a view (new example set on the view stack) instead
	 * of directly changing the data.
	 */
	public static final String PARAMETER_CREATE_VIEW = "create_view";

	/**
	 * Registers the meta data rule that generates the model on the model output port and adds the
	 * precondition of the attribute selector to the example set input port.
	 *
	 * Note: {@link #getPreprocessingModelClass()} is called from this constructor, so
	 * implementations must not rely on subclass instance state.
	 *
	 * @param description
	 *            the operator description passed on to the superclass
	 */
	public PreprocessingOperator(OperatorDescription description) {
		super(description);
		getTransformer().addRule(
				new GenerateModelTransformationRule(getExampleSetInputPort(), modelOutput, getPreprocessingModelClass()));
		getExampleSetInputPort().addPrecondition(attributeSelector.makePrecondition());
	}

	/**
	 * Subclasses might override this method to define the meta data transformation performed by
	 * this operator. The default implementation takes all attributes specified by the
	 * {@link AttributeSubsetSelector} and passes them to
	 * {@link #modifyAttributeMetaData(ExampleSetMetaData, AttributeMetaData)} and replaces them
	 * accordingly.
	 *
	 * @param exampleSetMetaData
	 *            the meta data to transform; it is modified in place
	 * @return the same (modified) {@code exampleSetMetaData} instance
	 * @throws UndefinedParameterError
	 */
	@Override
	protected ExampleSetMetaData modifyMetaData(ExampleSetMetaData exampleSetMetaData) throws UndefinedParameterError {
		ExampleSetMetaData subsetMetaData = attributeSelector.getMetaDataSubset(exampleSetMetaData,
				isSupportingAttributeRoles());
		checkSelectedSubsetMetaData(subsetMetaData);
		for (AttributeMetaData amd : subsetMetaData.getAllAttributes()) {
			Collection<AttributeMetaData> replacement = modifyAttributeMetaData(exampleSetMetaData, amd);
			if (replacement == null) {
				// null means amd was modified as a side effect (or not at all): nothing to replace
				continue;
			}
			if (replacement.size() == 1) {
				// a one-to-one replacement inherits the role of the attribute it replaces
				AttributeMetaData original = exampleSetMetaData.getAttributeByName(amd.getName());
				if (original != null) {
					// guard: the attribute may no longer be present under this name
					replacement.iterator().next().setRole(original.getRole());
				}
			}
			exampleSetMetaData.removeAttribute(amd);
			exampleSetMetaData.addAllAttributes(replacement);
		}
		return exampleSetMetaData;
	}

	/**
	 * Can be overridden to check the selected attributes for compatibility. The default
	 * implementation does nothing.
	 *
	 * @param subsetMetaData
	 *            the meta data of the attributes chosen by the attribute selector
	 */
	protected void checkSelectedSubsetMetaData(ExampleSetMetaData subsetMetaData) {}

	/**
	 * If this preprocessing operator generates new attributes, the corresponding meta data should
	 * be returned by this method. The attribute will be replaced by the collection. If this
	 * operator modifies a single one, amd itself should be modified as a side effect and null
	 * should be returned. Note: If an empty collection is returned, amd will be removed, but no new
	 * attribute will be added.
	 *
	 * @param emd
	 *            the meta data of the complete example set
	 * @param amd
	 *            the meta data of the selected attribute currently being processed
	 * @return the replacement attributes, or {@code null} if nothing should be replaced
	 * @throws UndefinedParameterError
	 **/
	protected abstract Collection<AttributeMetaData> modifyAttributeMetaData(ExampleSetMetaData emd, AttributeMetaData amd)
			throws UndefinedParameterError;

	/**
	 * Creates the preprocessing model. {@link #apply(ExampleSet)} and {@link #doWork(ExampleSet)}
	 * call this method with the selected attribute subset of their input.
	 *
	 * @param exampleSet
	 *            the example set the model is created from
	 * @return the created model
	 * @throws OperatorException
	 */
	public abstract PreprocessingModel createPreprocessingModel(ExampleSet exampleSet) throws OperatorException;

	/**
	 * This method allows subclasses to easily get a collection of the affected attributes.
	 *
	 * @param exampleSet
	 *            the example set to take the subset from
	 * @return the subset determined by the attribute selector
	 * @throws UndefinedParameterError
	 * @throws UserError
	 */
	protected final ExampleSet getSelectedAttributes(ExampleSet exampleSet) throws UndefinedParameterError, UserError {
		return attributeSelector.getSubset(exampleSet, isSupportingAttributeRoles());
	}

	/**
	 * Creates the preprocessing model, applies it to the given example set if the example set
	 * output port is connected, and delivers the model to the model output port.
	 *
	 * @param exampleSet
	 *            the input example set
	 * @return the result of applying the model, or the unchanged input if the example set output
	 *         port is not connected
	 * @throws OperatorException
	 */
	@Override
	public final ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
		AbstractModel model = createConfiguredModel(exampleSet);
		if (getExampleSetOutputPort().isConnected()) {
			model.setOperator(this);
			model.setShowProgress(true);
			try {
				exampleSet = model.apply(exampleSet);
			} finally {
				// always detach the model from this operator, even if the application fails
				model.setOperator(null);
				model.setShowProgress(false);
			}
		}
		modelOutput.deliver(model);
		return exampleSet;
	}

	/**
	 * Helper wrapper that can be called by other operators to apply this operator when it is
	 * created anonymously. In contrast to {@link #apply(ExampleSet)}, the model is always applied
	 * (independent of the port connections) and is not delivered to the model output port.
	 *
	 * @param exampleSet
	 *            the input example set
	 * @return the result of applying the created model to {@code exampleSet}
	 * @throws OperatorException
	 */
	public ExampleSet doWork(ExampleSet exampleSet) throws OperatorException {
		AbstractModel model = createConfiguredModel(exampleSet);
		model.setOperator(this);
		try {
			return model.apply(exampleSet);
		} finally {
			// consistent with apply(ExampleSet): do not leave the operator attached to the model
			model.setOperator(null);
		}
	}

	/**
	 * Runs {@link #apply(ExampleSet)} and returns both the resulting example set and the model that
	 * was delivered to the model output port.
	 *
	 * @param exampleSet
	 *            the input example set
	 * @return a pair of the resulting example set and the model read from the model output port
	 * @throws OperatorException
	 */
	public Pair<ExampleSet, Model> doWorkModel(ExampleSet exampleSet) throws OperatorException {
		ExampleSet result = apply(exampleSet);
		Model model = modelOutput.getData(Model.class);
		return new Pair<>(result, model);
	}

	/**
	 * Creates the preprocessing model from the selected attributes of the given example set and
	 * passes the current value of {@link #PARAMETER_CREATE_VIEW} on to it.
	 */
	private AbstractModel createConfiguredModel(ExampleSet exampleSet) throws OperatorException {
		ExampleSet selected = getSelectedAttributes(exampleSet);
		// operators that do not support attribute roles get the selection wrapped in a
		// NonSpecialAttributesExampleSet
		ExampleSet workingSet = isSupportingAttributeRoles() ? selected : NonSpecialAttributesExampleSet.create(selected);
		AbstractModel model = createPreprocessingModel(workingSet);
		model.setParameter(PARAMETER_CREATE_VIEW, getParameterAsBoolean(PARAMETER_CREATE_VIEW));
		return model;
	}

	/**
	 * If a {@link PreprocessingOperator} returns a {@link PreprocessingModel}, the model is
	 * responsible for preventing data corruption. Therefore,
	 * {@link PreprocessingModel#writesIntoExistingData()} should be adjusted for the associated
	 * model and this method should be overwritten to return {@code false}.
	 *
	 * @return {@code true} unless the parameter {@link #PARAMETER_CREATE_VIEW} is set
	 */
	@Override
	public boolean writesIntoExistingData() {
		return !getParameterAsBoolean(PARAMETER_CREATE_VIEW);
	}

	/**
	 * The model output port is only auto-connected if the parameter
	 * {@link #PARAMETER_RETURN_PREPROCESSING_MODEL} is set; all other ports are handled by the
	 * superclass.
	 */
	@Override
	public boolean shouldAutoConnect(OutputPort outputPort) {
		if (outputPort != modelOutput) {
			return super.shouldAutoConnect(outputPort);
		}
		return getParameterAsBoolean(PARAMETER_RETURN_PREPROCESSING_MODEL);
	}

	/**
	 * Defines the value types of the attributes which are processed or affected by this operator.
	 * Has to be overridden to restrict the attributes which can be chosen by an
	 * {@link AttributeSubsetSelector}.
	 *
	 * Note: this method is called while the instance is still being constructed (from the
	 * initializer of {@link #attributeSelector}), so it must not rely on subclass instance state.
	 *
	 * @return array of value types
	 */
	protected abstract int[] getFilterValueTypes();

	/**
	 * Returns the model class that is passed to the meta data rule generating the model on the
	 * model output port.
	 *
	 * Note: this method is called from the constructor of this class, so it must not rely on
	 * subclass instance state.
	 *
	 * @return the class of the model created by this operator
	 */
	public abstract Class<? extends PreprocessingModel> getPreprocessingModelClass();

	/**
	 * Extends the parameters of the superclass by the hidden
	 * {@link #PARAMETER_RETURN_PREPROCESSING_MODEL}, by {@link #PARAMETER_CREATE_VIEW} (hidden if
	 * {@link #isSupportingView()} returns {@code false}) and by the parameters of the attribute
	 * selector.
	 */
	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();

		ParameterType returnModel = new ParameterTypeBoolean(PARAMETER_RETURN_PREPROCESSING_MODEL,
				"Indicates if the preprocessing model should also be returned", false);
		returnModel.setHidden(true);
		types.add(returnModel);

		ParameterType createView = new ParameterTypeBoolean(PARAMETER_CREATE_VIEW,
				"Create View to apply preprocessing instead of changing the data", false);
		createView.setHidden(!isSupportingView());
		types.add(createView);

		types.addAll(attributeSelector.getParameterTypes());
		return types;
	}

	/**
	 * Subclasses which need to have the attribute roles must return true. Otherwise all selected
	 * attributes are converted into regular and afterwards given their old roles.
	 *
	 * @return {@code false} by default
	 */
	public boolean isSupportingAttributeRoles() {
		return false;
	}

	/**
	 * Subclasses might overwrite this in order to hide the create_view parameter
	 *
	 * @return {@code true} by default; if {@code false} is returned, the parameter
	 *         {@link #PARAMETER_CREATE_VIEW} is hidden
	 */
	public boolean isSupportingView() {
		return true;
	}

	/**
	 * @return the output port on which the preprocessing model is delivered
	 */
	public OutputPort getPreprocessingModelOutputPort() {
		return modelOutput;
	}
}