/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.preprocessing;

import java.util.Collection;
import java.util.List;

import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.NonSpecialAttributesExampleSet;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.ports.OutputPort;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.GenerateModelTransformationRule;
import com.rapidminer.operator.tools.AttributeSubsetSelector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.container.Pair;

/**
 * Superclass for all preprocessing operators. Classes which extend this class
 * must implement the method {@link #createPreprocessingModel(ExampleSet)}. The
 * model created by that method can also be returned by this operator and will
 * be combined with other models.
 *
 * @author Ingo Mierswa
 */
public abstract class PreprocessingOperator extends AbstractDataProcessing {

    /** Output port on which the created preprocessing model is delivered. */
    private final OutputPort modelOutput = getOutputPorts().createPort("preprocessing model");

    /**
     * Selects the attributes this operator works on. Note that the initializer
     * invokes {@link #getFilterValueTypes()} while the instance is still being
     * constructed, so implementations of that method must not rely on state
     * initialized by the subclass.
     */
    protected final AttributeSubsetSelector attributeSelector =
            new AttributeSubsetSelector(this, getExampleSetInputPort(), getFilterValueTypes());

    /** The parameter name for "Indicates if the preprocessing model should also be returned" */
    public static final String PARAMETER_RETURN_PREPROCESSING_MODEL = "return_preprocessing_model";

    /**
     * Indicates if this operator should create a view (new example set on the view stack) instead of directly
     * changing the data.
     */
    public static final String PARAMETER_CREATE_VIEW = "create_view";

    /**
     * Creates the operator, registers the meta data rule that generates the model on the model output port and
     * adds the attribute selector's precondition to the example set input port.
     * {@link #getPreprocessingModelClass()} is invoked from this constructor, so implementations must not rely
     * on state initialized by the subclass.
     *
     * @param description the operator description passed on to the superclass
     */
    public PreprocessingOperator(OperatorDescription description) {
        super(description);
        getTransformer().addRule(
                new GenerateModelTransformationRule(getExampleSetInputPort(), modelOutput, getPreprocessingModelClass()));
        getExampleSetInputPort().addPrecondition(attributeSelector.makePrecondition());
    }

    /**
     * Subclasses might override this method to define the meta data transformation performed by this operator.
     * The default implementation takes all attributes specified by the {@link AttributeSubsetSelector}
     * and passes them to {@link #modifyAttributeMetaData(ExampleSetMetaData, AttributeMetaData)} and replaces
     * them accordingly. If exactly one replacement is returned for an attribute, it receives the role of the
     * attribute it replaces.
     *
     * @param exampleSetMetaData the incoming meta data; it is modified in place
     * @return the same {@code exampleSetMetaData} instance after modification
     * @throws UndefinedParameterError
     */
    @Override
    protected ExampleSetMetaData modifyMetaData(ExampleSetMetaData exampleSetMetaData) throws UndefinedParameterError {
        ExampleSetMetaData subsetMetaData =
                attributeSelector.getMetaDataSubset(exampleSetMetaData, isSupportingAttributeRoles());
        checkSelectedSubsetMetaData(subsetMetaData);
        for (AttributeMetaData amd : subsetMetaData.getAllAttributes()) {
            Collection<AttributeMetaData> replacement = modifyAttributeMetaData(exampleSetMetaData, amd);
            if (replacement == null) {
                // amd was modified as a side effect (or left untouched); nothing to replace
                continue;
            }
            if (replacement.size() == 1) {
                // a one-to-one replacement inherits the role of the attribute it replaces
                AttributeMetaData original = exampleSetMetaData.getAttributeByName(amd.getName());
                // NOTE(review): presumably the lookup yields null for an unknown name (e.g. if amd was
                // renamed by the subclass); skip the role transfer instead of failing in that case.
                if (original != null) {
                    AttributeMetaData replacementAttribute = replacement.iterator().next();
                    replacementAttribute.setRole(original.getRole());
                }
            }
            exampleSetMetaData.removeAttribute(amd);
            exampleSetMetaData.addAllAttributes(replacement);
        }
        return exampleSetMetaData;
    }

    /**
     * Can be overridden to check the selected attributes for compatibility. The default implementation does
     * nothing.
     *
     * @param subsetMetaData the meta data of the selected attribute subset
     */
    protected void checkSelectedSubsetMetaData(ExampleSetMetaData subsetMetaData) {
    }

    /**
     * If this preprocessing operator generates new attributes, the corresponding meta data should be
     * returned by this method. The attribute will be replaced by the collection.
     * If this operator modifies a single one, amd itself should be modified as a side effect
     * and null should be returned. Note: If an empty collection is returned, amd will be removed,
     * but no new attribute will be added.
     *
     * @param emd the meta data of the complete example set
     * @param amd the meta data of the attribute currently processed
     * @return the replacement for {@code amd}, or null if nothing should be replaced
     * @throws UndefinedParameterError
     */
    protected abstract Collection<AttributeMetaData> modifyAttributeMetaData(ExampleSetMetaData emd,
            AttributeMetaData amd) throws UndefinedParameterError;

    /**
     * Creates the preprocessing model for the given example set. {@link #apply(ExampleSet)} and
     * {@link #doWork(ExampleSet)} pass in the selected attribute subset of their input.
     *
     * @param exampleSet the example set the model is created from
     * @return the created preprocessing model
     * @throws OperatorException
     */
    public abstract PreprocessingModel createPreprocessingModel(ExampleSet exampleSet) throws OperatorException;

    /**
     * This method allows subclasses to easily get a collection of the affected attributes.
     *
     * @param exampleSet the example set to select from
     * @return the subset determined by the attribute selector
     * @throws UndefinedParameterError
     * @throws UserError
     */
    protected final ExampleSet getSelectedAttributes(ExampleSet exampleSet) throws UndefinedParameterError, UserError {
        return attributeSelector.getSubset(exampleSet, isSupportingAttributeRoles());
    }

    /**
     * Builds the preprocessing model from the selected attributes of the given example set and passes the
     * current value of {@link #PARAMETER_CREATE_VIEW} on to the model. Shared by {@link #apply(ExampleSet)}
     * and {@link #doWork(ExampleSet)}.
     */
    private Model createConfiguredModel(ExampleSet exampleSet) throws OperatorException {
        // without role support the selected attributes are wrapped in a NonSpecialAttributesExampleSet
        ExampleSet workingSet = isSupportingAttributeRoles()
                ? getSelectedAttributes(exampleSet)
                : new NonSpecialAttributesExampleSet(getSelectedAttributes(exampleSet));
        Model model = createPreprocessingModel(workingSet);
        model.setParameter(PARAMETER_CREATE_VIEW, getParameterAsBoolean(PARAMETER_CREATE_VIEW));
        return model;
    }

    /**
     * Creates the preprocessing model and delivers it to the model output port. The model is only applied to
     * the example set if the example set output port is connected; otherwise the input is returned unchanged.
     *
     * @param exampleSet the input example set
     * @return the result of applying the model, or the input itself if the example set output is unconnected
     * @throws OperatorException
     */
    @Override
    public final ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
        Model model = createConfiguredModel(exampleSet);
        if (getExampleSetOutputPort().isConnected()) {
            exampleSet = model.apply(exampleSet);
        }
        modelOutput.deliver(model);
        return exampleSet;
    }

    /**
     * Helper that can be called by other operators to apply this operator when it is created anonymously.
     * In contrast to {@link #apply(ExampleSet)} the model is always applied and is not delivered to the
     * model output port.
     *
     * @param exampleSet the input example set
     * @return the result of applying the created model to {@code exampleSet}
     * @throws OperatorException
     */
    public ExampleSet doWork(ExampleSet exampleSet) throws OperatorException {
        return createConfiguredModel(exampleSet).apply(exampleSet);
    }

    /**
     * Runs {@link #apply(ExampleSet)} and returns its result together with the model read back from the model
     * output port. As in {@link #apply(ExampleSet)}, the model is only applied to the example set if the
     * example set output port is connected.
     *
     * @param exampleSet the input example set
     * @return a pair of the example set returned by {@link #apply(ExampleSet)} and the created model
     * @throws OperatorException
     */
    public Pair<ExampleSet, Model> doWorkModel(ExampleSet exampleSet) throws OperatorException {
        exampleSet = apply(exampleSet);
        Model model = modelOutput.getData();
        return new Pair<ExampleSet, Model>(exampleSet, model);
    }

    /** Returns true unless the {@link #PARAMETER_CREATE_VIEW} parameter is set. */
    @Override
    public boolean writesIntoExistingData() {
        return !getParameterAsBoolean(PARAMETER_CREATE_VIEW);
    }

    /**
     * The model output port is only auto-connected if {@link #PARAMETER_RETURN_PREPROCESSING_MODEL} is set;
     * all other ports are handled by the superclass.
     */
    @Override
    public boolean shouldAutoConnect(OutputPort outputPort) {
        if (outputPort == modelOutput) {
            return getParameterAsBoolean(PARAMETER_RETURN_PREPROCESSING_MODEL);
        } else {
            return super.shouldAutoConnect(outputPort);
        }
    }

    /**
     * Defines the value types of the attributes which are processed or
     * affected by this operator. Has to be overridden to restrict
     * the attributes which can be chosen by an {@link AttributeSubsetSelector}.
     * This method is invoked while the operator is being constructed.
     *
     * @return array of value types
     */
    protected abstract int[] getFilterValueTypes();

    /**
     * Returns the class of the model created by this operator. It is passed to the meta data rule for the
     * model output port and is invoked while the operator is being constructed.
     *
     * @return the preprocessing model class
     */
    public abstract Class<? extends PreprocessingModel> getPreprocessingModelClass();

    /**
     * Adds the hidden {@link #PARAMETER_RETURN_PREPROCESSING_MODEL} parameter, the
     * {@link #PARAMETER_CREATE_VIEW} parameter (hidden if {@link #isSupportingView()} returns false) and the
     * parameters of the attribute selector to the parameters of the superclass.
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        ParameterType type = new ParameterTypeBoolean(PARAMETER_RETURN_PREPROCESSING_MODEL,
                "Indicates if the preprocessing model should also be returned", false);
        type.setHidden(true);
        types.add(type);

        type = new ParameterTypeBoolean(PARAMETER_CREATE_VIEW,
                "Create View to apply preprocessing instead of changing the data", false);
        type.setHidden(!isSupportingView());
        types.add(type);

        types.addAll(attributeSelector.getParameterTypes());
        return types;
    }

    /**
     * Subclasses which need to have the attribute roles must return true. Otherwise all selected attributes are
     * converted into regular and afterwards given their old roles.
     *
     * @return false by default
     */
    public boolean isSupportingAttributeRoles() {
        return false;
    }

    /**
     * Subclasses might overwrite this in order to hide the create_view parameter.
     *
     * @return true if the create_view parameter should be shown; true by default
     */
    public boolean isSupportingView() {
        return true;
    }

    /** Returns the output port on which the preprocessing model is delivered. */
    public OutputPort getPreprocessingModelOutputPort() {
        return modelOutput;
    }
}