/** * Copyright (C) 2001-2017 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. * If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.preprocessing; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.example.AttributeRole; import com.rapidminer.example.Attributes; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.set.ModelViewExampleSet; import com.rapidminer.example.set.NonSpecialAttributesExampleSet; import com.rapidminer.example.set.RemappedExampleSet; import com.rapidminer.operator.AbstractModel; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.ViewModel; import com.rapidminer.tools.Tools; import com.rapidminer.tools.container.Pair; /** * Returns a more appropriate result icon. This model allows preprocessing Operators to be applied * through a view without changing the underlying data. Since Apply Model does not know the models, * because they are wrapped within a container model, it is necessary to ask for the parameter * PARAMETER_CREATE_VIEW. This must be set by Apply Model, and should be the default behavior. 
*
 * @author Ingo Mierswa, Sebastian Land
 */
public abstract class PreprocessingModel extends AbstractModel implements ViewModel {

    private static final long serialVersionUID = -2603663450216521777L;

    /**
     * Application parameters set via {@link #setParameter(String, Object)}. Kept as a
     * {@code HashMap} field because instances of this class are serialized.
     */
    private HashMap<String, Object> parameterMap = new HashMap<>();

    /**
     * Creates a new preprocessing model.
     *
     * @param exampleSet
     *            the example set handed to the super class constructor
     */
    protected PreprocessingModel(ExampleSet exampleSet) {
        super(exampleSet);
    }

    /**
     * Applies the model by changing the underlying data.
     *
     * @param exampleSet
     *            the (already remapped) example set the model is applied on
     * @return the resulting example set
     * @throws OperatorException
     *             if the application fails
     */
    public abstract ExampleSet applyOnData(ExampleSet exampleSet) throws OperatorException;

    /**
     * Applies this model either by creating a view on the given example set or by delegating to
     * {@link #applyOnData(ExampleSet)}, depending on {@link #isCreateView()}. Special roles of the
     * input attributes are restored on the result where attributes with the same name exist, and
     * attributes that were not part of the remapped example set are appended to the result.
     *
     * @param exampleSet
     *            the example set to apply the model on
     * @return the resulting example set
     * @throws OperatorException
     *             if the application fails
     */
    @Override
    public ExampleSet apply(ExampleSet exampleSet) throws OperatorException {
        // applying by creating view or changing data
        boolean createView = isCreateView();

        // materialize if the model writes into existing data
        if (!createView && writesIntoExistingData()) {
            exampleSet = MaterializeDataInMemory.materializeExampleSet(exampleSet);
        }

        // adapting example set to contain only attributes, which were present during learning time
        // and remove roles if necessary
        ExampleSet nonSpecialRemapped = RemappedExampleSet.create(
                isSupportingAttributeRoles() ? exampleSet : NonSpecialAttributesExampleSet.create(exampleSet),
                getTrainingHeader(), false, needsRemapping());

        // must be collected before the model is applied
        List<AttributeRole> unusedRoles = collectUnusedRoles(exampleSet, nonSpecialRemapped);

        ExampleSet result;
        if (createView) {
            // creating only view
            result = ModelViewExampleSet.create(nonSpecialRemapped, this);
        } else {
            result = applyOnData(nonSpecialRemapped);
        }

        // restoring roles if possible
        restoreSpecialRoles(exampleSet, result);

        // adding unused
        Attributes resultAttributes = result.getAttributes();
        for (AttributeRole unusedRole : unusedRoles) {
            resultAttributes.add(unusedRole);
        }
        return result;
    }

    /**
     * Collects the roles of all attributes of {@code exampleSet} whose name cannot be found in the
     * attributes of {@code remapped}.
     */
    private List<AttributeRole> collectUnusedRoles(ExampleSet exampleSet, ExampleSet remapped) {
        List<AttributeRole> unusedRoles = new LinkedList<>();
        Iterator<AttributeRole> roleIterator = exampleSet.getAttributes().allAttributeRoles();
        while (roleIterator.hasNext()) {
            AttributeRole role = roleIterator.next();
            if (remapped.getAttributes().get(role.getAttribute().getName()) == null) {
                unusedRoles.add(role);
            }
        }
        return unusedRoles;
    }

    /**
     * Assigns to each attribute of {@code result} the special role that the attribute with the same
     * name has in {@code exampleSet}, if any.
     */
    private void restoreSpecialRoles(ExampleSet exampleSet, ExampleSet result) {
        Iterator<Attribute> attributeIterator = result.getAttributes().allAttributes();
        List<Pair<Attribute, String>> rolesToRestore = new LinkedList<>();
        Attributes inputAttributes = exampleSet.getAttributes();
        while (attributeIterator.hasNext()) {
            Attribute resultAttribute = attributeIterator.next();
            AttributeRole inputRole = inputAttributes.getRole(resultAttribute.getName());
            if (inputRole != null && inputRole.isSpecial()) {
                // since underlying connection is changed
                rolesToRestore.add(new Pair<>(resultAttribute, inputRole.getSpecialName()));
            }
        }
        // roles are set only after the iteration over the result attributes has finished
        for (Pair<Attribute, String> rolePair : rolesToRestore) {
            result.getAttributes().setSpecialAttribute(rolePair.getFirst(), rolePair.getSecond());
        }
    }

    /**
     * Checks the parameter {@link PreprocessingOperator#PARAMETER_CREATE_VIEW}. Since
     * {@link #setParameter(String, Object)} accepts arbitrary objects, the stored value is not
     * blindly cast: a {@link Boolean} is used as is, a {@link String} is parsed via
     * {@link Boolean#parseBoolean(String)}, and everything else (including a missing or
     * {@code null} value) counts as {@code false}.
     *
     * @return {@code true} if the parameter "create view" is selected
     */
    protected boolean isCreateView() {
        Object value = parameterMap.get(PreprocessingOperator.PARAMETER_CREATE_VIEW);
        if (value instanceof Boolean) {
            return ((Boolean) value).booleanValue();
        }
        if (value instanceof String) {
            return Boolean.parseBoolean(((String) value).trim());
        }
        return false;
    }

    /**
     * Builds a textual description consisting of the model name followed by the names of all
     * attributes of the training header.
     *
     * @return the result string
     */
    @Override
    public String toResultString() {
        StringBuilder builder = new StringBuilder();
        Attributes trainAttributes = getTrainingHeader().getAttributes();
        builder.append(getName()).append(Tools.getLineSeparators(2));
        builder.append("Model covering ").append(trainAttributes.size()).append(" attributes:")
                .append(Tools.getLineSeparator());
        for (Attribute attribute : trainAttributes) {
            builder.append(" - ").append(attribute.getName()).append(Tools.getLineSeparator());
        }
        return builder.toString();
    }

    /**
     * Stores the given parameter value under the given key, replacing any previous value.
     *
     * @param key
     *            the parameter key
     * @param value
     *            the parameter value
     */
    @Override
    public void setParameter(String key, Object value) {
        parameterMap.put(key, value);
    }

    /**
     * Subclasses which need to have the attribute roles must return true. Otherwise all selected
     * attributes are converted into regular and afterwards given their old roles.
     *
     * @return {@code false} by default
     */
    public boolean isSupportingAttributeRoles() {
        return false;
    }

    /**
     * Determines whether before the model application the nominal attributes should be remapped on
     * the fly such that their returned indices match the indices of the training set. Subclasses
     * should overwrite this to adjust to their needs. Note that only attributes that are not part
     * of the returned example set should be remapped.
     *
     * @return {@code true} by default for compatibility reasons
     * @see RemappedExampleSet#RemappedExampleSet(ExampleSet, ExampleSet, boolean, boolean)
     * @since 7.4.0
     */
    protected boolean needsRemapping() {
        return true;
    }

    /**
     * Whether {@link #applyOnData(ExampleSet)} writes into existing data. If this method returns
     * {@code true} then the data is materialized before {@link #applyOnData(ExampleSet)} is called.
     * For the corresponding {@link PreprocessingOperator} the method
     * {@link PreprocessingOperator#writesIntoExistingData()} should return {@code false} to prevent
     * materializing twice.
     * <p>
     * <strong>Note: </strong> Subclasses must implement this method if they write into the data.
     * The safe implementation would be to return true, however, for backwards compatibility, the
     * default implementation returns false.
     *
     * @return {@code true} if {@link #applyOnData(ExampleSet)} writes into existing data
     */
    protected boolean writesIntoExistingData() {
        return false;
    }
}