/** * Copyright (C) 2001-2017 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. * If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.preprocessing.discretization; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; import org.apache.commons.lang.ArrayUtils; import com.rapidminer.example.Attribute; import com.rapidminer.example.ExampleSet; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.OperatorVersion; import com.rapidminer.operator.annotation.ResourceConsumptionEstimator; import com.rapidminer.operator.ports.metadata.AttributeMetaData; import com.rapidminer.operator.ports.metadata.ExampleSetMetaData; import com.rapidminer.operator.ports.metadata.SetRelation; import com.rapidminer.operator.preprocessing.PreprocessingModel; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeDouble; import com.rapidminer.parameter.ParameterTypeList; import com.rapidminer.parameter.ParameterTypeString; import com.rapidminer.parameter.UndefinedParameterError; import com.rapidminer.tools.Ontology; import com.rapidminer.tools.OperatorResourceConsumptionHandler; import com.rapidminer.tools.container.Tupel; /** * This operator discretizes a numerical attribute to either a nominal or an ordinal attribute. The * numerical values are mapped to the classes according to the thresholds specified by the user. The * user can define the classes by specifying the upper limits of each class. The lower limit of the * next class is automatically specified as the upper limit of the previous one. A parameter defines * to which adjacent class values that are equal to the given limits should be mapped. If the upper * limit in the last list entry is not equal to Infinity, an additional class which is automatically * named is added. If a '?' is given as class value the according numerical values are mapped to * unknown values in the resulting attribute. * * @author Sebastian Land */ public class UserBasedDiscretization extends AbstractDiscretizationOperator { static { registerDiscretizationOperator(UserBasedDiscretization.class); } /** The parameter name for the upper limit. */ public static final String PARAMETER_UPPER_LIMIT = "upper_limit"; /** The parameter name for "Defines the classes and the upper limits of each class." */ public static final String PARAMETER_RANGE_NAMES = "classes"; private static final String PARAMETER_CLASS_NAME = "class_names"; /** * Incompatible version, old version writes into the exampleset, if original output port is not * connected. */ private static final OperatorVersion VERSION_MAY_WRITE_INTO_DATA = new OperatorVersion(7, 1, 1); public UserBasedDiscretization(OperatorDescription description) { super(description); } @Override protected Collection<AttributeMetaData> modifyAttributeMetaData(ExampleSetMetaData emd, AttributeMetaData amd) throws UndefinedParameterError { AttributeMetaData newAMD = new AttributeMetaData(amd.getName(), Ontology.NOMINAL, amd.getRole()); List<String[]> rangeList = getParameterList(PARAMETER_RANGE_NAMES); TreeSet<String> values = new TreeSet<String>(); for (String[] pair : rangeList) { values.add(pair[0]); } newAMD.setValueSet(values, SetRelation.SUBSET); return Collections.singletonList(newAMD); } @Override public PreprocessingModel createPreprocessingModel(ExampleSet exampleSet) throws OperatorException { HashMap<String, SortedSet<Tupel<Double, String>>> ranges = new HashMap<String, SortedSet<Tupel<Double, String>>>(); List<String[]> rangeList = getParameterList(PARAMETER_RANGE_NAMES); TreeSet<Tupel<Double, String>> thresholdPairs = new TreeSet<Tupel<Double, String>>(); for (String[] pair : rangeList) { thresholdPairs.add(new Tupel<Double, String>(Double.valueOf(pair[1]), pair[0])); } for (Attribute attribute : exampleSet.getAttributes()) { if (attribute.isNumerical()) { ranges.put(attribute.getName(), thresholdPairs); } } DiscretizationModel model = new DiscretizationModel(exampleSet); model.setRanges(ranges); return model; } @Override public Class<? extends PreprocessingModel> getPreprocessingModelClass() { return DiscretizationModel.class; } @Override public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); ParameterType classType = new ParameterTypeString(PARAMETER_CLASS_NAME, "The name of this range."); ParameterType threshold = new ParameterTypeDouble(PARAMETER_UPPER_LIMIT, "The upper limit.", Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY); List<String[]> defaultList = new LinkedList<String[]>(); defaultList.add(new String[] { "first", Double.NEGATIVE_INFINITY + "" }); defaultList.add(new String[] { "last", Double.POSITIVE_INFINITY + "" }); ParameterType type = new ParameterTypeList(PARAMETER_RANGE_NAMES, "Defines the classes and the upper limits of each class.", classType, threshold, defaultList); type.setExpert(false); types.add(type); return types; } @Override public boolean writesIntoExistingData() { if (getCompatibilityLevel().isAbove(VERSION_MAY_WRITE_INTO_DATA)) { return false; } else { // old version: true only if original output port is connected return isOriginalOutputConnected() && super.writesIntoExistingData(); } } @Override public ResourceConsumptionEstimator getResourceConsumptionEstimator() { return OperatorResourceConsumptionHandler.getResourceConsumptionEstimator(getInputPort(), UserBasedDiscretization.class, attributeSelector); } @Override public OperatorVersion[] getIncompatibleVersionChanges() { return (OperatorVersion[]) ArrayUtils.addAll(super.getIncompatibleVersionChanges(), new OperatorVersion[] { VERSION_MAY_WRITE_INTO_DATA }); } }