MultiwayDecisionTree.java example

Explorer
rapidminer-studio-master
- doc
  - doc
- src
/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 *
 * Complete list of developers available at our web site:
 *
 * http://rapidminer.com
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.tree;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.GroupedModel;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorCapability;
import com.rapidminer.operator.OperatorCreationException;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.OperatorVersion;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.learner.meta.AbstractMetaLearner;
import com.rapidminer.operator.preprocessing.PreprocessingOperator;
import com.rapidminer.operator.preprocessing.discretization.UserBasedDiscretization;
import com.rapidminer.operator.preprocessing.filter.attributes.RegexpAttributeFilter;
import com.rapidminer.operator.tools.AttributeSubsetSelector;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.tools.OperatorService;
import com.rapidminer.tools.Tools;
import com.rapidminer.tools.container.Pair;


/**
 * This operator is a meta learner for numerical tree builders. It might be used to flatten decision
 * trees which consist of many splits on the same attribute. All numerical attributes used for at
 * least one decision in a tree will be discretized with the decisions' split points as borders. For
 * example, if attribute att1 is split at the points 4.5 and 2.1 then it will be discretized into
 * three values: -Infinity to 2.1, 2.1 to 4.5 and 4.5 to Infinity. After this, a new tree is grown
 * on the transformed data. Since the used attributes are now nominal, all splits will be made
 * immediately and hence the depth might be reduced. Please note: The resulting tree might be easier
 * to comprehend, but this does not necessarily make it perform better or worse! To get an impression
 * of the reliability of the result perform a cross-validation (XValidation).
 *
 * @author Sebastian Land
 */
public class MultiwayDecisionTree extends AbstractMetaLearner {

	/**
	 * After version 7.3.0 the behaviour of the operator changed. Previously all the models in the
	 * Grouped Model were created over the original ExampleSet without ever being applied. This made
	 * it so that the Grouped Model could not easily be applied, since the final Decision Tree model
	 * was not coherent with the previous models. Starting in version 7.3.1, the discretization
	 * models are applied to the ExampleSet before the final tree is learned, so that the Grouped
	 * Model can be applied as a whole.
	 */
	private static final OperatorVersion VERSION_NOT_DISCRETIZING = new OperatorVersion(7, 3, 0);

	/**
	 * Creates the operator from its description.
	 *
	 * @param description
	 *            the operator description handed on to the super class
	 */
	public MultiwayDecisionTree(OperatorDescription description) {
		super(description);
	}

	/**
	 * Learns a tree with the inner learner, discretizes every attribute that was used in a
	 * numerical split at the found split points and then runs the inner learner a second time.
	 *
	 * @param exampleSet
	 *            the training data
	 * @return a {@link GroupedModel} holding one discretization model per split attribute followed
	 *         by the model of the second inner learner run; its training header is taken from that
	 *         last model
	 * @throws OperatorException
	 *             if the inner learner or the discretization fails; a {@link UserError} (904) is
	 *             thrown if the discretization operator cannot be created
	 */
	@Override
	public Model learn(ExampleSet exampleSet) throws OperatorException {
		GroupedModel groupedModel = new GroupedModel();

		// first run of the inner learner: only needed to find out where the tree splits
		TreeModel model = (TreeModel) applyInnerLearner(exampleSet);

		// collect the numerical split points of this tree, grouped by attribute name
		Map<String, List<Double>> attributePointMap = new HashMap<String, List<Double>>();
		addNodeSplitPoints(model.getRoot(), attributePointMap);

		try {
			// operator construction
			PreprocessingOperator userbasedDiscretization = OperatorService.createOperator(UserBasedDiscretization.class);

			// parameters shared by all attributes
			userbasedDiscretization.setParameter(AttributeSubsetSelector.PARAMETER_FILTER_TYPE,
					AttributeSubsetSelector.CONDITION_REGULAR_EXPRESSION + "");
			userbasedDiscretization.setParameter(PreprocessingOperator.PARAMETER_RETURN_PREPROCESSING_MODEL, "true");

			// one discretization model per attribute that has to be transformed
			for (Entry<String, List<Double>> currentEntry : attributePointMap.entrySet()) {
				String currentAttributeName = currentEntry.getKey();
				double[] splitPoints = toSortedArray(currentEntry.getValue());

				// NOTE(review): the attribute name is handed over verbatim as a regular
				// expression; names containing regex metacharacters may need quoting - confirm.
				userbasedDiscretization.setParameter(RegexpAttributeFilter.PARAMETER_REGULAR_EXPRESSION,
						currentAttributeName);

				// the range names are built from the attribute name (not from the map entry, whose
				// string form would also contain the whole unsorted list of split points)
				List<String[]> borders = buildRangeSpecifiers(currentAttributeName, splitPoints);
				userbasedDiscretization.setParameter(UserBasedDiscretization.PARAMETER_RANGE_NAMES,
						ParameterTypeList.transformList2String(borders));

				// execute the discretization operator and keep the resulting discretization model
				Pair<ExampleSet, Model> result = userbasedDiscretization.doWorkModel(exampleSet);
				groupedModel.addModel(result.getSecond());
			}

			// Apply the previously built discretization models to the example set
			// if the compatibility level requires it.
			if (getCompatibilityLevel().isAbove(VERSION_NOT_DISCRETIZING)) {
				exampleSet = groupedModel.apply(exampleSet);
			}

			// second run of the inner learner, now on the (possibly transformed) example set
			groupedModel.addModel(applyInnerLearner(exampleSet));

			// the grouped model to be returned should contain a training header; use the one of
			// the last model, i.e. of the model just learned
			List<Model> models = groupedModel.getModels();
			ExampleSet trainingHeader = models.get(models.size() - 1).getTrainingHeader();

			GroupedModel groupModelWithTrainingHeader = new GroupedModel(trainingHeader);
			for (Model m : models) {
				groupModelWithTrainingHeader.addModel(m);
			}
			return groupModelWithTrainingHeader;
		} catch (OperatorCreationException e) {
			// keep the original exception as cause so that its stack trace is not lost
			throw new UserError(this, e, 904, "operators", e.getMessage());
		}
	}

	/**
	 * Copies the given values into a primitive array and sorts it ascending.
	 */
	private static double[] toSortedArray(List<Double> values) {
		double[] sorted = new double[values.size()];
		int index = 0;
		for (Double value : values) {
			sorted[index++] = value;
		}
		Arrays.sort(sorted);
		return sorted;
	}

	/**
	 * Builds the (range name, upper limit) pairs for one attribute: one range per split point,
	 * starting at negative infinity, plus a final range reaching up to positive infinity.
	 */
	private static List<String[]> buildRangeSpecifiers(String attributeName, double[] sortedSplitPoints) {
		List<String[]> borders = new LinkedList<String[]>();
		double lowerBorder = Double.NEGATIVE_INFINITY;
		for (double upperBorder : sortedSplitPoints) {
			borders.add(new String[] { rangeName(attributeName, lowerBorder, upperBorder), upperBorder + "" });
			lowerBorder = upperBorder;
		}
		borders.add(new String[] { rangeName(attributeName, lowerBorder, Double.POSITIVE_INFINITY), "Infinity" });
		return borders;
	}

	/**
	 * Creates the name of a single range, with both limits formatted via
	 * {@link Tools#formatNumber(double, int)} using 2 as second argument.
	 */
	private static String rangeName(String attributeName, double lowerBorder, double upperBorder) {
		return attributeName + " in " + Tools.formatNumber(lowerBorder, 2) + " to "
				+ Tools.formatNumber(upperBorder, 2);
	}

	/**
	 * This method recursively adds splitting points from all numerical split conditions.
	 *
	 * @param root
	 *            the root of the (sub)tree to inspect
	 * @param attributePointMap
	 *            receives the split values, grouped by attribute name
	 */
	private void addNodeSplitPoints(Tree root, Map<String, List<Double>> attributePointMap) {
		// add split point of this node; only the condition of the first outgoing edge is looked at
		if (!root.isLeaf()) {
			SplitCondition condition = root.childIterator().next().getCondition();
			if (condition instanceof GreaterSplitCondition) {
				addSplit(condition.getAttributeName(), ((GreaterSplitCondition) condition).getValue(), attributePointMap);
			} else if (condition instanceof LessEqualsSplitCondition) {
				addSplit(condition.getAttributeName(), ((LessEqualsSplitCondition) condition).getValue(), attributePointMap);
			}
		}

		// recursive descent
		Iterator<Edge> iterator = root.childIterator();
		while (iterator.hasNext()) {
			addNodeSplitPoints(iterator.next().getChild(), attributePointMap);
		}
	}

	/**
	 * Appends the value to the list stored for the attribute name, creating the list on first use.
	 */
	private void addSplit(String attributeName, double value, Map<String, List<Double>> attributePointMap) {
		List<Double> valueList = attributePointMap.get(attributeName);
		if (valueList == null) {
			valueList = new LinkedList<Double>();
			attributePointMap.put(attributeName, valueList);
		}
		valueList.add(value);
	}

	/**
	 * Reports binominal, polynominal and numerical attributes, binominal and polynominal labels,
	 * weighted examples and missing values as supported; every other capability is rejected.
	 */
	@Override
	public boolean supportsCapability(OperatorCapability capability) {
		switch (capability) {
			case BINOMINAL_ATTRIBUTES:
			case POLYNOMINAL_ATTRIBUTES:
			case NUMERICAL_ATTRIBUTES:
			case POLYNOMINAL_LABEL:
			case BINOMINAL_LABEL:
			case WEIGHTED_EXAMPLES:
			case MISSING_VALUES:
				return true;
			default:
				return false;
		}
	}

	/**
	 * Returns the incompatible version changes of the super class with
	 * {@link #VERSION_NOT_DISCRETIZING} appended.
	 */
	@Override
	public OperatorVersion[] getIncompatibleVersionChanges() {
		OperatorVersion[] changes = super.getIncompatibleVersionChanges();
		changes = Arrays.copyOf(changes, changes.length + 1);
		changes[changes.length - 1] = VERSION_NOT_DISCRETIZING;
		return changes;
	}

}