// TransformedRegression.java example
//
// Explorer
// rapidminer-vega-master
/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.meta;

import java.util.Iterator;
import java.util.List;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Statistics;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorCapability;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.ProcessSetupError.Severity;
import com.rapidminer.operator.ports.PortPairExtender;
import com.rapidminer.operator.ports.metadata.AttributeMetaData;
import com.rapidminer.operator.ports.metadata.ExampleSetMetaData;
import com.rapidminer.operator.ports.metadata.MDReal;
import com.rapidminer.operator.ports.metadata.MetaData;
import com.rapidminer.operator.ports.metadata.SetRelation;
import com.rapidminer.operator.ports.metadata.SimpleMetaDataError;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.conditions.EqualTypeCondition;
import com.rapidminer.tools.Ontology;


/**
 * This meta learner applies a transformation on the label before the inner
 * regression learner is applied. The learned inner model is wrapped in a
 * {@link TransformedRegressionModel} together with the parameters of the
 * transformation.
 * <p>
 * Please note that the logistic link function will only work for probabilities or
 * other labels with a range from 0 to 1 exclusive. If a value exceeds this range, it is
 * set to the nearest possible element within this range.
 * 
 * @author Stefan Rueping, Ingo Mierswa, Sebastian Land
 */
public class TransformedRegression extends AbstractMetaLearner {

	/** The parameter name for "Type of transformation to use on the labels (log, exp, transform to mean 0 and variance 1, rank, or none)." */
	public static final String PARAMETER_TRANSFORMATION_METHOD = "transformation_method";

	/** The parameter name for "Scale transformed values to mean 0 and standard deviation 1?" */
	public static final String PARAMETER_Z_SCALE = "z_scale";

	/** The parameter name for "Interpolate prediction if predicted rank is not an integer?" */
	public static final String PARAMETER_INTERPOLATE_RANK = "interpolate_rank";

	/** Upper clamp applied to labels before the logistic link so that the logit stays finite. */
	private static final double LOG_LINK_UPPER_BOUND = 0.99999999999d;

	/** Lower clamp applied to labels before the logistic link so that the logit stays finite. */
	private static final double LOG_LINK_LOWER_BOUND = 0.00000000001d;

	/** Pairs the inner sinks of the subprocess with output ports of this operator. */
	private final PortPairExtender through = new PortPairExtender("through", getSubprocess(0).getInnerSinks(), getOutputPorts());

	/**
	 * Creates the operator, starts the "through" port extender and registers its
	 * pass-through rule with the meta data transformer.
	 * 
	 * @param description the operator description handed to the super class
	 */
	public TransformedRegression(OperatorDescription description) {
		super(description);

		through.start();

		getTransformer().addRule(through.makePassThroughRule());
	}

	/**
	 * Replaces the label meta data by meta data of the transformed label. If the label is
	 * missing or its presence is unknown, a meta data error is attached to the training
	 * set input port and the meta data is returned untouched.
	 * <p>
	 * Note that the passed meta data object itself is modified and returned.
	 * 
	 * @param unmodifiedMetaData the meta data of the training example set
	 * @return the same meta data object, with the label replaced if one was present
	 */
	@Override
	protected MetaData modifyExampleSetMetaData(ExampleSetMetaData unmodifiedMetaData) {
		switch (unmodifiedMetaData.hasSpecial(Attributes.LABEL_NAME)) {
		case NO:
			getTrainingSetInputPort().addError(new SimpleMetaDataError(Severity.ERROR, getTrainingSetInputPort(), "special_missing", "label"));
			return unmodifiedMetaData;
		case UNKNOWN:
			getTrainingSetInputPort().addError(new SimpleMetaDataError(Severity.WARNING, getTrainingSetInputPort(), "special_unknown", "label"));
			return unmodifiedMetaData;
		case YES:
			AttributeMetaData labelMD = unmodifiedMetaData.getLabelMetaData();
			unmodifiedMetaData.removeAttribute(labelMD);
			AttributeMetaData transformedMD = labelMD.copy();
			transformedMD.setName(transformedLabelName(labelMD.getName()));
			// TODO: Transform values instead of setting unknown
			transformedMD.setValueSetRelation(SetRelation.UNKNOWN);
			transformedMD.setMean(new MDReal());

			unmodifiedMetaData.addAttribute(transformedMD);
			return unmodifiedMetaData;
		default:
			return unmodifiedMetaData;
		}
	}

	/**
	 * Learns a regression model on a transformed copy of the label.
	 * <ol>
	 * <li>A new real-valued attribute holding the transformed label is added to the
	 * example table of a clone of the input set.</li>
	 * <li>If z-scaling is switched on, the transformed values are shifted by their mean
	 * and divided by their standard deviation.</li>
	 * <li>The inner learner is applied on the clone with the transformed attribute as
	 * label, and the result is wrapped in a {@link TransformedRegressionModel}.</li>
	 * </ol>
	 * 
	 * @param inputSet the training examples; its label statistics are recalculated
	 * @return the model wrapping the inner model and the transformation parameters
	 * @throws OperatorException if a parameter cannot be read or the inner learner fails
	 */
	public Model learn(ExampleSet inputSet) throws OperatorException {
		int method = getParameterAsInt(PARAMETER_TRANSFORMATION_METHOD);
		double[] rank = null;
		double mean = 0.0d;
		double stddev = 1.0d;

		Attribute label = inputSet.getAttributes().getLabel();
		inputSet.recalculateAttributeStatistics(label);

		ExampleSet eSet = (ExampleSet) inputSet.clone();
		Attribute tempLabel = AttributeFactory.createAttribute(transformedLabelName(label.getName()), Ontology.REAL);
		eSet.getExampleTable().addAttribute(tempLabel);

		// 1. Set new regression labels
		switch (method) {
		case TransformedRegressionModel.LOG: {
			// shift the label so that its minimum maps to log(1) = 0
			double offset = 1.0d - inputSet.getStatistics(label, Statistics.MINIMUM);
			// the offset is handed to the model in the (otherwise unused) rank array
			rank = new double[] { offset };
			for (Example e : eSet) {
				e.setValue(tempLabel, Math.log(offset + e.getValue(label)));
			}
			break;
		}
		case TransformedRegressionModel.LOG_LINK:
			for (Example e : eSet) {
				double value = e.getValue(label);
				// clamp into the open interval (0, 1)
				if (value >= 1d) {
					value = LOG_LINK_UPPER_BOUND;
				}
				if (value <= 0d) {
					value = LOG_LINK_LOWER_BOUND;
				}
				e.setValue(tempLabel, Math.log(value / (1 - value)));
			}
			break;
		case TransformedRegressionModel.EXP:
			for (Example e : eSet) {
				e.setValue(tempLabel, Math.exp(e.getValue(label)));
			}
			break;
		case TransformedRegressionModel.RANK: {
			double[] labelValues = new double[eSet.size()];
			int index = 0;
			for (Example e : eSet) {
				labelValues[index++] = e.getValue(label);
			}
			rank = sortedDistinctValues(labelValues);

			// every label value is contained in rank, hence the search result is its index
			for (Example e : eSet) {
				e.setValue(tempLabel, java.util.Arrays.binarySearch(rank, e.getValue(label)));
			}
			break;
		}
		case TransformedRegressionModel.NONE:
			// just for convenience...
			for (Example e : eSet) {
				e.setValue(tempLabel, e.getValue(label));
			}
			break;
		default:
			// cannot happen
			break;
		}

		boolean zScale = getParameterAsBoolean(PARAMETER_Z_SCALE);
		if (zScale) {
			eSet.recalculateAttributeStatistics(tempLabel);
			mean = eSet.getStatistics(tempLabel, Statistics.AVERAGE);
			double variance = eSet.getStatistics(tempLabel, Statistics.VARIANCE);
			// The standard deviation is the square root of the variance. A zero, negative
			// (numerical error) or NaN variance falls back to a scale of 1.
			stddev = (variance > 0.0d) ? Math.sqrt(variance) : 1.0d;
			for (Example e : eSet) {
				e.setValue(tempLabel, (e.getValue(tempLabel) - mean) / stddev);
			}
		}

		// 2. Apply learner
		eSet.getAttributes().remove(label);
		eSet.getAttributes().addRegular(tempLabel);
		eSet.getAttributes().setLabel(tempLabel);
		Model model = applyInnerLearner(eSet);
		// NOTE(review): the model receives exactly the mean / stddev used above; it is
		// assumed to invert the scaling with these same values - confirm in TransformedRegressionModel.
		TransformedRegressionModel resultModel = new TransformedRegressionModel(inputSet, method, rank, model, zScale, mean, stddev, getParameterAsBoolean(PARAMETER_INTERPOLATE_RANK));

		// passing inner data 
		through.passDataThrough();

		return resultModel;
	}

	/** Builds the name of the temporary attribute which holds the transformed label. */
	private static String transformedLabelName(String labelName) {
		return "transformation(" + labelName + ")";
	}

	/**
	 * Sorts the given array in place and returns a new array containing each distinct
	 * value once, in ascending order. Distinctness is decided by {@link Double#compare}
	 * so that it agrees with the ordering used by {@code Arrays.sort} and
	 * {@code Arrays.binarySearch} (which tell -0.0 and 0.0 apart and treat all NaN
	 * values as equal).
	 * 
	 * @param values the values to process; sorted as a side effect
	 * @return the sorted distinct values; empty if {@code values} is empty
	 */
	private static double[] sortedDistinctValues(double[] values) {
		if (values.length == 0) {
			return new double[0];
		}
		java.util.Arrays.sort(values);
		// compact distinct values to the front of the array
		int last = 0;
		for (int j = 1; j < values.length; j++) {
			if (Double.compare(values[last], values[j]) != 0) {
				last++;
				values[last] = values[j];
			}
		}
		return java.util.Arrays.copyOf(values, last + 1);
	}

	/**
	 * Adds the transformation method, the z-scale switch and the rank interpolation
	 * switch to the parameters of the super class. The interpolation switch is only
	 * shown for the rank transformation.
	 * 
	 * @return the parameter types of this operator
	 */
	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();
		ParameterType type = new ParameterTypeCategory(PARAMETER_TRANSFORMATION_METHOD, "Type of transformation to use on the labels (log, exp, transform to mean 0 and variance 1, rank, or none).", TransformedRegressionModel.METHODS, TransformedRegressionModel.LOG);
		type.setExpert(false);
		types.add(type);
		type = new ParameterTypeBoolean(PARAMETER_Z_SCALE, "If checked the values will be normalized to mean 0 and standard deviation 1.", false);
		type.setExpert(true);
		types.add(type);
		type = new ParameterTypeBoolean(PARAMETER_INTERPOLATE_RANK, "If checked and predicted rank is not an integer, it will be interpolated.", true);
		type.registerDependencyCondition(new EqualTypeCondition(this, PARAMETER_TRANSFORMATION_METHOD, TransformedRegressionModel.METHODS, false, TransformedRegressionModel.RANK));
		type.setExpert(true);
		types.add(type);
		return types;
	}

	/**
	 * Reports the supported capabilities: everything except nominal labels, missing
	 * labels, updatability and formula provision.
	 * 
	 * @param capability the capability in question
	 * @return {@code false} for the capabilities listed above, {@code true} otherwise
	 */
	@Override
	public boolean supportsCapability(OperatorCapability capability) {
		switch (capability) {
		case POLYNOMINAL_LABEL:
		case BINOMINAL_LABEL:
		case NO_LABEL:
		case UPDATABLE:
		case FORMULA_PROVIDER:
			return false;
		default:
			return true;
		}
	}
}