ImprovedNeuralNetLearner.java example

Explorer
rapidminer-vega-master
/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions.neuralnet;

import java.util.List;

import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorCapability;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.AbstractLearner;
import com.rapidminer.operator.learner.PredictionModel;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.RandomGenerator;

/**
 * <p>This operator learns a model by means of a feed-forward neural network trained by a
 * backpropagation algorithm (multi-layer perceptron). The user can define the structure 
 * of the neural network with the parameter list "hidden_layers". Each list entry 
 * describes a new hidden layer. The key of each entry must correspond to the layer name.
 * The value of each entry must be a number defining the size of the hidden layer. A size value
 * of -1 indicates that the layer size should be calculated from the number of attributes
 * of the input example set. In this case, the layer size will be set to 
 * (number of attributes + number of classes) / 2 + 1.</p>
 * 
 * <p>If the user does not specify any hidden layers, a default hidden layer with 
 * sigmoid type and size (number of attributes + number of classes) / 2 + 1 will be created and 
 * added to the net. If only a single layer without nodes is specified, the input nodes are 
 * directly connected to the output nodes and no hidden layer will be used.</p>
 * 
 * <p>The used activation function is the usual sigmoid function. Therefore, the values ranges
 * of the attributes should be scaled to -1 and +1. This is also done by this operator if
 * not specified otherwise by the corresponding parameter setting. The type of the output node 
 * is sigmoid if the learning data describes a classification task and linear for numerical 
 * regression tasks.</p>
 * 
 * @rapidminer.index Neural Net
 * 
 * @author Ingo Mierswa
 */
public class ImprovedNeuralNetLearner extends AbstractLearner {	

	/** The parameter name for "The number of hidden layers. Only used if no layers are defined by the list hidden_layer_types." */
	public static final String PARAMETER_HIDDEN_LAYERS = "hidden_layers";

	/** The parameter name for "The number of training cycles used for the neural network training." */
	public static final String PARAMETER_TRAINING_CYCLES = "training_cycles";

	/** The parameter name for "The optimization is stopped if the training error gets below this epsilon value." */
	public static final String PARAMETER_ERROR_EPSILON = "error_epsilon";

	/** The parameter name for "The learning rate determines by how much we change the weights at each step." */
	public static final String PARAMETER_LEARNING_RATE = "learning_rate";

	/** The parameter name for "The momentum simply adds a fraction of the previous weight update to the current one (prevent local maxima and smoothes optimization directions)." */
	public static final String PARAMETER_MOMENTUM = "momentum";

	/** Indicates if the learning rate should be cooled down. */
	public static final String PARAMETER_DECAY = "decay";

	/** Indicates if the input data should be shuffled before learning. */
	public static final String PARAMETER_SHUFFLE = "shuffle";

	/** Indicates if the input data should be normalized between -1 and 1 before learning. */
	public static final String PARAMETER_NORMALIZE = "normalize";

	public ImprovedNeuralNetLearner(OperatorDescription description) {
		super(description);
	}


	public Model learn(ExampleSet exampleSet) throws OperatorException {
		ImprovedNeuralNetModel model = new ImprovedNeuralNetModel(exampleSet);

		List<String[]> hiddenLayers = getParameterList(PARAMETER_HIDDEN_LAYERS);
		int maxCycles = getParameterAsInt(PARAMETER_TRAINING_CYCLES);
		double maxError = getParameterAsDouble(PARAMETER_ERROR_EPSILON);
		double learningRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
		double momentum = getParameterAsDouble(PARAMETER_MOMENTUM);
		boolean decay = getParameterAsBoolean(PARAMETER_DECAY);
		boolean shuffle = getParameterAsBoolean(PARAMETER_SHUFFLE);
		boolean normalize = getParameterAsBoolean(PARAMETER_NORMALIZE);
		RandomGenerator randomGenerator = RandomGenerator.getRandomGenerator(this);

		model.train(exampleSet, hiddenLayers, maxCycles, maxError, learningRate, momentum, decay, shuffle, normalize, randomGenerator);
		return model;
	}

	@Override
	public Class<? extends PredictionModel> getModelClass() {
		return ImprovedNeuralNetModel.class;
	}
	
	/**
	 * Returns true for all types of attributes and numerical and binominal labels.
	 */
	public boolean supportsCapability(OperatorCapability lc) {
		switch (lc) {
		case NUMERICAL_ATTRIBUTES:
		case POLYNOMINAL_LABEL:
		case BINOMINAL_LABEL:
		case NUMERICAL_LABEL:
		case WEIGHTED_EXAMPLES:
			return true;			
		default:
			return false;
		}
	}
	
	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();

		ParameterType type = new ParameterTypeList(PARAMETER_HIDDEN_LAYERS, "Describes the name and the size of all hidden layers.", 
				new ParameterTypeString("hidden_layer_name", "The name of the hidden layer."),
				new ParameterTypeInt("hidden_layer_sizes", "The size of the hidden layers. A size of < 0 leads to a layer size of (number_of_attributes + number of classes) / 2 + 1.", -1, Integer.MAX_VALUE, -1));
		type.setExpert(false);
		types.add(type);

		type = new ParameterTypeInt(PARAMETER_TRAINING_CYCLES, "The number of training cycles used for the neural network training.", 1, Integer.MAX_VALUE, 500);
		type.setExpert(false);
		types.add(type);

		type = new ParameterTypeDouble(PARAMETER_LEARNING_RATE, "The learning rate determines by how much we change the weights at each step. May not be 0.", Double.MIN_VALUE, 1.0d, 0.3d);
		type.setExpert(false);
		types.add(type);

		types.add(new ParameterTypeDouble(PARAMETER_MOMENTUM, "The momentum simply adds a fraction of the previous weight update to the current one (prevent local maxima and smoothes optimization directions).", 0.0d, 1.0d, 0.2d));

		types.add(new ParameterTypeBoolean(PARAMETER_DECAY, "Indicates if the learning rate should be decreased during learningh", false));

		types.add(new ParameterTypeBoolean(PARAMETER_SHUFFLE, "Indicates if the input data should be shuffled before learning (increases memory usage but is recommended if data is sorted before)", true));

		types.add(new ParameterTypeBoolean(PARAMETER_NORMALIZE, "Indicates if the input data should be normalized between -1 and +1 before learning (increases runtime but is in most cases necessary)", true));

		types.add(new ParameterTypeDouble(PARAMETER_ERROR_EPSILON, "The optimization is stopped if the training error gets below this epsilon value.", 0.0d, Double.POSITIVE_INFINITY, 0.00001d));

		types.addAll(RandomGenerator.getRandomGeneratorParameters(this));

		return types;
	}
}