/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.learner.functions.neuralnet;
import java.util.List;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Tools;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorCapability;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.AbstractLearner;
import com.rapidminer.operator.learner.PredictionModel;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.RandomGenerator;
/**
* <p>
* This operator learns a model by means of a feed-forward neural network trained by a
* backpropagation algorithm (multi-layer perceptron). The user can define the structure of the
* neural network with the parameter list "hidden_layers". Each list entry describes a new
* hidden layer. The key of each entry must correspond to the layer name. The value of each entry
* must be a number defining the size of the hidden layer. A size value of -1 indicates that the
* layer size should be calculated from the number of attributes of the input example set. In this
* case, the layer size will be set to (number of attributes + number of classes) / 2 + 1.
* </p>
*
* <p>
* If the user does not specify any hidden layers, a default hidden layer with sigmoid type and size
* (number of attributes + number of classes) / 2 + 1 will be created and added to the net. If only
* a single layer without nodes is specified, the input nodes are directly connected to the output
* nodes and no hidden layer will be used.
* </p>
*
* <p>
* The used activation function is the usual sigmoid function. Therefore, the values ranges of the
* attributes should be scaled to -1 and +1. This is also done by this operator if not specified
* otherwise by the corresponding parameter setting. The type of the output node is sigmoid if the
* learning data describes a classification task and linear for numerical regression tasks.
* </p>
*
* @rapidminer.index Neural Net
*
* @author Ingo Mierswa
*/
public class ImprovedNeuralNetLearner extends AbstractLearner {
/**
* The parameter name for "The number of hidden layers. Only used if no layers are defined
* by the list hidden_layer_types."
*/
public static final String PARAMETER_HIDDEN_LAYERS = "hidden_layers";
/**
* The parameter name for "The number of training cycles used for the neural network
* training."
*/
public static final String PARAMETER_TRAINING_CYCLES = "training_cycles";
/**
* The parameter name for "The optimization is stopped if the training error gets below
* this epsilon value."
*/
public static final String PARAMETER_ERROR_EPSILON = "error_epsilon";
/**
* The parameter name for "The learning rate determines by how much we change the weights
* at each step."
*/
public static final String PARAMETER_LEARNING_RATE = "learning_rate";
/**
* The parameter name for "The momentum simply adds a fraction of the previous weight
* update to the current one (prevent local maxima and smoothes optimization directions)."
*/
public static final String PARAMETER_MOMENTUM = "momentum";
/** Indicates if the learning rate should be cooled down. */
public static final String PARAMETER_DECAY = "decay";
/** Indicates if the input data should be shuffled before learning. */
public static final String PARAMETER_SHUFFLE = "shuffle";
/** Indicates if the input data should be normalized between -1 and 1 before learning. */
public static final String PARAMETER_NORMALIZE = "normalize";
public ImprovedNeuralNetLearner(OperatorDescription description) {
super(description);
}
@Override
public Model learn(ExampleSet exampleSet) throws OperatorException {
Tools.onlyNonMissingValues(exampleSet, getOperatorClassName(), this, new String[0]);
ImprovedNeuralNetModel model = new ImprovedNeuralNetModel(exampleSet);
List<String[]> hiddenLayers = getParameterList(PARAMETER_HIDDEN_LAYERS);
int maxCycles = getParameterAsInt(PARAMETER_TRAINING_CYCLES);
double maxError = getParameterAsDouble(PARAMETER_ERROR_EPSILON);
double learningRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
double momentum = getParameterAsDouble(PARAMETER_MOMENTUM);
boolean decay = getParameterAsBoolean(PARAMETER_DECAY);
boolean shuffle = getParameterAsBoolean(PARAMETER_SHUFFLE);
boolean normalize = getParameterAsBoolean(PARAMETER_NORMALIZE);
RandomGenerator randomGenerator = RandomGenerator.getRandomGenerator(this);
model.train(exampleSet, hiddenLayers, maxCycles, maxError, learningRate, momentum, decay, shuffle, normalize,
randomGenerator, this);
return model;
}
@Override
public Class<? extends PredictionModel> getModelClass() {
return ImprovedNeuralNetModel.class;
}
/**
* Returns true for all types of attributes and numerical and binominal labels.
*/
@Override
public boolean supportsCapability(OperatorCapability lc) {
switch (lc) {
case NUMERICAL_ATTRIBUTES:
case POLYNOMINAL_LABEL:
case BINOMINAL_LABEL:
case NUMERICAL_LABEL:
case WEIGHTED_EXAMPLES:
return true;
// $CASES-OMITTED$
default:
return false;
}
}
@Override
public List<ParameterType> getParameterTypes() {
List<ParameterType> types = super.getParameterTypes();
ParameterType type = new ParameterTypeList(
PARAMETER_HIDDEN_LAYERS,
"Describes the name and the size of all hidden layers.",
new ParameterTypeString("hidden_layer_name", "The name of the hidden layer."),
new ParameterTypeInt(
"hidden_layer_sizes",
"The size of the hidden layers. A size of < 0 leads to a layer size of (number_of_attributes + number of classes) / 2 + 1.",
-1, Integer.MAX_VALUE, -1));
type.setExpert(false);
types.add(type);
type = new ParameterTypeInt(PARAMETER_TRAINING_CYCLES,
"The number of training cycles used for the neural network training.", 1, Integer.MAX_VALUE, 500);
type.setExpert(false);
types.add(type);
type = new ParameterTypeDouble(PARAMETER_LEARNING_RATE,
"The learning rate determines by how much we change the weights at each step. May not be 0.",
Double.MIN_VALUE, 1.0d, 0.3d);
type.setExpert(false);
types.add(type);
types.add(new ParameterTypeDouble(
PARAMETER_MOMENTUM,
"The momentum simply adds a fraction of the previous weight update to the current one (prevent local maxima and smoothes optimization directions).",
0.0d, 1.0d, 0.2d));
types.add(new ParameterTypeBoolean(PARAMETER_DECAY,
"Indicates if the learning rate should be decreased during learningh", false));
types.add(new ParameterTypeBoolean(
PARAMETER_SHUFFLE,
"Indicates if the input data should be shuffled before learning (increases memory usage but is recommended if data is sorted before)",
true));
types.add(new ParameterTypeBoolean(
PARAMETER_NORMALIZE,
"Indicates if the input data should be normalized between -1 and +1 before learning (increases runtime but is in most cases necessary)",
true));
types.add(new ParameterTypeDouble(PARAMETER_ERROR_EPSILON,
"The optimization is stopped if the training error gets below this epsilon value.", 0.0d,
Double.POSITIVE_INFINITY, 0.00001d));
types.addAll(RandomGenerator.getRandomGeneratorParameters(this));
return types;
}
}