// SimpleNeuralNetLearner.java example
//
// Explorer
// rapidminer-vega-master
/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions.neuralnet;

import java.util.List;

import org.encog.neural.activation.ActivationLinear;
import org.encog.neural.activation.ActivationSigmoid;
import org.encog.neural.data.NeuralDataSet;
import org.encog.neural.data.basic.BasicNeuralDataSet;
import org.encog.neural.networks.BasicNetwork;
import org.encog.neural.networks.Train;
import org.encog.neural.networks.layers.FeedforwardLayer;
import org.encog.neural.networks.training.backpropagation.Backpropagation;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Statistics;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorCapability;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.AbstractLearner;
import com.rapidminer.operator.learner.PredictionModel;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.RandomGenerator;


/**
 * <p>This operator learns a model by means of a feed-forward neural network. The learning is
 * done via backpropagation.</p>
 *
 * <p>The network is built from an input layer with as many nodes as the example set has
 * attributes, a configurable number of equally sized hidden layers (parameters
 * default_number_of_hidden_layers and default_hidden_layer_size) and a single output node.
 * A hidden layer size of -1 or 0 indicates that the layer size should be calculated from
 * the number of attributes of the input example set. In this case, the layer size will be
 * set to round(number of attributes / 2) + 1. The output node uses a sigmoid activation
 * function for nominal labels and a linear activation function otherwise.</p>
 *
 * <p>Before training, every attribute value (and every numerical label value) is rescaled
 * with the minimum and maximum observed in the training data. These ranges are passed on
 * to the created model.</p>
 *
 * <p>Note: the constants for define_different_hidden_layers and hidden_layer_sizes are
 * declared here, but this class neither registers them as parameters nor reads them.
 * Hidden layers are always created from the default parameters.</p>
 *
 * @rapidminer.index Neural Net
 *
 * @author Ingo Mierswa
 */
public class SimpleNeuralNetLearner extends AbstractLearner {

    /** Parameter name; declared but neither registered nor read by this class. */
    public static final String PARAMETER_DEFINE_DIFFERENT_HIDDEN_LAYERS = "define_different_hidden_layers";

    /** Parameter name; declared but neither registered nor read by this class. */
    public static final String PARAMETER_HIDDEN_LAYER_SIZES = "hidden_layer_sizes";

    /** The parameter name for "The number of hidden layers." */
    public static final String PARAMETER_DEFAULT_NUMBER_OF_HIDDEN_LAYERS = "default_number_of_hidden_layers";

    /** The parameter name for "The default size of hidden layers." Values of 0 or below select round(number of attributes / 2) + 1. */
    public static final String PARAMETER_DEFAULT_HIDDEN_LAYER_SIZE = "default_hidden_layer_size";

    /** The parameter name for "The number of training cycles used for the neural network training." */
    public static final String PARAMETER_TRAINING_CYCLES = "training_cycles";

    /** The parameter name for "The learning rate determines by how much we change the weights at each step." */
    public static final String PARAMETER_LEARNING_RATE = "learning_rate";

    /** The parameter name for "The momentum simply adds a fraction of the previous weight update to the current one (prevent local maxima and smoothes optimization directions)." */
    public static final String PARAMETER_MOMENTUM = "momentum";

    /** The parameter name for "The optimization is stopped if the training error gets below this epsilon value." */
    public static final String PARAMETER_ERROR_EPSILON = "error_epsilon";

    // Value ranges of the training data. They are filled by getTraining(ExampleSet) and
    // handed to the model in learn(ExampleSet).
    private double[] attributeMin;
    private double[] attributeMax;
    private double labelMin;
    private double labelMax;

    public SimpleNeuralNetLearner(OperatorDescription description) {
        super(description);
    }

    /**
     * Builds the network, converts the example set into a training set, trains the network
     * and wraps the result into a {@link SimpleNeuralNetModel}.
     *
     * @param exampleSet the training data
     * @return the model holding the trained network and the value ranges used for scaling
     * @throws OperatorException if the network cannot be created from the operator parameters
     */
    @Override
    public Model learn(ExampleSet exampleSet) throws OperatorException {
        BasicNetwork network = getNetwork(exampleSet);

        // getTraining also fills the attribute / label range fields passed to the model below
        NeuralDataSet trainingSet = getTraining(exampleSet);

        double learningRate = getParameterAsDouble(PARAMETER_LEARNING_RATE);
        double momentum = getParameterAsDouble(PARAMETER_MOMENTUM);
        double errorEpsilon = getParameterAsDouble(PARAMETER_ERROR_EPSILON);
        int trainingCycles = getParameterAsInt(PARAMETER_TRAINING_CYCLES);

        network = trainNetwork(network, trainingSet, learningRate, momentum, errorEpsilon, trainingCycles);
        return new SimpleNeuralNetModel(exampleSet, network, attributeMin, attributeMax, labelMin, labelMax);
    }

    /**
     * Creates the untrained network: input layer, default hidden layers and one output node.
     * The weights are initialized with the random generator configured for this operator.
     *
     * @param exampleSet the data that determines the input layer size and the output activation
     * @return the freshly initialized network
     * @throws OperatorException if a parameter cannot be read
     */
    private BasicNetwork getNetwork(ExampleSet exampleSet) throws OperatorException {
        BasicNetwork network = new BasicNetwork();

        // input layer
        network.addLayer(new FeedforwardLayer(exampleSet.getAttributes().size()));

        // hidden layers
        log("No hidden layers defined. Using default hidden layers.");
        int layerSize = getParameterAsInt(PARAMETER_DEFAULT_HIDDEN_LAYER_SIZE);
        if (layerSize <= 0) {
            layerSize = getDefaultLayerSize(exampleSet);
        }
        // read the parameter once instead of on every loop test
        final int numberOfHiddenLayers = getParameterAsInt(PARAMETER_DEFAULT_NUMBER_OF_HIDDEN_LAYERS);
        for (int p = 0; p < numberOfHiddenLayers; p++) {
            network.addLayer(new FeedforwardLayer(layerSize));
        }

        // output layer: sigmoid for nominal labels, linear otherwise
        if (exampleSet.getAttributes().getLabel().isNominal()) {
            network.addLayer(new FeedforwardLayer(new ActivationSigmoid(), 1));
        } else {
            network.addLayer(new FeedforwardLayer(new ActivationLinear(), 1));
        }

        boolean useLocalSeed = getParameterAsBoolean(RandomGenerator.PARAMETER_USE_LOCAL_RANDOM_SEED);
        int localSeed = getParameterAsInt(RandomGenerator.PARAMETER_LOCAL_RANDOM_SEED);
        network.reset(RandomGenerator.getRandomGenerator(useLocalSeed, localSeed));

        return network;
    }

    /**
     * Returns the hidden layer size used when no positive size was configured:
     * round(number of attributes / 2) + 1.
     */
    private int getDefaultLayerSize(ExampleSet exampleSet) {
        return (int) Math.round(exampleSet.getAttributes().size() / 2.0d) + 1;
    }

    /**
     * Converts the example set into an Encog training set and remembers the value ranges.
     *
     * <p>Each attribute value is mapped to (value - min) / (max - min); if min equals max
     * only the minimum is subtracted. Numerical labels are scaled the same way, nominal
     * labels are passed through as the value returned by {@code example.getValue(label)}.</p>
     *
     * <p>Side effects: recalculates the statistics of the example set and overwrites
     * {@code attributeMin}, {@code attributeMax}, {@code labelMin} and {@code labelMax}.</p>
     *
     * @param exampleSet the training data
     * @return the scaled input rows together with one target value per row
     */
    private NeuralDataSet getTraining(ExampleSet exampleSet) {
        final int attributeCount = exampleSet.getAttributes().size();
        double[][] data   = new double[exampleSet.size()][attributeCount];
        double[][] labels = new double[exampleSet.size()][1];
        Attribute label = exampleSet.getAttributes().getLabel();

        // collect the value range of every attribute
        this.attributeMin = new double[attributeCount];
        this.attributeMax = new double[attributeCount];
        exampleSet.recalculateAllAttributeStatistics();
        int a = 0;
        for (Attribute attribute : exampleSet.getAttributes()) {
            this.attributeMin[a] = exampleSet.getStatistics(attribute, Statistics.MINIMUM);
            this.attributeMax[a] = exampleSet.getStatistics(attribute, Statistics.MAXIMUM);
            a++;
        }

        // NOTE(review): the label range is queried for nominal labels too, although it is
        // only used below for numerical labels - confirm the statistics call is valid there.
        this.labelMin = exampleSet.getStatistics(label, Statistics.MINIMUM);
        this.labelMax = exampleSet.getStatistics(label, Statistics.MAXIMUM);

        int index = 0;
        for (Example example : exampleSet) {
            // attributes
            a = 0;
            for (Attribute attribute : exampleSet.getAttributes()) {
                double shifted = example.getValue(attribute) - attributeMin[a];
                if (attributeMin[a] != attributeMax[a]) {
                    data[index][a] = shifted / (attributeMax[a] - attributeMin[a]);
                } else {
                    // constant attribute: avoid division by zero
                    data[index][a] = shifted;
                }
                a++;
            }

            // label
            if (label.isNominal()) {
                labels[index][0] = example.getValue(label);
            } else if (labelMax != labelMin) {
                labels[index][0] = (example.getValue(label) - labelMin) / (labelMax - labelMin);
            } else {
                // constant label: avoid division by zero
                labels[index][0] = example.getValue(label) - labelMin;
            }

            index++;
        }

        return new BasicNeuralDataSet(data, labels);
    }

    /**
     * Trains the network with backpropagation. At least one iteration is always performed;
     * training stops after {@code maxIteration} iterations or as soon as the training error
     * is no longer greater than {@code maxError}, whichever comes first.
     *
     * @param network the network to train
     * @param trainingSet the scaled training data
     * @param learningRate the backpropagation learning rate
     * @param momentum the backpropagation momentum
     * @param maxError the error threshold that ends the training
     * @param maxIteration the maximum number of training iterations
     * @return the network delivered by the trainer
     */
    private BasicNetwork trainNetwork(BasicNetwork network, NeuralDataSet trainingSet, double learningRate, double momentum, double maxError, int maxIteration) {
        final Train train = new Backpropagation(network, trainingSet, learningRate, momentum);

        // Counts finished iterations. Starting at 0 (not 1) lets the full number of
        // configured training cycles run instead of one less.
        int epoch = 0;

        do {
            train.iteration();
            epoch++;
        } while ((epoch < maxIteration) && (train.getError() > maxError));

        return (BasicNetwork) train.getNetwork();
    }

    @Override
    public Class<? extends PredictionModel> getModelClass() {
        return SimpleNeuralNetModel.class;
    }

    /**
     * Returns true for numerical attributes and for numerical and binominal labels,
     * false for every other capability.
     */
    @Override
    public boolean supportsCapability(OperatorCapability lc) {
        return lc == OperatorCapability.NUMERICAL_ATTRIBUTES
                || lc == OperatorCapability.BINOMINAL_LABEL
                || lc == OperatorCapability.NUMERICAL_LABEL;
    }

    /**
     * Adds the network structure, training and random generator parameters to the
     * parameters of the super class.
     */
    @Override
    public List<ParameterType> getParameterTypes() {
        List<ParameterType> types = super.getParameterTypes();

        // NOTE(review): the two hidden layer descriptions below mention "hidden_layer_types"
        // and a size formula that differs from getDefaultLayerSize; the texts are kept
        // unchanged here because they are user-facing.
        ParameterType type = new ParameterTypeInt(PARAMETER_DEFAULT_NUMBER_OF_HIDDEN_LAYERS, "The number of hidden layers. Only used if no layers are defined by the list hidden_layer_types.", 1, Integer.MAX_VALUE, 1);
        type.setExpert(false);
        types.add(type);

        types.add(new ParameterTypeInt(PARAMETER_DEFAULT_HIDDEN_LAYER_SIZE, "The default size  of hidden layers. Only used if no layers are defined by the list hidden_layer_types. -1 means size (number of attributes + number of classes) / 2", -1, Integer.MAX_VALUE, -1));

        type = new ParameterTypeInt(PARAMETER_TRAINING_CYCLES, "The number of training cycles used for the neural network training.", 1, Integer.MAX_VALUE, 500);
        type.setExpert(false);
        types.add(type);

        type = new ParameterTypeDouble(PARAMETER_LEARNING_RATE, "The learning rate determines by how much we change the weights at each step.", 0.0d, 1.0d, 0.3d);
        type.setExpert(false);
        types.add(type);

        types.add(new ParameterTypeDouble(PARAMETER_MOMENTUM, "The momentum simply adds a fraction of the previous weight update to the current one (prevent local maxima and smoothes optimization directions).", 0.0d, 1.0d, 0.2d));

        types.add(new ParameterTypeDouble(PARAMETER_ERROR_EPSILON, "The optimization is stopped if the training error gets below this epsilon value.", 0.0d, Double.POSITIVE_INFINITY, 0.01d));

        types.addAll(RandomGenerator.getRandomGeneratorParameters(this));

        return types;
    }
}