NeuralNetLearner.java example

Explorer
rapidminer-vega-master
/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.learner.functions.neuralnet;

import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

import org.joone.engine.FullSynapse;
import org.joone.engine.GaussianLayer;
import org.joone.engine.Layer;
import org.joone.engine.LinearLayer;
import org.joone.engine.LogarithmicLayer;
import org.joone.engine.Monitor;
import org.joone.engine.NeuralNetEvent;
import org.joone.engine.NeuralNetListener;
import org.joone.engine.SigmoidLayer;
import org.joone.engine.SineLayer;
import org.joone.engine.TanhLayer;
import org.joone.engine.learning.TeachingSynapse;
import org.joone.engine.listeners.ErrorBasedTerminator;
import org.joone.io.MemoryInputSynapse;
import org.joone.io.MemoryOutputSynapse;
import org.joone.net.NeuralNet;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorCapability;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.learner.AbstractLearner;
import com.rapidminer.operator.learner.PredictionModel;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeDouble;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.parameter.ParameterTypeList;
import com.rapidminer.parameter.UndefinedParameterError;


/**
 * <p>This operator learns a model by means of a feed-forward neural network. The learning is
 * done via backpropagation. The user can define the structure of the neural network with the
 * parameter list "hidden_layer_types". Each list entry describes a new hidden
 * layer. The key of each entry must correspond to the layer type which must be one out of</p>
 * 
 * <ul>
 * <li>linear</li>
 * <li>sigmoid (default)</li>
 * <li>tanh</li>
 * <li>sine</li>
 * <li>logarithmic</li>
 * <li>gaussian</li>
 * </ul>
 * 
 * <p>The value of each entry must be a number defining the size of the hidden layer. A size value
 * of -1 or 0 indicates that the layer size should be calculated from the number of attributes
 * of the input example set. In this case, the layer size will be set to
 * (number of attributes) / 2, rounded to the nearest integer, plus 1.</p>
 * 
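 * <p>If the user does not specify any hidden layers, default hidden layers are created and added
 * to the net. Their number, type, and size are taken from the parameters
 * "default_number_of_hidden_layers" (default: 1), "default_hidden_layer_type" (default: sigmoid),
 * and "default_hidden_layer_size" (default: -1, which triggers the automatic size calculation).</p>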
 * 
 * <p>The types of the input and output layers are defined by the parameters "input_layer_type"
 * (default: linear) and "output_layer_type" (default: sigmoid).</p>
 * 
 * @rapidminer.index Neural Net
 * 
 * @author Ingo Mierswa
 */
public class NeuralNetLearner extends AbstractLearner implements NeuralNetListener {

	/** The parameter name for "The default layer type for the input layers." */
	public static final String PARAMETER_INPUT_LAYER_TYPE = "input_layer_type";

	/** The parameter name for "The default layer type for the output layers." */
	public static final String PARAMETER_OUTPUT_LAYER_TYPE = "output_layer_type";

	/** The parameter name for "The number of hidden layers. Only used if no layers are defined by the list hidden_layer_types." */
	public static final String PARAMETER_DEFAULT_NUMBER_OF_HIDDEN_LAYERS = "default_number_of_hidden_layers";

	/** The parameter name for "The default size  of hidden layers. Only used if no layers are defined by the list hidden_layer_types. -1 means size (number of attributes + number of classes) / 2" */
	public static final String PARAMETER_DEFAULT_HIDDEN_LAYER_SIZE = "default_hidden_layer_size";

	/** The parameter name for "The default layer type for the hidden layers. Only used if the parameter list hidden_layer_types is not defined." */
	public static final String PARAMETER_DEFAULT_HIDDEN_LAYER_TYPE = "default_hidden_layer_type";

	/** The parameter name for "Describes the name, the size, and the type of all hidden layers" */
	public static final String PARAMETER_HIDDEN_LAYER_TYPES = "hidden_layer_types";

	/** The parameter name for "The number of training cycles used for the neural network training." */
	public static final String PARAMETER_TRAINING_CYCLES = "training_cycles";

	/** The parameter name for "The learning rate determines by how much we change the weights at each step." */
	public static final String PARAMETER_LEARNING_RATE = "learning_rate";

	/** The parameter name for "The momentum simply adds a fraction of the previous weight update to the current one (prevent local maxima and smoothes optimization directions)." */
	public static final String PARAMETER_MOMENTUM = "momentum";

	/** The parameter name for "The optimization is stopped if the training error gets below this epsilon value." */
	public static final String PARAMETER_ERROR_EPSILON = "error_epsilon";

	/**
	 * Names of the supported layer types. The position of each name must match the
	 * corresponding index constant below (LINEAR, SIGMOID, ...), since the constants
	 * are used to index into this array.
	 */
	private static final String[] LAYER_TYPES = new String[] {
		"linear",
		"sigmoid",
		"tanh",
		"sine",
		"logarithmic",
		"gaussian"
	};

	// Indices into LAYER_TYPES; also used as default category indices in getParameterTypes().
	private static final int LINEAR      = 0;
	private static final int SIGMOID     = 1;
	private static final int TANH        = 2;
	private static final int SINE        = 3;
	private static final int LOGARITHMIC = 4;
	private static final int GAUSSIAN    = 5;

	/** The network assembled by initNeuralNet() and trained by train(). */
	private NeuralNet neuralNet;

	/** Synapse attached to the input layer; train() hands it the training array. */
	private MemoryInputSynapse inputSynapse;

	/** Synapse registered as the teacher's desired output; train() hands it the training array. */
	private MemoryInputSynapse desiredOutputSynapse;

	/**
	 * Smallest and largest label value found by createInputData(). They are only updated
	 * for non-nominal labels; for nominal labels they stay at +/- infinity.
	 */
	private double minLabel, maxLabel;


	/**
	 * Creates a new Neural Network learner.
	 *
	 * @param description the operator description, passed on unchanged to the superclass constructor
	 */
	public NeuralNetLearner(OperatorDescription description) {
		super(description);
	}

	/**
	 * Computes the layer size used whenever no positive size was configured:
	 * half of the example set's attribute count, rounded, plus one.
	 *
	 * @param exampleSet the example set whose attribute count determines the size
	 * @return the calculated layer size
	 */
	private int getDefaultLayerSize(ExampleSet exampleSet) {
		final double halfOfAttributes = exampleSet.getAttributes().size() / 2.0d;
		final long roundedHalf = Math.round(halfOfAttributes);
		return (int) roundedHalf + 1;
	}

	/**
	 * Builds the network for the given example set, trains it, and wraps the result in a
	 * {@link NeuralNetModel}.
	 *
	 * @param exampleSet the training data
	 * @return the trained model
	 * @throws OperatorException user error 114 if the label is nominal and does not have exactly
	 *         two values; also anything thrown while building or training the network
	 */
	public Model learn(ExampleSet exampleSet) throws OperatorException {
		final Attribute labelAttribute = exampleSet.getAttributes().getLabel();
		final boolean unsupportedNominalLabel =
			labelAttribute.isNominal() && labelAttribute.getMapping().size() != 2;
		if (unsupportedNominalLabel) {
			throw new UserError(this, 114, getName(), labelAttribute.getName());
		}

		initNeuralNet(exampleSet);
		train(exampleSet);

		final int attributeCount = exampleSet.getAttributes().size();
		return new NeuralNetModel(exampleSet, neuralNet, attributeCount, this.minLabel, this.maxLabel);
	}

	/**
	 * Convenience overload for hidden layers: delegates to the four-argument variant
	 * with the fixed layer name "Hidden".
	 */
	private Layer createLayer(String layerTypeName, int size, int counter) {
		final String hiddenLayerName = "Hidden";
		return createLayer(hiddenLayerName, layerTypeName, size, counter);
	}

	/**
	 * Creates a layer of the requested type with the given number of rows and a descriptive name
	 * of the form <code>layerName[-counter] [layerTypeName]</code>.
	 *
	 * <p>The type name is matched case-insensitively against {@link #LAYER_TYPES}. The comparison
	 * uses <code>equalsIgnoreCase</code>, which does not depend on the default locale and
	 * tolerates a <code>null</code> type name. Unknown (or null) type names produce a warning
	 * and a sigmoid layer.</p>
	 *
	 * @param layerName     the base name of the layer, e.g. "Input", "Hidden", or "Output"
	 * @param layerTypeName the name of the layer type, one of the entries of LAYER_TYPES
	 * @param size          the number of rows of the layer
	 * @param counter       index appended to the name; negative values suppress the suffix
	 * @return the configured layer
	 */
	private Layer createLayer(String layerName, String layerTypeName, int size, int counter) {
		final Layer layer;
		if (LAYER_TYPES[LINEAR].equalsIgnoreCase(layerTypeName)) {
			layer = new LinearLayer();
		} else if (LAYER_TYPES[SIGMOID].equalsIgnoreCase(layerTypeName)) {
			layer = new SigmoidLayer();
		} else if (LAYER_TYPES[TANH].equalsIgnoreCase(layerTypeName)) {
			layer = new TanhLayer();
		} else if (LAYER_TYPES[SINE].equalsIgnoreCase(layerTypeName)) {
			layer = new SineLayer();
		} else if (LAYER_TYPES[LOGARITHMIC].equalsIgnoreCase(layerTypeName)) {
			layer = new LogarithmicLayer();
		} else if (LAYER_TYPES[GAUSSIAN].equalsIgnoreCase(layerTypeName)) {
			layer = new GaussianLayer();
		} else {
			logWarning("Cannot create layer of type '" + layerTypeName + "', using sigmoid layer instead.");
			layer = new SigmoidLayer();
		}
		layer.setRows(size);

		// the name keeps the type name exactly as it was passed in
		String name = layerName;
		if (counter >= 0) {
			name += "-" + counter;
		}
		name += " [" + layerTypeName + "]";
		layer.setLayerName(name);
		return layer;
	}

	/**
	 * Builds the complete network structure and stores it in {@link #neuralNet}.
	 *
	 * <p>Steps: create the input and output layers from their type parameters, create the hidden
	 * layers from the list parameter "hidden_layer_types" (falling back to the default_*
	 * parameters if the list yields no layer), fully connect consecutive layers, attach the
	 * memory synapses and the teaching synapse, and register an error based terminator using the
	 * "error_epsilon" parameter. As a side effect the fields {@link #inputSynapse} and
	 * {@link #desiredOutputSynapse} are (re-)created.</p>
	 *
	 * @param exampleSet the training data; its attribute count defines the input layer size and
	 *                   the calculated hidden layer size
	 * @throws UndefinedParameterError if a required parameter is not defined
	 */
	private void initNeuralNet(ExampleSet exampleSet) throws UndefinedParameterError {
		// First create the input and output layers (counter -1: no index suffix in the layer name)
		Layer input = createLayer("Input", LAYER_TYPES[getParameterAsInt(PARAMETER_INPUT_LAYER_TYPE)], exampleSet.getAttributes().size(), -1);
		Layer output = createLayer("Output", LAYER_TYPES[getParameterAsInt(PARAMETER_OUTPUT_LAYER_TYPE)], 1, -1);

		// create the hidden layers listed by the user; each entry is a (type, size) pair
		LinkedList<Layer> allHiddenLayers = new LinkedList<Layer>();
		int counter = 1;
		for (String[] typeSizePair : getParameterList(PARAMETER_HIDDEN_LAYER_TYPES)) {
			String layerType = typeSizePair[0];
			int layerSize = Integer.parseInt(typeSizePair[1]);
			if (layerSize <= 0) {
				layerSize = getDefaultLayerSize(exampleSet);
			}
			allHiddenLayers.add(createLayer(layerType, layerSize, counter));
			counter++;
		}

		// create the default hidden layers if no other layers were created
		if (allHiddenLayers.isEmpty()) {
			log("No hidden layers defined. Using default hidden layers.");
			String layerType = LAYER_TYPES[getParameterAsInt(PARAMETER_DEFAULT_HIDDEN_LAYER_TYPE)];
			int layerSize = getParameterAsInt(PARAMETER_DEFAULT_HIDDEN_LAYER_SIZE);
			if (layerSize <= 0) {
				layerSize = getDefaultLayerSize(exampleSet);
			}
			// read the parameter once instead of on every loop iteration
			int numberOfLayers = getParameterAsInt(PARAMETER_DEFAULT_NUMBER_OF_HIDDEN_LAYERS);
			for (int p = 0; p < numberOfLayers; p++) {
				allHiddenLayers.add(createLayer(layerType, layerSize, (p + 1)));
			}
		}

		// now create the synapses between all consecutive hidden layers
		Layer last = null;
		for (Layer current : allHiddenLayers) {
			if (last != null) {
				FullSynapse synapse_HH = new FullSynapse();
				last.addOutputSynapse(synapse_HH);
				current.addInputSynapse(synapse_HH);
			}
			last = current;
		}

		// Connect the input layer with the first hidden layer
		FullSynapse synapse_IH = new FullSynapse();
		input.addOutputSynapse(synapse_IH);
		allHiddenLayers.getFirst().addInputSynapse(synapse_IH);

		// Connect the last hidden layer with the output layer
		FullSynapse synapse_HO = new FullSynapse();
		allHiddenLayers.getLast().addOutputSynapse(synapse_HO);
		output.addInputSynapse(synapse_HO);

		// the input to the neural net
		inputSynapse = new MemoryInputSynapse();
		input.addInputSynapse(inputSynapse);

		// the output of the neural net
		MemoryOutputSynapse outputSynapse = new MemoryOutputSynapse();
		output.addOutputSynapse(outputSynapse);

		// the trainer and its desired output
		TeachingSynapse trainer = new TeachingSynapse();
		desiredOutputSynapse = new MemoryInputSynapse();
		trainer.setDesired(desiredOutputSynapse);

		// now we add the complete structure to a NeuralNet object
		neuralNet = new NeuralNet();

		neuralNet.addLayer(input, NeuralNet.INPUT_LAYER);
		for (Layer hiddenLayer : allHiddenLayers) {
			neuralNet.addLayer(hiddenLayer, NeuralNet.HIDDEN_LAYER);
		}
		neuralNet.addLayer(output, NeuralNet.OUTPUT_LAYER);
		neuralNet.setTeacher(trainer);
		output.addOutputSynapse(trainer);

		// register a terminator constructed from the error_epsilon parameter
		ErrorBasedTerminator terminator = new ErrorBasedTerminator(getParameterAsDouble(PARAMETER_ERROR_EPSILON));
		terminator.setNeuralNet(neuralNet);
		neuralNet.getMonitor().addNeuralNetListener(terminator);
	}

	/**
	 * Trains the network previously built by initNeuralNet() on the given example set.
	 *
	 * <p>The training array produced by createInputData() is handed to both memory synapses:
	 * columns 1 to (number of attributes) are selected as inputs, the following column as the
	 * desired output. Learning rate, momentum and the number of training cycles are read from the
	 * operator parameters. The method then starts the net, triggers the monitor, and calls
	 * <code>neuralNet.join()</code>.</p>
	 *
	 * @param exampleSet the training data
	 * @throws UndefinedParameterError if a required parameter is not defined
	 */
	public void train(ExampleSet exampleSet) throws UndefinedParameterError {
		final double[][] trainingData = createInputData(exampleSet);
		final int attributeCount = exampleSet.getAttributes().size();

		// attribute columns feed the input layer (column selectors are 1-based)
		inputSynapse.setInputArray(trainingData);
		inputSynapse.setAdvancedColumnSelector("1-" + attributeCount);

		// the column right after the attributes holds the desired output
		desiredOutputSynapse.setInputArray(trainingData);
		desiredOutputSynapse.setAdvancedColumnSelector(String.valueOf(attributeCount + 1));

		// configure the monitor which controls the training
		Monitor monitor = neuralNet.getMonitor();
		monitor.setLearningRate(getParameterAsDouble(PARAMETER_LEARNING_RATE));
		monitor.setMomentum(getParameterAsDouble(PARAMETER_MOMENTUM));
		monitor.setTrainingPatterns(trainingData.length);
		monitor.setTotCicles(getParameterAsInt(PARAMETER_TRAINING_CYCLES));
		monitor.setLearning(true);

		// register this operator as listener, then run the training
		neuralNet.getMonitor().addNeuralNetListener(this);
		neuralNet.start();
		neuralNet.getMonitor().Go();
		neuralNet.join();
	}

	/** Listener callback (this class implements NeuralNetListener); intentionally does nothing. */
	public void cicleTerminated(NeuralNetEvent e) {}

	/** Listener callback (this class implements NeuralNetListener); intentionally does nothing. */
	public void errorChanged(NeuralNetEvent e) {
	}

	/** Listener callback; writes a log message stating that learning has started. */
	public void netStarted(NeuralNetEvent e) {
		log("learning started.");
	}

	/** Listener callback; writes a log message stating that learning has finished. */
	public void netStopped(NeuralNetEvent e) {
		log("learning finished.");
	}

	/**
	 * Listener callback; logs the given error text as an error message.
	 *
	 * @param e     the event passed by the caller (not used here)
	 * @param error the error text which is appended to the log message
	 */
	public void netStoppedError(NeuralNetEvent e, String error) {
		logError("learning stopped, error: " + error);
	}

	/**
	 * Converts the example set into the training array used by the memory synapses.
	 *
	 * <p>Each row holds the attribute values of one example followed by one target column.
	 * For nominal labels the target is 1.0 if the label value equals the positive index of the
	 * label mapping and 0.0 otherwise. For all other labels the target is the label value
	 * normalized to [0, 1] by the minimum and maximum label value, which are stored in
	 * {@link #minLabel} and {@link #maxLabel} as a side effect.</p>
	 *
	 * <p>If all label values are identical (maximum equals minimum), the normalized target is
	 * set to 0.0 instead of dividing by zero, which would otherwise fill the target column
	 * with NaN.</p>
	 *
	 * @param exampleSet the training data
	 * @return an array with one row per example and (number of attributes + 1) columns
	 */
	private double[][] createInputData(ExampleSet exampleSet) {
		final int labelColumn = exampleSet.getAttributes().size();
		final Attribute label = exampleSet.getAttributes().getLabel();
		final boolean nominalLabel = label.isNominal();
		final double[][] result = new double[exampleSet.size()][labelColumn + 1];

		this.maxLabel = Double.NEGATIVE_INFINITY;
		this.minLabel = Double.POSITIVE_INFINITY;

		int row = 0;
		Iterator<Example> i = exampleSet.iterator();
		while (i.hasNext()) {
			Example example = i.next();
			int column = 0;
			for (Attribute attribute : example.getAttributes()) {
				result[row][column++] = example.getValue(attribute);
			}
			double labelValue = example.getValue(label);
			if (nominalLabel) {
				result[row][labelColumn] = (label.getMapping().getPositiveIndex() == labelValue ? 1.0d : 0.0d);
			} else {
				result[row][labelColumn] = labelValue;
				this.maxLabel = Math.max(this.maxLabel, labelValue);
				this.minLabel = Math.min(this.minLabel, labelValue);
			}
			row++;
		}

		if (!nominalLabel) {
			final double range = this.maxLabel - this.minLabel;
			for (int r = 0; r < result.length; r++) {
				// a constant label has range 0; avoid 0/0 = NaN targets
				result[r][labelColumn] = (range == 0.0d) ? 0.0d : (result[r][labelColumn] - this.minLabel) / range;
			}
		}
		return result;
	}

	/** Returns the class of the models produced by this learner, i.e. {@link NeuralNetModel}. */
	@Override
	public Class<? extends PredictionModel> getModelClass() {
		return NeuralNetModel.class;
	}
	
	/**
	 * Reports which capabilities this learner supports: polynominal, binominal and numerical
	 * attributes as well as binominal and numerical labels. Every other capability yields false.
	 */
	public boolean supportsCapability(OperatorCapability lc) {
		final boolean supportedAttributeType =
			lc == OperatorCapability.POLYNOMINAL_ATTRIBUTES
			|| lc == OperatorCapability.BINOMINAL_ATTRIBUTES
			|| lc == OperatorCapability.NUMERICAL_ATTRIBUTES;
		final boolean supportedLabelType =
			lc == OperatorCapability.BINOMINAL_LABEL
			|| lc == OperatorCapability.NUMERICAL_LABEL;
		return supportedAttributeType || supportedLabelType;
	}

	/**
	 * Returns the parameter types of the superclass followed by the parameters of this learner:
	 * input/output layer type, the default hidden layer settings, the hidden layer list, and the
	 * training settings (cycles, learning rate, momentum, error epsilon).
	 *
	 * @return the list obtained from the superclass with this operator's parameter types appended
	 */
	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();

		// layer types of the input and output layer
		types.add(nonExpert(new ParameterTypeCategory(PARAMETER_INPUT_LAYER_TYPE, "The default layer type for the input layers.", LAYER_TYPES, LINEAR)));
		types.add(nonExpert(new ParameterTypeCategory(PARAMETER_OUTPUT_LAYER_TYPE, "The default layer type for the output layers.", LAYER_TYPES, SIGMOID)));

		// defaults used only if the hidden layer list is empty
		types.add(nonExpert(new ParameterTypeInt(PARAMETER_DEFAULT_NUMBER_OF_HIDDEN_LAYERS, "The number of hidden layers. Only used if no layers are defined by the list hidden_layer_types.", 1, Integer.MAX_VALUE, 1)));
		types.add(new ParameterTypeInt(PARAMETER_DEFAULT_HIDDEN_LAYER_SIZE, "The default size  of hidden layers. Only used if no layers are defined by the list hidden_layer_types. -1 means size (number of attributes + number of classes) / 2", -1, Integer.MAX_VALUE, -1));
		types.add(new ParameterTypeCategory(PARAMETER_DEFAULT_HIDDEN_LAYER_TYPE, "The default layer type for the hidden layers. Only used if the parameter list hidden_layer_types is not defined.", LAYER_TYPES, SIGMOID));

		// explicit list of hidden layers: one (type, size) entry per layer
		types.add(new ParameterTypeList(PARAMETER_HIDDEN_LAYER_TYPES, "Describes the type and size of all hidden layers",
				new ParameterTypeCategory("hidden_layer_type", "The type of this hidden layer", LAYER_TYPES, LINEAR),
				new ParameterTypeInt("hidden_layer_sizes", "The size of this hidden layer. A size of <= 0 leads to a layer size of (number_of_attributes + number of classes) / 2.", -1, Integer.MAX_VALUE, -1)));

		// training settings
		types.add(nonExpert(new ParameterTypeInt(PARAMETER_TRAINING_CYCLES, "The number of training cycles used for the neural network training.", 1, Integer.MAX_VALUE, 200)));
		types.add(nonExpert(new ParameterTypeDouble(PARAMETER_LEARNING_RATE, "The learning rate determines by how much we change the weights at each step.", 0.0d, 1.0d, 0.3d)));
		types.add(new ParameterTypeDouble(PARAMETER_MOMENTUM, "The momentum simply adds a fraction of the previous weight update to the current one (prevent local maxima and smoothes optimization directions).", 0.0d, 1.0d, 0.2d));
		types.add(new ParameterTypeDouble(PARAMETER_ERROR_EPSILON, "The optimization is stopped if the training error gets below this epsilon value.", 0.0d, Double.POSITIVE_INFINITY, 0.05d));
		return types;
	}

	/** Calls <code>setExpert(false)</code> on the given parameter type and returns it. */
	private static ParameterType nonExpert(ParameterType type) {
		type.setExpert(false);
		return type;
	}
}