/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package de.dfki.madm.paren.operator.learner.functions.neuralnet;
import java.util.LinkedList;
import java.util.List;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Example;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.SplittedExampleSet;
import com.rapidminer.operator.Model;
import com.rapidminer.operator.OperatorCapability;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.learner.AbstractLearner;
import com.rapidminer.operator.learner.PredictionModel;
import com.rapidminer.operator.learner.functions.neuralnet.InnerNode;
import com.rapidminer.operator.learner.functions.neuralnet.Node;
import com.rapidminer.operator.performance.PerformanceVector;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeInt;
import com.rapidminer.tools.RandomGenerator;
/**
 * AutoMLP learner: evolves a small population of multi-layer perceptrons over
 * several generations. Each generation trains all candidates on a worker
 * thread, scores them on a hold-out split, keeps the better half, and
 * re-seeds the worse half with learning rates and hidden-layer sizes mutated
 * via log-normal draws.
 *
 * @rapidminer.index Neural Net
 *
 * @author Ingo Mierswa, modified by Syed Atif Mehdi (01/09/2010)
 */
public class AutoMLPImprovedNeuralNetLearner extends AbstractLearner {

	/**
	 * The parameter name for "The number of training cycles used for the neural network
	 * training."
	 */
	public static final String PARAMETER_TRAINING_CYCLES = "training_cycles";

	/** Parameter key: how many evolutionary generations to run. */
	private static final String PARAMETER_MAX_GENERATIONS = "number_of_generations";

	/**
	 * Parameter key: population size. NOTE(review): the "esemble" typo is kept
	 * deliberately — changing the key would break saved processes.
	 */
	private static final String PARAMETER_NUMBER_ENSEMBLES = "number_of_esemble_mlps";

	RandomGenerator randomGenerator;

	protected PerformanceVector performance;

	public AutoMLPImprovedNeuralNetLearner(OperatorDescription description) {
		super(description);
	}

	/**
	 * Runs the evolutionary AutoMLP training loop and returns the candidate
	 * network with the smallest validation error.
	 *
	 * @throws OperatorException if a parameter cannot be read or the training
	 *             thread is interrupted
	 */
	@Override
	public Model learn(ExampleSet exampleSet) throws OperatorException {
		int maxCycles = getParameterAsInt(PARAMETER_TRAINING_CYCLES); // training cycles per generation
		int maxGenerations = getParameterAsInt(PARAMETER_MAX_GENERATIONS);
		int nensemble = getParameterAsInt(PARAMETER_NUMBER_ENSEMBLES); // number of MLPs in the population
		double etaInit = 0.5;        // centre of the log-normal learning-rate distribution
		int minHidden = 5;           // lower clamp for mutated hidden-node counts
		int maxHidden = 300;         // upper clamp for hidden-node counts
		double etaVarlog = 1.5;      // learning-rate variance in log space
		double hiddenVarlog = 1.8;   // hidden-node-count variance in log space
		double maxError = 0.0;
		double momentum = 0.5;
		boolean decay = false;
		boolean shuffle = true;
		boolean normalize = true;
		int hiddenLo = 20;           // initial hidden sizes are log-spaced in [hiddenLo, hiddenHi]
		int hiddenHi = 80;
		double cvSplit = 0.8;        // fraction used for training; the rest is the validation split

		randomGenerator = RandomGenerator.getRandomGenerator(this);

		AutoMLPImprovedNeuralNetModel[] oldModels = new AutoMLPImprovedNeuralNetModel[nensemble];
		boolean[] isOldModels = new boolean[nensemble];
		double[] learningRate = new double[nensemble];
		@SuppressWarnings("unchecked") // generic array creation is impossible in Java; raw LinkedList[] is the idiom
		List<String[]>[] hiddenLayers = new LinkedList[nensemble];

		// Initialize each candidate: random learning rate in (0, 1) and a
		// log-spaced hidden-layer size between hiddenLo and hiddenHi.
		for (int i = 0; i < nensemble; i++) {
			hiddenLayers[i] = new LinkedList<String[]>();
			isOldModels[i] = false;
			do {
				learningRate[i] = rlognormal(etaInit, etaVarlog);
			} while (learningRate[i] < 0 || learningRate[i] >= 1.0);
			int nn = logspace(i, nensemble, hiddenLo, hiddenHi);
			if (nn < maxHidden) {
				hiddenLayers[i].add(new String[] { "Hidden", Integer.toString(nn) });
			}
		}

		// 80/20 shuffled split (samplingType = 1) with a fixed seed (1992) for reproducibility.
		SplittedExampleSet splittedES = new SplittedExampleSet(exampleSet, cvSplit, 1, false, 1992);

		int generations = 0;
		do {
			splittedES.selectSingleSubset(0); // training partition (cvSplit of the data)
			AutoMlpThreaded autoMlpThread = new AutoMlpThreaded(splittedES, nensemble, hiddenLayers, maxCycles,
					maxError, learningRate, momentum, decay, shuffle, normalize, randomGenerator, isOldModels,
					oldModels);
			autoMlpThread.StartTraining();
			try {
				// Block until the training thread finishes instead of busy-polling isAlive().
				autoMlpThread.join();
			} catch (InterruptedException e) {
				// Restore the interrupt flag and abort the operator cleanly.
				Thread.currentThread().interrupt();
				throw new OperatorException("AutoMLP training was interrupted", e);
			}

			// Collect the trained networks; from now on they are warm-started.
			for (int i = 0; i < nensemble; i++) {
				oldModels[i] = autoMlpThread.GetModel(i);
				isOldModels[i] = true;
			}

			// Score every candidate on the hold-out partition.
			splittedES.selectSingleSubset(1);
			autoMlpThread.CrossValidate(splittedES);
			for (int i = 0; i < nensemble; i++) {
				oldModels[i].error = calculateError(splittedES, oldModels[i]);
			}

			// Sort candidates (and their learning rates in lockstep) by ascending error.
			quicksort(oldModels, learningRate, 0, oldModels.length - 1);

			// Keep the better half: refresh each hidden-layer spec from the
			// actual hidden-node count of the trained network.
			for (int i = 0; i < nensemble / 2; i++) {
				hiddenLayers[i].clear();
				hiddenLayers[i].add(new String[] { "Hidden", Integer.toString(countHiddenNodes(oldModels[i])) });
			}

			// Replace the worse half with mutated versions of the better half.
			for (int i = nensemble / 2, j = 0; i < nensemble; i++, j++) {
				do {
					learningRate[i] = rlognormal(etaInit, etaVarlog);
				} while (learningRate[i] < 0 || learningRate[i] >= 1.0);
				// NOTE(review): this aliases the parent model rather than copying it,
				// so both slots share one network object despite the original comment
				// saying "copy". Preserved as-is — confirm whether a deep copy was intended.
				oldModels[i] = oldModels[j];
				int currentSize = countHiddenNodes(oldModels[j]);
				int value;
				do {
					value = (int) rlognormal(currentSize, hiddenVarlog);
				} while (value < 0);
				if (value > 0) { // rlognormal uses -1 as its error state
					int nn = Math.min(Math.max(minHidden, value), maxHidden);
					if (nn < maxHidden) {
						hiddenLayers[i].clear();
						hiddenLayers[i].add(new String[] { "Hidden", Integer.toString(nn) });
					}
				}
			}
			generations++;
		} while (generations < maxGenerations);

		// The population is sorted by error, so index 0 holds the best network.
		return oldModels[0];
	}

	/** Counts the non-output (hidden) inner nodes of a trained network. */
	private int countHiddenNodes(AutoMLPImprovedNeuralNetModel model) {
		int count = 0;
		for (InnerNode node : model.innerNodes) {
			if (node.getLayerIndex() != Node.OUTPUT) {
				count++;
			}
		}
		return count;
	}

	@Override
	public Class<? extends PredictionModel> getModelClass() {
		return AutoMLPImprovedNeuralNetModel.class;
	}

	/**
	 * Returns true for numerical attributes and for polynominal and binominal
	 * labels.
	 */
	@Override
	public boolean supportsCapability(OperatorCapability lc) {
		return lc == OperatorCapability.NUMERICAL_ATTRIBUTES || lc == OperatorCapability.POLYNOMINAL_LABEL
				|| lc == OperatorCapability.BINOMINAL_LABEL;
	}

	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> types = super.getParameterTypes();
		ParameterType type = new ParameterTypeInt(PARAMETER_TRAINING_CYCLES,
				"The number of maximum training cycles used for the neural network training.", 1, Integer.MAX_VALUE, 10);
		type.setExpert(true);
		types.add(type);
		ParameterType type2 = new ParameterTypeInt(PARAMETER_MAX_GENERATIONS,
				"The number of generations for AutoMLP training.", 1, Integer.MAX_VALUE, 10);
		type2.setExpert(true);
		types.add(type2);
		ParameterType type3 = new ParameterTypeInt(PARAMETER_NUMBER_ENSEMBLES, "The number of MLPs per ensemble.", 1,
				Integer.MAX_VALUE, 4);
		type3.setExpert(true);
		types.add(type3);
		return types;
	}

	/**
	 * Draws a log-normally distributed value centred on {@code etaInit} with
	 * log-space spread {@code r}, or returns -1 (error state) when {@code r <= 1}.
	 */
	private double rlognormal(double etaInit, double r) {
		if (r <= 1.0) {
			return -1; // error state: spread must exceed 1 so log(r) > 0
		}
		// rnormal() is always finite, so no NaN-retry loop is needed here.
		return Math.exp(rnormal(Math.log(etaInit), Math.log(r)));
	}

	/** Normal deviate with mean {@code d} and standard deviation {@code e}. */
	private double rnormal(double d, double e) {
		return rnormal() * e + d;
	}

	/**
	 * Standard normal deviate via a polar-style rejection transform.
	 * NOTE(review): the polar (Marsaglia) method normally draws UNIFORM
	 * deviates in [-1, 1]; this uses nextGaussian(), which yields a different
	 * distribution. Preserved as-is — confirm against the original intent.
	 */
	private double rnormal() {
		double x, y, s;
		do {
			x = 2 * randomGenerator.nextGaussian() - 1;
			y = 2 * randomGenerator.nextGaussian() - 1;
			s = x * x + y * y;
			// Reject s == 0 as well: it would make -log(s)/s evaluate to NaN
			// (0 * Infinity) and the original retry loop never terminated.
		} while (s > 1.0 || s == 0.0);
		return x * Math.sqrt(-Math.log(s) / s);
	}

	/**
	 * Returns the i-th of n points log-spaced between lo and hi (inclusive
	 * endpoints), truncated to int.
	 */
	private int logspace(int i, int n, float lo, float hi) {
		if (n <= 1) {
			// Avoid 0/0 -> NaN for a single candidate; the original code hung
			// in an infinite NaN-retry loop when n == 1.
			return (int) lo;
		}
		return (int) Math.exp(i / (float) (n - 1) * (Math.log(hi) - Math.log(lo)) + Math.log(lo));
	}

	/**
	 * Sorts the models by ascending error and keeps the learning-rate array in
	 * lockstep so rate[i] always belongs to model[i].
	 */
	private void quicksort(AutoMLPImprovedNeuralNetModel[] old_nn, double[] lR, int low, int high) {
		int i = low, j = high;
		// Pivot from the middle of the range; unsigned shift instead of
		// division avoids overflow if low + high exceeds Integer.MAX_VALUE.
		double pivot = old_nn[(low + high) >>> 1].getError();
		// Partition into elements below and above the pivot.
		while (i <= j) {
			while (i < high && old_nn[i].getError() < pivot) {
				i++;
			}
			while (j > low && old_nn[j].getError() > pivot) {
				j--;
			}
			// Exchange an out-of-place pair and advance both cursors.
			if (i <= j) {
				swap(old_nn, lR, i, j);
				i++;
				j--;
			}
		}
		// Recurse into the two partitions.
		if (low < j) {
			quicksort(old_nn, lR, low, j);
		}
		if (i < high) {
			quicksort(old_nn, lR, i, high);
		}
	}

	/** Swaps models[index_1] with models[index_2] and the matching learning rates. */
	private void swap(AutoMLPImprovedNeuralNetModel[] models, double[] lR, int index_1, int index_2) {
		AutoMLPImprovedNeuralNetModel tempModel = models[index_1];
		models[index_1] = models[index_2];
		models[index_2] = tempModel;
		double tempRate = lR[index_1];
		lR[index_1] = lR[index_2];
		lR[index_2] = tempRate;
	}

	/**
	 * Misclassification rate of {@code model} on {@code exampleSet}. Only
	 * nominal labels are scored; examples with a non-nominal label contribute
	 * an error of 0.
	 *
	 * @return fraction of misclassified examples; 0 for an empty set
	 */
	protected float calculateError(ExampleSet exampleSet, AutoMLPImprovedNeuralNetModel model) {
		Attribute label = exampleSet.getAttributes().getLabel();
		long count = 0;
		long misclassified = 0;
		for (Example example : exampleSet) {
			model.resetNetwork();
			count++;
			if (label.isNominal()) {
				int numberOfClasses = model.getNumberOfClasses(label);
				double[] classProbabilities = new double[numberOfClasses];
				double total = 0.0;
				for (int c = 0; c < numberOfClasses; c++) {
					classProbabilities[c] = model.outputNodes[c].calculateValue(true, example);
					total += classProbabilities[c];
				}
				// Normalize the raw outputs and pick the most confident class.
				double maxConfidence = Double.NEGATIVE_INFINITY;
				int maxIndex = 0;
				for (int c = 0; c < numberOfClasses; c++) {
					classProbabilities[c] /= total;
					if (classProbabilities[c] > maxConfidence) {
						maxIndex = c;
						maxConfidence = classProbabilities[c];
					}
				}
				if (maxIndex != example.getLabel()) {
					misclassified++;
				}
			}
		}
		// Guard against 0/0 -> NaN on an empty example set.
		if (count == 0) {
			return 0f;
		}
		return (float) misclassified / (float) count;
	}
}
/**
 * Worker thread that trains one generation of candidate MLPs in the
 * background while the operator thread waits for completion.
 */
class AutoMlpThreaded extends Thread {

	AutoMLPImprovedNeuralNetModel[] model;
	int nensembles = 1;
	ExampleSet exampleSet;
	List<String[]>[] hiddenLayers;
	int maxCycles;
	double maxError;
	double[] learningRate;
	double momentum;
	boolean decay;
	boolean shuffle;
	boolean normalize;
	RandomGenerator randomGenerator;
	boolean[] isOldModels;
	AutoMLPImprovedNeuralNetModel[] oldModels;

	/**
	 * Captures all training configuration and allocates one fresh model per
	 * ensemble slot; training itself happens later in {@link #run()}.
	 */
	AutoMlpThreaded(ExampleSet data, int ensembleSize, List<String[]>[] layers, int cycles, double errorLimit,
			double[] rates, double momentumValue, boolean useDecay, boolean useShuffle, boolean useNormalization,
			RandomGenerator rng, boolean[] warmStartFlags, AutoMLPImprovedNeuralNetModel[] previousModels) {
		this.exampleSet = data;
		this.nensembles = ensembleSize;
		this.hiddenLayers = layers;
		this.maxCycles = cycles;
		this.maxError = errorLimit;
		this.learningRate = rates;
		this.momentum = momentumValue;
		this.decay = useDecay;
		this.shuffle = useShuffle;
		this.normalize = useNormalization;
		this.randomGenerator = rng;
		this.isOldModels = warmStartFlags;
		this.oldModels = previousModels;
		this.model = new AutoMLPImprovedNeuralNetModel[ensembleSize];
		for (int slot = 0; slot < ensembleSize; slot++) {
			this.model[slot] = new AutoMLPImprovedNeuralNetModel(data);
		}
	}

	/** Trains every candidate network sequentially on this worker thread. */
	@Override
	public void run() {
		for (int slot = 0; slot < nensembles; slot++) {
			model[slot].train(exampleSet, hiddenLayers[slot], maxCycles, maxError, learningRate[slot], momentum,
					decay, shuffle, normalize, randomGenerator, isOldModels[slot], oldModels[slot]);
		}
	}

	/** Starts this thread, which kicks off {@link #run()}. */
	public void StartTraining() {
		start();
	}

	/**
	 * Sums each model's per-example error over the given validation set and
	 * stores the total in the model's {@code error} field, which the caller's
	 * sort later reads via {@code getError()}.
	 */
	void CrossValidate(ExampleSet splittedES) {
		int size = splittedES.size();
		for (int slot = 0; slot < nensembles; slot++) {
			double sum = 0.0;
			for (int row = 0; row < size; row++) {
				sum += model[slot].calculateError(splittedES.getExample(row));
			}
			// TODO(original): setting the error directly is acknowledged as a
			// workaround; kept so the quicksort's getError() keeps working.
			model[slot].error = sum;
		}
	}

	/** Returns the current error of every model, in slot order. */
	public double[] GetModelsErrors() {
		double[] errors = new double[nensembles];
		for (int slot = 0; slot < nensembles; slot++) {
			errors[slot] = model[slot].getError();
		}
		return errors;
	}

	/** Returns the trained model in the given ensemble slot. */
	public AutoMLPImprovedNeuralNetModel GetModel(int index) {
		return model[index];
	}
}