JacobianChainRule.java example

Explorer
VAFusion2-master
/*
 * Encog(tm) Core v2.5 - Java Version
 * http://www.heatonresearch.com/encog/
 * http://code.google.com/p/encog-java/
 
 * Copyright 2008-2010 Heaton Research, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *   
 * For more information on Heaton Research copyrights, licenses 
 * and trademarks visit:
 * http://www.heatonresearch.com/copyright
 */

package org.encog.neural.networks.training.lma;

import java.util.List;

import org.encog.engine.network.activation.ActivationFunction;
import org.encog.neural.data.Indexable;
import org.encog.neural.data.NeuralData;
import org.encog.neural.data.NeuralDataPair;
import org.encog.neural.data.basic.BasicNeuralData;
import org.encog.neural.data.basic.BasicNeuralDataPair;
import org.encog.neural.networks.BasicNetwork;
import org.encog.neural.networks.NeuralOutputHolder;
import org.encog.neural.networks.synapse.Synapse;

/**
 * Calculate the Jacobian using the chain rule.
 */
public class JacobianChainRule implements ComputeJacobian {
	/**
	 * The network that is to be trained.
	 */
	private final BasicNetwork network;

	/**
	 * THe training set to use. Must be indexable.
	 */
	private final Indexable indexableTraining;

	/**
	 * The number of training set elements.
	 */
	private final int inputLength;

	/**
	 * The number of weights and bias values in the neural network.
	 */
	private final int parameterSize;

	/**
	 * The Jacobian matrix that was calculated.
	 */
	private final double[][] jacobian;

	/**
	 * The current row in the Jacobian matrix.
	 */
	private int jacobianRow;

	/**
	 * The current column in the Jacobian matrix.
	 */
	private int jacobianCol;

	/**
	 * Used to read the training data.
	 */
	private final NeuralDataPair pair;

	/**
	 * The errors for each row in the Jacobian.
	 */
	private final double[] rowErrors;

	/**
	 * The current error.
	 */
	private double error;

	/**
	 * Construct the chain rule calculation.
	 * 
	 * @param network
	 *            The network to use.
	 * @param indexableTraining
	 *            The training set to use.
	 */
	public JacobianChainRule(final BasicNetwork network,
			final Indexable indexableTraining) {
		this.indexableTraining = indexableTraining;
		this.network = network;
		this.parameterSize = network.getStructure().calculateSize();
		this.inputLength = (int) this.indexableTraining.getRecordCount();
		this.jacobian = new double[this.inputLength][this.parameterSize];
		this.rowErrors = new double[this.inputLength];

		final BasicNeuralData input = new BasicNeuralData(
				this.indexableTraining.getInputSize());
		final BasicNeuralData ideal = new BasicNeuralData(
				this.indexableTraining.getIdealSize());
		this.pair = new BasicNeuralDataPair(input, ideal);
	}

	/**
	 * Calculate the derivative.
	 * 
	 * @param a
	 *            The activation function.
	 * @param d
	 *            The value to calculate for.
	 * @return The derivative.
	 */
	private double calcDerivative(final ActivationFunction a, final double d) {

		return a.derivativeFunction(d);
	}

	/**
	 * Calculate the derivative.
	 * 
	 * @param a
	 *            The activation function.
	 * @param d
	 *            The value to calculate for.
	 * @return The derivative.
	 */
	private double calcDerivative2(final ActivationFunction a, final double d) {
		final double[] temp = new double[1];
		temp[0] = d;
		a.activationFunction(temp,0,temp.length);
		temp[0] = a.derivativeFunction(temp[0]);
		return temp[0];
	}

	/**
	 * Calculate the Jacobian matrix.
	 * 
	 * @param weights
	 *            The weights for the neural network.
	 * @return The sum squared of the weights.
	 */
	public double calculate(final double[] weights) {
		double result = 0.0;

		for (int i = 0; i < this.inputLength; i++) {
			this.jacobianRow = i;
			this.jacobianCol = 0;

			this.indexableTraining.getRecord(i, this.pair);

			final double e = calculateDerivatives(this.pair);
			this.rowErrors[i] = e;
			result += e * e;

		}

		return result / 2.0;
	}

	/**
	 * Calculate the derivatives for this training set element.
	 * 
	 * @param pair
	 *            The training set element.
	 * @return The sum squared of errors.
	 */
	private double calculateDerivatives(final NeuralDataPair pair) {
		// error values
		double e = 0.0;
		double sum = 0.0;

		final ActivationFunction function = this.network.getLayer(
				BasicNetwork.TAG_INPUT).getActivationFunction();

		final NeuralOutputHolder holder = new NeuralOutputHolder();

		this.network.compute(pair.getInput(), holder);

		final List<Synapse> synapses = this.network.getStructure()
				.getSynapses();

		int synapseNumber = 0;

		Synapse synapse = synapses.get(synapseNumber++);

		double output = holder.getOutput().getData(0);
		e = pair.getIdeal().getData(0) - output;

		for (int i = 0; i < synapse.getFromNeuronCount(); i++) {
			final double lastOutput = holder.getResult().get(synapse)
					.getData(i);

			this.jacobian[this.jacobianRow][this.jacobianCol++] 
			    = calcDerivative(
					function, output)
					* lastOutput;
		}
		
		this.jacobian[this.jacobianRow][this.jacobianCol++] = calcDerivative(
				function, output);


		Synapse lastSynapse;

		while (synapseNumber < synapses.size()) {
			lastSynapse = synapse;
			synapse = synapses.get(synapseNumber++);
			final NeuralData outputData = holder.getResult().get(lastSynapse);

			// for each neuron in the input layer
			for (int neuron = 0; neuron 
			  < synapse.getToNeuronCount(); neuron++) {
				output = outputData.getData(neuron);
				
				final double w = lastSynapse.getMatrix().get(neuron, 0);
				final double val = calcDerivative(function, output)
						* calcDerivative2(function, sum) * w;

				// for each weight of the input neuron
				for (int i = 0; i < synapse.getFromNeuronCount(); i++) {
					sum = 0.0;
					// for each neuron in the next layer
					for (int j = 0; j < lastSynapse.getToNeuronCount(); j++) {
						// for each weight of the next neuron
						for (int k = 0; k 
						  < lastSynapse.getFromNeuronCount(); k++) {
							sum += lastSynapse.getMatrix().get(k, j) * output;
						}
						sum += lastSynapse.getToLayer().getBiasWeight(j);
					}

					this.jacobian[this.jacobianRow][this.jacobianCol++] = val
							* holder.getResult().get(synapse).getData(i);
				}
				
				if( synapse.getToLayer().hasBias() ) {
					this.jacobian[this.jacobianRow][this.jacobianCol++] += val;
				}
			}
		}

		// return error
		return e;
	}

	/**
	 * @return The sum squared errors.
	 */
	public double getError() {
		return this.error;
	}

	/**
	 * @return The Jacobian matrix.
	 */
	public double[][] getJacobian() {
		return this.jacobian;
	}

	/**
	 * @return The errors for each row of the Jacobian.
	 */
	public double[] getRowErrors() {
		return this.rowErrors;
	}
}