/* * File: ThreeLayerFeedforwardNeuralNetwork.java * Authors: Kevin R. Dixon * Company: Sandia National Laboratories * Project: Cognitive Foundry * * Copyright Apr 15, 2009, Sandia Corporation. * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive * license for use of this work by or on behalf of the U.S. Government. * Export of this program may require a license from the United States * Government. See CopyrightHistory.txt for complete details. * */ package gov.sandia.cognition.learning.function.vector; import gov.sandia.cognition.annotation.PublicationReference; import gov.sandia.cognition.annotation.PublicationType; import gov.sandia.cognition.learning.algorithm.gradient.GradientDescendable; import gov.sandia.cognition.learning.function.scalar.AtanFunction; import gov.sandia.cognition.math.DifferentiableUnivariateScalarFunction; import gov.sandia.cognition.math.matrix.Matrix; import gov.sandia.cognition.math.matrix.MatrixFactory; import gov.sandia.cognition.math.matrix.Vector; import gov.sandia.cognition.math.matrix.VectorFactory; import gov.sandia.cognition.math.matrix.VectorInputEvaluator; import gov.sandia.cognition.math.matrix.VectorOutputEvaluator; import gov.sandia.cognition.math.matrix.VectorizableVectorFunction; import gov.sandia.cognition.util.AbstractRandomized; import gov.sandia.cognition.util.ObjectUtil; import java.util.Random; /** * This is a "standard" feedforward neural network with a single hidden * layer. There are bias terms on the input and hidden layers. The hidden * units are each applied with the same smooth (differentiable) squashing * function and the outputs are a linear combination of the hidden units and * the hidden-layer bias term. * @author Kevin R. Dixon * @since 3.0 */ @PublicationReference( author="Wikipedia", title="Multilayer perceptron", type=PublicationType.WebPage, year=2009, url="http://en.wikipedia.org/wiki/Multilayer_perceptron" ) public class ThreeLayerFeedforwardNeuralNetwork extends AbstractRandomized implements VectorizableVectorFunction, VectorInputEvaluator<Vector,Vector>, VectorOutputEvaluator<Vector,Vector>, GradientDescendable { /** * Default initialization range, {@value}. */ public static final double DEFAULT_INITIALIZATION_RANGE = 1e-3; /** * Default squashing function, AtanFunction. */ public static final DifferentiableUnivariateScalarFunction DEFAULT_SQUASHING_FUNCTION = new AtanFunction(); /** * Default random seed, {@value}. */ public static final int DEFAULT_RANDOM_SEED = 1; /** * Matrix of weights to pre-multiply the inputs by. */ protected Matrix inputToHiddenWeights; /** * Bias weights to add to each of the hidden units. */ protected Vector inputToHiddenBiasWeights; /** * Matrix of weights to pre-multiply the hidden-unit activations by. */ protected Matrix hiddenToOutputWeights; /** * Bias weights to add to each of the output units. */ protected Vector hiddenToOutputBiasWeights; /** * Squashing function to apply at the hidden layer. */ private DifferentiableUnivariateScalarFunction squashingFunction; /** * Range of values to initialize the weights between, must be greater than * or equal to zero. */ private double initializationRange; /** * Creates a new instance of ThreeLayerFeedforwardNeuralNetwork */ public ThreeLayerFeedforwardNeuralNetwork() { this( 1, 1, 1 ); } /** * Creates a new instance of ThreeLayerFeedforwardNeuralNetwork * @param numInputs * Input dimensionality, not including the bias term, * must be greater than zero. * @param numHidden * Number of hidden units, not including the bias term, * must be greater than zero. * @param numOutputs * Output dimensionality, must be greater than zero. */ public ThreeLayerFeedforwardNeuralNetwork( int numInputs, int numHidden, int numOutputs ) { this( numInputs, numHidden, numOutputs, DEFAULT_SQUASHING_FUNCTION ); } /** * Creates a new instance of ThreeLayerFeedforwardNeuralNetwork * @param numInputs * Input dimensionality, not including the bias term, * must be greater than zero. * @param numHidden * Number of hidden units, not including the bias term, * must be greater than zero. * @param numOutputs * Output dimensionality, must be greater than zero. * @param squashingFunction * Function to apply at the hidden layer. */ public ThreeLayerFeedforwardNeuralNetwork( int numInputs, int numHidden, int numOutputs, DifferentiableUnivariateScalarFunction squashingFunction ) { this( numInputs, numHidden, numOutputs, squashingFunction, DEFAULT_RANDOM_SEED, DEFAULT_INITIALIZATION_RANGE ); } /** * Creates a new instance of ThreeLayerFeedforwardNeuralNetwork * @param numInputs * Input dimensionality, not including the bias term, * must be greater than zero. * @param numHidden * Number of hidden units, not including the bias term, * must be greater than zero. * @param numOutputs * Output dimensionality, must be greater than zero. * @param squashingFunction * Function to apply at the hidden layer. * @param randomSeed * Value to use as the random seed for the random-number generator. * @param initializationRange * Range of values to initialize the weights between, must be greater than * or equal to zero. */ public ThreeLayerFeedforwardNeuralNetwork( int numInputs, int numHidden, int numOutputs, DifferentiableUnivariateScalarFunction squashingFunction, int randomSeed, double initializationRange ) { super( new Random( randomSeed ) ); this.setInitializationRange(initializationRange); this.setSquashingFunction(squashingFunction); this.initializeWeights(numInputs, numHidden, numOutputs); } @Override public ThreeLayerFeedforwardNeuralNetwork clone() { ThreeLayerFeedforwardNeuralNetwork clone = (ThreeLayerFeedforwardNeuralNetwork) super.clone(); clone.inputToHiddenWeights = ObjectUtil.cloneSafe(this.inputToHiddenWeights); clone.inputToHiddenBiasWeights = ObjectUtil.cloneSafe(this.inputToHiddenBiasWeights); clone.hiddenToOutputWeights = ObjectUtil.cloneSafe(this.hiddenToOutputWeights); clone.hiddenToOutputBiasWeights = ObjectUtil.cloneSafe(this.hiddenToOutputBiasWeights); clone.squashingFunction = ObjectUtil.cloneSmart(this.squashingFunction); return clone; } public Matrix computeParameterGradient( Vector input) { int numInputs = this.getInputDimensionality(); int numHidden = this.getHiddenDimensionality(); int numOutput = this.getOutputDimensionality(); int num1 = numInputs * numHidden; int num2 = numHidden; int num3 = numHidden * numOutput; int num4 = numOutput; int N = num1 + num2 + num3 + num4; int M = numOutput; int offset; Vector hiddenActivation = this.evaluateHiddenLayerActivation(input); Vector squashedHiddenLayerActivation = this.evaluateSquashedHiddenLayerActivation(hiddenActivation); double[] squashedDerivativeHiddenLayerActivation = new double[ numHidden ]; for( int i = 0; i < squashedDerivativeHiddenLayerActivation.length; i++ ) { squashedDerivativeHiddenLayerActivation[i] = this.squashingFunction.differentiate( hiddenActivation.getElement(i) ); } Matrix gradient = MatrixFactory.getDefault().createMatrix(M,N); // Chain rule: y=f(g(h(x))) => h'(x)g'(h(x))*f'(g(x)) // My case:= y = W2*f(W1*x+b1)+b2 = W2*f(W1*x)+W2*f(b1) = W2*f(h)+b2 // Therefore: // dy_i/db2_j = 1.0 // dy_i/dW2_ij = f(h_j) // dy_i/db1_j = W2_ij*f'(h_j) // dy_i/dW1_jk = W2_ij*f'(h_j) * x_k int columnIndex = N-num4-num3; for( int j = 0; j < numHidden; j++ ) { // Derivation of this gradient assumes that the parameters are // column-stacked from the underlying matrices... // This makes a gradient which looks like: // [ h0 0 0 h1 0 0] // [ 0 h0 0 0 h1 0] // [ 0 0 h0 0 0 h1] // And so forth... double hj = squashedHiddenLayerActivation.getElement(j); for (int i = 0; i < numOutput; i++) { gradient.setElement( i, columnIndex, hj ); columnIndex++; } } // Add the output bias, because it's the easiest offset = N-num4; for( int i = 0; i < numOutput; i++ ) { gradient.setElement( i, i+offset, 1.0 ); } offset = numHidden*numInputs; for( int i = 0; i < numOutput; i++ ) { // Because we stack each column to vectorize the matrix parameters for( int j = 0; j < numHidden; j++ ) { double W2ij = this.hiddenToOutputWeights.getElement(i,j); double dfdhj = squashedDerivativeHiddenLayerActivation[j]; // dy_i/db1_j = W2_ij*f'(h_j) double dyi_db1j = W2ij*dfdhj; gradient.setElement( i, j + offset, dyi_db1j ); for( int k = 0; k < numInputs; k++ ) { // dy_i/dW1_jk = W2_ij*f'(h_j) * x_k double dyi_dW1jk = W2ij*dfdhj* input.getElement(k); gradient.setElement(i, k*numHidden + j, dyi_dW1jk ); } } } return gradient; } public Vector convertToVector() { Vector p1 = this.inputToHiddenWeights.convertToVector(); Vector p2 = this.inputToHiddenBiasWeights; Vector p3 = this.hiddenToOutputWeights.convertToVector(); Vector p4 = this.hiddenToOutputBiasWeights; int num = p1.getDimensionality() + p2.getDimensionality() + p3.getDimensionality() + p4.getDimensionality(); Vector parameters = VectorFactory.getDefault().createVector( num ); int index = 0; for( int i = 0; i < p1.getDimensionality(); i++ ) { parameters.setElement(index, p1.getElement(i) ); index++; } for( int i = 0; i < p2.getDimensionality(); i++ ) { parameters.setElement(index, p2.getElement(i) ); index++; } for( int i = 0; i < p3.getDimensionality(); i++ ) { parameters.setElement(index, p3.getElement(i)); index++; } for( int i = 0; i < p4.getDimensionality(); i++ ) { parameters.setElement(index, p4.getElement(i)); index++; } return parameters; } /** * Gets the total number of parameters in the neural net, including * the bias weights. * @return * Total number of parameters in the neural net. */ public int getNumParameters() { int numInputs = this.getInputDimensionality(); int numHidden = this.getHiddenDimensionality(); int numOutput = this.getOutputDimensionality(); int num1 = numInputs * numHidden; int num2 = numHidden; int num3 = numHidden * numOutput; int num4 = numOutput; return num1 + num2 + num3 + num4; } public void convertFromVector( Vector parameters) { int numInputs = this.getInputDimensionality(); int numHidden = this.getHiddenDimensionality(); int numOutput = this.getOutputDimensionality(); int num1 = numInputs * numHidden; int num2 = numHidden; int num3 = numHidden * numOutput; int num4 = numOutput; int num = num1 + num2 + num3 + num4; parameters.assertDimensionalityEquals(num); Vector p1 = parameters.subVector(0,num1-1); Vector p2 = parameters.subVector(num1,num1+num2-1); Vector p3 = parameters.subVector(num1+num2,num1+num2+num3-1); Vector p4 = parameters.subVector(num1+num2+num3,num-1); this.inputToHiddenWeights.convertFromVector(p1); this.inputToHiddenBiasWeights = p2; this.hiddenToOutputWeights.convertFromVector(p3); this.hiddenToOutputBiasWeights = p4; } public Vector evaluate( Vector input) { Vector hiddenActivation = this.evaluateHiddenLayerActivation(input); Vector squashedHiddenActivation = this.evaluateSquashedHiddenLayerActivation(hiddenActivation); return this.evaluateOutputFromSquashedHiddenLayerActivation( squashedHiddenActivation); } /** * Computes the raw (unsquashed) activation at the hidden layer for the * given input. * @param input * Input to compute the raw hidden activation of. * @return * Raw (unsquashed) activation at the hidden layer. */ protected Vector evaluateHiddenLayerActivation( Vector input ) { Vector hiddenActivation = this.inputToHiddenWeights.times( input ); hiddenActivation.plusEquals( this.inputToHiddenBiasWeights ); return hiddenActivation; } /** * Evaluates the squashed hidden-layer activation from its raw activation * value. This is equivalent to apply an element-wise squashing function * to the raw hidden activation values. * @param hiddenActivation * Raw (unsquashed) hidden activation values. * @return * Squashed hidden-layer activation. */ protected Vector evaluateSquashedHiddenLayerActivation( Vector hiddenActivation ) { return ElementWiseVectorFunction.evaluate( hiddenActivation,this.getSquashingFunction()); } /** * Evaluates the output from the squashed hidden-layer activation. * @param squashedHiddenActivation * Squashed hidden-layer activation. * @return * Output of the neural net. */ protected Vector evaluateOutputFromSquashedHiddenLayerActivation( Vector squashedHiddenActivation ) { Vector outputActivation = this.hiddenToOutputWeights.times( squashedHiddenActivation ); outputActivation.plusEquals( this.hiddenToOutputBiasWeights ); return outputActivation; } /** * Reinitializes the neural network parameters based on its current setup. * It uses the object's internal random number generator to generate * weights uniformly in the range of * [-initializationRange, +initializationRange]. */ public void reinitializeWeights() { this.initializeWeights( this.getInputDimensionality(), this.getHiddenDimensionality(), this.getOutputDimensionality()); } /** * Initializes the neural net parameters for the given dimensions, not * including the bias terms, using the object's random-number generator * uniformly between the initialization range (and its negative value). * @param inputDimensionality * Number of the inputs, not including the bias term, * must be greater than zero. * @param hiddenDimensionality * Number of the hidden units, not including the bias term, * must be greater than zero. * @param outputDimensionality * Number of the outputs, must be greater than zero. */ public void initializeWeights( int inputDimensionality, int hiddenDimensionality, int outputDimensionality ) { if( inputDimensionality < 1 ) { throw new IllegalArgumentException( "inputDimensionality must be >= 1" ); } if( hiddenDimensionality < 1 ) { throw new IllegalArgumentException( "hiddenDimensionality must be >= 1" ); } if( outputDimensionality < 1 ) { throw new IllegalArgumentException( "outputDimensionality must be >= 1" ); } this.inputToHiddenWeights = MatrixFactory.getDefault().createUniformRandom( hiddenDimensionality, inputDimensionality, -this.getInitializationRange(), this.getInitializationRange() ,this.getRandom()); this.inputToHiddenBiasWeights = VectorFactory.getDefault().createUniformRandom( hiddenDimensionality, -this.getInitializationRange(),this.getInitializationRange(), this.random); this.hiddenToOutputWeights = MatrixFactory.getDefault().createUniformRandom( outputDimensionality, hiddenDimensionality, -this.getInitializationRange(), this.getInitializationRange() ,this.getRandom()); this.hiddenToOutputBiasWeights = VectorFactory.getDefault().createUniformRandom( outputDimensionality, -this.getInitializationRange(),this.getInitializationRange(), this.random); } public int getOutputDimensionality() { return this.hiddenToOutputWeights.getNumRows(); } /** * Sets the output dimensionality of the neural net by re-initializing the * weights. * @param outputDimensionality * Desired output dimensionality, must be greater than zero. */ public void setOutputDimensionality( int outputDimensionality ) { this.initializeWeights( this.getInputDimensionality(), this.getHiddenDimensionality(), outputDimensionality ); } /** * Gets the number of hidden units, not including the bias term. * @return * Number of hidden units, must be greater than zero. */ public int getHiddenDimensionality() { return this.hiddenToOutputWeights.getNumColumns(); } /** * Sets the number of hidden units, not including the bias term, by * re-initializing the neural net's weights. * @param hiddenDimensionality * Number of hidden units, must be greater than zero. */ public void setHiddenDimensionality( int hiddenDimensionality ) { this.initializeWeights( this.getInputDimensionality(), hiddenDimensionality, this.getOutputDimensionality() ); } public int getInputDimensionality() { return this.inputToHiddenWeights.getNumColumns(); } /** * Sets the number of input units, not counting the bias term, * by re-initializing the neural net's parameters. * @param inputDimensionality * Desired input dimensionality, must be greater than zero. */ public void setInputDimensionality( int inputDimensionality ) { this.initializeWeights( inputDimensionality, this.getHiddenDimensionality(), this.getOutputDimensionality() ); } /** * Getter for squashingFunction * @return * Squashing function to apply at the hidden layer. */ public DifferentiableUnivariateScalarFunction getSquashingFunction() { return this.squashingFunction; } /** * Setter for squashingFunction * @param squashingFunction * Squashing function to apply at the hidden layer. */ public void setSquashingFunction( DifferentiableUnivariateScalarFunction squashingFunction) { if( squashingFunction == null ) { throw new IllegalArgumentException( "Squashing function cannot be null!" ); } this.squashingFunction = squashingFunction; } /** * Getter for initializationRange * @return * Range of values to initialize the weights between, must be greater than * or equal to zero. */ public double getInitializationRange() { return this.initializationRange; } /** * Setter for initializationRange * @param initializationRange * Range of values to initialize the weights between, must be greater than * or equal to zero. */ public void setInitializationRange( double initializationRange) { if( initializationRange < 0.0 ) { throw new IllegalArgumentException( "initializationRange must be >= 0.0" ); } this.initializationRange = initializationRange; } }