/*
* File: DifferentiableFeedforwardNeuralNetwork.java
* Authors: Kevin R. Dixon
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright February 28, 2007, Sandia Corporation. Under the terms of Contract
* DE-AC04-94AL85000, there is a non-exclusive license for use of this work by
* or on behalf of the U.S. Government. Export of this program may require a
* license from the United States Government. See CopyrightHistory.txt for
* complete details.
*
*/
package gov.sandia.cognition.learning.function.vector;
import gov.sandia.cognition.learning.algorithm.gradient.GradientDescendable;
import gov.sandia.cognition.math.DifferentiableUnivariateScalarFunction;
import gov.sandia.cognition.math.matrix.DifferentiableVectorFunction;
import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.MatrixFactory;
import gov.sandia.cognition.math.matrix.Vector;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
/**
* A feedforward neural network that can have an arbitrary number of layers
* and an arbitrary differentiable squashing (activation) function assigned
* to each layer. Because every squashing function is differentiable, the
* network is differentiable with respect to its weights and can be trained
* by gradient-based methods.
*
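* <p>
* A minimal usage sketch, assuming {@code AtanFunction} (a
* {@code DifferentiableUnivariateScalarFunction} from the Foundry math
* package) and {@code VectorFactory} are available:
* <pre>{@code
* DifferentiableFeedforwardNeuralNetwork net =
*     new DifferentiableFeedforwardNeuralNetwork(
*         2, 4, 1, new AtanFunction(), new Random(42));
* Vector output = net.evaluate(
*     VectorFactory.getDefault().copyValues(0.5, -0.25));
* }</pre>
*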
* @author Kevin R. Dixon
* @since 1.0
*
*/
public class DifferentiableFeedforwardNeuralNetwork
extends FeedforwardNeuralNetwork
implements GradientDescendable
{
/**
* Creates a new instance of DifferentiableFeedforwardNeuralNetwork
* @param nodesPerLayer
Number of nodes in each layer; must specify at least 2 layers
* @param layerActivationFunctions
* Squashing function to assign to each layer; must contain one fewer
* squashing function than there are layers (the input layer has no
* squashing function)
* @param random
* The random number generator for initial weights.
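* <p>
* For example, a 3-5-2 network with arctangent squashing at each
* non-input layer (a sketch, assuming {@code AtanFunction}):
* <pre>{@code
* new DifferentiableFeedforwardNeuralNetwork(
*     new ArrayList<Integer>(Arrays.asList(3, 5, 2)),
*     new ArrayList<DifferentiableUnivariateScalarFunction>(
*         Arrays.asList(new AtanFunction(), new AtanFunction())),
*     new Random(1));
* }</pre>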
*/
public DifferentiableFeedforwardNeuralNetwork(
ArrayList<Integer> nodesPerLayer,
ArrayList<DifferentiableUnivariateScalarFunction> layerActivationFunctions,
Random random)
{
super(new ArrayList<DifferentiableGeneralizedLinearModel>());
ArrayList<DifferentiableGeneralizedLinearModel> layers =
new ArrayList<DifferentiableGeneralizedLinearModel>(
layerActivationFunctions.size());
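// Initialize every weight uniformly at random in [-range, range]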
final double range = 0.1;
for (int i = 0; i < nodesPerLayer.size() - 1; i++)
{
int currentNum = nodesPerLayer.get(i);
int nextNum = nodesPerLayer.get(i + 1);
Matrix w = MatrixFactory.getDefault().createUniformRandom(
nextNum, currentNum, -range, range, random);
DifferentiableGeneralizedLinearModel layer =
new DifferentiableGeneralizedLinearModel(
new MultivariateDiscriminant(w),
layerActivationFunctions.get(i));
layers.add(layer);
}
this.setLayers(layers);
}
/**
* Creates a new instance of DifferentiableFeedforwardNeuralNetwork
*
* @param numInputs
* Number of nodes in the input layer
* @param numHiddens
* Number of nodes in the hidden (middle) layer
* @param numOutputs
* Number of nodes in the output layer
* @param activationFunction
* Squashing function to assign to all layers
* @param random
* The random number generator for the initial weights.
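* <p>
* A sketch of applying one scalar squashing function element-wise,
* assuming {@code AtanFunction}:
* <pre>{@code
* new DifferentiableFeedforwardNeuralNetwork(3, 10, 2,
*     new ElementWiseDifferentiableVectorFunction(new AtanFunction()),
*     new Random(1));
* }</pre>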
*/
public DifferentiableFeedforwardNeuralNetwork(
int numInputs,
int numHiddens,
int numOutputs,
DifferentiableVectorFunction activationFunction,
Random random)
{
super(new ArrayList<DifferentiableGeneralizedLinearModel>());
ArrayList<DifferentiableGeneralizedLinearModel> layers =
new ArrayList<DifferentiableGeneralizedLinearModel>(2);
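// Initialize every weight uniformly at random in [-range, range]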
final double range = 1.0;
Matrix w12 = MatrixFactory.getDefault().createUniformRandom(
numHiddens, numInputs, -range, range, random);
Matrix w23 = MatrixFactory.getDefault().createUniformRandom(
numOutputs, numHiddens, -range, range, random);
layers.add(new DifferentiableGeneralizedLinearModel(
new MultivariateDiscriminant(w12), activationFunction));
layers.add(new DifferentiableGeneralizedLinearModel(
new MultivariateDiscriminant(w23), activationFunction));
this.setLayers(layers);
}
/**
* Creates a new instance of DifferentiableFeedforwardNeuralNetwork
*
* @param numInputs
* Number of nodes in the input layer
* @param numHiddens
* Number of nodes in the hidden (middle) layer
* @param numOutputs
* Number of nodes in the output layer
* @param scalarFunction
* Squashing function to assign to all layers
* @param random
* The random number generator for the initial weights.
*/
public DifferentiableFeedforwardNeuralNetwork(
int numInputs,
int numHiddens,
int numOutputs,
DifferentiableUnivariateScalarFunction scalarFunction,
Random random)
{
this(numInputs, numHiddens, numOutputs,
new ElementWiseDifferentiableVectorFunction(scalarFunction),
random);
}
/**
* Creates a new instance of DifferentiableFeedforwardNeuralNetwork
* @param layers
* Layers of the neural network
*/
public DifferentiableFeedforwardNeuralNetwork(
DifferentiableGeneralizedLinearModel ... layers )
{
super( new ArrayList<DifferentiableGeneralizedLinearModel>(
Arrays.asList( layers ) ) );
}
@Override
public DifferentiableFeedforwardNeuralNetwork clone()
{
return (DifferentiableFeedforwardNeuralNetwork) super.clone();
}
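/**
* Gets the layers of the network as differentiable models.
* @return
* The layers of the neural network
*/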
@SuppressWarnings("unchecked")
@Override
public ArrayList<DifferentiableGeneralizedLinearModel> getLayers()
{
return (ArrayList<DifferentiableGeneralizedLinearModel>) super.getLayers();
}
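/**
* Computes the gradient of the network output with respect to the
* network parameters at the given input, applying the chain rule
* backward from the output layer.
* <p>
* A minimal sketch of a finite-difference spot check of column
* {@code j} of the result, where {@code net}, {@code x}, and
* {@code j} are hypothetical:
* <pre>{@code
* Vector y0 = net.evaluate(x);
* Matrix g = net.computeParameterGradient(x);
* Vector p = net.convertToVector();
* double eps = 1e-6;
* p.setElement(j, p.getElement(j) + eps);
* net.convertFromVector(p);
* Vector approx = net.evaluate(x).minus(y0).scale(1.0 / eps);
* // approx should be close to g.getColumn(j)
* }</pre>
* @param input
* Input about which to compute the parameter gradient
* @return
* Gradient matrix with one row per output dimension and one column
* per network parameter
*/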
public Matrix computeParameterGradient(
Vector input)
{
int numLayers = this.getLayers().size();
// Forward pass: activations at each layer, including the input itself
ArrayList<Vector> layerActivations = this.evaluateAtEachLayer(input);
ArrayList<Matrix> layerGradients = new ArrayList<Matrix>(numLayers);
// M is the output dimensionality; N accumulates the parameter count
int M = layerActivations.get(numLayers).getDimensionality();
int N = 0;
// Backward pass: the running derivative of the network output with
// respect to the current layer's output starts as the identity
Matrix layerDerivative = MatrixFactory.getDefault().createIdentity(M, M);
for (int i = numLayers - 1; i >= 0; i--)
{
DifferentiableGeneralizedLinearModel layer =
this.getLayers().get(i);
Vector layerInput = layerActivations.get(i);
// Chain rule: d(output)/d(params_i) =
// d(output)/d(y_i) * d(y_i)/d(params_i)
Matrix layerGradient = layerDerivative.times(
layer.computeParameterGradient(layerInput));
N += layerGradient.getNumColumns();
layerGradients.add(layerGradient);
// No need to apply the chain rule beyond the first layer
if( i > 0 )
{
// Propagate the running derivative through this layer:
// d(output)/d(y_{i-1}) = d(output)/d(y_i) * d(y_i)/d(y_{i-1})
layerDerivative = layerDerivative.times(
layer.differentiate(layerInput));
}
}
// The per-layer gradients were stored in reverse (output-to-input)
// order, so copy them into the full gradient matrix back to front;
// this yields the first-layer-to-last-layer parameter ordering
Matrix gradient = MatrixFactory.getDefault().createMatrix(M, N);
int columnIndex = 0;
for (int n = numLayers - 1; n >= 0; n--)
{
Matrix layerGradient = layerGradients.get(n);
// The output layer's gradient (stored at index 0 and placed in the
// final block of columns) was never multiplied through the chain
// rule, so it keeps the sparse form of the linear discriminant's
// parameter gradient:
// [ x0 0 0 x3 0 0 x6 ...
// [ 0 x1 0 0 x4 0 ...
// [ 0 0 x2 0 0 x5 ...
// Only its nonzero elements need to be copied.
if( n == 0 )
{
int row = 0;
int Mi = layerGradient.getNumRows();
int Ni = layerGradient.getNumColumns();
for( int column = 0; column < Ni; column++ )
{
double value = layerGradient.getElement( row, column );
gradient.setElement( row, columnIndex+column, value );
row = (row + 1) % Mi;
}
}
else
{
gradient.setSubMatrix(0, columnIndex, layerGradient);
}
columnIndex += layerGradient.getNumColumns();
}
return gradient;
}
}