/*
 * File:                DifferentiableFeedforwardNeuralNetwork.java
 * Authors:             Kevin R. Dixon
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 *
 * Copyright February 28, 2007, Sandia Corporation. Under the terms of Contract
 * DE-AC04-94AL85000, there is a non-exclusive license for use of this work by
 * or on behalf of the U.S. Government. Export of this program may require a
 * license from the United States Government. See CopyrightHistory.txt for
 * complete details.
 *
 */

package gov.sandia.cognition.learning.function.vector;

import gov.sandia.cognition.learning.algorithm.gradient.GradientDescendable;
import gov.sandia.cognition.math.DifferentiableUnivariateScalarFunction;
import gov.sandia.cognition.math.matrix.DifferentiableVectorFunction;
import gov.sandia.cognition.math.matrix.Matrix;
import gov.sandia.cognition.math.matrix.MatrixFactory;
import gov.sandia.cognition.math.matrix.Vector;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;

/**
 * A feedforward neural network that can have an arbitrary number of layers,
 * with an arbitrary differentiable squashing (activation) function assigned
 * to each layer.
 *
 * @author Kevin R. Dixon
 * @since 1.0
 */
public class DifferentiableFeedforwardNeuralNetwork
    extends FeedforwardNeuralNetwork
    implements GradientDescendable
{

    /**
     * Creates a new instance of DifferentiableFeedforwardNeuralNetwork
     * @param nodesPerLayer
     * Number of nodes in each layer; must specify at least 2 layers
     * @param layerActivationFunctions
     * Squashing function to assign to each layer; must contain one fewer
     * squashing function than there are layers (the input layer has no
     * squashing)
     * @param random
     * The random number generator for the initial weights.
     */
    public DifferentiableFeedforwardNeuralNetwork(
        ArrayList<Integer> nodesPerLayer,
        ArrayList<DifferentiableUnivariateScalarFunction> layerActivationFunctions,
        Random random)
    {
        super(new ArrayList<DifferentiableGeneralizedLinearModel>());

        ArrayList<DifferentiableGeneralizedLinearModel> layers =
            new ArrayList<DifferentiableGeneralizedLinearModel>(
                layerActivationFunctions.size());

        // Each layer maps nodesPerLayer[i] inputs to nodesPerLayer[i+1]
        // outputs through a small uniform-random weight matrix, followed
        // by that layer's squashing function.
        final double range = 0.1;
        for (int i = 0; i < nodesPerLayer.size() - 1; i++)
        {
            int currentNum = nodesPerLayer.get(i);
            int nextNum = nodesPerLayer.get(i + 1);
            Matrix w = MatrixFactory.getDefault().createUniformRandom(
                nextNum, currentNum, -range, range, random);
            DifferentiableGeneralizedLinearModel layer =
                new DifferentiableGeneralizedLinearModel(
                    new MultivariateDiscriminant(w),
                    layerActivationFunctions.get(i));
            layers.add(layer);
        }

        this.setLayers(layers);
    }
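    // Usage sketch (illustrative only, not part of the original class):
    // building a 2-input, 3-hidden, 1-output network with one squashing
    // function per non-input layer. AtanFunction is assumed here as a
    // stand-in for any DifferentiableUnivariateScalarFunction
    // implementation; substitute whichever squashing function you use.
    //
    //   ArrayList<Integer> nodesPerLayer =
    //       new ArrayList<Integer>(Arrays.asList(2, 3, 1));
    //   ArrayList<DifferentiableUnivariateScalarFunction> squashers =
    //       new ArrayList<DifferentiableUnivariateScalarFunction>(
    //           Arrays.asList(new AtanFunction(), new AtanFunction()));
    //   DifferentiableFeedforwardNeuralNetwork net =
    //       new DifferentiableFeedforwardNeuralNetwork(
    //           nodesPerLayer, squashers, new Random(42));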
    /**
     * Creates a new instance of DifferentiableFeedforwardNeuralNetwork
     *
     * @param numInputs
     * Number of nodes in the input layer
     * @param numHiddens
     * Number of nodes in the hidden (middle) layer
     * @param numOutputs
     * Number of nodes in the output layer
     * @param activationFunction
     * Squashing function to assign to all layers
     * @param random
     * The random number generator for the initial weights.
     */
    public DifferentiableFeedforwardNeuralNetwork(
        int numInputs,
        int numHiddens,
        int numOutputs,
        DifferentiableVectorFunction activationFunction,
        Random random)
    {
        super(new ArrayList<DifferentiableGeneralizedLinearModel>());

        ArrayList<DifferentiableGeneralizedLinearModel> layers =
            new ArrayList<DifferentiableGeneralizedLinearModel>(2);

        // Two weight matrices: input->hidden and hidden->output, both
        // initialized uniformly at random.
        final double range = 1.0;
        Matrix w12 = MatrixFactory.getDefault().createUniformRandom(
            numHiddens, numInputs, -range, range, random);
        Matrix w23 = MatrixFactory.getDefault().createUniformRandom(
            numOutputs, numHiddens, -range, range, random);

        layers.add(new DifferentiableGeneralizedLinearModel(
            new MultivariateDiscriminant(w12), activationFunction));
        layers.add(new DifferentiableGeneralizedLinearModel(
            new MultivariateDiscriminant(w23), activationFunction));

        this.setLayers(layers);
    }

    /**
     * Creates a new instance of DifferentiableFeedforwardNeuralNetwork
     *
     * @param numInputs
     * Number of nodes in the input layer
     * @param numHiddens
     * Number of nodes in the hidden (middle) layer
     * @param numOutputs
     * Number of nodes in the output layer
     * @param scalarFunction
     * Squashing function to assign to all layers
     * @param random
     * The random number generator for the initial weights.
     */
    public DifferentiableFeedforwardNeuralNetwork(
        int numInputs,
        int numHiddens,
        int numOutputs,
        DifferentiableUnivariateScalarFunction scalarFunction,
        Random random)
    {
        this(numInputs, numHiddens, numOutputs,
            new ElementWiseDifferentiableVectorFunction(scalarFunction),
            random);
    }

    /**
     * Creates a new instance of DifferentiableFeedforwardNeuralNetwork
     * @param layers
     * Layers of the neural network
     */
    public DifferentiableFeedforwardNeuralNetwork(
        DifferentiableGeneralizedLinearModel... layers)
    {
        super(new ArrayList<DifferentiableGeneralizedLinearModel>(
            Arrays.asList(layers)));
    }

    @Override
    public DifferentiableFeedforwardNeuralNetwork clone()
    {
        return (DifferentiableFeedforwardNeuralNetwork) super.clone();
    }

    @SuppressWarnings("unchecked")
    @Override
    public ArrayList<DifferentiableGeneralizedLinearModel> getLayers()
    {
        return (ArrayList<DifferentiableGeneralizedLinearModel>) super.getLayers();
    }
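    // Gradient-check sketch (illustrative only): the analytic gradient
    // from computeParameterGradient below can be validated against a
    // finite-difference approximation, assuming the convertToVector /
    // convertFromVector parameter accessors inherited through the
    // GradientDescendable interface:
    //
    //   Vector p = net.convertToVector();        // flattened weights
    //   Vector out0 = net.evaluate(input);
    //   Matrix analytic = net.computeParameterGradient(input);
    //   double h = 1e-6;
    //   p.setElement(k, p.getElement(k) + h);    // perturb weight k
    //   net.convertFromVector(p);
    //   Vector out1 = net.evaluate(input);
    //   // column k of "analytic" should approximate (out1 - out0) / h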
    /**
     * Computes the gradient of the network output with respect to the
     * network parameters (the weight-matrix entries of each layer),
     * evaluated at the given input, by back-propagating the chain rule
     * from the output layer toward the input layer.
     * @param input
     * Input about which to compute the parameter gradient
     * @return
     * Gradient Matrix with one row per output dimension and one column
     * per network parameter
     */
    @Override
    public Matrix computeParameterGradient(
        Vector input)
    {
        int numLayers = this.getLayers().size();

        // Activations at each layer: index 0 is the input itself and
        // index numLayers is the network output.
        ArrayList<Vector> layerActivations = this.evaluateAtEachLayer(input);

        ArrayList<Matrix> layerGradients = new ArrayList<Matrix>(numLayers);

        int M = layerActivations.get(numLayers).getDimensionality();
        int N = 0;
        Matrix layerDerivative =
            MatrixFactory.getDefault().createIdentity(M, M);
        for (int i = numLayers - 1; i >= 0; i--)
        {
            DifferentiableGeneralizedLinearModel layer =
                this.getLayers().get(i);
            Vector layerInput = layerActivations.get(i);
            Matrix layerGradient = layerDerivative.times(
                layer.computeParameterGradient(layerInput));
            N += layerGradient.getNumColumns();
            layerGradients.add(layerGradient);

            // Don't need to chain rule beyond the first layer
            if (i > 0)
            {
                layerDerivative = layerDerivative.times(
                    layer.differentiate(layerInput));
            }
        }

        // Gradients were pushed on in reverse order, so push them into
        // the gradient Matrix in reverse order... this will be the
        // correct order for the parameter ordering
        Matrix gradient = MatrixFactory.getDefault().createMatrix(M, N);
        int columnIndex = 0;
        for (int n = numLayers - 1; n >= 0; n--)
        {
            Matrix layerGradient = layerGradients.get(n);

            // The last layer's gradient (pushed first, so n == 0 here)
            // was not multiplied by any chain-rule factors, so it has a
            // special sparse form with one nonzero per column:
            // [ x0 0  0  x3 0  0  x6 ...
            // [ 0  x1 0  0  x4 0  ...
            // [ 0  0  x2 0  0  x5 ...
            // Copy only that single nonzero element from each column.
            if (n == 0)
            {
                int row = 0;
                int Mi = layerGradient.getNumRows();
                int Ni = layerGradient.getNumColumns();
                for (int column = 0; column < Ni; column++)
                {
                    double value = layerGradient.getElement(row, column);
                    gradient.setElement(row, columnIndex + column, value);
                    row = (row + 1) % Mi;
                }
            }
            else
            {
                gradient.setSubMatrix(0, columnIndex, layerGradient);
            }
            columnIndex += layerGradient.getNumColumns();
        }

        return gradient;
    }

}
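// End-to-end sketch (illustrative only, reusing the hypothetical
// AtanFunction stand-in from the earlier comment and assuming Foundry's
// VectorFactory for building the input):
//
//   DifferentiableFeedforwardNeuralNetwork net =
//       new DifferentiableFeedforwardNeuralNetwork(
//           2, 3, 1, new AtanFunction(), new Random(1));
//   Vector input = VectorFactory.getDefault().copyValues(0.5, -0.25);
//   Vector output = net.evaluate(input);              // dimensionality 1
//   Matrix gradient = net.computeParameterGradient(input);
//   // gradient: 1 row (output dim) x 9 columns (3*2 + 1*3 weights)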