/*
* Encog(tm) Core v3.4 - Java Version
* http://www.heatonresearch.com/encog/
* https://github.com/encog/encog-java-core
* Copyright 2008-2016 Heaton Research, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* For more information on Heaton Research copyrights, licenses
* and trademarks visit:
* http://www.heatonresearch.com/copyright
*/
package org.encog.mathutil.matrices.hessian;
import org.encog.mathutil.EncogMath;
import org.encog.mathutil.error.ErrorCalculation;
import org.encog.mathutil.matrices.Matrix;
import org.encog.ml.data.MLData;
import org.encog.ml.data.MLDataPair;
import org.encog.ml.data.MLDataSet;
import org.encog.neural.networks.BasicNetwork;
import org.encog.util.EngineArray;
/**
* Calculate the Hessian matrix using the finite difference method. This is a
* very simple method of calculating the Hessian. The algorithm does not vary
* greatly by number layers. This makes it very useful as a tool to check the
* accuracy of other methods of determining the Hessian.
*
* For more information on the Finite Difference Method see the following article.
*
* http://en.wikipedia.org/wiki/Finite_difference_method
*
*/
public class HessianFD extends BasicHessian {
/**
* The initial step size for dStep.
*/
public final double INITIAL_STEP = 0.001;
/**
* The derivative step size, used for the finite difference method.
*/
private double[] dStep;
/**
* The derivative coefficient, used for the finite difference method.
*/
private double[] dCoeff;
/**
* The center of the point array.
*/
private int center;
/**
* The number of points requested per side. This determines the accuracy of the calculation.
*/
private int pointsPerSide = 5;
/**
* The number of points actually used, which is (pointsPerSide*2)+1.
*/
private int pointCount;
/**
* The weight count.
*/
private int weightCount;
/**
* {@inheritDoc}
*/
public void init(BasicNetwork theNetwork, MLDataSet theTraining) {
super.init(theNetwork,theTraining);
this.weightCount = theNetwork.getStructure().getFlat().getWeights().length;
this.center = this.pointsPerSide+1;
this.pointCount = (this.pointsPerSide*2)+1;
this.dCoeff = createCoefficients();
this.dStep = new double[this.weightCount];
for (int i = 0; i < this.weightCount; i++) {
this.dStep[i] = this.INITIAL_STEP;
}
}
/**
* {@inheritDoc}
*/
public void compute() {
this.sse = 0;
for(int i=0;i<network.getOutputCount();i++) {
internalCompute(i);
}
}
private void internalCompute(int outputNeuron) {
double e;
int row = 0;
ErrorCalculation error = new ErrorCalculation();
double[] derivative = new double[weightCount];
// Loop over every training element
for (final MLDataPair pair : this.training) {
EngineArray.fill(derivative, 0);
final MLData networkOutput = this.network.compute(pair.getInput());
e = pair.getIdeal().getData(outputNeuron) - networkOutput.getData(outputNeuron);
error.updateError(networkOutput.getData(outputNeuron), pair.getIdeal().getData(outputNeuron));
int currentWeight = 0;
// loop over the output weights
int outputFeedCount = network.getLayerTotalNeuronCount(network.getLayerCount()-2);
for(int i=0;i<this.network.getOutputCount();i++) {
for(int j=0;j<outputFeedCount;j++) {
double jc;
if (i == outputNeuron) {
jc = computeDerivative(pair.getInput(), outputNeuron,
currentWeight, this.dStep,
networkOutput.getData(outputNeuron), row);
} else {
jc = 0;
}
this.gradients[currentWeight] += jc *e;
derivative[currentWeight] = jc;
currentWeight++;
}
}
// Loop over every weight in the neural network
while( currentWeight<this.network.getFlat().getWeights().length) {
double jc = computeDerivative(
pair.getInput(), outputNeuron, currentWeight,
this.dStep,
networkOutput.getData(outputNeuron), row);
derivative[currentWeight] = jc;
this.gradients[currentWeight] += jc *e;
currentWeight++;
}
row++;
updateHessian(derivative);
}
sse+= error.calculateESS();
}
/**
* Computes the derivative of the output of the neural network with respect to a weight.
* @param inputData The input data to the neural network.
* @param weight The weight.
* @param stepSize The step size.
* @param networkOutput The output from the neural network.
* @param row The training row currently being processed.
* @return The derivative output.
*/
private double computeDerivative(final MLData inputData, int outputNeuron, int weight, final double[] stepSize,
final double networkOutput, final int row) {
double temp = this.network.getFlat().getWeights()[weight];
final double[] points = new double[this.dCoeff.length];
stepSize[row] = Math.max( this.INITIAL_STEP * Math.abs(temp), INITIAL_STEP);
points[this.center] = networkOutput;
for (int i = 0; i < this.dCoeff.length; i++) {
if (i == this.center)
continue;
final double newWeight = temp + ((i - this.center))
* stepSize[row];
this.network.getFlat().getWeights()[weight] = newWeight;
final MLData output = this.network.compute(inputData);
points[i] = output.getData(outputNeuron);
}
double result = 0.0;
for (int i = 0; i < this.dCoeff.length; i++) {
result += this.dCoeff[i] * points[i];
}
result /= Math.pow(stepSize[row], 1);
this.network.getFlat().getWeights()[weight] = temp;
return result;
}
/**
* Compute finite difference coefficients according to the method provided here:
*
* http://en.wikipedia.org/wiki/Finite_difference_coefficients
*
* @return An array of the coefficients for FD.
*/
public double[] createCoefficients() {
final double[] result = new double[this.pointCount];
final Matrix delts = new Matrix(this.pointCount, this.pointCount);
final double[][] t = delts.getData();
for (int j = 0; j < this.pointCount; j++) {
final double delt = (j - this.center);
double x = 1.0;
for (int k = 0; k < this.pointCount; k++) {
t[j][k] = x / EncogMath.factorial(k);
x *= delt;
}
}
final Matrix invMatrix = delts.inverse();
final double f = EncogMath.factorial(this.pointCount);
for (int k = 0; k < this.pointCount; k++) {
result[k] = (Math
.round(invMatrix.getData()[1][k]* f))/ f;
}
return result;
}
/**
* @return The number of points per side.
*/
public int getPointsPerSide() {
return pointsPerSide;
}
/**
* This specifies the number of points per side, default is 5. Must be called before init.
* @param pointsPerSide The number of points per side.
*/
public void setPointsPerSide(int pointsPerSide) {
this.pointsPerSide = pointsPerSide;
}
}