package hex.deeplearning;
import org.junit.Ignore;
import hex.deeplearning.DeepLearningModel.DeepLearningParameters;
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.Activation;
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.Loss;
import water.util.ArrayUtils;
import water.util.RandomUtils;
import java.text.DecimalFormat;
import java.util.Random;
/**
* James McCaffrey's MLP on Iris.
* <p>
* Adapted to Java as a reference implementation for testing.
* <p>
* http://channel9.msdn.com/Events/Build/2013/2-401
*/
@Ignore
public class DeepLearningMLPReference {
// Shared number formatter used by ShowVector/ShowMatrix; always prints three decimal places.
static final DecimalFormat _format = new DecimalFormat("0.000");
// Training rows, filled by init(): 4 standardized feature columns followed by 3 indicator (one-hot) class columns.
double[][] _trainData;
// Remaining rows, filled by init(); same layout, standardized with the training-set mean and sigma.
double[][] _testData;
// The reference network; created and weight-initialized at the end of init().
NeuralNetwork _nn;
/**
* Builds the embedded 150-row Iris data set, splits it into train/test partitions,
* standardizes the four feature columns and creates the reference network.
* <p>
* Each raw row holds 4 measurements followed by 3 indicator (0/1) class columns.
*
* @param activation    hidden-layer activation handed to the {@link NeuralNetwork} constructor
* @param rand          random source used to shuffle the rows before splitting
* @param holdout_ratio despite its name, the fraction of rows that goes into the TRAINING
*                      set ({@code trainRows = (int)(150 * holdout_ratio)}); the rest is test data
* @param numHidden     number of hidden units
*/
public void init(DeepLearningParameters.Activation activation, Random rand, double holdout_ratio, int numHidden) {
double[][] ds = new double[150][];
int r = 0;
// Rows 0-49: indicator columns are 0, 0, 1.
ds[r++] = new double[] { 5.1, 3.5, 1.4, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 4.9, 3, 1.4, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 4.7, 3.2, 1.3, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 4.6, 3.1, 1.5, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5, 3.6, 1.4, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5.4, 3.9, 1.7, 0.4, 0, 0, 1 };
ds[r++] = new double[] { 4.6, 3.4, 1.4, 0.3, 0, 0, 1 };
ds[r++] = new double[] { 5, 3.4, 1.5, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 4.4, 2.9, 1.4, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
ds[r++] = new double[] { 5.4, 3.7, 1.5, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 4.8, 3.4, 1.6, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 4.8, 3, 1.4, 0.1, 0, 0, 1 };
ds[r++] = new double[] { 4.3, 3, 1.1, 0.1, 0, 0, 1 };
ds[r++] = new double[] { 5.8, 4, 1.2, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5.7, 4.4, 1.5, 0.4, 0, 0, 1 };
ds[r++] = new double[] { 5.4, 3.9, 1.3, 0.4, 0, 0, 1 };
ds[r++] = new double[] { 5.1, 3.5, 1.4, 0.3, 0, 0, 1 };
ds[r++] = new double[] { 5.7, 3.8, 1.7, 0.3, 0, 0, 1 };
ds[r++] = new double[] { 5.1, 3.8, 1.5, 0.3, 0, 0, 1 };
ds[r++] = new double[] { 5.4, 3.4, 1.7, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5.1, 3.7, 1.5, 0.4, 0, 0, 1 };
ds[r++] = new double[] { 4.6, 3.6, 1, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5.1, 3.3, 1.7, 0.5, 0, 0, 1 };
ds[r++] = new double[] { 4.8, 3.4, 1.9, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5, 3, 1.6, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5, 3.4, 1.6, 0.4, 0, 0, 1 };
ds[r++] = new double[] { 5.2, 3.5, 1.5, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5.2, 3.4, 1.4, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 4.7, 3.2, 1.6, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 4.8, 3.1, 1.6, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5.4, 3.4, 1.5, 0.4, 0, 0, 1 };
ds[r++] = new double[] { 5.2, 4.1, 1.5, 0.1, 0, 0, 1 };
ds[r++] = new double[] { 5.5, 4.2, 1.4, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
ds[r++] = new double[] { 5, 3.2, 1.2, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5.5, 3.5, 1.3, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
ds[r++] = new double[] { 4.4, 3, 1.3, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5.1, 3.4, 1.5, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5, 3.5, 1.3, 0.3, 0, 0, 1 };
ds[r++] = new double[] { 4.5, 2.3, 1.3, 0.3, 0, 0, 1 };
ds[r++] = new double[] { 4.4, 3.2, 1.3, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5, 3.5, 1.6, 0.6, 0, 0, 1 };
ds[r++] = new double[] { 5.1, 3.8, 1.9, 0.4, 0, 0, 1 };
ds[r++] = new double[] { 4.8, 3, 1.4, 0.3, 0, 0, 1 };
ds[r++] = new double[] { 5.1, 3.8, 1.6, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 4.6, 3.2, 1.4, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5.3, 3.7, 1.5, 0.2, 0, 0, 1 };
ds[r++] = new double[] { 5, 3.3, 1.4, 0.2, 0, 0, 1 };
// Rows 50-99: indicator columns are 0, 1, 0.
ds[r++] = new double[] { 7, 3.2, 4.7, 1.4, 0, 1, 0 };
ds[r++] = new double[] { 6.4, 3.2, 4.5, 1.5, 0, 1, 0 };
ds[r++] = new double[] { 6.9, 3.1, 4.9, 1.5, 0, 1, 0 };
ds[r++] = new double[] { 5.5, 2.3, 4, 1.3, 0, 1, 0 };
ds[r++] = new double[] { 6.5, 2.8, 4.6, 1.5, 0, 1, 0 };
ds[r++] = new double[] { 5.7, 2.8, 4.5, 1.3, 0, 1, 0 };
ds[r++] = new double[] { 6.3, 3.3, 4.7, 1.6, 0, 1, 0 };
ds[r++] = new double[] { 4.9, 2.4, 3.3, 1, 0, 1, 0 };
ds[r++] = new double[] { 6.6, 2.9, 4.6, 1.3, 0, 1, 0 };
ds[r++] = new double[] { 5.2, 2.7, 3.9, 1.4, 0, 1, 0 };
ds[r++] = new double[] { 5, 2, 3.5, 1, 0, 1, 0 };
ds[r++] = new double[] { 5.9, 3, 4.2, 1.5, 0, 1, 0 };
ds[r++] = new double[] { 6, 2.2, 4, 1, 0, 1, 0 };
ds[r++] = new double[] { 6.1, 2.9, 4.7, 1.4, 0, 1, 0 };
ds[r++] = new double[] { 5.6, 2.9, 3.6, 1.3, 0, 1, 0 };
ds[r++] = new double[] { 6.7, 3.1, 4.4, 1.4, 0, 1, 0 };
ds[r++] = new double[] { 5.6, 3, 4.5, 1.5, 0, 1, 0 };
ds[r++] = new double[] { 5.8, 2.7, 4.1, 1, 0, 1, 0 };
ds[r++] = new double[] { 6.2, 2.2, 4.5, 1.5, 0, 1, 0 };
ds[r++] = new double[] { 5.6, 2.5, 3.9, 1.1, 0, 1, 0 };
ds[r++] = new double[] { 5.9, 3.2, 4.8, 1.8, 0, 1, 0 };
ds[r++] = new double[] { 6.1, 2.8, 4, 1.3, 0, 1, 0 };
ds[r++] = new double[] { 6.3, 2.5, 4.9, 1.5, 0, 1, 0 };
ds[r++] = new double[] { 6.1, 2.8, 4.7, 1.2, 0, 1, 0 };
ds[r++] = new double[] { 6.4, 2.9, 4.3, 1.3, 0, 1, 0 };
ds[r++] = new double[] { 6.6, 3, 4.4, 1.4, 0, 1, 0 };
ds[r++] = new double[] { 6.8, 2.8, 4.8, 1.4, 0, 1, 0 };
ds[r++] = new double[] { 6.7, 3, 5, 1.7, 0, 1, 0 };
ds[r++] = new double[] { 6, 2.9, 4.5, 1.5, 0, 1, 0 };
ds[r++] = new double[] { 5.7, 2.6, 3.5, 1, 0, 1, 0 };
ds[r++] = new double[] { 5.5, 2.4, 3.8, 1.1, 0, 1, 0 };
ds[r++] = new double[] { 5.5, 2.4, 3.7, 1, 0, 1, 0 };
ds[r++] = new double[] { 5.8, 2.7, 3.9, 1.2, 0, 1, 0 };
ds[r++] = new double[] { 6, 2.7, 5.1, 1.6, 0, 1, 0 };
ds[r++] = new double[] { 5.4, 3, 4.5, 1.5, 0, 1, 0 };
ds[r++] = new double[] { 6, 3.4, 4.5, 1.6, 0, 1, 0 };
ds[r++] = new double[] { 6.7, 3.1, 4.7, 1.5, 0, 1, 0 };
ds[r++] = new double[] { 6.3, 2.3, 4.4, 1.3, 0, 1, 0 };
ds[r++] = new double[] { 5.6, 3, 4.1, 1.3, 0, 1, 0 };
ds[r++] = new double[] { 5.5, 2.5, 4, 1.3, 0, 1, 0 };
ds[r++] = new double[] { 5.5, 2.6, 4.4, 1.2, 0, 1, 0 };
ds[r++] = new double[] { 6.1, 3, 4.6, 1.4, 0, 1, 0 };
ds[r++] = new double[] { 5.8, 2.6, 4, 1.2, 0, 1, 0 };
ds[r++] = new double[] { 5, 2.3, 3.3, 1, 0, 1, 0 };
ds[r++] = new double[] { 5.6, 2.7, 4.2, 1.3, 0, 1, 0 };
ds[r++] = new double[] { 5.7, 3, 4.2, 1.2, 0, 1, 0 };
ds[r++] = new double[] { 5.7, 2.9, 4.2, 1.3, 0, 1, 0 };
ds[r++] = new double[] { 6.2, 2.9, 4.3, 1.3, 0, 1, 0 };
ds[r++] = new double[] { 5.1, 2.5, 3, 1.1, 0, 1, 0 };
ds[r++] = new double[] { 5.7, 2.8, 4.1, 1.3, 0, 1, 0 };
// Rows 100-149: indicator columns are 1, 0, 0.
ds[r++] = new double[] { 6.3, 3.3, 6, 2.5, 1, 0, 0 };
ds[r++] = new double[] { 5.8, 2.7, 5.1, 1.9, 1, 0, 0 };
ds[r++] = new double[] { 7.1, 3, 5.9, 2.1, 1, 0, 0 };
ds[r++] = new double[] { 6.3, 2.9, 5.6, 1.8, 1, 0, 0 };
ds[r++] = new double[] { 6.5, 3, 5.8, 2.2, 1, 0, 0 };
ds[r++] = new double[] { 7.6, 3, 6.6, 2.1, 1, 0, 0 };
ds[r++] = new double[] { 4.9, 2.5, 4.5, 1.7, 1, 0, 0 };
ds[r++] = new double[] { 7.3, 2.9, 6.3, 1.8, 1, 0, 0 };
ds[r++] = new double[] { 6.7, 2.5, 5.8, 1.8, 1, 0, 0 };
ds[r++] = new double[] { 7.2, 3.6, 6.1, 2.5, 1, 0, 0 };
ds[r++] = new double[] { 6.5, 3.2, 5.1, 2, 1, 0, 0 };
ds[r++] = new double[] { 6.4, 2.7, 5.3, 1.9, 1, 0, 0 };
ds[r++] = new double[] { 6.8, 3, 5.5, 2.1, 1, 0, 0 };
ds[r++] = new double[] { 5.7, 2.5, 5, 2, 1, 0, 0 };
ds[r++] = new double[] { 5.8, 2.8, 5.1, 2.4, 1, 0, 0 };
ds[r++] = new double[] { 6.4, 3.2, 5.3, 2.3, 1, 0, 0 };
ds[r++] = new double[] { 6.5, 3, 5.5, 1.8, 1, 0, 0 };
ds[r++] = new double[] { 7.7, 3.8, 6.7, 2.2, 1, 0, 0 };
ds[r++] = new double[] { 7.7, 2.6, 6.9, 2.3, 1, 0, 0 };
ds[r++] = new double[] { 6, 2.2, 5, 1.5, 1, 0, 0 };
ds[r++] = new double[] { 6.9, 3.2, 5.7, 2.3, 1, 0, 0 };
ds[r++] = new double[] { 5.6, 2.8, 4.9, 2, 1, 0, 0 };
ds[r++] = new double[] { 7.7, 2.8, 6.7, 2, 1, 0, 0 };
ds[r++] = new double[] { 6.3, 2.7, 4.9, 1.8, 1, 0, 0 };
ds[r++] = new double[] { 6.7, 3.3, 5.7, 2.1, 1, 0, 0 };
ds[r++] = new double[] { 7.2, 3.2, 6, 1.8, 1, 0, 0 };
ds[r++] = new double[] { 6.2, 2.8, 4.8, 1.8, 1, 0, 0 };
ds[r++] = new double[] { 6.1, 3, 4.9, 1.8, 1, 0, 0 };
ds[r++] = new double[] { 6.4, 2.8, 5.6, 2.1, 1, 0, 0 };
ds[r++] = new double[] { 7.2, 3, 5.8, 1.6, 1, 0, 0 };
ds[r++] = new double[] { 7.4, 2.8, 6.1, 1.9, 1, 0, 0 };
ds[r++] = new double[] { 7.9, 3.8, 6.4, 2, 1, 0, 0 };
ds[r++] = new double[] { 6.4, 2.8, 5.6, 2.2, 1, 0, 0 };
ds[r++] = new double[] { 6.3, 2.8, 5.1, 1.5, 1, 0, 0 };
ds[r++] = new double[] { 6.1, 2.6, 5.6, 1.4, 1, 0, 0 };
ds[r++] = new double[] { 7.7, 3, 6.1, 2.3, 1, 0, 0 };
ds[r++] = new double[] { 6.3, 3.4, 5.6, 2.4, 1, 0, 0 };
ds[r++] = new double[] { 6.4, 3.1, 5.5, 1.8, 1, 0, 0 };
ds[r++] = new double[] { 6, 3, 4.8, 1.8, 1, 0, 0 };
ds[r++] = new double[] { 6.9, 3.1, 5.4, 2.1, 1, 0, 0 };
ds[r++] = new double[] { 6.7, 3.1, 5.6, 2.4, 1, 0, 0 };
ds[r++] = new double[] { 6.9, 3.1, 5.1, 2.3, 1, 0, 0 };
ds[r++] = new double[] { 5.8, 2.7, 5.1, 1.9, 1, 0, 0 };
ds[r++] = new double[] { 6.8, 3.2, 5.9, 2.3, 1, 0, 0 };
ds[r++] = new double[] { 6.7, 3.3, 5.7, 2.5, 1, 0, 0 };
ds[r++] = new double[] { 6.7, 3, 5.2, 2.3, 1, 0, 0 };
ds[r++] = new double[] { 6.3, 2.5, 5, 1.9, 1, 0, 0 };
ds[r++] = new double[] { 6.5, 3, 5.2, 2, 1, 0, 0 };
ds[r++] = new double[] { 6.2, 3.4, 5.4, 2.3, 1, 0, 0 };
ds[r++] = new double[] { 5.9, 3, 5.1, 1.8, 1, 0, 0 };
// Copy every row, then overwrite columns 4..6 with the indicator columns in reversed
// order (4 <-> 6 swapped, 5 unchanged), so the first block of rows becomes 1, 0, 0.
double[][] allData = new double[ds.length][ds[0].length];
for( int j = 0; j < allData.length; j++ ) {
for( int i = 0; i < allData[j].length; i++ )
allData[j][i] = ds[j][i];
allData[j][4] = ds[j][6];
allData[j][5] = ds[j][5];
allData[j][6] = ds[j][4];
}
// holdout_ratio is used here as the TRAIN fraction; the cast truncates toward zero.
int trainRows = (int) (allData.length * holdout_ratio);
int testRows = allData.length - trainRows;
_trainData = new double[trainRows][];
_testData = new double[testRows][];
MakeTrainTest(allData, _trainData, _testData, rand);
// Standardize the four feature columns using statistics of the training rows only;
// the test rows are transformed with the same mean and sigma.
for( int i = 0; i < 4; i++ ) {
double mean = 0;
for( int n = 0; n < _trainData.length; n++ )
mean += _trainData[n][i];
mean /= _trainData.length;
double sigma = 0;
for( int n = 0; n < _trainData.length; n++ ) {
double d = _trainData[n][i] - mean;
sigma += d * d;
}
// sample standard deviation (divides by n - 1)
sigma = Math.sqrt(sigma / (_trainData.length - 1));
for( int n = 0; n < _trainData.length; n++ ) {
_trainData[n][i] -= mean;
_trainData[n][i] /= sigma;
}
for( int n = 0; n < _testData.length; n++ ) {
_testData[n][i] -= mean;
_testData[n][i] /= sigma;
}
}
// 4 feature inputs, 3 class outputs; weights are then set by InitializeWeights().
int numInput = 4;
int numOutput = 3;
_nn = new NeuralNetwork(activation, numInput, numHidden, numOutput);
_nn.InitializeWeights();
}
/**
 * Runs back-propagation training of the reference network on {@code _trainData}.
 * All arguments are forwarded unchanged to {@code NeuralNetwork.Train}.
 *
 * @param maxEpochs number of full passes over the training rows
 * @param learnRate step size applied to each gradient
 * @param momentum  factor applied to the previous update
 * @param loss      loss function selecting the output-gradient formula
 * @param seed      base seed for the per-epoch row shuffling
 */
void train(int maxEpochs, double learnRate, double momentum, Loss loss, long seed) {
  final NeuralNetwork network = _nn;
  final double[][] rows = _trainData;
  network.Train(rows, maxEpochs, learnRate, momentum, loss, seed);
}
/**
 * Randomly partitions {@code allData} into the two pre-sized output arrays.
 * <p>
 * The split proportion is dictated solely by the lengths of {@code trainData} and
 * {@code testData}: a shuffled sequence of row indexes is built, the first
 * {@code trainData.length} of them fill {@code trainData} and the remainder fill
 * {@code testData}. Every selected row is copied into a fresh array whose width is
 * that of {@code allData[0]}.
 *
 * @param allData   source rows
 * @param trainData receives the first part of the shuffled rows
 * @param testData  receives the remaining shuffled rows
 * @param rand      random source driving the shuffle
 */
void MakeTrainTest(double[][] allData, double[][] trainData, double[][] testData, Random rand) {
  final int width = allData[0].length;

  // identity permutation, then shuffled in place
  final int[] order = new int[allData.length];
  for (int k = 0; k < order.length; k++) {
    order[k] = k;
  }
  NeuralNetwork.shuffle(order, rand);

  // leading part of the permutation -> training rows
  int cursor = 0;
  while (cursor < trainData.length) {
    trainData[cursor] = new double[width];
    System.arraycopy(allData[order[cursor]], 0, trainData[cursor], 0, width);
    cursor++;
  }

  // whatever is left -> test rows
  for (int slot = 0; cursor < allData.length; slot++, cursor++) {
    testData[slot] = new double[width];
    System.arraycopy(allData[order[cursor]], 0, testData[slot], 0, width);
  }
}
/**
* Unimplemented placeholder: the body is empty, so a call leaves {@code dataMatrix}
* untouched and ignores {@code cols}.
*/
static void Normalize(double[][] dataMatrix, int[] cols) {
// in most cases you want to normalize the x-data
}
/**
 * Prints a vector to standard output using the shared three-decimal formatter.
 * A line break is emitted BEFORE every group of {@code valsPerRow} values (including
 * the first one), and each value is followed by a single space.
 *
 * @param vector     values to print
 * @param valsPerRow number of values per printed row
 * @param decimals   unused; the precision is fixed by {@code _format}
 * @param newLine    whether to finish with an extra line break
 */
static void ShowVector(double[] vector, int valsPerRow, int decimals, boolean newLine) {
  int position = 0;
  for (double value : vector) {
    final boolean startsRow = position % valsPerRow == 0;
    if (startsRow) {
      System.out.println("");
    }
    System.out.print(_format.format(value) + " ");
    position++;
  }
  if (newLine) {
    System.out.println("");
  }
}
/**
 * Prints the first {@code numRows} rows of a matrix to standard output, one row per
 * line, prefixed with the row index. Each cell is written as a sign column (a blank
 * for values {@code >= 0.0}, {@code -} otherwise) followed by the absolute value in
 * the shared three-decimal format and a space.
 *
 * @param matrix   matrix to print
 * @param numRows  number of leading rows to print
 * @param decimals unused; the precision is fixed by {@code _format}
 * @param newLine  whether to finish with an extra line break
 */
static void ShowMatrix(double[][] matrix, int numRows, int decimals, boolean newLine) {
  for (int row = 0; row < numRows; row++) {
    System.out.print(row + ": ");
    for (double cell : matrix[row]) {
      final String sign = cell >= 0.0 ? " " : "-";
      System.out.print(sign);
      System.out.print(_format.format(Math.abs(cell)) + " ");
    }
    System.out.println("");
  }
  if (newLine) {
    System.out.println("");
  }
}
public static class NeuralNetwork {
// Hidden-layer activation; the constructor replaces this default with its argument.
Activation activation = Activation.Tanh;
// Layer sizes, set once by the constructor.
int numInput;
int numHidden;
int numOutput;
// Copy of the most recent input vector passed to ComputeOutputs.
double[] inputs;
// Weights are stored as float, biases and activations as double.
float[][] ihWeights; // input-hidden, indexed [input][hidden]
double[] hBiases;
double[] hOutputs; // hidden activations from the last forward pass
float[][] hoWeights; // hidden-output, indexed [hidden][output]
double[] oBiases;
double[] outputs; // softmax outputs from the last forward pass
// back-prop specific arrays (these could be local to method UpdateWeights)
double[] oGrads; // output gradients for back-propagation
double[] hGrads; // hidden gradients for back-propagation
// back-prop momentum specific arrays (these could be local to method Train)
// Each holds the gradient step applied on the previous UpdateWeights call.
float[][] ihPrevWeightsDelta; // for momentum with back-propagation
double[] hPrevBiasesDelta;
float[][] hoPrevWeightsDelta;
double[] oPrevBiasesDelta;
/**
 * Allocates a single-hidden-layer network of the given shape. All weights, biases,
 * gradients and momentum buffers start out zero-filled.
 *
 * @param activationType hidden-layer activation
 * @param numInput       number of input nodes
 * @param numHidden      number of hidden nodes
 * @param numOutput      number of output nodes
 */
public NeuralNetwork(DeepLearningParameters.Activation activationType, int numInput, int numHidden, int numOutput) {
  // shape and activation
  activation = activationType;
  this.numInput = numInput;
  this.numHidden = numHidden;
  this.numOutput = numOutput;

  // input -> hidden layer
  inputs = new double[numInput];
  ihWeights = MakeMatrixFloat(numInput, numHidden);
  hBiases = new double[numHidden];
  hOutputs = new double[numHidden];

  // hidden -> output layer
  hoWeights = MakeMatrixFloat(numHidden, numOutput);
  oBiases = new double[numOutput];
  outputs = new double[numOutput];

  // back-propagation gradients
  hGrads = new double[numHidden];
  oGrads = new double[numOutput];

  // previous-step deltas used for momentum
  ihPrevWeightsDelta = MakeMatrixFloat(numInput, numHidden);
  hPrevBiasesDelta = new double[numHidden];
  hoPrevWeightsDelta = MakeMatrixFloat(numHidden, numOutput);
  oPrevBiasesDelta = new double[numOutput];
}
/**
 * Allocates a zero-filled {@code rows x cols} matrix of doubles, one row array at a time.
 *
 * @param rows number of rows
 * @param cols number of columns in every row
 * @return the new matrix
 */
private static double[][] MakeMatrix(int rows, int cols) {
  final double[][] grid = new double[rows][];
  int filled = 0;
  while (filled < grid.length) {
    grid[filled] = new double[cols];
    filled++;
  }
  return grid;
}
/**
 * Allocates a zero-filled {@code rows x cols} matrix of floats, one row array at a time.
 *
 * @param rows number of rows
 * @param cols number of columns in every row
 * @return the new matrix
 */
private static float[][] MakeMatrixFloat(int rows, int cols) {
  final float[][] grid = new float[rows][];
  int filled = 0;
  while (filled < grid.length) {
    grid[filled] = new float[cols];
    filled++;
  }
  return grid;
}
/**
 * Dumps the complete network state (sizes, inputs, weights, biases, activations,
 * gradients and momentum buffers) as a multi-line string.
 * <p>
 * The text is assembled in a single {@link StringBuilder} instead of repeated
 * {@code String +=} inside loops, which copied the growing string on every append.
 * The produced text is unchanged.
 */
@Override public String toString() {
  final String rule = "===============================\n";
  final StringBuilder sb = new StringBuilder();
  sb.append(rule);
  sb.append("numInput = ").append(numInput)
    .append(" numHidden = ").append(numHidden)
    .append(" numOutput = ").append(numOutput)
    .append("\n\n");
  appendVector(sb, "inputs: \n", inputs);
  appendMatrix(sb, "ihWeights: \n", ihWeights);
  appendVector(sb, "hBiases: \n", hBiases);
  appendVector(sb, "hOutputs: \n", hOutputs);
  appendMatrix(sb, "hoWeights: \n", hoWeights);
  appendVector(sb, "oBiases: \n", oBiases);
  appendVector(sb, "hGrads: \n", hGrads);
  appendVector(sb, "oGrads: \n", oGrads);
  appendMatrix(sb, "ihPrevWeightsDelta: \n", ihPrevWeightsDelta);
  appendVector(sb, "hPrevBiasesDelta: \n", hPrevBiasesDelta);
  appendMatrix(sb, "hoPrevWeightsDelta: \n", hoPrevWeightsDelta);
  appendVector(sb, "oPrevBiasesDelta: \n", oPrevBiasesDelta);
  appendVector(sb, "outputs: \n", outputs);
  sb.append(rule);
  return sb.toString();
}

/** Appends a heading, every value followed by a space, then a blank line. */
private static void appendVector(StringBuilder sb, String heading, double[] values) {
  sb.append(heading);
  for (double v : values) {
    sb.append(v).append(' ');
  }
  sb.append("\n\n");
}

/** Appends a heading, one line per row (each value followed by a space), then an empty line. */
private static void appendMatrix(StringBuilder sb, String heading, float[][] rows) {
  sb.append(heading);
  for (float[] row : rows) {
    for (float v : row) {
      sb.append(v).append(' ');
    }
    sb.append('\n');
  }
  sb.append('\n');
}
// ----------------------------------------------------------------------------------------
/**
 * Loads all weights and biases from one flat array. The expected layout is:
 * input-hidden weights (row-major, {@code [input][hidden]}), hidden biases,
 * hidden-output weights (row-major, {@code [hidden][output]}), output biases.
 *
 * @param weights flat parameter vector of length
 *                {@code numInput*numHidden + numHidden*numOutput + numHidden + numOutput}
 * @throws IllegalArgumentException if {@code weights} has any other length; the message
 *         now reports both the expected and the actual length (it previously ended in a
 *         dangling colon). {@code IllegalArgumentException} is a {@code RuntimeException},
 *         so existing handlers keep working.
 */
public void SetWeights(float[] weights) {
  final int numWeights = (numInput * numHidden) + (numHidden * numOutput) + numHidden + numOutput;
  if (weights.length != numWeights) {
    throw new IllegalArgumentException(
        "Bad weights array length: expected " + numWeights + " but got " + weights.length);
  }
  int k = 0; // read position in weights
  for (int i = 0; i < numInput; ++i) {
    for (int j = 0; j < numHidden; ++j) {
      ihWeights[i][j] = weights[k++];
    }
  }
  for (int i = 0; i < numHidden; ++i) {
    hBiases[i] = weights[k++];
  }
  for (int i = 0; i < numHidden; ++i) {
    for (int j = 0; j < numOutput; ++j) {
      hoWeights[i][j] = weights[k++];
    }
  }
  for (int i = 0; i < numOutput; ++i) {
    oBiases[i] = weights[k++];
  }
}
/**
 * Fills every weight and bias with a small pseudo-random value and installs them via
 * {@code SetWeights}. The generator is always seeded with 0, so the result is the same
 * on every call. The range bounds are the float literals {@code -0.01f} and
 * {@code 0.01f} widened to double.
 */
public void InitializeWeights() {
  final int count = (numInput * numHidden) + (numHidden * numOutput) + numHidden + numOutput;
  final double lo = -0.01f;
  final double hi = 0.01f;
  final Random rnd = new Random(0);
  final float[] drawn = new float[count];
  int slot = 0;
  while (slot < drawn.length) {
    drawn[slot] = (float) ((hi - lo) * rnd.nextFloat() + lo);
    slot++;
  }
  SetWeights(drawn);
}
/**
 * Flattens the current parameters into one new array, in the same order that
 * {@code SetWeights} reads them: input-hidden weights, hidden biases, hidden-output
 * weights, output biases.
 *
 * @return a freshly allocated copy of all weights and biases, widened to double
 */
public double[] GetWeights() {
  final int total = (numInput * numHidden) + (numHidden * numOutput) + numHidden + numOutput;
  final double[] flat = new double[total];
  int pos = 0;
  for (float[] row : ihWeights) {
    for (int c = 0; c < ihWeights[0].length; ++c) {
      flat[pos++] = row[c];
    }
  }
  for (double bias : hBiases) {
    flat[pos++] = bias;
  }
  for (float[] row : hoWeights) {
    for (int c = 0; c < hoWeights[0].length; ++c) {
      flat[pos++] = row[c];
    }
  }
  for (double bias : oBiases) {
    flat[pos++] = bias;
  }
  return flat;
}
// ----------------------------------------------------------------------------------------
/**
 * Forward pass. Stores the input vector, the hidden activations and the softmax
 * outputs in the network's fields (they are read later by {@code UpdateWeights}).
 *
 * @param xValues input vector; must have exactly {@code numInput} entries
 * @return a new array holding a copy of the softmax outputs
 * @throws RuntimeException if the input length is wrong or the activation is unsupported
 */
public double[] ComputeOutputs(double[] xValues) {
  if (xValues.length != numInput) {
    throw new RuntimeException("Bad xValues array length");
  }
  final double[] hiddenSums = new double[numHidden];
  final double[] outputSums = new double[numOutput];

  // remember the inputs for the backward pass
  System.arraycopy(xValues, 0, inputs, 0, xValues.length);

  final boolean useTanh =
      activation == Activation.Tanh || activation == Activation.TanhWithDropout;
  final boolean useRectifier =
      activation == Activation.Rectifier || activation == Activation.RectifierWithDropout;

  // hidden layer: weighted sum over inputs (in index order), then bias, then activation
  for (int h = 0; h < numHidden; ++h) {
    for (int in = 0; in < numInput; ++in) {
      hiddenSums[h] += inputs[in] * ihWeights[in][h];
    }
    hiddenSums[h] += hBiases[h];
    if (useTanh) {
      hOutputs[h] = HyperTanFunction(hiddenSums[h]);
    } else if (useRectifier) {
      hOutputs[h] = Rectifier(hiddenSums[h]);
    } else {
      throw new RuntimeException("invalid activation.");
    }
  }

  // output layer: weighted sum over hidden activations (in index order), then bias
  for (int o = 0; o < numOutput; ++o) {
    for (int h = 0; h < numHidden; ++h) {
      outputSums[o] += hOutputs[h] * hoWeights[h][o];
    }
    outputSums[o] += oBiases[o];
  }

  // softmax over all output sums at once, kept in the field and returned as a copy
  final double[] probabilities = Softmax(outputSums);
  System.arraycopy(probabilities, 0, outputs, 0, probabilities.length);
  final double[] copy = new double[numOutput];
  System.arraycopy(outputs, 0, copy, 0, copy.length);
  return copy;
}
/**
 * Tanh activation.
 *
 * @param x pre-activation value
 * @return {@code Math.tanh(x)}
 */
private static double HyperTanFunction(double x) {
  final double activated = Math.tanh(x);
  return activated;
}
/**
 * Rectifier activation.
 *
 * @param x pre-activation value
 * @return the larger of {@code x} and zero, as computed by {@code Math.max}
 */
private static double Rectifier(double x) {
  final double floor = 0.0f;
  return Math.max(floor, x);
}
/**
 * Softmax over all output sums. The largest sum is subtracted from every entry
 * before exponentiation; the exponentials are then divided by their total.
 *
 * @param oSums output-layer sums; must contain at least one element
 * @return a new array of the same length whose entries were divided by their common sum
 */
private static double[] Softmax(double[] oSums) {
  // largest entry
  double peak = oSums[0];
  for (double candidate : oSums) {
    if (candidate > peak) {
      peak = candidate;
    }
  }
  // shifted exponentials and their running total
  final double[] probabilities = new double[oSums.length];
  double total = 0;
  for (int k = 0; k < probabilities.length; k++) {
    final double e = Math.exp(oSums[k] - peak);
    probabilities[k] = e;
    total += e;
  }
  // normalize
  for (int k = 0; k < probabilities.length; k++) {
    probabilities[k] /= total;
  }
  return probabilities;
}
// ----------------------------------------------------------------------------------------
/**
* One back-propagation step for the sample most recently passed to ComputeOutputs.
* Reads the stored inputs, hOutputs and outputs, so ComputeOutputs must have been
* called for the same sample immediately before.
*
* @param tValues   target values, one per output node
* @param learnRate learning rate (eta)
* @param momentum  momentum factor (alpha) applied to the previous gradient step
* @param loss      CrossEntropy or Quadratic; anything else is rejected
* @throws RuntimeException if tValues has the wrong length, or the loss or activation is unsupported
*/
private void UpdateWeights(double[] tValues, double learnRate, double momentum, Loss loss) {
// Statement order matters below: all gradients are computed from the weights as they
// were during the forward pass, and only then are any weights modified.
if( tValues.length != numOutput )
throw new RuntimeException("target values not same length as output in UpdateWeights");
// 1. compute output gradients
for( int i = 0; i < oGrads.length; ++i ) {
// (1 - y) * y; only used by the Quadratic branch
double derivative = (1 - outputs[i]) * outputs[i];
if (loss == Loss.CrossEntropy) {
// cross-entropy: gradient is target minus output, without the derivative factor
oGrads[i] = tValues[i] - outputs[i];
} else if (loss == Loss.Quadratic) {
// 'mean squared error version'. research suggests cross-entropy is better here . . .
oGrads[i] = derivative * (tValues[i] - outputs[i]);
} else throw new RuntimeException("invalid loss function");
}
// 2. compute hidden gradients (uses hoWeights before step 4 changes them)
for( int i = 0; i < hGrads.length; ++i ) {
double derivative = 1;
if (activation == Activation.Tanh || activation == Activation.TanhWithDropout) {
derivative = (1 - hOutputs[i]) * (1 + hOutputs[i]); // derivative of tanh (y) = (1 - y) * (1 + y)
} else if (activation == Activation.Rectifier || activation == Activation.RectifierWithDropout) {
// rectifier: 0 where the unit output is not positive, 1 otherwise
derivative = hOutputs[i] <= 0 ? 0 : 1;
} else throw new RuntimeException("invalid activation.");
double sum = 0;
for( int j = 0; j < numOutput; ++j ) // each hidden delta is the sum of numOutput terms
{
double x = oGrads[j] * hoWeights[i][j];
sum += x;
}
hGrads[i] = derivative * sum;
}
// 3a. update input-hidden weights (gradients must be computed right-to-left but weights
// can be updated in any order)
for( int i = 0; i < ihWeights.length; ++i ) // one row per input node
{
for( int j = 0; j < ihWeights[0].length; ++j ) // one column per hidden node
{
double delta = learnRate * hGrads[j] * inputs[i]; // compute the new delta
// The weight is a float: each compound assignment below narrows its result back to
// float, so the gradient step and the momentum step are rounded separately.
ihWeights[i][j] += delta; // update. note we use '+' instead of '-'. this can be very
// tricky.
// add momentum using previous delta. on first pass old value will be 0.0 but that's OK.
ihWeights[i][j] += momentum * ihPrevWeightsDelta[i][j];
// weight decay would go here
// only the gradient step is remembered; the momentum term itself is not accumulated
ihPrevWeightsDelta[i][j] = (float)delta; // don't forget to save the delta for momentum
}
}
// 3b. update hidden biases
for( int i = 0; i < hBiases.length; ++i ) {
// the 1.0 below is the constant input for any bias; could leave out
double delta = learnRate * hGrads[i] * 1;
hBiases[i] += delta;
hBiases[i] += momentum * hPrevBiasesDelta[i]; // momentum
// weight decay here
hPrevBiasesDelta[i] = delta; // don't forget to save the delta
}
// 4. update hidden-output weights
for( int i = 0; i < hoWeights.length; ++i ) {
for( int j = 0; j < hoWeights[0].length; ++j ) {
// see above: hOutputs are the inputs to the output layer
double delta = learnRate * oGrads[j] * hOutputs[i];
hoWeights[i][j] += delta;
hoWeights[i][j] += momentum * hoPrevWeightsDelta[i][j]; // momentum
// weight decay here
hoPrevWeightsDelta[i][j] = (float)delta; // save
}
}
// 4b. update output biases
for( int i = 0; i < oBiases.length; ++i ) {
double delta = learnRate * oGrads[i] * 1;
oBiases[i] += delta;
oBiases[i] += momentum * oPrevBiasesDelta[i]; // momentum
// weight decay here
oPrevBiasesDelta[i] = delta; // save
}
} // UpdateWeights
// ----------------------------------------------------------------------------------------
/**
 * Trains the network with per-sample back-propagation (learning rate plus momentum,
 * no weight decay). Every epoch visits each training row once, in an order produced
 * by shuffling the row indexes with a generator seeded from {@code seed}, the epoch
 * number and the number of rows.
 *
 * @param trainData  rows of {@code numInput} features followed by {@code numOutput} targets
 * @param maxEprochs number of epochs to run
 * @param learnRate  learning rate
 * @param momentum   momentum factor
 * @param loss       loss function used for the output gradients
 * @param seed       base seed for the per-epoch shuffling
 */
public void Train(double[][] trainData, int maxEprochs, double learnRate, double momentum, Loss loss, long seed) {
  final double[] features = new double[numInput];
  final double[] targets = new double[numOutput];
  for (int epoch = 0; epoch < maxEprochs; ++epoch) {
    // same logic as in FrameTask
    final long chunkSeed = (0x8734093502429734L + (seed + epoch*trainData.length) ) * ((epoch+1) + 0x9823423497823423L);
    final Random rowRng = RandomUtils.getRNG(chunkSeed);

    // fresh identity permutation every epoch, shuffled in place
    final int[] order = new int[trainData.length];
    for (int k = 0; k < order.length; ++k) {
      order[k] = k;
    }
    ArrayUtils.shuffleArray(order, rowRng);

    for (int rowIndex : order) {
      final double[] row = trainData[rowIndex];
      System.arraycopy(row, 0, features, 0, numInput);
      System.arraycopy(row, numInput, targets, 0, numOutput);
      ComputeOutputs(features);                            // forward pass, state kept internally
      UpdateWeights(targets, learnRate, momentum, loss);   // backward pass
    }
  }
}
/**
 * Shuffles {@code sequence} in place, walking from the last position down to the
 * first and swapping each position with a randomly chosen position at or below it.
 * Exactly one {@code nextInt} draw is made per element, including position 0.
 *
 * @param sequence array to permute in place
 * @param rand     random source
 */
static void shuffle(int[] sequence, Random rand) {
  int remaining = sequence.length;
  while (remaining > 0) {
    final int last = remaining - 1;
    final int pick = rand.nextInt(remaining);
    final int held = sequence[last];
    sequence[last] = sequence[pick];
    sequence[pick] = held;
    remaining = last;
  }
}
// ----------------------------------------------------------------------------------------
/**
 * Evaluates the network on {@code testData} and, despite the method name, returns the
 * fraction of rows that were classified INCORRECTLY (wrong / total).
 * <p>
 * For each row the softmax outputs are narrowed to float precision, placed in slots
 * 1..n of a prediction array and the predicted class is obtained from
 * {@code hex.genmodel.GenModel.getPrediction}. A row counts as correct when the target
 * column at the predicted index equals 1.0.
 *
 * @param testData rows of {@code numInput} features followed by {@code numOutput} targets
 * @return misclassified rows divided by all rows; {@code NaN} when {@code testData} is empty
 */
public double Accuracy(double[][] testData) {
  final double[] features = new double[numInput];
  final double[] targets = new double[numOutput];
  int hits = 0;
  int misses = 0;
  for (double[] row : testData) {
    System.arraycopy(row, 0, features, 0, numInput);
    System.arraycopy(row, numInput, targets, 0, numOutput);
    final double[] probabilities = ComputeOutputs(features);

    // slot 0 is reserved for the predicted label, slots 1..n carry float-rounded probabilities
    final double[] preds = new double[probabilities.length + 1];
    for (int c = 0; c < probabilities.length; ++c) {
      preds[c + 1] = (float) probabilities[c];
    }
    preds[0] = hex.genmodel.GenModel.getPrediction(preds, null, features, 0.5);

    final boolean correct = targets[(int) preds[0]] == 1.0;
    if (correct) {
      hits++;
    } else {
      misses++;
    }
  }
  return (double) misses / (hits + misses);
}
/**
 * Finds the position of the largest element; the earliest position wins on ties.
 *
 * @param vector values to scan; must contain at least one element
 * @return index of the largest value
 */
private static int MaxIndex(double[] vector) {
  double top = vector[0];
  int winner = 0;
  for (int k = 1; k < vector.length; ++k) {
    final double candidate = vector[k];
    if (candidate > top) {
      top = candidate;
      winner = k;
    }
  }
  return winner;
}
}
}