package hex.deeplearning;

import org.junit.Ignore;

import hex.deeplearning.DeepLearningModel.DeepLearningParameters;
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.Activation;
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.Loss;
import water.util.ArrayUtils;
import water.util.RandomUtils;

import java.text.DecimalFormat;
import java.util.Random;

/**
 * James McCaffrey's MLP on Iris.
 * <nl>
 * Adapted to Java as a reference implementation for testing.
 * <nl>
 * http://channel9.msdn.com/Events/Build/2013/2-401
 */
@Ignore
public class DeepLearningMLPReference {
  static final DecimalFormat _format = new DecimalFormat("0.000");
  double[][] _trainData;   // normalized training rows: 4 features + 3 one-hot targets
  double[][] _testData;    // normalized holdout rows, same layout
  NeuralNetwork _nn;       // the reference 4-hidden-output MLP

  /**
   * Builds the Iris data set in memory, splits it into train/test, normalizes the
   * 4 feature columns using train-set statistics, and constructs the reference network.
   *
   * @param activation    hidden-layer activation (Tanh or Rectifier variants)
   * @param rand          RNG used to shuffle rows for the train/test split
   * @param holdout_ratio fraction of rows that go to the TRAINING set (despite the name)
   * @param numHidden     number of hidden units
   */
  public void init(DeepLearningParameters.Activation activation, Random rand, double holdout_ratio, int numHidden) {
    // Raw Iris rows: sepal length, sepal width, petal length, petal width,
    // followed by a one-hot class encoding in the last three columns.
    double[][] ds = new double[150][];
    int r = 0;
    // class {0,0,1} (setosa)
    ds[r++] = new double[] { 5.1, 3.5, 1.4, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 4.9, 3, 1.4, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 4.7, 3.2, 1.3, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 4.6, 3.1, 1.5, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5, 3.6, 1.4, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5.4, 3.9, 1.7, 0.4, 0, 0, 1 };
    ds[r++] = new double[] { 4.6, 3.4, 1.4, 0.3, 0, 0, 1 };
    ds[r++] = new double[] { 5, 3.4, 1.5, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 4.4, 2.9, 1.4, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
    ds[r++] = new double[] { 5.4, 3.7, 1.5, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 4.8, 3.4, 1.6, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 4.8, 3, 1.4, 0.1, 0, 0, 1 };
    ds[r++] = new double[] { 4.3, 3, 1.1, 0.1, 0, 0, 1 };
    ds[r++] = new double[] { 5.8, 4, 1.2, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5.7, 4.4, 1.5, 0.4, 0, 0, 1 };
    ds[r++] = new double[] { 5.4, 3.9, 1.3, 0.4, 0, 0, 1 };
    ds[r++] = new double[] { 5.1, 3.5, 1.4, 0.3, 0, 0, 1 };
    ds[r++] = new double[] { 5.7, 3.8, 1.7, 0.3, 0, 0, 1 };
    ds[r++] = new double[] { 5.1, 3.8, 1.5, 0.3, 0, 0, 1 };
    ds[r++] = new double[] { 5.4, 3.4, 1.7, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5.1, 3.7, 1.5, 0.4, 0, 0, 1 };
    ds[r++] = new double[] { 4.6, 3.6, 1, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5.1, 3.3, 1.7, 0.5, 0, 0, 1 };
    ds[r++] = new double[] { 4.8, 3.4, 1.9, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5, 3, 1.6, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5, 3.4, 1.6, 0.4, 0, 0, 1 };
    ds[r++] = new double[] { 5.2, 3.5, 1.5, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5.2, 3.4, 1.4, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 4.7, 3.2, 1.6, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 4.8, 3.1, 1.6, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5.4, 3.4, 1.5, 0.4, 0, 0, 1 };
    ds[r++] = new double[] { 5.2, 4.1, 1.5, 0.1, 0, 0, 1 };
    ds[r++] = new double[] { 5.5, 4.2, 1.4, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
    ds[r++] = new double[] { 5, 3.2, 1.2, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5.5, 3.5, 1.3, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 4.9, 3.1, 1.5, 0.1, 0, 0, 1 };
    ds[r++] = new double[] { 4.4, 3, 1.3, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5.1, 3.4, 1.5, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5, 3.5, 1.3, 0.3, 0, 0, 1 };
    ds[r++] = new double[] { 4.5, 2.3, 1.3, 0.3, 0, 0, 1 };
    ds[r++] = new double[] { 4.4, 3.2, 1.3, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5, 3.5, 1.6, 0.6, 0, 0, 1 };
    ds[r++] = new double[] { 5.1, 3.8, 1.9, 0.4, 0, 0, 1 };
    ds[r++] = new double[] { 4.8, 3, 1.4, 0.3, 0, 0, 1 };
    ds[r++] = new double[] { 5.1, 3.8, 1.6, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 4.6, 3.2, 1.4, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5.3, 3.7, 1.5, 0.2, 0, 0, 1 };
    ds[r++] = new double[] { 5, 3.3, 1.4, 0.2, 0, 0, 1 };
    // class {0,1,0} (versicolor)
    ds[r++] = new double[] { 7, 3.2, 4.7, 1.4, 0, 1, 0 };
    ds[r++] = new double[] { 6.4, 3.2, 4.5, 1.5, 0, 1, 0 };
    ds[r++] = new double[] { 6.9, 3.1, 4.9, 1.5, 0, 1, 0 };
    ds[r++] = new double[] { 5.5, 2.3, 4, 1.3, 0, 1, 0 };
    ds[r++] = new double[] { 6.5, 2.8, 4.6, 1.5, 0, 1, 0 };
    ds[r++] = new double[] { 5.7, 2.8, 4.5, 1.3, 0, 1, 0 };
    ds[r++] = new double[] { 6.3, 3.3, 4.7, 1.6, 0, 1, 0 };
    ds[r++] = new double[] { 4.9, 2.4, 3.3, 1, 0, 1, 0 };
    ds[r++] = new double[] { 6.6, 2.9, 4.6, 1.3, 0, 1, 0 };
    ds[r++] = new double[] { 5.2, 2.7, 3.9, 1.4, 0, 1, 0 };
    ds[r++] = new double[] { 5, 2, 3.5, 1, 0, 1, 0 };
    ds[r++] = new double[] { 5.9, 3, 4.2, 1.5, 0, 1, 0 };
    ds[r++] = new double[] { 6, 2.2, 4, 1, 0, 1, 0 };
    ds[r++] = new double[] { 6.1, 2.9, 4.7, 1.4, 0, 1, 0 };
    ds[r++] = new double[] { 5.6, 2.9, 3.6, 1.3, 0, 1, 0 };
    ds[r++] = new double[] { 6.7, 3.1, 4.4, 1.4, 0, 1, 0 };
    ds[r++] = new double[] { 5.6, 3, 4.5, 1.5, 0, 1, 0 };
    ds[r++] = new double[] { 5.8, 2.7, 4.1, 1, 0, 1, 0 };
    ds[r++] = new double[] { 6.2, 2.2, 4.5, 1.5, 0, 1, 0 };
    ds[r++] = new double[] { 5.6, 2.5, 3.9, 1.1, 0, 1, 0 };
    ds[r++] = new double[] { 5.9, 3.2, 4.8, 1.8, 0, 1, 0 };
    ds[r++] = new double[] { 6.1, 2.8, 4, 1.3, 0, 1, 0 };
    ds[r++] = new double[] { 6.3, 2.5, 4.9, 1.5, 0, 1, 0 };
    ds[r++] = new double[] { 6.1, 2.8, 4.7, 1.2, 0, 1, 0 };
    ds[r++] = new double[] { 6.4, 2.9, 4.3, 1.3, 0, 1, 0 };
    ds[r++] = new double[] { 6.6, 3, 4.4, 1.4, 0, 1, 0 };
    ds[r++] = new double[] { 6.8, 2.8, 4.8, 1.4, 0, 1, 0 };
    ds[r++] = new double[] { 6.7, 3, 5, 1.7, 0, 1, 0 };
    ds[r++] = new double[] { 6, 2.9, 4.5, 1.5, 0, 1, 0 };
    ds[r++] = new double[] { 5.7, 2.6, 3.5, 1, 0, 1, 0 };
    ds[r++] = new double[] { 5.5, 2.4, 3.8, 1.1, 0, 1, 0 };
    ds[r++] = new double[] { 5.5, 2.4, 3.7, 1, 0, 1, 0 };
    ds[r++] = new double[] { 5.8, 2.7, 3.9, 1.2, 0, 1, 0 };
    ds[r++] = new double[] { 6, 2.7, 5.1, 1.6, 0, 1, 0 };
    ds[r++] = new double[] { 5.4, 3, 4.5, 1.5, 0, 1, 0 };
    ds[r++] = new double[] { 6, 3.4, 4.5, 1.6, 0, 1, 0 };
    ds[r++] = new double[] { 6.7, 3.1, 4.7, 1.5, 0, 1, 0 };
    ds[r++] = new double[] { 6.3, 2.3, 4.4, 1.3, 0, 1, 0 };
    ds[r++] = new double[] { 5.6, 3, 4.1, 1.3, 0, 1, 0 };
    ds[r++] = new double[] { 5.5, 2.5, 4, 1.3, 0, 1, 0 };
    ds[r++] = new double[] { 5.5, 2.6, 4.4, 1.2, 0, 1, 0 };
    ds[r++] = new double[] { 6.1, 3, 4.6, 1.4, 0, 1, 0 };
    ds[r++] = new double[] { 5.8, 2.6, 4, 1.2, 0, 1, 0 };
    ds[r++] = new double[] { 5, 2.3, 3.3, 1, 0, 1, 0 };
    ds[r++] = new double[] { 5.6, 2.7, 4.2, 1.3, 0, 1, 0 };
    ds[r++] = new double[] { 5.7, 3, 4.2, 1.2, 0, 1, 0 };
    ds[r++] = new double[] { 5.7, 2.9, 4.2, 1.3, 0, 1, 0 };
    ds[r++] = new double[] { 6.2, 2.9, 4.3, 1.3, 0, 1, 0 };
    ds[r++] = new double[] { 5.1, 2.5, 3, 1.1, 0, 1, 0 };
    ds[r++] = new double[] { 5.7, 2.8, 4.1, 1.3, 0, 1, 0 };
    // class {1,0,0} (virginica)
    ds[r++] = new double[] { 6.3, 3.3, 6, 2.5, 1, 0, 0 };
    ds[r++] = new double[] { 5.8, 2.7, 5.1, 1.9, 1, 0, 0 };
    ds[r++] = new double[] { 7.1, 3, 5.9, 2.1, 1, 0, 0 };
    ds[r++] = new double[] { 6.3, 2.9, 5.6, 1.8, 1, 0, 0 };
    ds[r++] = new double[] { 6.5, 3, 5.8, 2.2, 1, 0, 0 };
    ds[r++] = new double[] { 7.6, 3, 6.6, 2.1, 1, 0, 0 };
    ds[r++] = new double[] { 4.9, 2.5, 4.5, 1.7, 1, 0, 0 };
    ds[r++] = new double[] { 7.3, 2.9, 6.3, 1.8, 1, 0, 0 };
    ds[r++] = new double[] { 6.7, 2.5, 5.8, 1.8, 1, 0, 0 };
    ds[r++] = new double[] { 7.2, 3.6, 6.1, 2.5, 1, 0, 0 };
    ds[r++] = new double[] { 6.5, 3.2, 5.1, 2, 1, 0, 0 };
    ds[r++] = new double[] { 6.4, 2.7, 5.3, 1.9, 1, 0, 0 };
    ds[r++] = new double[] { 6.8, 3, 5.5, 2.1, 1, 0, 0 };
    ds[r++] = new double[] { 5.7, 2.5, 5, 2, 1, 0, 0 };
    ds[r++] = new double[] { 5.8, 2.8, 5.1, 2.4, 1, 0, 0 };
    ds[r++] = new double[] { 6.4, 3.2, 5.3, 2.3, 1, 0, 0 };
    ds[r++] = new double[] { 6.5, 3, 5.5, 1.8, 1, 0, 0 };
    ds[r++] = new double[] { 7.7, 3.8, 6.7, 2.2, 1, 0, 0 };
    ds[r++] = new double[] { 7.7, 2.6, 6.9, 2.3, 1, 0, 0 };
    ds[r++] = new double[] { 6, 2.2, 5, 1.5, 1, 0, 0 };
    ds[r++] = new double[] { 6.9, 3.2, 5.7, 2.3, 1, 0, 0 };
    ds[r++] = new double[] { 5.6, 2.8, 4.9, 2, 1, 0, 0 };
    ds[r++] = new double[] { 7.7, 2.8, 6.7, 2, 1, 0, 0 };
    ds[r++] = new double[] { 6.3, 2.7, 4.9, 1.8, 1, 0, 0 };
    ds[r++] = new double[] { 6.7, 3.3, 5.7, 2.1, 1, 0, 0 };
    ds[r++] = new double[] { 7.2, 3.2, 6, 1.8, 1, 0, 0 };
    ds[r++] = new double[] { 6.2, 2.8, 4.8, 1.8, 1, 0, 0 };
    ds[r++] = new double[] { 6.1, 3, 4.9, 1.8, 1, 0, 0 };
    ds[r++] = new double[] { 6.4, 2.8, 5.6, 2.1, 1, 0, 0 };
    ds[r++] = new double[] { 7.2, 3, 5.8, 1.6, 1, 0, 0 };
    ds[r++] = new double[] { 7.4, 2.8, 6.1, 1.9, 1, 0, 0 };
    ds[r++] = new double[] { 7.9, 3.8, 6.4, 2, 1, 0, 0 };
    ds[r++] = new double[] { 6.4, 2.8, 5.6, 2.2, 1, 0, 0 };
    ds[r++] = new double[] { 6.3, 2.8, 5.1, 1.5, 1, 0, 0 };
    ds[r++] = new double[] { 6.1, 2.6, 5.6, 1.4, 1, 0, 0 };
    ds[r++] = new double[] { 7.7, 3, 6.1, 2.3, 1, 0, 0 };
    ds[r++] = new double[] { 6.3, 3.4, 5.6, 2.4, 1, 0, 0 };
    ds[r++] = new double[] { 6.4, 3.1, 5.5, 1.8, 1, 0, 0 };
    ds[r++] = new double[] { 6, 3, 4.8, 1.8, 1, 0, 0 };
    ds[r++] = new double[] { 6.9, 3.1, 5.4, 2.1, 1, 0, 0 };
    ds[r++] = new double[] { 6.7, 3.1, 5.6, 2.4, 1, 0, 0 };
    ds[r++] = new double[] { 6.9, 3.1, 5.1, 2.3, 1, 0, 0 };
    ds[r++] = new double[] { 5.8, 2.7, 5.1, 1.9, 1, 0, 0 };
    ds[r++] = new double[] { 6.8, 3.2, 5.9, 2.3, 1, 0, 0 };
    ds[r++] = new double[] { 6.7, 3.3, 5.7, 2.5, 1, 0, 0 };
    ds[r++] = new double[] { 6.7, 3, 5.2, 2.3, 1, 0, 0 };
    ds[r++] = new double[] { 6.3, 2.5, 5, 1.9, 1, 0, 0 };
    ds[r++] = new double[] { 6.5, 3, 5.2, 2, 1, 0, 0 };
    ds[r++] = new double[] { 6.2, 3.4, 5.4, 2.3, 1, 0, 0 };
    ds[r++] = new double[] { 5.9, 3, 5.1, 1.8, 1, 0, 0 };

    // Copy the rows, reversing the order of the three one-hot class columns
    // (columns 4 and 6 are swapped; column 5 stays in place).
    double[][] allData = new double[ds.length][ds[0].length];
    for( int j = 0; j < allData.length; j++ ) {
      for( int i = 0; i < allData[j].length; i++ )
        allData[j][i] = ds[j][i];
      allData[j][4] = ds[j][6];
      allData[j][5] = ds[j][5];
      allData[j][6] = ds[j][4];
    }

    // NOTE(review): holdout_ratio is the fraction kept for TRAINING, not held out.
    int trainRows = (int) (allData.length * holdout_ratio);
    int testRows = allData.length - trainRows;
    _trainData = new double[trainRows][];
    _testData = new double[testRows][];
    MakeTrainTest(allData, _trainData, _testData, rand);

    // Normalize all data using train stats (mean/sample-stddev per feature column).
    for( int i = 0; i < 4; i++ ) {
      double mean = 0;
      for( int n = 0; n < _trainData.length; n++ )
        mean += _trainData[n][i];
      mean /= _trainData.length;
      double sigma = 0;
      for( int n = 0; n < _trainData.length; n++ ) {
        double d = _trainData[n][i] - mean;
        sigma += d * d;
      }
      sigma = Math.sqrt(sigma / (_trainData.length - 1));
      for( int n = 0; n < _trainData.length; n++ ) {
        _trainData[n][i] -= mean;
        _trainData[n][i] /= sigma;
      }
      // Test rows are standardized with the TRAIN statistics on purpose.
      for( int n = 0; n < _testData.length; n++ ) {
        _testData[n][i] -= mean;
        _testData[n][i] /= sigma;
      }
    }

    int numInput = 4;
    int numOutput = 3;
    _nn = new NeuralNetwork(activation, numInput, numHidden, numOutput);
    _nn.InitializeWeights();
  }

  /** Trains the reference network on the prepared training data. */
  void train(int maxEpochs, double learnRate, double momentum, Loss loss, long seed) {
    _nn.Train(_trainData, maxEpochs, learnRate, momentum, loss, seed);
  }

  /** Splits allData into trainData and testData (sized by the caller) after a random shuffle. */
  void MakeTrainTest(double[][] allData, double[][] trainData, double[][] testData, Random rand) {
    // split allData into 80% trainData and 20% testData
    int numCols = allData[0].length;
    int[] shuffle = new int[allData.length];
    // create a random sequence of indexes
    for( int i = 0; i < shuffle.length; ++i )
      shuffle[i] = i;
    NeuralNetwork.shuffle(shuffle, rand);

    int si = 0; // index into sequence[]
    int j = 0;  // index into trainData or testData
    for( ; si < trainData.length; ++si ) { // first rows to train data
      trainData[j] = new double[numCols];
      int idx = shuffle[si];
      System.arraycopy(allData[idx], 0, trainData[j], 0, numCols);
      ++j;
    }
    j = 0; // reset to start of test data
    for( ; si < allData.length; ++si ) { // remainder to test data
      testData[j] = new double[numCols];
      int idx = shuffle[si];
      System.arraycopy(allData[idx], 0, testData[j], 0, numCols);
      ++j;
    }
  } // MakeTrainTest

  // Intentionally a no-op placeholder in McCaffrey's original demo; normalization
  // is done inline in init() using train-set statistics instead.
  static void Normalize(double[][] dataMatrix, int[] cols) {
    // in most cases you want to normalize the x-data
  }

  /** Prints vector values, valsPerRow per line, using the shared 3-decimal format. */
  static void ShowVector(double[] vector, int valsPerRow, int decimals, boolean newLine) {
    for( int i = 0; i < vector.length; ++i ) {
      if( i % valsPerRow == 0 )
        System.out.println("");
      System.out.print(_format.format(vector[i]) + " ");
    }
    if( newLine )
      System.out.println("");
  }

  /** Prints the first numRows rows of a matrix with sign-aligned 3-decimal formatting. */
  static void ShowMatrix(double[][] matrix, int numRows, int decimals, boolean newLine) {
    for( int i = 0; i < numRows; ++i ) {
      System.out.print(i + ": ");
      for( int j = 0; j < matrix[i].length; ++j ) {
        // Print an explicit sign column so positive and negative values line up.
        if( matrix[i][j] >= 0.0 )
          System.out.print(" ");
        else
          System.out.print("-");
        System.out.print(_format.format(Math.abs(matrix[i][j])) + " ");
      }
      System.out.println("");
    }
    if( newLine == true )
      System.out.println("");
  }

  /**
   * A single-hidden-layer MLP (softmax output) with plain back-propagation plus momentum.
   * Weights are stored as float matrices to mirror H2O's single-precision weights.
   */
  public static class NeuralNetwork {
    Activation activation = Activation.Tanh;
    int numInput;
    int numHidden;
    int numOutput;
    double[] inputs;
    float[][] ihWeights; // input-hidden
    double[] hBiases;
    double[] hOutputs;
    float[][] hoWeights; // hidden-output
    double[] oBiases;
    double[] outputs;
    // back-prop specific arrays (these could be local to method UpdateWeights)
    double[] oGrads; // output gradients for back-propagation
    double[] hGrads; // hidden gradients for back-propagation
    // back-prop momentum specific arrays (these could be local to method Train)
    float[][] ihPrevWeightsDelta; // for momentum with back-propagation
    double[] hPrevBiasesDelta;
    float[][] hoPrevWeightsDelta;
    double[] oPrevBiasesDelta;

    public NeuralNetwork(DeepLearningParameters.Activation activationType, int numInput, int numHidden, int numOutput) {
      this.activation = activationType;
      this.numInput = numInput;
      this.numHidden = numHidden;
      this.numOutput = numOutput;
      this.inputs = new double[numInput];
      this.ihWeights = MakeMatrixFloat(numInput, numHidden);
      this.hBiases = new double[numHidden];
      this.hOutputs = new double[numHidden];
      this.hoWeights = MakeMatrixFloat(numHidden, numOutput);
      this.oBiases = new double[numOutput];
      this.outputs = new double[numOutput];
      // back-prop related arrays below
      this.hGrads = new double[numHidden];
      this.oGrads = new double[numOutput];
      this.ihPrevWeightsDelta = MakeMatrixFloat(numInput, numHidden);
      this.hPrevBiasesDelta = new double[numHidden];
      this.hoPrevWeightsDelta = MakeMatrixFloat(numHidden, numOutput);
      this.oPrevBiasesDelta = new double[numOutput];
    } // ctor

    // helper for ctor; kept from the original port although currently unused
    private static double[][] MakeMatrix(int rows, int cols) {
      double[][] result = new double[rows][];
      for( int r = 0; r < result.length; ++r )
        result[r] = new double[cols];
      return result;
    }

    // helper for ctor
    private static float[][] MakeMatrixFloat(int rows, int cols) {
      float[][] result = new float[rows][];
      for( int r = 0; r < result.length; ++r )
        result[r] = new float[cols];
      return result;
    }

    @Override public String toString() { // yikes
      String s = "";
      s += "===============================\n";
      s += "numInput = " + numInput + " numHidden = " + numHidden + " numOutput = " + numOutput + "\n\n";
      s += "inputs: \n";
      for( int i = 0; i < inputs.length; ++i )
        s += inputs[i] + " ";
      s += "\n\n";
      s += "ihWeights: \n";
      for( int i = 0; i < ihWeights.length; ++i ) {
        for( int j = 0; j < ihWeights[i].length; ++j ) {
          s += ihWeights[i][j] + " ";
        }
        s += "\n";
      }
      s += "\n";
      s += "hBiases: \n";
      for( int i = 0; i < hBiases.length; ++i )
        s += hBiases[i] + " ";
      s += "\n\n";
      s += "hOutputs: \n";
      for( int i = 0; i < hOutputs.length; ++i )
        s += hOutputs[i] + " ";
      s += "\n\n";
      s += "hoWeights: \n";
      for( int i = 0; i < hoWeights.length; ++i ) {
        for( int j = 0; j < hoWeights[i].length; ++j ) {
          s += hoWeights[i][j] + " ";
        }
        s += "\n";
      }
      s += "\n";
      s += "oBiases: \n";
      for( int i = 0; i < oBiases.length; ++i )
        s += oBiases[i] + " ";
      s += "\n\n";
      s += "hGrads: \n";
      for( int i = 0; i < hGrads.length; ++i )
        s += hGrads[i] + " ";
      s += "\n\n";
      s += "oGrads: \n";
      for( int i = 0; i < oGrads.length; ++i )
        s += oGrads[i] + " ";
      s += "\n\n";
      s += "ihPrevWeightsDelta: \n";
      for( int i = 0; i < ihPrevWeightsDelta.length; ++i ) {
        for( int j = 0; j < ihPrevWeightsDelta[i].length; ++j ) {
          s += ihPrevWeightsDelta[i][j] + " ";
        }
        s += "\n";
      }
      s += "\n";
      s += "hPrevBiasesDelta: \n";
      for( int i = 0; i < hPrevBiasesDelta.length; ++i )
        s += hPrevBiasesDelta[i] + " ";
      s += "\n\n";
      s += "hoPrevWeightsDelta: \n";
      for( int i = 0; i < hoPrevWeightsDelta.length; ++i ) {
        for( int j = 0; j < hoPrevWeightsDelta[i].length; ++j ) {
          s += hoPrevWeightsDelta[i][j] + " ";
        }
        s += "\n";
      }
      s += "\n";
      s += "oPrevBiasesDelta: \n";
      for( int i = 0; i < oPrevBiasesDelta.length; ++i )
        s += oPrevBiasesDelta[i] + " ";
      s += "\n\n";
      s += "outputs: \n";
      for( int i = 0; i < outputs.length; ++i )
        s += outputs[i] + " ";
      s += "\n\n";
      s += "===============================\n";
      return s;
    }

    // ----------------------------------------------------------------------------------------

    public void SetWeights(float[] weights) {
      // copy weights and biases in weights[] array to i-h weights, i-h biases, h-o weights, h-o biases
      int numWeights = (numInput * numHidden) + (numHidden * numOutput) + numHidden + numOutput;
      if( weights.length != numWeights )
        throw new RuntimeException("Bad weights array length: ");
      int k = 0; // points into weights param
      for( int i = 0; i < numInput; ++i )
        for( int j = 0; j < numHidden; ++j )
          ihWeights[i][j] = weights[k++];
      for( int i = 0; i < numHidden; ++i )
        hBiases[i] = weights[k++];
      for( int i = 0; i < numHidden; ++i )
        for( int j = 0; j < numOutput; ++j )
          hoWeights[i][j] = weights[k++];
      for( int i = 0; i < numOutput; ++i )
        oBiases[i] = weights[k++];
    }

    public void InitializeWeights() {
      // initialize weights and biases to small random values; fixed seed keeps
      // the reference deterministic across runs
      int numWeights = (numInput * numHidden) + (numHidden * numOutput) + numHidden + numOutput;
      float[] initialWeights = new float[numWeights];
      double lo = -0.01f;
      double hi = 0.01f;
      Random rnd = new Random(0);
      for( int i = 0; i < initialWeights.length; ++i )
        initialWeights[i] = (float)((hi - lo) * rnd.nextFloat() + lo);
      this.SetWeights(initialWeights);
    }

    public double[] GetWeights() {
      // returns the current set of weights, presumably after training
      int numWeights = (numInput * numHidden) + (numHidden * numOutput) + numHidden + numOutput;
      double[] result = new double[numWeights];
      int k = 0;
      for( int i = 0; i < ihWeights.length; ++i )
        for( int j = 0; j < ihWeights[0].length; ++j )
          result[k++] = ihWeights[i][j];
      for( int i = 0; i < hBiases.length; ++i )
        result[k++] = hBiases[i];
      for( int i = 0; i < hoWeights.length; ++i )
        for( int j = 0; j < hoWeights[0].length; ++j )
          result[k++] = hoWeights[i][j];
      for( int i = 0; i < oBiases.length; ++i )
        result[k++] = oBiases[i];
      return result;
    }

    // ----------------------------------------------------------------------------------------

    /** Forward pass: stores inputs/hidden/softmax outputs internally and returns a copy of the outputs. */
    public double[] ComputeOutputs(double[] xValues) {
      if( xValues.length != numInput )
        throw new RuntimeException("Bad xValues array length");

      double[] hSums = new double[numHidden]; // hidden nodes sums scratch array
      double[] oSums = new double[numOutput]; // output nodes sums

      for( int i = 0; i < xValues.length; ++i ) // copy x-values to inputs
        this.inputs[i] = xValues[i];

      for( int j = 0; j < numHidden; ++j ) // compute i-h sum of weights * inputs
        for( int i = 0; i < numInput; ++i )
          hSums[j] += this.inputs[i] * this.ihWeights[i][j]; // note +=

      for( int i = 0; i < numHidden; ++i ) // add biases to input-to-hidden sums
        hSums[i] += this.hBiases[i];

      for( int i = 0; i < numHidden; ++i ) // apply activation
        if (activation == Activation.Tanh || activation == Activation.TanhWithDropout) {
          hOutputs[i] = HyperTanFunction(hSums[i]);
        } else if (activation == Activation.Rectifier || activation == Activation.RectifierWithDropout) {
          hOutputs[i] = Rectifier(hSums[i]);
        } else throw new RuntimeException("invalid activation.");

      for( int j = 0; j < numOutput; ++j ) // compute h-o sum of weights * hOutputs
        for( int i = 0; i < numHidden; ++i )
          oSums[j] += hOutputs[i] * hoWeights[i][j];

      for( int i = 0; i < numOutput; ++i ) // add biases to input-to-hidden sums
        oSums[i] += oBiases[i];

      // softmax activation does all outputs at once for efficiency
      double[] softOut = Softmax(oSums);
      System.arraycopy(softOut, 0, outputs, 0, softOut.length);

      double[] retResult = new double[numOutput]; // could define a GetOutputs method instead
      System.arraycopy(this.outputs, 0, retResult, 0, retResult.length);
      return retResult;
    } // ComputeOutputs

    private static double HyperTanFunction(double x) {
      return Math.tanh(x);
    }

    private static double Rectifier(double x) {
      return Math.max(x, 0.0f);
    }

    private static double[] Softmax(double[] oSums) {
      // does all output nodes at once so scale doesn't have to be re-computed each time
      // 1. determine max output sum (subtracted for numerical stability)
      double max = oSums[0];
      for( int i = 0; i < oSums.length; ++i )
        if( oSums[i] > max ) max = oSums[i];
      // 2. determine scaling factor -- sum of exp(each val - max)
      double[] result = new double[oSums.length];
      double scale = 0;
      for( int i = 0; i < result.length; i++ ) {
        result[i] = Math.exp(oSums[i] - max);
        scale += result[i];
      }
      for( int i = 0; i < result.length; i++ )
        result[i] /= scale;
      return result; // now scaled so that xi sum to 1.0
    }

    // ----------------------------------------------------------------------------------------

    private void UpdateWeights(double[] tValues, double learnRate, double momentum, Loss loss) {
      // update the weights and biases using back-propagation, with target values,
      // eta (learning rate), alpha (momentum)
      // assumes that SetWeights and ComputeOutputs have been called and so all the
      // internal arrays and matrices have values (other than 0.0)
      if( tValues.length != numOutput )
        throw new RuntimeException("target values not same length as output in UpdateWeights");

      // 1. compute output gradients
      for( int i = 0; i < oGrads.length; ++i ) {
        // derivative of softmax = (1 - y) * y (same as log-sigmoid)
        double derivative = (1 - outputs[i]) * outputs[i];
        if (loss == Loss.CrossEntropy) {
          // softmax + cross-entropy gradient simplifies to (t - y)
          oGrads[i] = tValues[i] - outputs[i];
        } else if (loss == Loss.Quadratic) {
          // 'mean squared error version'. research suggests cross-entropy is better here . . .
          oGrads[i] = derivative * (tValues[i] - outputs[i]);
        } else throw new RuntimeException("invalid loss function");
      }

      // 2. compute hidden gradients
      for( int i = 0; i < hGrads.length; ++i ) {
        double derivative = 1;
        if (activation == Activation.Tanh || activation == Activation.TanhWithDropout) {
          derivative = (1 - hOutputs[i]) * (1 + hOutputs[i]); // derivative of tanh (y) = (1 - y) * (1 + y)
        } else if (activation == Activation.Rectifier || activation == Activation.RectifierWithDropout) {
          derivative = hOutputs[i] <= 0 ? 0 : 1;
        } else throw new RuntimeException("invalid activation.");
        double sum = 0;
        for( int j = 0; j < numOutput; ++j ) { // each hidden delta is the sum of numOutput terms
          double x = oGrads[j] * hoWeights[i][j];
          sum += x;
        }
        hGrads[i] = derivative * sum;
      }

      // 3a. update hidden weights (gradients must be computed right-to-left but weights
      // can be updated in any order)
      for( int i = 0; i < ihWeights.length; ++i ) { // 0..2 (3)
        for( int j = 0; j < ihWeights[0].length; ++j ) { // 0..3 (4)
          double delta = learnRate * hGrads[j] * inputs[i]; // compute the new delta
          ihWeights[i][j] += delta; // update. note we use '+' instead of '-'. this can be very tricky.
          // add momentum using previous delta. on first pass old value will be 0.0 but that's OK.
          ihWeights[i][j] += momentum * ihPrevWeightsDelta[i][j];
          // weight decay would go here
          ihPrevWeightsDelta[i][j] = (float)delta; // don't forget to save the delta for momentum
        }
      }

      // 3b. update hidden biases
      for( int i = 0; i < hBiases.length; ++i ) {
        // the 1.0 below is the constant input for any bias; could leave out
        double delta = learnRate * hGrads[i] * 1;
        hBiases[i] += delta;
        hBiases[i] += momentum * hPrevBiasesDelta[i]; // momentum
        // weight decay here
        hPrevBiasesDelta[i] = delta; // don't forget to save the delta
      }

      // 4. update hidden-output weights
      for( int i = 0; i < hoWeights.length; ++i ) {
        for( int j = 0; j < hoWeights[0].length; ++j ) {
          // see above: hOutputs are inputs to the deeplearning outputs
          double delta = learnRate * oGrads[j] * hOutputs[i];
          hoWeights[i][j] += delta;
          hoWeights[i][j] += momentum * hoPrevWeightsDelta[i][j]; // momentum
          // weight decay here
          hoPrevWeightsDelta[i][j] = (float)delta; // save
        }
      }

      // 4b. update output biases
      for( int i = 0; i < oBiases.length; ++i ) {
        double delta = learnRate * oGrads[i] * 1;
        oBiases[i] += delta;
        oBiases[i] += momentum * oPrevBiasesDelta[i]; // momentum
        // weight decay here
        oPrevBiasesDelta[i] = delta; // save
      }
    } // UpdateWeights

    // ----------------------------------------------------------------------------------------

    public void Train(double[][] trainData, int maxEprochs, double learnRate, double momentum, Loss loss, long seed) {
      // train a back-prop style NN classifier using learning rate and momentum
      // no weight decay
      int epoch = 0;
      double[] xValues = new double[numInput];  // inputs
      double[] tValues = new double[numOutput]; // target values

      while( epoch < maxEprochs ) {
        // same logic as in FrameTask: the per-epoch shuffle seed must match H2O's
        // so that row visiting order is identical to the implementation under test
        final long chunkSeed = (0x8734093502429734L + (seed + epoch*trainData.length) ) * ((epoch+1) + 0x9823423497823423L);
        final Random skip_rng = RandomUtils.getRNG(chunkSeed);
        int[] sequence = new int[trainData.length];
        for( int i = 0; i < sequence.length; ++i )
          sequence[i] = i;
        ArrayUtils.shuffleArray(sequence, skip_rng);
        for( int i = 0; i < trainData.length; ++i ) {
          int idx = sequence[i];
          // extract x's and y's.
          System.arraycopy(trainData[idx], 0, xValues, 0, numInput);
          System.arraycopy(trainData[idx], numInput, tValues, 0, numOutput);
          ComputeOutputs(xValues); // copy xValues in, compute outputs (and store them internally)
          UpdateWeights(tValues, learnRate, momentum, loss); // use back-prop to find better weights
        } // each training tuple
        ++epoch;
      }
    } // Train

    /** Fisher-Yates shuffle of sequence in place, using the supplied RNG. */
    static void shuffle(int[] sequence, Random rand) {
      for( int i = sequence.length - 1; i >= 0; i-- ) {
        int r = rand.nextInt(i + 1);
        int tmp = sequence[r];
        sequence[r] = sequence[i];
        sequence[i] = tmp;
      }
    }

    // ----------------------------------------------------------------------------------------

    /**
     * Winner-takes-all evaluation on testData.
     * NOTE(review): despite the name this returns the ERROR rate (numWrong / total),
     * not the accuracy — callers rely on that, so the behavior is kept as-is.
     */
    public double Accuracy(double[][] testData) {
      // percentage correct using winner-takes all
      int numCorrect = 0;
      int numWrong = 0;
      double[] xValues = new double[numInput];  // inputs
      double[] tValues = new double[numOutput]; // targets
      double[] yValues;                         // computed Y

      for( int i = 0; i < testData.length; ++i ) {
        // parse test data into x-values and t-values
        System.arraycopy(testData[i], 0, xValues, 0, numInput);
        System.arraycopy(testData[i], numInput, tValues, 0, numOutput);
        yValues = this.ComputeOutputs(xValues);
        //int maxIndex = MaxIndex(yValues); // which cell in yValues has largest value?
        // convert to float and do the same tie-breaking as H2O
        double[] preds = new double[yValues.length+1];
        for (int j=0; j<yValues.length; ++j)
          preds[j+1] = (float)yValues[j];
        preds[0] = hex.genmodel.GenModel.getPrediction(preds, null, xValues, 0.5);
        if( tValues[(int)preds[0]] == 1.0 ) // ugly. consider AreEqual(double x, double y)
          ++numCorrect;
        else
          ++numWrong;
      }
      return (double)numWrong / (numCorrect + numWrong); // ugly 2 - check for divide by zero
    }

    // helper for Accuracy(); currently unused because H2O tie-breaking is used instead
    private static int MaxIndex(double[] vector) {
      // index of largest value
      int bigIndex = 0;
      double biggestVal = vector[0];
      for( int i = 0; i < vector.length; ++i ) {
        if( vector[i] > biggestVal ) {
          biggestVal = vector[i];
          bigIndex = i;
        }
      }
      return bigIndex;
    }
  }
}