DeepLearningModel.java example

Explorer
h2o-2-master
package hex.deeplearning;

import static java.lang.Double.isNaN;
import hex.FrameTask.DataInfo;
import hex.VarImp;
import water.*;
import water.api.*;
import water.api.Request.API;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.Vec;
import water.util.*;

import java.lang.reflect.Field;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Random;

/**
 * The Deep Learning model
 * It contains a DeepLearningModelInfo with the most up-to-date model,
 * a scoring history, as well as some helpers to indicate the progress
 */
public class DeepLearningModel extends Model implements Comparable<DeepLearningModel> {
  static final int API_WEAVER = 1; // This file has auto-gen'd doc & json fields
  static public DocGen.FieldDoc[] DOC_FIELDS; // Initialized from Auto-Gen code.

  @API(help="Model info", json = true)
  private volatile DeepLearningModelInfo model_info;
  void set_model_info(DeepLearningModelInfo mi) { model_info = mi; }
  final public DeepLearningModelInfo model_info() { return model_info; }

  @API(help="Job that built the model", json = true)
  final private Key jobKey;

  @API(help="Validation dataset used for model building", json = true)
  public final Key _validationKey;

  @API(help="Time to build the model", json = true)
  private long run_time;
  final private long start_time;

  public long actual_train_samples_per_iteration;
  public double time_for_communication_us; //helper for auto-tuning: time in microseconds for collective bcast/reduce of the model

  @API(help="Number of training epochs", json = true)
  public double epoch_counter;

  @API(help="Number of rows in training data", json = true)
  public long training_rows;

  @API(help="Number of rows in validation data", json = true)
  public long validation_rows;

  @API(help = "Scoring during model building")
  private Errors[] errors;
  public Errors[] scoring_history() { return errors; }

  // Keep the best model so far, based on a single criterion (overall class. error or MSE)
  private float _bestError = Float.MAX_VALUE;

  @API(help = "Key to the best model so far (based on overall error on scoring data set)")
  public Key actual_best_model_key;

  // return the most up-to-date model metrics
  Errors last_scored() { return errors == null ? null : errors[errors.length-1]; }

  @Override public final DeepLearning get_params() { return model_info.get_params(); }
  @Override public final Request2 job() { return model_info.get_job(); }

  @Override protected double missingColumnsType() { return get_params().sparse ? 0 : Double.NaN; }

  public float error() { return (float) (isClassifier() ? cm().err() : mse()); }

  @Override public boolean isClassifier() { return super.isClassifier() && !model_info.get_params().autoencoder; }
  @Override public boolean isSupervised() { return !model_info.get_params().autoencoder; }

  @Override public int nfeatures() { return model_info.get_params().autoencoder ? _names.length : _names.length - 1; }

  public int compareTo(DeepLearningModel o) {
    if (o.isClassifier() != isClassifier()) throw new UnsupportedOperationException("Cannot compare classifier against regressor.");
    if (o.nclasses() != nclasses()) throw new UnsupportedOperationException("Cannot compare models with different number of classes.");
    return (error() < o.error() ? -1 : error() > o.error() ? 1 : 0);
  }

  public static class Errors extends Iced {
    static final int API_WEAVER = 1;
    static public DocGen.FieldDoc[] DOC_FIELDS;

    @API(help = "How many epochs the algorithm has processed")
    public double epoch_counter;
    @API(help = "How many rows the algorithm has processed")
    public long training_samples;
    @API(help = "How long the algorithm ran in ms")
    public long training_time_ms;

    //training/validation sets
    @API(help = "Whether a validation set was provided")
    boolean validation;
    @API(help = "Number of folds for cross-validation (for validation=false)")
    int num_folds;
    @API(help = "Number of training set samples for scoring")
    public long score_training_samples;
    @API(help = "Number of validation set samples for scoring")
    public long score_validation_samples;

    @API(help="Do classification or regression")
    public boolean classification;

    @API(help = "Variable importances")
    VarImp variable_importances;

    // classification
    @API(help = "Confusion matrix on training data")
    public water.api.ConfusionMatrix train_confusion_matrix;
    @API(help = "Confusion matrix on validation data")
    public water.api.ConfusionMatrix valid_confusion_matrix;
    @API(help = "Classification error on training data")
    public double train_err = 1;
    @API(help = "Classification error on validation data")
    public double valid_err = 1;
    @API(help = "AUC on training data")
    public AUCData trainAUC;
    @API(help = "AUC on validation data")
    public AUCData validAUC;
    @API(help = "Hit ratio on training data")
    public water.api.HitRatio train_hitratio;
    @API(help = "Hit ratio on validation data")
    public water.api.HitRatio valid_hitratio;

    // regression
    @API(help = "Training MSE")
    public double train_mse = Double.POSITIVE_INFINITY;
    @API(help = "Validation MSE")
    public double valid_mse = Double.POSITIVE_INFINITY;

    @API(help = "Time taken for scoring")
    public long scoring_time;

    Errors deep_clone() {
      AutoBuffer ab = new AutoBuffer();
      this.write(ab);
      ab.flipForReading();
      return new Errors().read(ab);
    }

    @Override public String toString() {
      StringBuilder sb = new StringBuilder();
      if (classification) {
        sb.append("Error on training data (misclassification)"
                + (trainAUC != null ? " [using threshold for " + trainAUC.threshold_criterion.toString().replace("_"," ") +"]: ": ": ")
                + String.format("%.2f", 100*train_err) + "%");

        if (trainAUC != null) sb.append(", AUC on training data: " + String.format("%.4f", 100*trainAUC.AUC) + "%");
        if (validation || num_folds>0)
          sb.append("\nError on " + (num_folds>0 ? num_folds + "-fold cross-":"")+ "validation data (misclassification)"
                + (validAUC != null ? " [using threshold for " + validAUC.threshold_criterion.toString().replace("_"," ") +"]: ": ": ")
                + String.format("%.2f", (100*valid_err)) + "%");
        if (validAUC != null) sb.append(", AUC on validation data: " + String.format("%.4f", 100*validAUC.AUC) + "%");
      } else if (!Double.isInfinite(train_mse)) {
        sb.append("Error on training data (MSE): " + train_mse);
        if (validation || num_folds>0)
          sb.append("\nError on "+ (num_folds>0 ? num_folds + "-fold cross-":"")+ "validation data (MSE): " + valid_mse);
      }
      return sb.toString();
    }
  }

  final private static class ConfMat extends hex.ConfusionMatrix {
    final private double _err;
    final private double _f1;
    public ConfMat(double err, double f1) {
      super(null);
      _err=err;
      _f1=f1;
    }
    @Override public double err() { return _err; }
    @Override public double F1() { return _f1; }
    @Override public double[] classErr() { return null; }
  }

  /** for grid search error reporting */
  @Override
  public hex.ConfusionMatrix cm() {
    final Errors lasterror = last_scored();
    if (lasterror == null) return null;
    water.api.ConfusionMatrix cm = lasterror.validation || lasterror.num_folds > 0 ?
            lasterror.valid_confusion_matrix :
            lasterror.train_confusion_matrix;
    if (cm == null || cm.cm == null) {
      if (lasterror.validation || lasterror.num_folds > 0) {
        return new ConfMat(lasterror.valid_err, lasterror.validAUC != null ? lasterror.validAUC.F1() : 0);
      } else {
        return new ConfMat(lasterror.train_err, lasterror.trainAUC != null ? lasterror.trainAUC.F1() : 0);
      }
    }
    // cm.cm has NaN padding, reduce it to N-1 size
    return new hex.ConfusionMatrix(cm.cm, cm.cm.length-1);
  }

  @Override
  public double mse() {
    if (errors == null) return super.mse();
    return last_scored().validation || last_scored().num_folds > 0 ? last_scored().valid_mse : last_scored().train_mse;
  }

  @Override
  public VarImp varimp() {
    if (errors == null) return null;
    return last_scored().variable_importances;
  }

  // This describes the model, together with the parameters
  // This will be shared: one per node
  public static class DeepLearningModelInfo extends Iced {
    static final int API_WEAVER = 1; // This file has auto-gen'd doc & json fields
    static public DocGen.FieldDoc[] DOC_FIELDS; // Initialized from Auto-Gen code.

    @API(help="Input data info")
    private DataInfo data_info;
    public DataInfo data_info() { return data_info; }

    // model is described by parameters and the following arrays
    private Neurons.DenseRowMatrix[] dense_row_weights; //one 2D weight matrix per layer (stored as a 1D array each)
    private Neurons.DenseColMatrix[] dense_col_weights; //one 2D weight matrix per layer (stored as a 1D array each)
    private Neurons.DenseVector[] biases; //one 1D bias array per layer
    private Neurons.DenseVector[] avg_activations; //one 1D array per hidden layer

    // helpers for storing previous step deltas
    // Note: These two arrays *could* be made transient and then initialized freshly in makeNeurons() and in DeepLearningTask.initLocal()
    // But then, after each reduction, the weights would be lost and would have to restart afresh -> not *exactly* right, but close...
    private Neurons.DenseRowMatrix[] dense_row_weights_momenta;
    private Neurons.DenseColMatrix[] dense_col_weights_momenta;
    private Neurons.DenseVector[] biases_momenta;

    // helpers for AdaDelta
    private Neurons.DenseRowMatrix[] dense_row_ada_dx_g;
    private Neurons.DenseColMatrix[] dense_col_ada_dx_g;
    private Neurons.DenseVector[] biases_ada_dx_g;

    // compute model size (number of model parameters required for making predictions)
    // momenta are not counted here, but they are needed for model building
    public long size() {
      long siz = 0;
      for (Neurons.Matrix w : dense_row_weights) if (w != null) siz += w.size();
      for (Neurons.Matrix w : dense_col_weights) if (w != null) siz += w.size();
      for (Neurons.Vector b : biases) siz += b.size();
      return siz;
    }

    // accessors to (shared) weights and biases - those will be updated racily (c.f. Hogwild!)
    boolean has_momenta() { return get_params().momentum_start != 0 || get_params().momentum_stable != 0; }
    boolean adaDelta() { return get_params().adaptive_rate; }
    public final Neurons.Matrix get_weights(int i) { return dense_row_weights[i] == null ? dense_col_weights[i] : dense_row_weights[i]; }
    public final Neurons.DenseVector get_biases(int i) { return biases[i]; }
    public final Neurons.Matrix get_weights_momenta(int i) { return dense_row_weights_momenta[i] == null ? dense_col_weights_momenta[i] : dense_row_weights_momenta[i]; }
    public final Neurons.DenseVector get_biases_momenta(int i) { return biases_momenta[i]; }
    public final Neurons.Matrix get_ada_dx_g(int i) { return dense_row_ada_dx_g[i] == null ? dense_col_ada_dx_g[i] : dense_row_ada_dx_g[i]; }
    public final Neurons.DenseVector get_biases_ada_dx_g(int i) { return biases_ada_dx_g[i]; }

    //accessor to shared parameter defining avg activations
    public final Neurons.DenseVector get_avg_activations(int i) { return avg_activations[i]; }


    @API(help = "Model parameters", json = true)
    private Request2 job;
    public final DeepLearning get_params() { return (DeepLearning)job; }
    public final Request2 get_job() { return job; }

    @API(help = "Mean rate", json = true)
    private float[] mean_rate;

    @API(help = "RMS rate", json = true)
    private float[] rms_rate;

    @API(help = "Mean bias", json = true)
    private float[] mean_bias;

    @API(help = "RMS bias", json = true)
    private float[] rms_bias;

    @API(help = "Mean weight", json = true)
    private float[] mean_weight;

    @API(help = "RMS weight", json = true)
    public float[] rms_weight;

    @API(help = "Mean Activation", json = true)
    public float[] mean_a;

    @API(help = "Unstable", json = true)
    private volatile boolean unstable = false;
    public boolean unstable() { return unstable; }
    public void set_unstable() { if (!unstable) computeStats(); unstable = true; }

    @API(help = "Processed samples", json = true)
    private long processed_global;
    public synchronized long get_processed_global() { return processed_global; }
    public synchronized void set_processed_global(long p) { processed_global = p; }
    public synchronized void add_processed_global(long p) { processed_global += p; }

    private long processed_local;
    public synchronized long get_processed_local() { return processed_local; }
    public synchronized void set_processed_local(long p) { processed_local = p; }
    public synchronized void add_processed_local(long p) { processed_local += p; }

    public synchronized long get_processed_total() { return processed_global + processed_local; }

    // package local helpers
    int[] units; //number of neurons per layer, extracted from parameters and from datainfo

    public DeepLearningModelInfo() {}

    public DeepLearningModelInfo(final Job job, final DataInfo dinfo) {
      this.job = job;
      data_info = dinfo;
      final int num_input = dinfo.fullN();
      final int num_output = get_params().autoencoder ? num_input : get_params().classification ? dinfo._adaptedFrame.domains()[dinfo._adaptedFrame.domains().length-1].length : 1;
      assert(num_input > 0);
      assert(num_output > 0);
      if (has_momenta() && adaDelta()) throw new IllegalArgumentException("Cannot have non-zero momentum and adaptive rate at the same time.");
      final int layers=get_params().hidden.length;
      // units (# neurons for each layer)
      units = new int[layers+2];
      if (get_params().max_categorical_features <= Integer.MAX_VALUE - dinfo._nums)
        units[0] = Math.min(dinfo._nums + get_params().max_categorical_features, num_input);
      else
        units[0] = num_input;
      System.arraycopy(get_params().hidden, 0, units, 1, layers);
      units[layers+1] = num_output;

      if ((long)units[0] > 100000L) {
        final String[][] domains = dinfo._adaptedFrame.domains();
        int[] levels = new int[domains.length];
        for (int i=0; i<levels.length; ++i) {
          levels[i] = domains[i] != null ? domains[i].length : 0;
        }
        Arrays.sort(levels);
        Log.warn("===================================================================================================================================");
        Log.warn(num_input + " input features" + (dinfo._cats > 0 ? " (after categorical one-hot encoding)" : "") + ". Can be slow and require a lot of memory.");
        if (levels[levels.length-1] > 0) {
          int levelcutoff = levels[levels.length-1-Math.min(10, levels.length)];
          int count = 0;
          for (int i=0; i<dinfo._adaptedFrame.numCols() - (get_params().autoencoder ? 0 : 1) && count < 10; ++i) {
            if (dinfo._adaptedFrame.domains()[i] != null && dinfo._adaptedFrame.domains()[i].length >= levelcutoff) {
              Log.warn("Categorical feature '" + dinfo._adaptedFrame._names[i] + "' has cardinality " + dinfo._adaptedFrame.domains()[i].length + ".");
              count++;
            }
          }
        }
        Log.warn("Suggestions:");
        Log.warn(" *) Limit the size of the first hidden layer");
        if (dinfo._cats > 0) {
          Log.warn(" *) Limit the total number of one-hot encoded features with the parameter 'max_categorical_features'");
          Log.warn(" *) Run h2o.interaction(...,pairwise=F) on high-cardinality categorical columns to limit the factor count, see http://learn.h2o.ai");
        }
        Log.warn("===================================================================================================================================");
      }

      // weights (to connect layers)
      dense_row_weights = new Neurons.DenseRowMatrix[layers+1];
      dense_col_weights = new Neurons.DenseColMatrix[layers+1];

      // decide format of weight matrices row-major or col-major
      if (get_params().col_major) dense_col_weights[0] = new Neurons.DenseColMatrix(units[1], units[0]);
      else dense_row_weights[0] = new Neurons.DenseRowMatrix(units[1], units[0]);
      for (int i = 1; i <= layers; ++i)
        dense_row_weights[i] = new Neurons.DenseRowMatrix(units[i + 1] /*rows*/, units[i] /*cols*/);

      // biases (only for hidden layers and output layer)
      biases = new Neurons.DenseVector[layers+1];
      for (int i=0; i<=layers; ++i) biases[i] = new Neurons.DenseVector(units[i+1]);
      // average activation (only for hidden layers)
      if (get_params().autoencoder && get_params().sparsity_beta > 0) {
        avg_activations = new Neurons.DenseVector[layers];
        mean_a = new float[layers];
        for (int i = 0; i < layers; ++i) avg_activations[i] = new Neurons.DenseVector(units[i + 1]);
      }
      fillHelpers();
      // for diagnostics
      mean_rate = new float[units.length];
      rms_rate = new float[units.length];
      mean_bias = new float[units.length];
      rms_bias = new float[units.length];
      mean_weight = new float[units.length];
      rms_weight = new float[units.length];
    }

    // deep clone all weights/biases
    DeepLearningModelInfo deep_clone() {
      AutoBuffer ab = new AutoBuffer();
      this.write(ab);
      ab.flipForReading();
      return new DeepLearningModelInfo().read(ab);
    }

    void fillHelpers() {
      if (has_momenta()) {
        dense_row_weights_momenta = new Neurons.DenseRowMatrix[dense_row_weights.length];
        dense_col_weights_momenta = new Neurons.DenseColMatrix[dense_col_weights.length];
        if (dense_row_weights[0] != null)
          dense_row_weights_momenta[0] = new Neurons.DenseRowMatrix(units[1], units[0]);
        else
          dense_col_weights_momenta[0] = new Neurons.DenseColMatrix(units[1], units[0]);
        for (int i=1; i<dense_row_weights_momenta.length; ++i) dense_row_weights_momenta[i] = new Neurons.DenseRowMatrix(units[i+1], units[i]);

        biases_momenta = new Neurons.DenseVector[biases.length];
        for (int i=0; i<biases_momenta.length; ++i) biases_momenta[i] = new Neurons.DenseVector(units[i+1]);
      }
      else if (adaDelta()) {
        dense_row_ada_dx_g = new Neurons.DenseRowMatrix[dense_row_weights.length];
        dense_col_ada_dx_g = new Neurons.DenseColMatrix[dense_col_weights.length];
        //AdaGrad
        if (dense_row_weights[0] != null) {
          dense_row_ada_dx_g[0] = new Neurons.DenseRowMatrix(units[1], 2*units[0]);
        } else {
          dense_col_ada_dx_g[0] = new Neurons.DenseColMatrix(2*units[1], units[0]);
        }
        for (int i=1; i<dense_row_ada_dx_g.length; ++i) {
          dense_row_ada_dx_g[i] = new Neurons.DenseRowMatrix(units[i+1], 2*units[i]);
        }
        biases_ada_dx_g = new Neurons.DenseVector[biases.length];
        for (int i=0; i<biases_ada_dx_g.length; ++i) {
          biases_ada_dx_g[i] = new Neurons.DenseVector(2*units[i+1]);
        }
      }
    }

    @Override public String toString() {
      StringBuilder sb = new StringBuilder();
      if (get_params().diagnostics && !get_params().quiet_mode) {
        Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(this);

        sb.append("Number of hidden layers is " + get_params().hidden.length + " \n");

        if (get_params().sparsity_beta > 0) {
          for (int k = 0; k < get_params().hidden.length; k++)
            sb.append("Average activation in hidden layer " + k + " is  " + mean_a[k] + " \n");
        }

        sb.append("Status of Neuron Layers:\n");
        sb.append("#  Units         Type      Dropout    L1       L2    " + (get_params().adaptive_rate ? "  Rate (Mean,RMS)   " : "  Rate      Momentum") + "   Weight (Mean, RMS)      Bias (Mean,RMS)\n");
        final String format = "%7g";
        for (int i=0; i<neurons.length; ++i) {
          sb.append((i+1) + " " + String.format("%6d", neurons[i].units)
                  + " " + String.format("%16s", neurons[i].getClass().getSimpleName()));
          if (i == 0) {
            sb.append("  " + Utils.formatPct(neurons[i].params.input_dropout_ratio) + " \n");
            continue;
          }
          else if (i < neurons.length-1) {
            if (neurons[i].params.hidden_dropout_ratios == null)
              sb.append("  " + Utils.formatPct(0) + " ");
            else
              sb.append("  " + Utils.formatPct(neurons[i].params.hidden_dropout_ratios[i - 1]) + " ");
          } else {
            sb.append("          ");
          }
          sb.append(
                  " " + String.format("%5f", neurons[i].params.l1)
                          + " " + String.format("%5f", neurons[i].params.l2)
                          + " " + (get_params().adaptive_rate ? (" (" + String.format(format, mean_rate[i]) + ", " + String.format(format, rms_rate[i]) + ")" )
                          : (String.format("%10g", neurons[i].rate(get_processed_total())) + " " + String.format("%5f", neurons[i].momentum(get_processed_total()))))
                          + " (" + String.format(format, mean_weight[i])
                          + ", " + String.format(format, rms_weight[i]) + ")"
                          + " (" + String.format(format, mean_bias[i])
                          + ", " + String.format(format, rms_bias[i]) + ")\n");

          if (get_params().sparsity_beta > 0) {
            // sb.append("  " + String.format(format, mean_a[i]) + " \n");
          }
        }
      }
      return sb.toString();
    }

    // DEBUGGING
    public String toStringAll() {
      StringBuilder sb = new StringBuilder();
      sb.append(toString());

      for (int i=0; i<units.length-1; ++i)
        sb.append("\nweights["+i+"][]="+Arrays.toString(get_weights(i).raw()));
      for (int i=0; i<units.length-1; ++i)
        sb.append("\nbiases["+i+"][]="+Arrays.toString(get_biases(i).raw()));
      if (has_momenta()) {
        for (int i=0; i<units.length-1; ++i)
          sb.append("\nweights_momenta["+i+"][]="+Arrays.toString(get_weights_momenta(i).raw()));
      }
      if (biases_momenta != null) {
        for (int i=0; i<units.length-1; ++i)
          sb.append("\nbiases_momenta["+i+"][]="+Arrays.toString(biases_momenta[i].raw()));
      }
      sb.append("\nunits[]="+Arrays.toString(units));
      sb.append("\nprocessed global: "+get_processed_global());
      sb.append("\nprocessed local:  "+get_processed_local());
      sb.append("\nprocessed total:  " + get_processed_total());
      sb.append("\n");
      return sb.toString();
    }

    void initializeMembers() {
      randomizeWeights();
      //TODO: determine good/optimal/best initialization scheme for biases
      // hidden layers
      for (int i=0; i<get_params().hidden.length; ++i) {
        if (get_params().activation == DeepLearning.Activation.Rectifier
                || get_params().activation == DeepLearning.Activation.RectifierWithDropout
                || get_params().activation == DeepLearning.Activation.Maxout
                || get_params().activation == DeepLearning.Activation.MaxoutWithDropout
                ) {
//          Arrays.fill(biases[i], 1.); //old behavior
          Arrays.fill(biases[i].raw(), i == 0 ? 0.5f : 1f); //new behavior, might be slightly better
        }
        else if (get_params().activation == DeepLearning.Activation.Tanh || get_params().activation == DeepLearning.Activation.TanhWithDropout) {
          Arrays.fill(biases[i].raw(), 0f);
        }
      }
      Arrays.fill(biases[biases.length-1].raw(), 0f); //output layer
    }
    public void add(DeepLearningModelInfo other) {
      for (int i=0;i<dense_row_weights.length;++i)
        Utils.add(get_weights(i).raw(), other.get_weights(i).raw());
      for (int i=0;i<biases.length;++i) Utils.add(biases[i].raw(), other.biases[i].raw());
      if (avg_activations != null)
        for (int i=0;i<avg_activations.length;++i)
          Utils.add(avg_activations[i].raw(), other.biases[i].raw());
      if (has_momenta()) {
        assert(other.has_momenta());
        for (int i=0;i<dense_row_weights_momenta.length;++i)
          Utils.add(get_weights_momenta(i).raw(), other.get_weights_momenta(i).raw());
        for (int i=0;i<biases_momenta.length;++i)
          Utils.add(biases_momenta[i].raw(),  other.biases_momenta[i].raw());
      }
      if (adaDelta()) {
        assert(other.adaDelta());
        for (int i=0;i<dense_row_ada_dx_g.length;++i) {
          Utils.add(get_ada_dx_g(i).raw(), other.get_ada_dx_g(i).raw());
        }
      }
      add_processed_local(other.get_processed_local());
    }
    protected void div(float N) {
      for (int i=0; i<dense_row_weights.length; ++i)
        Utils.div(get_weights(i).raw(), N);
      for (Neurons.Vector bias : biases) Utils.div(bias.raw(), N);
      if (avg_activations != null)
        for (Neurons.Vector avgac : avg_activations)
          Utils.div(avgac.raw(), N);
      if (has_momenta()) {
        for (int i=0; i<dense_row_weights_momenta.length; ++i)
          Utils.div(get_weights_momenta(i).raw(), N);
        for (Neurons.Vector bias_momenta : biases_momenta) Utils.div(bias_momenta.raw(), N);
      }
      if (adaDelta()) {
        for (int i=0;i<dense_row_ada_dx_g.length;++i) {
          Utils.div(get_ada_dx_g(i).raw(), N);
        }
      }
    }
    double uniformDist(Random rand, double min, double max) {
      return min + rand.nextFloat() * (max - min);
    }
    void randomizeWeights() {
      for (int w=0; w<dense_row_weights.length; ++w) {
        final Random rng = water.util.Utils.getDeterRNG(get_params().seed + 0xBAD5EED + w+1); //to match NeuralNet behavior
        final double range = Math.sqrt(6. / (units[w] + units[w+1]));
        for( int i = 0; i < get_weights(w).rows(); i++ ) {
          for( int j = 0; j < get_weights(w).cols(); j++ ) {
            if (get_params().initial_weight_distribution == DeepLearning.InitialWeightDistribution.UniformAdaptive) {
              // cf. http://machinelearning.wustl.edu/mlpapers/paper_files/AISTATS2010_GlorotB10.pdf
              if (w==dense_row_weights.length-1 && get_params().classification)
                get_weights(w).set(i,j, (float)(4.*uniformDist(rng, -range, range))); //Softmax might need an extra factor 4, since it's like a sigmoid
              else
                get_weights(w).set(i,j, (float)uniformDist(rng, -range, range));
            }
            else if (get_params().initial_weight_distribution == DeepLearning.InitialWeightDistribution.Uniform) {
              get_weights(w).set(i,j, (float)uniformDist(rng, -get_params().initial_weight_scale, get_params().initial_weight_scale));
            }
            else if (get_params().initial_weight_distribution == DeepLearning.InitialWeightDistribution.Normal) {
              get_weights(w).set(i,j, (float)(rng.nextGaussian() * get_params().initial_weight_scale));
            }
          }
        }
      }
    }

    // TODO: Add "subset randomize" function
//        int count = Math.min(15, _previous.units);
//        double min = -.1f, max = +.1f;
//        //double min = -1f, max = +1f;
//        for( int o = 0; o < units; o++ ) {
//          for( int n = 0; n < count; n++ ) {
//            int i = rand.nextInt(_previous.units);
//            int w = o * _previous.units + i;
//            _w[w] = uniformDist(rand, min, max);
//          }
//        }

    /**
     * Compute Variable Importance, based on
     * GEDEON: DATA MINING OF INPUTS: ANALYSING MAGNITUDE AND FUNCTIONAL MEASURES
     * @return variable importances for input features
     */
    public float[] computeVariableImportances() {
      float[] vi = new float[units[0]];
      Arrays.fill(vi, 0f);

      float[][] Qik = new float[units[0]][units[2]]; //importance of input i on output k
      float[] sum_wj = new float[units[1]]; //sum of incoming weights into first hidden layer
      float[] sum_wk = new float[units[2]]; //sum of incoming weights into output layer (or second hidden layer)
      for (float[] Qi : Qik) Arrays.fill(Qi, 0f);
      Arrays.fill(sum_wj, 0f);
      Arrays.fill(sum_wk, 0f);

      // compute sum of absolute incoming weights
      for( int j = 0; j < units[1]; j++ ) {
        for( int i = 0; i < units[0]; i++ ) {
          float wij = get_weights(0).get(j, i);
          sum_wj[j] += Math.abs(wij);
        }
      }
      for( int k = 0; k < units[2]; k++ ) {
        for( int j = 0; j < units[1]; j++ ) {
          float wjk = get_weights(1).get(k,j);
          sum_wk[k] += Math.abs(wjk);
        }
      }
      // compute importance of input i on output k as product of connecting weights going through j
      for( int i = 0; i < units[0]; i++ ) {
        for( int k = 0; k < units[2]; k++ ) {
          for( int j = 0; j < units[1]; j++ ) {
            float wij = get_weights(0).get(j,i);
            float wjk = get_weights(1).get(k,j);
            //Qik[i][k] += Math.abs(wij)/sum_wj[j] * wjk; //Wong,Gedeon,Taggart '95
            Qik[i][k] += Math.abs(wij)/sum_wj[j] * Math.abs(wjk)/sum_wk[k]; //Gedeon '97
          }
        }
      }
      // normalize Qik over all outputs k
      for( int k = 0; k < units[2]; k++ ) {
        float sumQk = 0;
        for( int i = 0; i < units[0]; i++ ) sumQk += Qik[i][k];
        for( int i = 0; i < units[0]; i++ ) Qik[i][k] /= sumQk;
      }
      // importance for feature i is the sum over k of i->k importances
      for( int i = 0; i < units[0]; i++ ) vi[i] = Utils.sum(Qik[i]);

      //normalize importances such that max(vi) = 1
      Utils.div(vi, Utils.maxValue(vi));
      return vi;
    }

    // compute stats on all nodes
    public void computeStats() {
      float[][] rate = get_params().adaptive_rate ? new float[units.length-1][] : null;

      if (get_params().autoencoder && get_params().sparsity_beta > 0) {
        for (int k = 0; k < get_params().hidden.length; k++) {
          mean_a[k] = 0;
          for (int j = 0; j < avg_activations[k].size(); j++)
            mean_a[k] += avg_activations[k].get(j);
          mean_a[k] /= avg_activations[k].size();
        }
      }

      for( int y = 1; y < units.length; y++ ) {
        mean_rate[y] = rms_rate[y] = 0;
        mean_bias[y] = rms_bias[y] = 0;
        mean_weight[y] = rms_weight[y] = 0;
        for(int u = 0; u < biases[y-1].size(); u++) {
          mean_bias[y] += biases[y-1].get(u);
        }
        if (rate != null) rate[y-1] = new float[get_weights(y-1).raw().length];
        for(int u = 0; u < get_weights(y-1).raw().length; u++) {
          mean_weight[y] += get_weights(y-1).raw()[u];
          if (rate != null) {
//            final float RMS_dx = (float)Math.sqrt(ada[y-1][2*u]+(float)get_params().epsilon);
//            final float invRMS_g = (float)(1/Math.sqrt(ada[y-1][2*u+1]+(float)get_params().epsilon));
            final float RMS_dx = Utils.approxSqrt(get_ada_dx_g(y-1).raw()[2*u]+(float)get_params().epsilon);
            final float invRMS_g = Utils.approxInvSqrt(get_ada_dx_g(y-1).raw()[2*u+1]+(float)get_params().epsilon);
            rate[y-1][u] = RMS_dx*invRMS_g; //not exactly right, RMS_dx should be from the previous time step -> but close enough for diagnostics.
            mean_rate[y] += rate[y-1][u];
          }
        }


        mean_bias[y] /= biases[y-1].size();

        mean_weight[y] /= get_weights(y-1).size();
        if (rate != null) mean_rate[y] /= rate[y-1].length;

        for(int u = 0; u < biases[y-1].size(); u++) {
          final double db = biases[y-1].get(u) - mean_bias[y];
          rms_bias[y] += db * db;
        }
        for(int u = 0; u < get_weights(y-1).size(); u++) {
          final double dw = get_weights(y-1).raw()[u] - mean_weight[y];
          rms_weight[y] += dw * dw;
          if (rate != null) {
            final double drate = rate[y-1][u] - mean_rate[y];
            rms_rate[y] += drate * drate;
          }
        }
        rms_bias[y] = Utils.approxSqrt(rms_bias[y]/biases[y-1].size());
        rms_weight[y] = Utils.approxSqrt(rms_weight[y]/get_weights(y-1).size());
        if (rate != null) rms_rate[y] = Utils.approxSqrt(rms_rate[y]/rate[y-1].length);
//        rms_bias[y] = (float)Math.sqrt(rms_bias[y]/biases[y-1].length);
//        rms_weight[y] = (float)Math.sqrt(rms_weight[y]/weights[y-1].length);
//        if (rate != null) rms_rate[y] = (float)Math.sqrt(rms_rate[y]/rate[y-1].length);

        // Abort the run if weights or biases are unreasonably large (Note that all input values are normalized upfront)
        // This can happen with Rectifier units when L1/L2/max_w2 are all set to 0, especially when using more than 1 hidden layer.
        final double thresh = 1e10;
        unstable |= mean_bias[y] > thresh  || isNaN(mean_bias[y])
                || rms_bias[y] > thresh    || isNaN(rms_bias[y])
                || mean_weight[y] > thresh || isNaN(mean_weight[y])
                || rms_weight[y] > thresh  || isNaN(rms_weight[y]);
      }
    }
  }

  /**
   * Constructor to restart from a checkpointed model
   * @param cp Checkpoint to restart from
   * @param destKey New destination key for the model
   * @param jobKey New job key (job which updates the model)
   */
  public DeepLearningModel(final DeepLearningModel cp, final Key destKey, final Key jobKey, final DataInfo dataInfo) {
    super(destKey, cp._dataKey, dataInfo._adaptedFrame.names(), dataInfo._adaptedFrame.domains(), cp._priorClassDist != null ? cp._priorClassDist.clone() : null, null);
    final boolean store_best_model = (jobKey == null);
    this.jobKey = jobKey;
    this._validationKey = cp._validationKey;
    if (store_best_model) {
      model_info = cp.model_info.deep_clone(); //don't want to interfere with model being built, just make a deep copy and store that
      model_info.data_info = dataInfo.deep_clone(); //replace previous data_info with updated version that's passed in (contains enum for classification)
      get_params().state = Job.JobState.DONE; //change the deep_clone'd state to DONE
      _modelClassDist = cp._modelClassDist != null ? cp._modelClassDist.clone() : null;
    } else {
      model_info = (DeepLearningModelInfo) cp.model_info.clone(); //shallow clone is ok (won't modify the Checkpoint in K-V store during checkpoint restart)
      model_info.data_info = dataInfo; //shallow clone is ok
      get_params().checkpoint = cp._key; //it's only a "real" checkpoint if job != null, otherwise a best model copy
      get_params().state = ((DeepLearning)UKV.get(jobKey)).state; //make the job state consistent
    }
    get_params().job_key = jobKey;
    get_params().destination_key = destKey;
    get_params().start_time = System.currentTimeMillis(); //for displaying the model progress
    actual_best_model_key = cp.actual_best_model_key;
    start_time = cp.start_time;
    run_time = cp.run_time;
    training_rows = cp.training_rows; //copy the value to display the right number on the model page before training has started
    validation_rows = cp.validation_rows; //copy the value to display the right number on the model page before training has started
    _bestError = cp._bestError;

    // deep clone scoring history
    errors = cp.errors.clone();
    for (int i=0; i<errors.length;++i)
      errors[i] = cp.errors[i].deep_clone();

    // set proper timing
    _timeLastScoreEnter = System.currentTimeMillis();
    _timeLastScoreStart = 0;
    _timeLastScoreEnd = 0;
    _timeLastPrintStart = 0;
    assert(Arrays.equals(_key._kb, destKey._kb));
  }

  public DeepLearningModel(final Key destKey, final Key jobKey, final Key dataKey, final DataInfo dinfo, final DeepLearning params, final float[] priorDist) {
    super(destKey, dataKey, dinfo._adaptedFrame, priorDist);
    this.jobKey = jobKey;
    this._validationKey = params.validation != null ? params.validation._key : null;
    run_time = 0;
    start_time = System.currentTimeMillis();
    _timeLastScoreEnter = start_time;
    model_info = new DeepLearningModelInfo(params, dinfo);
    actual_best_model_key = Key.makeSystem(Key.make().toString());

    if (params.n_folds != 0) actual_best_model_key = null;
    Object job = UKV.get(jobKey);
    if (job instanceof DeepLearning)
      get_params().state = ((DeepLearning)UKV.get(jobKey)).state; //make the job state consistent
    else
      get_params().state = ((Job.JobHandle)UKV.get(jobKey)).state; //make the job state consistent
    if (!get_params().autoencoder) {
      errors = new Errors[1];
      errors[0] = new Errors();
      errors[0].validation = (params.validation != null);
      errors[0].num_folds = params.n_folds;
    }
    assert(Arrays.equals(_key._kb, destKey._kb));
  }

  public long _timeLastScoreEnter; //not transient: needed for HTML display page
  transient private long _timeLastScoreStart;
  transient private long _timeLastScoreEnd;
  transient private long _timeLastPrintStart;
  /**
   *
   * @param train training data from which the model is built (for epoch counting only)
   * @param ftrain potentially downsampled training data for scoring
   * @param ftest  potentially downsampled validation data for scoring
   * @param job_key key of the owning job
   * @return true if model building is ongoing
   */
  boolean doScoring(Frame train, Frame ftrain, Frame ftest, Key job_key, Job.ValidatedJob.Response2CMAdaptor vadaptor) {
    try {
      final long now = System.currentTimeMillis();
      epoch_counter = (float)model_info().get_processed_total()/training_rows;
      final double time_last_iter_millis = now-_timeLastScoreEnter;

      // Auto-tuning
      // if multi-node and auto-tuning and at least 10 ms for communication (to avoid doing thins on multi-JVM on same node),
      // then adjust the auto-tuning parameter 'actual_train_samples_per_iteration' such that the targeted ratio of comm to comp is achieved
      // Note: actual communication time is estimated by the NetworkTest's collective test.
      if (H2O.CLOUD.size() > 1 && get_params().train_samples_per_iteration == -2 && time_for_communication_us > 1e4) {
//        Log.info("Time taken for communication: " + PrettyPrint.usecs((long)time_for_communication_us));
//        Log.info("Time taken for Map/Reduce iteration: " + PrettyPrint.msecs((long)time_last_iter_millis, true));
        final double comm_to_work_ratio = (time_for_communication_us *1e-3) / time_last_iter_millis;
//        Log.info("Ratio of network communication to computation: " + String.format("%.3f", comm_to_work_ratio));
//        Log.info("target_comm_to_work: " + get_params().target_ratio_comm_to_comp);
        final double correction = get_params().target_ratio_comm_to_comp / comm_to_work_ratio;
//        Log.warn("Suggested value for train_samples_per_iteration: " + get_params().actual_train_samples_per_iteration/correction);
        actual_train_samples_per_iteration /= correction;
        actual_train_samples_per_iteration = Math.max(1, actual_train_samples_per_iteration);
      }

      run_time += time_last_iter_millis;
      _timeLastScoreEnter = now;
      boolean keep_running = (epoch_counter < get_params().epochs);
      final long sinceLastScore = now -_timeLastScoreStart;
      final long sinceLastPrint = now -_timeLastPrintStart;
      final long samples = model_info().get_processed_total();
      if (!keep_running || sinceLastPrint > get_params().score_interval*1000) {
        _timeLastPrintStart = now;
        Log.info("Training time: " + PrettyPrint.msecs(run_time, true)
                + ". Processed " + String.format("%,d", samples) + " samples" + " (" + String.format("%.3f", epoch_counter) + " epochs)."
                + " Speed: " + String.format("%.3f", 1000.*samples/run_time) + " samples/sec.");
      }

      // this is potentially slow - only do every so often
      if( !keep_running ||
              (sinceLastScore > get_params().score_interval*1000 //don't score too often
                      &&(double)(_timeLastScoreEnd-_timeLastScoreStart)/sinceLastScore < get_params().score_duty_cycle) ) { //duty cycle
        final boolean printme = !get_params().quiet_mode;
        final boolean adaptCM = (isClassifier() && vadaptor.needsAdaptation2CM());
        _timeLastScoreStart = now;
        if (get_params().diagnostics) model_info().computeStats();
        Errors err = new Errors();
        err.training_time_ms = run_time;
        err.epoch_counter = epoch_counter;
        err.training_samples = model_info().get_processed_total();
        err.validation = ftest != null;
        err.score_training_samples = ftrain.numRows();

        if (get_params().autoencoder) {
          if (printme) Log.info("Scoring the auto-encoder.");
          // training
          {
            final Frame mse_frame = scoreAutoEncoder(ftrain);
            final Vec l2 = mse_frame.anyVec();
            Log.info("Mean reconstruction error on training data: " + l2.mean() + "\n");
            err.train_mse = l2.mean();
            mse_frame.delete();
          }
        } else {
          if (printme) Log.info("Scoring the model.");
          // compute errors
          err.classification = isClassifier();
          assert (err.classification == get_params().classification);
          err.num_folds = get_params().n_folds;
          err.train_confusion_matrix = new ConfusionMatrix();
          final int hit_k = Math.min(nclasses(), get_params().max_hit_ratio_k);
          if (err.classification && nclasses() > 2 && hit_k > 0) {
            err.train_hitratio = new HitRatio();
            err.train_hitratio.set_max_k(hit_k);
          }
          final String m = model_info().toString();
          if (m.length() > 0) Log.info(m);
          final Frame trainPredict = score(ftrain, false);
          AUC trainAUC = null;
          if (err.classification && nclasses() == 2) trainAUC = new AUC();
          final double trainErr = calcError(ftrain, ftrain.lastVec(), trainPredict, trainPredict, "training",
                  printme, get_params().max_confusion_matrix_size, err.train_confusion_matrix, trainAUC, err.train_hitratio);
          if (isClassifier()) err.train_err = trainErr;
          if (trainAUC != null) err.trainAUC = trainAUC.data();
          else err.train_mse = trainErr;

          trainPredict.delete();

          if (err.validation) {
            assert ftest != null;
            err.score_validation_samples = ftest.numRows();
            err.valid_confusion_matrix = new ConfusionMatrix();
            if (err.classification && nclasses() > 2 && hit_k > 0) {
              err.valid_hitratio = new HitRatio();
              err.valid_hitratio.set_max_k(hit_k);
            }
            final String adaptRespName = vadaptor.adaptedValidationResponse(responseName());
            Vec adaptCMresp = null;
            if (adaptCM) {
              Vec[] v = ftest.vecs();
              assert (ftest.find(adaptRespName) == v.length - 1); //make sure to have (adapted) response in the test set
              adaptCMresp = ftest.remove(v.length - 1); //model would remove any extra columns anyway (need to keep it here for later)
            }

            final Frame validPredict = score(ftest, adaptCM);
            final Frame hitratio_validPredict = new Frame(validPredict);
            Vec orig_label = validPredict.vecs()[0];
            // Adapt output response domain, in case validation domain is different from training domain
            // Note: doesn't change predictions, just the *possible* label domain
            if (adaptCM) {
              assert (adaptCMresp != null);
              assert (ftest.find(adaptRespName) == -1);
              ftest.add(adaptRespName, adaptCMresp);
              final Vec CMadapted = vadaptor.adaptModelResponse2CM(validPredict.vecs()[0]);
              validPredict.replace(0, CMadapted); //replace label
              validPredict.add("to_be_deleted", CMadapted); //keep the Vec around to be deleted later (no leak)
            }
            AUC validAUC = null;
            if (err.classification && nclasses() == 2) validAUC = new AUC();
            final double validErr = calcError(ftest, ftest.lastVec(), validPredict, hitratio_validPredict, "validation",
                    printme, get_params().max_confusion_matrix_size, err.valid_confusion_matrix, validAUC, err.valid_hitratio);
            if (isClassifier()) err.valid_err = validErr;
            if (trainAUC != null) err.validAUC = validAUC.data();
            else err.valid_mse = validErr;
            validPredict.delete();
            //also delete the replaced label
            if (adaptCM) orig_label.remove(new Futures()).blockForPending();
          }

          // only keep confusion matrices for the last step if there are fewer than specified number of output classes
          if (err.train_confusion_matrix.cm != null
                  && err.train_confusion_matrix.cm.length - 1 >= get_params().max_confusion_matrix_size) {
            err.train_confusion_matrix = null;
            err.valid_confusion_matrix = null;
          }
        }

        if (get_params().variable_importances) {
          if (!get_params().quiet_mode) Log.info("Computing variable importances.");
          final float[] vi = model_info().computeVariableImportances();
          err.variable_importances = new VarImp(vi, Arrays.copyOfRange(model_info().data_info().coefNames(), 0, vi.length));
        }

        _timeLastScoreEnd = System.currentTimeMillis();
        err.scoring_time = System.currentTimeMillis() - now;
        // enlarge the error array by one, push latest score back
        if (errors == null) {
          errors = new Errors[]{err};
        } else {
          Errors[] err2 = new Errors[errors.length + 1];
          System.arraycopy(errors, 0, err2, 0, errors.length);
          err2[err2.length - 1] = err;
          errors = err2;
        }

        if (!get_params().autoencoder) {
          // always keep a copy of the best model so far (based on the following criterion)
          if (actual_best_model_key != null && (
                  // if we have a best_model in DKV, then compare against its error() (unless it's a different model as judged by the network size)
                  (UKV.get(actual_best_model_key) != null && (error() < UKV.<DeepLearningModel>get(actual_best_model_key).error() || !Arrays.equals(model_info().units, UKV.<DeepLearningModel>get(actual_best_model_key).model_info().units)))
                          ||
                          // otherwise, compare against our own _bestError
                          (UKV.get(actual_best_model_key) == null && error() < _bestError)
          ) ) {
            if (!get_params().quiet_mode)
              Log.info("Error reduced from " + _bestError + " to " + error() + ". Storing best model so far under key " + actual_best_model_key.toString() + ".");
            _bestError = error();
            putMeAsBestModel(actual_best_model_key);

            // debugging check
            if (false) {
              DeepLearningModel bestModel = UKV.get(actual_best_model_key);
              final Frame fr = ftest != null ? ftest : ftrain;
              final Frame bestPredict = bestModel.score(fr, ftest != null ? adaptCM : false);
              final Frame hitRatio_bestPredict = new Frame(bestPredict);
              // Adapt output response domain, in case validation domain is different from training domain
              // Note: doesn't change predictions, just the *possible* label domain
              if (adaptCM) {
                final Vec CMadapted = vadaptor.adaptModelResponse2CM(bestPredict.vecs()[0]);
                bestPredict.replace(0, CMadapted); //replace label
                bestPredict.add("to_be_deleted", CMadapted); //keep the Vec around to be deleted later (no leak)
              }
              final double err3 = calcError(fr, fr.lastVec(), bestPredict, hitRatio_bestPredict, "cross-check",
                      printme, get_params().max_confusion_matrix_size, new water.api.ConfusionMatrix(), isClassifier() && nclasses() == 2 ? new AUC() : null, null);
              if (isClassifier())
                assert (ftest != null ? Math.abs(err.valid_err - err3) < 1e-5 : Math.abs(err.train_err - err3) < 1e-5);
              else
                assert (ftest != null ? Math.abs(err.valid_mse - err3) < 1e-5 : Math.abs(err.train_mse - err3) < 1e-5);
              bestPredict.delete();
            }
          }
//        else {
//          // keep output JSON small
//          if (errors.length > 1) {
//            if (last_scored().trainAUC != null) last_scored().trainAUC.clear();
//            if (last_scored().validAUC != null) last_scored().validAUC.clear();
//            last_scored().variable_importances = null;
//          }
//        }

          // print the freshly scored model to ASCII
          for (String s : toString().split("\n")) Log.info(s);
          if (printme) Log.info("Time taken for scoring and diagnostics: " + PrettyPrint.msecs(err.scoring_time, true));
        }
      }
      if (model_info().unstable()) {
        Log.warn(unstable_msg);
        keep_running = false;
      } else if ( (isClassifier() && last_scored().train_err <= get_params().classification_stop)
              || (!isClassifier() && last_scored().train_mse <= get_params().regression_stop) ) {
        Log.info("Achieved requested predictive accuracy on the training data. Model building completed.");
        keep_running = false;
      }
      update(job_key);

//    System.out.println(this);
      return keep_running;
    }
    catch (Exception ex) {
      return false;
    }
  }

  @Override protected void setCrossValidationError(Job.ValidatedJob job, double cv_error, ConfusionMatrix cm, AUCData auc, HitRatio hr) {
    _have_cv_results = true;
    if (!get_params().classification)
      last_scored().valid_mse = cv_error;
    else
      last_scored().valid_err = cv_error;
    last_scored().score_validation_samples = last_scored().score_training_samples / get_params().n_folds;
    last_scored().num_folds = get_params().n_folds;
    last_scored().valid_confusion_matrix = cm;
    last_scored().validAUC = auc;
    last_scored().valid_hitratio = hr;
    DKV.put(this._key, this); //overwrite this model
  }

  @Override public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append(model_info.toString());
    sb.append(last_scored().toString());
    return sb.toString();
  }

  public String toStringAll() {
    StringBuilder sb = new StringBuilder();
    sb.append(model_info.toStringAll());
    sb.append(last_scored().toString());
    return sb.toString();
  }

  public String getHeader() {
    assert get_params().autoencoder;
    StringBuilder sb = new StringBuilder();
    final int len = model_info().data_info().fullN();
    String prefix = "reconstr_";
    assert (model_info().data_info()._responses == 0);
    String[] coefnames = model_info().data_info().coefNames();
    assert (len == coefnames.length);
    for (int c = 0; c < len; c++) {
      if (c>0) sb.append(",");
      sb.append(prefix + coefnames[c]);
    }
    return sb.toString();
  }

  /**
   * This is an overridden version of Model.score(). Make either a prediction or a reconstruction.
   * @param frame Test dataset
   * @return A frame containing the prediction or reconstruction
   */
  @Override
  public Frame score(Frame frame) {
    if (!get_params().autoencoder) {
      return super.score(frame);
    } else {
      // Reconstruction
      // Adapt the Frame layout - returns adapted frame and frame containing only
      // newly created vectors
      Frame[] adaptFrms = adapt(frame,false,false/*no response*/);
      // Adapted frame containing all columns - mix of original vectors from fr
      // and newly created vectors serving as adaptors
      Frame adaptFrm = adaptFrms[0];
      // Contains only newly created vectors. The frame eases deletion of these vectors.
      Frame onlyAdaptFrm = adaptFrms[1];

      final int len = model_info().data_info().fullN();
      String prefix = "reconstr_";
      assert(model_info().data_info()._responses == 0);
      String[] coefnames = model_info().data_info().coefNames();
      assert(len == coefnames.length);
      for( int c=0; c<len; c++ )
        adaptFrm.add(prefix+coefnames[c],adaptFrm.anyVec().makeZero());
      new MRTask2() {
        @Override public void map( Chunk chks[] ) {
          double tmp [] = new double[_names.length];
          float preds[] = new float [len];
          final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
          for( int row=0; row<chks[0]._len; row++ ) {
            float p[] = score_autoencoder(chks, row, tmp, preds, neurons);
            for( int c=0; c<preds.length; c++ )
              chks[_names.length+c].set0(row,p[c]);
          }
        }
      }.doAll(adaptFrm);

      // Return the predicted columns
      int x=_names.length, y=adaptFrm.numCols();
      Frame f = adaptFrm.extractFrame(x, y); //this will call vec_impl() and we cannot call the delete() below just yet
      onlyAdaptFrm.delete();
      return f;
    }
  }

  /**
   * Predict from raw double values representing the data
   * @param data raw array containing categorical values (horizontalized to 1,0,0,1,0,0 etc.) and numerical values (0.35,1.24,5.3234,etc), both can contain NaNs
   * @param preds predicted label and per-class probabilities (for classification), predicted target (regression), can contain NaNs
   * @return preds, can contain NaNs
   */
  @Override public float[] score0(double[] data, float[] preds) {
    if (model_info().unstable()) {
      Log.warn(unstable_msg);
      throw new UnsupportedOperationException("Trying to predict with an unstable model.");
    }
    Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
    ((Neurons.Input)neurons[0]).setInput(-1, data);
    DeepLearningTask.step(-1, neurons, model_info, false, null);
    float[] out = neurons[neurons.length - 1]._a.raw();
    if (isClassifier()) {
      assert (preds.length == out.length + 1);
      for (int i = 0; i < preds.length - 1; ++i) {
        preds[i + 1] = out[i];
        if (Float.isNaN(preds[i + 1])) throw new RuntimeException("Predicted class probability NaN!");
      }
      preds[0] = ModelUtils.getPrediction(preds, data);
    } else {
      assert (preds.length == 1 && out.length == 1);
      if (model_info().data_info()._normRespMul != null)
        preds[0] = (float) (out[0] / model_info().data_info()._normRespMul[0] + model_info().data_info()._normRespSub[0]);
      else
        preds[0] = out[0];
      if (Float.isNaN(preds[0])) throw new RuntimeException("Predicted regression target NaN!");
    }
    return preds;
  }

  /**
   * Score auto-encoded reconstruction (on-the-fly, without allocating the reconstruction as done in Frame score(Frame fr))
   * @param frame Original data (can contain response, will be ignored)
   * @return Frame containing one Vec with reconstruction error (MSE) of each reconstructed row, caller is responsible for deletion
   */
  public Frame scoreAutoEncoder(Frame frame) {
    final int len = _names.length;
    // Adapt the Frame layout - returns adapted frame and frame containing only
    // newly created vectors
    Frame[] adaptFrms = adapt(frame,false,false/*no response*/);
    // Adapted frame containing all columns - mix of original vectors from fr
    // and newly created vectors serving as adaptors
    Frame adaptFrm = adaptFrms[0];
    // Contains only newly created vectors. The frame eases deletion of these vectors.
    Frame onlyAdaptFrm = adaptFrms[1];
    adaptFrm.add("Reconstruction.MSE", adaptFrm.anyVec().makeZero());
    new MRTask2() {
      @Override public void map( Chunk chks[] ) {
        double tmp [] = new double[len];
        final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
        for( int row=0; row<chks[0]._len; row++ ) {
          for( int i=0; i<_names.length; i++ )
            tmp[i] = chks[i].at0(row); //original data
          chks[len].set0(row, score_autoencoder(tmp, null, neurons)); //store the per-row reconstruction error (MSE) in the last column
        }
      }
    }.doAll(adaptFrm);

    // Return just the output columns
    int x=_names.length, y=adaptFrm.numCols();
    final Frame l2 = adaptFrm.extractFrame(x, y);
    onlyAdaptFrm.delete();
    return l2;
  }

  /**
   * Score auto-encoded reconstruction (on-the-fly, without allocating the reconstruction as done in Frame score(Frame fr))
   * @param frame Original data (can contain response, will be ignored)
   * @return Frame containing one Vec with reconstruction error (MSE) of each reconstructed row, caller is responsible for deletion
   */
  public Frame scoreDeepFeatures(Frame frame, final int layer) {
    assert(layer >= 0 && layer < model_info().get_params().hidden.length);
    final int len = nfeatures();
    Vec resp = null;
    if (isSupervised()) {
      int ridx = frame.find(responseName());
      if (ridx != -1) { // drop the response for scoring!
        frame = new Frame(frame);
        resp = frame.vecs()[ridx];
        frame.remove(ridx);
      }
    }
    // Adapt the Frame layout - returns adapted frame and frame containing only
    // newly created vectors
    Frame[] adaptFrms = adapt(frame,false,false/*no response*/);
    // Adapted frame containing all columns - mix of original vectors from fr
    // and newly created vectors serving as adaptors
    Frame adaptFrm = adaptFrms[0];
    // Contains only newly created vectors. The frame eases deletion of these vectors.
    Frame onlyAdaptFrm = adaptFrms[1];
    //create new features, will be dense
    final int features = model_info().get_params().hidden[layer];
    Vec[] vecs = adaptFrm.anyVec().makeZeros(features);
    for (int j=0; j<features; ++j) {
      adaptFrm.add("DF.C" + (j+1), vecs[j]);
    }
    new MRTask2() {
      @Override public void map( Chunk chks[] ) {
        double tmp [] = new double[len];
        float df[] = new float [features];
        final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
        for( int row=0; row<chks[0]._len; row++ ) {
          for( int i=0; i<len; i++ )
            tmp[i] = chks[i].at0(row);
          ((Neurons.Input)neurons[0]).setInput(-1, tmp);
          DeepLearningTask.step(-1, neurons, model_info, false, null);
          float[] out = neurons[layer+1]._a.raw(); //extract the layer-th hidden feature
          for( int c=0; c<df.length; c++ )
            chks[_names.length+c].set0(row,out[c]);
        }
      }
    }.doAll(adaptFrm);

    // Return just the output columns
    int x=_names.length, y=adaptFrm.numCols();
    Frame ret = adaptFrm.extractFrame(x, y);
    onlyAdaptFrm.delete();
    if (resp != null) ret.prepend(responseName(), resp);
    return ret;
  }

  // Make (potentially expanded) reconstruction
  private float[] score_autoencoder(Chunk[] chks, int row_in_chunk, double[] tmp, float[] preds, Neurons[] neurons) {
    assert(get_params().autoencoder);
    assert(tmp.length == _names.length);
    for( int i=0; i<tmp.length; i++ )
      tmp[i] = chks[i].at0(row_in_chunk);
    score_autoencoder(tmp, preds, neurons); // this fills preds, returns MSE error (ignored here)
    return preds;
  }

  /**
   * Helper to reconstruct original data into preds array and compute the reconstruction error (MSE)
   * @param data Original data (unexpanded)
   * @param preds Reconstruction (potentially expanded)
   * @return reconstruction error
   */
  private double score_autoencoder(double[] data, float[] preds, Neurons[] neurons) {
    assert(model_info().get_params().autoencoder);
    if (model_info().unstable()) {
      Log.warn(unstable_msg);
      throw new UnsupportedOperationException("Trying to predict with an unstable model.");
    }
    ((Neurons.Input)neurons[0]).setInput(-1, data); // expands categoricals inside
    DeepLearningTask.step(-1, neurons, model_info, false, null); // reconstructs data in expanded space
    float[] in  = neurons[0]._a.raw(); //input (expanded)
    float[] out = neurons[neurons.length - 1]._a.raw(); //output (expanded)
    // DEBUGGING
//    Log.info(Arrays.toString(data));
//    Log.info(Arrays.toString(in));
//    Log.info(Arrays.toString(out));
    assert(in.length == out.length);

    // First normalize categorical reconstructions to be probabilities
    // (such that they can be better compared to the input where one factor was 1 and the rest was 0)
//    model_info().data_info().softMaxCategoricals(out,out); //only modifies the categoricals

    // Compute MSE of reconstruction in expanded space (with categorical probabilities)
    double l2 = 0;
    for (int i = 0; i < in.length; ++i)
      l2 += Math.pow((out[i] - in[i]), 2);
    l2 /= in.length;

    if (preds!=null) {
      // Now scale back numerical columns to original data space (scale + shift)
      model_info().data_info().unScaleNumericals(out, out); //only modifies the numericals
      System.arraycopy(out, 0, preds, 0, out.length); //copy reconstruction into preds
    }
    // DEBUGGING
//    Log.info(Arrays.toString(preds));
//    Log.info("");
    return l2;
  }

  /**
   * Compute quantile-based threshold (in reconstruction error) to find outliers
   * @param mse Vector containing reconstruction errors
   * @param quantile Quantile for cut-off
   * @return Threshold in MSE value for a point to be above the quantile
   */
  public double calcOutlierThreshold(Vec mse, double quantile) {
    Frame mse_frame = new Frame(Key.make(), new String[]{"Reconstruction.MSE"}, new Vec[]{mse});
    QuantilesPage qp = new QuantilesPage();
    qp.column = mse_frame.vec(0);
    qp.source_key = mse_frame;
    qp.quantile = quantile;
    qp.invoke();
    DKV.remove(mse_frame._key);
    return qp.result;
  }

  @Override public ModelAutobufferSerializer getModelSerializer() {
    // Return a serializer which knows how to serialize keys
    return new ModelAutobufferSerializer() {
      @Override protected AutoBuffer postLoad(Model m, AutoBuffer ab) {
        Job.hygiene(((DeepLearningModel)m).get_params());
        return ab;
      }
    };
  }

  public boolean generateHTML(String title, StringBuilder sb) {
    if (_key == null) {
      DocGen.HTML.title(sb, "No model yet");
      return true;
    }

    // optional JFrame creation for visualization of weights
//    DeepLearningVisualization.visualize(this);

    final String mse_format = "%g";
//    final String cross_entropy_format = "%2.6f";

    // stats for training and validation
    final Errors error = last_scored();

    DocGen.HTML.title(sb, title);

    if (get_params().source == null || DKV.get(get_params().source._key) == null ||
            (get_params().validation != null && DKV.get(get_params().validation._key) == null)) (Job.hygiene(get_params())).toHTML(sb);
    else job().toHTML(sb);

    sb.append("<div class='alert'>Actions: "
            + (jobKey != null && UKV.get(jobKey) != null && Job.isRunning(jobKey) ? "<i class=\"icon-stop\"></i>" + Cancel.link(jobKey, "Stop training") + ", " : "")
            + Inspect2.link("Inspect training data (" + _dataKey + ")", _dataKey) + ", "
            + (_validationKey != null ? (Inspect2.link("Inspect validation data (" + _validationKey + ")", _validationKey) + ", ") : "")
            + water.api.Predict.link(_key, "Score on dataset") + ", "
            + DeepLearning.link(_dataKey, "Compute new model", null, responseName(), _validationKey)
            + (actual_best_model_key != null && UKV.get(actual_best_model_key) != null && actual_best_model_key != _key ? ", " + DeepLearningModelView.link("Go to best model", actual_best_model_key) : "")
            + (jobKey == null || ((jobKey != null && UKV.get(jobKey) == null)) || (jobKey != null && UKV.get(jobKey) != null && Job.isEnded(jobKey)) ? ", <i class=\"icon-play\"></i>" + DeepLearning.link(_dataKey, "Continue training this model", _key, responseName(), _validationKey) : "") + ", "
            + UIUtils.qlink(SaveModel.class, "model", _key, "Save model") + ", "
            + "</div>");

    DocGen.HTML.paragraph(sb, "Model Key: " + _key);
    if (jobKey != null) DocGen.HTML.paragraph(sb, "Job Key: " + jobKey);
    if (!get_params().autoencoder)
      DocGen.HTML.paragraph(sb, "Model type: " + (get_params().classification ? " Classification" : " Regression") + ", predicting: " + responseName());
    else
      DocGen.HTML.paragraph(sb, "Model type: Auto-Encoder");
    DocGen.HTML.paragraph(sb, "Number of model parameters (weights/biases): " + String.format("%,d", model_info().size()));

    if (model_info.unstable()) {
      DocGen.HTML.section(sb, "=======================================================================================");
      DocGen.HTML.section(sb, unstable_msg.replace("\n"," "));
      DocGen.HTML.section(sb, "=======================================================================================");
    }

    if (error == null) return true;

    DocGen.HTML.title(sb, "Progress");
    // update epoch counter every time the website is displayed
    epoch_counter = training_rows > 0 ? (float)model_info().get_processed_total()/training_rows : 0;
    final double progress = get_params().progress();

    if (get_params() != null && get_params().diagnostics) {
      DocGen.HTML.section(sb, "Status of Neuron Layers");
      sb.append("<table class='table table-striped table-bordered table-condensed'>");
      sb.append("<tr>");
      sb.append("<th>").append("#").append("</th>");
      sb.append("<th>").append("Units").append("</th>");
      sb.append("<th>").append("Type").append("</th>");
      sb.append("<th>").append("Dropout").append("</th>");
      sb.append("<th>").append("L1").append("</th>");
      sb.append("<th>").append("L2").append("</th>");
      if (get_params().adaptive_rate) {
        sb.append("<th>").append("Rate (Mean, RMS)").append("</th>");
      } else {
        sb.append("<th>").append("Rate").append("</th>");
        sb.append("<th>").append("Momentum").append("</th>");
      }
      sb.append("<th>").append("Weight (Mean, RMS)").append("</th>");
      sb.append("<th>").append("Bias (Mean, RMS)").append("</th>");
      sb.append("</tr>");
      Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info()); //link the weights to the neurons, for easy access
      for (int i=0; i<neurons.length; ++i) {
        sb.append("<tr>");
        sb.append("<td>").append("<b>").append(i+1).append("</b>").append("</td>");
        sb.append("<td>").append("<b>").append(neurons[i].units).append("</b>").append("</td>");
        sb.append("<td>").append(neurons[i].getClass().getSimpleName()).append("</td>");

        if (i == 0) {
          sb.append("<td>");
          sb.append(Utils.formatPct(neurons[i].params.input_dropout_ratio));
          sb.append("</td>");
          sb.append("<td></td>");
          sb.append("<td></td>");
          sb.append("<td></td>");
          if (!get_params().adaptive_rate) sb.append("<td></td>");
          sb.append("<td></td>");
          sb.append("<td></td>");
          sb.append("</tr>");
          continue;
        }
        else if (i < neurons.length-1) {
          sb.append("<td>");
          if (neurons[i].params.hidden_dropout_ratios == null)
            sb.append(Utils.formatPct(0));
          else
            sb.append(Utils.formatPct(neurons[i].params.hidden_dropout_ratios[i - 1]));
          sb.append("</td>");
        } else {
          sb.append("<td></td>");
        }

        final String format = "%g";
        sb.append("<td>").append(neurons[i].params.l1).append("</td>");
        sb.append("<td>").append(neurons[i].params.l2).append("</td>");
        if (get_params().adaptive_rate) {
          sb.append("<td>(").append(String.format(format, model_info.mean_rate[i])).
                  append(", ").append(String.format(format, model_info.rms_rate[i])).append(")</td>");
        } else {
          sb.append("<td>").append(String.format("%.5g", neurons[i].rate(error.training_samples))).append("</td>");
          sb.append("<td>").append(String.format("%.5f", neurons[i].momentum(error.training_samples))).append("</td>");
        }
        sb.append("<td>(").append(String.format(format, model_info.mean_weight[i])).
                append(", ").append(String.format(format, model_info.rms_weight[i])).append(")</td>");
        sb.append("<td>(").append(String.format(format, model_info.mean_bias[i])).
                append(", ").append(String.format(format, model_info.rms_bias[i])).append(")</td>");
        sb.append("</tr>");
      }
      sb.append("</table>");
    }

    if (isClassifier() && !get_params().autoencoder) {
      DocGen.HTML.section(sb, "Classification error on training data: " + Utils.formatPct(error.train_err));
      if(error.validation) {
        DocGen.HTML.section(sb, "Classification error on validation data: " + Utils.formatPct(error.valid_err));
      } else if(error.num_folds > 0) {
        DocGen.HTML.section(sb, "Classification error on " + error.num_folds + "-fold cross-validated training data"
                + (_have_cv_results ? ": " + Utils.formatPct(error.valid_err) : " is being computed - please reload this page later."));
      }
    } else {
      DocGen.HTML.section(sb, "MSE on training data: " + String.format(mse_format, error.train_mse));
      if(error.validation) {
        DocGen.HTML.section(sb, "MSE on validation data: " + String.format(mse_format, error.valid_mse));
      } else if(error.num_folds > 0) {
        DocGen.HTML.section(sb, "MSE on " + error.num_folds + "-fold cross-validated training data"
                + (_have_cv_results ? ": " + String.format(mse_format, error.valid_mse) : " is being computed - please reload this page later."));
      }
    }
    DocGen.HTML.paragraph(sb, "Training samples: " + String.format("%,d", model_info().get_processed_total()));
    DocGen.HTML.paragraph(sb, "Epochs: " + String.format("%.3f", epoch_counter) + " / " + String.format("%.3f", get_params().epochs));
    int cores = 0; for (H2ONode n : H2O.CLOUD._memary) cores += n._heartbeat._num_cpus;
    DocGen.HTML.paragraph(sb, "Number of compute nodes: " + (model_info.get_params().single_node_mode ? ("1 (" + H2O.NUMCPUS + " threads)") : (H2O.CLOUD.size() + " (" + cores + " threads)")));
    DocGen.HTML.paragraph(sb, "Training samples per iteration" + (
            get_params().train_samples_per_iteration == -2 ? " (-2 -> auto-tuning): " :
            get_params().train_samples_per_iteration == -1 ? " (-1 -> max. available data): " :
            get_params().train_samples_per_iteration == 0 ? " (0 -> one epoch): " : " (user-given): ")
                    + String.format("%,d", actual_train_samples_per_iteration));

    final boolean isEnded = get_params().self() == null || (UKV.get(get_params().self()) != null && Job.isEnded(get_params().self()));
    final long time_so_far = isEnded ? run_time : run_time + System.currentTimeMillis() - _timeLastScoreEnter;
    if (time_so_far > 0) {
      long time_for_speed = isEnded || H2O.CLOUD.size() > 1 ? run_time : time_so_far;
      if (time_for_speed > 0)
        DocGen.HTML.paragraph(sb, "Training speed: " + String.format("%,d", model_info().get_processed_total() * 1000 / time_for_speed) + " samples/s");
    }
    DocGen.HTML.paragraph(sb, "Training time: " + PrettyPrint.msecs(time_so_far, true));
    if (progress > 0 && !isEnded)
      DocGen.HTML.paragraph(sb, "Estimated time left: " +PrettyPrint.msecs((long)(time_so_far*(1-progress)/progress), true));

    long score_train = error.score_training_samples;
    long score_valid = error.score_validation_samples;
    final boolean fulltrain = score_train==0 || score_train == training_rows;
    final boolean fullvalid = error.validation && get_params().n_folds == 0 && (score_valid==0 || score_valid == validation_rows);

    final String toolarge = " Confusion matrix not shown here - too large: number of classes (" + model_info.units[model_info.units.length-1]
            + ") is greater than the specified limit of " + get_params().max_confusion_matrix_size + ".";
    boolean smallenough = model_info.units[model_info.units.length-1] <= get_params().max_confusion_matrix_size;

    if (!error.validation) {
      if (_have_cv_results) {
        String cmTitle = "<div class=\"alert\">Scoring results reported for " + error.num_folds + "-fold cross-validated training data " + Inspect2.link(_dataKey) + ":</div>";
        sb.append("<h5>" + cmTitle);
        sb.append("</h5>");
      }
      else {
        String cmTitle = "<div class=\"alert\">Scoring results reported on training data " + Inspect2.link(_dataKey) + (fulltrain ? "" : " (" + score_train + " samples)") + ":</div>";
        sb.append("<h5>" + cmTitle);
        sb.append("</h5>");
      }
    }
    else {
      RString v_rs = new RString("<a href='Inspect2.html?src_key=%$key'>%key</a>");
      String cmTitle = "<div class=\"alert\">Scoring results reported on validation data " + Inspect2.link(_validationKey) + (fullvalid ? "" : " (" + score_valid + " samples)") + ":</div>";
      sb.append("<h5>" + cmTitle);
      sb.append("</h5>");
    }

    if (isClassifier()) {
      // print AUC
      if (error.validAUC != null) {
        error.validAUC.toHTML(sb);
      }
      else if (error.trainAUC != null) {
        error.trainAUC.toHTML(sb);
      }
      else {
        if (error.validation) {
          if (error.valid_confusion_matrix != null && smallenough) {
            error.valid_confusion_matrix.toHTML(sb);
          } else if (smallenough) sb.append("<h5>Confusion matrix on validation data is not yet computed.</h5>");
          else sb.append(toolarge);
        }
        else if (_have_cv_results) {
          if (error.valid_confusion_matrix != null && smallenough) {
            error.valid_confusion_matrix.toHTML(sb);
          } else if (smallenough) sb.append("<h5>Confusion matrix on " + error.num_folds + "-fold cross-validated training data is not yet computed.</h5>");
          else sb.append(toolarge);
        }
        else {
          if (error.train_confusion_matrix != null && smallenough) {
            error.train_confusion_matrix.toHTML(sb);
          } else if (smallenough) sb.append("<h5>Confusion matrix on training data is not yet computed.</h5>");
          else sb.append(toolarge);
        }
      }
    }

    // Hit ratio
    if (error.valid_hitratio != null) {
      error.valid_hitratio.toHTML(sb);
    } else if (error.train_hitratio != null) {
      error.train_hitratio.toHTML(sb);
    }

    // Variable importance
    if (error.variable_importances != null) {
      error.variable_importances.toHTML(this, sb);
    }

    printCrossValidationModelsHTML(sb);

    DocGen.HTML.title(sb, "Scoring history");
    if (errors.length > 1) {
      DocGen.HTML.paragraph(sb, "Time taken for last scoring and diagnostics: " + PrettyPrint.msecs(errors[errors.length-1].scoring_time, true));
      // training
      {
        final long pts = fulltrain ? training_rows : score_train;
        String training = "Number of training data samples for scoring: " + (fulltrain ? "all " : "") + pts;
        if (pts < 1000 && training_rows >= 1000) training += " (low, scoring might be inaccurate -> consider increasing this number in the expert mode)";
        if (pts > 100000 && errors[errors.length-1].scoring_time > 10000) training += " (large, scoring can be slow -> consider reducing this number in the expert mode or scoring manually)";
        DocGen.HTML.paragraph(sb, training);
      }
      // validation
      if (error.validation) {
        final long ptsv = fullvalid ? validation_rows : score_valid;
        String validation = "Number of validation data samples for scoring: " + (fullvalid ? "all " : "") + ptsv;
        if (ptsv < 1000 && validation_rows >= 1000) validation += " (low, scoring might be inaccurate -> consider increasing this number in the expert mode)";
        if (ptsv > 100000 && errors[errors.length-1].scoring_time > 10000) validation += " (large, scoring can be slow -> consider reducing this number in the expert mode or scoring manually)";
        DocGen.HTML.paragraph(sb, validation);
      }

      if (isClassifier() && nclasses() != 2 /*binary classifier has its own conflicting D3 object (AUC)*/) {
        // Plot training error
        float[] err = new float[errors.length];
        float[] samples = new float[errors.length];
        for (int i=0; i<err.length; ++i) {
          err[i] = (float)errors[i].train_err;
          samples[i] = errors[i].training_samples;
        }
        new D3Plot(samples, err, "training samples", "classification error",
                "classification error on training data").generate(sb);

        // Plot validation error
        if (error.validation) {
          for (int i=0; i<err.length; ++i) {
            err[i] = (float)errors[i].valid_err;
          }
          new D3Plot(samples, err, "training samples", "classification error",
                  "classification error on validation set").generate(sb);
        }
      }
      // regression
      else if (!isClassifier()) {
        // Plot training MSE
        float[] err = new float[errors.length-1];
        float[] samples = new float[errors.length-1];
        for (int i=0; i<err.length; ++i) {
          err[i] = (float)errors[i+1].train_mse;
          samples[i] = errors[i+1].training_samples;
        }
        new D3Plot(samples, err, "training samples", "MSE",
                "regression error on training data").generate(sb);

        // Plot validation MSE
        if (error.validation) {
          for (int i=0; i<err.length; ++i) {
            err[i] = (float)errors[i+1].valid_mse;
          }
          new D3Plot(samples, err, "training samples", "MSE",
                  "regression error on validation data").generate(sb);
        }
      }
    }

//    String training = "Number of training set samples for scoring: " + error.score_training;
    if (error.validation) {
//      String validation = "Number of validation set samples for scoring: " + error.score_validation;
    }
    sb.append("<table class='table table-striped table-bordered table-condensed'>");
    sb.append("<tr>");
    sb.append("<th>Training Time</th>");
    sb.append("<th>Training Epochs</th>");
    sb.append("<th>Training Samples</th>");
    if (isClassifier()) {
//      sb.append("<th>Training MCE</th>");
      sb.append("<th>Training Error</th>");
      if (nclasses()==2) sb.append("<th>Training AUC</th>");
    } else {
      sb.append("<th>Training MSE</th>");
    }
    if (error.validation) {
      if (isClassifier()) {
//      sb.append("<th>Validation MCE</th>");
        sb.append("<th>Validation Error</th>");
        if (nclasses()==2) sb.append("<th>Validation AUC</th>");
      } else {
        sb.append("<th>Validation MSE</th>");
      }
    }
    else if (error.num_folds > 0) {
      if (isClassifier()) {
        sb.append("<th>Cross-Validation Error</th>");
        if (nclasses()==2) sb.append("<th>Cross-Validation AUC</th>");
      } else {
        sb.append("<th>Cross-Validation MSE</th>");
      }
    }
    sb.append("</tr>");
    for( int i = errors.length - 1; i >= 0; i-- ) {
      final Errors e = errors[i];
      sb.append("<tr>");
      sb.append("<td>" + PrettyPrint.msecs(e.training_time_ms, true) + "</td>");
      sb.append("<td>" + String.format("%g", e.epoch_counter) + "</td>");
      sb.append("<td>" + String.format("%,d", e.training_samples) + "</td>");
      if (isClassifier() && !get_params().autoencoder) {
        sb.append("<td>" + Utils.formatPct(e.train_err) + "</td>");
        if (nclasses()==2) {
          if (e.trainAUC != null) sb.append("<td>" + Utils.formatPct(e.trainAUC.AUC()) + "</td>");
          else sb.append("<td>" + "N/A" + "</td>");
        }
      } else {
        sb.append("<td>" + String.format(mse_format, e.train_mse) + "</td>");
      }
      if(e.validation) {
        if (isClassifier()) {
          sb.append("<td>" + Utils.formatPct(e.valid_err) + "</td>");
          if (nclasses()==2) {
            if (e.validAUC != null) sb.append("<td>" + Utils.formatPct(e.validAUC.AUC()) + "</td>");
            else sb.append("<td>" + "N/A" + "</td>");
          }
        } else {
          sb.append("<td>" + String.format(mse_format, e.valid_mse) + "</td>");
        }
      }
      else if(e.num_folds > 0) {
        if (i == errors.length - 1 && _have_cv_results) {
          if (isClassifier()) {
            sb.append("<td>" + Utils.formatPct(e.valid_err) + "</td>");
            if (nclasses() == 2) {
              if (e.validAUC != null) sb.append("<td>" + Utils.formatPct(e.validAUC.AUC()) + "</td>");
              else sb.append("<td>" + "N/A" + "</td>");
            }
          } else {
            sb.append("<td>" + String.format(mse_format, e.valid_mse) + "</td>");
          }
        }
        else {
          sb.append("<td>N/A</td>");
          if (nclasses() == 2) sb.append("<td>N/A</td>");
        }
      }
      sb.append("</tr>");
    }
    sb.append("</table>");
    return true;
  }

  @Override
  protected SB toJavaNCLASSES(SB sb) {
    return !get_params().autoencoder ? super.toJavaNCLASSES(sb) :
            JCodeGen.toStaticVar(sb, "NCLASSES", model_info.units[model_info.units.length-1], "Number of output features (same as features of training data).");
  }

  @Override
  protected void toJavaFillPreds0(SB bodySb) {
    if (!get_params().autoencoder) super.toJavaFillPreds0(bodySb);
  }

  public void toJavaHtml(StringBuilder sb) {
    sb.append("<br /><br /><div class=\"pull-right\"><a href=\"#\" onclick=\'$(\"#javaModel\").toggleClass(\"hide\");\'" +
            "class=\'btn btn-inverse btn-mini\'>Java Model</a></div><br /><div class=\"hide\" id=\"javaModel\">");

    boolean featureAllowed = true; //isFeatureAllowed();
    if (! featureAllowed) {
      sb.append("<br/><div id=\'javaModelWarningBlock\' class=\"alert\" style=\"background:#eedd20;color:#636363;text-shadow:none;\">");
      sb.append("<b>You have requested a premium feature and your H<sub>2</sub>O software is unlicensed.</b><br/><br/>");
      sb.append("Please enter your email address below, and we will send you a trial license shortly.<br/>");
      sb.append("This will also temporarily enable downloading Java models.<br/>");
      sb.append("<form class=\'form-inline\'><input id=\"emailForJavaModel\" class=\"span5\" type=\"text\" placeholder=\"Email\"/> ");
      sb.append("<a href=\"#\" onclick=\'processJavaModelLicense();\' class=\'btn btn-inverse\'>Send</a></form></div>");
      sb.append("<div id=\"javaModelSource\" class=\"hide\"><pre style=\"overflow-y:scroll;\"><code class=\"language-java\">");
      DocGen.HTML.escape(sb, toJava());
      sb.append("</code></pre></div>");
    }
    else if( model_info().size() > 100000 ) {
      String modelName = JCodeGen.toJavaId(_key.toString());
      sb.append("<pre style=\"overflow-y:scroll;\"><code class=\"language-java\">");
      sb.append("/* Java code is too large to display, download it directly.\n");
      sb.append("   To obtain the code please invoke in your terminal:\n");
      sb.append("     curl http:/").append(H2O.SELF.toString()).append("/h2o-model.jar > h2o-model.jar\n");
      sb.append("     curl http:/").append(H2O.SELF.toString()).append("/2/").append(this.getClass().getSimpleName()).append("View.java?_modelKey=").append(_key).append(" > ").append(modelName).append(".java\n");
      sb.append("     javac -cp h2o-model.jar -J-Xmx2g -J-XX:MaxPermSize=128m ").append(modelName).append(".java\n");
      sb.append("*/");
      sb.append("</code></pre>");
    } else {
      sb.append("<pre style=\"overflow-y:scroll;\"><code class=\"language-java\">");
      DocGen.HTML.escape(sb, toJava());
      sb.append("</code></pre>");
    }
    sb.append("</div>");
    sb.append("<script type=\"text/javascript\">$(document).ready(showOrHideJavaModel);</script>");
  }

  @Override protected SB toJavaInit(SB sb, SB fileContextSB) {
    sb = super.toJavaInit(sb, fileContextSB);
    if (model_info().data_info()._nums > 0) {
      JCodeGen.toStaticVar(sb, "NUMS", new double[model_info().data_info()._nums], "Workspace for storing numerical input variables.");
      JCodeGen.toStaticVar(sb, "NORMMUL", model_info().data_info()._normMul, "Standardization/Normalization scaling factor for numerical variables.");
      JCodeGen.toStaticVar(sb, "NORMSUB", model_info().data_info()._normSub, "Standardization/Normalization offset for numerical variables.");
    }
    if (model_info().data_info()._cats > 0) {
      JCodeGen.toStaticVar(sb, "CATS", new int[model_info().data_info()._cats], "Workspace for storing categorical input variables.");
    }
    JCodeGen.toStaticVar(sb, "CATOFFSETS", model_info().data_info()._catOffsets, "Workspace for categorical offsets.");
    if (model_info().data_info()._normRespMul != null) {
      JCodeGen.toStaticVar(sb, "NORMRESPMUL", model_info().data_info()._normRespMul, "Standardization/Normalization scaling factor for response.");
      JCodeGen.toStaticVar(sb, "NORMRESPSUB", model_info().data_info()._normRespSub, "Standardization/Normalization offset for response.");
    }
    if (get_params().hidden_dropout_ratios != null) {
      JCodeGen.toStaticVar(sb, "HIDDEN_DROPOUT_RATIOS", get_params().hidden_dropout_ratios, "Hidden layer dropout ratios.");
    }

    Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info());
    int[] layers = new int[neurons.length];
    for (int i=0;i<neurons.length;++i)
      layers[i] = neurons[i].units;
    JCodeGen.toStaticVar(sb, "NEURONS", layers, "Number of neurons for each layer.");

    if (get_params().autoencoder) {
      sb.i(1).p("@Override public int getPredsSize() { return " + model_info.units[model_info.units.length-1] + "; }").nl();
      sb.i(1).p("@Override public boolean isAutoEncoder() { return true; }").nl();
      sb.i(1).p("@Override public String getHeader() { return \"" + getHeader() + "\"; }").nl();

    }

    // activation storage
    sb.i(1).p("// Storage for neuron activation values.").nl();
    sb.i(1).p("public static final float[][] ACTIVATION = new float[][] {").nl();
    for (int i=0; i<neurons.length; i++) {
      String colInfoClazz = "Activation_"+i;
      sb.i(2).p("/* ").p(neurons[i].getClass().getSimpleName()).p(" */ ");
      sb.p(colInfoClazz).p(".VALUES");
      if (i!=neurons.length-1) sb.p(',');
      sb.nl();
      fileContextSB.i().p("// Neuron activation values for ").p(neurons[i].getClass().getSimpleName()).p(" layer").nl();
      JCodeGen.toClassWithArray(fileContextSB, null, colInfoClazz, new float[layers[i]]);
    }
    sb.i(1).p("};").nl();

    // biases
    sb.i(1).p("// Neuron bias values.").nl();
    sb.i(1).p("public static final float[][] BIAS = new float[][] {").nl();
    for (int i=0; i<neurons.length; i++) {
      String colInfoClazz = "Bias_"+i;
      sb.i(2).p("/* ").p(neurons[i].getClass().getSimpleName()).p(" */ ");
      sb.p(colInfoClazz).p(".VALUES");
      if (i!=neurons.length-1) sb.p(',');
      sb.nl();
      fileContextSB.i().p("// Neuron bias values for ").p(neurons[i].getClass().getSimpleName()).p(" layer").nl();
      float[] bias = i == 0 ? null : new float[model_info().get_biases(i-1).size()];
      if (i>0) {
        for (int j=0; j<bias.length; ++j) bias[j] = model_info().get_biases(i-1).get(j);
      }
      JCodeGen.toClassWithArray(fileContextSB, null, colInfoClazz, bias);
    }
    sb.i(1).p("};").nl();

    // weights
    sb.i(1).p("// Connecting weights between neurons.").nl();
    sb.i(1).p("public static final float[][] WEIGHT = new float[][] {").nl();
    for (int i=0; i<neurons.length; i++) {
      String colInfoClazz = "Weight_"+i;
      sb.i(2).p("/* ").p(neurons[i].getClass().getSimpleName()).p(" */ ");
      sb.p(colInfoClazz).p(".VALUES");
      if (i!=neurons.length-1) sb.p(',');
      sb.nl();
      if (i > 0) {
        fileContextSB.i().p("// Neuron weights connecting ").
                p(neurons[i - 1].getClass().getSimpleName()).p(" and ").
                p(neurons[i].getClass().getSimpleName()).
                p(" layer").nl();
      }
      float[] weights = i == 0 ? null : new float[model_info().get_weights(i-1).rows()*model_info().get_weights(i-1).cols()];
      if (i>0) {
        final int rows = model_info().get_weights(i-1).rows();
        final int cols = model_info().get_weights(i-1).cols();
        for (int j=0; j<rows; ++j)
          for (int k=0; k<cols; ++k)
            weights[j*cols+k] = model_info().get_weights(i-1).get(j,k);
      }
      JCodeGen.toClassWithArray(fileContextSB, null, colInfoClazz, weights);
    }
    sb.i(1).p("};").nl();

    return sb;
  }

  @Override protected void toJavaPredictBody( final SB bodySb, final SB classCtxSb, final SB fileCtxSb) {
    SB model = new SB();
    bodySb.i().p("java.util.Arrays.fill(preds,0f);").nl();
    final int cats = model_info().data_info()._cats;
    final int nums = model_info().data_info()._nums;
    // initialize input layer
    if (nums > 0) bodySb.i().p("java.util.Arrays.fill(NUMS,0f);").nl();
    if (cats > 0) bodySb.i().p("java.util.Arrays.fill(CATS,0);").nl();
    bodySb.i().p("int i = 0, ncats = 0;").nl();
    if (cats > 0) {
      bodySb.i().p("for(; i<"+cats+"; ++i) {").nl();
      bodySb.i(1).p("if (!Double.isNaN(data[i])) {").nl();
      bodySb.i(2).p("int c = (int) data[i];").nl();
      if (model_info().data_info()._useAllFactorLevels)
        bodySb.i(2).p("CATS[ncats++] = c + CATOFFSETS[i];").nl();
      else
        bodySb.i(2).p("if (c != 0) CATS[ncats++] = c + CATOFFSETS[i] - 1;").nl();
      bodySb.i(1).p("}").nl();
      bodySb.i().p("}").nl();
    }
    if (nums > 0) {
      bodySb.i().p("final int n = data.length;").nl();
      bodySb.i().p("for(; i<n; ++i) {").nl();
        bodySb.i(1).p("NUMS[i" + (cats > 0 ? "-" + cats : "") + "] = Double.isNaN(data[i]) ? 0 : ");
      if (model_info().data_info()._normMul != null) {
        bodySb.p("(data[i] - NORMSUB[i" + (cats > 0 ? "-" + cats : "") + "])*NORMMUL[i" + (cats > 0 ? "-" + cats : "") + "];").nl();
      } else {
        bodySb.p("data[i];").nl();
      }
      bodySb.i(0).p("}").nl();
    }
    bodySb.i().p("java.util.Arrays.fill(ACTIVATION[0],0);").nl();
    if (cats > 0) {
      bodySb.i().p("for (i=0; i<ncats; ++i) ACTIVATION[0][CATS[i]] = 1f;").nl();
    }
    if (nums > 0) {
      bodySb.i().p("for (i=0; i<NUMS.length; ++i) {").nl();
        bodySb.i(1).p("ACTIVATION[0][CATOFFSETS[CATOFFSETS.length-1] + i] = Double.isNaN(NUMS[i]) ? 0f : (float) NUMS[i];").nl();
      bodySb.i().p("}").nl();
    }

    boolean tanh=(get_params().activation == DeepLearning.Activation.Tanh || get_params().activation == DeepLearning.Activation.TanhWithDropout);
    boolean relu=(get_params().activation == DeepLearning.Activation.Rectifier || get_params().activation == DeepLearning.Activation.RectifierWithDropout);
    boolean maxout=(get_params().activation == DeepLearning.Activation.Maxout || get_params().activation == DeepLearning.Activation.MaxoutWithDropout);

    final String stopping = get_params().autoencoder ? "(i<=ACTIVATION.length-1)" : "(i<ACTIVATION.length-1)";

    // make prediction: forward propagation
    bodySb.i().p("for (i=1; i<ACTIVATION.length; ++i) {").nl();
    bodySb.i(1).p("java.util.Arrays.fill(ACTIVATION[i],0f);").nl();
    if (maxout) {
      bodySb.i(1).p("float rmax = 0;").nl();
    }
    bodySb.i(1).p("for (int r=0; r<ACTIVATION[i].length; ++r) {").nl();
    bodySb.i(2).p("final int cols = ACTIVATION[i-1].length;").nl();
    if (maxout) {
      bodySb.i(2).p("float cmax = Float.NEGATIVE_INFINITY;").nl();
    }
    bodySb.i(2).p("for (int c=0; c<cols; ++c) {").nl();
    if (!maxout) {
      bodySb.i(3).p("ACTIVATION[i][r] += ACTIVATION[i-1][c] * WEIGHT[i][r*cols+c];").nl();
    } else {
      bodySb.i(3).p("if " + stopping + " cmax = Math.max(ACTIVATION[i-1][c] * WEIGHT[i][r*cols+c], cmax);").nl();
      bodySb.i(3).p("else ACTIVATION[i][r] += ACTIVATION[i-1][c] * WEIGHT[i][r*cols+c];").nl();
    }
    bodySb.i(2).p("}").nl();
    if (maxout) {
      bodySb.i(2).p("if "+ stopping +" ACTIVATION[i][r] = Float.isInfinite(cmax) ? 0f : cmax;").nl();
    }
    bodySb.i(2).p("ACTIVATION[i][r] += BIAS[i][r];").nl();
    if (maxout) {
      bodySb.i(2).p("if " + stopping + " rmax = Math.max(rmax, ACTIVATION[i][r]);").nl();
    }
    bodySb.i(1).p("}").nl();

    if (!maxout) bodySb.i(1).p("if " + stopping + " {").nl();
    bodySb.i(2).p("for (int r=0; r<ACTIVATION[i].length; ++r) {").nl();
    if (tanh) {
      bodySb.i(3).p("ACTIVATION[i][r] = 1f - 2f / (1f + (float)Math.exp(2*ACTIVATION[i][r]));").nl();
    } else if (relu) {
      bodySb.i(3).p("ACTIVATION[i][r] = Math.max(0f, ACTIVATION[i][r]);").nl();
    } else if (maxout) {
      bodySb.i(3).p("if (rmax > 1 ) ACTIVATION[i][r] /= rmax;").nl();
    }
    if (get_params().hidden_dropout_ratios != null) {
      if (maxout) bodySb.i(1).p("if " + stopping + " {").nl();
      bodySb.i(3).p("ACTIVATION[i][r] *= HIDDEN_DROPOUT_RATIOS[i-1];").nl();
      if (maxout) bodySb.i(1).p("}").nl();
    }
    bodySb.i(2).p("}").nl();
    if (!maxout) bodySb.i(1).p("}").nl();
    if (isClassifier()) {
      bodySb.i(1).p("if (i == ACTIVATION.length-1) {").nl();
      // softmax
      bodySb.i(2).p("float max = ACTIVATION[i][0];").nl();
      bodySb.i(2).p("for (int r=1; r<ACTIVATION[i].length; r++) {").nl();
      bodySb.i(3).p("if (ACTIVATION[i][r]>max) max = ACTIVATION[i][r];").nl();
      bodySb.i(2).p("}").nl();
      bodySb.i(2).p("float scale = 0f;").nl();
      bodySb.i(2).p("for (int r=0; r<ACTIVATION[i].length; r++) {").nl();
      bodySb.i(3).p("ACTIVATION[i][r] = (float) Math.exp(ACTIVATION[i][r] - max);").nl();
      bodySb.i(3).p("scale += ACTIVATION[i][r];").nl();
      bodySb.i(2).p("}").nl();
      bodySb.i(2).p("for (int r=0; r<ACTIVATION[i].length; r++) {").nl();
      bodySb.i(3).p("if (Float.isNaN(ACTIVATION[i][r]))").nl();
      bodySb.i(4).p("throw new RuntimeException(\"Numerical instability, predicted NaN.\");").nl();
      bodySb.i(3).p("ACTIVATION[i][r] /= scale;").nl();
      bodySb.i(3).p("preds[r+1] = ACTIVATION[i][r];").nl();
      bodySb.i(2).p("}").nl();
      bodySb.i(1).p("}").nl();
      bodySb.i().p("}").nl();
    } else if (!get_params().autoencoder) { //Regression
      bodySb.i(1).p("if (i == ACTIVATION.length-1) {").nl();
      // regression: set preds[1], FillPreds0 will put it into preds[0]
      if (model_info().data_info()._normRespMul != null) {
        bodySb.i(2).p("preds[1] = (float) (ACTIVATION[i][0] / NORMRESPMUL[0] + NORMRESPSUB[0]);").nl();
      }
      else {
        bodySb.i(2).p("preds[1] = ACTIVATION[i][0];").nl();
      }
      bodySb.i(2).p("if (Float.isNaN(preds[1])) throw new RuntimeException(\"Predicted regression target NaN!\");").nl();
      bodySb.i(1).p("}").nl();
      bodySb.i().p("}").nl();
    } else { //AutoEncoder
      bodySb.i(1).p("if (i == ACTIVATION.length-1) {").nl();
      bodySb.i(2).p("for (int r=0; r<ACTIVATION[i].length; r++) {").nl();
      bodySb.i(3).p("if (Float.isNaN(ACTIVATION[i][r]))").nl();
      bodySb.i(4).p("throw new RuntimeException(\"Numerical instability, reconstructed NaN.\");").nl();
      bodySb.i(3).p("preds[r] = ACTIVATION[i][r];").nl();
      bodySb.i(2).p("}").nl();
      if (model_info().data_info()._nums > 0) {
        int ns = model_info().data_info().numStart();
        bodySb.i(2).p("for (int k=" + ns + "; k<" + model_info().data_info().fullN() + "; ++k) {").nl();
        bodySb.i(3).p("preds[k] = preds[k] / (float)NORMMUL[k-" + ns + "] + (float)NORMSUB[k-" + ns + "];").nl();
        bodySb.i(2).p("}").nl();
      }
      bodySb.i(1).p("}").nl();
      bodySb.i().p("}").nl();
      // DEBUGGING
//      bodySb.i().p("System.out.println(java.util.Arrays.toString(data));").nl();
//      bodySb.i().p("System.out.println(java.util.Arrays.toString(ACTIVATION[0]));").nl();
//      bodySb.i().p("System.out.println(java.util.Arrays.toString(ACTIVATION[ACTIVATION.length-1]));").nl();
//      bodySb.i().p("System.out.println(java.util.Arrays.toString(preds));").nl();
//      bodySb.i().p("System.out.println(\"\");").nl();
    }
    fileCtxSb.p(model);
    toJavaUnifyPreds(bodySb);
    toJavaFillPreds0(bodySb);
  }

  // helper to push this model to another key (for keeping good models)
  private void putMeAsBestModel(Key bestModelKey) {
    final Key job = null;
    final DeepLearningModel cp = this;
    DeepLearningModel bestModel = new DeepLearningModel(cp, bestModelKey, job, model_info().data_info());
    bestModel.get_params().state = Job.JobState.DONE;
    bestModel.get_params().job_key = get_params().self();
    bestModel.delete_and_lock(job);
    bestModel.unlock(job);
    assert (UKV.get(bestModelKey) != null);
    assert (bestModel.compareTo(this) <= 0);
    assert (((DeepLearningModel) UKV.get(bestModelKey)).error() == _bestError);
  }

  public void delete_best_model( ) {
    if (actual_best_model_key != null && actual_best_model_key != _key) DKV.remove(actual_best_model_key);
  }

  public void delete_xval_models( ) {
    if (get_params().xval_models != null) {
      for (Key k : get_params().xval_models) {
        UKV.<DeepLearningModel>get(k).delete_best_model();
        UKV.<DeepLearningModel>get(k).delete();
      }
    }
  }

  transient private final String unstable_msg = "Job was aborted due to observed numerical instability (exponential growth)."
          + "\nTry a different initial distribution, a bounded activation function or adding"
          + "\nregularization with L1, L2 or max_w2 and/or use a smaller learning rate or faster annealing.";

}