package hex.deeplearning;
import static java.lang.Double.isNaN;
import hex.FrameTask.DataInfo;
import hex.VarImp;
import water.*;
import water.api.*;
import water.api.Request.API;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.Vec;
import water.util.*;
import java.lang.reflect.Field;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Random;
/**
* The Deep Learning model
* It contains a DeepLearningModelInfo with the most up-to-date model,
* a scoring history, as well as some helpers to indicate the progress
*/
public class DeepLearningModel extends Model implements Comparable<DeepLearningModel> {
static final int API_WEAVER = 1; // This file has auto-gen'd doc & json fields
static public DocGen.FieldDoc[] DOC_FIELDS; // Initialized from Auto-Gen code.
// The most up-to-date network state (weights, biases, parameters); volatile, replaced wholesale via set_model_info()
@API(help="Model info", json = true)
private volatile DeepLearningModelInfo model_info;
void set_model_info(DeepLearningModelInfo mi) { model_info = mi; }
final public DeepLearningModelInfo model_info() { return model_info; }
// Key of the job that updates this model; the checkpoint constructor treats a null jobKey as "best model copy"
@API(help="Job that built the model", json = true)
final private Key jobKey;
@API(help="Validation dataset used for model building", json = true)
public final Key _validationKey;
@API(help="Time to build the model", json = true)
private long run_time;
// Wall-clock time (System.currentTimeMillis()) at model creation; carried over unchanged when restarting from a checkpoint
final private long start_time;
public long actual_train_samples_per_iteration;
public double time_for_communication_us; //helper for auto-tuning: time in microseconds for collective bcast/reduce of the model
@API(help="Number of training epochs", json = true)
public double epoch_counter;
@API(help="Number of rows in training data", json = true)
public long training_rows;
@API(help="Number of rows in validation data", json = true)
public long validation_rows;
// Scoring history, oldest entry first; may be null (the fresh-model constructor leaves it unset for autoencoders)
@API(help = "Scoring during model building")
private Errors[] errors;
public Errors[] scoring_history() { return errors; }
// Keep the best model so far, based on a single criterion (overall class. error or MSE)
private float _bestError = Float.MAX_VALUE;
@API(help = "Key to the best model so far (based on overall error on scoring data set)")
public Key actual_best_model_key;
// return the most up-to-date model metrics (last entry of the scoring history), or null if there is no history
Errors last_scored() { return errors == null ? null : errors[errors.length-1]; }
// Parameters and job are both stored inside model_info
@Override public final DeepLearning get_params() { return model_info.get_params(); }
@Override public final Request2 job() { return model_info.get_job(); }
// Fill value for missing columns: 0 when the 'sparse' parameter is set, NaN otherwise
@Override protected double missingColumnsType() { return get_params().sparse ? 0 : Double.NaN; }
// Single scalar error: classification error for classifiers, MSE otherwise
// NOTE(review): cm() returns null when there is no scoring history, which would NPE here for classifiers - confirm callers only use this after scoring
public float error() { return (float) (isClassifier() ? cm().err() : mse()); }
// Autoencoders are never classifiers and never supervised
@Override public boolean isClassifier() { return super.isClassifier() && !model_info.get_params().autoencoder; }
@Override public boolean isSupervised() { return !model_info.get_params().autoencoder; }
// Autoencoders use all columns as features; otherwise one column (presumably the response - verify against Model) is excluded
@Override public int nfeatures() { return model_info.get_params().autoencoder ? _names.length : _names.length - 1; }
/**
 * Orders models by their scalar error (see {@link #error()}); the model with the lower error sorts first.
 *
 * @param o the model to compare against
 * @return -1, 0 or 1 if this model's error is lower than, equal to (or incomparable with), or higher than the other's
 * @throws UnsupportedOperationException if one model is a classifier and the other is not,
 *         or if the two models have a different number of classes
 */
@Override
public int compareTo(DeepLearningModel o) {
  if (o.isClassifier() != isClassifier()) {
    throw new UnsupportedOperationException("Cannot compare classifier against regressor.");
  }
  if (o.nclasses() != nclasses()) {
    throw new UnsupportedOperationException("Cannot compare models with different number of classes.");
  }
  final float mine = error();
  final float theirs = o.error();
  if (mine < theirs) return -1;
  if (mine > theirs) return 1;
  return 0;
}
/**
 * One entry of the scoring history: progress counters plus the training/validation
 * metrics that were computed at one scoring event.
 */
public static class Errors extends Iced {
  static final int API_WEAVER = 1;
  static public DocGen.FieldDoc[] DOC_FIELDS;

  // --- progress at the time of scoring ---
  @API(help = "How many epochs the algorithm has processed")
  public double epoch_counter;
  @API(help = "How many rows the algorithm has processed")
  public long training_samples;
  @API(help = "How long the algorithm ran in ms")
  public long training_time_ms;

  // --- training/validation sets ---
  @API(help = "Whether a validation set was provided")
  boolean validation;
  @API(help = "Number of folds for cross-validation (for validation=false)")
  int num_folds;
  @API(help = "Number of training set samples for scoring")
  public long score_training_samples;
  @API(help = "Number of validation set samples for scoring")
  public long score_validation_samples;
  @API(help="Do classification or regression")
  public boolean classification;
  @API(help = "Variable importances")
  VarImp variable_importances;

  // --- classification metrics ---
  @API(help = "Confusion matrix on training data")
  public water.api.ConfusionMatrix train_confusion_matrix;
  @API(help = "Confusion matrix on validation data")
  public water.api.ConfusionMatrix valid_confusion_matrix;
  @API(help = "Classification error on training data")
  public double train_err = 1;
  @API(help = "Classification error on validation data")
  public double valid_err = 1;
  @API(help = "AUC on training data")
  public AUCData trainAUC;
  @API(help = "AUC on validation data")
  public AUCData validAUC;
  @API(help = "Hit ratio on training data")
  public water.api.HitRatio train_hitratio;
  @API(help = "Hit ratio on validation data")
  public water.api.HitRatio valid_hitratio;

  // --- regression metrics ---
  @API(help = "Training MSE")
  public double train_mse = Double.POSITIVE_INFINITY;
  @API(help = "Validation MSE")
  public double valid_mse = Double.POSITIVE_INFINITY;

  @API(help = "Time taken for scoring")
  public long scoring_time;

  /**
   * Creates an independent copy by writing this object into a buffer and reading it back.
   *
   * @return a deep copy of this scoring entry
   */
  Errors deep_clone() {
    final AutoBuffer buf = new AutoBuffer();
    write(buf);
    buf.flipForReading();
    final Errors copy = new Errors();
    return copy.read(buf);
  }

  /**
   * Human-readable summary: misclassification (and AUC, if present) for classification,
   * MSE for regression once a finite training MSE is available, empty string otherwise.
   */
  @Override public String toString() {
    final StringBuilder out = new StringBuilder();
    if (classification) {
      // training line
      out.append("Error on training data (misclassification)");
      if (trainAUC != null) {
        out.append(" [using threshold for ")
           .append(trainAUC.threshold_criterion.toString().replace("_", " "))
           .append("]: ");
      } else {
        out.append(": ");
      }
      out.append(String.format("%.2f", 100*train_err)).append("%");
      if (trainAUC != null) {
        out.append(", AUC on training data: ").append(String.format("%.4f", 100*trainAUC.AUC)).append("%");
      }
      // validation / cross-validation line
      if (validation || num_folds > 0) {
        out.append("\nError on ");
        if (num_folds > 0) out.append(num_folds).append("-fold cross-");
        out.append("validation data (misclassification)");
        if (validAUC != null) {
          out.append(" [using threshold for ")
             .append(validAUC.threshold_criterion.toString().replace("_", " "))
             .append("]: ");
        } else {
          out.append(": ");
        }
        out.append(String.format("%.2f", (100*valid_err))).append("%");
      }
      if (validAUC != null) {
        out.append(", AUC on validation data: ").append(String.format("%.4f", 100*validAUC.AUC)).append("%");
      }
      return out.toString();
    }
    if (Double.isInfinite(train_mse)) {
      // regression without a computed MSE yet: nothing to report
      return out.toString();
    }
    out.append("Error on training data (MSE): ").append(train_mse);
    if (validation || num_folds > 0) {
      out.append("\nError on ");
      if (num_folds > 0) out.append(num_folds).append("-fold cross-");
      out.append("validation data (MSE): ").append(valid_mse);
    }
    return out.toString();
  }
}
/**
 * Stand-in confusion matrix used when no real matrix is available: it is built on top of
 * a null matrix and only carries a precomputed overall error and F1 score.
 */
private static final class ConfMat extends hex.ConfusionMatrix {
  private final double _err; // overall classification error to report
  private final double _f1;  // F1 score to report

  public ConfMat(double err, double f1) {
    super(null);
    this._err = err;
    this._f1 = f1;
  }

  /** @return the stored overall error */
  @Override public double err() {
    return _err;
  }

  /** @return the stored F1 score */
  @Override public double F1() {
    return _f1;
  }

  /** @return always null, since per-class errors are not tracked here */
  @Override public double[] classErr() {
    return null;
  }
}
/**
 * Confusion matrix of the most recent scoring event (for grid search error reporting).
 * Validation (or cross-validation) metrics are preferred over training metrics when present.
 *
 * @return null if nothing has been scored yet; a full confusion matrix if one was stored;
 *         otherwise a lightweight stand-in carrying only the error and F1 (0 if no AUC data)
 */
@Override
public hex.ConfusionMatrix cm() {
  final Errors latest = last_scored();
  if (latest == null) return null;
  final boolean onHoldout = latest.validation || latest.num_folds > 0;
  final water.api.ConfusionMatrix stored = onHoldout ? latest.valid_confusion_matrix : latest.train_confusion_matrix;
  if (stored != null && stored.cm != null) {
    // cm.cm has NaN padding, reduce it to N-1 size
    return new hex.ConfusionMatrix(stored.cm, stored.cm.length-1);
  }
  // no full matrix available: fall back to the scalar error and the AUC-derived F1
  if (onHoldout) {
    return new ConfMat(latest.valid_err, latest.validAUC != null ? latest.validAUC.F1() : 0);
  }
  return new ConfMat(latest.train_err, latest.trainAUC != null ? latest.trainAUC.F1() : 0);
}
/**
 * Mean squared error of the most recent scoring event.
 *
 * @return the validation MSE if a validation set or cross-validation was used, the training MSE
 *         otherwise; delegates to the superclass when there is no scoring history
 */
@Override
public double mse() {
  if (errors == null) return super.mse();
  final Errors latest = last_scored();
  final boolean onHoldout = latest.validation || latest.num_folds > 0;
  if (onHoldout) return latest.valid_mse;
  return latest.train_mse;
}
/**
 * @return the variable importances stored with the most recent scoring event,
 *         or null if there is no scoring history
 */
@Override
public VarImp varimp() {
  return errors != null ? last_scored().variable_importances : null;
}
// This describes the model, together with the parameters
// This will be shared: one per node
public static class DeepLearningModelInfo extends Iced {
static final int API_WEAVER = 1; // This file has auto-gen'd doc & json fields
static public DocGen.FieldDoc[] DOC_FIELDS; // Initialized from Auto-Gen code.
@API(help="Input data info")
private DataInfo data_info;
public DataInfo data_info() { return data_info; }
// model is described by parameters and the following arrays
// For each layer index, at most one of dense_row_weights[i] / dense_col_weights[i] is populated:
// the constructor fills the col-major slot only for layer 0 when 'col_major' is set; all other layers are row-major.
private Neurons.DenseRowMatrix[] dense_row_weights; //one 2D weight matrix per layer (stored as a 1D array each)
private Neurons.DenseColMatrix[] dense_col_weights; //one 2D weight matrix per layer (stored as a 1D array each)
private Neurons.DenseVector[] biases; //one 1D bias array per layer
// Only allocated by the constructor when 'autoencoder' is set and 'sparsity_beta' > 0; null otherwise
private Neurons.DenseVector[] avg_activations; //one 1D array per hidden layer
// helpers for storing previous step deltas
// Note: These two arrays *could* be made transient and then initialized freshly in makeNeurons() and in DeepLearningTask.initLocal()
// But then, after each reduction, the weights would be lost and would have to restart afresh -> not *exactly* right, but close...
// Allocated by fillHelpers() only when has_momenta() is true
private Neurons.DenseRowMatrix[] dense_row_weights_momenta;
private Neurons.DenseColMatrix[] dense_col_weights_momenta;
private Neurons.DenseVector[] biases_momenta;
// helpers for AdaDelta
// Allocated by fillHelpers() only when adaDelta() is true (and momenta are off); each holds twice as many
// entries as the corresponding weight matrix / bias vector (computeStats() reads them as pairs at 2*u and 2*u+1)
private Neurons.DenseRowMatrix[] dense_row_ada_dx_g;
private Neurons.DenseColMatrix[] dense_col_ada_dx_g;
private Neurons.DenseVector[] biases_ada_dx_g;
/**
 * Computes the model size, i.e. the number of model parameters required for making predictions
 * (all weights plus all biases). Momenta are not counted here, but they are needed for model building.
 *
 * @return total number of weight and bias entries
 */
public long size() {
  long total = 0;
  // row-major and col-major slots are alternatives per layer, so skip whichever is unused
  for (int i = 0; i < dense_row_weights.length; ++i) {
    final Neurons.Matrix m = dense_row_weights[i];
    if (m != null) total += m.size();
  }
  for (int i = 0; i < dense_col_weights.length; ++i) {
    final Neurons.Matrix m = dense_col_weights[i];
    if (m != null) total += m.size();
  }
  for (int i = 0; i < biases.length; ++i) {
    final Neurons.Vector b = biases[i];
    total += b.size();
  }
  return total;
}
// accessors to (shared) weights and biases - those will be updated racily (c.f. Hogwild!)
// Momentum is considered active if either momentum parameter is non-zero
boolean has_momenta() { return get_params().momentum_start != 0 || get_params().momentum_stable != 0; }
// Adaptive learning rate (AdaDelta) is active if 'adaptive_rate' is set
boolean adaDelta() { return get_params().adaptive_rate; }
// The matrix accessors below return the row-major matrix for layer i if present, else the col-major one
public final Neurons.Matrix get_weights(int i) { return dense_row_weights[i] == null ? dense_col_weights[i] : dense_row_weights[i]; }
public final Neurons.DenseVector get_biases(int i) { return biases[i]; }
// Only valid when has_momenta() is true (the arrays are null otherwise)
public final Neurons.Matrix get_weights_momenta(int i) { return dense_row_weights_momenta[i] == null ? dense_col_weights_momenta[i] : dense_row_weights_momenta[i]; }
public final Neurons.DenseVector get_biases_momenta(int i) { return biases_momenta[i]; }
// Only valid when the AdaDelta helpers were allocated by fillHelpers() (the arrays are null otherwise)
public final Neurons.Matrix get_ada_dx_g(int i) { return dense_row_ada_dx_g[i] == null ? dense_col_ada_dx_g[i] : dense_row_ada_dx_g[i]; }
public final Neurons.DenseVector get_biases_ada_dx_g(int i) { return biases_ada_dx_g[i]; }
//accessor to shared parameter defining avg activations
public final Neurons.DenseVector get_avg_activations(int i) { return avg_activations[i]; }
// The job doubles as the parameter holder: get_params() is the same object cast to DeepLearning
@API(help = "Model parameters", json = true)
private Request2 job;
public final DeepLearning get_params() { return (DeepLearning)job; }
public final Request2 get_job() { return job; }
// Per-layer diagnostics, filled by computeStats(); sized units.length, index 0 (input layer) is never written there
@API(help = "Mean rate", json = true)
private float[] mean_rate;
@API(help = "RMS rate", json = true)
private float[] rms_rate;
@API(help = "Mean bias", json = true)
private float[] mean_bias;
@API(help = "RMS bias", json = true)
private float[] rms_bias;
@API(help = "Mean weight", json = true)
private float[] mean_weight;
@API(help = "RMS weight", json = true)
public float[] rms_weight;
// Mean of the average activations per hidden layer; only allocated for autoencoders with sparsity_beta > 0
@API(help = "Mean Activation", json = true)
public float[] mean_a;
// Set once weights/biases have blown up (see threshold check in computeStats()); never reset in this class
@API(help = "Unstable", json = true)
private volatile boolean unstable = false;
public boolean unstable() { return unstable; }
// Refreshes the diagnostics one last time (only on the first call) before flagging the model as unstable
public void set_unstable() { if (!unstable) computeStats(); unstable = true; }
// Sample counters; all access goes through synchronized methods on this object
@API(help = "Processed samples", json = true)
private long processed_global;
public synchronized long get_processed_global() { return processed_global; }
public synchronized void set_processed_global(long p) { processed_global = p; }
public synchronized void add_processed_global(long p) { processed_global += p; }
private long processed_local;
public synchronized long get_processed_local() { return processed_local; }
public synchronized void set_processed_local(long p) { processed_local = p; }
public synchronized void add_processed_local(long p) { processed_local += p; }
public synchronized long get_processed_total() { return processed_global + processed_local; }
// package local helpers
int[] units; //number of neurons per layer, extracted from parameters and from datainfo
// No-arg constructor; used by deep_clone() to create the target object before read()
public DeepLearningModelInfo() {}
public DeepLearningModelInfo(final Job job, final DataInfo dinfo) {
this.job = job;
data_info = dinfo;
final int num_input = dinfo.fullN();
final int num_output = get_params().autoencoder ? num_input : get_params().classification ? dinfo._adaptedFrame.domains()[dinfo._adaptedFrame.domains().length-1].length : 1;
assert(num_input > 0);
assert(num_output > 0);
if (has_momenta() && adaDelta()) throw new IllegalArgumentException("Cannot have non-zero momentum and adaptive rate at the same time.");
final int layers=get_params().hidden.length;
// units (# neurons for each layer)
units = new int[layers+2];
if (get_params().max_categorical_features <= Integer.MAX_VALUE - dinfo._nums)
units[0] = Math.min(dinfo._nums + get_params().max_categorical_features, num_input);
else
units[0] = num_input;
System.arraycopy(get_params().hidden, 0, units, 1, layers);
units[layers+1] = num_output;
if ((long)units[0] > 100000L) {
final String[][] domains = dinfo._adaptedFrame.domains();
int[] levels = new int[domains.length];
for (int i=0; i<levels.length; ++i) {
levels[i] = domains[i] != null ? domains[i].length : 0;
}
Arrays.sort(levels);
Log.warn("===================================================================================================================================");
Log.warn(num_input + " input features" + (dinfo._cats > 0 ? " (after categorical one-hot encoding)" : "") + ". Can be slow and require a lot of memory.");
if (levels[levels.length-1] > 0) {
int levelcutoff = levels[levels.length-1-Math.min(10, levels.length)];
int count = 0;
for (int i=0; i<dinfo._adaptedFrame.numCols() - (get_params().autoencoder ? 0 : 1) && count < 10; ++i) {
if (dinfo._adaptedFrame.domains()[i] != null && dinfo._adaptedFrame.domains()[i].length >= levelcutoff) {
Log.warn("Categorical feature '" + dinfo._adaptedFrame._names[i] + "' has cardinality " + dinfo._adaptedFrame.domains()[i].length + ".");
count++;
}
}
}
Log.warn("Suggestions:");
Log.warn(" *) Limit the size of the first hidden layer");
if (dinfo._cats > 0) {
Log.warn(" *) Limit the total number of one-hot encoded features with the parameter 'max_categorical_features'");
Log.warn(" *) Run h2o.interaction(...,pairwise=F) on high-cardinality categorical columns to limit the factor count, see http://learn.h2o.ai");
}
Log.warn("===================================================================================================================================");
}
// weights (to connect layers)
dense_row_weights = new Neurons.DenseRowMatrix[layers+1];
dense_col_weights = new Neurons.DenseColMatrix[layers+1];
// decide format of weight matrices row-major or col-major
if (get_params().col_major) dense_col_weights[0] = new Neurons.DenseColMatrix(units[1], units[0]);
else dense_row_weights[0] = new Neurons.DenseRowMatrix(units[1], units[0]);
for (int i = 1; i <= layers; ++i)
dense_row_weights[i] = new Neurons.DenseRowMatrix(units[i + 1] /*rows*/, units[i] /*cols*/);
// biases (only for hidden layers and output layer)
biases = new Neurons.DenseVector[layers+1];
for (int i=0; i<=layers; ++i) biases[i] = new Neurons.DenseVector(units[i+1]);
// average activation (only for hidden layers)
if (get_params().autoencoder && get_params().sparsity_beta > 0) {
avg_activations = new Neurons.DenseVector[layers];
mean_a = new float[layers];
for (int i = 0; i < layers; ++i) avg_activations[i] = new Neurons.DenseVector(units[i + 1]);
}
fillHelpers();
// for diagnostics
mean_rate = new float[units.length];
rms_rate = new float[units.length];
mean_bias = new float[units.length];
rms_bias = new float[units.length];
mean_weight = new float[units.length];
rms_weight = new float[units.length];
}
/**
 * Deep clones this model info (including all weights/biases) by writing it into a buffer
 * and reading it back into a fresh instance.
 *
 * @return an independent copy of this object
 */
DeepLearningModelInfo deep_clone() {
  final AutoBuffer buf = new AutoBuffer();
  write(buf);
  buf.flipForReading();
  final DeepLearningModelInfo copy = new DeepLearningModelInfo();
  return copy.read(buf);
}
/**
 * Allocates the optimizer helper arrays matching the current weight layout:
 * momenta (same shapes as weights/biases) when momentum is active, otherwise the
 * AdaDelta accumulators (twice as many entries per weight/bias) when adaptive rate is active.
 * Does nothing when neither is enabled.
 */
void fillHelpers() {
  if (has_momenta()) {
    dense_row_weights_momenta = new Neurons.DenseRowMatrix[dense_row_weights.length];
    dense_col_weights_momenta = new Neurons.DenseColMatrix[dense_col_weights.length];
    // first layer follows the storage format chosen for the weights
    if (dense_row_weights[0] == null) {
      dense_col_weights_momenta[0] = new Neurons.DenseColMatrix(units[1], units[0]);
    } else {
      dense_row_weights_momenta[0] = new Neurons.DenseRowMatrix(units[1], units[0]);
    }
    for (int layer = 1; layer < dense_row_weights_momenta.length; ++layer) {
      dense_row_weights_momenta[layer] = new Neurons.DenseRowMatrix(units[layer+1], units[layer]);
    }
    biases_momenta = new Neurons.DenseVector[biases.length];
    for (int layer = 0; layer < biases_momenta.length; ++layer) {
      biases_momenta[layer] = new Neurons.DenseVector(units[layer+1]);
    }
    return;
  }
  if (!adaDelta()) return;
  dense_row_ada_dx_g = new Neurons.DenseRowMatrix[dense_row_weights.length];
  dense_col_ada_dx_g = new Neurons.DenseColMatrix[dense_col_weights.length];
  //AdaGrad
  if (dense_row_weights[0] == null) {
    dense_col_ada_dx_g[0] = new Neurons.DenseColMatrix(2*units[1], units[0]);
  } else {
    dense_row_ada_dx_g[0] = new Neurons.DenseRowMatrix(units[1], 2*units[0]);
  }
  for (int layer = 1; layer < dense_row_ada_dx_g.length; ++layer) {
    dense_row_ada_dx_g[layer] = new Neurons.DenseRowMatrix(units[layer+1], 2*units[layer]);
  }
  biases_ada_dx_g = new Neurons.DenseVector[biases.length];
  for (int layer = 0; layer < biases_ada_dx_g.length; ++layer) {
    biases_ada_dx_g[layer] = new Neurons.DenseVector(2*units[layer+1]);
  }
}
/**
 * Renders a per-layer status table (units, type, dropout, L1/L2, rate, weight and bias statistics).
 * Produces output only when 'diagnostics' is enabled and 'quiet_mode' is off; otherwise returns an empty string.
 * The statistics shown are whatever computeStats() stored last; this method does not recompute them.
 */
@Override public String toString() {
  StringBuilder sb = new StringBuilder();
  if (get_params().diagnostics && !get_params().quiet_mode) {
    Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(this);
    sb.append("Number of hidden layers is " + get_params().hidden.length + " \n");
    // mean_a is only allocated for autoencoders with sparsity_beta > 0 (see constructor),
    // so guard against null to avoid an NPE when sparsity_beta is set on a non-autoencoder model
    if (get_params().sparsity_beta > 0 && mean_a != null) {
      for (int k = 0; k < get_params().hidden.length; k++)
        sb.append("Average activation in hidden layer " + k + " is " + mean_a[k] + " \n");
    }
    sb.append("Status of Neuron Layers:\n");
    sb.append("# Units Type Dropout L1 L2 " + (get_params().adaptive_rate ? " Rate (Mean,RMS) " : " Rate Momentum") + " Weight (Mean, RMS) Bias (Mean,RMS)\n");
    final String format = "%7g";
    for (int i=0; i<neurons.length; ++i) {
      sb.append((i+1) + " " + String.format("%6d", neurons[i].units)
              + " " + String.format("%16s", neurons[i].getClass().getSimpleName()));
      if (i == 0) {
        // input layer: only the input dropout ratio applies, no weights/biases to report
        sb.append(" " + Utils.formatPct(neurons[i].params.input_dropout_ratio) + " \n");
        continue;
      }
      else if (i < neurons.length-1) {
        // hidden layers: per-layer dropout ratio (0 if none configured)
        if (neurons[i].params.hidden_dropout_ratios == null)
          sb.append(" " + Utils.formatPct(0) + " ");
        else
          sb.append(" " + Utils.formatPct(neurons[i].params.hidden_dropout_ratios[i - 1]) + " ");
      } else {
        // output layer: no dropout column
        sb.append(" ");
      }
      sb.append(
              " " + String.format("%5f", neurons[i].params.l1)
              + " " + String.format("%5f", neurons[i].params.l2)
              + " " + (get_params().adaptive_rate ? (" (" + String.format(format, mean_rate[i]) + ", " + String.format(format, rms_rate[i]) + ")" )
              : (String.format("%10g", neurons[i].rate(get_processed_total())) + " " + String.format("%5f", neurons[i].momentum(get_processed_total()))))
              + " (" + String.format(format, mean_weight[i])
              + ", " + String.format(format, rms_weight[i]) + ")"
              + " (" + String.format(format, mean_bias[i])
              + ", " + String.format(format, rms_bias[i]) + ")\n");
    }
  }
  return sb.toString();
}
/**
 * DEBUGGING: the output of toString() followed by a full dump of all weights, biases,
 * momenta (if present), the layer sizes and the processed-sample counters.
 *
 * @return the complete textual dump, terminated by a newline
 */
public String toStringAll() {
  final StringBuilder out = new StringBuilder(toString());
  final int numLayers = units.length-1;
  for (int layer = 0; layer < numLayers; ++layer) {
    out.append("\nweights[").append(layer).append("][]=").append(Arrays.toString(get_weights(layer).raw()));
  }
  for (int layer = 0; layer < numLayers; ++layer) {
    out.append("\nbiases[").append(layer).append("][]=").append(Arrays.toString(get_biases(layer).raw()));
  }
  if (has_momenta()) {
    for (int layer = 0; layer < numLayers; ++layer) {
      out.append("\nweights_momenta[").append(layer).append("][]=").append(Arrays.toString(get_weights_momenta(layer).raw()));
    }
  }
  if (biases_momenta != null) {
    for (int layer = 0; layer < numLayers; ++layer) {
      out.append("\nbiases_momenta[").append(layer).append("][]=").append(Arrays.toString(biases_momenta[layer].raw()));
    }
  }
  out.append("\nunits[]=").append(Arrays.toString(units));
  out.append("\nprocessed global: ").append(get_processed_global());
  out.append("\nprocessed local: ").append(get_processed_local());
  out.append("\nprocessed total: ").append(get_processed_total());
  out.append("\n");
  return out.toString();
}
/**
 * Initializes the network: randomizes all weights, then sets the biases.
 * Hidden-layer biases depend on the activation function (Rectifier/Maxout variants: 0.5 for the
 * first hidden layer and 1 for the others; Tanh variants: 0; any other activation: left untouched).
 * Output-layer biases are always set to 0.
 */
void initializeMembers() {
  randomizeWeights();
  //TODO: determine good/optimal/best initialization scheme for biases
  final DeepLearning.Activation activation = get_params().activation;
  final boolean rectifierOrMaxout =
          activation == DeepLearning.Activation.Rectifier
       || activation == DeepLearning.Activation.RectifierWithDropout
       || activation == DeepLearning.Activation.Maxout
       || activation == DeepLearning.Activation.MaxoutWithDropout;
  final boolean tanh =
          activation == DeepLearning.Activation.Tanh
       || activation == DeepLearning.Activation.TanhWithDropout;
  // hidden layers
  final int hiddenLayers = get_params().hidden.length;
  for (int layer = 0; layer < hiddenLayers; ++layer) {
    if (rectifierOrMaxout) {
      final float init = (layer == 0) ? 0.5f : 1f; //new behavior, might be slightly better (old behavior was 1 everywhere)
      Arrays.fill(biases[layer].raw(), init);
    } else if (tanh) {
      Arrays.fill(biases[layer].raw(), 0f);
    }
  }
  //output layer
  Arrays.fill(biases[biases.length-1].raw(), 0f);
}
/**
 * Element-wise adds another model's state into this one: weights, biases, average activations
 * (if present), momenta (if enabled) and AdaDelta weight accumulators (if enabled).
 * Also adds the other model's locally processed sample count to this model's local counter.
 * Both models are expected to have identical layouts and optimizer settings.
 *
 * @param other the model info whose state is accumulated into this one
 */
public void add(DeepLearningModelInfo other) {
  for (int i=0;i<dense_row_weights.length;++i)
    Utils.add(get_weights(i).raw(), other.get_weights(i).raw());
  for (int i=0;i<biases.length;++i) Utils.add(biases[i].raw(), other.biases[i].raw());
  if (avg_activations != null)
    for (int i=0;i<avg_activations.length;++i)
      // accumulate the other model's average activations (not its biases, which merely have the same length)
      Utils.add(avg_activations[i].raw(), other.avg_activations[i].raw());
  if (has_momenta()) {
    assert(other.has_momenta());
    for (int i=0;i<dense_row_weights_momenta.length;++i)
      Utils.add(get_weights_momenta(i).raw(), other.get_weights_momenta(i).raw());
    for (int i=0;i<biases_momenta.length;++i)
      Utils.add(biases_momenta[i].raw(), other.biases_momenta[i].raw());
  }
  if (adaDelta()) {
    assert(other.adaDelta());
    // NOTE(review): biases_ada_dx_g is neither added here nor divided in div() - confirm whether that is intentional
    for (int i=0;i<dense_row_ada_dx_g.length;++i) {
      Utils.add(get_ada_dx_g(i).raw(), other.get_ada_dx_g(i).raw());
    }
  }
  add_processed_local(other.get_processed_local());
}
/**
 * Divides the model state element-wise by N: weights, biases, average activations (if present),
 * momenta (if enabled) and the AdaDelta weight accumulators (if enabled).
 *
 * @param N the divisor applied to every element
 */
protected void div(float N) {
  final int numWeightLayers = dense_row_weights.length;
  for (int layer = 0; layer < numWeightLayers; ++layer) {
    Utils.div(get_weights(layer).raw(), N);
  }
  for (int layer = 0; layer < biases.length; ++layer) {
    Utils.div(biases[layer].raw(), N);
  }
  if (avg_activations != null) {
    for (int layer = 0; layer < avg_activations.length; ++layer) {
      Utils.div(avg_activations[layer].raw(), N);
    }
  }
  if (has_momenta()) {
    for (int layer = 0; layer < dense_row_weights_momenta.length; ++layer) {
      Utils.div(get_weights_momenta(layer).raw(), N);
    }
    for (int layer = 0; layer < biases_momenta.length; ++layer) {
      Utils.div(biases_momenta[layer].raw(), N);
    }
  }
  if (adaDelta()) {
    for (int layer = 0; layer < dense_row_ada_dx_g.length; ++layer) {
      Utils.div(get_ada_dx_g(layer).raw(), N);
    }
  }
}
/**
 * Draws a value from [min, max) using a single float-precision draw from the given generator.
 *
 * @param rand source of randomness (one call to nextFloat() is consumed)
 * @param min  lower bound of the interval
 * @param max  upper bound of the interval
 * @return min plus the random fraction of the interval width
 */
double uniformDist(Random rand, double min, double max) {
  final double width = max - min;
  final float fraction = rand.nextFloat();
  return min + fraction * width;
}
/**
 * Fills every weight matrix with random values according to 'initial_weight_distribution'.
 * Each layer gets its own deterministic generator derived from the seed and the layer index.
 * UniformAdaptive uses the range sqrt(6 / (fan_in + fan_out)), scaled by 4 for the last layer
 * of classification models; Uniform and Normal use 'initial_weight_scale'.
 */
void randomizeWeights() {
  final int numMatrices = dense_row_weights.length;
  for (int layer = 0; layer < numMatrices; ++layer) {
    final Random rng = water.util.Utils.getDeterRNG(get_params().seed + 0xBAD5EED + layer+1); //to match NeuralNet behavior
    final double range = Math.sqrt(6. / (units[layer] + units[layer+1]));
    final boolean lastLayerOfClassifier = layer == numMatrices-1 && get_params().classification;
    final DeepLearning.InitialWeightDistribution dist = get_params().initial_weight_distribution;
    final double scale = get_params().initial_weight_scale;
    final Neurons.Matrix weights = get_weights(layer);
    final int rows = weights.rows();
    final int cols = weights.cols();
    for (int r = 0; r < rows; r++) {
      for (int c = 0; c < cols; c++) {
        if (dist == DeepLearning.InitialWeightDistribution.UniformAdaptive) {
          // cf. http://machinelearning.wustl.edu/mlpapers/paper_files/AISTATS2010_GlorotB10.pdf
          final double draw = uniformDist(rng, -range, range);
          if (lastLayerOfClassifier) {
            weights.set(r, c, (float)(4.*draw)); //Softmax might need an extra factor 4, since it's like a sigmoid
          } else {
            weights.set(r, c, (float)draw);
          }
        } else if (dist == DeepLearning.InitialWeightDistribution.Uniform) {
          weights.set(r, c, (float)uniformDist(rng, -scale, scale));
        } else if (dist == DeepLearning.InitialWeightDistribution.Normal) {
          weights.set(r, c, (float)(rng.nextGaussian() * scale));
        }
      }
    }
  }
}
// TODO: Add "subset randomize" function
// int count = Math.min(15, _previous.units);
// double min = -.1f, max = +.1f;
// //double min = -1f, max = +1f;
// for( int o = 0; o < units; o++ ) {
// for( int n = 0; n < count; n++ ) {
// int i = rand.nextInt(_previous.units);
// int w = o * _previous.units + i;
// _w[w] = uniformDist(rand, min, max);
// }
// }
/**
 * Compute Variable Importance, based on
 * GEDEON: DATA MINING OF INPUTS: ANALYSING MAGNITUDE AND FUNCTIONAL MEASURES
 * Only the first two weight matrices are used (input -> first hidden layer -> next layer).
 * @return variable importances for input features, scaled such that the largest value is 1
 */
public float[] computeVariableImportances() {
  final int nIn = units[0];   // input features
  final int nHid = units[1];  // first hidden layer
  final int nOut = units[2];  // output layer (or second hidden layer)
  // freshly allocated Java arrays are already zero-filled
  final float[] vi = new float[nIn];
  final float[][] Qik = new float[nIn][nOut]; //importance of input i on output k
  final float[] sum_wj = new float[nHid];     //sum of incoming weights into first hidden layer
  final float[] sum_wk = new float[nOut];     //sum of incoming weights into output layer (or second hidden layer)
  final Neurons.Matrix w0 = get_weights(0);
  final Neurons.Matrix w1 = get_weights(1);
  // compute sum of absolute incoming weights
  for (int j = 0; j < nHid; j++) {
    for (int i = 0; i < nIn; i++) {
      sum_wj[j] += Math.abs(w0.get(j, i));
    }
  }
  for (int k = 0; k < nOut; k++) {
    for (int j = 0; j < nHid; j++) {
      sum_wk[k] += Math.abs(w1.get(k, j));
    }
  }
  // compute importance of input i on output k as product of connecting weights going through j
  for (int i = 0; i < nIn; i++) {
    final float[] Qi = Qik[i];
    for (int k = 0; k < nOut; k++) {
      for (int j = 0; j < nHid; j++) {
        final float wij = w0.get(j, i);
        final float wjk = w1.get(k, j);
        Qi[k] += Math.abs(wij)/sum_wj[j] * Math.abs(wjk)/sum_wk[k]; //Gedeon '97
      }
    }
  }
  // normalize Qik over all outputs k
  for (int k = 0; k < nOut; k++) {
    float sumQk = 0;
    for (int i = 0; i < nIn; i++) sumQk += Qik[i][k];
    for (int i = 0; i < nIn; i++) Qik[i][k] /= sumQk;
  }
  // importance for feature i is the sum over k of i->k importances
  for (int i = 0; i < nIn; i++) {
    vi[i] = Utils.sum(Qik[i]);
  }
  //normalize importances such that max(vi) = 1
  Utils.div(vi, Utils.maxValue(vi));
  return vi;
}
// compute stats on all nodes
// Recomputes the per-layer diagnostics (mean/RMS of biases, weights and - with adaptive rate - the effective
// learning rates, plus mean average activations for sparse autoencoders) and raises the 'unstable' flag
// if any bias/weight statistic is NaN or exceeds 1e10. Index y of the stats arrays refers to layer y
// (y >= 1), which is fed by weights/biases at index y-1; index 0 (input layer) is never written.
public void computeStats() {
// per-layer effective learning rates; only tracked when adaptive rate (AdaDelta) is on
float[][] rate = get_params().adaptive_rate ? new float[units.length-1][] : null;
// same condition under which the constructor allocates mean_a / avg_activations
if (get_params().autoencoder && get_params().sparsity_beta > 0) {
for (int k = 0; k < get_params().hidden.length; k++) {
mean_a[k] = 0;
for (int j = 0; j < avg_activations[k].size(); j++)
mean_a[k] += avg_activations[k].get(j);
mean_a[k] /= avg_activations[k].size();
}
}
for( int y = 1; y < units.length; y++ ) {
mean_rate[y] = rms_rate[y] = 0;
mean_bias[y] = rms_bias[y] = 0;
mean_weight[y] = rms_weight[y] = 0;
// first pass: accumulate sums for the means
for(int u = 0; u < biases[y-1].size(); u++) {
mean_bias[y] += biases[y-1].get(u);
}
if (rate != null) rate[y-1] = new float[get_weights(y-1).raw().length];
for(int u = 0; u < get_weights(y-1).raw().length; u++) {
mean_weight[y] += get_weights(y-1).raw()[u];
if (rate != null) {
// the AdaDelta accumulator stores two values per weight: index 2*u feeds RMS_dx, index 2*u+1 feeds RMS_g
// final float RMS_dx = (float)Math.sqrt(ada[y-1][2*u]+(float)get_params().epsilon);
// final float invRMS_g = (float)(1/Math.sqrt(ada[y-1][2*u+1]+(float)get_params().epsilon));
final float RMS_dx = Utils.approxSqrt(get_ada_dx_g(y-1).raw()[2*u]+(float)get_params().epsilon);
final float invRMS_g = Utils.approxInvSqrt(get_ada_dx_g(y-1).raw()[2*u+1]+(float)get_params().epsilon);
rate[y-1][u] = RMS_dx*invRMS_g; //not exactly right, RMS_dx should be from the previous time step -> but close enough for diagnostics.
mean_rate[y] += rate[y-1][u];
}
}
mean_bias[y] /= biases[y-1].size();
mean_weight[y] /= get_weights(y-1).size();
if (rate != null) mean_rate[y] /= rate[y-1].length;
// second pass: accumulate squared deviations from the means (the rms_* arrays temporarily hold sums of squares)
for(int u = 0; u < biases[y-1].size(); u++) {
final double db = biases[y-1].get(u) - mean_bias[y];
rms_bias[y] += db * db;
}
for(int u = 0; u < get_weights(y-1).size(); u++) {
final double dw = get_weights(y-1).raw()[u] - mean_weight[y];
rms_weight[y] += dw * dw;
if (rate != null) {
final double drate = rate[y-1][u] - mean_rate[y];
rms_rate[y] += drate * drate;
}
}
// turn sums of squares into root-mean-square deviations
rms_bias[y] = Utils.approxSqrt(rms_bias[y]/biases[y-1].size());
rms_weight[y] = Utils.approxSqrt(rms_weight[y]/get_weights(y-1).size());
if (rate != null) rms_rate[y] = Utils.approxSqrt(rms_rate[y]/rate[y-1].length);
// rms_bias[y] = (float)Math.sqrt(rms_bias[y]/biases[y-1].length);
// rms_weight[y] = (float)Math.sqrt(rms_weight[y]/weights[y-1].length);
// if (rate != null) rms_rate[y] = (float)Math.sqrt(rms_rate[y]/rate[y-1].length);
// Abort the run if weights or biases are unreasonably large (Note that all input values are normalized upfront)
// This can happen with Rectifier units when L1/L2/max_w2 are all set to 0, especially when using more than 1 hidden layer.
final double thresh = 1e10;
// the flag is sticky: once set it stays set (|=); only positive excursions and NaN are detected here
unstable |= mean_bias[y] > thresh || isNaN(mean_bias[y])
|| rms_bias[y] > thresh || isNaN(rms_bias[y])
|| mean_weight[y] > thresh || isNaN(mean_weight[y])
|| rms_weight[y] > thresh || isNaN(rms_weight[y]);
}
}
}
/**
 * Constructor to restart from a checkpointed model, or (when jobKey is null) to store a
 * copy of the best model so far.
 * @param cp Checkpoint to restart from
 * @param destKey New destination key for the model
 * @param jobKey New job key (job which updates the model); null means "best model copy",
 *               in which case the model state is deep-cloned and marked as DONE
 * @param dataInfo Data info to use for the new model (replaces the one stored in the checkpoint)
 */
public DeepLearningModel(final DeepLearningModel cp, final Key destKey, final Key jobKey, final DataInfo dataInfo) {
  super(destKey, cp._dataKey, dataInfo._adaptedFrame.names(), dataInfo._adaptedFrame.domains(), cp._priorClassDist != null ? cp._priorClassDist.clone() : null, null);
  final boolean store_best_model = (jobKey == null);
  this.jobKey = jobKey;
  this._validationKey = cp._validationKey;
  if (store_best_model) {
    model_info = cp.model_info.deep_clone(); //don't want to interfere with model being built, just make a deep copy and store that
    model_info.data_info = dataInfo.deep_clone(); //replace previous data_info with updated version that's passed in (contains enum for classification)
    get_params().state = Job.JobState.DONE; //change the deep_clone'd state to DONE
    _modelClassDist = cp._modelClassDist != null ? cp._modelClassDist.clone() : null;
  } else {
    model_info = (DeepLearningModelInfo) cp.model_info.clone(); //shallow clone is ok (won't modify the Checkpoint in K-V store during checkpoint restart)
    model_info.data_info = dataInfo; //shallow clone is ok
    get_params().checkpoint = cp._key; //it's only a "real" checkpoint if job != null, otherwise a best model copy
    get_params().state = ((DeepLearning)UKV.get(jobKey)).state; //make the job state consistent
  }
  get_params().job_key = jobKey;
  get_params().destination_key = destKey;
  get_params().start_time = System.currentTimeMillis(); //for displaying the model progress
  actual_best_model_key = cp.actual_best_model_key;
  start_time = cp.start_time;
  run_time = cp.run_time;
  training_rows = cp.training_rows; //copy the value to display the right number on the model page before training has started
  validation_rows = cp.validation_rows; //copy the value to display the right number on the model page before training has started
  _bestError = cp._bestError;
  // deep clone scoring history; the checkpoint may not have one (e.g. an autoencoder that was
  // never scored), in which case 'errors' stays null, which last_scored()/mse()/varimp() handle
  if (cp.errors != null) {
    errors = cp.errors.clone();
    for (int i=0; i<errors.length;++i)
      errors[i] = cp.errors[i].deep_clone();
  }
  // set proper timing
  _timeLastScoreEnter = System.currentTimeMillis();
  _timeLastScoreStart = 0;
  _timeLastScoreEnd = 0;
  _timeLastPrintStart = 0;
  assert(Arrays.equals(_key._kb, destKey._kb));
}
/**
 * Create a new model at the start of a model-building job.
 * Initializes timing, creates a fresh DeepLearningModelInfo from the parameters and
 * copies the current state of the owning job into the model's parameters.
 * @param destKey key under which this model is stored
 * @param jobKey key of the job building this model (must resolve to a DeepLearning job or a Job.JobHandle)
 * @param dataKey key of the training data
 * @param dinfo data info describing the (adapted) training frame
 * @param params model parameters
 * @param priorDist prior class distribution, passed through to the parent constructor
 */
public DeepLearningModel(final Key destKey, final Key jobKey, final Key dataKey, final DataInfo dinfo, final DeepLearning params, final float[] priorDist) {
  super(destKey, dataKey, dinfo._adaptedFrame, priorDist);
  this.jobKey = jobKey;
  this._validationKey = params.validation != null ? params.validation._key : null;
  run_time = 0;
  start_time = System.currentTimeMillis();
  _timeLastScoreEnter = start_time;
  model_info = new DeepLearningModelInfo(params, dinfo);
  actual_best_model_key = Key.makeSystem(Key.make().toString());
  if (params.n_folds != 0) actual_best_model_key = null; // no best-model key when n_folds is non-zero
  // Fetch the job exactly once: the instanceof check and the cast must see the same object,
  // otherwise the entry could change between two separate lookups.
  final Object job = UKV.get(jobKey);
  if (job instanceof DeepLearning)
    get_params().state = ((DeepLearning) job).state; //make the job state consistent
  else
    get_params().state = ((Job.JobHandle) job).state; //make the job state consistent
  if (!get_params().autoencoder) {
    // seed the scoring history with an empty entry
    errors = new Errors[1];
    errors[0] = new Errors();
    errors[0].validation = (params.validation != null);
    errors[0].num_folds = params.n_folds;
  }
  assert(Arrays.equals(_key._kb, destKey._kb));
}
// Wall-clock time (ms) of the most recent entry into doScoring(); initialized in the constructors
public long _timeLastScoreEnter; //not transient: needed for HTML display page
// Wall-clock time (ms) at which the most recent scoring pass in doScoring() started
transient private long _timeLastScoreStart;
// Wall-clock time (ms) at which the most recent scoring pass in doScoring() finished
transient private long _timeLastScoreEnd;
// Wall-clock time (ms) at which doScoring() last logged the training progress line
transient private long _timeLastPrintStart;
/**
 * Update progress/timing information, optionally auto-tune the number of training samples
 * per iteration, and (not more often than allowed by score_interval / score_duty_cycle)
 * score the model, append the result to the scoring history and keep track of the best model.
 * Any exception raised while doing so is logged and reported as "stop training".
 *
 * @param train training data from which the model is built (currently not referenced in this method)
 * @param ftrain potentially downsampled training data for scoring
 * @param ftest potentially downsampled validation data for scoring (can be null)
 * @param job_key key of the owning job
 * @param vadaptor adaptor for the validation response / confusion matrix domain
 * @return true if model building is ongoing
 */
boolean doScoring(Frame train, Frame ftrain, Frame ftest, Key job_key, Job.ValidatedJob.Response2CMAdaptor vadaptor) {
  try {
    final long now = System.currentTimeMillis();
    epoch_counter = (float)model_info().get_processed_total()/training_rows;
    final double time_last_iter_millis = now-_timeLastScoreEnter;

    // Auto-tuning
    // if multi-node and auto-tuning and at least 10 ms for communication (to avoid doing this on multi-JVM on same node),
    // then adjust the auto-tuning parameter 'actual_train_samples_per_iteration' such that the targeted ratio of comm to comp is achieved
    // Note: actual communication time is estimated by the NetworkTest's collective test.
    if (H2O.CLOUD.size() > 1 && get_params().train_samples_per_iteration == -2 && time_for_communication_us > 1e4) {
      final double comm_to_work_ratio = (time_for_communication_us *1e-3) / time_last_iter_millis;
      final double correction = get_params().target_ratio_comm_to_comp / comm_to_work_ratio;
      actual_train_samples_per_iteration /= correction;
      actual_train_samples_per_iteration = Math.max(1, actual_train_samples_per_iteration);
    }

    run_time += time_last_iter_millis;
    _timeLastScoreEnter = now;
    boolean keep_running = (epoch_counter < get_params().epochs);
    final long sinceLastScore = now -_timeLastScoreStart;
    final long sinceLastPrint = now -_timeLastPrintStart;
    final long samples = model_info().get_processed_total();
    if (!keep_running || sinceLastPrint > get_params().score_interval*1000) {
      _timeLastPrintStart = now;
      Log.info("Training time: " + PrettyPrint.msecs(run_time, true)
              + ". Processed " + String.format("%,d", samples) + " samples" + " (" + String.format("%.3f", epoch_counter) + " epochs)."
              + " Speed: " + String.format("%.3f", 1000.*samples/run_time) + " samples/sec.");
    }

    // this is potentially slow - only do every so often
    if( !keep_running ||
            (sinceLastScore > get_params().score_interval*1000 //don't score too often
                    &&(double)(_timeLastScoreEnd-_timeLastScoreStart)/sinceLastScore < get_params().score_duty_cycle) ) { //duty cycle
      final boolean printme = !get_params().quiet_mode;
      final boolean adaptCM = (isClassifier() && vadaptor.needsAdaptation2CM());
      _timeLastScoreStart = now;
      if (get_params().diagnostics) model_info().computeStats();
      Errors err = new Errors();
      err.training_time_ms = run_time;
      err.epoch_counter = epoch_counter;
      err.training_samples = model_info().get_processed_total();
      err.validation = ftest != null;
      err.score_training_samples = ftrain.numRows();
      if (get_params().autoencoder) {
        if (printme) Log.info("Scoring the auto-encoder.");
        // training
        {
          final Frame mse_frame = scoreAutoEncoder(ftrain);
          final Vec l2 = mse_frame.anyVec();
          Log.info("Mean reconstruction error on training data: " + l2.mean() + "\n");
          err.train_mse = l2.mean();
          mse_frame.delete();
        }
      } else {
        if (printme) Log.info("Scoring the model.");
        // compute errors
        err.classification = isClassifier();
        assert (err.classification == get_params().classification);
        err.num_folds = get_params().n_folds;
        err.train_confusion_matrix = new ConfusionMatrix();
        final int hit_k = Math.min(nclasses(), get_params().max_hit_ratio_k);
        if (err.classification && nclasses() > 2 && hit_k > 0) {
          err.train_hitratio = new HitRatio();
          err.train_hitratio.set_max_k(hit_k);
        }
        final String m = model_info().toString();
        if (m.length() > 0) Log.info(m);
        final Frame trainPredict = score(ftrain, false);
        AUC trainAUC = null;
        if (err.classification && nclasses() == 2) trainAUC = new AUC();
        final double trainErr = calcError(ftrain, ftrain.lastVec(), trainPredict, trainPredict, "training",
                printme, get_params().max_confusion_matrix_size, err.train_confusion_matrix, trainAUC, err.train_hitratio);
        // The error is a classification error for classifiers and an MSE otherwise;
        // the AUC is recorded independently of that (only present for binary classifiers).
        if (isClassifier()) err.train_err = trainErr;
        else err.train_mse = trainErr;
        if (trainAUC != null) err.trainAUC = trainAUC.data();
        trainPredict.delete();

        if (err.validation) {
          assert ftest != null;
          err.score_validation_samples = ftest.numRows();
          err.valid_confusion_matrix = new ConfusionMatrix();
          if (err.classification && nclasses() > 2 && hit_k > 0) {
            err.valid_hitratio = new HitRatio();
            err.valid_hitratio.set_max_k(hit_k);
          }
          final String adaptRespName = vadaptor.adaptedValidationResponse(responseName());
          Vec adaptCMresp = null;
          if (adaptCM) {
            Vec[] v = ftest.vecs();
            assert (ftest.find(adaptRespName) == v.length - 1); //make sure to have (adapted) response in the test set
            adaptCMresp = ftest.remove(v.length - 1); //model would remove any extra columns anyway (need to keep it here for later)
          }

          final Frame validPredict = score(ftest, adaptCM);
          final Frame hitratio_validPredict = new Frame(validPredict);
          Vec orig_label = validPredict.vecs()[0];
          // Adapt output response domain, in case validation domain is different from training domain
          // Note: doesn't change predictions, just the *possible* label domain
          if (adaptCM) {
            assert (adaptCMresp != null);
            assert (ftest.find(adaptRespName) == -1);
            ftest.add(adaptRespName, adaptCMresp);
            final Vec CMadapted = vadaptor.adaptModelResponse2CM(validPredict.vecs()[0]);
            validPredict.replace(0, CMadapted); //replace label
            validPredict.add("to_be_deleted", CMadapted); //keep the Vec around to be deleted later (no leak)
          }
          AUC validAUC = null;
          if (err.classification && nclasses() == 2) validAUC = new AUC();
          final double validErr = calcError(ftest, ftest.lastVec(), validPredict, hitratio_validPredict, "validation",
                  printme, get_params().max_confusion_matrix_size, err.valid_confusion_matrix, validAUC, err.valid_hitratio);
          // Same bookkeeping as for the training data; guard the AUC on the object that is dereferenced.
          if (isClassifier()) err.valid_err = validErr;
          else err.valid_mse = validErr;
          if (validAUC != null) err.validAUC = validAUC.data();
          validPredict.delete();
          //also delete the replaced label
          if (adaptCM) orig_label.remove(new Futures()).blockForPending();
        }

        // only keep confusion matrices for the last step if there are fewer than specified number of output classes
        if (err.train_confusion_matrix.cm != null
                && err.train_confusion_matrix.cm.length - 1 >= get_params().max_confusion_matrix_size) {
          err.train_confusion_matrix = null;
          err.valid_confusion_matrix = null;
        }
      }

      if (get_params().variable_importances) {
        if (!get_params().quiet_mode) Log.info("Computing variable importances.");
        final float[] vi = model_info().computeVariableImportances();
        err.variable_importances = new VarImp(vi, Arrays.copyOfRange(model_info().data_info().coefNames(), 0, vi.length));
      }

      _timeLastScoreEnd = System.currentTimeMillis();
      err.scoring_time = System.currentTimeMillis() - now;
      // enlarge the error array by one, push latest score back
      if (errors == null) {
        errors = new Errors[]{err};
      } else {
        Errors[] err2 = new Errors[errors.length + 1];
        System.arraycopy(errors, 0, err2, 0, errors.length);
        err2[err2.length - 1] = err;
        errors = err2;
      }

      if (!get_params().autoencoder) {
        // always keep a copy of the best model so far (based on the following criterion)
        if (actual_best_model_key != null && (
                // if we have a best_model in DKV, then compare against its error() (unless it's a different model as judged by the network size)
                (UKV.get(actual_best_model_key) != null && (error() < UKV.<DeepLearningModel>get(actual_best_model_key).error() || !Arrays.equals(model_info().units, UKV.<DeepLearningModel>get(actual_best_model_key).model_info().units)))
                        ||
                        // otherwise, compare against our own _bestError
                        (UKV.get(actual_best_model_key) == null && error() < _bestError)
        ) ) {
          if (!get_params().quiet_mode)
            Log.info("Error reduced from " + _bestError + " to " + error() + ". Storing best model so far under key " + actual_best_model_key.toString() + ".");
          _bestError = error();
          putMeAsBestModel(actual_best_model_key);

          // debugging check
          if (false) {
            DeepLearningModel bestModel = UKV.get(actual_best_model_key);
            final Frame fr = ftest != null ? ftest : ftrain;
            final Frame bestPredict = bestModel.score(fr, ftest != null ? adaptCM : false);
            final Frame hitRatio_bestPredict = new Frame(bestPredict);
            // Adapt output response domain, in case validation domain is different from training domain
            // Note: doesn't change predictions, just the *possible* label domain
            if (adaptCM) {
              final Vec CMadapted = vadaptor.adaptModelResponse2CM(bestPredict.vecs()[0]);
              bestPredict.replace(0, CMadapted); //replace label
              bestPredict.add("to_be_deleted", CMadapted); //keep the Vec around to be deleted later (no leak)
            }
            final double err3 = calcError(fr, fr.lastVec(), bestPredict, hitRatio_bestPredict, "cross-check",
                    printme, get_params().max_confusion_matrix_size, new water.api.ConfusionMatrix(), isClassifier() && nclasses() == 2 ? new AUC() : null, null);
            if (isClassifier())
              assert (ftest != null ? Math.abs(err.valid_err - err3) < 1e-5 : Math.abs(err.train_err - err3) < 1e-5);
            else
              assert (ftest != null ? Math.abs(err.valid_mse - err3) < 1e-5 : Math.abs(err.train_mse - err3) < 1e-5);
            bestPredict.delete();
          }
        }
        // print the freshly scored model to ASCII
        for (String s : toString().split("\n")) Log.info(s);
        if (printme) Log.info("Time taken for scoring and diagnostics: " + PrettyPrint.msecs(err.scoring_time, true));
      }
    }
    if (model_info().unstable()) {
      Log.warn(unstable_msg);
      keep_running = false;
    } else if ( (isClassifier() && last_scored().train_err <= get_params().classification_stop)
            || (!isClassifier() && last_scored().train_mse <= get_params().regression_stop) ) {
      Log.info("Achieved requested predictive accuracy on the training data. Model building completed.");
      keep_running = false;
    }
    update(job_key);
    return keep_running;
  }
  catch (Exception ex) {
    // Still report "stop training" to the caller, but never lose the reason for it.
    final java.io.StringWriter trace = new java.io.StringWriter();
    ex.printStackTrace(new java.io.PrintWriter(trace, true));
    Log.warn("Exception during scoring, model building is stopped: " + trace);
    return false;
  }
}
/**
 * Store cross-validation results in the most recent scoring history entry
 * (as its validation results) and overwrite this model in the DKV.
 * @param job the validated job (not referenced here)
 * @param cv_error cross-validation error (stored as MSE for regression, as classification error otherwise)
 * @param cm cross-validation confusion matrix
 * @param auc cross-validation AUC data
 * @param hr cross-validation hit ratio
 */
@Override protected void setCrossValidationError(Job.ValidatedJob job, double cv_error, ConfusionMatrix cm, AUCData auc, HitRatio hr) {
  _have_cv_results = true;
  if (get_params().classification) {
    last_scored().valid_err = cv_error;
  } else {
    last_scored().valid_mse = cv_error;
  }
  // all remaining updates go to the same (latest) scoring entry
  final Errors latest = last_scored();
  latest.score_validation_samples = latest.score_training_samples / get_params().n_folds;
  latest.num_folds = get_params().n_folds;
  latest.valid_confusion_matrix = cm;
  latest.validAUC = auc;
  latest.valid_hitratio = hr;
  DKV.put(this._key, this); //overwrite this model
}
/**
 * @return the model info's string form followed by the string form of the latest scoring entry
 */
@Override public String toString() {
  final String info = model_info.toString();
  final String lastScore = last_scored().toString();
  return info + lastScore;
}
/**
 * @return the model info's toStringAll() output followed by the string form of the latest scoring entry
 */
public String toStringAll() {
  final String info = model_info.toStringAll();
  final String lastScore = last_scored().toString();
  return info + lastScore;
}
/**
 * Build the comma-separated header of the reconstruction columns of an auto-encoder:
 * every coefficient name of the data info, prefixed with "reconstr_".
 * @return comma-separated list of reconstruction column names
 */
public String getHeader() {
  assert get_params().autoencoder;
  final int ncols = model_info().data_info().fullN();
  assert (model_info().data_info()._responses == 0);
  final String[] names = model_info().data_info().coefNames();
  assert (ncols == names.length);
  final StringBuilder header = new StringBuilder();
  String separator = ""; // nothing before the first column, a comma before all others
  for (int i = 0; i < ncols; i++) {
    header.append(separator).append("reconstr_").append(names[i]);
    separator = ",";
  }
  return header.toString();
}
/**
 * Overrides Model.score(): delegates to the parent implementation unless this model is an
 * auto-encoder, in which case a reconstruction of the input is computed instead.
 * @param frame Test dataset
 * @return A frame containing the prediction, or (for auto-encoders) one "reconstr_"-prefixed column per coefficient
 */
@Override
public Frame score(Frame frame) {
  if (!get_params().autoencoder) return super.score(frame);

  // Reconstruction
  // adapt() hands back two frames: [0] the adapted frame with all columns,
  // [1] only the vectors newly created by the adaptation (kept to ease their deletion).
  final Frame[] adapted = adapt(frame,false,false/*no response*/);
  final Frame fullFrame = adapted[0];
  final Frame createdOnly = adapted[1];

  final int ncols = model_info().data_info().fullN();
  assert(model_info().data_info()._responses == 0);
  final String[] names = model_info().data_info().coefNames();
  assert(ncols == names.length);
  // append one zero-filled output column per reconstructed coefficient
  int col = 0;
  while (col < ncols) {
    fullFrame.add("reconstr_" + names[col], fullFrame.anyVec().makeZero());
    col++;
  }
  new MRTask2() {
    @Override public void map( Chunk chks[] ) {
      final double[] rowData = new double[_names.length];
      final float[] recon = new float[ncols];
      final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
      for( int r=0; r<chks[0]._len; r++ ) {
        final float[] filled = score_autoencoder(chks, r, rowData, recon, neurons);
        // the reconstruction goes into the columns following the input columns
        for( int k=0; k<recon.length; k++ )
          chks[_names.length+k].set0(r,filled[k]);
      }
    }
  }.doAll(fullFrame);

  // Return the predicted columns
  final int first = _names.length;
  final int end = fullFrame.numCols();
  final Frame reconstruction = fullFrame.extractFrame(first, end); //this will call vec_impl() and we cannot call the delete() below just yet
  createdOnly.delete();
  return reconstruction;
}
/**
 * Predict from raw double values representing one row of data.
 * Runs a forward pass through the network and copies the output layer into preds.
 * @param data raw input row handed to the network's input layer
 * @param preds output array: predicted label followed by per-class probabilities (classification),
 *              or the single predicted target (regression)
 * @return preds
 * @throws UnsupportedOperationException if the model is unstable
 * @throws RuntimeException if a predicted value is NaN
 */
@Override public float[] score0(double[] data, float[] preds) {
  if (model_info().unstable()) {
    Log.warn(unstable_msg);
    throw new UnsupportedOperationException("Trying to predict with an unstable model.");
  }
  final Neurons[] net = DeepLearningTask.makeNeuronsForTesting(model_info);
  ((Neurons.Input)net[0]).setInput(-1, data);
  DeepLearningTask.step(-1, net, model_info, false, null);
  final float[] activations = net[net.length - 1]._a.raw();

  if (!isClassifier()) {
    // regression: single output, mapped back through the response normalization if there is one
    assert (preds.length == 1 && activations.length == 1);
    if (model_info().data_info()._normRespMul == null)
      preds[0] = activations[0];
    else
      preds[0] = (float) (activations[0] / model_info().data_info()._normRespMul[0] + model_info().data_info()._normRespSub[0]);
    if (Float.isNaN(preds[0])) throw new RuntimeException("Predicted regression target NaN!");
    return preds;
  }

  // classification: slot 0 is the label, slots 1..n hold the per-class outputs
  assert (preds.length == activations.length + 1);
  for (int k = 1; k < preds.length; ++k) {
    preds[k] = activations[k - 1];
    if (Float.isNaN(preds[k])) throw new RuntimeException("Predicted class probability NaN!");
  }
  preds[0] = ModelUtils.getPrediction(preds, data);
  return preds;
}
/**
 * Score auto-encoded reconstruction on-the-fly: only the per-row reconstruction error is
 * stored, the reconstruction itself is not materialized (unlike Frame score(Frame)).
 * @param frame Original data (can contain response, will be ignored)
 * @return Frame containing one Vec ("Reconstruction.MSE") with the reconstruction error (MSE) of each row, caller is responsible for deletion
 */
public Frame scoreAutoEncoder(Frame frame) {
  final int len = _names.length;
  // adapt() hands back two frames: [0] the adapted frame with all columns,
  // [1] only the vectors newly created by the adaptation (kept to ease their deletion).
  final Frame[] adapted = adapt(frame,false,false/*no response*/);
  final Frame fullFrame = adapted[0];
  final Frame createdOnly = adapted[1];
  // output column, placed right after the input columns
  fullFrame.add("Reconstruction.MSE", fullFrame.anyVec().makeZero());
  new MRTask2() {
    @Override public void map( Chunk chks[] ) {
      final double[] rowData = new double[len];
      final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
      for( int r=0; r<chks[0]._len; r++ ) {
        int c = 0;
        while( c<_names.length ) {
          rowData[c] = chks[c].at0(r); //original data
          c++;
        }
        chks[len].set0(r, score_autoencoder(rowData, null, neurons)); //store the per-row reconstruction error (MSE) in the last column
      }
    }
  }.doAll(fullFrame);

  // Return just the output columns
  final int first = _names.length;
  final int end = fullFrame.numCols();
  final Frame mseFrame = fullFrame.extractFrame(first, end);
  createdOnly.delete();
  return mseFrame;
}
/**
 * Compute the activations of one hidden layer ("deep features") for every row of a frame.
 * Each row is propagated forward through the network and the activations of the requested
 * hidden layer are written into new columns named "DF.C1", "DF.C2", ...
 * @param frame Original data; for supervised models a contained response column is dropped
 *              before scoring and prepended to the returned frame
 * @param layer index of the hidden layer to extract (0 = first hidden layer), must be
 *              smaller than the number of hidden layers
 * @return Frame with one column per unit of the chosen hidden layer, preceded by the
 *         response column if the input frame contained it
 */
public Frame scoreDeepFeatures(Frame frame, final int layer) {
  assert(layer >= 0 && layer < model_info().get_params().hidden.length);
  final int len = nfeatures();
  Vec resp = null;
  if (isSupervised()) {
    int ridx = frame.find(responseName());
    if (ridx != -1) { // drop the response for scoring!
      frame = new Frame(frame); // work on a copy of the frame, the caller's frame keeps its response
      resp = frame.vecs()[ridx];
      frame.remove(ridx);
    }
  }
  // Adapt the Frame layout - returns adapted frame and frame containing only
  // newly created vectors
  Frame[] adaptFrms = adapt(frame,false,false/*no response*/);
  // Adapted frame containing all columns - mix of original vectors from fr
  // and newly created vectors serving as adaptors
  Frame adaptFrm = adaptFrms[0];
  // Contains only newly created vectors. The frame eases deletion of these vectors.
  Frame onlyAdaptFrm = adaptFrms[1];

  //create new features, will be dense
  final int features = model_info().get_params().hidden[layer];
  Vec[] vecs = adaptFrm.anyVec().makeZeros(features);
  for (int j=0; j<features; ++j) {
    adaptFrm.add("DF.C" + (j+1), vecs[j]);
  }
  new MRTask2() {
    @Override public void map( Chunk chks[] ) {
      double tmp [] = new double[len];
      final Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info);
      for( int row=0; row<chks[0]._len; row++ ) {
        for( int i=0; i<len; i++ )
          tmp[i] = chks[i].at0(row);
        ((Neurons.Input)neurons[0]).setInput(-1, tmp);
        DeepLearningTask.step(-1, neurons, model_info, false, null);
        float[] out = neurons[layer+1]._a.raw(); //extract the layer-th hidden feature (neurons[0] is the input layer)
        for( int c=0; c<features; c++ )
          chks[_names.length+c].set0(row,out[c]);
      }
    }
  }.doAll(adaptFrm);

  // Return just the output columns
  int x=_names.length, y=adaptFrm.numCols();
  Frame ret = adaptFrm.extractFrame(x, y);
  onlyAdaptFrm.delete();
  if (resp != null) ret.prepend(responseName(), resp);
  return ret;
}
/**
 * Make (potentially expanded) reconstruction of a single row of a set of chunks.
 * @param chks chunks holding the data
 * @param row_in_chunk row (within the chunks) to reconstruct
 * @param tmp scratch array, filled with the row's values from the first tmp.length chunks
 * @param preds receives the reconstruction
 * @param neurons network to run the row through
 * @return preds
 */
private float[] score_autoencoder(Chunk[] chks, int row_in_chunk, double[] tmp, float[] preds, Neurons[] neurons) {
  assert(get_params().autoencoder);
  assert(tmp.length == _names.length);
  int col = 0;
  while( col<tmp.length ) {
    tmp[col] = chks[col].at0(row_in_chunk);
    col++;
  }
  // fills preds; the reconstruction error returned by the call is not needed here
  score_autoencoder(tmp, preds, neurons);
  return preds;
}
/**
 * Helper to reconstruct original data into preds array and compute the reconstruction error (MSE).
 * The error is the mean squared difference between the input layer and the output layer activations.
 * @param data Original data (unexpanded)
 * @param preds Reconstruction (potentially expanded); may be null if only the error is needed
 * @param neurons network to run the data through
 * @return reconstruction error
 * @throws UnsupportedOperationException if the model is unstable
 */
private double score_autoencoder(double[] data, float[] preds, Neurons[] neurons) {
  assert(model_info().get_params().autoencoder);
  if (model_info().unstable()) {
    Log.warn(unstable_msg);
    throw new UnsupportedOperationException("Trying to predict with an unstable model.");
  }
  ((Neurons.Input)neurons[0]).setInput(-1, data); // expands categoricals inside
  DeepLearningTask.step(-1, neurons, model_info, false, null); // reconstructs data in expanded space
  final float[] expandedIn = neurons[0]._a.raw(); //input (expanded)
  final float[] expandedOut = neurons[neurons.length - 1]._a.raw(); //output (expanded)
  assert(expandedIn.length == expandedOut.length);

  // Compute MSE of reconstruction in expanded space
  double sumSquares = 0;
  int k = 0;
  while (k < expandedIn.length) {
    sumSquares += Math.pow((expandedOut[k] - expandedIn[k]), 2);
    k++;
  }
  final double mse = sumSquares / expandedIn.length;

  if (preds == null) return mse; // caller only wants the error

  // Now scale back numerical columns to original data space (scale + shift)
  model_info().data_info().unScaleNumericals(expandedOut, expandedOut); //only modifies the numericals
  System.arraycopy(expandedOut, 0, preds, 0, expandedOut.length); //copy reconstruction into preds
  return mse;
}
/**
 * Compute quantile-based threshold (in reconstruction error) to find outliers.
 * The Vec is wrapped into a temporary single-column frame that is handed to a QuantilesPage;
 * the temporary frame's key is removed from the DKV again, even if the quantile computation fails.
 * @param mse Vector containing reconstruction errors
 * @param quantile Quantile for cut-off
 * @return Threshold in MSE value for a point to be above the quantile
 */
public double calcOutlierThreshold(Vec mse, double quantile) {
  final Frame mse_frame = new Frame(Key.make(), new String[]{"Reconstruction.MSE"}, new Vec[]{mse});
  try {
    QuantilesPage qp = new QuantilesPage();
    qp.column = mse_frame.vec(0);
    qp.source_key = mse_frame;
    qp.quantile = quantile;
    qp.invoke();
    return qp.result;
  } finally {
    // always drop the temporary key, also when invoke() throws
    DKV.remove(mse_frame._key);
  }
}
/**
 * @return a serializer which, after loading a model, runs Job.hygiene() on the loaded
 *         model's parameters
 */
@Override public ModelAutobufferSerializer getModelSerializer() {
  final ModelAutobufferSerializer serializer = new ModelAutobufferSerializer() {
    @Override protected AutoBuffer postLoad(Model m, AutoBuffer ab) {
      final DeepLearningModel loaded = (DeepLearningModel) m;
      Job.hygiene(loaded.get_params());
      return ab;
    }
  };
  return serializer;
}
public boolean generateHTML(String title, StringBuilder sb) {
if (_key == null) {
DocGen.HTML.title(sb, "No model yet");
return true;
}
// optional JFrame creation for visualization of weights
// DeepLearningVisualization.visualize(this);
final String mse_format = "%g";
// final String cross_entropy_format = "%2.6f";
// stats for training and validation
final Errors error = last_scored();
DocGen.HTML.title(sb, title);
if (get_params().source == null || DKV.get(get_params().source._key) == null ||
(get_params().validation != null && DKV.get(get_params().validation._key) == null)) (Job.hygiene(get_params())).toHTML(sb);
else job().toHTML(sb);
sb.append("<div class='alert'>Actions: "
+ (jobKey != null && UKV.get(jobKey) != null && Job.isRunning(jobKey) ? "<i class=\"icon-stop\"></i>" + Cancel.link(jobKey, "Stop training") + ", " : "")
+ Inspect2.link("Inspect training data (" + _dataKey + ")", _dataKey) + ", "
+ (_validationKey != null ? (Inspect2.link("Inspect validation data (" + _validationKey + ")", _validationKey) + ", ") : "")
+ water.api.Predict.link(_key, "Score on dataset") + ", "
+ DeepLearning.link(_dataKey, "Compute new model", null, responseName(), _validationKey)
+ (actual_best_model_key != null && UKV.get(actual_best_model_key) != null && actual_best_model_key != _key ? ", " + DeepLearningModelView.link("Go to best model", actual_best_model_key) : "")
+ (jobKey == null || ((jobKey != null && UKV.get(jobKey) == null)) || (jobKey != null && UKV.get(jobKey) != null && Job.isEnded(jobKey)) ? ", <i class=\"icon-play\"></i>" + DeepLearning.link(_dataKey, "Continue training this model", _key, responseName(), _validationKey) : "") + ", "
+ UIUtils.qlink(SaveModel.class, "model", _key, "Save model") + ", "
+ "</div>");
DocGen.HTML.paragraph(sb, "Model Key: " + _key);
if (jobKey != null) DocGen.HTML.paragraph(sb, "Job Key: " + jobKey);
if (!get_params().autoencoder)
DocGen.HTML.paragraph(sb, "Model type: " + (get_params().classification ? " Classification" : " Regression") + ", predicting: " + responseName());
else
DocGen.HTML.paragraph(sb, "Model type: Auto-Encoder");
DocGen.HTML.paragraph(sb, "Number of model parameters (weights/biases): " + String.format("%,d", model_info().size()));
if (model_info.unstable()) {
DocGen.HTML.section(sb, "=======================================================================================");
DocGen.HTML.section(sb, unstable_msg.replace("\n"," "));
DocGen.HTML.section(sb, "=======================================================================================");
}
if (error == null) return true;
DocGen.HTML.title(sb, "Progress");
// update epoch counter every time the website is displayed
epoch_counter = training_rows > 0 ? (float)model_info().get_processed_total()/training_rows : 0;
final double progress = get_params().progress();
if (get_params() != null && get_params().diagnostics) {
DocGen.HTML.section(sb, "Status of Neuron Layers");
sb.append("<table class='table table-striped table-bordered table-condensed'>");
sb.append("<tr>");
sb.append("<th>").append("#").append("</th>");
sb.append("<th>").append("Units").append("</th>");
sb.append("<th>").append("Type").append("</th>");
sb.append("<th>").append("Dropout").append("</th>");
sb.append("<th>").append("L1").append("</th>");
sb.append("<th>").append("L2").append("</th>");
if (get_params().adaptive_rate) {
sb.append("<th>").append("Rate (Mean, RMS)").append("</th>");
} else {
sb.append("<th>").append("Rate").append("</th>");
sb.append("<th>").append("Momentum").append("</th>");
}
sb.append("<th>").append("Weight (Mean, RMS)").append("</th>");
sb.append("<th>").append("Bias (Mean, RMS)").append("</th>");
sb.append("</tr>");
Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(model_info()); //link the weights to the neurons, for easy access
for (int i=0; i<neurons.length; ++i) {
sb.append("<tr>");
sb.append("<td>").append("<b>").append(i+1).append("</b>").append("</td>");
sb.append("<td>").append("<b>").append(neurons[i].units).append("</b>").append("</td>");
sb.append("<td>").append(neurons[i].getClass().getSimpleName()).append("</td>");
if (i == 0) {
sb.append("<td>");
sb.append(Utils.formatPct(neurons[i].params.input_dropout_ratio));
sb.append("</td>");
sb.append("<td></td>");
sb.append("<td></td>");
sb.append("<td></td>");
if (!get_params().adaptive_rate) sb.append("<td></td>");
sb.append("<td></td>");
sb.append("<td></td>");
sb.append("</tr>");
continue;
}
else if (i < neurons.length-1) {
sb.append("<td>");
if (neurons[i].params.hidden_dropout_ratios == null)
sb.append(Utils.formatPct(0));
else
sb.append(Utils.formatPct(neurons[i].params.hidden_dropout_ratios[i - 1]));
sb.append("</td>");
} else {
sb.append("<td></td>");
}
final String format = "%g";
sb.append("<td>").append(neurons[i].params.l1).append("</td>");
sb.append("<td>").append(neurons[i].params.l2).append("</td>");
if (get_params().adaptive_rate) {
sb.append("<td>(").append(String.format(format, model_info.mean_rate[i])).
append(", ").append(String.format(format, model_info.rms_rate[i])).append(")</td>");
} else {
sb.append("<td>").append(String.format("%.5g", neurons[i].rate(error.training_samples))).append("</td>");
sb.append("<td>").append(String.format("%.5f", neurons[i].momentum(error.training_samples))).append("</td>");
}
sb.append("<td>(").append(String.format(format, model_info.mean_weight[i])).
append(", ").append(String.format(format, model_info.rms_weight[i])).append(")</td>");
sb.append("<td>(").append(String.format(format, model_info.mean_bias[i])).
append(", ").append(String.format(format, model_info.rms_bias[i])).append(")</td>");
sb.append("</tr>");
}
sb.append("</table>");
}
if (isClassifier() && !get_params().autoencoder) {
DocGen.HTML.section(sb, "Classification error on training data: " + Utils.formatPct(error.train_err));
if(error.validation) {
DocGen.HTML.section(sb, "Classification error on validation data: " + Utils.formatPct(error.valid_err));
} else if(error.num_folds > 0) {
DocGen.HTML.section(sb, "Classification error on " + error.num_folds + "-fold cross-validated training data"
+ (_have_cv_results ? ": " + Utils.formatPct(error.valid_err) : " is being computed - please reload this page later."));
}
} else {
DocGen.HTML.section(sb, "MSE on training data: " + String.format(mse_format, error.train_mse));
if(error.validation) {
DocGen.HTML.section(sb, "MSE on validation data: " + String.format(mse_format, error.valid_mse));
} else if(error.num_folds > 0) {
DocGen.HTML.section(sb, "MSE on " + error.num_folds + "-fold cross-validated training data"
+ (_have_cv_results ? ": " + String.format(mse_format, error.valid_mse) : " is being computed - please reload this page later."));
}
}
DocGen.HTML.paragraph(sb, "Training samples: " + String.format("%,d", model_info().get_processed_total()));
DocGen.HTML.paragraph(sb, "Epochs: " + String.format("%.3f", epoch_counter) + " / " + String.format("%.3f", get_params().epochs));
int cores = 0; for (H2ONode n : H2O.CLOUD._memary) cores += n._heartbeat._num_cpus;
DocGen.HTML.paragraph(sb, "Number of compute nodes: " + (model_info.get_params().single_node_mode ? ("1 (" + H2O.NUMCPUS + " threads)") : (H2O.CLOUD.size() + " (" + cores + " threads)")));
DocGen.HTML.paragraph(sb, "Training samples per iteration" + (
get_params().train_samples_per_iteration == -2 ? " (-2 -> auto-tuning): " :
get_params().train_samples_per_iteration == -1 ? " (-1 -> max. available data): " :
get_params().train_samples_per_iteration == 0 ? " (0 -> one epoch): " : " (user-given): ")
+ String.format("%,d", actual_train_samples_per_iteration));
final boolean isEnded = get_params().self() == null || (UKV.get(get_params().self()) != null && Job.isEnded(get_params().self()));
final long time_so_far = isEnded ? run_time : run_time + System.currentTimeMillis() - _timeLastScoreEnter;
if (time_so_far > 0) {
long time_for_speed = isEnded || H2O.CLOUD.size() > 1 ? run_time : time_so_far;
if (time_for_speed > 0)
DocGen.HTML.paragraph(sb, "Training speed: " + String.format("%,d", model_info().get_processed_total() * 1000 / time_for_speed) + " samples/s");
}
DocGen.HTML.paragraph(sb, "Training time: " + PrettyPrint.msecs(time_so_far, true));
if (progress > 0 && !isEnded)
DocGen.HTML.paragraph(sb, "Estimated time left: " +PrettyPrint.msecs((long)(time_so_far*(1-progress)/progress), true));
long score_train = error.score_training_samples;
long score_valid = error.score_validation_samples;
final boolean fulltrain = score_train==0 || score_train == training_rows;
final boolean fullvalid = error.validation && get_params().n_folds == 0 && (score_valid==0 || score_valid == validation_rows);
final String toolarge = " Confusion matrix not shown here - too large: number of classes (" + model_info.units[model_info.units.length-1]
+ ") is greater than the specified limit of " + get_params().max_confusion_matrix_size + ".";
boolean smallenough = model_info.units[model_info.units.length-1] <= get_params().max_confusion_matrix_size;
if (!error.validation) {
if (_have_cv_results) {
String cmTitle = "<div class=\"alert\">Scoring results reported for " + error.num_folds + "-fold cross-validated training data " + Inspect2.link(_dataKey) + ":</div>";
sb.append("<h5>" + cmTitle);
sb.append("</h5>");
}
else {
String cmTitle = "<div class=\"alert\">Scoring results reported on training data " + Inspect2.link(_dataKey) + (fulltrain ? "" : " (" + score_train + " samples)") + ":</div>";
sb.append("<h5>" + cmTitle);
sb.append("</h5>");
}
}
else {
RString v_rs = new RString("<a href='Inspect2.html?src_key=%$key'>%key</a>");
String cmTitle = "<div class=\"alert\">Scoring results reported on validation data " + Inspect2.link(_validationKey) + (fullvalid ? "" : " (" + score_valid + " samples)") + ":</div>";
sb.append("<h5>" + cmTitle);
sb.append("</h5>");
}
if (isClassifier()) {
// print AUC
if (error.validAUC != null) {
error.validAUC.toHTML(sb);
}
else if (error.trainAUC != null) {
error.trainAUC.toHTML(sb);
}
else {
if (error.validation) {
if (error.valid_confusion_matrix != null && smallenough) {
error.valid_confusion_matrix.toHTML(sb);
} else if (smallenough) sb.append("<h5>Confusion matrix on validation data is not yet computed.</h5>");
else sb.append(toolarge);
}
else if (_have_cv_results) {
if (error.valid_confusion_matrix != null && smallenough) {
error.valid_confusion_matrix.toHTML(sb);
} else if (smallenough) sb.append("<h5>Confusion matrix on " + error.num_folds + "-fold cross-validated training data is not yet computed.</h5>");
else sb.append(toolarge);
}
else {
if (error.train_confusion_matrix != null && smallenough) {
error.train_confusion_matrix.toHTML(sb);
} else if (smallenough) sb.append("<h5>Confusion matrix on training data is not yet computed.</h5>");
else sb.append(toolarge);
}
}
}
// Hit ratio
if (error.valid_hitratio != null) {
error.valid_hitratio.toHTML(sb);
} else if (error.train_hitratio != null) {
error.train_hitratio.toHTML(sb);
}
// Variable importance
if (error.variable_importances != null) {
error.variable_importances.toHTML(this, sb);
}
printCrossValidationModelsHTML(sb);
DocGen.HTML.title(sb, "Scoring history");
if (errors.length > 1) {
DocGen.HTML.paragraph(sb, "Time taken for last scoring and diagnostics: " + PrettyPrint.msecs(errors[errors.length-1].scoring_time, true));
// training
{
final long pts = fulltrain ? training_rows : score_train;
String training = "Number of training data samples for scoring: " + (fulltrain ? "all " : "") + pts;
if (pts < 1000 && training_rows >= 1000) training += " (low, scoring might be inaccurate -> consider increasing this number in the expert mode)";
if (pts > 100000 && errors[errors.length-1].scoring_time > 10000) training += " (large, scoring can be slow -> consider reducing this number in the expert mode or scoring manually)";
DocGen.HTML.paragraph(sb, training);
}
// validation
if (error.validation) {
final long ptsv = fullvalid ? validation_rows : score_valid;
String validation = "Number of validation data samples for scoring: " + (fullvalid ? "all " : "") + ptsv;
if (ptsv < 1000 && validation_rows >= 1000) validation += " (low, scoring might be inaccurate -> consider increasing this number in the expert mode)";
if (ptsv > 100000 && errors[errors.length-1].scoring_time > 10000) validation += " (large, scoring can be slow -> consider reducing this number in the expert mode or scoring manually)";
DocGen.HTML.paragraph(sb, validation);
}
if (isClassifier() && nclasses() != 2 /*binary classifier has its own conflicting D3 object (AUC)*/) {
// Plot training error
float[] err = new float[errors.length];
float[] samples = new float[errors.length];
for (int i=0; i<err.length; ++i) {
err[i] = (float)errors[i].train_err;
samples[i] = errors[i].training_samples;
}
new D3Plot(samples, err, "training samples", "classification error",
"classification error on training data").generate(sb);
// Plot validation error
if (error.validation) {
for (int i=0; i<err.length; ++i) {
err[i] = (float)errors[i].valid_err;
}
new D3Plot(samples, err, "training samples", "classification error",
"classification error on validation set").generate(sb);
}
}
// regression
else if (!isClassifier()) {
// Plot training MSE
float[] err = new float[errors.length-1];
float[] samples = new float[errors.length-1];
for (int i=0; i<err.length; ++i) {
err[i] = (float)errors[i+1].train_mse;
samples[i] = errors[i+1].training_samples;
}
new D3Plot(samples, err, "training samples", "MSE",
"regression error on training data").generate(sb);
// Plot validation MSE
if (error.validation) {
for (int i=0; i<err.length; ++i) {
err[i] = (float)errors[i+1].valid_mse;
}
new D3Plot(samples, err, "training samples", "MSE",
"regression error on validation data").generate(sb);
}
}
}
// String training = "Number of training set samples for scoring: " + error.score_training;
if (error.validation) {
// String validation = "Number of validation set samples for scoring: " + error.score_validation;
}
sb.append("<table class='table table-striped table-bordered table-condensed'>");
sb.append("<tr>");
sb.append("<th>Training Time</th>");
sb.append("<th>Training Epochs</th>");
sb.append("<th>Training Samples</th>");
if (isClassifier()) {
// sb.append("<th>Training MCE</th>");
sb.append("<th>Training Error</th>");
if (nclasses()==2) sb.append("<th>Training AUC</th>");
} else {
sb.append("<th>Training MSE</th>");
}
if (error.validation) {
if (isClassifier()) {
// sb.append("<th>Validation MCE</th>");
sb.append("<th>Validation Error</th>");
if (nclasses()==2) sb.append("<th>Validation AUC</th>");
} else {
sb.append("<th>Validation MSE</th>");
}
}
else if (error.num_folds > 0) {
if (isClassifier()) {
sb.append("<th>Cross-Validation Error</th>");
if (nclasses()==2) sb.append("<th>Cross-Validation AUC</th>");
} else {
sb.append("<th>Cross-Validation MSE</th>");
}
}
sb.append("</tr>");
for( int i = errors.length - 1; i >= 0; i-- ) {
final Errors e = errors[i];
sb.append("<tr>");
sb.append("<td>" + PrettyPrint.msecs(e.training_time_ms, true) + "</td>");
sb.append("<td>" + String.format("%g", e.epoch_counter) + "</td>");
sb.append("<td>" + String.format("%,d", e.training_samples) + "</td>");
if (isClassifier() && !get_params().autoencoder) {
sb.append("<td>" + Utils.formatPct(e.train_err) + "</td>");
if (nclasses()==2) {
if (e.trainAUC != null) sb.append("<td>" + Utils.formatPct(e.trainAUC.AUC()) + "</td>");
else sb.append("<td>" + "N/A" + "</td>");
}
} else {
sb.append("<td>" + String.format(mse_format, e.train_mse) + "</td>");
}
if(e.validation) {
if (isClassifier()) {
sb.append("<td>" + Utils.formatPct(e.valid_err) + "</td>");
if (nclasses()==2) {
if (e.validAUC != null) sb.append("<td>" + Utils.formatPct(e.validAUC.AUC()) + "</td>");
else sb.append("<td>" + "N/A" + "</td>");
}
} else {
sb.append("<td>" + String.format(mse_format, e.valid_mse) + "</td>");
}
}
else if(e.num_folds > 0) {
if (i == errors.length - 1 && _have_cv_results) {
if (isClassifier()) {
sb.append("<td>" + Utils.formatPct(e.valid_err) + "</td>");
if (nclasses() == 2) {
if (e.validAUC != null) sb.append("<td>" + Utils.formatPct(e.validAUC.AUC()) + "</td>");
else sb.append("<td>" + "N/A" + "</td>");
}
} else {
sb.append("<td>" + String.format(mse_format, e.valid_mse) + "</td>");
}
}
else {
sb.append("<td>N/A</td>");
if (nclasses() == 2) sb.append("<td>N/A</td>");
}
}
sb.append("</tr>");
}
sb.append("</table>");
return true;
}
/**
 * Emits the {@code NCLASSES} static variable of the generated Java model.
 * For non-autoencoder models this defers to the superclass; for autoencoders the
 * value written is the last entry of {@code model_info.units}.
 *
 * @param sb buffer receiving the generated code
 * @return the buffer returned by the delegate that emitted the variable
 */
@Override
protected SB toJavaNCLASSES(SB sb) {
  if (!get_params().autoencoder) {
    return super.toJavaNCLASSES(sb);
  }
  return JCodeGen.toStaticVar(sb, "NCLASSES", model_info.units[model_info.units.length-1],
          "Number of output features (same as features of training data).");
}
/**
 * Emits the superclass' "fill preds[0]" code, except for autoencoders,
 * for which nothing is emitted.
 *
 * @param bodySb buffer receiving the generated predict-body code
 */
@Override
protected void toJavaFillPreds0(SB bodySb) {
  if (get_params().autoencoder) {
    return;
  }
  super.toJavaFillPreds0(bodySb);
}
/**
 * Appends a "Java Model" toggle button and an initially hidden container to {@code sb}.
 * The container holds either the escaped Java source of this model or, when
 * {@code model_info().size()} exceeds 100000, download instructions instead of the source.
 * The licensing branch is currently disabled by a hard-coded flag.
 *
 * @param sb HTML buffer to append to
 */
public void toJavaHtml(StringBuilder sb) {
  final String codeOpen = "<pre style=\"overflow-y:scroll;\"><code class=\"language-java\">";
  final String codeClose = "</code></pre>";
  // Toggle button followed by the hidden container that holds the model source.
  sb.append("<br /><br /><div class=\"pull-right\"><a href=\"#\" onclick=\'$(\"#javaModel\").toggleClass(\"hide\");\'");
  sb.append("class=\'btn btn-inverse btn-mini\'>Java Model</a></div><br /><div class=\"hide\" id=\"javaModel\">");
  boolean licensed = true; //isFeatureAllowed();
  if (licensed) {
    if (model_info().size() > 100000) {
      // Too large to render inline: print shell commands for downloading and compiling instead.
      final String javaName = JCodeGen.toJavaId(_key.toString());
      final String node = H2O.SELF.toString();
      sb.append(codeOpen);
      sb.append("/* Java code is too large to display, download it directly.\n");
      sb.append(" To obtain the code please invoke in your terminal:\n");
      sb.append(" curl http:/").append(node).append("/h2o-model.jar > h2o-model.jar\n");
      sb.append(" curl http:/").append(node).append("/2/").append(this.getClass().getSimpleName())
        .append("View.java?_modelKey=").append(_key).append(" > ").append(javaName).append(".java\n");
      sb.append(" javac -cp h2o-model.jar -J-Xmx2g -J-XX:MaxPermSize=128m ").append(javaName).append(".java\n");
      sb.append("*/");
      sb.append(codeClose);
    } else {
      sb.append(codeOpen);
      DocGen.HTML.escape(sb, toJava());
      sb.append(codeClose);
    }
  } else {
    // Unlicensed: show the trial-license form and keep the source in a hidden block.
    sb.append("<br/><div id=\'javaModelWarningBlock\' class=\"alert\" style=\"background:#eedd20;color:#636363;text-shadow:none;\">");
    sb.append("<b>You have requested a premium feature and your H<sub>2</sub>O software is unlicensed.</b><br/><br/>");
    sb.append("Please enter your email address below, and we will send you a trial license shortly.<br/>");
    sb.append("This will also temporarily enable downloading Java models.<br/>");
    sb.append("<form class=\'form-inline\'><input id=\"emailForJavaModel\" class=\"span5\" type=\"text\" placeholder=\"Email\"/> ");
    sb.append("<a href=\"#\" onclick=\'processJavaModelLicense();\' class=\'btn btn-inverse\'>Send</a></form></div>");
    sb.append("<div id=\"javaModelSource\" class=\"hide\">").append(codeOpen);
    DocGen.HTML.escape(sb, toJava());
    sb.append(codeClose).append("</div>");
  }
  sb.append("</div>");
  sb.append("<script type=\"text/javascript\">$(document).ready(showOrHideJavaModel);</script>");
}
/**
 * Emits the static data of the generated Java model: input workspaces (NUMS, CATS),
 * normalization constants, categorical offsets, optional dropout ratios, the layer
 * sizes (NEURONS) and the ACTIVATION / BIAS / WEIGHT array tables. Each table row
 * refers to a per-layer helper class ({@code Activation_i}, {@code Bias_i},
 * {@code Weight_i}) that is written to {@code fileContextSB}.
 *
 * The volatile {@code model_info} reference is read exactly once, so all emitted
 * arrays are taken from the same model snapshot and the copy loops do not perform
 * a volatile read per element.
 *
 * @param sb            buffer receiving the class-level static declarations
 * @param fileContextSB buffer receiving the per-layer helper classes
 * @return the buffer returned by the superclass, with this model's declarations appended
 */
@Override protected SB toJavaInit(SB sb, SB fileContextSB) {
  sb = super.toJavaInit(sb, fileContextSB);
  // Single snapshot of the (volatile) model state used for everything below.
  final DeepLearningModelInfo mi = model_info();
  if (mi.data_info()._nums > 0) {
    JCodeGen.toStaticVar(sb, "NUMS", new double[mi.data_info()._nums], "Workspace for storing numerical input variables.");
    JCodeGen.toStaticVar(sb, "NORMMUL", mi.data_info()._normMul, "Standardization/Normalization scaling factor for numerical variables.");
    JCodeGen.toStaticVar(sb, "NORMSUB", mi.data_info()._normSub, "Standardization/Normalization offset for numerical variables.");
  }
  if (mi.data_info()._cats > 0) {
    JCodeGen.toStaticVar(sb, "CATS", new int[mi.data_info()._cats], "Workspace for storing categorical input variables.");
  }
  JCodeGen.toStaticVar(sb, "CATOFFSETS", mi.data_info()._catOffsets, "Workspace for categorical offsets.");
  if (mi.data_info()._normRespMul != null) {
    JCodeGen.toStaticVar(sb, "NORMRESPMUL", mi.data_info()._normRespMul, "Standardization/Normalization scaling factor for response.");
    JCodeGen.toStaticVar(sb, "NORMRESPSUB", mi.data_info()._normRespSub, "Standardization/Normalization offset for response.");
  }
  if (get_params().hidden_dropout_ratios != null) {
    JCodeGen.toStaticVar(sb, "HIDDEN_DROPOUT_RATIOS", get_params().hidden_dropout_ratios, "Hidden layer dropout ratios.");
  }
  Neurons[] neurons = DeepLearningTask.makeNeuronsForTesting(mi);
  int[] layers = new int[neurons.length];
  for (int i=0; i<neurons.length; ++i) {
    layers[i] = neurons[i].units;
  }
  JCodeGen.toStaticVar(sb, "NEURONS", layers, "Number of neurons for each layer.");
  if (get_params().autoencoder) {
    sb.i(1).p("@Override public int getPredsSize() { return " + mi.units[mi.units.length-1] + "; }").nl();
    sb.i(1).p("@Override public boolean isAutoEncoder() { return true; }").nl();
    sb.i(1).p("@Override public String getHeader() { return \"" + getHeader() + "\"; }").nl();
  }

  // activation storage: one zero-filled array per layer
  sb.i(1).p("// Storage for neuron activation values.").nl();
  sb.i(1).p("public static final float[][] ACTIVATION = new float[][] {").nl();
  for (int i=0; i<neurons.length; i++) {
    String colInfoClazz = "Activation_"+i;
    sb.i(2).p("/* ").p(neurons[i].getClass().getSimpleName()).p(" */ ");
    sb.p(colInfoClazz).p(".VALUES");
    if (i!=neurons.length-1) sb.p(',');
    sb.nl();
    fileContextSB.i().p("// Neuron activation values for ").p(neurons[i].getClass().getSimpleName()).p(" layer").nl();
    JCodeGen.toClassWithArray(fileContextSB, null, colInfoClazz, new float[layers[i]]);
  }
  sb.i(1).p("};").nl();

  // biases: layer 0 (input) has none and is emitted with a null array
  sb.i(1).p("// Neuron bias values.").nl();
  sb.i(1).p("public static final float[][] BIAS = new float[][] {").nl();
  for (int i=0; i<neurons.length; i++) {
    String colInfoClazz = "Bias_"+i;
    sb.i(2).p("/* ").p(neurons[i].getClass().getSimpleName()).p(" */ ");
    sb.p(colInfoClazz).p(".VALUES");
    if (i!=neurons.length-1) sb.p(',');
    sb.nl();
    fileContextSB.i().p("// Neuron bias values for ").p(neurons[i].getClass().getSimpleName()).p(" layer").nl();
    float[] bias = null;
    if (i > 0) {
      bias = new float[mi.get_biases(i-1).size()];
      for (int j=0; j<bias.length; ++j) bias[j] = mi.get_biases(i-1).get(j);
    }
    JCodeGen.toClassWithArray(fileContextSB, null, colInfoClazz, bias);
  }
  sb.i(1).p("};").nl();

  // weights: flattened row-major (index = row*cols + col); layer 0 has none
  sb.i(1).p("// Connecting weights between neurons.").nl();
  sb.i(1).p("public static final float[][] WEIGHT = new float[][] {").nl();
  for (int i=0; i<neurons.length; i++) {
    String colInfoClazz = "Weight_"+i;
    sb.i(2).p("/* ").p(neurons[i].getClass().getSimpleName()).p(" */ ");
    sb.p(colInfoClazz).p(".VALUES");
    if (i!=neurons.length-1) sb.p(',');
    sb.nl();
    float[] weights = null;
    if (i > 0) {
      fileContextSB.i().p("// Neuron weights connecting ").
              p(neurons[i - 1].getClass().getSimpleName()).p(" and ").
              p(neurons[i].getClass().getSimpleName()).
              p(" layer").nl();
      final int rows = mi.get_weights(i-1).rows();
      final int cols = mi.get_weights(i-1).cols();
      weights = new float[rows*cols];
      for (int j=0; j<rows; ++j)
        for (int k=0; k<cols; ++k)
          weights[j*cols+k] = mi.get_weights(i-1).get(j,k);
    }
    JCodeGen.toClassWithArray(fileContextSB, null, colInfoClazz, weights);
  }
  sb.i(1).p("};").nl();
  return sb;
}
/**
 * Emits the body of the generated model's predict method into {@code bodySb}.
 * The generated code
 * (1) zero-fills preds and the NUMS/CATS workspaces,
 * (2) turns the categorical inputs into indices (via CATOFFSETS) and the numerical
 *     inputs into (optionally normalized) values, mapping NaN inputs to 0 / skipping them,
 * (3) fills the input layer ACTIVATION[0],
 * (4) forward-propagates through all layers using WEIGHT and BIAS, applying the
 *     configured activation function, and
 * (5) writes the result into preds: softmax probabilities for classifiers,
 *     a (de-normalized) value in preds[1] for regression, or the (de-normalized)
 *     reconstruction for autoencoders.
 * Afterwards it delegates to toJavaUnifyPreds and toJavaFillPreds0.
 *
 * @param bodySb     receives the generated statements
 * @param classCtxSb not used by this method
 * @param fileCtxSb  receives the local {@code model} buffer, which is never written to here
 */
@Override protected void toJavaPredictBody( final SB bodySb, final SB classCtxSb, final SB fileCtxSb) {
SB model = new SB();
bodySb.i().p("java.util.Arrays.fill(preds,0f);").nl();
final int cats = model_info().data_info()._cats;
final int nums = model_info().data_info()._nums;
// initialize input layer
if (nums > 0) bodySb.i().p("java.util.Arrays.fill(NUMS,0f);").nl();
if (cats > 0) bodySb.i().p("java.util.Arrays.fill(CATS,0);").nl();
bodySb.i().p("int i = 0, ncats = 0;").nl();
// categorical columns come first in data[]; NaN values are skipped
if (cats > 0) {
bodySb.i().p("for(; i<"+cats+"; ++i) {").nl();
bodySb.i(1).p("if (!Double.isNaN(data[i])) {").nl();
bodySb.i(2).p("int c = (int) data[i];").nl();
if (model_info().data_info()._useAllFactorLevels)
bodySb.i(2).p("CATS[ncats++] = c + CATOFFSETS[i];").nl();
else
// level 0 gets no index of its own when not all factor levels are used
bodySb.i(2).p("if (c != 0) CATS[ncats++] = c + CATOFFSETS[i] - 1;").nl();
bodySb.i(1).p("}").nl();
bodySb.i().p("}").nl();
}
// numerical columns follow; the generated loop continues from the current i, NaN becomes 0
if (nums > 0) {
bodySb.i().p("final int n = data.length;").nl();
bodySb.i().p("for(; i<n; ++i) {").nl();
bodySb.i(1).p("NUMS[i" + (cats > 0 ? "-" + cats : "") + "] = Double.isNaN(data[i]) ? 0 : ");
if (model_info().data_info()._normMul != null) {
bodySb.p("(data[i] - NORMSUB[i" + (cats > 0 ? "-" + cats : "") + "])*NORMMUL[i" + (cats > 0 ? "-" + cats : "") + "];").nl();
} else {
bodySb.p("data[i];").nl();
}
bodySb.i(0).p("}").nl();
}
// fill the input layer: 1 at each active categorical index, numericals after the last categorical offset
bodySb.i().p("java.util.Arrays.fill(ACTIVATION[0],0);").nl();
if (cats > 0) {
bodySb.i().p("for (i=0; i<ncats; ++i) ACTIVATION[0][CATS[i]] = 1f;").nl();
}
if (nums > 0) {
bodySb.i().p("for (i=0; i<NUMS.length; ++i) {").nl();
bodySb.i(1).p("ACTIVATION[0][CATOFFSETS[CATOFFSETS.length-1] + i] = Double.isNaN(NUMS[i]) ? 0f : (float) NUMS[i];").nl();
bodySb.i().p("}").nl();
}
boolean tanh=(get_params().activation == DeepLearning.Activation.Tanh || get_params().activation == DeepLearning.Activation.TanhWithDropout);
boolean relu=(get_params().activation == DeepLearning.Activation.Rectifier || get_params().activation == DeepLearning.Activation.RectifierWithDropout);
boolean maxout=(get_params().activation == DeepLearning.Activation.Maxout || get_params().activation == DeepLearning.Activation.MaxoutWithDropout);
// Guard emitted around the activation-function code: it excludes the last layer,
// except for autoencoders where the condition holds for every layer of the generated loop.
final String stopping = get_params().autoencoder ? "(i<=ACTIVATION.length-1)" : "(i<ACTIVATION.length-1)";
// make prediction: forward propagation
bodySb.i().p("for (i=1; i<ACTIVATION.length; ++i) {").nl();
bodySb.i(1).p("java.util.Arrays.fill(ACTIVATION[i],0f);").nl();
if (maxout) {
bodySb.i(1).p("float rmax = 0;").nl();
}
bodySb.i(1).p("for (int r=0; r<ACTIVATION[i].length; ++r) {").nl();
bodySb.i(2).p("final int cols = ACTIVATION[i-1].length;").nl();
if (maxout) {
bodySb.i(2).p("float cmax = Float.NEGATIVE_INFINITY;").nl();
}
bodySb.i(2).p("for (int c=0; c<cols; ++c) {").nl();
if (!maxout) {
// weighted sum of the previous layer (WEIGHT rows are stored flattened, row-major)
bodySb.i(3).p("ACTIVATION[i][r] += ACTIVATION[i-1][c] * WEIGHT[i][r*cols+c];").nl();
} else {
// maxout: take the maximum product under the stopping guard, plain weighted sum otherwise
bodySb.i(3).p("if " + stopping + " cmax = Math.max(ACTIVATION[i-1][c] * WEIGHT[i][r*cols+c], cmax);").nl();
bodySb.i(3).p("else ACTIVATION[i][r] += ACTIVATION[i-1][c] * WEIGHT[i][r*cols+c];").nl();
}
bodySb.i(2).p("}").nl();
if (maxout) {
bodySb.i(2).p("if "+ stopping +" ACTIVATION[i][r] = Float.isInfinite(cmax) ? 0f : cmax;").nl();
}
bodySb.i(2).p("ACTIVATION[i][r] += BIAS[i][r];").nl();
if (maxout) {
bodySb.i(2).p("if " + stopping + " rmax = Math.max(rmax, ACTIVATION[i][r]);").nl();
}
bodySb.i(1).p("}").nl();
// second pass over the layer: apply the activation function (and dropout scaling)
if (!maxout) bodySb.i(1).p("if " + stopping + " {").nl();
bodySb.i(2).p("for (int r=0; r<ACTIVATION[i].length; ++r) {").nl();
if (tanh) {
bodySb.i(3).p("ACTIVATION[i][r] = 1f - 2f / (1f + (float)Math.exp(2*ACTIVATION[i][r]));").nl();
} else if (relu) {
bodySb.i(3).p("ACTIVATION[i][r] = Math.max(0f, ACTIVATION[i][r]);").nl();
} else if (maxout) {
bodySb.i(3).p("if (rmax > 1 ) ACTIVATION[i][r] /= rmax;").nl();
}
if (get_params().hidden_dropout_ratios != null) {
// NOTE(review): the generated code multiplies by HIDDEN_DROPOUT_RATIOS[i-1] itself;
// confirm that this matches the scaling applied by the in-cluster scoring code.
if (maxout) bodySb.i(1).p("if " + stopping + " {").nl();
bodySb.i(3).p("ACTIVATION[i][r] *= HIDDEN_DROPOUT_RATIOS[i-1];").nl();
if (maxout) bodySb.i(1).p("}").nl();
}
bodySb.i(2).p("}").nl();
if (!maxout) bodySb.i(1).p("}").nl();
// Output layer handling; each of the three branches below also emits the closing brace of the layer loop.
if (isClassifier()) {
bodySb.i(1).p("if (i == ACTIVATION.length-1) {").nl();
// softmax
// (the maximum is subtracted before exponentiating; results go to preds[1..])
bodySb.i(2).p("float max = ACTIVATION[i][0];").nl();
bodySb.i(2).p("for (int r=1; r<ACTIVATION[i].length; r++) {").nl();
bodySb.i(3).p("if (ACTIVATION[i][r]>max) max = ACTIVATION[i][r];").nl();
bodySb.i(2).p("}").nl();
bodySb.i(2).p("float scale = 0f;").nl();
bodySb.i(2).p("for (int r=0; r<ACTIVATION[i].length; r++) {").nl();
bodySb.i(3).p("ACTIVATION[i][r] = (float) Math.exp(ACTIVATION[i][r] - max);").nl();
bodySb.i(3).p("scale += ACTIVATION[i][r];").nl();
bodySb.i(2).p("}").nl();
bodySb.i(2).p("for (int r=0; r<ACTIVATION[i].length; r++) {").nl();
bodySb.i(3).p("if (Float.isNaN(ACTIVATION[i][r]))").nl();
bodySb.i(4).p("throw new RuntimeException(\"Numerical instability, predicted NaN.\");").nl();
bodySb.i(3).p("ACTIVATION[i][r] /= scale;").nl();
bodySb.i(3).p("preds[r+1] = ACTIVATION[i][r];").nl();
bodySb.i(2).p("}").nl();
bodySb.i(1).p("}").nl();
bodySb.i().p("}").nl();
} else if (!get_params().autoencoder) { //Regression
bodySb.i(1).p("if (i == ACTIVATION.length-1) {").nl();
// regression: set preds[1], FillPreds0 will put it into preds[0]
if (model_info().data_info()._normRespMul != null) {
// undo the response normalization
bodySb.i(2).p("preds[1] = (float) (ACTIVATION[i][0] / NORMRESPMUL[0] + NORMRESPSUB[0]);").nl();
}
else {
bodySb.i(2).p("preds[1] = ACTIVATION[i][0];").nl();
}
bodySb.i(2).p("if (Float.isNaN(preds[1])) throw new RuntimeException(\"Predicted regression target NaN!\");").nl();
bodySb.i(1).p("}").nl();
bodySb.i().p("}").nl();
} else { //AutoEncoder
bodySb.i(1).p("if (i == ACTIVATION.length-1) {").nl();
// copy the reconstruction into preds, starting at index 0
bodySb.i(2).p("for (int r=0; r<ACTIVATION[i].length; r++) {").nl();
bodySb.i(3).p("if (Float.isNaN(ACTIVATION[i][r]))").nl();
bodySb.i(4).p("throw new RuntimeException(\"Numerical instability, reconstructed NaN.\");").nl();
bodySb.i(3).p("preds[r] = ACTIVATION[i][r];").nl();
bodySb.i(2).p("}").nl();
if (model_info().data_info()._nums > 0) {
// undo the input normalization for the numerical part of the reconstruction
int ns = model_info().data_info().numStart();
bodySb.i(2).p("for (int k=" + ns + "; k<" + model_info().data_info().fullN() + "; ++k) {").nl();
bodySb.i(3).p("preds[k] = preds[k] / (float)NORMMUL[k-" + ns + "] + (float)NORMSUB[k-" + ns + "];").nl();
bodySb.i(2).p("}").nl();
}
bodySb.i(1).p("}").nl();
bodySb.i().p("}").nl();
// DEBUGGING
// bodySb.i().p("System.out.println(java.util.Arrays.toString(data));").nl();
// bodySb.i().p("System.out.println(java.util.Arrays.toString(ACTIVATION[0]));").nl();
// bodySb.i().p("System.out.println(java.util.Arrays.toString(ACTIVATION[ACTIVATION.length-1]));").nl();
// bodySb.i().p("System.out.println(java.util.Arrays.toString(preds));").nl();
// bodySb.i().p("System.out.println(\"\");").nl();
}
// 'model' was never written to, so this appends an empty buffer
fileCtxSb.p(model);
toJavaUnifyPreds(bodySb);
toJavaFillPreds0(bodySb);
}
/**
 * Helper to push this model to another key (for keeping good models).
 * Builds a copy of this model under {@code bestModelKey}, marks the copy's parameters
 * as DONE with {@code job_key} set to {@code get_params().self()}, and runs it through
 * {@code delete_and_lock} / {@code unlock} with a null job key. With assertions enabled
 * it then checks that the key is retrievable, that the copy does not compare worse
 * than this model, and that the stored model's error equals {@code _bestError}.
 *
 * @param bestModelKey key under which the copy is stored
 */
private void putMeAsBestModel(Key bestModelKey) {
  final Key noJob = null; // typed null: no owning job for the lock/unlock calls
  DeepLearningModel snapshot = new DeepLearningModel(this, bestModelKey, noJob, model_info().data_info());
  snapshot.get_params().state = Job.JobState.DONE;
  snapshot.get_params().job_key = get_params().self();
  snapshot.delete_and_lock(noJob);
  snapshot.unlock(noJob);
  assert UKV.get(bestModelKey) != null;
  assert snapshot.compareTo(this) <= 0;
  assert UKV.<DeepLearningModel>get(bestModelKey).error() == _bestError;
}
/**
 * Removes the separately stored best model, if there is one and it is not this model itself.
 * Keys are compared with {@code equals} rather than by reference, so that a distinct
 * {@code Key} instance denoting this model's own key cannot cause the model to remove itself.
 */
public void delete_best_model( ) {
  if (actual_best_model_key != null && !actual_best_model_key.equals(_key)) {
    DKV.remove(actual_best_model_key);
  }
}
/**
 * Deletes every cross-validation model listed in {@code get_params().xval_models},
 * including each one's stored best model. Does nothing when no such models are recorded.
 * Each model is looked up once; keys that no longer resolve to a model are skipped
 * so that cleanup of the remaining models still proceeds.
 */
public void delete_xval_models( ) {
  if (get_params().xval_models == null) return;
  for (Key k : get_params().xval_models) {
    final DeepLearningModel xvalModel = UKV.<DeepLearningModel>get(k);
    if (xvalModel == null) continue; // nothing stored under this key (anymore)
    xvalModel.delete_best_model();
    xvalModel.delete();
  }
}
// Message text describing a job abort caused by numerical instability, followed by
// tuning suggestions (initial distribution, activation, regularization, learning rate).
// Declared transient.
transient private final String unstable_msg = "Job was aborted due to observed numerical instability (exponential growth)."
+ "\nTry a different initial distribution, a bounded activation function or adding"
+ "\nregularization with L1, L2 or max_w2 and/or use a smaller learning rate or faster annealing.";
}