package hex;

import static hex.NeuralNet.ExecutionMode.*;

import hex.Layer.*;
import water.*;
import water.H2O.H2OCountedCompleter;
import water.Job.ValidatedJob;
import water.api.DocGen;
import water.api.NeuralNetProgressPage;
import water.api.RequestServer;
import water.fvec.*;
import water.util.*;

import java.util.Arrays;
import java.util.Random;

/**
 * Neural network.
 *
 * @author cypof
 */
public class NeuralNet extends ValidatedJob {
  static final int API_WEAVER = 1;
  public static DocGen.FieldDoc[] DOC_FIELDS;
  public static final String DOC_GET = "Neural Network";

  @API(help = "Execution Mode", filter = Default.class, json = true)
  public ExecutionMode mode = ExecutionMode.SingleNode;

  @API(help = "Activation function", filter = Default.class, json = true)
  public Activation activation = Activation.Tanh;

  @API(help = "Input layer dropout ratio", filter = Default.class, dmin = 0, dmax = 1, json = true)
  public double input_dropout_ratio = 0.0;

  @API(help = "Hidden layer sizes, e.g. 1000, 1000. Grid search: (100, 100), (200, 200)", filter = Default.class, json = true)
  public int[] hidden = new int[] { 200, 200 };

  @API(help = "Learning rate (higher => less stable, lower => slower convergence)", filter = Default.class, dmin = 0, dmax = 1, json = true)
  public double rate = .005;

  @API(help = "Learning rate annealing: rate / (1 + rate_annealing * samples)", filter = Default.class, dmin = 0, dmax = 1, json = true)
  public double rate_annealing = 1 / 1e6;

  @API(help = "L1 regularization, can add stability", filter = Default.class, dmin = 0, dmax = 1, json = true)
  public double l1 = 0.0;

  @API(help = "L2 regularization, can add stability", filter = Default.class, dmin = 0, dmax = 1, json = true)
  public double l2 = 0.0;

  @API(help = "Initial momentum at the beginning of training", filter = Default.class, dmin = 0, json = true)
  public double momentum_start = .5;

  @API(help = "Number of training samples for which momentum increases", filter = Default.class, lmin = 0, json = true)
  public long momentum_ramp = 1000000;

  @API(help = "Final momentum after the ramp is over", filter = Default.class, dmin = 0, json = true)
  public double momentum_stable = 0.99;

  @API(help = "How many times the dataset should be iterated (streamed), can be less than 1.0", filter = Default.class, dmin = 0, json = true)
  public double epochs = 10;
  @API(help = "Seed for random numbers (reproducible results for single-threaded only, cf. Hogwild)", filter = Default.class, json = true)
  public long seed = new Random().nextLong();

  @API(help = "Enable expert mode", filter = Default.class, json = true)
  public boolean expert_mode = false;

  @API(help = "Initial Weight Distribution", filter = Default.class, json = true)
  public InitialWeightDistribution initial_weight_distribution = InitialWeightDistribution.UniformAdaptive;

  @API(help = "Initial weight scale (Uniform: -value ... value, Normal: stddev)", filter = Default.class, dmin = 0, json = true)
  public double initial_weight_scale = 1.0;

  @API(help = "Loss function", filter = Default.class, json = true)
  public Loss loss = Loss.CrossEntropy;

  @API(help = "Learning rate decay factor between layers (N-th layer: rate * rate_decay^(N-1))", filter = Default.class, dmin = 0, json = true)
  public double rate_decay = 1.0;

  @API(help = "Constraint for squared sum of incoming weights per unit", filter = Default.class, json = true)
  public double max_w2 = Double.POSITIVE_INFINITY;

  @API(help = "Number of samples to train with in non-distributed mode for improved stability", filter = Default.class, lmin = 0, json = true)
  public long warmup_samples = 0L;

  @API(help = "Number of training set samples for scoring (0 for all)", filter = Default.class, lmin = 0, json = true)
  public long score_training = 1000L;

  @API(help = "Number of validation set samples for scoring (0 for all)", filter = Default.class, lmin = 0, json = true)
  public long score_validation = 0L;

  @API(help = "Minimum interval (in seconds) between scoring", filter = Default.class, dmin = 0, json = true)
  public double score_interval = 2;

  @API(help = "Enable diagnostics for hidden layers", filter = Default.class, json = true)
  public boolean diagnostics = true;

  @API(help = "Enable fast mode (minor approximation in back-propagation)", filter = Default.class, json = true)
  public boolean fast_mode = true;

  @Override public boolean toHTML(StringBuilder sb) {
    return makeJsonBox(sb);
  }

  @Override protected void registered(RequestServer.API_VERSION ver) {
    super.registered(ver);
    for (Argument arg : _arguments) {
      if (arg._name.equals("activation") || arg._name.equals("initial_weight_distribution")
              || arg._name.equals("mode") || arg._name.equals("expert_mode")) {
        arg.setRefreshOnChange();
      }
    }
  }

  @Override protected void queryArgumentValueSet(Argument arg, java.util.Properties inputArgs) {
    super.queryArgumentValueSet(arg, inputArgs);
    if (arg._name.equals("classification")) {
      classification = true;
      arg.disable("Regression is not currently supported.");
    }
    if (arg._name.equals("ignored_cols")) arg.disable("Not currently supported.");
    if (arg._name.equals("initial_weight_scale") && (initial_weight_distribution == InitialWeightDistribution.UniformAdaptive)) {
      arg.disable("Using sqrt(6 / (# units + # units of previous layer)) for Uniform distribution.", inputArgs);
    }
    if (arg._name.equals("mode")) {
      if (H2O.CLOUD._memary.length > 1) {
        //TODO: re-enable this
        // arg.disable("Using MapReduce since cluster size > 1.", inputArgs);
        // mode = ExecutionMode.MapReduce;
        //Temporary solution
        if (mode == ExecutionMode.MapReduce) {
          arg.disable("Distributed MapReduce mode is not yet fully supported. 
Will run in single-node mode, wasting " + (H2O.CLOUD._memary.length - 1) + " cluster node(s).", inputArgs); mode = ExecutionMode.SingleNode; } } } if( arg._name.equals("warmup_samples") && mode == MapReduce && H2O.CLOUD._memary.length > 1) { arg.disable("Not yet implemented for distributed MapReduce execution modes, using a value of 0."); warmup_samples = 0; } if(arg._name.equals("loss") && !classification) { arg.disable("Using MeanSquare loss for regression.", inputArgs); loss = Loss.MeanSquare; } if (arg._name.equals("score_validation") && validation == null) { arg.disable("Only if a validation set is specified."); } if (arg._name.equals("loss") || arg._name.equals("max_w2") || arg._name.equals("warmup_samples") || arg._name.equals("score_training") || arg._name.equals("score_validation") || arg._name.equals("initial_weight_distribution") || arg._name.equals("initial_weight_scale") || arg._name.equals("score_interval") || arg._name.equals("diagnostics") || arg._name.equals("rate_decay") ) { if (!expert_mode) arg.disable("Only in expert mode."); } } public enum ExecutionMode { SingleThread, SingleNode, MapReduce } public enum InitialWeightDistribution { UniformAdaptive, Uniform, Normal } /** * Activation functions */ public enum Activation { Tanh, TanhWithDropout, Rectifier, RectifierWithDropout, Maxout, MaxoutWithDropout } /** * Loss functions * CrossEntropy is recommended */ public enum Loss { MeanSquare, CrossEntropy } // Hack: used to stop the monitor thread public static volatile boolean running = true; public NeuralNet() { description = DOC_GET; } @Override public final void execImpl() { startTrain(); } void startTrain() { logStart(); running = true; // Vec[] vecs = Utils.append(_train, response); // reChunk(vecs); // final Vec[] train = new Vec[vecs.length - 1]; // System.arraycopy(vecs, 0, train, 0, train.length); // final Vec trainResp = classification ? vecs[vecs.length - 1].toEnum() : vecs[vecs.length - 1]; final Vec[] train = _train; final Vec trainResp = classification ? response.toEnum() : response; final Layer[] ls = new Layer[hidden.length + 2]; ls[0] = new VecsInput(train, null); for( int i = 0; i < hidden.length; i++ ) { switch( activation ) { case Tanh: ls[i + 1] = new Tanh(hidden[i]); break; case TanhWithDropout: ls[i + 1] = new TanhDropout(hidden[i]); break; case Rectifier: ls[i + 1] = new Rectifier(hidden[i]); break; case RectifierWithDropout: ls[i + 1] = new RectifierDropout(hidden[i]); break; case Maxout: ls[i + 1] = new Maxout(hidden[i]); break; case MaxoutWithDropout: ls[i + 1] = new MaxoutDropout(hidden[i]); break; } } if( classification ) ls[ls.length - 1] = new VecSoftmax(trainResp, null); else ls[ls.length - 1] = new VecLinear(trainResp, null); //copy parameters from NeuralNet, and set previous/input layer links for( int i = 0; i < ls.length; i++ ) ls[i].init(ls, i, this); final Key sourceKey = Key.make(input("source")); final Frame frame = new Frame(_names, train); frame.add(_responseName, trainResp); final Errors[] trainErrors0 = new Errors[] { new Errors() }; final Errors[] validErrors0 = validation == null ? null : new Errors[] { new Errors() }; NeuralNetModel model = new NeuralNetModel(destination_key, sourceKey, frame, ls, this); model.training_errors = trainErrors0; model.validation_errors = validErrors0; model.delete_and_lock(self()); final Frame[] adapted = validation == null ? 
null : model.adapt(validation, false); final Trainer trainer; final long num_rows = source.numRows(); if (mode == SingleThread) { Log.info("Entering single-threaded execution mode"); trainer = new Trainer.Direct(ls, epochs, self()); } else { // one node works on the first batch of points serially for improved stability if (warmup_samples > 0) { Log.info("Training the first " + warmup_samples + " samples in serial for improved stability."); Trainer warmup = new Trainer.Direct(ls, (double)warmup_samples/num_rows, self()); warmup.start(); warmup.join(); //TODO: for MapReduce send weights from master VM to all other VMs } if (mode == SingleNode) { Log.info("Entering single-node (multi-threaded Hogwild) execution mode."); trainer = new Trainer.Threaded(ls, epochs, self(), -1); } else if (mode == MapReduce) { if (warmup_samples > 0 && mode == MapReduce) { Log.info("Multi-threaded warmup with " + warmup_samples + " samples."); Trainer warmup = new Trainer.Threaded(ls, (double)warmup_samples/num_rows, self(), -1); warmup.start(); warmup.join(); //TODO: for MapReduce send weights from master VM to all other VMs } Log.info("Entering multi-node (MapReduce + multi-threaded Hogwild) execution mode."); trainer = new Trainer.MapReduce(ls, epochs, self()); } else throw new RuntimeException("invalid execution mode."); } Log.info("Running for " + epochs + " epochs."); final NeuralNet nn = this; // Use a separate thread for monitoring (blocked most of the time) Thread monitor = new Thread() { Errors[] trainErrors = trainErrors0, validErrors = validErrors0; @Override public void run() { try { Vec[] valid = null; Vec validResp = null; if( validation != null ) { assert adapted != null; final Vec[] vs = adapted[0].vecs(); valid = Arrays.copyOf(vs, vs.length - 1); System.arraycopy(adapted[0].vecs(), 0, valid, 0, valid.length); validResp = vs[vs.length - 1]; } //score the model every 2 seconds (or less often, if it takes longer to score) final long num_samples_total = (long)(Math.ceil(num_rows * epochs)); long num = -1, last_eval = runTimeMs(); do { final long interval = (long)(score_interval * 1000); //time between evaluations long time_taken = runTimeMs() - last_eval; if (num >= 0 && time_taken < interval) { Thread.sleep(interval - time_taken); } last_eval = runTimeMs(); num = eval(valid, validResp); if (num >= num_samples_total) break; if (mode != MapReduce) { if (!isRunning(self()) || !running) break; } else { if (!running) break; //MapReduce calls cancel() early, we are waiting for running = false } } while (true); // remove validation data if( adapted != null && adapted[1] != null ) adapted[1].delete(); Log.info("Training finished."); } catch( Exception ex ) { cancel(ex); } } private long eval(Vec[] valid, Vec validResp) { long[][] cm = null; if( classification ) { int classes = ls[ls.length - 1].units; cm = new long[classes][classes]; } NeuralNetModel model = new NeuralNetModel(destination_key, sourceKey, frame, ls, nn); // score model on training set Errors e = eval(train, trainResp, score_training, valid == null ? cm : null); e.score_training = score_training == 0 ? train[0].length() : score_training; trainErrors = Utils.append(trainErrors, e); model.unstable |= Double.isNaN(e.mean_square) || Double.isNaN(e.cross_entropy); model.training_errors = trainErrors; // score model on validation set if( valid != null ) { e = eval(valid, validResp, score_validation, cm); e.score_validation = score_validation == 0 ? 
              valid[0].length() : score_validation;
            validErrors = Utils.append(validErrors, e);
            model.unstable |= Double.isNaN(e.mean_square) || Double.isNaN(e.cross_entropy);
          }
          model.validation_errors = validErrors;
          model.confusion_matrix = cm;
          model.update(self());
          // terminate model building if we detect that a model is unstable
          if (model.unstable) NeuralNet.running = false;
          return e.training_samples;
        }

        private Errors eval(Vec[] vecs, Vec resp, long n, long[][] cm) {
          Errors e = NeuralNet.eval(ls, vecs, resp, n, cm);
          e.training_samples = trainer.processed();
          e.training_time_ms = runTimeMs();
          return e;
        }
      };

    trainer.start();
    monitor.start();
    trainer.join();

    // Gracefully terminate the job submitted via H2O web API
    if (mode != MapReduce) {
      running = false; //tell the monitor thread to finish too
      try {
        monitor.join();
      } catch (InterruptedException e) {
        e.printStackTrace();
      }
    } else {
      while (running) { //MapReduce will inform us that running = false
        try {
          Thread.sleep(1);
        } catch (InterruptedException e) {
          e.printStackTrace();
        }
      }
    }
    // remove this job -> stop H2O interface from refreshing
    H2OCountedCompleter task = _fjtask;
    if( task != null ) task.tryComplete();
    this.remove();
  }

  @Override public float progress() {
    NeuralNetModel model = UKV.get(destination_key);
    if( model != null && source != null) {
      Errors e = model.training_errors[model.training_errors.length - 1];
      return Math.min(1f, 0.1f + Math.min(1, e.training_samples / (float) (epochs * source.numRows())));
    }
    return 0;
  }

  public static Errors eval(Layer[] ls, Vec[] vecs, Vec resp, long n, long[][] cm) {
    Output output = (Output) ls[ls.length - 1];
    if( output instanceof VecSoftmax ) output = new VecSoftmax(resp, (VecSoftmax) output);
    else output = new VecLinear(resp, (VecLinear) output);
    return eval(ls, new VecsInput(vecs, (VecsInput) ls[0]), output, n, cm);
  }

  private static Errors eval(Layer[] ls, Input input, Output output, long n, long[][] cm) {
    Layer[] clones = new Layer[ls.length];
    clones[0] = input;
    for( int y = 1; y < clones.length - 1; y++ )
      clones[y] = ls[y].clone();
    clones[clones.length - 1] = output;
    for( int y = 0; y < clones.length; y++ )
      clones[y].init(clones, y, false);
    Layer.shareWeights(ls, clones);
    return eval(clones, n, cm);
  }

  public static Errors eval(Layer[] ls, long n, long[][] cm) {
    Errors e = new Errors();
    Input input = (Input) ls[0];
    long len = input._len;
    // TODO: choose random subset instead of first n points (do this once per run)
    if( n != 0 ) len = Math.min(len, n);
    // classification
    if( ls[ls.length - 1] instanceof Softmax ) {
      int correct = 0;
      e.mean_square = 0;
      e.cross_entropy = 0;
      for( input._pos = 0; input._pos < len; input._pos++ ) {
        if( ((Softmax) ls[ls.length - 1]).target() == Layer.missing_int_value ) //NA
          continue;
        if( correct(ls, e, cm) ) correct++;
      }
      e.classification = (len - (double) correct) / len;
      e.mean_square /= len;
      e.cross_entropy /= len; //want to report the averaged cross-entropy
    }
    // regression
    else {
      e.mean_square = 0;
      for( input._pos = 0; input._pos < len; input._pos++ )
        if( ls[ls.length - 1]._a[0] != Layer.missing_float_value )
          error(ls, e);
      e.classification = Double.POSITIVE_INFINITY;
      e.mean_square /= len;
    }
    input._pos = 0;
    return e;
  }

  // classification scoring
  static boolean correct(Layer[] ls, Errors e, long[][] confusion) {
    Softmax output = (Softmax) ls[ls.length - 1];
    if( output.target() == -1 ) return false;
    for (Layer l : ls) l.fprop(-1, false);
    float[] out = ls[ls.length - 1]._a;
    int target = output.target();
    for( int o = 0; o < out.length; o++ ) {
      final boolean hitpos = (o == target);
      final double t = hitpos ? 1 : 0;
      final double d = t - out[o];
      e.mean_square += d * d;
      e.cross_entropy += hitpos ? -Math.log(out[o]) : 0;
    }
    float[] preds = new float[out.length + 1];
    for (int i = 0; i < out.length; ++i) preds[i + 1] = out[i];
    double[] data = new double[ls[0]._a.length];
    preds[0] = ModelUtils.getPrediction(preds, data);
    if( confusion != null ) {
      if (output.target() != Layer.missing_int_value)
        confusion[output.target()][(int) preds[0]]++;
    }
    return preds[0] == output.target();
  }

  // regression scoring
  static void error(Layer[] ls, Errors e) {
    Linear linear = (Linear) ls[ls.length - 1];
    for (Layer l : ls) l.fprop(-1, false);
    float[] output = ls[ls.length - 1]._a;
    float[] target = linear.target();
    // accumulate the squared error across rows: the caller resets e.mean_square
    // before the scoring loop and divides by the number of scored rows afterwards
    for( int o = 0; o < output.length; o++ ) {
      final double d = target[o] - output[o];
      e.mean_square += d * d;
    }
  }

  @Override protected Response redirect() {
    return NeuralNetProgressPage.redirect(this, self(), dest());
  }

  public static String link(Key k, String content) {
    NeuralNet req = new NeuralNet();
    RString rs = new RString("<a href='" + req.href() + ".query?%key_param=%$key'>%content</a>");
    rs.replace("key_param", "source");
    rs.replace("key", k.toString());
    rs.replace("content", content);
    return rs.toString();
  }

  @Override public String speedDescription() { return "time/epoch"; }

  @Override public long speedValue() {
    Value value = DKV.get(dest());
    NeuralNetModel m = value != null ? (NeuralNetModel) value.get() : null;
    long sv = 0;
    if( m != null ) {
      Errors[] e = m.training_errors;
      double epochsSoFar = e[e.length - 1].training_samples / (double) source.numRows();
      sv = (epochsSoFar <= 0) ? 0 : (long) (e[e.length - 1].training_time_ms / epochsSoFar);
    }
    return sv;
  }

  public static class Errors extends Iced {
    static final int API_WEAVER = 1;
    static public DocGen.FieldDoc[] DOC_FIELDS;

    @API(help = "How many rows the algorithm has processed")
    public long training_samples;

    @API(help = "How long the algorithm ran in ms")
    public long training_time_ms;

    @API(help = "Classification error")
    public double classification = 1;

    @API(help = "Mean square error")
    public double mean_square = Double.POSITIVE_INFINITY;

    @API(help = "Cross entropy")
    public double cross_entropy = Double.POSITIVE_INFINITY;

    @API(help = "Number of training set samples for scoring")
    public long score_training;

    @API(help = "Number of validation set samples for scoring")
    public long score_validation;

    @Override public String toString() {
      return String.format("%.2f", (100 * classification)) + "% (MSE:" + String.format("%.2e", mean_square)
              + ", MCE:" + String.format("%.2e", cross_entropy) + ")";
    }
  }

  public static class NeuralNetModel extends Model {
    static final int API_WEAVER = 1;
    static public DocGen.FieldDoc[] DOC_FIELDS;

    @API(help = "Model parameters")
    public NeuralNet parameters;

    //@API(help = "Layers")
    public Layer[] layers;

    //@API(help = "Layer weights")
    public float[][] weights;

    //@API(help = "Layer biases")
    public float[][] biases;

    @API(help = "Errors on the training set")
    public Errors[] training_errors;

    @API(help = "Errors on the validation set")
    public Errors[] validation_errors;

    @API(help = "Confusion matrix")
    public long[][] confusion_matrix;

    @API(help = "Mean bias")
    public float[] mean_bias;

    @API(help = "RMS bias")
    public float[] rms_bias;

    @API(help = "Mean weight")
    public float[] mean_weight;

    @API(help = "RMS weight")
    public float[] rms_weight;

    @API(help = "Unstable")
    public boolean unstable = false;

    NeuralNetModel(Key selfKey, Key dataKey, Frame fr, Layer[] ls, NeuralNet p) {
      super(selfKey, dataKey, fr, /*
priorClassDistribution */ null); parameters = p; layers = ls; weights = new float[ls.length][]; biases = new float[ls.length][]; for( int y = 1; y < layers.length; y++ ) { weights[y] = layers[y]._w; biases[y] = layers[y]._b; } if (parameters.diagnostics) { // compute stats on all nodes mean_bias = new float[ls.length]; rms_bias = new float[ls.length]; mean_weight = new float[ls.length]; rms_weight = new float[ls.length]; for( int y = 1; y < layers.length; y++ ) { final Layer l = layers[y]; final int len = l._a.length; // compute mean values mean_bias[y] = rms_bias[y] = 0; mean_weight[y] = rms_weight[y] = 0; for(int u = 0; u < len; u++) { mean_bias[y] += biases[y][u]; for( int i = 0; i < l._previous._a.length; i++ ) { int w = u * l._previous._a.length + i; mean_weight[y] += weights[y][w]; } } mean_bias[y] /= len; mean_weight[y] /= len * l._previous._a.length; // compute rms values for(int u = 0; u < len; ++u) { final double db = biases[y][u] - mean_bias[y]; rms_bias[y] += db * db; for( int i = 0; i < l._previous._a.length; i++ ) { int w = u * l._previous._a.length + i; final double dw = weights[y][w] - mean_weight[y]; rms_weight[y] += dw * dw; } } rms_bias[y] = (float)Math.sqrt(rms_bias[y]/len); rms_weight[y] = (float)Math.sqrt(rms_weight[y]/len/l._previous._a.length); unstable |= Double.isNaN(mean_bias[y]) || Double.isNaN(rms_bias[y]) || Double.isNaN(mean_weight[y]) || Double.isNaN(rms_weight[y]); // Abort the run if weights or biases are unreasonably large (Note that all input values are normalized upfront) // This can happen with Rectifier units when L1/L2/max_w2 are all set to 0, especially when using more than 1 hidden layer. final double thresh = 1e10; unstable |= mean_bias[y] > thresh || rms_bias[y] > thresh || mean_weight[y] > thresh || rms_weight[y] > thresh; } } } @Override public String toString() { StringBuilder sb = new StringBuilder(); for (int i=0; i<weights.length; ++i) sb.append("\nweights["+i+"][]="+Arrays.toString(weights[i])); for (int i=0; i<biases.length; ++i) sb.append("\nbiases["+i+"][]="+Arrays.toString(biases[i])); sb.append("\n"); return sb.toString(); } public void toJavaHtml(StringBuilder sb) { //DocGen.HTML.title(sb, "The Java Neural Net model is not implemented yet."); } public boolean generateHTML(String title, StringBuilder sb) { final String mse_format = "%2.6f"; final String cross_entropy_format = "%2.6f"; DocGen.HTML.title(sb, title); DocGen.HTML.paragraph(sb, "Model Key: " + _key); sb.append("<div class='alert'>Actions: " + water.api.Predict.link(_key, "Score on dataset") + ", " + NeuralNet.link(_dataKey, "Compute new model") + "</div>"); parameters.toHTML(sb); // Plot training error { float[] train_err = new float[training_errors.length]; float[] train_samples = new float[training_errors.length]; for (int i=0; i<train_err.length; ++i) { train_err[i] = (float)training_errors[i].classification; train_samples[i] = training_errors[i].training_samples; } new D3Plot(train_samples, train_err, "training samples", "classification error", "Classification Error on Training Set").generate(sb); } // Plot validation error if (validation_errors != null) { float[] valid_err = new float[validation_errors.length]; float[] valid_samples = new float[validation_errors.length]; for (int i=0; i<valid_err.length; ++i) { valid_err[i] = (float)validation_errors[i].classification; valid_samples[i] = validation_errors[i].training_samples; } new D3Plot(valid_samples, valid_err, "training samples", "classification error", "Classification Error on Validation Set").generate(sb); } 
final boolean classification = isClassifier(); final String cmTitle = "Confusion Matrix" + (validation_errors == null ? " (Training Data)" : ""); // stats for training and validation final Errors train = training_errors[training_errors.length - 1]; final Errors valid = validation_errors != null ? validation_errors[validation_errors.length - 1] : null; if (classification) { DocGen.HTML.section(sb, "Training classification error: " + formatPct(train.classification)); } DocGen.HTML.section(sb, "Training mean square error: " + String.format(mse_format, train.mean_square)); if (classification) { DocGen.HTML.section(sb, "Training cross entropy: " + String.format(cross_entropy_format, train.cross_entropy)); if( valid != null ) { DocGen.HTML.section(sb, "Validation classification error: " + formatPct(valid.classification)); } } if( validation_errors != null ) { assert valid != null; DocGen.HTML.section(sb, "Validation mean square error: " + String.format(mse_format, valid.mean_square)); if (classification) { DocGen.HTML.section(sb, "Validation mean cross entropy: " + String.format(cross_entropy_format, valid.cross_entropy)); } if (valid.training_time_ms > 0) DocGen.HTML.section(sb, "Training speed: " + valid.training_samples * 1000 / valid.training_time_ms + " samples/s"); } else { if (train.training_time_ms > 0) DocGen.HTML.section(sb, "Training speed: " + train.training_samples * 1000 / train.training_time_ms + " samples/s"); } if (parameters != null && parameters.diagnostics) { DocGen.HTML.section(sb, "Status of Hidden and Output Layers"); sb.append("<table class='table table-striped table-bordered table-condensed'>"); sb.append("<tr>"); sb.append("<th>").append("#").append("</th>"); sb.append("<th>").append("Units").append("</th>"); sb.append("<th>").append("Activation").append("</th>"); sb.append("<th>").append("Rate").append("</th>"); sb.append("<th>").append("L1").append("</th>"); sb.append("<th>").append("L2").append("</th>"); sb.append("<th>").append("Momentum").append("</th>"); sb.append("<th>").append("Weight (Mean, RMS)").append("</th>"); sb.append("<th>").append("Bias (Mean, RMS)").append("</th>"); sb.append("</tr>"); for (int i=1; i<layers.length; ++i) { sb.append("<tr>"); sb.append("<td>").append("<b>").append(i).append("</b>").append("</td>"); sb.append("<td>").append("<b>").append(layers[i].units).append("</b>").append("</td>"); sb.append("<td>").append(layers[i].getClass().getSimpleName().replace("Vec","").replace("Chunk", "")).append("</td>"); sb.append("<td>").append(String.format("%.5g", layers[i].rate(train.training_samples))).append("</td>"); sb.append("<td>").append(layers[i].params.l1).append("</td>"); sb.append("<td>").append(layers[i].params.l2).append("</td>"); final String format = "%g"; sb.append("<td>").append(layers[i].momentum(train.training_samples)).append("</td>"); sb.append("<td>(").append(String.format(format, mean_weight[i])). append(", ").append(String.format(format, rms_weight[i])).append(")</td>"); sb.append("<td>(").append(String.format(format, mean_bias[i])). append(", ").append(String.format(format, rms_bias[i])).append(")</td>"); sb.append("</tr>"); } sb.append("</table>"); } if (unstable) { final String msg = "Job was aborted due to observed numerical instability (exponential growth)." 
+ " Try a bounded activation function or regularization with L1, L2 or max_w2 and/or use a smaller learning rate or faster annealing."; DocGen.HTML.section(sb, "======================================================================================="); DocGen.HTML.section(sb, msg); DocGen.HTML.section(sb, "======================================================================================="); } if( confusion_matrix != null && confusion_matrix.length < 100 ) { assert(classification); String[] classes = classNames(); NeuralNetScore.confusion(sb, cmTitle, classes, confusion_matrix); } sb.append("<h3>" + "Progress" + "</h3>"); String training = "Number of training set samples for scoring: " + train.score_training; if (train.score_training > 0) { if (train.score_training < 1000) training += " (low, scoring might be inaccurate -> consider increasing this number in the expert mode)"; if (train.score_training > 10000) training += " (large, scoring can be slow -> consider reducing this number in the expert mode or scoring manually)"; } DocGen.HTML.section(sb, training); if (valid != null) { String validation = "Number of validation set samples for scoring: " + valid.score_validation; if (valid.score_validation > 0) { if (valid.score_validation < 1000) validation += " (low, scoring might be inaccurate -> consider increasing this number in the expert mode)"; if (valid.score_validation > 10000) validation += " (large, scoring can be slow -> consider reducing this number in the expert mode or scoring manually)"; } DocGen.HTML.section(sb, validation); } sb.append("<table class='table table-striped table-bordered table-condensed'>"); sb.append("<tr>"); sb.append("<th>Training Time</th>"); sb.append("<th>Training Samples</th>"); sb.append("<th>Training MSE</th>"); if (classification) { sb.append("<th>Training MCE</th>"); sb.append("<th>Training Classification Error</th>"); } sb.append("<th>Validation MSE</th>"); if (classification) { sb.append("<th>Validation MCE</th>"); sb.append("<th>Validation Classification Error</th>"); } sb.append("</tr>"); for( int i = training_errors.length - 1; i >= 0; i-- ) { sb.append("<tr>"); sb.append("<td>" + PrettyPrint.msecs(training_errors[i].training_time_ms, true) + "</td>"); if( validation_errors != null ) { sb.append("<td>" + String.format("%,d", validation_errors[i].training_samples) + "</td>"); } else { sb.append("<td>" + String.format("%,d", training_errors[i].training_samples) + "</td>"); } sb.append("<td>" + String.format(mse_format, training_errors[i].mean_square) + "</td>"); if (classification) { sb.append("<td>" + String.format(cross_entropy_format, training_errors[i].cross_entropy) + "</td>"); sb.append("<td>" + formatPct(training_errors[i].classification) + "</td>"); } if( validation_errors != null ) { sb.append("<td>" + String.format(mse_format, validation_errors[i].mean_square) + "</td>"); if (classification) { sb.append("<td>" + String.format(cross_entropy_format, validation_errors[i].cross_entropy) + "</td>"); sb.append("<td>" + formatPct(validation_errors[i].classification) + "</td>"); } } else sb.append("<td></td><td></td><td></td>"); sb.append("</tr>"); } sb.append("</table>"); return true; } private static String formatPct(double pct) { String s = "N/A"; if( !Double.isNaN(pct) ) s = String.format("%5.2f %%", 100 * pct); return s; } @Override protected float[] score0(Chunk[] chunks, int rowInChunk, double[] tmp, float[] preds) { Layer[] clones = new Layer[layers.length]; clones[0] = new ChunksInput(Utils.remove(chunks, chunks.length - 1), 
(VecsInput) layers[0]); for( int y = 1; y < layers.length - 1; y++ ) clones[y] = layers[y].clone(); Layer output = layers[layers.length - 1]; if( output instanceof VecSoftmax ) clones[clones.length - 1] = new ChunkSoftmax(chunks[chunks.length - 1], (VecSoftmax) output); else clones[clones.length - 1] = new ChunkLinear(chunks[chunks.length - 1], (VecLinear) output); for( int y = 0; y < clones.length; y++ ) { clones[y]._w = weights[y]; clones[y]._b = biases[y]; clones[y].init(clones, y, false); } ((Input) clones[0])._pos = rowInChunk; for (Layer clone : clones) clone.fprop(-1, false); float[] out = clones[clones.length - 1]._a; assert out.length == preds.length; for (int i=0; i<out.length; ++i) preds[i+1] = out[i]; double[] data = new double[out.length]; for (int i=0; i<out.length; ++i) data[i] = out[i]; preds[0] = ModelUtils.getPrediction(preds, data); return preds; } @Override protected float[] score0(double[] data, float[] preds) { throw new UnsupportedOperationException(); } @Override public ConfusionMatrix cm() { long[][] cm = confusion_matrix; if( cm != null ) return new ConfusionMatrix(cm); return null; } } public static class NeuralNetScore extends ModelJob { static final int API_WEAVER = 1; static public DocGen.FieldDoc[] DOC_FIELDS; static final String DOC_GET = "Neural network scoring"; @API(help = "Model", required = true, filter = Default.class) public NeuralNetModel model; @API(help = "Rows to consider for scoring, 0 (default) means the whole frame", filter = Default.class) public long max_rows; @API(help = "Classification error") public double classification_error; @API(help = "Mean square error") public double mean_square_error; @API(help = "Cross entropy") public double cross_entropy; @API(help = "Confusion matrix") public long[][] confusion_matrix; public NeuralNetScore() { description = DOC_GET; } @Override protected Response serve() { init(); Frame[] frs = model.adapt(source, false); int classes = model.layers[model.layers.length - 1].units; confusion_matrix = new long[classes][classes]; Layer[] clones = new Layer[model.layers.length]; for( int y = 0; y < model.layers.length; y++ ) { clones[y] = model.layers[y].clone(); clones[y]._w = model.weights[y]; clones[y]._b = model.biases[y]; } Vec[] vecs = frs[0].vecs(); Vec[] data = Utils.remove(vecs, vecs.length - 1); Vec resp = vecs[vecs.length - 1]; Errors e = eval(clones, data, resp, max_rows, confusion_matrix); classification_error = e.classification; mean_square_error = e.mean_square; cross_entropy = e.cross_entropy; if( frs[1] != null ) frs[1].delete(); return Response.done(this); } @Override public boolean toHTML(StringBuilder sb) { final boolean classification = model.isClassifier(); if (classification) { DocGen.HTML.section(sb, "Classification error: " + String.format("%5.2f %%", 100 * classification_error)); } DocGen.HTML.section(sb, "Mean square error: " + mean_square_error); if (classification) { DocGen.HTML.section(sb, "Mean cross entropy: " + cross_entropy); String[] domain = null; if (response.domain() != null) { domain = response.domain(); } else { // find the names for the categories from the model's domains, after finding the correct column int idx = source.find(response); if( idx == -1 ) { Vec vm = response.masterVec(); if( vm != null ) idx = source.find(vm); } if (idx != -1) domain = model._domains[idx]; } confusion(sb, "Confusion Matrix", domain, confusion_matrix); } return true; } static void confusion(StringBuilder sb, String title, String[] classes, long[][] confusionMatrix) { //sb.append("<h3>" + title 
+ "</h3>"); sb.append("<table class='table table-striped table-bordered table-condensed'>"); sb.append("<tr><th>Actual \\ Predicted</th>"); if( classes == null ) { classes = new String[confusionMatrix.length]; for( int i = 0; i < classes.length; i++ ) classes[i] = "" + i; } for( String c : classes ) sb.append("<th>" + c + "</th>"); sb.append("<th>Error</th></tr>"); long[] totals = new long[classes.length]; long sumTotal = 0; long sumError = 0; for( int crow = 0; crow < classes.length; ++crow ) { long total = 0; long error = 0; sb.append("<tr><th>" + classes[crow] + "</th>"); for( int ccol = 0; ccol < classes.length; ++ccol ) { long num = confusionMatrix[crow][ccol]; total += num; totals[ccol] += num; if( ccol == crow ) { sb.append("<td style='background-color:LightGreen'>"); } else { sb.append("<td>"); error += num; } sb.append(num); sb.append("</td>"); } sb.append("<td>"); sb.append(String.format("%5.3f = %d / %d", (double) error / total, error, total)); sb.append("</td></tr>"); sumTotal += total; sumError += error; } sb.append("<tr><th>Totals</th>"); for (long total : totals) sb.append("<td>" + total + "</td>"); sb.append("<td><b>"); sb.append(String.format("%5.3f = %d / %d", (double) sumError / sumTotal, sumError, sumTotal)); sb.append("</b></td></tr>"); sb.append("</table>"); } } static int cores() { int cores = 0; for( H2ONode node : H2O.CLOUD._memary ) cores += node._heartbeat._num_cpus; return cores; } /** * Makes sure small datasets are spread over enough chunks to parallelize training. */ public static void reChunk(Vec[] vecs) { final int splits = cores() * 2; // More in case of unbalance if( vecs[0].nChunks() < splits ) { // A new random VectorGroup Key keys[] = new Vec.VectorGroup().addVecs(vecs.length); for( int v = 0; v < vecs.length; v++ ) { AppendableVec vec = new AppendableVec(keys[v]); long rows = vecs[0].length(); Chunk cache = null; for( int split = 0; split < splits; split++ ) { long off = rows * split / splits; long lim = rows * (split + 1) / splits; NewChunk chunk = new NewChunk(vec, split); for( long r = off; r < lim; r++ ) { if( cache == null || r < cache._start || r >= cache._start + cache._len ) cache = vecs[v].chunkForRow(r); if( !cache.isNA(r) ) { if( vecs[v]._domain != null ) chunk.addEnum((int) cache.at8(r)); else if( vecs[v].isInt() ) chunk.addNum(cache.at8(r), 0); else chunk.addNum(cache.at(r)); } else { if( vecs[v].isInt() ) chunk.addNA(); else { // Don't use addNA() for doubles, as NewChunk uses separate array chunk.addNum(Double.NaN); } } } chunk.close(split, null); } Vec t = vec.close(null); t._domain = vecs[v]._domain; vecs[v] = t; } } } }