package hex.deeplearning;

import hex.FrameTask;
import water.H2O;
import water.H2O.H2OCountedCompleter;
import water.Job;
import water.util.Log;

import java.util.Arrays;
import java.util.Random;

/**
 * Distributed Deep Learning training task: each node trains its local copy of the model
 * weights on its local chunks, and reduce() averages the per-node models.
 */
public class DeepLearningTask extends FrameTask<DeepLearningTask> {
  private final boolean _training;

  private hex.deeplearning.DeepLearningModel.DeepLearningModelInfo _input;
  hex.deeplearning.DeepLearningModel.DeepLearningModelInfo _output;
  public final hex.deeplearning.DeepLearningModel.DeepLearningModelInfo model_info() { return _output; }

  transient Neurons[] _neurons;

  int _chunk_node_count = 1;

  @Override protected boolean skipMissing() {
    return _output.get_params().missing_values_handling == DeepLearning.MissingValuesHandling.Skip;
  }

  public DeepLearningTask(hex.deeplearning.DeepLearningModel.DeepLearningModelInfo input, float fraction) {
    this(input, fraction, null);
  }

  private DeepLearningTask(hex.deeplearning.DeepLearningModel.DeepLearningModelInfo input, float fraction, H2OCountedCompleter cmp) {
    super(input.get_params().self(), input.data_info(), cmp);
    _training = true;
    _input = input;
    _useFraction = fraction;
    _shuffle = _input.get_params().shuffle_training_data;
    assert(_output == null);
  }

  // transfer ownership from input to output (which will be worked on)
  @Override protected void setupLocal() {
    _output = _input; //faster, good enough in this case (since the input was freshly deserialized by the Weaver)
    _input = null;
    _output.set_processed_local(0L);
  }

  // create local workspace (neurons)
  // and link them to shared weights
  @Override protected void chunkInit() {
    _neurons = makeNeuronsForTraining(_output);
  }

  // process one training row: set the input layer and do a forward/backward pass
  @Override public final void processRow(long seed, final double[] nums, final int numcats, final int[] cats, double[] responses) {
    if (_output.get_params().self() != null && !Job.isRunning(_output.get_params().self()))
      throw new Job.JobCancelledException();
    if (model_info().get_params().reproducible) {
      seed += model_info().get_processed_global(); //avoid periodicity
    } else {
      seed = new Random().nextLong();
    }
    ((Neurons.Input) _neurons[0]).setInput(seed, nums, numcats, cats);
    step(seed, _neurons, _output, _training, responses);
  }

  @Override protected void chunkDone(long n) {
    if (_training) _output.add_processed_local(n);
  }

  // model averaging: merge the model info of another (local or remote) task
  @Override public void reduce(DeepLearningTask other) {
    if (other._output.get_processed_local() > 0 //other NNTask was active (its model_info should be used for averaging)
        && other._output != _output) { //other NNTask worked on a different model_info
      // avoid adding remote model info to unprocessed local data, still random
      // (this can happen if we have no chunks on the master node)
      if (_output.get_processed_local() == 0) {
        _output = other._output;
        _chunk_node_count = other._chunk_node_count;
      } else {
        _output.add(other._output);
        _chunk_node_count += other._chunk_node_count;
      }
    }
    if (other._output.unstable()) _output.set_unstable();
  }

  static long _lastWarn;
  static long _warnCount;

  // after the reduce: turn the summed model into an average and update global row counters
  @Override protected void postGlobal() {
    if (H2O.CLOUD.size() > 1 && !_output.get_params().replicate_training_data) {
      long now = System.currentTimeMillis();
      if (_chunk_node_count < H2O.CLOUD.size() && (now - _lastWarn > 5000) && _warnCount < 3) {
//        Log.info("Synchronizing across " + _chunk_node_count + " H2O node(s).");
        Log.warn(H2O.CLOUD.size() - _chunk_node_count + " node(s) (out of " + H2O.CLOUD.size()
            + ") are not contributing to model updates. Consider setting replicate_training_data to true"
            + " or using a larger training dataset (or fewer H2O nodes).");
        _lastWarn = now;
        _warnCount++;
      }
    }
    if (!_output.get_params().replicate_training_data || H2O.CLOUD.size() == 1) {
      _output.div(_chunk_node_count);
      _output.add_processed_global(_output.get_processed_local());
      _output.set_processed_local(0L);
    }
    assert(_input == null);
  }

  public static Neurons[] makeNeuronsForTraining(final DeepLearningModel.DeepLearningModelInfo minfo) {
    return makeNeurons(minfo, true);
  }

  public static Neurons[] makeNeuronsForTesting(final DeepLearningModel.DeepLearningModelInfo minfo) {
    return makeNeurons(minfo, false);
  }

  // Helper: build the layer objects (input + hidden + output) and wire them to the shared weights
  private static Neurons[] makeNeurons(final DeepLearningModel.DeepLearningModelInfo minfo, boolean training) {
    DataInfo dinfo = minfo.data_info();
    final DeepLearning params = minfo.get_params();
    final int[] h = params.hidden;
    Neurons[] neurons = new Neurons[h.length + 2]; // input + hidden + output
    // input
    neurons[0] = new Neurons.Input(minfo.units[0], dinfo);
    // hidden (plus the reconstruction layer for autoencoders)
    for (int i = 0; i < h.length + (params.autoencoder ? 1 : 0); i++) {
      int n = params.autoencoder && i == h.length ? minfo.units[0] : h[i];
      switch (params.activation) {
        case Tanh: neurons[i+1] = new Neurons.Tanh(n); break;
        case TanhWithDropout: neurons[i+1] = params.autoencoder && i == h.length ? new Neurons.Tanh(n) : new Neurons.TanhDropout(n); break;
        case Rectifier: neurons[i+1] = new Neurons.Rectifier(n); break;
        case RectifierWithDropout: neurons[i+1] = params.autoencoder && i == h.length ? new Neurons.Rectifier(n) : new Neurons.RectifierDropout(n); break;
        case Maxout: neurons[i+1] = new Neurons.Maxout(n); break;
        case MaxoutWithDropout: neurons[i+1] = params.autoencoder && i == h.length ? new Neurons.Maxout(n) : new Neurons.MaxoutDropout(n); break;
      }
    }
    // output layer: softmax for classification, single linear unit for regression
    if (!params.autoencoder) {
      if (params.classification)
        neurons[neurons.length - 1] = new Neurons.Softmax(minfo.units[minfo.units.length - 1]);
      else
        neurons[neurons.length - 1] = new Neurons.Linear(1);
    }
    //copy parameters from NN, and set previous/input layer links
    for (int i = 0; i < neurons.length; i++) {
      neurons[i].init(neurons, i, params, minfo, training);
      neurons[i]._input = neurons[0];
    }
//    // debugging
//    for (Neurons n : neurons) Log.info(n.toString());
    return neurons;
  }

  // forward/backward propagation
  // assumption: layer 0 has _a filled with (horizontalized categoricals) double values
  public static void step(long seed, Neurons[] neurons, DeepLearningModel.DeepLearningModelInfo minfo, boolean training, double[] responses) {
    try {
      // forward pass through the hidden layers
      for (int i = 1; i < neurons.length - 1; ++i) {
        neurons[i].fprop(seed, training);
      }
      if (minfo.get_params().autoencoder) {
        neurons[neurons.length - 1].fprop(seed, training);
        if (training) {
          for (int i = neurons.length - 1; i > 0; --i) {
            neurons[i].bprop();
          }
        }
      } else {
        if (minfo.get_params().classification) {
          ((Neurons.Softmax) neurons[neurons.length - 1]).fprop();
          if (training) {
            for (int i = 1; i < neurons.length - 1; i++)
              Arrays.fill(neurons[i]._e.raw(), 0);
            int target_label;
            if (Double.isNaN(responses[0])) { //missing response
              target_label = Neurons.missing_int_value;
            } else {
              assert ((double) (int) responses[0] == responses[0]); //classification -> integer labels expected
              target_label = (int) responses[0];
            }
            ((Neurons.Softmax) neurons[neurons.length - 1]).bprop(target_label);
          }
        } else {
          ((Neurons.Linear) neurons[neurons.length - 1]).fprop();
          if (training) {
            for (int i = 1; i < neurons.length - 1; i++)
              Arrays.fill(neurons[i]._e.raw(), 0);
            float target_value;
            if (Double.isNaN(responses[0])) { //missing response
              target_value = Neurons.missing_real_value;
            } else {
              target_value = (float) responses[0];
            }
            ((Neurons.Linear) neurons[neurons.length - 1]).bprop(target_value);
          }
        }
        // backward pass through the hidden layers
        if (training) {
          for (int i = neurons.length - 2; i > 0; --i)
            neurons[i].bprop();
        }
      }
    } catch (RuntimeException ex) {
      Log.warn(ex.getMessage());
      minfo.set_unstable();
      throw new Job.JobCancelledException("Canceling job due to numerical instability.");
    }
  }
}
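
// Usage sketch (illustration only, not part of the original class): the task is typically
// mapped over the training Frame and the node-averaged model state is read back afterwards.
// The names `mi` (a DeepLearningModel.DeepLearningModelInfo), `train` (a water.fvec.Frame)
// and `fraction` (the per-iteration row sampling fraction) are assumed to exist in the caller:
//
//   DeepLearningModel.DeepLearningModelInfo updated =
//       new DeepLearningTask(mi, fraction).doAll(train).model_info();
//
// doAll() is inherited from MRTask2 via FrameTask; model_info() returns the reduced weights
// after postGlobal() has divided the accumulated model by _chunk_node_count.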