package hex;

import static hex.NeuralNet.*;
import static water.util.MRUtils.sampleFrame;

import hex.deeplearning.DeepLearning;
import hex.deeplearning.DeepLearningModel;
import hex.deeplearning.DeepLearningTask;
import hex.deeplearning.Neurons;
import junit.framework.Assert;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import water.JUnitRunnerDebug;
import water.Key;
import water.TestUtil;
import water.UKV;
import water.fvec.Frame;
import water.fvec.NFSFileVec;
import water.fvec.ParseDataset2;
import water.fvec.Vec;
import water.util.Log;
import water.util.Utils;

import java.util.Random;

public class DeepLearningVsNeuralNet extends TestUtil {
  Frame _train, _test;

  @BeforeClass public static void stall() { stall_till_cloudsize(JUnitRunnerDebug.NODES); }

  void compareVal(float a, float b, float abseps, float releps) {
    // check for exact equality
    if (Float.compare(a, b) == 0) {
    }
    // check for small relative error
    else if (Math.abs(a - b) / Math.max(Math.abs(a), Math.abs(b)) < releps) {
    }
    // check for small absolute error
    else if (Math.abs(a - b) <= abseps) {
    }
    // fail
    else {
//      Log.err("Not close enough: " + a + " " + b);
      Assert.failNotEquals("Not equal: ", a, b);
    }
  }

  @Ignore
  @Test
  public void compare() throws Exception {
    final long seed = 0xc0ffee;
    Random rng = new Random(seed);

    DeepLearning.Activation[] activations = {
            DeepLearning.Activation.Maxout,
            DeepLearning.Activation.MaxoutWithDropout,
            DeepLearning.Activation.Tanh,
            DeepLearning.Activation.TanhWithDropout,
            DeepLearning.Activation.Rectifier,
            DeepLearning.Activation.RectifierWithDropout,
    };
    DeepLearning.Loss[] losses = {
            DeepLearning.Loss.MeanSquare,
            DeepLearning.Loss.CrossEntropy
    };
    DeepLearning.InitialWeightDistribution[] dists = {
            DeepLearning.InitialWeightDistribution.Normal,
            DeepLearning.InitialWeightDistribution.Uniform,
            DeepLearning.InitialWeightDistribution.UniformAdaptive
    };
    double[] initial_weight_scales = { 1e-3 + 1e-2 * rng.nextFloat() };
    double[] holdout_ratios = { 0.7 + 0.2 * rng.nextFloat() };
    int[][] hiddens = { {1}, {1 + rng.nextInt(50)}, {17, 13}, {20, 10, 5} };
    double[] rates = { 0.005 + 1e-2 * rng.nextFloat() };
    int[] epochs = { 5 + rng.nextInt(5) };
    double[] input_dropouts = { 0, rng.nextFloat() * 0.5 };

    double p0 = 0.5 * rng.nextFloat();
    long pR = 1000 + rng.nextInt(1000);
    double p1 = 0.5 + 0.49 * rng.nextFloat();
    double l1 = 1e-5 * rng.nextFloat();
    double l2 = 1e-5 * rng.nextFloat();
    float max_w2 = Float.POSITIVE_INFINITY; //rng.nextInt(50);
    double rate_annealing = 1e-7 + rng.nextFloat() * 1e-6;

    boolean threaded = false;
    int num_repeats = 1;

    // TODO: test that Deep Learning and NeuralNet agree for Mnist dataset
//    String[] files = { "smalldata/mnist/train.csv" };
//    hiddens = new int[][]{ {50,50} };
//    threaded = true;
//    num_repeats = 5;

    // TODO: test that Deep Learning and NeuralNet agree for covtype dataset
//    String[] files = { "smalldata/covtype/covtype.20k.data.my" };
//    hiddens = new int[][]{ {100,100} };
//    epochs = new int[]{ 50 };
//    threaded = true;
//    num_repeats = 2;

    String[] files = { "smalldata/iris/iris.csv", "smalldata/neural/two_spiral.data" };

    for (DeepLearning.Activation activation : activations) {
      for (DeepLearning.Loss loss : losses) {
        for (DeepLearning.InitialWeightDistribution dist : dists) {
          for (double scale : initial_weight_scales) {
            for (double holdout_ratio : holdout_ratios) {
              for (double input_dropout : input_dropouts) {
                for (int[] hidden : hiddens) {
                  for (int epoch : epochs) {
                    for (double rate : rates) {
                      for (String file : files) {
                        for (boolean fast_mode : new boolean[]{true, false}) {
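                          // Per-combination accumulators: classification errors and layer-wise
                          // sums of weights/biases for both implementations, averaged over the
                          // repeats and compared within the tolerances defined further below.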
                          float reftrainerr = 0, trainerr = 0;
                          float reftesterr = 0, testerr = 0;
                          float[] a = new float[hidden.length + 2];
                          float[] b = new float[hidden.length + 2];
                          float[] ba = new float[hidden.length + 2];
                          float[] bb = new float[hidden.length + 2];
                          long numweights = 0, numbiases = 0;
                          for (int repeat = 0; repeat < num_repeats; ++repeat) {
                            long myseed = seed + repeat;
                            Log.info("");
                            Log.info("STARTING.");
                            Log.info("Running with " + activation.name() + " activation function and " + loss.name() + " loss function.");
                            Log.info("Initialization with " + dist.name() + " distribution and " + scale + " scale, holdout ratio " + holdout_ratio);
                            Log.info("Using seed " + seed);

                            Key kfile = NFSFileVec.make(find_test_file(file));
                            Frame frame = ParseDataset2.parse(Key.make(), new Key[]{kfile});
                            _train = sampleFrame(frame, (long)(frame.numRows() * holdout_ratio), seed);
                            _test = sampleFrame(frame, (long)(frame.numRows() * (1 - holdout_ratio)), seed + 1);

                            // Train new Deep Learning
                            Neurons[] neurons;
                            DeepLearningModel mymodel;
                            {
                              DeepLearning p = new DeepLearning();
                              p.source = (Frame)_train.clone();
                              p.response = _train.lastVec();
                              p.ignored_cols = null;
                              p.seed = myseed;
                              p.hidden = hidden;
                              p.adaptive_rate = false;
                              p.rho = 0;
                              p.epsilon = 0;
                              p.rate = rate;
                              p.activation = activation;
                              p.max_w2 = max_w2;
                              p.epochs = epoch;
                              p.input_dropout_ratio = input_dropout;
                              p.rate_annealing = rate_annealing;
                              p.loss = loss;
                              p.l1 = l1;
                              p.l2 = l2;
                              p.momentum_start = p0;
                              p.momentum_ramp = pR;
                              p.momentum_stable = p1;
                              p.initial_weight_distribution = dist;
                              p.initial_weight_scale = scale;
                              p.classification = true;
                              p.diagnostics = true;
                              p.validation = null;
                              p.quiet_mode = true;
                              p.fast_mode = fast_mode;
                              p.train_samples_per_iteration = 0; //sync once per period
                              p.ignore_const_cols = false; //same as old NeuralNet code
                              p.shuffle_training_data = false; //same as old NeuralNet code
                              p.nesterov_accelerated_gradient = true; //same as old NeuralNet code
                              p.classification_stop = -1; //don't stop early -> need to compare against old NeuralNet code, which doesn't stop either
                              p.force_load_balance = false; //keep 1 chunk for reproducibility
                              p.replicate_training_data = false;
                              p.single_node_mode = true;
                              p.invoke();
                              mymodel = UKV.get(p.dest());
                              neurons = DeepLearningTask.makeNeuronsForTesting(mymodel.model_info());
                            }

                            // Reference: NeuralNet
                            Layer[] ls;
                            NeuralNetModel refmodel;
                            NeuralNet p = new NeuralNet();
                            {
                              Vec[] data = Utils.remove(_train.vecs(), _train.vecs().length - 1);
                              Vec labels = _train.lastVec();

                              p.seed = myseed;
                              p.hidden = hidden;
                              p.rate = rate;
                              p.max_w2 = max_w2;
                              p.epochs = epoch;
                              p.input_dropout_ratio = input_dropout;
                              p.rate_annealing = rate_annealing;
                              p.l1 = l1;
                              p.l2 = l2;
                              p.momentum_start = p0;
                              p.momentum_ramp = pR;
                              p.momentum_stable = p1;
                              if (dist == DeepLearning.InitialWeightDistribution.Normal) p.initial_weight_distribution = InitialWeightDistribution.Normal;
                              else if (dist == DeepLearning.InitialWeightDistribution.Uniform) p.initial_weight_distribution = InitialWeightDistribution.Uniform;
                              else if (dist == DeepLearning.InitialWeightDistribution.UniformAdaptive) p.initial_weight_distribution = InitialWeightDistribution.UniformAdaptive;
                              p.initial_weight_scale = scale;
                              p.diagnostics = true;
                              p.fast_mode = fast_mode;
                              p.classification = true;
                              if (loss == DeepLearning.Loss.MeanSquare) p.loss = Loss.MeanSquare;
                              else if (loss == DeepLearning.Loss.CrossEntropy) p.loss = Loss.CrossEntropy;

                              ls = new Layer[hidden.length + 2];
                              ls[0] = new Layer.VecsInput(data, null);
                              for (int i = 0; i < hidden.length; ++i) {
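                                // Map each DeepLearning activation to the equivalent NeuralNet
                                // activation and hidden-layer type so both nets are built identically.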
                                if (activation == DeepLearning.Activation.Tanh) {
                                  p.activation = NeuralNet.Activation.Tanh;
                                  ls[1 + i] = new Layer.Tanh(hidden[i]);
                                } else if (activation == DeepLearning.Activation.TanhWithDropout) {
                                  p.activation = Activation.TanhWithDropout;
                                  ls[1 + i] = new Layer.TanhDropout(hidden[i]);
                                } else if (activation == DeepLearning.Activation.Rectifier) {
                                  p.activation = Activation.Rectifier;
                                  ls[1 + i] = new Layer.Rectifier(hidden[i]);
                                } else if (activation == DeepLearning.Activation.RectifierWithDropout) {
                                  p.activation = Activation.RectifierWithDropout;
                                  ls[1 + i] = new Layer.RectifierDropout(hidden[i]);
                                } else if (activation == DeepLearning.Activation.Maxout) {
                                  p.activation = Activation.Maxout;
                                  ls[1 + i] = new Layer.Maxout(hidden[i]);
                                } else if (activation == DeepLearning.Activation.MaxoutWithDropout) {
                                  p.activation = Activation.MaxoutWithDropout;
                                  ls[1 + i] = new Layer.MaxoutDropout(hidden[i]);
                                }
                              }
                              ls[ls.length - 1] = new Layer.VecSoftmax(labels, null);
                              for (int i = 0; i < ls.length; i++) {
                                ls[i].init(ls, i, p);
                              }

                              Trainer trainer;
                              if (threaded) trainer = new Trainer.Threaded(ls, p.epochs, null, -1);
                              else trainer = new Trainer.Direct(ls, p.epochs, null);
                              trainer.start();
                              trainer.join();

                              refmodel = new NeuralNetModel(null, null, _train, ls, p);
                            }

                            /**
                             * Compare MEAN weights and biases in hidden and output layer
                             */
                            for (int n = 1; n < ls.length; ++n) {
                              Neurons l = neurons[n];
                              Layer ref = ls[n];
                              for (int o = 0; o < l._a.size(); o++) {
                                for (int i = 0; i < l._previous._a.size(); i++) {
                                  a[n] += ref._w[o * l._previous._a.size() + i];
                                  b[n] += l._w.raw()[o * l._previous._a.size() + i];
                                  numweights++;
                                }
                                ba[n] += ref._b[o];
                                bb[n] += l._b.get(o);
                                numbiases++;
                              }
                            }

                            /**
                             * Compare predictions
                             * Note: Reference and H2O each do their internal data normalization,
                             * so we must use their "own" test data, which is assumed to be created correctly.
                             */
                            water.api.ConfusionMatrix CM = new water.api.ConfusionMatrix();

                            // Deep Learning scoring
                            {
                              //training set
                              Frame fpreds = mymodel.score(_train); //[0] is label, [1]...[4] are the probabilities
                              CM = new water.api.ConfusionMatrix();
                              CM.actual = _train;
                              CM.vactual = _train.lastVec();
                              CM.predict = fpreds;
                              CM.vpredict = fpreds.vecs()[0];
                              CM.invoke();
                              StringBuilder sb = new StringBuilder();
                              CM.toASCII(sb);
                              trainerr += new ConfusionMatrix(CM.cm).err();
                              for (String s : sb.toString().split("\n")) Log.info(s);
                              fpreds.delete();

                              //test set
                              Frame fpreds2 = mymodel.score(_test); //[0] is label, [1]...[4] are the probabilities
                              CM = new water.api.ConfusionMatrix();
                              CM.actual = _test;
                              CM.vactual = _test.lastVec();
                              CM.predict = fpreds2;
                              CM.vpredict = fpreds2.vecs()[0];
                              CM.invoke();
                              sb = new StringBuilder();
                              CM.toASCII(sb);
                              testerr += new ConfusionMatrix(CM.cm).err();
                              for (String s : sb.toString().split("\n")) Log.info(s);
                              fpreds2.delete();
                            }

                            // NeuralNet scoring
                            long[][] cm;
                            {
                              Log.info("\nNeuralNet Scoring:");
                              //training set
                              NeuralNet.Errors train = NeuralNet.eval(ls, 0, null);
                              reftrainerr += train.classification;

                              //test set
                              final Frame[] adapted = refmodel.adapt(_test, false);
                              Vec[] data = Utils.remove(_test.vecs(), _test.vecs().length - 1);
                              Vec labels = _test.vecs()[_test.vecs().length - 1];
                              Layer.VecsInput input = (Layer.VecsInput) ls[0];
                              input.vecs = data;
                              input._len = data[0].length();
                              ((Layer.VecSoftmax) ls[ls.length - 1]).vec = labels;
                              int classes = ls[ls.length - 1].units; //WARNING: only works if training set is large enough to have all classes
                              cm = new long[classes][classes];
                              NeuralNet.Errors test = NeuralNet.eval(ls, 0, cm);
                              Log.info("\nNeuralNet Confusion Matrix:");
                              Log.info(new ConfusionMatrix(cm).toString());
                              reftesterr += test.classification;
                              adapted[1].delete();
                            }
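                            // Both implementations must agree exactly on the upper-left 2x2 block
                            // of the test-set confusion matrix before the averaged errors are compared.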
                            Assert.assertEquals(cm[0][0], CM.cm[0][0]);
                            Assert.assertEquals(cm[1][0], CM.cm[1][0]);
                            Assert.assertEquals(cm[0][1], CM.cm[0][1]);
                            Assert.assertEquals(cm[1][1], CM.cm[1][1]);

                            // cleanup
                            mymodel.delete();
                            refmodel.delete();
                            _train.delete();
                            _test.delete();
                            frame.delete();
                          }
                          trainerr /= (float)num_repeats;
                          reftrainerr /= (float)num_repeats;
                          testerr /= (float)num_repeats;
                          reftesterr /= (float)num_repeats;

                          /**
                           * Tolerances
                           */
                          final float abseps = threaded ? 1e-2f : 1e-7f;
                          final float releps = threaded ? 1e-2f : 1e-5f;

                          // training set scoring
                          Log.info("NeuralNet train error " + reftrainerr);
                          Log.info("Deep Learning train error " + trainerr);
                          compareVal(reftrainerr, trainerr, abseps, releps);

                          // test set scoring
                          Log.info("NeuralNet test error " + reftesterr);
                          Log.info("Deep Learning test error " + testerr);
                          compareVal(reftesterr, testerr, abseps, releps);

                          // mean weights/biases
                          for (int n = 1; n < hidden.length + 2; ++n) {
                            Log.info("NeuralNet mean weight for layer " + n + ": " + a[n] / numweights);
                            Log.info("Deep Learning mean weight for layer " + n + ": " + b[n] / numweights);
                            Log.info("NeuralNet mean bias for layer " + n + ": " + ba[n] / numbiases);
                            Log.info("Deep Learning mean bias for layer " + n + ": " + bb[n] / numbiases);
                            compareVal(a[n] / numweights, b[n] / numweights, abseps, releps);
                            compareVal(ba[n] / numbiases, bb[n] / numbiases, abseps, releps);
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}