DeepLearningProstateTest.java example

Explorer
h2o-3-master
package hex.deeplearning;

import hex.ConfusionMatrix;
import hex.deeplearning.DeepLearningModel.DeepLearningParameters;
import hex.deeplearning.DeepLearningModel.DeepLearningParameters.ClassSamplingMethod;
import hex.genmodel.utils.DistributionFamily;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import water.DKV;
import water.H2O;
import water.Key;
import water.TestUtil;
import water.exceptions.H2OModelBuilderIllegalArgumentException;
import water.fvec.Frame;
import water.fvec.NFSFileVec;
import water.fvec.Vec;
import water.parser.ParseDataset;
import water.rapids.Rapids;
import water.util.FileUtils;
import water.util.Log;

import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Random;

import static hex.ConfusionMatrix.buildCM;

public class DeepLearningProstateTest extends TestUtil {
  @BeforeClass() public static void setup() { stall_till_cloudsize(1); }

  @Test public void run() throws Exception { runFraction(0.00002f); }

  public void runFraction(float fraction) {
    long seed = 0xDECAFFF;
    Random rng = new Random(seed);
    String[] datasets = new String[2];
    int[][] responses = new int[datasets.length][];
    datasets[0] = "smalldata/logreg/prostate.csv"; responses[0] = new int[]{1,2,8}; //CAPSULE (binomial), AGE (regression), GLEASON (multi-class)
    datasets[1] = "smalldata/iris/iris.csv";  responses[1] = new int[]{4}; //Iris-type (multi-class)
    HashSet<Long> checkSums = new LinkedHashSet<>();

    int testcount = 0;
    int count = 0;
    for (int i = 0; i < datasets.length; ++i) {
      final String dataset = datasets[i];
        for (final int resp : responses[i]) {
          Frame frame=null, vframe=null;
          try {
            NFSFileVec nfs = TestUtil.makeNfsFileVec(dataset);
            frame = ParseDataset.parse(Key.make(), nfs._key);
            NFSFileVec vnfs = TestUtil.makeNfsFileVec(dataset);
            vframe = ParseDataset.parse(Key.make(), vnfs._key);
            boolean classification = !(i == 0 && resp == 2);
            String respname = frame.name(resp);
            if (classification && !frame.vec(resp).isCategorical()) {
              Vec r = frame.vec(resp).toCategoricalVec();
              frame.remove(resp).remove();
              frame.add(respname, r);
              DKV.put(frame);

              Vec vr = vframe.vec(respname).toCategoricalVec();
              vframe.remove(respname).remove();
              vframe.add(respname, vr);
              DKV.put(vframe);
            }
            if (classification) {
              assert (frame.vec(respname).isCategorical());
              assert (vframe.vec(respname).isCategorical());
            }
            for (DeepLearningParameters.Loss loss : new DeepLearningParameters.Loss[]{
                DeepLearningParameters.Loss.Automatic,
                DeepLearningParameters.Loss.CrossEntropy,
                DeepLearningParameters.Loss.Huber,
//                DeepLearningParameters.Loss.ModifiedHuber,
                DeepLearningParameters.Loss.Absolute,
                DeepLearningParameters.Loss.Quadratic
            }) {
              if (!classification && (loss == DeepLearningParameters.Loss.CrossEntropy || loss == DeepLearningParameters.Loss.ModifiedHuber))
                continue;
              for (DistributionFamily dist : new DistributionFamily[]{
                  DistributionFamily.AUTO,
                  DistributionFamily.laplace,
                  DistributionFamily.huber,
//                  DistributionFamily.modified_huber,
                  DistributionFamily.bernoulli,
                  DistributionFamily.gaussian,
                  DistributionFamily.poisson,
                  DistributionFamily.tweedie,
                  DistributionFamily.gamma
              }) {
                if (classification && dist != DistributionFamily.multinomial && dist != DistributionFamily.bernoulli && dist != DistributionFamily.modified_huber)
                  continue;
                if (!classification) {
                  if (dist == DistributionFamily.multinomial || dist == DistributionFamily.bernoulli || dist == DistributionFamily.modified_huber)
                    continue;
                }
                boolean cont =false;
                switch (dist) {
                  case tweedie:
                  case gamma:
                  case poisson:
                    if (loss != DeepLearningParameters.Loss.Automatic)
                      cont=true;
                    break;
                  case huber:
                    if (loss != DeepLearningParameters.Loss.Huber && loss != DeepLearningParameters.Loss.Automatic)
                      cont=true;
                    break;
                  case laplace:
                    if (loss != DeepLearningParameters.Loss.Absolute && loss != DeepLearningParameters.Loss.Automatic)
                      cont=true;
                    break;
                  case modified_huber:
                    if (loss != DeepLearningParameters.Loss.ModifiedHuber && loss != DeepLearningParameters.Loss.Automatic)
                      cont=true;
                    break;
                  case bernoulli:
                    if (loss != DeepLearningParameters.Loss.CrossEntropy && loss != DeepLearningParameters.Loss.Automatic)
                      cont=true;
                    break;
                }
                if (cont) continue;

                for (boolean elastic_averaging : new boolean[]{
                    true,
                    false,
                }) {
                  for (boolean replicate : new boolean[]{
                      true,
                      false,
                  }) {
                    for (DeepLearningParameters.Activation activation : new DeepLearningParameters.Activation[]{
                        DeepLearningParameters.Activation.Tanh,
                        DeepLearningParameters.Activation.TanhWithDropout,
                        DeepLearningParameters.Activation.Rectifier,
                        DeepLearningParameters.Activation.RectifierWithDropout,
                        DeepLearningParameters.Activation.Maxout,
                        DeepLearningParameters.Activation.MaxoutWithDropout,
                    }) {
                      boolean reproducible = false;
                      switch (dist) {
                        case tweedie:
                        case gamma:
                        case poisson:
                          reproducible = true; //don't remember why - probably to force stability
                        default:
                      }
                      for (boolean load_balance : new boolean[]{
                          true,
                          false,
                      }) {
                        for (boolean shuffle : new boolean[]{
                            true,
                            false,
                        }) {
                          for (boolean balance_classes : new boolean[]{
                              true,
                              false,
                          }) {
                            for (ClassSamplingMethod csm : new ClassSamplingMethod[]{
                                ClassSamplingMethod.Stratified,
                                ClassSamplingMethod.Uniform
                            }) {
                              for (int scoretraining : new int[]{
                                  200,
                                  20,
                                  0,
                              }) {
                                for (int scorevalidation : new int[]{
                                    200,
                                    20,
                                    0,
                                }) {
                                  for (int vf : new int[]{
                                      0,  //no validation
                                      1,  //same as source
                                      -1, //different validation frame
                                  }) {
                                    for (int n_folds : new int[]{
                                        0,
                                        2,
                                    }) {
                                      if (n_folds > 0 && balance_classes) continue; //FIXME: Add back

                                      for (boolean overwrite_with_best_model : new boolean[]{false, true}) {
                                        for (int train_samples_per_iteration : new int[]{
                                            -2, //auto-tune
                                            -1, //N epochs per iteration
                                            0, //1 epoch per iteration
                                            rng.nextInt(200), // <1 epoch per iteration
                                            500, //>1 epoch per iteration
                                        }) {
                                          DeepLearningModel model1 = null, model2 = null;
                                          count++;
                                          if (fraction < rng.nextFloat()) continue;

                                          try {
                                            Log.info("**************************)");
                                            Log.info("Starting test #" + count);
                                            Log.info("**************************)");
                                            final double epochs = 7 + rng.nextDouble() + rng.nextInt(4);
                                            final int[] hidden = new int[]{3 + rng.nextInt(4), 3 + rng.nextInt(6)};
                                            final double[] hidden_dropout_ratios = activation.name().contains("Hidden") ? new double[]{rng.nextFloat(), rng.nextFloat()} : null;
                                            Frame valid = null; //no validation
                                            if (vf == 1) valid = frame; //use the same frame for validation
                                            else if (vf == -1)
                                              valid = vframe; //different validation frame (here: from the same file)
                                            long myseed = rng.nextLong();
                                            boolean replicate2 = rng.nextBoolean();
                                            boolean elastic_averaging2 = rng.nextBoolean();

                                            // build the model, with all kinds of shuffling/rebalancing/sampling
                                            DeepLearningParameters p = new DeepLearningParameters();
                                            {
                                              Log.info("Using seed: " + myseed);
                                              p._train = frame._key;
                                              p._response_column = respname;
                                              p._valid = valid == null ? null : valid._key;

                                              p._hidden = hidden;
                                              p._input_dropout_ratio = 0.1;
                                              p._hidden_dropout_ratios = hidden_dropout_ratios;
                                              p._activation = activation;
//                                      p.best_model_key = best_model_key;
                                              p._overwrite_with_best_model = overwrite_with_best_model;
                                              p._epochs = epochs;
                                              p._loss = loss;
                                              p._distribution = dist;
                                              p._nfolds = n_folds;
                                              p._seed = myseed;
                                              p._train_samples_per_iteration = train_samples_per_iteration;
                                              p._force_load_balance = load_balance;
                                              p._replicate_training_data = replicate;
                                              p._reproducible = reproducible;
                                              p._shuffle_training_data = shuffle;
                                              p._score_training_samples = scoretraining;
                                              p._score_validation_samples = scorevalidation;
                                              p._classification_stop = -1;
                                              p._regression_stop = -1;
                                              p._stopping_rounds = 0;
                                              p._balance_classes = classification && balance_classes;
                                              p._quiet_mode = true;
                                              p._score_validation_sampling = csm;
                                              p._elastic_averaging = elastic_averaging;
//                                      Log.info(new String(p.writeJSON(new AutoBuffer()).buf()).replace(",","\n"));
                                              DeepLearning dl = new DeepLearning(p, Key.<DeepLearningModel>make(Key.make().toString() + "first"));
                                              try {
                                                model1 = dl.trainModel().get();
                                                checkSums.add(model1.checksum());
                                                testcount++;
                                              } catch (Throwable t) {
                                                model1 = DKV.getGet(dl.dest());
                                                if (model1 != null)
                                                  Assert.assertTrue(model1._output._job.isCrashed());
                                                throw t;
                                              }
                                              Log.info("Trained for " + model1.epoch_counter + " epochs.");
                                              assert (((p._train_samples_per_iteration <= 0 || p._train_samples_per_iteration >= frame.numRows()) && model1.epoch_counter > epochs)
                                                  || Math.abs(model1.epoch_counter - epochs) / epochs < 0.20);

                                              // check that iteration is of the expected length - check via when first scoring happens
                                              if (p._train_samples_per_iteration == 0) {
                                                // no sampling - every node does its share of the full data
                                                if (!replicate)
                                                  assert ((double) model1._output._scoring_history.get(1, 3) == 1);
                                                  // sampling on each node - replicated data
                                                else
                                                  assert ((double) model1._output._scoring_history.get(1, 3) > 0.7 && (double) model1._output._scoring_history.get(1, 3) < 1.3)
                                                      : ("First scoring at " + model1._output._scoring_history.get(1, 3) + " epochs, should be closer to 1!" + "\n" + model1.toString());
                                              } else if (p._train_samples_per_iteration == -1) {
                                                // no sampling - every node does its share of the full data
                                                if (!replicate)
                                                  assert ((double) model1._output._scoring_history.get(1, 3) == 1);
                                                  // every node passes over the full dataset
                                                else {
                                                  if (!reproducible)
                                                    assert ((double) model1._output._scoring_history.get(1, 3) == H2O.CLOUD.size());
                                                }
                                              }

                                              if (n_folds != 0) {
                                                assert (model1._output._cross_validation_metrics != null);
                                              } else {
                                                assert (model1._output._cross_validation_metrics == null);
                                              }

                                            }

                                            assert (model1.model_info().get_params()._l1 == 0);
                                            assert (model1.model_info().get_params()._l2 == 0);
                                            Assert.assertFalse(model1._output._job.isCrashed());

                                            if (n_folds != 0) continue;
                                            // Do some more training via checkpoint restart
                                            // For n_folds, continue without n_folds (not yet implemented) - from now on, model2 will have n_folds=0...
                                            DeepLearningParameters p2 = new DeepLearningParameters();
                                            Assert.assertTrue(model1.model_info().get_processed_total() >= frame.numRows() * epochs);

                                            {
                                              p2._checkpoint = model1._key;
                                              p2._distribution = dist;
                                              p2._loss = loss;
                                              p2._nfolds = n_folds;
                                              p2._train = frame._key;
                                              p2._activation = activation;
                                              p2._hidden = hidden;
                                              p2._valid = valid == null ? null : valid._key;
                                              p2._l1 = 1e-3;
                                              p2._l2 = 1e-3;
                                              p2._reproducible = reproducible;
                                              p2._response_column = respname;
                                              p2._overwrite_with_best_model = overwrite_with_best_model;
                                              p2._quiet_mode = true;
                                              p2._epochs = 2 * epochs; //final amount of training epochs
                                              p2._replicate_training_data = replicate2;
                                              p2._stopping_rounds = 0;
                                              p2._seed = myseed;
//                                              p2._loss = loss; //fall back to default
//                                              p2._distribution = dist; //fall back to default
                                              p2._train_samples_per_iteration = train_samples_per_iteration;
                                              p2._balance_classes = classification && balance_classes;
                                              p2._elastic_averaging = elastic_averaging2;
                                              DeepLearning dl = new DeepLearning(p2);
                                              try {
                                                model2 = dl.trainModel().get();
                                              } catch (Throwable t) {
                                                model2 = DKV.getGet(dl.dest());
                                                if (model2 != null)
                                                  Assert.assertTrue(model2._output._job.isCrashed());
                                                throw t;
                                              }
                                            }
                                            Assert.assertTrue(model1._output._job.isDone());
                                            Assert.assertTrue(model2._output._job.isDone());

                                            assert (model1._parms != p2);
                                            assert (model1.model_info().get_params() != model2.model_info().get_params());

                                            assert (model1.model_info().get_params()._l1 == 0);
                                            assert (model1.model_info().get_params()._l2 == 0);

                                            if (!overwrite_with_best_model)
                                              Assert.assertTrue(model2.model_info().get_processed_total() >= frame.numRows() * 2 * epochs);

                                            assert (p != p2);
                                            assert (p != model1.model_info().get_params());
                                            assert (p2 != model2.model_info().get_params());

                                            if (p._loss == DeepLearningParameters.Loss.Automatic) {
                                              assert (p2._loss == DeepLearningParameters.Loss.Automatic);
//                                              assert(model1.model_info().get_params()._loss != DeepLearningParameters.Loss.Automatic);
//                                              assert(model2.model_info().get_params()._loss != DeepLearningParameters.Loss.Automatic);
                                            }
                                            assert (p._hidden_dropout_ratios == null);
                                            assert (p2._hidden_dropout_ratios == null);
                                            if (p._activation.toString().contains("WithDropout")) {
                                              assert (model1.model_info().get_params()._hidden_dropout_ratios != null);
                                              assert (model2.model_info().get_params()._hidden_dropout_ratios != null);
                                              assert (Arrays.equals(
                                                  model1.model_info().get_params()._hidden_dropout_ratios,
                                                  model2.model_info().get_params()._hidden_dropout_ratios));
                                            }
                                            assert (p._l1 == 0);
                                            assert (p._l2 == 0);
                                            assert (p2._l1 == 1e-3);
                                            assert (p2._l2 == 1e-3);
                                            assert (model1.model_info().get_params()._l1 == 0);
                                            assert (model1.model_info().get_params()._l2 == 0);
                                            assert (model2.model_info().get_params()._l1 == 1e-3);
                                            assert (model2.model_info().get_params()._l2 == 1e-3);

                                            if (valid == null) valid = frame;
                                            double threshold;
                                            if (model2._output.isClassifier()) {
                                              Frame pred = null;
                                              Vec labels, predlabels, pred2labels;
                                              try {
                                                pred = model2.score(valid);
                                                DKV.put(Key.make("pred"), pred);
                                                // Build a POJO, validate same results
                                                if (!model2.testJavaScoring(valid, pred, 1e-6)) {
                                                  model2.testJavaScoring(valid, pred, 1e-6);
                                                }
                                                Assert.assertTrue(model2.testJavaScoring(valid, pred, 1e-6));

                                                hex.ModelMetrics mm = hex.ModelMetrics.getFromDKV(model2, valid);
                                                double error;
                                                // binary
                                                if (model2._output.nclasses() == 2) {
                                                  assert (resp == 1);
                                                  threshold = mm.auc_obj().defaultThreshold();
                                                  error = mm.auc_obj().defaultErr();
                                                  // check that auc.cm() is the right CM
                                                  Assert.assertEquals(new ConfusionMatrix(mm.auc_obj().defaultCM(), valid.vec(respname).domain()).err(), error, 1e-15);
                                                  // check that calcError() is consistent as well (for CM=null, AUC!=null)
                                                  Assert.assertEquals(mm.cm().err(), error, 1e-15);

                                                  // check that the labels made with the default threshold are consistent with the CM that's reported by the AUC object
                                                  labels = valid.vec(respname);
                                                  predlabels = pred.vecs()[0];
                                                  ConfusionMatrix cm = buildCM(labels, predlabels);
                                                  Log.info("CM from pre-made labels:");
                                                  Log.info(cm.toASCII());
                                                  if (Math.abs(cm.err() - error) > 2e-2) {
                                                    ConfusionMatrix cm2 = buildCM(labels, predlabels);
                                                    Log.info(cm2.toASCII());
                                                  }
                                                  Assert.assertEquals(cm.err(), error, 2e-2);

                                                  // confirm that orig CM was made with the right threshold
                                                  // manually make labels with AUC-given default threshold
                                                  String ast = "(as.factor (> (cols pred [2]) " + threshold + "))";
                                                  Frame tmp = Rapids.exec(ast).getFrame();
                                                  pred2labels = tmp.vecs()[0];
                                                  cm = buildCM(labels, pred2labels);
                                                  Log.info("CM from self-made labels:");
                                                  Log.info(cm.toASCII());
                                                  Assert.assertEquals(cm.err(), error, 2e-2); //AUC-given F1-optimal threshold might not reproduce AUC-given CM-error identically, but should match up to 2%
                                                  tmp.delete();
                                                }
                                                DKV.remove(Key.make("pred"));
                                              } finally {
                                                if (pred != null) pred.delete();
                                              }
                                            } //classifier
                                            else {
                                              Frame pred = null;
                                              try {
                                                pred = model2.score(valid);
                                                // Build a POJO, validate same results
                                                Assert.assertTrue(model2.testJavaScoring(frame, pred, 1e-6));
                                              } finally {
                                                if (pred != null) pred.delete();
                                              }
                                            }
                                            Log.info("Parameters combination " + count + ": PASS");
                                          } catch (H2OModelBuilderIllegalArgumentException | IllegalArgumentException ex) {
                                            System.err.println(ex);
                                            throw H2O.fail("should not get here");
                                          } catch (RuntimeException t) {
                                            String msg = "" + t.getMessage() + // this way we evade null messages
                                                (t.getCause() == null ? "" : t.getCause().getMessage());
                                            Assert.assertTrue("Unexpected exception " + t + ": " + msg, msg.contains("unstable"));
                                          } catch (AssertionError ae) {
                                            throw ae; // test assertions should be preserved
                                          } catch (Throwable t) {
                                            t.printStackTrace();
                                            throw new RuntimeException(t);
                                          } finally {
                                            if (model1 != null) {
                                              model1.deleteCrossValidationModels();
                                              model1.delete();
                                            }
                                            if (model2 != null) {
                                              model2.deleteCrossValidationModels();
                                              model2.delete();
                                            }
                                          }
                                        }
                                      }
                                    }
                                  }
                                }
                              }
                            }
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
          finally {
            if (frame!=null) frame.delete();
            if (vframe!=null) vframe.delete();
          }
      }
    }
    Log.info("\n\n=============================================");
    Log.info("Tested " + testcount + " out of " + count + " parameter combinations.");
    Log.info("=============================================");
    if (checkSums.size() != testcount) {
      Log.info("Only found " + checkSums.size() + " unique checksums.");
    }
    Assert.assertTrue(checkSums.size() == testcount);
    }

    public static class Mid extends DeepLearningProstateTest {
    @Test @Ignore public void run() throws Exception { runFraction(0.01f); } //for nightly tests
  }

  public static class Short extends DeepLearningProstateTest {
    @Test @Ignore public void run() throws Exception { runFraction(0.001f); }
  }
}