package hex.deeplearning;

import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import water.DKV;
import water.H2O;
import water.Key;
import water.TestUtil;
import water.fvec.Frame;
import water.fvec.NFSFileVec;
import water.fvec.RebalanceDataSet;
import water.fvec.Vec;
import water.parser.ParseDataset;
import hex.deeplearning.DeepLearningModel.DeepLearningParameters;

/**
 * This test simulates an environment produced by Spark: a dataset
 * divided into many small chunks, some of which are empty.
 */
public class DeepLearningScoreTest extends TestUtil {
  @BeforeClass
  public static void setup() { stall_till_cloudsize(1); }

  /** Load a simple dataset, rebalance it to more chunks than rows, and run Deep Learning. */
  @Test
  public void testPubDev928() {
    // Create a rebalanced dataset with more chunks than rows,
    // which guarantees at least one empty chunk
    Key rebalancedKey = Key.make("rebalanced");
    NFSFileVec nfs = TestUtil.makeNfsFileVec("smalldata/logreg/prostate.csv");
    Frame fr = ParseDataset.parse(Key.make(), nfs._key);
    RebalanceDataSet rb = new RebalanceDataSet(fr, rebalancedKey, (int) (fr.numRows() + 1));
    H2O.submitTask(rb);
    rb.join();
    Frame rebalanced = DKV.get(rebalancedKey).get();

    // Assert that there is at least one 0-length chunk
    assertZeroLengthChunk("Rebalanced dataset should contain at least one 0-len chunk!", rebalanced.anyVec());

    DeepLearningModel dlModel = null;
    try {
      // Launch Deep Learning on the rebalanced dataset
      DeepLearningParameters dlParams = new DeepLearningParameters();
      dlParams._train = rebalancedKey;
      dlParams._epochs = 5;
      dlParams._response_column = "CAPSULE";
      dlModel = new DeepLearning(dlParams).trainModel().get();
    } finally {
      // Clean up the frames and the model regardless of test outcome
      fr.delete();
      rebalanced.delete();
      if (dlModel != null) dlModel.delete();
    }
  }

  /** Assert that the given Vec contains at least one zero-length chunk. */
  private void assertZeroLengthChunk(String msg, Vec v) {
    boolean hasZeroLenChunk = false;
    for (int i = 0; i < v.nChunks(); i++) {
      hasZeroLenChunk |= (v.chunkForChunkIdx(i).len() == 0);
    }
    Assert.assertTrue(msg, hasZeroLenChunk);
  }
}