package hex.deeplearning;

import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import water.DKV;
import water.H2O;
import water.Key;
import water.TestUtil;
import water.fvec.Frame;
import water.fvec.NFSFileVec;
import water.fvec.RebalanceDataSet;
import water.fvec.Vec;
import water.parser.ParseDataset;
import hex.deeplearning.DeepLearningModel.DeepLearningParameters;

/**
 * This test simulates the environment produced by Spark: a dataset
 * divided into many small chunks, some of which are empty.
 */
public class DeepLearningScoreTest extends TestUtil {
  @BeforeClass
  public static void setup() { stall_till_cloudsize(1); }

  /** Load a simple dataset, rebalance it to more chunks than rows, and run Deep Learning. */
  @Test public void testPubDev928() {
    // Create rebalanced dataset
    Key rebalancedKey = Key.make("rebalanced");
    NFSFileVec nfs = TestUtil.makeNfsFileVec("smalldata/logreg/prostate.csv");
    Frame fr = ParseDataset.parse(Key.make(), nfs._key);
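    // Ask for more chunks than rows: by pigeonhole, at least one chunk must end up empty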
    RebalanceDataSet rb = new RebalanceDataSet(fr, rebalancedKey, (int) (fr.numRows() + 1));
    H2O.submitTask(rb);
    rb.join();
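    // Fetch the rebalanced frame back from the distributed key/value store (DKV)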
    Frame rebalanced = DKV.get(rebalancedKey).get();
    // Assert that there is at least one 0-len chunk
    assertZeroLengthChunk("Rebalanced dataset should contain at least one 0-len chunk!", rebalanced.anyVec());

    DeepLearningModel dlModel = null;
    try {
      // Launch Deep Learning
      DeepLearningParameters dlParams = new DeepLearningParameters();
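      // Train on the rebalanced frame; its empty chunks exercise the PUBDEV-928 code path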
      dlParams._train = rebalancedKey;
      dlParams._epochs = 5;
      dlParams._response_column = "CAPSULE";
      dlModel = new DeepLearning(dlParams).trainModel().get();
    } finally {
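      // Remove all created keys from the DKV regardless of test outcome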
      fr.delete();
      rebalanced.delete();
      if (dlModel != null) dlModel.delete();
    }
  }

  private void assertZeroLengthChunk(String msg, Vec v) {
    boolean hasZeroLenChunk = false;
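    // Walk every chunk of the vector, recording whether any contains zero rows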
    for (int i = 0; i < v.nChunks(); i++) {
      int chunkLen = v.chunkForChunkIdx(i).len();
      hasZeroLenChunk |= (chunkLen == 0);
      System.out.println(chunkLen);
    }
    Assert.assertTrue(msg, hasZeroLenChunk);
}