package water.fvec;

import static org.junit.Assert.*;

import org.junit.*;
import water.*;
import water.parser.ParseDataset;
import water.util.FileUtils;
import water.util.FrameUtils;
import water.util.Log;

public class RebalanceDatasetTest extends TestUtil {
  @BeforeClass public static void setup() { stall_till_cloudsize(1); }

  @Test public void testProstate() {
    NFSFileVec[] nfs = new NFSFileVec[]{
        TestUtil.makeNfsFileVec("smalldata/logreg/prostate.csv"),
        TestUtil.makeNfsFileVec("smalldata/covtype/covtype.20k.data"),
        TestUtil.makeNfsFileVec("smalldata/chicago/chicagoCrimes10k.csv.zip")};
        //NFSFileVec.make(find_test_file("bigdata/laptop/usecases/cup98VAL_z.csv"))};

    for (NFSFileVec fv : nfs) {
      Frame fr = ParseDataset.parse(Key.make(), fv._key);
      Key rebalancedKey = Key.make("rebalanced");
      int[] trials = {380, 1, 3, 8, 9, 12, 256, 16, 32, 64, 11, 13};
      for (int i : trials) {
        Frame rebalanced = null;
        try {
          Scope.enter();
          // Rebalance the parsed frame into i chunks and block until the task completes.
          RebalanceDataSet rb = new RebalanceDataSet(fr, rebalancedKey, i);
          H2O.submitTask(rb);
          rb.join();
          rebalanced = DKV.get(rebalancedKey).get();
          ParseDataset.logParseResults(rebalanced);
          // The rebalanced frame must keep the same row count, end up with the requested
          // number of chunks, and remain numerically identical to the original.
          assertEquals(rebalanced.numRows(), fr.numRows());
          assertEquals(rebalanced.anyVec().nChunks(), i);
          assertTrue(TestUtil.isIdenticalUpToRelTolerance(fr, rebalanced, 1e-10));
          Log.info("Rebalanced into " + i + " chunks:");
          Log.info(FrameUtils.chunkSummary(rebalanced).toString());
        } finally {
          if (rebalanced != null) rebalanced.delete();
          Scope.exit();
        }
      }
      if (fr != null) fr.delete();
    }
  }
}