package water.fvec; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import water.*; import water.parser.ParseDataset; import water.util.Log; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; public class ExportTest extends TestUtil { @Rule public TemporaryFolder tmpFolder = new TemporaryFolder(); @BeforeClass public static void setup() { stall_till_cloudsize(1); } @Test public void testExport() throws IOException { Frame fr = parse_test_file("smalldata/airlines/airlineUUID.csv"); Key rebalancedKey = Key.make("rebalanced"); Frame rebalanced = null; Frame imported = null; int[] partSpec = {1, 4, 7, 30, -1}; int[] expPart = {1, 4, 6, 17, -1}; for (int i = 0; i < partSpec.length; i++) { Log.info("Testing export to " + partSpec[i] + " files."); try { int parts = partSpec[i]; Scope.enter(); rebalanced = rebalance(fr, rebalancedKey, 17); File folder = tmpFolder.newFolder("export_" + parts); File target = (parts == 1) ? new File(folder, "data.csv") : folder; Log.info("Should output #" + expPart[i] + " part files to " + target.getPath() + "."); Frame.export(rebalanced, target.getPath(), "export", false, parts).get(); // check the number of produced part files (only if the number was given) if (expPart[i] != -1) { assertEquals(expPart[i], folder.listFiles().length); if (parts == 1) { assertTrue(target.exists()); } else { for (int j = 0; j < expPart[i]; j++) { String suffix = (j < 10) ? "0000" + j : "000" + j; assertTrue(new File(folder, "part-m-" + suffix).exists()); } } } assertTrue(target.exists()); imported = parseFolder(folder); assertEquals(fr.numRows(), imported.numRows()); assertTrue(TestUtil.isBitIdentical(fr, imported)); } finally { if (rebalanced != null) rebalanced.delete(); if (imported != null) imported.delete(); Scope.exit(); } } fr.delete(); } private static Frame rebalance(Frame fr, Key targetKey, int nChunks) { RebalanceDataSet rb = new RebalanceDataSet(fr, targetKey, nChunks); H2O.submitTask(rb); rb.join(); return DKV.get(targetKey).get(); } private static Frame parseFolder(File folder) { assert folder.isDirectory(); File[] files = folder.listFiles(); assert files != null; Arrays.sort(files); ArrayList<Key> keys = new ArrayList<>(); for( File f : files ) if( f.isFile() ) keys.add(NFSFileVec.make(f)._key); Key[] res = new Key[keys.size()]; keys.toArray(res); return ParseDataset.parse(Key.make(), res); } }