package water.exec; import static org.junit.Assert.fail; import org.junit.BeforeClass; import org.junit.Test; import water.Key; import water.Lockable; import water.TestUtil; import water.fvec.Frame; public class DdplyTest extends TestUtil { @BeforeClass public static void stall() { stall_till_cloudsize(3); } // This test is intended to use a file large enough to strip across multiple // nodes with multiple groups, to test that all generated groups are both // built and executed distributed. @Test public void testDdplyBig() { Key k0 = Key.make("cars.hex"); Key k1 = Key.make("orange.hex"); try { Frame fr0 = parseFrame(k0,"smalldata/cars.csv"); checkStr("ddply(cars.hex,c(3),nrow)"); // More complex multi-return checkStr("ddply(cars.hex,c(3),function(x) {cbind(mean(x[,2]),mean(x[,3]))})"); // A big enough file to distribute across multiple nodes. // Trimmed down to run in reasonable time. //Frame fr1 = parseFrame(k1,"smalldata/unbalanced/orange_small_train.data.zip"); //checkStr("ddply(orange.hex,c(7),nrow)"); //checkStr("ddply(orange.hex,c(206,207),function(x){ cbind( mean(x$Var6), sum(x$Var6+x$Var7) ) })"); // A more complex ddply that works as of 3/1/2014 but is slow for a junit //checkStr("ddply(orange.hex,c(206,207),function(x){"+ // "max6 = max(x$Var6);"+ // "min6 = min(x$Var6);"+ // "len = max6-min6+1;"+ // "tot = sum(x$Var7);"+ // "avg = tot/len"+ // "})"); } finally { Lockable.delete(k0); // Remove original hex frame key Lockable.delete(k1); // Remove original hex frame key } } void checkStr( String s ) { Env env=null; try { env = Exec2.exec(s); if( env.isAry() ) { // Print complete frames for inspection Frame res = env.popAry(); String skey = env.key(); System.out.println(res.toStringAll()); env.subRef(res,skey); // But then end lifetime } else { System.out.println( env.resultString() ); fail("Not a Frame result"); } } catch( IllegalArgumentException iae ) { fail(iae.getMessage()); } if( env != null ) env.remove_and_unlock(); } }