package water.exec;
import hex.FrameSplitter;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import water.H2O;
import water.Key;
import water.Lockable;
import water.TestUtil;
import water.fvec.Frame;
import water.fvec.NFSFileVec;
import water.fvec.ParseDataset2;
import java.io.File;
import static org.junit.Assert.*;
public class Expr2Test extends TestUtil {
// One-time class setup: calls stall_till_cloudsize(1) before any test in this class runs.
// NOTE(review): presumably this blocks until a cloud of at least one node is up -- confirm
// against the helper's definition, which is not in this file.
@BeforeClass
public static void stall() {
  stall_till_cloudsize(1);
}
// JUnit rule for declaring expected exceptions inside a test body. It is created with
// ExpectedException.none(), i.e. no exception is expected unless a test configures one.
// None of the tests in this file currently reference it.
@Rule
public ExpectedException thrown = ExpectedException.none();
/**
 * Round-trip check for {@code rbind}: parses smalldata/tnc3_10.csv, splits the frame with
 * {@link FrameSplitter} using the ratios from {@code arf(0.5f)}, rbinds the two resulting
 * pieces through {@code Exec2}, and asserts that the row count equals that of the parsed frame.
 *
 * All frames created along the way are deleted in a {@code finally} block so that a failed
 * assertion (or a failure in parse/split/exec) does not leave keys behind for later tests.
 */
@Test public void rbindTest() {
  Key dest1 = Key.make("f1");
  float[] ratios = arf(0.5f);
  Frame[] splits = null;
  Frame rbinded_frame = null;
  try {
    File file1 = TestUtil.find_test_file("smalldata/tnc3_10.csv");
    Key fkey1 = NFSFileVec.make(file1);
    Frame f = ParseDataset2.parse(dest1,new Key[]{fkey1});

    // Run the splitter as an H2O task and wait for it to finish before reading the result.
    FrameSplitter fs = new FrameSplitter(f, ratios);
    H2O.submitTask(fs).join();
    splits = fs.getResult();

    Env ev = Exec2.exec("rbind("+splits[0]._key+","+splits[1]._key+")" );
    try {
      rbinded_frame = ev.popAry();
    } finally {
      if (ev!=null) ev.remove_and_unlock();
    }

    // Expected value first, so a failure message labels the two numbers correctly.
    assertEquals(f.numRows(), rbinded_frame.numRows());
  } finally {
    // Same deletion order as before, but now also executed on failure paths.
    if (rbinded_frame != null) rbinded_frame.delete();
    Lockable.delete(dest1);
    if (splits != null)
      for (Frame s : splits)
        if (s != null) s.delete();
  }
}
/**
 * Broad regression test for the Exec2 expression language.
 *
 * Parses smalldata/tnc3_10.csv into the key "h.hex" and then feeds a long list of
 * expressions through the three checkStr overloads defined in this class:
 *   checkStr(expr)          - evaluate and print the result (IllegalArgumentException is printed, not fatal)
 *   checkStr(expr, double)  - evaluate and assert a scalar result
 *   checkStr(expr, String)  - assert that evaluation throws IllegalArgumentException with exactly this message
 *
 * The calls are order dependent: many expressions assign to x, y, a or modify h.hex, and
 * later expressions reference those names. The original "h.hex" key is deleted in the
 * finally block regardless of the outcome.
 */
@Test public void testBasicExpr1() {
Key dest = Key.make("h.hex");
try {
File file = TestUtil.find_test_file("smalldata/tnc3_10.csv");
//File file = TestUtil.find_test_file("smalldata/iris/iris_wheader.csv");
//File file = TestUtil.find_test_file("smalldata/cars.csv");
Key fkey = NFSFileVec.make(file);
ParseDataset2.parse(dest,new Key[]{fkey});
// Simple numbers & simple expressions
checkStr("1.23",1.23);
checkStr(" 1.23 + 2.34",3.57);
checkStr(" 1.23 + 2.34 * 3", 8.25); // op precedence of * over +
checkStr(" 1.23 2.34", "Junk at end of line\n"+" 1.23 2.34\n"+" ^--^\n"); // Syntax error
checkStr("1.23 < 2.34",1);
checkStr("1.23 <=2.34",1);
checkStr("1.23 > 2.34",0);
checkStr("1.23 >=2.34",0);
checkStr("1.23 ==2.34",0);
checkStr("1.23 !=2.34",1);
checkStr("1 & 2",1);
checkStr("NA&0",0); // R-spec: 0 not NA
checkStr("0&NA",0); // R-spec: 0 not NA
checkStr("NA&1",Double.NaN); // R-spec: NA not 1
checkStr("1&NA",Double.NaN);
checkStr("1|NA",1);
checkStr("1&&2",1);
checkStr("1||0",1);
checkStr("NA||1",1);
checkStr("NA||0",Double.NaN);
checkStr("0||NA",Double.NaN);
checkStr("!1",0);
checkStr("(!)(1)",0);
checkStr("(!!)(1)", "Arg 'x' typed as dblary but passed dblary(dblary)\n"+"(!!)(1)\n"+" ^-^\n");
checkStr("-1",-1);
checkStr("-(1)",-1);
checkStr("(-)(1)", "Passed 1 args but expected 2\n"+"(-)(1)\n"+" ^--^\n");
checkStr("-T",-1);
checkStr("* + 1", "Arg 'x' typed as dblary but passed anyary{dblary,dblary,}(dblary,dblary)\n"+"* + 1\n"+"^----^\n");
// Simple op as prefix calls
checkStr("+(1.23,2.34)","Missing ')'\n"+"+(1.23,2.34)\n"+" ^---^\n"); // Syntax error: looks like unary op application
checkStr("+(1.23)",1.23); // Unary operator
// Simple scalar assignment
checkStr("1=2","Junk at end of line\n"+"1=2\n"+" ^^\n");
checkStr("x","Unknown var x\n"+"x\n"+"^^\n");
checkStr("x+2","Unknown var x\n"+"x+2\n"+"^^\n");
checkStr("2+x","Missing expr or unknown ID\n"+"2+x\n"+" ^\n");
checkStr("x=1",1);
checkStr("x<-1",1); // Alternative R assignment syntax
checkStr("x=3;y=4",4); // Return value is last expr
// Ambiguity & Language
checkStr("x=mean"); // Assign x to the built-in fcn mean
checkStr("x=mean=3",3); // Assign x & id mean with 3; "mean" here is not related to any built-in fcn
checkStr("x=mean(c(3))",3); // Assign x to the result of running fcn mean(3)
checkStr("x=mean(c(\n3))",3); // Assign x to the result of running fcn mean(3)
checkStr("x=mean+3","Arg 'x' typed as dblary but passed dbl(ary)\n"+"x=mean+3\n"+" ^-----^\n"); // Error: "mean" is a function; cannot add a function and a number
checkStr("apply(c(1,2,3),,nrow)","Missing argument\napply(c(1,2,3),,nrow)\n ^\n");
checkStr("foo==bar","Unknown var foo\nfoo==bar\n^--^\n"); // Error msg is about "foo==" and not new assignment "foo="
// Simple array handling; broadcast operators
checkStr("h.hex"); // Simple ref
checkStr("sum(apply(h.hex[,c(4,5)],1,mean))",183.96); // Row-wise apply on mean
checkStr("h.hex[2,3]",1); // Scalar selection
checkStr("h.hex[2,+]","Must be scalar or array\n"+"h.hex[2,+]\n"+" ^-^\n"); // Function not allowed
checkStr("h.hex[2+4,-4]");// Select row 6, all-cols but 4
checkStr("h.hex[1,-1]; h.hex[2,-2]; h.hex[3,-3]");// Partial results are freed
checkStr("h.hex[2+3,h.hex]","Selector must be a single column: [pclass, name, sex, age, sibsp, parch, ticket, fare, cabin, embarked, boat, body, home.dest, survived]"); // Error: col selector has too many columns
checkStr("h.hex[2,]"); // Row 2 all cols
checkStr("h.hex[,3]"); // Col 3 all rows
checkStr("h.hex+1"); // Broadcast scalar over ary
checkStr("h.hex-h.hex");
checkStr("1.23+(h.hex-h.hex)");
checkStr("(1.23+h.hex)-h.hex");
checkStr("min(h.hex,1+2)",0);
checkStr("max(h.hex,1+2)",211.3375);
checkStr("min.na.rm(h.hex,NA)",0); // 0
checkStr("max.na.rm(h.hex,NA)",211.3375); // 211.3375
checkStr("min.na.rm(c(NA, 1), -1)",-1); // -1
checkStr("max.na.rm(c(NA, 1), -1)", 1); // 1
checkStr("max(c(Inf,1), 2 )", Double.POSITIVE_INFINITY); // Infinity
checkStr("min(c(Inf,1),-Inf)", Double.NEGATIVE_INFINITY); // -Infinity
checkStr("is.na(h.hex)");
checkStr("sum(is.na(h.hex))", 0);
checkStr("nrow(h.hex)*3", 30);
checkStr("h.hex[nrow(h.hex)-1,ncol(h.hex)-1]");
checkStr("x=1;x=h.hex"); // Allowed to change types via shadowing at REPL level
checkStr("a=h.hex"); // Top-level assignment back to H2O.STORE
checkStr("(h.hex+1)<-2","Junk at end of line\n"+"(h.hex+1)<-2\n"+" ^-^\n"); // No L-value
checkStr("h.hex[nrow(h.hex=1),]","Arg 'x' typed as ary but passed dbl\n"+"h.hex[nrow(h.hex=1),]\n"+" ^--------^\n"); // Passing a scalar 1.0 to nrow
checkStr("h.hex[{h.hex=10},]"); // ERROR BROKEN: SHOULD PARSE statement list here; then do evil side-effect killing h.hex but also using 10 to select last row
checkStr("h.hex[3,4]<-4;",4);
checkStr("c(1,3,5)");
// Column row subselection
checkStr("h.hex[,c(1,3,5)]");
checkStr("h.hex[c(1,3,5),]");
checkStr("a=c(11,22,33,44,55,66); a[c(2,6,1),]");
// Named column selection
checkStr("h.hex$ 2","Missing column name after $\nh.hex$ 2\n ^^\n");
checkStr("h.hex$crunk","Missing column crunk in frame [pclass, name, sex, age, sibsp, parch, ticket, fare, cabin, embarked, boat, body, home.dest, survived]");
checkStr("h.hex$pclass");
checkStr("mean(h.hex$pclass)",1);
// More complicated operator precedence
checkStr("c(1,0)&c(2,3)");// 1,0
checkStr("c(2,NA)&&T",1); // 1
checkStr("-(x = 3)",-3);
checkStr("x<-+");
checkStr("x<-+;x(2)","Passed 1 args but expected 2\nx<-+;x(2)\n ^--^\n"); // Error, + is binary if used as prefix
checkStr("x<-+;x(1,2)",3); // 3
checkStr("x<-*;x(2,3)",6); // 6
checkStr("x=c(0,1);!x+1"); // ! has lower precedence
checkStr("x=c(1,-2);-+---x");
checkStr("x=c(1,-2);--!--x");
checkStr("!(y=c(3,4))");
checkStr("!x!=1");
checkStr("(!x)!=1");
checkStr("1+x^2");
checkStr("1+x**2");
checkStr("x + 2/y");
checkStr("x + (2/y)");
checkStr("-x + y");
checkStr("-(x + y)");
checkStr("-x % y");
checkStr("-(x % y)");
checkStr("T|F&F",1); // Evals as T|(F&F)==1 not as (T|F)&F==0
checkStr("T||F&&F",1); // Evals as T||(F&&F)==1 not as (T||F)&&F==0
// User functions
checkStr("function(=){x+1}(2)","Invalid var\nfunction(=){x+1}(2)\n ^\n");
checkStr("function(x,=){x+1}(2)","Invalid var\nfunction(x,=){x+1}(2)\n ^\n");
checkStr("function(x,<-){x+1}(2)","Invalid var\nfunction(x,<-){x+1}(2)\n ^\n");
checkStr("function(x,x){x+1}(2)","Repeated argument\nfunction(x,x){x+1}(2)\n ^^\n");
checkStr("function(x,y,z){x[]}(h.hex,1,2)");
checkStr("function(x){x[]}(2)","Arg 'x' typed as ary but passed dbl\nfunction(x){x[]}(2)\n ^--^\n");
checkStr("function(x){x+1}(2)",3);
checkStr("function(x){y=x+y}(2)");
checkStr("function(x){}(2)");
checkStr("function(x){y=x*2; y+1}(2)",5);
checkStr("function(x){y=1+2}(2)",3);
checkStr("function(x){y=1+2;y=c(1,2)}"); // Not allowed to change types in inner scopes
checkStr("a=function(x) x+1; 7",7); // Function def w/out curly-braces; return 7
checkStr("a=function(x) {x+1}; 7",7); // Function def w/ curly-braces; return 7
checkStr("a=function(x) {x+1; 7}"); // Function def of 7
checkStr("c(1,c(2,3))");
checkStr("a=c(1,Inf);c(2,a)");
// Test sum flattening all args
checkStr("sum(1,2,3)",6);
checkStr("sum(c(1,3,5))",9);
checkStr("sum(4,c(1,3,5),2,6)",21);
checkStr("sum(1,h.hex,3)"); // should report an error because h.hex has enums
checkStr("sum(c(NA,-1,1))",Double.NaN);
checkStr("sum.na.rm(c(NA,-1,1))",0);
checkStr("function(a){a[];a=1}");
checkStr("a=1;a=2;function(x){x=a;a=3}");
checkStr("a=h.hex;function(x){x=a;a=3;nrow(x)*a}(a)",30);
checkStr("a=h.hex;a[,1]=(a[,1]==8)");
// Higher-order function typing: funy is typed in the body of function(x)
checkStr("function(funy){function(x){funy(x)*funy(x)}}(sgn)(-2)",1);
// Filter/selection
checkStr("h.hex[h.hex[,4]>30,]");
checkStr("a=c(1,2,3);a[a[,1]>10,1]");
checkStr("sapply(a,sum)[1,1]",6);
checkStr("apply(h.hex,2,sum)"); // ERROR BROKEN: the ENUM cols should fold to NA
checkStr("y=5;apply(h.hex,2,function(x){x[]+y})");
checkStr("apply(h.hex,2,function(x){x=1;h.hex})","Arg 'fcn' typed as ary(ary) but passed ary(dbl)\napply(h.hex,2,function(x){x=1;h.hex})\n ^-------------------------------^\n");
checkStr("apply(h.hex,2,function(x){h.hex})","apply requires that ary fun(ary x) return 1 column");
checkStr("apply(h.hex,2,function(x){sum(x)/nrow(x)})");
checkStr("mean=function(x){apply(x,2,sum)/nrow(x)};mean(h.hex)");
// Conditional selection;
checkStr("ifelse(0,1,2)",2);
checkStr("ifelse(0,h.hex+1,h.hex+2)");
checkStr("ifelse(h.hex>3,99,h.hex)"); // Broadcast selection
checkStr("ifelse(0,+,*)(1,2)",2); // Select functions
checkStr("(0 ? + : *)(1,2)",2); // Trinary select
checkStr("(1? h.hex : (h.hex+1))[1,2]",0); // True (vs false) test
// Impute the mean
checkStr("apply(h.hex,2,function(x){total=sum(ifelse(is.na(x),0,x)); rcnt=nrow(x)-sum(is.na(x)); mean=total / rcnt; ifelse(is.na(x),mean,x)})");
checkStr("factor(h.hex[,5])");
// Slice assignment & map
checkStr("h.hex[,2]");
checkStr("h.hex[,2]+1");
checkStr("h.hex[,3]=3.3;h.hex"); // Replace a col with a constant
checkStr("h.hex[,3]=h.hex[,2]+1"); // Replace a col
checkStr("h.hex[,ncol(h.hex)+1]=4"); // Extend a col
checkStr("a=ncol(h.hex);h.hex[,c(a+1,a+2)]=5"); // Extend two cols
checkStr("h.hex[,7]=x=3; !(x+2)");
checkStr("table(h.hex)");
checkStr("table(h.hex[,5])");
checkStr("table(h.hex[,c(2,7)])");
checkStr("table(h.hex[,c(2,9)])");
checkStr("a=cbind(c(1,2,3), c(4,5,6))");
checkStr("a[,1] = factor(a[,1])");
checkStr("is.factor(a[,1])",1);
checkStr("isTRUE(c(1,3))",0);
checkStr("a=1;isTRUE(1)",1);
checkStr("a=c(1,2);isTRUE(a)",0);
checkStr("isTRUE(min)",0);
checkStr("seq_len(0)","Error in seq_len(0): argument must be coercible to positive integer");
checkStr("seq_len(-1)","Error in seq_len(-1): argument must be coercible to positive integer");
checkStr("seq_len(10)");
checkStr("3 < 4 | F & 3 > 4", 1); // Evals as (3<4) | (F & (3>4))
checkStr("3 < 4 || F && 3 > 4", 1);
checkStr("h.hex[,4] != 29 || h.hex[,2] < 305 && h.hex[,2] < 81", Double.NaN);
//checkStr("h.hex[h.hex[,4]>40,]=-99");
//checkStr("h.hex[2,]=h.hex[7,]");
//checkStr("h.hex[c(1,3,5),1] = h.hex[c(2,4,6),2]");
//checkStr("h.hex[c(1,3,5),1] = h.hex[c(2,4),2]");
//checkStr("map()");
//checkStr("map(1)");
//checkStr("map(+,h.hex,1)");
//checkStr("map(+,1,2)");
//checkStr("map(function(x){x[];1},h.hex)");
//checkStr("map(function(a,b,d){a+b+d},h.hex,h.hex,1)");
//checkStr("map(function(a,b){a+ncol(b)},h.hex,h.hex)");
// Quantile
checkStr("quantile(seq_len(10),seq_len(10)/10)");
checkStr("quantile(runif(seq_len(10000),-1),seq_len(10)/10)");
checkStr("quantile(h.hex[,4],c(0,.05,0.3,0.55,0.7,0.95,0.99))");
// ddply error checks
checkStr("ddply(h.hex,h.hex,sum)","Only one column-of-columns for column selection");
checkStr("ddply(h.hex,seq_len(10000),sum)","Too many columns selected");
checkStr("ddply(h.hex,NA,sum)","NA not a valid column");
checkStr("ddply(h.hex,c(1,NA,3),sum)","NA not a valid column");
checkStr("ddply(h.hex,c(1,99,3),sum)","Column 99 out of range for frame columns 17");
checkStr("nrow(unique(h.hex[,5]))",3);
checkStr("nrow(unique(h.hex[,6]))",2);
checkStr("nrow(unique(h.hex[,c(5,6)]))",4); // multi-column unique
// Newlines as statement-ends
checkStr("3*4+5*6",42);
checkStr("(h.hex[1,1]=2)",2);
checkStr("(h.hex[1,1]=2\n)",2);
checkStr("(h.hex[1,1]\n=2)",2);
checkStr("(h.hex\n[1,1]=2)",2);
checkStr("function(){x=1.23;(x=4.5)\n}()",4.5);
checkStr("function(){x=1.23;x=\n4.5\n}()",4.5);
checkStr("x=3\nfunction()x=1.23\nx",3);
checkStr("x=3\nfunction(){(x=1.23)}\nx",3);
checkStr("x=function(df)\n{\nmin(df$age)\n}\n;x(h.hex)",0.92);
checkStr("1.23\n-4",-4);
checkStr("1.23 +\n-4",-2.77);
checkStr("x=3;3*-x",-9); // *- is not a token
checkStr("x=3;3\n*\n-\nx",3); // Each of '3' and '*' and '-' and 'x' is a standalone statement
// No strings, yet
checkStr("function(df) { min(df[,\"age\"]) }","The current Exec does not handle strings\nfunction(df) { min(df[,\"age\"]) }\n ^-----^\n");
// Cleanup testing temps
checkStr("a=0;x=0;y=0",0); // Delete keys from global scope
} finally {
Lockable.delete(dest); // Remove original hex frame key
}
}
/**
 * Executes expression {@code s} without asserting on its value; the result is only printed.
 * Frame results are dumped in full via {@code toStringAll()}, anything else via
 * {@code resultString()}. An {@link IllegalArgumentException} raised during execution is
 * treated as an acceptable outcome: its message is printed and the test continues.
 *
 * The environment is released in a {@code finally} block so that any other exception
 * escaping the evaluation or printing code does not skip {@code remove_and_unlock()}.
 *
 * @param s expression text handed to {@code Exec2.exec}
 */
void checkStr( String s ) {
  Env env = null;
  try {
    env = Exec2.exec(s);
    if( env.isAry() ) {                 // Print complete frames for inspection
      Frame res = env.popAry();
      String skey = env.key();
      System.out.println(res.toStringAll());
      env.subRef(res,skey);             // But then end lifetime
    } else {
      System.out.println( env.resultString() );
    }
  } catch( IllegalArgumentException iae ) {
    System.out.println(iae.getMessage());
  } finally {
    // env stays null when Exec2.exec itself threw, so there is nothing to release then.
    if( env != null ) env.remove_and_unlock();
  }
  debug_print(s);
}
/**
 * Executes expression {@code s} and asserts that it produces the scalar {@code d}.
 *
 * The comparison uses a relative tolerance of |d| / 1e8. The previous tolerance,
 * {@code d / 1e8}, was negative for negative expectations (which JUnit then treats as
 * "must be bit-identical") and infinite for {@code +Infinity} (which let any finite result
 * pass). Infinite expectations are now compared exactly; NaN expectations still match
 * only NaN.
 *
 * The environment is released in a {@code finally} block so a failed assertion does not
 * skip {@code remove_and_unlock()}.
 *
 * @param s expression text handed to {@code Exec2.exec}
 * @param d expected scalar result
 */
void checkStr( String s, double d ) {
  Env env = Exec2.exec(s);
  try {
    assertFalse("Should be scalar result not Frame: "+s, env.isAry() );
    assertFalse("Should be scalar result not function: "+s, env.isFcn() );
    double res = env.popDbl();
    // Tolerance must be non-negative and finite to be meaningful to assertEquals.
    double delta = Double.isInfinite(d) ? 0.0 : Math.abs(d) / 1e8;
    assertEquals(s, d, res, delta);
  } finally {
    env.remove_and_unlock();
  }
  debug_print(s);
}
/**
 * Negative check: executing {@code s} must throw an {@link IllegalArgumentException}
 * whose message equals {@code err} exactly.
 *
 * If execution unexpectedly succeeds, the returned environment is released and the test
 * is failed via {@code fail()}.
 *
 * @param s   expression text handed to {@code Exec2.exec}
 * @param err exact message expected from the thrown exception
 */
void checkStr( String s, String err ) {
  try {
    // Only reached in full when no exception is thrown: release the environment, then fail.
    Exec2.exec(s).remove_and_unlock();
    fail(); // Supposed to throw; reaching here is an error
  } catch( IllegalArgumentException expected ) {
    assertEquals(err, expected.getMessage());
  }
  debug_print(s);
}
// Handy code to debug leaking keys.
// Called by every checkStr overload after an expression has been processed. The body is
// currently disabled (fully commented out), so this method is a no-op. When enabled, the
// code below walks H2O.keySet(), counts values by kind (VectorGroup, Vec, Chunk, Frame and
// "everything else") and prints the tallies followed by the expression string s.
// Re-enabling it requires importing DKV, Value, Iced, TypeMap, Vec and Chunk, which are
// not among this file's imports.
public static void debug_print( String s ) {
// int sz=0;
// int vgs=0, frs=0, vcs=0, cks=0;
// for( Key k : H2O.keySet() ) {
// sz++;
// Value val = DKV.get(k);
// Iced ice = TypeMap.newInstance(val.type());
// if( ice instanceof Vec.VectorGroup ) vgs++;
// else if( ice instanceof Vec ) vcs++;
// else if( ice instanceof Chunk ) cks++;
// else if( ice instanceof Frame ) frs++;
// }
// System.out.println("KKK="+(sz-vgs-frs-vcs-cks)+
// ", VGS="+vgs+
// ", FRS="+frs+
// ", VCS="+vcs+
// ", CKS="+cks+
// ", "+s);
}
}