package water.exec;

import water.*;
import water.fvec.Frame;
import water.util.Log;

import java.util.ArrayList;
import java.util.Stack;

/** Parse and execute a generic R-like string, in the context of an H2O Cloud
 *  @author cliffc@0xdata.com
 */
public class Exec2 {
  // Parse a string, execute it & return a Frame.
  // Basic types: ary (Frame), dbl (scalar double), fcn (function)
  // Functions are 1st class; every argument typed one of the above.
  // Assignment is always to in-scope variables only.
  // Initial environment has all Frame Keys mapped to Frame-typed variables

  // Big Allocation: all expressions are eval'd in a context where a large temp
  // is available, and all allocations are compatible with that temp.  Linear-
  // logic style execution is guaranteed inside the big temp.  Parse error if
  // an expression which is not provably small does not have an active temp.

  // Grammar:
  //   statements := cxexpr ; statements
  //   cxexpr :=                   // COMPLEX expr
  //           infix_expr          // Simple RHS-expr
  //           id = cxexpr         // Shadows outer var with a ptr assignment; no copy
  //                               // Overwrites inner var; types must match.
  //           id <- cxexpr        // Alternative R syntax for assignment
  //           id[] = cxexpr       // Slice/partial assignment; id already exists
  //           iexpr ? cxexpr : cxexpr  // exprs must have equal types
  //   infix_expr :=               // Leading infix expression
  //           op1 infix_expr term*  // +x but also e.g. ++--!+-!-++!3
  //           op1? slice term*    // e.g. cos() or -sin(foo) or -+-fun()[1,2]
  //   term :=                     // Infix expression
  //           op2 infix_expr      // Standard R operator prec ordering
  //   slice :=
  //           prefix_expr         // No slicing
  //           prefix_expr[]       // Whole slice
  //           prefix_expr[cxexpr?,cxexpr?] // optional row & col slicing
  //           prefix_expr$col     // named column
  //   prefix_expr :=
  //           val
  //           val(cxexpr,...)*    // Prefix function application, evals LEFT TO RIGHT
  //   val :=
  //           ( cxexpr )          // Ordering evaluation
  //           id                  // any visible var; will be typed
  //           num                 // Scalars, treated as 1x1
  //           op                  // Built-in functions
  //           function(v0,v1,v2) { statements; ...v0,v1,v2... } // 1st-class lexically scoped functions
  //           function(v0,v1,v2) statement // Single statement variant
  //   op1  := + - !               // Unary operators allowed w/out parens prefix location
  //   op2  := + - * / % & | <= > >= !=  ... // Binary operators allowed w/out parens infix location
  //   op   := sgn sin cos nrow ncol isNA sqrt isTRUE year month day ...
  //   op   := min max sum sdev mean ...
  //   op   := c cbind seq quantile table ... // Various R operators

  /**
   * Parse and execute {@code str} against an environment preloaded with every
   * Frame key found in the global key snapshot plus a handful of named
   * constants (TRUE, FALSE, T, F, NA, Inf).
   *
   * @param str the expression text to parse and execute
   * @return the environment after execution
   * @throws IllegalArgumentException on a parse error (see {@link #throwErr})
   */
  public static Env exec( String str ) throws IllegalArgumentException {
    cluster_init();
    // Preload the global environment from existing Frames
    ArrayList<ASTId> global = new ArrayList<ASTId>();
    ArrayList<Key>   locked = new ArrayList<Key>  ();
    Env env = new Env(locked);
    final Key [] frameKeys = H2O.KeySnapshot.globalSnapshot().filter(new H2O.KVFilter() {
      @Override public boolean filter(H2O.KeyInfo k) { return k._type == TypeMap.FRAME; }
    }).keys();
    Log.info("Locking " + frameKeys.length + " keys for Exec2.");
    for( Key k : frameKeys ) {      // Convert all VAs to Frames
      Value val = DKV.get(k);
      if( val == null || !val.isFrame() ) continue;
      // Bad if it's already locked by 'null', because lock by 'null' is removed when you leave Exec.
      // Before was adding all frames with read-shared lock here.
      // Should be illegal to add any keys locked by "null' to exec? (is it only unparsed keys?)
      // undoing. this doesn't always work (gets stack trace)
      Frame fr = val.get();
      String kstr = k.toString();
      try {
        env.push(fr,kstr);
        global.add(new ASTId(Type.ARY,kstr,0,global.size()));
        fr.read_lock(null);
        locked.add(fr._key);
      } catch( Exception e ) {
        // Best-effort: skip this frame but record why it could not be added.
        Log.err("Exception while adding frame "+k+" to Exec env: "+e);
      }
    }

    // Some global constants.  Each ASTId is added to 'global' together with a
    // matching value pushed on 'env', in the same order.
    global.add(new ASTId(Type.DBL,"TRUE" ,0,global.size()));  env.push(1.0);
    global.add(new ASTId(Type.DBL,"FALSE",0,global.size()));  env.push(0.0);
    global.add(new ASTId(Type.DBL,"T"    ,0,global.size()));  env.push(1.0);
    global.add(new ASTId(Type.DBL,"F"    ,0,global.size()));  env.push(0.0);
    global.add(new ASTId(Type.DBL,"NA"   ,0,global.size()));  env.push(Double.NaN);
    global.add(new ASTId(Type.DBL,"Inf"  ,0,global.size()));  env.push(Double.POSITIVE_INFINITY);

    // Parse.  Type-errors get caught here and throw IAE
    try {
      int argcnt = global.size();
      Exec2 ex = new Exec2(str, global);
      AST ast = ex.parse();
      env.push(global.size()-argcnt); // Push space for temps
      ast.exec(env);
      env.postWrite();
    } catch( RuntimeException t ) {
      // Release everything held by the environment before propagating.
      env.remove_and_unlock();
      throw t;
    }
    return env;
  }

  // Simple parser state
  final String _str;            // The full expression text
  final char _buf[];            // Chars from the string
  int _x;                       // Parse pointer
  Stack<ArrayList<ASTId>> _env; // Stack of scopes; bottom entry is the global scope

  /** Set up parser state over {@code str} with {@code global} as the outermost scope. */
  private Exec2( String str, ArrayList<ASTId> global ) {
    _str = str;
    _buf = str.toCharArray();
    _env = new Stack<ArrayList<ASTId>>();
    _env.push(global);
  }

  /** Number of scopes stacked on top of the global scope. */
  int lexical_depth() { return _env.size()-1; }

  /**
   * Parse the whole buffer as statements.
   *
   * @return the parsed AST
   * @throws IllegalArgumentException if non-whitespace text remains after the parse
   */
  AST parse() {
    AST ast = ASTStatement.parse(this);
    skipWS();                   // No trailing crud
    return _x == _buf.length ? ast : throwErr("Junk at end of line",_buf.length-1);
  }

  // --------------------------------------------------------------------------
  // Generic parsing functions
  // --------------------------------------------------------------------------

  /** Skip whitespace, including newlines. */
  void skipWS() { skipWS(false); }

  /** Skip whitespace; when {@code EOS} is set, stop in front of a newline. */
  void skipWS( boolean EOS ) {
    while( _x < _buf.length && isWS(_buf[_x]) && (!EOS || _buf[_x]!='\n') ) _x++;
  }

  // Skip whitespace.
  // If c is the next char, eat it & return true
  // Else return false.
  boolean peek(char c) { return peek(c,false); }

  // Peek for 'c' past whitespace but not past a newline if EOS is set
  // (basically treat newline as the statement-end character ';' which does not
  // match c)
  boolean peek(char c, boolean EOS) {
    // Non-newline whitespace is always consumed, even when the peek fails.
    while( _x < _buf.length && isWS(_buf[_x]) && _buf[_x]!='\n' ) _x++;
    int nx=_x;
    // Newlines are only looked past (tentatively) when EOS is not set.
    if( !EOS ) while( nx < _buf.length && isWS(_buf[nx]) ) nx++;
    if( nx==_buf.length || _buf[nx]!=c ) return false;
    _x=nx+1;                    // Eat 'c'
    return true;
  }

  // Same as peek, but throw if char not found.  Always newlines are treated as whitespace
  AST xpeek(char c, int x, AST ast) { return peek(c,false) ? ast : throwErr("Missing '"+c+"'",x); }

  // True if the next non-whitespace char is a statement end (';' or '\n'),
  // which is consumed.  False if some other char is found (left unconsumed).
  // NOTE(review): also returns false when the data runs out, although an
  // earlier comment here claimed no-more-data counts as end-of-statement;
  // behavior is left unchanged - confirm against callers.
  boolean peekEOS() {
    while( _x < _buf.length ) {
      char d = _buf[_x++];
      if( d==';' || d=='\n' ) return true;
      if( !isWS(d) ) { _x--; return false; }
    }
    return false;
  }

  static boolean isDigit(char c) { return c>='0' && c<= '9'; }
  static boolean isWS(char c) { return c<=' '; }
  static boolean isReserved(char c) { return c=='(' || c==')' || c=='[' || c==']' || c==',' || c==':' || c==';' || c=='$'; }
  static boolean isLetter(char c) { return (c>='a'&&c<='z') || (c>='A' && c<='Z') || c=='_';  }
  static boolean isLetter2(char c) {
    return c=='.' || c==':' || c=='\\' || isDigit(c) || isLetter(c);
  }
  static boolean isQuote(char c) { return c=='"' || c=='\''; }

  // Return an ID string, or null if we get weird stuff or numbers.  Valid IDs
  // include all the operators, except parens (function application) and assignment.
  // Valid IDs: +   -   <=   >   !   [   ]  joe123  ABC
  // Invalid  : +++ 0joe  ( = ) 123.45 1e3
  String isID() {
    if( _x>=_buf.length ) return null; // No characters to parse
    char c = _buf[_x];
    // Guard for the one-char lookaheads below: ':' or '^' may be the last char.
    final boolean hasNext = _x+1 < _buf.length;
    // Fail on special chars in the grammar
    if( isReserved(c) && c != ':' ) return null;
    // ':' is only accepted when directly followed by a digit
    if( c == ':' && !(hasNext && isDigit(_buf[_x+1])) ) return null;
    // Fail on leading numeric
    if( isDigit(c) ) return null;
    // "^-" : accept just the '^' so the '-' is parsed separately
    if( c == '^' && hasNext && _buf[_x+1] == '-' ) { _x++; return _str.substring(_x-1,_x); }
    _x++;                       // Accept parse of 1 char

    // If first char is letter, standard ID
    if( isLetter(c) ) {
      int x=_x-1;               // start of ID
      while( _x < _buf.length && isLetter2(_buf[_x]) )
        _x++;
      return _str.substring(x,_x);
    }

    // Check for super-special operators that are three chars of the form %*%.
    // These are calls to R's matrix operators.
    if( _x+2 <= _buf.length && c == '%' && _buf[_x+1] == '%' ) {
      if( _buf[_x] == '*' ) { _x+=2; return "%*%"; }
      if( _buf[_x] == '/' ) { _x+=2; return "%/%"; }
    }
    if( _x+2 <= _buf.length && c == '%' && _buf[_x] == '%' ) { _x++; return "%%"; }

    // If first char is special, accept 1 or 2 special chars.
    // i.e. allow != >= == <= but not = alone
    if( _x>=_buf.length ) return _str.substring(_x-1,_x);
    char c2=_buf[_x];
    if( isDigit(c2) || isLetter(c2) || isWS(c2) || isReserved(c2) ) {
      if( c=='=' ) { _x--; return null; } // Equals alone is not an ID
      return _str.substring(_x-1,_x);
    }
    if( c=='<' && c2=='-' ) { _x--; return null; } // The other assignment operator
    // Must accept as single letters to avoid ambiguity
    if( c=='+' || c=='-' || c=='*' || c=='/' ) return _str.substring(_x-1,_x);
    // One letter look ahead to decide on what to accept
    if( c=='=' || c=='!' || c=='<' || c =='>' ) {
      if( c2 =='=' ) { _x++; return _str.substring(_x-2,_x); }
      return _str.substring(_x-1,_x);
    }
    _x++;                       // Else accept e.g. <= >= ++ != == etc...
    return _str.substring(_x-2,_x);
  }

  // Returns string value without enclosing quotes, or null if the next char
  // is not a quote.  The closing quote must be the same char as the opening
  // one; if it is missing, everything up to the end of the buffer is returned.
  // The parse pointer is NOT advanced here.
  String isString() {
    if( _x>=_buf.length ) return null; // No characters to parse
    char c = _buf[_x];
    if( isQuote(c) ) {
      int x=_x+1;
      while( x < _buf.length && _buf[x] != c ) x++;
      return _str.substring(_x+1,x);
    }
    return null;
  }

  // isID specifically does not parse "=" or "<-".  This guy does.
  // Note: when '<' is seen without a following '-', the '<' stays consumed
  // and false is returned.
  boolean isAssign(boolean EOS) {
    if( peek('<',EOS) ) {
      // '<' may be the last char in the buffer; that is not an assignment
      if( _x < _buf.length && _buf[_x]=='-' ) { _x++; return true; }
      return false;
    }
    if( !peek('=',EOS) ) return false;
    if( _x >= _buf.length ) return true; // '=' is the last char: not "=="
    char c = _buf[_x];
    if( c != '=' ) return true;
    // what valid 2-letter tokens start with "="?  == but not =+ =>
    _x--;                       // Un-eat the '=' so "==" can be parsed as an ID
    return false;
  }

  // Yet-to-be-parsed
  private String debug() { return new String(_buf,_x,_buf.length-_x); }

  // --------------------------------------------------------------------------
  // Nicely report a syntax error
  /**
   * Always throws.  The message is {@code msg}, the full expression text, and
   * a marker line spanning the range between the parse pointer and {@code idx}.
   * The declared return type lets callers use it in expression position.
   *
   * @param msg description of the problem
   * @param idx buffer index marking the other end of the offending range
   * @return never returns
   * @throws IllegalArgumentException always
   */
  AST throwErr( String msg, int idx ) {
    int lo = _x, hi=idx;
    if( idx < _x ) { lo = idx; hi=_x; }
    StringBuilder sb = new StringBuilder().append(msg).append('\n').append(_str).append('\n');
    int i;
    for( i=0; i<lo; i++ ) sb.append(' ');
    sb.append('^'); i++;
    for( ; i<hi; i++ ) sb.append('-');
    if( i<=hi ) sb.append('^');
    sb.append('\n');
    throw new IllegalArgumentException(sb.toString());
  }

  // To avoid a class-circularity hang, we need to force other members of the
  // cluster to load the Exec & AST classes BEFORE trying to execute code
  // remotely, because e.g. ddply runs functions on all nodes.
  private static boolean _inited;       // One-shot init
  private static void cluster_init() {
    if( _inited ) return;
    new DRemoteTask() {
      @Override public void lcompute() {
        new ASTPlus();          // Touch a common class to force loading
        tryComplete();
      }
      @Override public void reduce( DRemoteTask dt ) { }
    }.invokeOnAllNodes();
    _inited = true;
  }
}