package water.exec;
import hex.FrameTask.DataInfo;
import hex.Quantiles;
import hex.gram.Gram.GramTask;
import hex.la.DMatrix;
import hex.la.Matrix;
import jsr166y.CountedCompleter;
import org.apache.commons.math3.util.ArithmeticUtils;
import org.joda.time.DateTime;
import org.joda.time.MutableDateTime;
import org.joda.time.format.DateTimeFormatter;
import water.*;
import water.api.QuantilesPage;
import water.fvec.*;
import water.util.Utils;
import java.math.BigDecimal;
import java.math.MathContext;
import java.math.RoundingMode;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
/** Parse a generic R string and build an AST, in the context of an H2O Cloud
* @author cliffc@0xdata.com
*/
// --------------------------------------------------------------------------
// Base class of every operator / function node.  It owns the static lookup
// tables of built-in operators (unary infix, binary infix, prefix) and of
// user-defined functions, and records each operator's notation, precedence
// and associativity.
public abstract class ASTOp extends AST {
  // The order of operator precedence follows R rules.
  // Highest first.
  static final public int OPP_PREFIX = 100; /* abc() */
  static final public int OPP_POWER = 13; /* ^ */
  static final public int OPP_UPLUS = 12; /* + */
  static final public int OPP_UMINUS = 12; /* - */
  static final public int OPP_INTDIV = 11; /* %/% */
  static final public int OPP_MOD = 11; /* %xyz% */
  static final public int OPP_MUL = 10; /* * */
  static final public int OPP_DIV = 10; /* / */
  static final public int OPP_PLUS = 9; /* + */
  static final public int OPP_MINUS = 9; /* - */
  static final public int OPP_GT = 8; /* > */
  static final public int OPP_GE = 8; /* >= */
  static final public int OPP_LT = 8; /* < */
  static final public int OPP_LE = 8; /* <= */
  static final public int OPP_EQ = 8; /* == */
  static final public int OPP_NE = 8; /* != */
  static final public int OPP_NOT = 7; /* ! */
  static final public int OPP_AND = 6; /* &, && */
  static final public int OPP_OR = 5; /* |, || */
  static final public int OPP_DILDA = 4; /* ~ */
  static final public int OPP_RARROW = 3; /* ->, ->> */
  static final public int OPP_ASSN = 2; /* = */
  static final public int OPP_LARROW = 1; /* <-, <<- */
  // Operator association order
  static final public int OPA_LEFT = 0;
  static final public int OPA_RIGHT = 1;
  // Operation formula notations
  static final public int OPF_INFIX = 0;
  static final public int OPF_PREFIX = 1;
  // Tables of operators by arity, keyed by the operator's opStr()
  static final public HashMap<String,ASTOp> UNI_INFIX_OPS = new HashMap<String,ASTOp>();
  static final public HashMap<String,ASTOp> BIN_INFIX_OPS = new HashMap<String,ASTOp>();
  static final public HashMap<String,ASTOp> PREFIX_OPS = new HashMap<String,ASTOp>();
  static final public HashMap<String,ASTOp> UDF_OPS = new HashMap<String,ASTOp>();
  // To avoid a cyclic class-loading dependency, these are init'd before subclasses.
  // They must stay textually above the static initializer that instantiates the ops.
  static final String VARS1[] = new String[]{ "", "x"};
  static final String VARS2[] = new String[]{ "", "x","y"};
  static {
    // Unary infix ops
    putUniInfix(new ASTUniPlus());
    putUniInfix(new ASTUniMinus());
    putUniInfix(new ASTNot());
    // Binary infix ops
    putBinInfix(new ASTPlus());
    putBinInfix(new ASTSub());
    putBinInfix(new ASTMul());
    putBinInfix(new ASTDiv());
    putBinInfix(new ASTPow());
    putBinInfix(new ASTPow2());
    putBinInfix(new ASTMod());
    putBinInfix(new ASTMod2());
    putBinInfix(new ASTAND());
    putBinInfix(new ASTOR());
    putBinInfix(new ASTLT());
    putBinInfix(new ASTLE());
    putBinInfix(new ASTGT());
    putBinInfix(new ASTGE());
    putBinInfix(new ASTEQ());
    putBinInfix(new ASTNE());
    putBinInfix(new ASTLA());
    putBinInfix(new ASTLO());
    putBinInfix(new ASTMMult());
    putBinInfix(new ASTIntDiv());
    putBinInfix(new ASTColSeq());
    // Unary prefix ops
    putPrefix(new ASTIsNA());
    putPrefix(new ASTNrow());
    putPrefix(new ASTNcol());
    putPrefix(new ASTLength());
    putPrefix(new ASTAbs ());
    putPrefix(new ASTSgn ());
    putPrefix(new ASTSqrt());
    putPrefix(new ASTCeil());
    putPrefix(new ASTFlr ());
    putPrefix(new ASTTrun());
    putPrefix(new ASTRound());
    putPrefix(new ASTSignif());
    putPrefix(new ASTLog ());
    putPrefix(new ASTExp ());
    putPrefix(new ASTScale());
    putPrefix(new ASTFactor());
    putPrefix(new ASTNumeric());
    putPrefix(new ASTIsFactor());
    putPrefix(new ASTAnyFactor()); // For Runit testing
    putPrefix(new ASTCanBeCoercedToLogical());
    putPrefix(new ASTAnyNA());
    putPrefix(new ASTIsTRUE());
    putPrefix(new ASTMTrans());
    // Trigonometric functions
    putPrefix(new ASTCos());
    putPrefix(new ASTSin());
    putPrefix(new ASTTan());
    putPrefix(new ASTACos());
    putPrefix(new ASTASin());
    putPrefix(new ASTATan());
    putPrefix(new ASTCosh());
    putPrefix(new ASTSinh());
    putPrefix(new ASTTanh());
    // Time extractions, to and from msec since the Unix Epoch
    putPrefix(new ASTYear ());
    putPrefix(new ASTMonth ());
    putPrefix(new ASTDay ());
    putPrefix(new ASTHour ());
    putPrefix(new ASTMinute());
    putPrefix(new ASTSecond());
    putPrefix(new ASTMillis());
    putPrefix(new ASTasDate());
    // Time series operations
    putPrefix(new ASTDiff ());
    // More generic reducers
    putPrefix(new ASTMin ());
    putPrefix(new ASTMax ());
    putPrefix(new ASTSum ());
    putPrefix(new ASTSdev());
    putPrefix(new ASTVar());
    putPrefix(new ASTMean());
    putPrefix(new ASTMedian());
    putPrefix(new ASTMostCommon());
    putPrefix(new ASTMinNaRm());
    putPrefix(new ASTMaxNaRm());
    putPrefix(new ASTSumNaRm());
    putPrefix(new ASTXorSum ());
    // Misc
    putPrefix(new ASTSeq ());
    putPrefix(new ASTSeqLen());
    putPrefix(new ASTRepLen());
    putPrefix(new ASTQtile ());
    putPrefix(new ASTCat ());
    putPrefix(new ASTCbind ());
    putPrefix(new ASTRbind ());
    putPrefix(new ASTTable ());
    putPrefix(new ASTReduce());
    putPrefix(new ASTIfElse());
    putPrefix(new ASTRApply());
    putPrefix(new ASTSApply());
    putPrefix(new ASTddply ());
    putPrefix(new ASTUnique());
    putPrefix(new ASTRunif ());
    putPrefix(new ASTCut ());
    putPrefix(new ASTfindInterval());
    putPrefix(new ASTPrint ());
    putPrefix(new ASTLs ());
    putPrefix(new ASTStrSplit());
    putPrefix(new ASTToLower());
    putPrefix(new ASTToUpper());
    putPrefix(new ASTGSub());
    putPrefix(new ASTSetLevel());
    putPrefix(new ASTStrSub());
    putPrefix(new ASTRevalue());
    putPrefix(new ASTWhich());
    putPrefix(new ASTTrim());
    putPrefix(new ASTSample());
  }

  // True if fn names a built-in operator (any of the three built-in tables).
  static private boolean isReserved(String fn) {
    return UNI_INFIX_OPS.containsKey(fn) || BIN_INFIX_OPS.containsKey(fn) || PREFIX_OPS.containsKey(fn);
  }
  static private void putUniInfix(ASTOp ast) { UNI_INFIX_OPS.put(ast.opStr(),ast); }
  static private void putBinInfix(ASTOp ast) { BIN_INFIX_OPS.put(ast.opStr(),ast); }
  static private void putPrefix (ASTOp ast) { PREFIX_OPS.put(ast.opStr(),ast); }

  // Registers (or replaces) the user-defined function fn.
  // Throws IllegalArgumentException if fn is the name of a built-in operator.
  static void putUDF (ASTOp ast, String fn) {
    if (isReserved(fn)) throw new IllegalArgumentException("Trying to overload a reserved method: "+fn+". Must not overload a reserved method with a user-defined function.");
    if (UDF_OPS.containsKey(fn)) removeUDF(fn);
    UDF_OPS.put(fn,ast);
  }
  static void removeUDF (String fn) { UDF_OPS.remove(fn); }

  // Looks id up among user-defined functions first, then among the built-ins.
  // Returns null when id is not a known operator.
  static public ASTOp isOp(String id) {
    // This order matters. If used as a prefix OP, `+` and `-` are binary only.
    ASTOp op4 = UDF_OPS.get(id); if( op4 != null ) return op4;
    return isBuiltinOp(id);
  }

  // Looks id up in the prefix table, then binary infix, then unary infix.
  // Returns null when id is not a built-in operator.
  static public ASTOp isBuiltinOp(String id) {
    ASTOp op3 = PREFIX_OPS.get(id); if( op3 != null ) return op3;
    ASTOp op2 = BIN_INFIX_OPS.get(id); if( op2 != null ) return op2;
    ASTOp op1 = UNI_INFIX_OPS.get(id); return op1;
  }
  static public boolean isInfixOp(String id) {
    return BIN_INFIX_OPS.containsKey(id) || UNI_INFIX_OPS.containsKey(id);
  }
  static public boolean isUDF(String id) {
    return UDF_OPS.containsKey(id);
  }
  static public boolean isUDF(ASTOp op) { return isUDF(op.opStr()); }

  // Returns the names of all known operators (built-in and user-defined).
  // The result is a fresh set: HashMap.keySet() is a view that rejects
  // add/addAll, so accumulating into it directly would throw
  // UnsupportedOperationException.
  static public Set<String> opStrs() {
    Set<String> all = new HashSet<String>(UNI_INFIX_OPS.keySet());
    all.addAll(BIN_INFIX_OPS.keySet());
    all.addAll(PREFIX_OPS.keySet());
    all.addAll(UDF_OPS.keySet());
    return all;
  }

  final int _form; // formula notation, 0 - infix, 1 - prefix
  final int _precedence; // operator precedence number
  final int _association; // 0 - left associated, 1 - right associated
  // All fields are final, because functions are immutable
  final String _vars[]; // Variable names

  // Builds an operator whose function type is derived from ts; ts and vars
  // must have the same length (checked by assertion).
  ASTOp( String vars[], Type ts[], int form, int prec, int asso) {
    super(Type.fcn(ts));
    _form = form;
    _precedence = prec;
    _association = asso;
    _vars = vars;
    assert ts.length==vars.length : "No vars?" + this;
  }

  // Builds an operator from an already-constructed type t; t._ts and vars
  // must have the same length (checked by assertion).
  ASTOp( String vars[], Type t, int form, int prec, int asso) {
    super(t);
    _form = form;
    _precedence = prec;
    _association = asso;
    _vars = vars;
    assert t._ts.length==vars.length : "No vars?" + this;
  }

  // The operator's textual name; used as the key in the lookup tables.
  abstract String opStr();
  // Returns the instance the parser should use for an occurrence of this operator.
  abstract ASTOp make();

  public boolean leftAssociate( ) {
    return _association == OPA_LEFT;
  }

  // Renders "<type0> <opStr>(<type1> <var1>, ..., <typeN> <varN>)".
  @Override public String toString() {
    String s = _t._ts[0]+" "+opStr()+"(";
    int len=_t._ts.length;
    for( int i=1; i<len-1; i++ )
      s += _t._ts[i]+" "+(_vars==null?"":_vars[i])+", ";
    return s + (len > 1 ? _t._ts[len-1]+" "+(_vars==null?"":_vars[len-1]) : "")+")";
  }

  // The verbose and non-verbose forms are identical for operators.
  public String toString(boolean verbose) {
    return toString();
  }

  // Parses an operator reference at the current position of E.
  // If the identifier names a known operator and is not immediately followed
  // by '=' or '<-', returns op.make() with the position left after the
  // identifier.  Otherwise the position is rewound to where it started and
  // parsing is delegated to ASTFunc.parseFcn.  Returns null when no identifier
  // is present.
  static ASTOp parse(Exec2 E) {
    int x = E._x;
    String id = E.isID();
    if( id == null ) return null;
    ASTOp op = isOp(id); // The order matters. If used as a prefix OP, `+` and `-` are binary only.
    // Also, if assigning to a built-in function then do not parse-as-a-fcn.
    // Instead it will default to parsing as an ID in ASTAssign.parse
    if( op != null ) {
      int x1 = E._x;
      if (!E.peek('=') && !(E.peek('<') && E.peek('-'))) {
        E._x = x1; return op.make(); // undo whatever the peeks may have consumed
      }
    }
    E._x = x;
    return ASTFunc.parseFcn(E);
  }

  // Parse a unary infix OP or return null.
  static ASTOp parseUniInfixOp(Exec2 E) {
    int x = E._x;
    String id = E.isID();
    if( id == null ) return null;
    ASTOp op = UNI_INFIX_OPS.get(id);
    if( op != null) return op.make();
    E._x = x; // Roll back, no parse happened
    return null;
  }

  // Parse a binary infix OP or return null.
  static ASTOp parseBinInfixOp(Exec2 E) {
    int x = E._x;
    String id = E.isID();
    if( id == null ) return null;
    ASTOp op = BIN_INFIX_OPS.get(id);
    if( op != null) return op.make();
    E._x = x; // Roll back, no parse happened
    return null;
  }

  // Executing an operator node simply pushes the operator itself.
  @Override void exec(Env env) { env.push(this); }
  // Standard column-wise function application
  abstract void apply(Env env, int argcnt, ASTApply apply);
  // Special row-wise 'apply'
  double[] map(Env env, double[] in, double[] out) { throw H2O.unimpl(); }
}
// Base class for single-argument operators.  A scalar argument is passed
// through op(); a Frame argument has op() applied to every cell of every
// column by an MRTask2.
abstract class ASTUniOp extends ASTOp {
  // Builds the two-slot signature; both slots hold the same Type instance.
  static Type[] newsig() {
    final Type shared = Type.dblary();
    return new Type[]{ shared, shared };
  }

  ASTUniOp( int form, int precedence, int association ) {
    super(VARS1, newsig(), form, precedence, association);
  }

  protected ASTUniOp( String[] vars, Type[] types, int form, int precedence, int association ) {
    super(vars, types, form, precedence, association);
  }

  // Scalar kernel.  The default fails; subclasses that rely on the default
  // apply() override it.
  double op( double d ) { throw H2O.fail(); }

  @Override void apply(Env env, int argcnt, ASTApply apply) {
    // Expect we can broadcast across all functions as needed.
    if( env.isAry() ) {
      final Frame input = env.popAry();
      final String inputKey = env.key();
      final ASTUniOp self = this; // Final 'this' so can use in closure
      final Frame result = new MRTask2() {
        @Override public void map( Chunk chks[], NewChunk nchks[] ) {
          for( int col = 0; col < nchks.length; col++ ) {
            final Chunk src = chks[col];
            final NewChunk dst = nchks[col];
            final int rows = src._len;
            for( int row = 0; row < rows; row++ )
              dst.addNum(self.op(src.at0(row)));
          }
        }
      }.doAll(input.numCols(), input).outputFrame(input._names, null);
      env.subRef(input, inputKey);
      env.pop(); // Pop self
      env.push(result);
      return;
    }
    // Scalar case: replace the operator on the stack with op(argument).
    final double arg = env.popDbl();
    final double value = op(arg);
    env.poppush(value);
  }
}
// A single-argument operator written in prefix (function-call) notation,
// always with prefix precedence and right associativity.
abstract class ASTUniPrefixOp extends ASTUniOp {
  // Uses the default signature supplied by ASTUniOp.
  ASTUniPrefixOp( ) {
    super(OPF_PREFIX, OPP_PREFIX, OPA_RIGHT);
  }

  // Uses caller-supplied variable names and types.
  ASTUniPrefixOp( String[] vars, Type[] types ) {
    super(vars, types, OPF_PREFIX, OPP_PREFIX, OPA_RIGHT);
  }
}
class ASTCos extends ASTUniPrefixOp { @Override String opStr(){ return "cos"; } @Override ASTOp make() {return new ASTCos ();} @Override double op(double d) { return Math.cos(d);}}
class ASTSin extends ASTUniPrefixOp { @Override String opStr(){ return "sin"; } @Override ASTOp make() {return new ASTSin ();} @Override double op(double d) { return Math.sin(d);}}
class ASTTan extends ASTUniPrefixOp { @Override String opStr(){ return "tan"; } @Override ASTOp make() {return new ASTTan ();} @Override double op(double d) { return Math.tan(d);}}
class ASTACos extends ASTUniPrefixOp { @Override String opStr(){ return "acos"; } @Override ASTOp make() {return new ASTACos();} @Override double op(double d) { return Math.acos(d);}}
class ASTASin extends ASTUniPrefixOp { @Override String opStr(){ return "asin"; } @Override ASTOp make() {return new ASTASin();} @Override double op(double d) { return Math.asin(d);}}
class ASTATan extends ASTUniPrefixOp { @Override String opStr(){ return "atan"; } @Override ASTOp make() {return new ASTATan();} @Override double op(double d) { return Math.atan(d);}}
class ASTCosh extends ASTUniPrefixOp { @Override String opStr(){ return "cosh"; } @Override ASTOp make() {return new ASTCosh ();} @Override double op(double d) { return Math.cosh(d);}}
class ASTSinh extends ASTUniPrefixOp { @Override String opStr(){ return "sinh"; } @Override ASTOp make() {return new ASTSinh ();} @Override double op(double d) { return Math.sinh(d);}}
class ASTTanh extends ASTUniPrefixOp { @Override String opStr(){ return "tanh"; } @Override ASTOp make() {return new ASTTanh ();} @Override double op(double d) { return Math.tanh(d);}}
class ASTAbs extends ASTUniPrefixOp { @Override String opStr(){ return "abs"; } @Override ASTOp make() {return new ASTAbs ();} @Override double op(double d) { return Math.abs(d);}}
class ASTSgn extends ASTUniPrefixOp { @Override String opStr(){ return "sgn" ; } @Override ASTOp make() {return new ASTSgn ();} @Override double op(double d) { return Math.signum(d);}}
class ASTSqrt extends ASTUniPrefixOp { @Override String opStr(){ return "sqrt"; } @Override ASTOp make() {return new ASTSqrt();} @Override double op(double d) { return Math.sqrt(d);}}
class ASTCeil extends ASTUniPrefixOp { @Override String opStr(){ return "ceil"; } @Override ASTOp make() {return new ASTCeil();} @Override double op(double d) { return Math.ceil(d);}}
class ASTFlr extends ASTUniPrefixOp { @Override String opStr(){ return "floor"; } @Override ASTOp make() {return new ASTFlr ();} @Override double op(double d) { return Math.floor(d);}}
class ASTTrun extends ASTUniPrefixOp { @Override String opStr(){ return "trunc"; } @Override ASTOp make() {return new ASTTrun();} @Override double op(double d) { return d>=0?Math.floor(d):Math.ceil(d);}}
class ASTLog extends ASTUniPrefixOp { @Override String opStr(){ return "log"; } @Override ASTOp make() {return new ASTLog ();} @Override double op(double d) { return Math.log(d);}}
class ASTExp extends ASTUniPrefixOp { @Override String opStr(){ return "exp"; } @Override ASTOp make() {return new ASTExp ();} @Override double op(double d) { return Math.exp(d);}}
//class ASTIsNA extends ASTUniPrefixOp { @Override String opStr(){ return "is.na"; } @Override ASTOp make() {return new ASTIsNA();} @Override double op(double d) { return Double.isNaN(d)?1:0;}}
// is.na(x): 1 where a value is missing, 0 elsewhere.
// For a scalar, "missing" means NaN.  For a Frame, a cell is missing when the
// chunk reports it as NA, or when it sits in a factor column and its level
// string is literally "NA".
class ASTIsNA extends ASTUniPrefixOp {
  @Override String opStr(){ return "is.na";}
  @Override ASTOp make() { return new ASTIsNA();}
  @Override double op(double d) { return Double.isNaN(d)?1:0;}

  @Override void apply(Env env, int argcnt, ASTApply apply) {
    // Expect we can broadcast across all functions as needed.
    if( !env.isAry() ) { env.poppush(op(env.popDbl())); return; }
    Frame fr = env.popAry();
    String skey = env.key();
    Frame fr2 = new MRTask2() {
      @Override public void map( Chunk chks[], NewChunk nchks[] ) {
        for( int i=0; i<nchks.length; i++ ) {
          NewChunk n = nchks[i];
          Chunk c = chks[i];
          int rlen = c._len;
          for( int r=0; r<rlen; r++ )
            // isNA0(c, r) is only reached for non-NA cells, so its
            // at0 -> int cast always sees a real level index.
            n.addNum( ( c.isNA0(r) || isNA0(c, r)) ? 1 : 0);
        }
      }
    }.doAll(fr.numCols(),fr).outputFrame(fr._names, null);
    env.subRef(fr,skey);
    env.pop(); // Pop self
    env.push(fr2);
  }

  // True when c belongs to a factor column and the level at row0 is the
  // string "NA".  Callers must not pass a row whose value is NA.
  private boolean isNA0(Chunk c, int row0) {
    return c._vec.isEnum() && c._vec.domain()[(int) c.at0(row0)].equals("NA");
  }
}
// which(x): for a single-column Frame, emits the 1-based row numbers of the
// cells equal to 1.
class ASTWhich extends ASTOp {
  ASTWhich() {
    super(new String[]{"which", "x"},
          new Type[]{Type.dblary(), Type.dblary()},
          OPF_PREFIX, OPP_PREFIX, OPA_RIGHT);
  }

  @Override String opStr() { return "which"; }

  @Override ASTOp make() { return new ASTWhich(); }

  @Override void apply(Env env, int argcnt, ASTApply apply) {
    // NOTE(review): a non-frame argument falls through without touching the
    // stack -- confirm this is intended.
    if( !env.isAry() ) return;

    final Frame input = env.popAry();
    if( input.numCols() != 1 )
      throw new IllegalArgumentException("`which` accepts at exactly 1 column!");
    final String inputKey = env.key();

    final Frame hits = new MRTask2() {
      @Override public void map(Chunk chk, NewChunk nchk) {
        final int rows = chk._len;
        for( int row = 0; row < rows; row++ ) {
          if( chk.at0(row) != 1 ) continue;
          // chunk start offset + local row, shifted to 1-based numbering
          nchk.addNum(chk._start + row + 1);
        }
      }
    }.doAll(1, input).outputFrame(new String[]{"which"}, null);

    env.subRef(input, inputKey);
    env.pop(); // Pop self
    env.push(hits);
  }
}
// round(x, digits): rounds a scalar, or every cell of an all-numeric Frame,
// to the given number of decimal places using half-even rounding.
class ASTRound extends ASTOp {
  @Override String opStr() { return "round"; }
  ASTRound() { super(new String[]{"round", "x", "digits"},
                     new Type[]{Type.dblary(), Type.dblary(), Type.DBL},
                     OPF_PREFIX,
                     OPP_PREFIX,
                     OPA_RIGHT);
  }
  @Override ASTOp make() { return this; }

  // Pops digits, then the value.  Throws IllegalArgumentException if a Frame
  // argument contains a factor column.
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final int digits = (int)env.popDbl();
    if(env.isAry()) {
      Frame fr = env.popAry();
      for(int i = 0; i < fr.vecs().length; i++) {
        if(fr.vecs()[i].isEnum())
          throw new IllegalArgumentException("Non-numeric column " + String.valueOf(i+1) + " in data frame");
      }
      String skey = env.key();
      Frame fr2 = new MRTask2() {
        @Override public void map(Chunk chks[], NewChunk nchks[]) {
          for(int i = 0; i < nchks.length; i++) {
            NewChunk n = nchks[i];
            Chunk c = chks[i];
            int rlen = c._len;
            for(int r = 0; r < rlen; r++)
              n.addNum(roundDigits(c.at0(r),digits));
          }
        }
      }.doAll(fr.numCols(),fr).outputFrame(fr.names(),fr.domains());
      env.subRef(fr,skey);
      env.pop(); // Pop self
      env.push(fr2);
    }
    else
      env.poppush(roundDigits(env.popDbl(),digits));
  }

  // Rounds x to `digits` decimal places (negative digits round to the left of
  // the decimal point) with RoundingMode.HALF_EVEN.  NaN and +/-Infinity are
  // returned unchanged: BigDecimal cannot represent them and its double
  // constructor throws NumberFormatException for both.
  static double roundDigits(double x, int digits) {
    if(Double.isNaN(x) || Double.isInfinite(x)) return x;
    BigDecimal bd = new BigDecimal(x);
    bd = bd.setScale(digits, RoundingMode.HALF_EVEN);
    return bd.doubleValue();
  }
}
// signif(x, digits): rounds a scalar, or every cell of an all-numeric Frame,
// to the given number of significant digits using half-even rounding.
class ASTSignif extends ASTOp {
  @Override String opStr() { return "signif"; }
  ASTSignif() { super(new String[]{"signif", "x", "digits"},
                      new Type[]{Type.dblary(), Type.dblary(), Type.DBL},
                      OPF_PREFIX,
                      OPP_PREFIX,
                      OPA_RIGHT);
  }
  @Override ASTOp make() { return this; }

  // Pops digits, then the value.  Throws IllegalArgumentException if digits is
  // negative or if a Frame argument contains a factor column.
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final int digits = (int)env.popDbl();
    if(digits < 0)
      throw new IllegalArgumentException("Error in signif: argument digits must be a non-negative integer");
    if(env.isAry()) {
      Frame fr = env.popAry();
      for(int i = 0; i < fr.vecs().length; i++) {
        if(fr.vecs()[i].isEnum())
          throw new IllegalArgumentException("Non-numeric column " + String.valueOf(i+1) + " in data frame");
      }
      String skey = env.key();
      Frame fr2 = new MRTask2() {
        @Override public void map(Chunk chks[], NewChunk nchks[]) {
          for(int i = 0; i < nchks.length; i++) {
            NewChunk n = nchks[i];
            Chunk c = chks[i];
            int rlen = c._len;
            for(int r = 0; r < rlen; r++)
              n.addNum(signifDigits(c.at0(r),digits));
          }
        }
      }.doAll(fr.numCols(),fr).outputFrame(fr.names(),fr.domains());
      env.subRef(fr,skey);
      env.pop(); // Pop self
      env.push(fr2);
    }
    else
      env.poppush(signifDigits(env.popDbl(),digits));
  }

  // Rounds x to `digits` significant digits with RoundingMode.HALF_EVEN.
  // NaN and +/-Infinity are returned unchanged: BigDecimal cannot represent
  // them and its double constructor throws NumberFormatException for both.
  // A MathContext precision of 0 means unlimited precision, so digits == 0
  // leaves x unrounded.
  static double signifDigits(double x, int digits) {
    if(Double.isNaN(x) || Double.isInfinite(x)) return x;
    BigDecimal bd = new BigDecimal(x);
    bd = bd.round(new MathContext(digits, RoundingMode.HALF_EVEN));
    return bd.doubleValue();
  }
}
// nrow(x): number of rows of a Frame, returned as a scalar.
class ASTNrow extends ASTUniPrefixOp {
  ASTNrow() {
    super(VARS1, new Type[]{ Type.DBL, Type.ARY });
  }

  @Override String opStr() { return "nrow"; }

  @Override ASTOp make() { return this; }

  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final Frame input = env.popAry();
    final String inputKey = env.key();
    // Read the count before the frame's reference is dropped.
    final double rows = input.numRows();
    env.subRef(input, inputKey);
    env.poppush(rows);
  }
}
// ncol(x): number of columns of a Frame, returned as a scalar.
class ASTNcol extends ASTUniPrefixOp {
  ASTNcol() {
    super(VARS1, new Type[]{ Type.DBL, Type.ARY });
  }

  @Override String opStr() { return "ncol"; }

  @Override ASTOp make() { return this; }

  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final Frame input = env.popAry();
    final String inputKey = env.key();
    // Read the count before the frame's reference is dropped.
    final double cols = input.numCols();
    env.subRef(input, inputKey);
    env.poppush(cols);
  }
}
// length(x): the row count for a single-column Frame, otherwise the column count.
class ASTLength extends ASTUniPrefixOp {
  ASTLength() {
    super(VARS1, new Type[]{ Type.DBL, Type.ARY });
  }

  @Override String opStr() { return "length"; }

  @Override ASTOp make() { return this; }

  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final Frame input = env.popAry();
    final String inputKey = env.key();
    final double len;
    if( input.numCols() == 1 ) {
      len = input.numRows();   // a lone column behaves like a vector
    } else {
      len = input.numCols();
    }
    env.subRef(input, inputKey);
    env.poppush(len);
  }
}
// is.factor(x): 1 when every column of the Frame is a factor column, else 0.
// A non-frame argument yields 0.
class ASTIsFactor extends ASTUniPrefixOp {
  ASTIsFactor() {
    super(VARS1, new Type[]{ Type.DBL, Type.ARY });
  }

  @Override String opStr() { return "is.factor"; }

  @Override ASTOp make() { return this; }

  @Override void apply(Env env, int argcnt, ASTApply apply) {
    if( !env.isAry() ) {
      env.poppush(0);
      return;
    }
    final Frame input = env.popAry();
    final String inputKey = env.key();
    double allFactors = 1;
    for( Vec col : input.vecs() ) {
      if( col.isEnum() ) continue;
      allFactors = 0;   // one non-factor column settles it
      break;
    }
    env.subRef(input, inputKey);
    env.poppush(allFactors);
  }
}
// Added to facilitate Runit testing
// any.factor(x): 1 when at least one column of the Frame is a factor column,
// else 0.  A non-frame argument yields 0.
class ASTAnyFactor extends ASTUniPrefixOp {
  ASTAnyFactor() {
    super(VARS1, new Type[]{ Type.DBL, Type.ARY });
  }

  @Override String opStr() { return "any.factor"; }

  @Override ASTOp make() { return this; }

  @Override void apply(Env env, int argcnt, ASTApply apply) {
    if( !env.isAry() ) {
      env.poppush(0);
      return;
    }
    final Frame input = env.popAry();
    final String inputKey = env.key();
    double anyFactor = 0;
    for( Vec col : input.vecs() ) {
      if( !col.isEnum() ) continue;
      anyFactor = 1;   // first factor column settles it
      break;
    }
    env.subRef(input, inputKey);
    env.poppush(anyFactor);
  }
}
// canBeCoercedToLogical(x): 1 when at least one integer column of the Frame
// has values confined to {0,1} (min 0 / max 1, all 0, or all 1), else 0.
// A non-frame argument yields 0.
class ASTCanBeCoercedToLogical extends ASTUniPrefixOp {
  ASTCanBeCoercedToLogical() {
    super(VARS1, new Type[]{ Type.DBL, Type.ARY });
  }

  @Override String opStr() { return "canBeCoercedToLogical"; }

  @Override ASTOp make() { return this; }

  @Override void apply(Env env, int argcnt, ASTApply apply) {
    if( !env.isAry() ) {
      env.poppush(0);
      return;
    }
    final Frame input = env.popAry();
    final String inputKey = env.key();
    double coercible = 0;
    for( Vec col : input.vecs() ) {
      if( !col.isInt() ) continue;
      final double lo = col.min();
      // Only a minimum of 0 or 1 can qualify; the maximum is consulted only then.
      if( lo != 0 && lo != 1 ) continue;
      final double hi = col.max();
      if( hi == lo || (lo == 0 && hi == 1) ) {
        coercible = 1;
        break;
      }
    }
    env.subRef(input, inputKey);
    env.poppush(coercible);
  }
}
// any.na(x): 1 when at least one column of the Frame reports a non-zero NA
// count, else 0.  A non-frame argument yields 0.
class ASTAnyNA extends ASTUniPrefixOp {
  ASTAnyNA() {
    super(VARS1, new Type[]{ Type.DBL, Type.ARY });
  }

  @Override String opStr() { return "any.na"; }

  @Override ASTOp make() { return this; }

  @Override void apply(Env env, int argcnt, ASTApply apply) {
    if( !env.isAry() ) {
      env.poppush(0);
      return;
    }
    final Frame input = env.popAry();
    final String inputKey = env.key();
    double anyMissing = 0;
    for( Vec col : input.vecs() ) {
      if( col.naCnt() > 0 ) {
        anyMissing = 1;
        break;
      }
    }
    env.subRef(input, inputKey);
    env.poppush(anyMissing);
  }
}
// isTRUE(x): 1 when the argument is a scalar equal to 1.0, else 0.
class ASTIsTRUE extends ASTUniPrefixOp {
  ASTIsTRUE() {
    super(VARS1, new Type[]{ Type.DBL, Type.unbound() });
  }

  @Override String opStr() { return "isTRUE"; }

  // A new instance per use, to make sure fcn get bound at each new context.
  @Override ASTOp make() { return new ASTIsTRUE(); }

  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final double truth;
    // The scalar is popped only when the top of stack is a double.
    if( env.isDbl() && env.popDbl() == 1.0 ) {
      truth = 1;
    } else {
      truth = 0;
    }
    // NOTE(review): on the scalar path this pop follows popDbl(), so two
    // entries are removed before poppush -- confirm against Env's stack contract.
    env.pop();
    env.poppush(truth);
  }
}
// scale(x): standardizes every numeric column of a Frame to
// (value - mean) / sigma and copies factor columns through unchanged.
// A non-frame argument yields NaN.
class ASTScale extends ASTUniPrefixOp {
  ASTScale() { super(VARS1,new Type[]{Type.ARY,Type.ARY}); }
  @Override String opStr() { return "scale"; }
  @Override ASTOp make() {return this;}
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    if(!env.isAry()) { env.poppush(Double.NaN); return; }
    Frame fr = env.popAry();
    String skey = env.key();
    Frame fr2 = new Scale().doIt(fr.numCols(), fr).outputFrame(fr._names, fr.domains());
    env.subRef(fr,skey);
    env.pop(); // Pop self
    env.push(fr2);
  }

  // Distributed task doing the per-cell work.  dfork2 fills the lookup arrays
  // before the task is forked; map then uses them on each chunk.
  private static class Scale extends MRTask2<Scale> {
    protected int _nums = 0;      // Number of numeric (non-enum) columns
    protected int[] _ind;         // Saves indices of numeric cols first, followed by enums
    protected double[] _normSub;  // Per numeric column, in _ind order: mean to subtract
    protected double[] _normMul;  // Per numeric column, in _ind order: 1/sigma to multiply by

    @Override public void map(Chunk chks[], NewChunk nchks[]) {
      // Normalize numeric cols only.  _normSub/_normMul have _nums entries and
      // are packed by numeric position k, so they must be read with k; the
      // frame column index i is only valid for chks/nchks.
      for(int k = 0; k < _nums; k++) {
        int i = _ind[k];
        NewChunk n = nchks[i];
        Chunk c = chks[i];
        int rlen = c._len;
        for(int r = 0; r < rlen; r++)
          n.addNum((c.at0(r)-_normSub[k])*_normMul[k]);
      }
      // Remaining (enum) columns are copied as-is.
      for(int k = _nums; k < chks.length; k++) {
        int i = _ind[k];
        NewChunk n = nchks[i];
        Chunk c = chks[i];
        int rlen = c._len;
        for(int r = 0; r < rlen; r++)
          n.addNum(c.at0(r));
      }
    }

    // Runs the task to completion and returns it.
    public Scale doIt(int outputs, Frame fr) { return dfork2(outputs, fr).getResult(); }

    // Counts numeric columns, records each one's mean and 1/sigma, builds the
    // numeric-first column ordering in _ind, then forks the task.
    public Scale dfork2(int outputs, Frame fr) {
      final Vec [] vecs = fr.vecs();
      for(int i = 0; i < vecs.length; i++) {
        if(!vecs[i].isEnum()) _nums++;
      }
      if(_normSub == null) _normSub = MemoryManager.malloc8d(_nums);
      if(_normMul == null) { _normMul = MemoryManager.malloc8d(_nums); Arrays.fill(_normMul,1); }
      if(_ind == null) _ind = MemoryManager.malloc4(vecs.length);
      int ncnt = 0; int ccnt = 0;
      for(int i = 0; i < vecs.length; i++){
        if(!vecs[i].isEnum()) {
          _normSub[ncnt] = vecs[i].mean();
          _normMul[ncnt] = 1.0/vecs[i].sigma();
          _ind[ncnt++] = i;
        } else
          _ind[_nums+(ccnt++)] = i;
      }
      assert ncnt == _nums && (ncnt + ccnt == vecs.length);
      return dfork(outputs, fr, false);
    }
  }
}
// ----
// Base class for operators that pull one calendar field out of a time value.
// Values are read as a long and loaded into a MutableDateTime; NaN inputs
// pass through unchanged.
abstract class ASTTimeOp extends ASTOp {
  // Builds the two-slot signature; both slots hold the same Type instance.
  static Type[] newsig() {
    final Type shared = Type.dblary();
    return new Type[]{ shared, shared };
  }

  ASTTimeOp() {
    super(VARS1, newsig(), OPF_PREFIX, OPP_PREFIX, OPA_RIGHT);
  }

  // Extracts this operator's field from dt.
  abstract long op( MutableDateTime dt );

  @Override void apply(Env env, int argcnt, ASTApply apply) {
    if( env.isAry() ) {
      // Whole column call
      final Frame input = env.popAry();
      final String inputKey = env.key();
      final ASTTimeOp self = this; // Final 'this' so can use in closure
      final Frame result = new MRTask2() {
        @Override public void map( Chunk chks[], NewChunk nchks[] ) {
          // One reusable date-time object per map call
          final MutableDateTime scratch = new MutableDateTime(0);
          for( int col = 0; col < nchks.length; col++ ) {
            final Chunk src = chks[col];
            final NewChunk dst = nchks[col];
            final int rows = src._len;
            for( int row = 0; row < rows; row++ ) {
              double value = src.at0(row);
              if( !Double.isNaN(value) ) {
                scratch.setMillis((long)value);
                value = self.op(scratch);
              }
              dst.addNum(value);
            }
          }
        }
      }.doAll(input.numCols(), input).outputFrame(input._names, null);
      env.subRef(input, inputKey);
      env.pop(); // Pop self
      env.push(result);
      return;
    }
    // Single point
    double value = env.popDbl();
    if( !Double.isNaN(value) )
      value = op(new MutableDateTime((long)value));
    env.poppush(value);
  }
}
// Calendar-field extractors.  Each class names itself via opStr(), hands the
// parser a fresh instance via make(), and reads one field from the date-time.

// year(x)
class ASTYear extends ASTTimeOp {
  @Override String opStr() { return "year"; }
  @Override ASTOp make() { return new ASTYear(); }
  @Override long op(MutableDateTime dt) { return dt.getYear(); }
}
// month(x): month of year minus one
class ASTMonth extends ASTTimeOp {
  @Override String opStr() { return "month"; }
  @Override ASTOp make() { return new ASTMonth(); }
  @Override long op(MutableDateTime dt) { return dt.getMonthOfYear()-1; }
}
// day(x): day of month
class ASTDay extends ASTTimeOp {
  @Override String opStr() { return "day"; }
  @Override ASTOp make() { return new ASTDay(); }
  @Override long op(MutableDateTime dt) { return dt.getDayOfMonth(); }
}
// hour(x): hour of day
class ASTHour extends ASTTimeOp {
  @Override String opStr() { return "hour"; }
  @Override ASTOp make() { return new ASTHour(); }
  @Override long op(MutableDateTime dt) { return dt.getHourOfDay(); }
}
// minute(x): minute of hour
class ASTMinute extends ASTTimeOp {
  @Override String opStr() { return "minute"; }
  @Override ASTOp make() { return new ASTMinute(); }
  @Override long op(MutableDateTime dt) { return dt.getMinuteOfHour(); }
}
// second(x): second of minute
class ASTSecond extends ASTTimeOp {
  @Override String opStr() { return "second"; }
  @Override ASTOp make() { return new ASTSecond(); }
  @Override long op(MutableDateTime dt) { return dt.getSecondOfMinute(); }
}
// millis(x): millisecond of second
class ASTMillis extends ASTTimeOp {
  @Override String opStr() { return "millis"; }
  @Override ASTOp make() { return new ASTMillis(); }
  @Override long op(MutableDateTime dt) { return dt.getMillisOfSecond(); }
}
// as.Date(x, format): parses the level strings of a single factor column with
// the given strptime-style pattern and emits the resulting millisecond values.
// NA rows stay NA.
class ASTasDate extends ASTOp {
  ASTasDate() {
    super(new String[]{"as.Date", "x", "format"},
          new Type[]{Type.ARY, Type.ARY, Type.STR},
          OPF_PREFIX, OPP_PREFIX, OPA_RIGHT);
  }

  @Override String opStr() { return "as.Date"; }

  @Override ASTOp make() { return new ASTasDate(); }

  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final String format = env.popStr();
    if( format.isEmpty() )
      throw new IllegalArgumentException("as.Date requires a non-empty format string");
    // check the format string more?
    final Frame input = env.ary(-1);
    final boolean singleFactor = input.vecs().length == 1 && input.vecs()[0].isEnum();
    if( !singleFactor )
      throw new IllegalArgumentException("as.Date requires a single column of factors");
    final Frame parsed = new MRTask2() {
      @Override public void map( Chunk chks[], NewChunk nchks[] ) {
        //done on each node in lieu of rewriting DateTimeFormatter as Iced
        final DateTimeFormatter dtf = ParseTime.forStrptimePattern(format).withZone(ParseTime.getTimezone());
        for( int col = 0; col < nchks.length; col++ ) {
          final Chunk src = chks[col];
          final NewChunk dst = nchks[col];
          final int rows = src._len;
          for( int row = 0; row < rows; row++ ) {
            if( src.isNA0(row) ) {
              dst.addNA();
              continue;
            }
            // Cell value is the level index; resolve it to the level string.
            final String date = src._vec.domain((long)src.at0(row));
            dst.addNum(DateTime.parse(date, dtf).getMillis(), 0);
          }
        }
      }
    }.doAll(input.numCols(), input).outputFrame(input._names, null);
    env.poppush(2, parsed, null);
  }
}
// strsplit(x, split): splits every level string of a single factor column on
// the regex `split` and returns one factor column per piece position, named
// C1..Cn.  Rows with fewer pieces than n are padded with NA; NA rows yield NA
// in every output column.
class ASTStrSplit extends ASTOp {
  ASTStrSplit() { super(new String[]{"strsplit", "x", "split"},
                        new Type[]{Type.ARY, Type.ARY, Type.STR},
                        OPF_PREFIX,
                        OPP_PREFIX, OPA_RIGHT); }
  @Override String opStr() { return "strsplit"; }
  @Override ASTOp make() { return new ASTStrSplit(); }

  // Throws IllegalArgumentException when the frame is not a single column or
  // the column has no domain (is not a factor).
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final String regex = env.popStr();
    Frame fr = env.ary(-1);
    if (fr.numCols() != 1) throw new IllegalArgumentException("strsplit requires a single column.");
    final String[] old_domains = fr.anyVec().domain();
    if (old_domains == null) throw new IllegalArgumentException("strsplit requires a factor column.");
    final String[][] new_domains = newDomains(old_domains, regex);
    final String[] col_names = new String[new_domains.length];
    for (int i = 0; i < col_names.length; ++i)
      col_names[i] = "C"+(i+1);
    Frame fr2 = new MRTask2() {
      @Override public void map(Chunk[] cs, NewChunk[] ncs) {
        Chunk c = cs[0];
        for (int i = 0; i < c._len; ++i) {
          int cnt = 0;
          // An NA cell reads as NaN, which an int cast would silently turn
          // into level 0; skip the split so the row is all-NA instead.
          if (!c.isNA0(i)) {
            String[] ss = old_domains[(int)c.at0(i)].split(regex);
            for (; cnt < ss.length; ++cnt) {
              int n_idx = Arrays.asList(new_domains[cnt]).indexOf(ss[cnt]);
              if (n_idx == -1) ncs[cnt].addNA();
              else ncs[cnt].addNum(n_idx);
            }
          }
          // Pad the positions this row has no piece for.
          for (; cnt < ncs.length; ++cnt) ncs[cnt].addNA();
        }
      }
    }.doAll(col_names.length, fr).outputFrame(col_names, new_domains);
    env.poppush(2, fr2, null);
  }

  // Builds one domain per piece position: domain i holds the distinct strings
  // seen at position i across all split level strings.
  private String[][] newDomains(String[] domains, String regex) {
    ArrayList<HashSet<String>> strs = new ArrayList<HashSet<String>>();
    for (String domain : domains) {
      String[] news = domain.split(regex);
      for (int i = 0; i < news.length; ++i) {
        // Positions are visited in order, so a missing set is always the next one.
        if (strs.size() == i) strs.add(new HashSet<String>());
        strs.get(i).add(news[i]);
      }
    }
    String[][] doms = new String[strs.size()][];
    for (int i = 0; i < doms.length; ++i) {
      HashSet<String> x = strs.get(i);
      doms[i] = x.toArray(new String[x.size()]);
    }
    return doms;
  }
}
// tolower(x): lower-cases (English locale) every level string of a single
// factor column.
class ASTToLower extends ASTUniPrefixOp {
  @Override String opStr() { return "tolower"; }
  @Override ASTOp make() { return new ASTToLower(); }

  // Throws IllegalArgumentException when the argument is not a Frame, has more
  // than one column, or the column has no domain (is not a factor).
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    if( !env.isAry() ) { throw new IllegalArgumentException("tolower only operates on a single vector!"); }
    Frame fr = env.popAry();
    if (fr.numCols() != 1) throw new IllegalArgumentException("tolower only takes a single column of data. Got "+ fr.numCols()+" columns.");
    String[] old_dom = fr.anyVec().domain();
    // A column without a domain would otherwise fail below with a bare NullPointerException.
    if (old_dom == null) throw new IllegalArgumentException("Column is not a factor column. Can only apply tolower to a factor column.");
    String skey = env.key();
    String[] new_dom = old_dom.clone();
    for (int i = 0; i < new_dom.length; ++i)
      new_dom[i] = new_dom[i].toLowerCase(Locale.ENGLISH);
    Frame fr2 = new Frame(fr._names, fr.vecs());
    // NOTE(review): fr2 is built from fr.vecs(); if Frame does not copy the
    // Vecs, this assignment also relabels the input column -- confirm intended.
    fr2.anyVec()._domain = new_dom;
    env.subRef(fr,skey);
    env.pop();
    env.push(fr2);
  }
}
// toupper(x): upper-cases (English locale) every level string of a single
// factor column.
class ASTToUpper extends ASTUniPrefixOp {
  @Override String opStr() { return "toupper"; }
  @Override ASTOp make() { return new ASTToUpper(); }

  // Throws IllegalArgumentException when the argument is not a Frame, has more
  // than one column, or the column has no domain (is not a factor).
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    if( !env.isAry() ) { throw new IllegalArgumentException("toupper only operates on a single vector!"); }
    Frame fr = env.popAry();
    if (fr.numCols() != 1) throw new IllegalArgumentException("toupper only takes a single column of data. Got "+ fr.numCols()+" columns.");
    String[] old_dom = fr.anyVec().domain();
    // A column without a domain would otherwise fail below with a bare NullPointerException.
    if (old_dom == null) throw new IllegalArgumentException("Column is not a factor column. Can only apply toupper to a factor column.");
    String skey = env.key();
    String[] new_dom = old_dom.clone();
    for (int i = 0; i < new_dom.length; ++i)
      new_dom[i] = new_dom[i].toUpperCase(Locale.ENGLISH);
    Frame fr2 = new Frame(fr._names, fr.vecs());
    // NOTE(review): fr2 is built from fr.vecs(); if Frame does not copy the
    // Vecs, this assignment also relabels the input column -- confirm intended.
    fr2.anyVec()._domain = new_dom;
    env.subRef(fr,skey);
    env.pop();
    env.push(fr2);
  }
}
/**
 * {@code revalue(x, replace, warn_missing)}: renames levels of a single factor
 * column in place.
 *
 * {@code replace} is a ';'-separated list of {@code old:new} pairs. Each level
 * that appears as an {@code old} key is overwritten with its {@code new} value
 * directly in the column's domain array. When {@code warn_missing} is 1, one
 * warning is appended to {@code env._warnings} for every {@code old} key that
 * matched no level.
 */
class ASTRevalue extends ASTOp {
  ASTRevalue(){ super(new String[]{"revalue", "x", "replace", "warn_missing"},
                      new Type[]{Type.ARY, Type.ARY, Type.STR, Type.DBL},
                      OPF_PREFIX,
                      OPP_PREFIX, OPA_RIGHT); }
  @Override String opStr() { return "revalue"; }
  @Override ASTOp make() { return new ASTRevalue(); }
  /**
   * @throws IllegalArgumentException if the frame is not a single column, the
   *         column is not a factor, or {@code replace} is malformed
   */
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final boolean warn_missing = env.popDbl() == 1;
    final String replace = env.popStr();
    String skey = env.key(); // not used below; call kept as in the original
    Frame fr = env.popAry();
    if (fr.numCols() != 1) throw new IllegalArgumentException("revalue works on a single column at a time.");
    String[] old_dom = fr.anyVec()._domain;
    if (old_dom == null) throw new IllegalArgumentException("Column is not a factor column. Can only revalue a factor column.");
    // NOTE(review): nothing is pushed back on the stack here, unlike the sibling
    // string ops -- confirm the caller expects a side-effect-only op.
    HashMap<String, String> dom_map = hashMap(replace);
    for (int i = 0; i < old_dom.length; ++i) {
      // Remember the OLD level before overwriting it: the mapping must be removed
      // by its old key, otherwise matched keys are later reported as missing and a
      // pending mapping keyed by the new value could be dropped.
      final String old_level = old_dom[i];
      if (dom_map.containsKey(old_level)) {
        old_dom[i] = dom_map.remove(old_level);
      }
    }
    // Whatever is left in the map never matched a level.
    if (dom_map.size() > 0 && warn_missing) {
      for (String k : dom_map.keySet()) {
        env._warnings = Arrays.copyOf(env._warnings, env._warnings.length + 1);
        env._warnings[env._warnings.length - 1] = "Warning: old value " + k + " not a factor level.";
      }
    }
  }
  /**
   * Parses a ';'-separated list of {@code key:value} pairs into a map.
   *
   * @throws IllegalArgumentException if a piece does not contain both a key and a value
   */
  private HashMap<String, String> hashMap(String replace) {
    HashMap<String, String> map = new HashMap<String, String>();
    //replace is a ';' separated string. Each piece after splitting is a key:value pair.
    String[] maps = replace.split(";");
    for (String s : maps) {
      String[] pair = s.split(":");
      // Previously a piece without ':' (or with an empty value) crashed with
      // ArrayIndexOutOfBoundsException.
      if (pair.length < 2)
        throw new IllegalArgumentException("Malformed replacement `" + s + "`: expected old:new pairs separated by ';'.");
      map.put(pair[0], pair[1]);
    }
    return map;
  }
}
/**
 * {@code gsub(pattern, replacement, x, ignore.case)}: replaces every match of a
 * regular expression in the level names of a single factor column.
 *
 * The argument frame is popped and a new one-column frame built from the same
 * names and vecs is pushed; only the domain (level names) is replaced.
 */
class ASTGSub extends ASTOp {
  ASTGSub() { super(new String[]{"gsub", "pattern", "replacement", "x", "ignore.case"},
                    new Type[]{Type.ARY, Type.STR, Type.STR, Type.ARY, Type.DBL},
                    OPF_PREFIX,
                    OPP_PREFIX, OPA_RIGHT); }
  @Override String opStr() { return "gsub"; }
  @Override ASTOp make() { return new ASTGSub(); }
  /**
   * @throws IllegalArgumentException if the frame is not a single column, the
   *         column is not a factor, or {@code pattern} is not a valid regex
   */
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final boolean ignore_case = env.popDbl() == 1;
    String skey = env.key();
    Frame fr = env.popAry();
    if (fr.numCols() != 1) throw new IllegalArgumentException("gsub works on a single column at a time.");
    final String replacement = env.popStr();
    final String pattern = env.popStr();
    String[] old_doms = fr.anyVec().domain();
    if (old_doms == null) throw new IllegalArgumentException("gsub works on a factor column only.");
    String[] doms = old_doms.clone();
    // Case-insensitive matching is done by the regex engine. The previous approach
    // lower-cased the whole level first, which altered unmatched text and could
    // never match a pattern containing upper-case characters. Compiling once also
    // avoids re-parsing the pattern for every level.
    final java.util.regex.Pattern regex = ignore_case
        ? java.util.regex.Pattern.compile(pattern,
            java.util.regex.Pattern.CASE_INSENSITIVE | java.util.regex.Pattern.UNICODE_CASE)
        : java.util.regex.Pattern.compile(pattern);
    for (int i = 0; i < doms.length; ++i)
      doms[i] = regex.matcher(doms[i]).replaceAll(replacement);
    Frame fr2 = new Frame(fr.names(), fr.vecs());
    fr2.anyVec()._domain = doms;
    env.subRef(fr, skey);
    env.poppush(1, fr2, null);
  }
}
/**
 * {@code setLevel(x, level)}: produces a single factor column of the same
 * length as {@code x} in which every row is set to the given level.
 *
 * The output frame reuses the input's names and domains; each row holds the
 * index of {@code level} within the column's domain.
 */
class ASTSetLevel extends ASTOp {
  ASTSetLevel() { super(new String[]{"setLevel", "x", "level"},
                        new Type[]{Type.ARY, Type.ARY, Type.STR},
                        OPF_PREFIX,
                        OPP_PREFIX, OPA_RIGHT); }
  @Override String opStr() { return "setLevel"; }
  @Override ASTOp make() { return new ASTSetLevel(); }
  /**
   * @throws IllegalArgumentException if the frame is not a single column, the
   *         column is not a factor, or {@code level} is not one of its levels
   */
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final String level = env.popStr();
    String skey = env.key();
    Frame fr = env.popAry();
    if (fr.numCols() != 1) throw new IllegalArgumentException("setLevel works on a single column at a time.");
    // Check for a missing domain BEFORE touching it: the old code cloned first,
    // so a non-factor column died with a NullPointerException and this message
    // was unreachable. The array is only read, so no copy is needed.
    String[] doms = fr.anyVec().domain();
    if( doms == null )
      throw new IllegalArgumentException("Cannot set the level on a non-factor column!");
    final int idx = Arrays.asList(doms).indexOf(level);
    if (idx == -1)
      throw new IllegalArgumentException("Did not find level `" + level + "` in the column.");
    // Emit the level's index once per input row.
    Frame fr2 = new MRTask2() {
      @Override public void map(Chunk c, NewChunk nc) {
        for (int i=0;i<c._len;++i)
          nc.addNum(idx);
      }
    }.doAll(1, fr.anyVec()).outputFrame(null, fr.names(), fr.domains());
    env.subRef(fr, skey);
    env.poppush(1, fr2, null);
  }
}
/**
 * {@code trim(x)}: strips leading and trailing whitespace (per
 * {@link String#trim()}) from every level name of a single factor column.
 *
 * The argument frame is popped and a new one-column frame built from the same
 * names and vecs is pushed; only the domain (level names) is replaced.
 */
class ASTTrim extends ASTOp {
  ASTTrim() { super(new String[]{"trim","x"},
                    new Type[]{Type.dblary(), Type.dblary()},
                    OPF_PREFIX,
                    OPP_PREFIX, OPA_RIGHT); }
  @Override String opStr() { return "trim"; }
  @Override ASTOp make() { return new ASTTrim(); }
  /**
   * @throws IllegalArgumentException if the frame is not a single column or the
   *         column is not a factor
   */
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    String skey = env.key();
    Frame fr = env.popAry();
    if (fr.numCols() != 1) throw new IllegalArgumentException("trim works on a single column at a time.");
    // A column without a domain used to fail with a bare NullPointerException on clone().
    String[] old_doms = fr.anyVec().domain();
    if (old_doms == null) throw new IllegalArgumentException("trim works on a factor column only.");
    String[] doms = old_doms.clone();
    for (int i = 0; i < doms.length; ++i) doms[i] = doms[i].trim();
    Frame fr2 = new Frame(fr.names(), fr.vecs());
    fr2.anyVec()._domain = doms;
    env.subRef(fr, skey);
    env.poppush(1, fr2, null);
  }
}
//FIXME: Create new chunks that overlay the frame to avoid ragged chunk issue
// sample(ary, nobs, seed): draws rows from a frame without replacement.
// A per-chunk row quota is computed first; each chunk then picks its quota of
// rows in one sequential pass, so selected rows keep their original order.
class ASTSample extends ASTOp {
ASTSample() { super(new String[]{"sample", "ary", "nobs", "seed"},
new Type[]{Type.ARY, Type.ARY, Type.DBL, Type.DBL},
OPF_PREFIX, OPP_PREFIX, OPA_RIGHT); }
@Override String opStr() { return "sample"; }
@Override ASTOp make() { return new ASTSample(); }
// Pops seed, nobs and the frame (in that order), builds the sampled frame and
// pushes it in place of the function slot.
@Override void apply(Env env, int argcnt, ASTApply apply) {
final double seed = env.popDbl();
final double nobs = env.popDbl();
String skey = env.key();
Frame fr = env.popAry();
long[] espc = fr.anyVec()._espc;
// chk_sizes[i]: rows available in slot i; css[i]: rows slot i must contribute.
// NOTE(review): espc.length is used as the slot count. If _espc holds nChunks+1
// offsets (the rbind code in this file allocates it that way), the last slot is
// an extra, typically empty, pseudo-chunk -- confirm.
long[] chk_sizes = new long[espc.length];
final long[] css = new long[espc.length];
for (int i = 0; i < espc.length-1; ++i)
chk_sizes[i] = espc[i+1] - espc[i];
chk_sizes[chk_sizes.length-1] = fr.numRows() - espc[espc.length-1];
// Even share per slot, plus the remainder that still has to be placed somewhere.
long per_chunk_sample = (long) Math.floor(nobs / (double)espc.length);
long defecit = (long) (nobs - per_chunk_sample*espc.length) ;
// idxs lists the slots that hold more rows than the even share, i.e. the only
// candidates that can absorb part of the deficit.
ArrayList<Integer> idxs = new ArrayList<Integer>();
for (int i = 0; i < css.length; ++i) {
// a slot can never contribute more rows than it holds
css[i] = Math.min(per_chunk_sample, chk_sizes[i]);
// rows this slot could not supply (0 when it covers the full even share)
long def = per_chunk_sample - css[i];
// def is never negative because css[i] <= per_chunk_sample, so this always runs
if (def >= 0) {
defecit += def;
// slots with spare rows are recorded as candidates just below
}
if (chk_sizes[i] > per_chunk_sample) idxs.add(i);
}
if (defecit > 0) {
// seed == -1 means "no seed given": fall back to the wall clock
Random rng = new Random(seed != -1 ? (long)seed : System.currentTimeMillis());
while (defecit > 0) {
// no candidate left: fewer rows than requested will be returned
if (idxs.size() <= 0) break;
// pick a candidate slot at random and raise its quota by one row,
// dropping the slot from the candidate list once it is full.
int rand = rng.nextInt(idxs.size());
if (css[idxs.get(rand)] == chk_sizes[idxs.get(rand)]) {
idxs.remove(rand);
continue;
}
css[idxs.get(rand)]++;
defecit--;
}
}
Frame fr2 = new MRTask2() {
// Sequential selection within one chunk: N rows available, n rows wanted,
// m rows taken so far. The current row is taken when (N - row) * u < (n - m).
@Override public void map(Chunk[] chks, NewChunk[] nchks) {
int N = chks[0]._len;
int m = 0;
long n = css[chks[0].cidx()];
int row = 0;
// NOTE(review): every chunk builds its generator from the same seed expression.
Random rng = new Random(seed != -1 ? (long)seed : System.currentTimeMillis());
while( m < n) {
double u = rng.nextDouble();
if ( (N - row)* u >= (n - m)) {
row++;
} else {
// copy the selected row across all columns
for (int i = 0; i < chks.length; ++i) nchks[i].addNum(chks[i].at0(row));
row++; m++;
}
}
}
}.doAll(fr.numCols(), fr).outputFrame(fr.names(), fr.domains());
env.subRef(fr, skey);
env.poppush(1, fr2, null);
}
}
/**
 * {@code sub(pattern, replacement, x, ignore.case)}: replaces the first match of
 * a regular expression in each level name of a single factor column.
 *
 * The argument frame is popped and a new one-column frame built from the same
 * names and vecs is pushed; only the domain (level names) is replaced.
 */
class ASTStrSub extends ASTOp {
  ASTStrSub() { super(new String[]{"sub", "pattern", "replacement", "x", "ignore.case"},
                      new Type[]{Type.ARY, Type.STR, Type.STR, Type.ARY, Type.DBL},
                      OPF_PREFIX,
                      OPP_PREFIX, OPA_RIGHT); }
  @Override String opStr() { return "sub"; }
  @Override ASTOp make() { return new ASTStrSub(); }
  /**
   * @throws IllegalArgumentException if the frame is not a single column, the
   *         column is not a factor, or {@code pattern} is not a valid regex
   */
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final boolean ignore_case = env.popDbl() == 1;
    String skey = env.key();
    Frame fr = env.popAry();
    if (fr.numCols() != 1) throw new IllegalArgumentException("sub works on a single column at a time.");
    final String replacement = env.popStr();
    final String pattern = env.popStr();
    String[] old_doms = fr.anyVec().domain();
    if (old_doms == null) throw new IllegalArgumentException("sub works on a factor column only.");
    String[] doms = old_doms.clone();
    // Case-insensitive matching is done by the regex engine. The previous approach
    // lower-cased the whole level first, which altered unmatched text and could
    // never match a pattern containing upper-case characters. Compiling once also
    // avoids re-parsing the pattern for every level.
    final java.util.regex.Pattern regex = ignore_case
        ? java.util.regex.Pattern.compile(pattern,
            java.util.regex.Pattern.CASE_INSENSITIVE | java.util.regex.Pattern.UNICODE_CASE)
        : java.util.regex.Pattern.compile(pattern);
    for (int i = 0; i < doms.length; ++i)
      doms[i] = regex.matcher(doms[i]).replaceFirst(replacement);
    Frame fr2 = new Frame(fr.names(), fr.vecs());
    fr2.anyVec()._domain = doms;
    env.subRef(fr, skey);
    env.poppush(1, fr2, null);
  }
}
// Finite backward difference for user-specified lag
// http://en.wikipedia.org/wiki/Finite_difference
/**
 * {@code diff(x, lag, differences)}: finite backward difference of a single
 * numeric column.
 *
 * The first {@code differences*lag} rows have no complete history and are not
 * emitted, so the result is that many rows shorter than the input.
 */
class ASTDiff extends ASTOp {
  ASTDiff() { super(new String[]{"diff", "x", "lag", "differences"},
                    new Type[]{Type.ARY, Type.ARY, Type.DBL, Type.DBL},
                    OPF_PREFIX,
                    OPP_PREFIX,
                    OPA_RIGHT); }
  @Override String opStr() { return "diff"; }
  @Override ASTOp make() {return new ASTDiff();}
  /**
   * @throws IllegalArgumentException if {@code differences} or {@code lag} is
   *         smaller than 1, or the frame is not a single non-enum column
   */
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    // Both parameters must be >= 1, as the messages state; the old "< 0" test let
    // 0 through, which silently returned the input unchanged.
    final int diffs = (int)env.popDbl();
    if(diffs < 1) throw new IllegalArgumentException("differences must be an integer >= 1");
    final int lag = (int)env.popDbl();
    if(lag < 1) throw new IllegalArgumentException("lag must be an integer >= 1");
    Frame fr = env.popAry();
    String skey = env.key();
    if(fr.vecs().length != 1 || fr.vecs()[0].isEnum())
      throw new IllegalArgumentException("diff takes a single numeric column vector");
    Frame fr2 = new MRTask2() {
      @Override public void map(Chunk chk, NewChunk nchk) {
        // First row of this chunk that has diffs*lag predecessors in the whole vec.
        int rstart = (int)(diffs*lag - chk._start);
        if(rstart > chk._len) return;
        rstart = Math.max(0, rstart);
        // Formula: \Delta_h^n x_t = \sum_{i=0}^n (-1)^i*\binom{n}{i}*x_{t-i*h}
        for(int r = rstart; r < chk._len; r++) {
          double x = chk.at0(r);              // the i = 0 term
          long row = chk._start + r;
          for(int i = 1; i <= diffs; i++) {
            // lagged rows may live in an earlier chunk, hence the global-row lookup
            double x_lag = chk.at_slow(row - i*lag);
            double coef = ArithmeticUtils.binomialCoefficient(diffs, i);
            x += (i % 2 == 0) ? coef*x_lag : -coef*x_lag;
          }
          nchk.addNum(x);
        }
      }
    }.doAll(1,fr).outputFrame(fr.names(), fr.domains());
    env.subRef(fr, skey);
    env.pop();
    env.push(fr2);
  }
}
// ----
// Class of things that will auto-expand across arrays in a 2-to-1 way:
// applying 2 things (from an array or scalar to array or scalar) producing an
// array or scalar result.
/**
 * Base class for binary operators that broadcast over frames and scalars.
 *
 * Either operand may be a frame or a double. Scalar-with-scalar is evaluated
 * directly; otherwise {@link #op(double, double)} is applied element-wise and a
 * new frame with the left frame's column names (or the right frame's, when the
 * left operand is a scalar) is pushed.
 */
abstract class ASTBinOp extends ASTOp {
  static Type[] newsig() {
    Type t1 = Type.dblary(), t2 = Type.dblary();
    return new Type[]{Type.anyary(new Type[]{t1,t2}),t1,t2};
  }
  ASTBinOp( int form, int precedence, int association ) {
    super(VARS2, newsig(), form, precedence, association); // binary ops are infix ops
  }
  /** The scalar operation: {@code d0} is the left operand, {@code d1} the right. */
  abstract double op( double d0, double d1 );
  /**
   * @throws IllegalArgumentException if both operands are frames of different
   *         row or column counts
   */
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    // Expect we can broadcast across all functions as needed.
    Frame fr0 = null, fr1 = null;
    double d0=0, d1=0;
    // The right operand is on top of the stack, so it is popped first.
    // NOTE(review): k0 is read right after popping the RIGHT operand but is paired
    // with fr0 in subRef below (and k1 with fr1) -- confirm against Env.key().
    if( env.isAry() ) fr1 = env.popAry(); else d1 = env.popDbl();  String k0 = env.key();
    if( env.isAry() ) fr0 = env.popAry(); else d0 = env.popDbl();  String k1 = env.key();
    if( fr0==null && fr1==null ) {
      env.poppush(op(d0,d1));
      return;
    }
    final boolean lf = fr0 != null;
    final boolean rf = fr1 != null;
    final double df0 = d0, df1 = d1;
    Frame fr  = null;   // Do-All frame
    int ncols = 0;      // Result column count
    if( fr0 !=null ) {  // Left?
      ncols = fr0.numCols();
      if( fr1 != null ) {
        if( fr0.numCols() != fr1.numCols() ||
            fr0.numRows() != fr1.numRows() )
          throw new IllegalArgumentException("Arrays must be same size: LHS FRAME NUM ROWS/COLS: "+fr0.numRows()+"/"+fr0.numCols() +" vs RHS FRAME NUM ROWS/COLS: "+fr1.numRows()+"/"+fr1.numCols());
        // Left columns first, then the right columns: column i pairs with i+ncols.
        fr = new Frame(fr0).add(fr1,true);
      } else {
        fr = fr0;
      }
    } else {
      ncols = fr1.numCols();
      fr = fr1;
    }
    final ASTBinOp bin = this;  // Final 'this' so can use in closure
    // Run an arbitrary binary op on one or two frames & scalars
    Frame fr2 = new MRTask2() {
      @Override public void map( Chunk chks[], NewChunk nchks[] ) {
        // "|" is the one operator that must still be evaluated when an input is NA.
        final boolean isOr = bin.opStr().equals("|");
        for( int i=0; i<nchks.length; i++ ) {
          NewChunk n =nchks[i];
          int rlen = chks[0]._len;
          Chunk c0 = chks[i];
          // Index of the right-hand column: offset past the left columns when both
          // operands are frames, otherwise the frame's own column i.
          final int ri = i + (lf ? nchks.length : 0);
          // Enum columns only take part in == and !=; for any other op they yield NA.
          if( (!c0._vec.isEnum() &&
               !(lf && rf && chks[i+nchks.length]._vec.isEnum())) ||
              bin instanceof ASTEQ ||
              bin instanceof ASTNE ) {
            for( int r=0; r<rlen; r++ ) {
              double lv; double rv;
              if (lf) {
                if(vecs(i).isUUID() || (chks[i].isNA0(r) && !isOr)) { n.addNum(Double.NaN); continue; }
                lv = chks[i].at0(r);
              } else {
                if (Double.isNaN(df0) && !isOr) { n.addNum(Double.NaN); continue; }
                lv = df0;
              }
              if (rf) {
                // Test the RIGHT chunk for NA. The old code re-tested chks[i], which
                // is the left column when both operands are frames, so a right-hand
                // NA reached op() and comparison ops turned it into 0/1.
                if(vecs(ri).isUUID() || (chks[ri].isNA0(r) && !isOr)) { n.addNum(Double.NaN); continue; }
                rv = chks[ri].at0(r);
              } else {
                if (Double.isNaN(df1) && !isOr) { n.addNum(Double.NaN); continue; }
                rv = df1;
              }
              n.addNum(bin.op(lv, rv));
            }
          } else {
            for( int r=0; r<rlen; r++ )  n.addNA();
          }
        }
      }
    }.doAll(ncols,fr).outputFrame((lf ? fr0 : fr1)._names,null);
    if( fr0 != null ) env.subRef(fr0,k0);
    if( fr1 != null ) env.subRef(fr1,k1);
    env.pop();
    env.push(fr2);
  }
}
/** Unary plus: returns its operand unchanged. */
class ASTUniPlus extends ASTUniOp {
  ASTUniPlus() {
    super(OPF_INFIX, OPP_UPLUS, OPA_RIGHT);
  }
  @Override String opStr() {
    return "+";
  }
  @Override ASTOp make() {
    return new ASTUniPlus();
  }
  @Override double op(double d) {
    return d;
  }
}
/** Unary minus: negates its operand. */
class ASTUniMinus extends ASTUniOp {
  ASTUniMinus() {
    super(OPF_INFIX, OPP_UMINUS, OPA_RIGHT);
  }
  @Override String opStr() {
    return "-";
  }
  @Override ASTOp make() {
    return new ASTUniMinus();
  }
  @Override double op(double d) {
    return -d;
  }
}
/** Logical not: 1 when the operand equals 0, otherwise 0. */
class ASTNot extends ASTUniOp {
  ASTNot() {
    super(OPF_INFIX, OPP_NOT, OPA_RIGHT);
  }
  @Override String opStr() {
    return "!";
  }
  @Override ASTOp make() {
    return new ASTNot();
  }
  @Override double op(double d) {
    if (d == 0) return 1;
    return 0;
  }
}
/** Binary {@code +}: sum of the two operands. */
class ASTPlus extends ASTBinOp {
  ASTPlus() {
    super(OPF_INFIX, OPP_PLUS, OPA_LEFT);
  }
  @Override String opStr() {
    return "+";
  }
  @Override ASTOp make() {
    return new ASTPlus();
  }
  @Override double op(double d0, double d1) {
    return d0 + d1;
  }
}
/** Binary {@code -}: left operand minus right operand. */
class ASTSub extends ASTBinOp {
  ASTSub() {
    super(OPF_INFIX, OPP_MINUS, OPA_LEFT);
  }
  @Override String opStr() {
    return "-";
  }
  @Override ASTOp make() {
    return new ASTSub();
  }
  @Override double op(double d0, double d1) {
    return d0 - d1;
  }
}
/** Binary {@code *}: product of the two operands. */
class ASTMul extends ASTBinOp {
  ASTMul() {
    super(OPF_INFIX, OPP_MUL, OPA_LEFT);
  }
  @Override String opStr() {
    return "*";
  }
  @Override ASTOp make() {
    return new ASTMul();
  }
  @Override double op(double d0, double d1) {
    return d0 * d1;
  }
}
/** Binary {@code /}: floating-point quotient. */
class ASTDiv extends ASTBinOp {
  ASTDiv() {
    super(OPF_INFIX, OPP_DIV, OPA_LEFT);
  }
  @Override String opStr() {
    return "/";
  }
  @Override ASTOp make() {
    return new ASTDiv();
  }
  @Override double op(double d0, double d1) {
    return d0 / d1;
  }
}
/** Binary {@code ^}: left operand raised to the right operand (right-associative). */
class ASTPow extends ASTBinOp {
  ASTPow() {
    super(OPF_INFIX, OPP_POWER, OPA_RIGHT);
  }
  @Override String opStr() {
    return "^";
  }
  @Override ASTOp make() {
    return new ASTPow();
  }
  @Override double op(double d0, double d1) {
    return Math.pow(d0, d1);
  }
}
/** Binary {@code **}: alternative spelling of the power operator. */
class ASTPow2 extends ASTBinOp {
  ASTPow2() {
    super(OPF_INFIX, OPP_POWER, OPA_RIGHT);
  }
  @Override String opStr() {
    return "**";
  }
  @Override ASTOp make() {
    return new ASTPow2();
  }
  @Override double op(double d0, double d1) {
    return Math.pow(d0, d1);
  }
}
/** Binary {@code %}: Java remainder, whose result takes the sign of the left operand. */
class ASTMod extends ASTBinOp {
  ASTMod() {
    super(OPF_INFIX, OPP_MOD, OPA_LEFT);
  }
  @Override String opStr() {
    return "%";
  }
  @Override ASTOp make() {
    return new ASTMod();
  }
  @Override double op(double d0, double d1) {
    return d0 % d1;
  }
}
/**
 * Binary {@code %%}: R-style modulo.
 *
 * The result takes the sign of the divisor (floored modulo), so that
 * {@code x == (x %% y) + y * (x %/% y)} holds together with the floored
 * {@code %/%} operator. Java's {@code %} alone takes the sign of the dividend,
 * e.g. {@code -5 % 3 == -2}, whereas {@code -5 %% 3} is 1.
 */
class ASTMod2 extends ASTBinOp {
  ASTMod2() { super(OPF_INFIX, OPP_MOD, OPA_LEFT); }
  @Override String opStr() { return "%%"; }
  @Override ASTOp make() { return new ASTMod2(); }
  @Override double op(double d0, double d1) {
    double r = d0 % d1;
    // Shift a non-zero remainder into the divisor's sign range. NaN (e.g. d1 == 0)
    // passes through unchanged because NaN + d1 is still NaN.
    if (r != 0 && ((r < 0) != (d1 < 0))) r += d1;
    return r;
  }
}
/** {@code <}: 1 when d0 is below d1 and the two are not equal within one small ulp, else 0. */
class ASTLT extends ASTBinOp {
  ASTLT() {
    super(OPF_INFIX, OPP_LT, OPA_LEFT);
  }
  @Override String opStr() {
    return "<";
  }
  @Override ASTOp make() {
    return new ASTLT();
  }
  @Override double op(double d0, double d1) {
    if (d0 < d1 && !Utils.equalsWithinOneSmallUlp(d0, d1)) return 1;
    return 0;
  }
}
/** {@code <=}: 1 when d0 is below d1 or the two are equal within one small ulp, else 0. */
class ASTLE extends ASTBinOp {
  ASTLE() {
    super(OPF_INFIX, OPP_LE, OPA_LEFT);
  }
  @Override String opStr() {
    return "<=";
  }
  @Override ASTOp make() {
    return new ASTLE();
  }
  @Override double op(double d0, double d1) {
    if (d0 < d1 || Utils.equalsWithinOneSmallUlp(d0, d1)) return 1;
    return 0;
  }
}
/** {@code >}: 1 when d0 is above d1 and the two are not equal within one small ulp, else 0. */
class ASTGT extends ASTBinOp {
  ASTGT() {
    super(OPF_INFIX, OPP_GT, OPA_LEFT);
  }
  @Override String opStr() {
    return ">";
  }
  @Override ASTOp make() {
    return new ASTGT();
  }
  @Override double op(double d0, double d1) {
    if (d0 > d1 && !Utils.equalsWithinOneSmallUlp(d0, d1)) return 1;
    return 0;
  }
}
/** {@code >=}: 1 when d0 is above d1 or the two are equal within one small ulp, else 0. */
class ASTGE extends ASTBinOp {
  ASTGE() {
    super(OPF_INFIX, OPP_GE, OPA_LEFT);
  }
  @Override String opStr() {
    return ">=";
  }
  @Override ASTOp make() {
    return new ASTGE();
  }
  @Override double op(double d0, double d1) {
    if (d0 > d1 || Utils.equalsWithinOneSmallUlp(d0, d1)) return 1;
    return 0;
  }
}
/** {@code ==}: 1 when the operands are equal within one small ulp, else 0. */
class ASTEQ extends ASTBinOp {
  ASTEQ() {
    super(OPF_INFIX, OPP_EQ, OPA_LEFT);
  }
  @Override String opStr() {
    return "==";
  }
  @Override ASTOp make() {
    return new ASTEQ();
  }
  @Override double op(double d0, double d1) {
    if (Utils.equalsWithinOneSmallUlp(d0, d1)) return 1;
    return 0;
  }
}
/** {@code !=}: 0 when the operands are equal within one small ulp, else 1. */
class ASTNE extends ASTBinOp {
  ASTNE() {
    super(OPF_INFIX, OPP_NE, OPA_LEFT);
  }
  @Override String opStr() {
    return "!=";
  }
  @Override ASTOp make() {
    return new ASTNE();
  }
  @Override double op(double d0, double d1) {
    if (Utils.equalsWithinOneSmallUlp(d0, d1)) return 0;
    return 1;
  }
}
/**
 * Element-wise {@code &}: 0 when either operand is 0; otherwise NaN when either
 * operand is NaN; otherwise 1.
 */
class ASTLA extends ASTBinOp {
  ASTLA() {
    super(OPF_INFIX, OPP_AND, OPA_LEFT);
  }
  @Override String opStr() {
    return "&";
  }
  @Override ASTOp make() {
    return new ASTLA();
  }
  @Override double op(double d0, double d1) {
    // A definite 0 on either side decides the result, even against NaN.
    if (d0 == 0 || d1 == 0) return 0;
    if (Double.isNaN(d0) || Double.isNaN(d1)) return Double.NaN;
    return 1;
  }
}
/**
 * Element-wise {@code |}: NaN when both operands are NaN or when one is NaN and
 * the other is 0; 0 when both are 0; otherwise 1.
 */
class ASTLO extends ASTBinOp {
  ASTLO() {
    super(OPF_INFIX, OPP_OR, OPA_LEFT);
  }
  @Override String opStr() {
    return "|";
  }
  @Override ASTOp make() {
    return new ASTLO();
  }
  @Override double op(double d0, double d1) {
    final boolean na0 = Double.isNaN(d0);
    final boolean na1 = Double.isNaN(d1);
    if (na0 && na1) return Double.NaN;
    // NaN paired with 0 stays unknown; NaN paired with a non-zero value falls through to 1.
    if ((na1 && d0 == 0) || (na0 && d1 == 0)) return Double.NaN;
    return (d0 == 0 && d1 == 0) ? 0 : 1;
  }
}
/** {@code %/%}: integer division, i.e. the floor of the floating-point quotient. */
class ASTIntDiv extends ASTBinOp {
  ASTIntDiv() {
    super(OPF_INFIX, OPP_INTDIV, OPA_LEFT);
  }
  @Override String opStr() {
    return "%/%";
  }
  @Override ASTOp make() {
    return new ASTIntDiv();
  }
  @Override double op(double d0, double d1) {
    final double quotient = d0 / d1;
    return Math.floor(quotient);
  }
}
// Variable length; instances will be created of required length
/**
 * Base class for variable-arity reducers that fold scalars and whole frames
 * into a single double, starting from {@code _init} and combining values with
 * {@link #op(double, double)}. When {@code _narm} is set, NaN values are skipped.
 */
abstract class ASTReducerOp extends ASTOp {
  final double _init;
  boolean _narm;        // na.rm in R
  ASTReducerOp( double init, boolean narm ) {
    super(new String[]{"","dbls"},
          new Type[]{Type.DBL,Type.varargs(Type.dblary())},
          OPF_PREFIX,
          OPP_PREFIX,
          OPA_RIGHT);
    _init = init;
    _narm = narm;
  }
  /**
   * Folds {@code in} into one value and stores it in slot 0 of {@code out};
   * a fresh one-element array is used when {@code out} is null or empty.
   */
  @Override double[] map(Env env, double[] in, double[] out) {
    double acc = _init;
    for (int k = 0; k < in.length; k++) {
      final double v = in[k];
      if (_narm && Double.isNaN(v)) continue;   // na.rm: ignore missing values
      acc = op(acc, v);
    }
    final double[] res = (out != null && out.length >= 1) ? out : new double[1];
    res[0] = acc;
    return res;
  }
  /** The combining operation applied to the running value and the next input. */
  abstract double op( double d0, double d1 );
  /**
   * Consumes {@code argcnt-1} arguments from the stack, folding scalars directly
   * and frames via a distributed task, then replaces the function slot with the result.
   */
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    double total = _init;
    for (int remaining = argcnt - 1; remaining > 0; remaining--) {
      if (env.isDbl()) {
        total = op(total, env.popDbl());
        continue;
      }
      Frame fr = env.popAry();
      String skey = env.key();
      final double frameResult;
      if (_narm) {
        frameResult = new NaRmRedOp(this).doAll(fr)._d;
      } else {
        frameResult = new RedOp(this).doAll(fr)._d;
      }
      total = op(total, frameResult);
      env.subRef(fr, skey);
    }
    env.poppush(total);
  }
  /** Distributed fold over every element of a frame, NaN included. */
  private static class RedOp extends MRTask2<RedOp> {
    final ASTReducerOp _bin;
    RedOp( ASTReducerOp bin ) { _bin = bin; _d = bin._init; }
    double _d;
    @Override public void map( Chunk chks[] ) {
      for (int col = 0; col < chks.length; col++) {
        final Chunk chunk = chks[col];
        for (int row = 0; row < chunk._len; row++) {
          _d = _bin.op(_d, chunk.at0(row));
        }
        // Stop visiting further columns once the running value is NaN.
        if (Double.isNaN(_d)) break;
      }
    }
    @Override public void reduce( RedOp s ) { _d = _bin.op(_d,s._d); }
  }
  /** Distributed fold over a frame that skips NaN elements. */
  private static class NaRmRedOp extends MRTask2<NaRmRedOp> {
    final ASTReducerOp _bin;
    NaRmRedOp( ASTReducerOp bin ) { _bin = bin; _d = bin._init; }
    double _d;
    @Override public void map( Chunk chks[] ) {
      for (int col = 0; col < chks.length; col++) {
        final Chunk chunk = chks[col];
        for (int row = 0; row < chunk._len; row++) {
          final double v = chunk.at0(row);
          if (Double.isNaN(v)) continue;
          _d = _bin.op(_d, v);
        }
        // Stop visiting further columns once the running value is NaN.
        if (Double.isNaN(_d)) break;
      }
    }
    @Override public void reduce( NaRmRedOp s ) { _d = _bin.op(_d,s._d); }
  }
}
/** {@code sum}: adds all inputs starting from 0; NaN inputs are not removed. */
class ASTSum extends ASTReducerOp {
  ASTSum() {
    super(0, false);
  }
  @Override String opStr() {
    return "sum";
  }
  @Override ASTOp make() {
    return new ASTSum();
  }
  @Override double op(double d0, double d1) {
    return d0 + d1;
  }
}
/** {@code sum.na.rm}: adds all inputs starting from 0 with NaN removal enabled. */
class ASTSumNaRm extends ASTReducerOp {
  ASTSumNaRm() {
    super(0, true);
  }
  @Override String opStr() {
    return "sum.na.rm";
  }
  @Override ASTOp make() {
    return new ASTSumNaRm();
  }
  @Override double op(double d0, double d1) {
    return d0 + d1;
  }
}
/**
 * {@code Reduce(op2, ary)}: declared with a two-argument function and a frame,
 * but not implemented -- applying it always throws.
 */
class ASTReduce extends ASTOp {
  static final String[] VARS = new String[]{ "", "op2", "ary"};
  static final Type[] TYPES = new Type[]{
      Type.ARY,
      Type.fcn(new Type[]{Type.DBL,Type.DBL,Type.DBL}),
      Type.ARY };
  ASTReduce() {
    super(VARS, TYPES, OPF_PREFIX, OPP_PREFIX, OPA_RIGHT);
  }
  @Override String opStr() {
    return "Reduce";
  }
  // Returns this same instance rather than a fresh copy.
  @Override ASTOp make() {
    return this;
  }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    throw H2O.unimpl();
  }
}
// TODO: Check refcnt mismatch issue: tmp = cbind(h.hex,3.5) results in different refcnts per col
// cbind(...): column-binds any mix of frames and scalars into one frame.
// Frames must agree on row count; a scalar becomes a constant column.
class ASTCbind extends ASTOp {
@Override String opStr() { return "cbind"; }
ASTCbind( ) { super(new String[]{"cbind","ary"},
new Type[]{Type.ARY,Type.varargs(Type.dblary())},
OPF_PREFIX,
OPP_PREFIX,OPA_RIGHT); }
@Override ASTOp make() {return new ASTCbind(); }
// Arguments are read in place from the stack (offset -argcnt+1+i for argument i)
// and only removed at the very end. Throws IllegalArgumentException when two
// frame arguments differ in row count.
@Override void apply(Env env, int argcnt, ASTApply apply) {
// Pass 1: remember the first Vec of the first frame argument as the row-count
// reference and check every later frame against it.
Vec vmax = null;
for(int i = 0; i < argcnt-1; i++) {
if(env.isAry(-argcnt+1+i)) {
Frame tmp = env.ary(-argcnt+1+i);
if(vmax == null) vmax = tmp.vecs()[0];
else if(tmp.numRows() != vmax.length())
// R pads shorter cols to match max rows by cycling/repeating, but we won't support that
throw new IllegalArgumentException("Row mismatch! Expected " + String.valueOf(vmax.length()) + " but frame has " + String.valueOf(tmp.numRows()));
}
}
// Pass 2: append each argument's columns to the (initially empty) result frame.
Frame fr = new Frame(new String[0],new Vec[0]);
for(int i = 0; i < argcnt-1; i++) {
if( env.isAry(-argcnt+1+i) ) {
String name;
Frame fr2 = env.ary(-argcnt+1+i);
Frame fr3 = fr.makeCompatible(fr2);
if( fr3 != fr2 ) { // If copied into a new Frame, need to adjust refs
env.addRef(fr3);
env.subRef(fr2,null);
}
// Take name from an embedded assign: "cbind(colNameX = some_frame, ...)"
if( fr2.numCols()==1 && apply != null && (name = apply._args[i+1].argName()) != null ) {
// An argument name equal to the frame's own key is not treated as a rename.
// NOTE(review): fr3._key is dereferenced without a null check -- confirm it is always set here.
if (name.equals(fr3._key.toString())) fr.add(fr3,true);
else fr.add(name, fr3.anyVec());
} else fr.add(fr3,true);
} else {
// Scalar argument: a constant column as long as the reference Vec, or a
// single-element Vec when no frame argument was seen. Named C<position>.
double d = env.dbl(-argcnt+1+i);
Vec v = vmax == null ? Vec.make1Elem(d) : vmax.makeCon(d);
fr.add("C" + String.valueOf(i+1), v);
env.addRef(v);
}
}
// Store the result in the slot at _sp-argcnt, clear that slot's function entry,
// then drop the argcnt-1 argument slots and null out their frame references.
env._ary[env._sp-argcnt] = fr; env._fcn[env._sp-argcnt] = null;
env._sp -= argcnt-1;
Arrays.fill(env._ary,env._sp,env._sp+(argcnt-1),null);
assert env.check_refcnt(fr.anyVec());
}
}
// rbind(...): row-binds frames with matching column counts and column types
// into a new frame. Work is split per output column (RbindTask) and, within a
// column, per input vec (RbindMRTask), coordinated by ParallelRbinds.
class ASTRbind extends ASTOp {
@Override String opStr() { return "rbind"; }
ASTRbind( ) { super(new String[]{"rbind","ary"},
new Type[]{Type.ARY,Type.varargs(Type.dblary())},
OPF_PREFIX,
OPP_PREFIX,OPA_RIGHT); }
@Override ASTOp make() {return new ASTRbind(); }
// Coarse type label used to compare columns across frames; "bad" when the vec
// matches none of the UUID / factor / time / numeric checks.
private static String get_type(Vec v) {
if (v.isUUID()) return "UUID";
if (v.isEnum()) return "factor";
if (v.isTime()) return "time";
if (v.isFloat() || v.isInt()) return "numeric";
return "bad";
}
// Copies the chunks of ONE input vec into the target vec _v, starting at chunk
// index _chunkOffset. A non-null _emap translates factor codes on the way.
private static class RbindMRTask extends MRTask2<RbindMRTask> {
private final int[] _emap;
private final int _chunkOffset;
private final Vec _v;
RbindMRTask(H2O.H2OCountedCompleter hc, int[] emap, Vec v, int offset) { super(hc); _emap = emap; _v = v; _chunkOffset = offset;}
@Override public void map(Chunk cs) {
// position of this chunk inside the target vec
int idx = _chunkOffset+cs.cidx();
Key ckey = Vec.chunkKey(_v._key, idx);
if (_emap != null) {
NewChunk nc = new NewChunk(_v, idx);
// loop over rows and update ints for new domain mapping according to vecs[c].domain()
for (int r=0;r < cs._len;++r) {
if (cs.isNA0(r)) nc.addNA();
else nc.addNum(_emap[(int)cs.at80(r)], 0);
}
nc.close(_fs);
} else {
// No remapping needed: store a detached copy of the chunk under the target chunk key.
Chunk oc = cs.clone();
oc._start = -1;
oc._vec = null;
oc._mem = cs.getBytes().clone(); // needless replication of the data, can do ref counting on byte[] _mem
DKV.put(ckey, oc, _fs, true);
}
}
}
// Builds ONE output column: computes the merged factor domain (if any), then
// launches one RbindMRTask per input vec and publishes the vec on completion.
private static class RbindTask extends H2O.H2OCountedCompleter<RbindTask> {
final transient Vec[] _vecs;
final Vec _v;
final long[] _espc;
String[] _dom;
RbindTask(H2O.H2OCountedCompleter cc, Vec[] vecs, Vec v, long[] espc) { super(cc); _vecs = vecs; _v = v; _espc = espc; }
// Swaps keys and values of a map.
private static Map<Integer, String> invert(Map<String, Integer> map) {
Map<Integer, String> inv = new HashMap<Integer, String>();
for (Map.Entry<String, Integer> e : map.entrySet()) {
inv.put(e.getValue(), e.getKey());
}
return inv;
}
@Override public void compute2() {
// one pending completion per input vec forked below (minus this task's own)
addToPendingCount(_vecs.length-1);
// the first input vec decides whether the column is handled as a factor
boolean isEnum = _vecs[0].domain() != null;
int[][] emaps = new int[_vecs.length][];
if (isEnum) {
// loop to create BIG domain
HashMap<String, Integer> dmap = new HashMap<String, Integer>(); // probably should allocate something that's big enough (i.e. 2*biggest_domain)
int c = 0;
for (int i = 0; i < _vecs.length; ++i) {
// emaps[i][j]: index in the merged domain of level j of input vec i;
// unseen levels get the next free index, in first-seen order.
emaps[i] = new int[_vecs[i].domain().length];
for (int j = 0; j < emaps[i].length; ++j)
if (!dmap.containsKey(_vecs[i].domain()[j]))
dmap.put(_vecs[i].domain()[j], emaps[i][j]=c++);
else emaps[i][j] = dmap.get(_vecs[i].domain()[j]);
}
// materialize the merged domain as an array indexed by the new codes
_dom = new String[dmap.size()];
HashMap<Integer, String> inv = (HashMap<Integer, String>) invert(dmap);
for (int s = 0; s < _dom.length; ++s) _dom[s] = inv.get(s);
}
// each input vec writes to a disjoint range of target chunk indices
int offset=0;
for (int i=0; i<_vecs.length; ++i) {
new RbindMRTask(this, emaps[i], _v, offset).asyncExec(_vecs[i]);
offset += _vecs[i].nChunks();
}
}
// Runs after all per-vec copies finished: attach the merged domain (null for
// non-factor columns) and store the vec header.
@Override public void onCompletion(CountedCompleter cc) {
_v._domain = _dom;
UKV.put(_v._key,_v);
}
}
// Drives the whole rbind: computes the combined chunk layout, allocates the
// output vecs and runs the per-column RbindTasks, keeping the number of
// columns started up front bounded by _maxP.
private static class ParallelRbinds extends H2O.H2OCountedCompleter{
private final Frame[] _f;
private final int _argcnt;
private final AtomicInteger _ctr;
private int _maxP = 100;
private long[] _espc;
private Vec[] _vecs;
// _ctr starts at _maxP-1 so the first callback starts column index _maxP.
ParallelRbinds(Frame[] f, int argcnt) { _f = f; _argcnt = argcnt; _ctr = new AtomicInteger(_maxP-1); } //TODO pass maxP to constructor
@Override public void compute2() {
// one pending completion per output column (minus this task's own)
addToPendingCount(_f[0].numCols()-1);
int nchks=0;
for (int i =0; i < _argcnt; ++i)
nchks+=_f[i].anyVec().nChunks();
// Combined layout: the first frame's offsets are copied verbatim, every later
// frame's offsets are shifted by the row count accumulated so far.
_espc = new long[nchks+1];
int coffset = _f[0].anyVec().nChunks();
long[] first_espc = _f[0].anyVec()._espc;
System.arraycopy(first_espc, 0, _espc, 0, first_espc.length);
for (int i=1; i < _argcnt; ++i) {
long roffset = _espc[coffset];
long[] espc = _f[i].anyVec()._espc;
int j = 1;
for (; j < espc.length; j++)
_espc[coffset + j] = roffset+ espc[j];
coffset += _f[i].anyVec().nChunks();
}
// New vec keys are taken from the first frame's vector group.
Key[] keys = _f[0].anyVec().group().addVecs(_f[0].numCols());
_vecs = new Vec[keys.length];
String type;
for (int i=0; i<_vecs.length; ++i) {
// Column kind is taken from the first frame.
// NOTE(review): the trailing (byte) argument presumably selects the time format (3) or none (-1) -- confirm against Vec.
_vecs[i] = new Vec(keys[i], _espc, null, (type = get_type(_f[0].vec(i))).equals("UUID"), type.equals("time") ? (byte) 3 : (byte) -1);
}
for (int i=0; i < Math.min(_maxP, _vecs.length); ++i) forkVecTask(i);
}
// Starts the task that builds output column i, using each input's master vec
// when it has one.
private void forkVecTask(final int i) {
Vec[] vecs = new Vec[_argcnt];
for (int j= 0; j < _argcnt; ++j) {
Vec vm, v = _f[j].vec(i);
vecs[j] = ((vm=v.masterVec())==null) ? v : vm;
}
new RbindTask(new Callback(), vecs, _vecs[i], _espc).fork();
}
// When a column finishes, start the next column that has not been started yet.
private class Callback extends H2O.H2OCallback {
public Callback(){super(ParallelRbinds.this);}
@Override public void callback(H2O.H2OCountedCompleter h2OCountedCompleter) {
int i = _ctr.incrementAndGet();
if(i < _vecs.length)
forkVecTask(i);
}
}
}
// Validates the arguments, runs the bind and replaces all argcnt stack slots
// with the new frame. Throws IllegalArgumentException on a column-count or
// column-type mismatch between frames.
@Override void apply(Env env, int argcnt, ASTApply apply) {
// quick check to make sure rbind is feasible
if (argcnt-1 == 1) { return; } // leave stack as is
// fs holds the frames in argument order: the top of the stack is the LAST argument.
Frame[] fs = new Frame[argcnt-1];
Frame f1 = env.peekAry();
int j = fs.length-1;
// wrapped[c]: some input's column c has a master vec
boolean[] wrapped = new boolean[f1.numCols()];
for (int c = 0; c<f1.numCols(); ++c) wrapped[c] = f1.vec(c).masterVec() != null;
fs[j--] = f1;
// do error checking and compute new offsets in tandem
for (int i = 1; i < argcnt-1; ++i) {
Frame t = env.ary(-(i+1));
fs[j--]=t;
// check columns match
if (t.numCols() != f1.numCols())
throw new IllegalArgumentException("Column mismatch! Expected " + f1.numCols() + " but frame has " + t.numCols());
// check column types
for (int c = 0; c < f1.numCols(); ++c) {
wrapped[c] |= t.vec(c).masterVec() != null;
if (!get_type(f1.vec(c)).equals(get_type(t.vec(c))))
throw new IllegalArgumentException("Column type mismatch! Expected type " + get_type(f1.vec(c)) + " but vec has type " + get_type(t.vec(c)));
}
}
// run the bind and block until every column is done
ParallelRbinds t;
H2O.submitTask(t = new ParallelRbinds(fs, argcnt-1)).join();
// columns that were bound through master vecs are converted back with toEnum()
for (int i = 0; i < wrapped.length; ++i)
if (wrapped[i]) t._vecs[i] = t._vecs[i].toEnum();
// the result lives under a fresh key and takes the last argument's column names
Key m = Key.make();
env.poppush(argcnt, new Frame(m, f1.names(), t._vecs), m.toString());
}
}
/**
 * {@code min.na.rm}: minimum over scalars and frames, ignoring NaN scalars.
 * For frames the per-column {@code Vec.min()} is used. The result is NaN only
 * when at least one NaN scalar was seen and no finite minimum was found.
 */
class ASTMinNaRm extends ASTReducerOp {
  ASTMinNaRm( ) { super( Double.POSITIVE_INFINITY, true ); }
  @Override String opStr() { return "min.na.rm"; }
  @Override ASTOp make() { return new ASTMinNaRm(); }
  @Override double op(double d0, double d1) { return Math.min(d0, d1); }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    double best = Double.POSITIVE_INFINITY;
    boolean sawNaScalar = false;
    for (int remaining = argcnt - 1; remaining > 0; remaining--) {
      if (!env.isDbl()) {
        // Frame argument: fold in each column's precomputed minimum.
        final Vec[] cols = env.peekAry().vecs();
        for (int c = 0; c < cols.length; c++) {
          best = Math.min(best, cols[c].min());
        }
        env.pop();
        continue;
      }
      final double scalar = env.popDbl();
      if (Double.isNaN(scalar)) {
        sawNaScalar = true;
      } else {
        best = Math.min(best, scalar);
      }
    }
    // Nothing but NaN scalars contributed: report NaN instead of +Infinity.
    if (sawNaScalar && best == Double.POSITIVE_INFINITY) {
      best = Double.NaN;
    }
    env.poppush(best);
  }
}
/**
 * {@code max.na.rm}: maximum over scalars and frames, ignoring NaN scalars.
 * For frames the per-column {@code Vec.max()} is used. The result is NaN only
 * when at least one NaN scalar was seen and no finite maximum was found.
 */
class ASTMaxNaRm extends ASTReducerOp {
  ASTMaxNaRm( ) { super( Double.NEGATIVE_INFINITY, true ); }
  @Override String opStr() { return "max.na.rm"; }
  @Override ASTOp make() { return new ASTMaxNaRm(); }
  @Override double op(double d0, double d1) { return Math.max(d0,d1); }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    double best = Double.NEGATIVE_INFINITY;
    boolean sawNaScalar = false;
    for (int remaining = argcnt - 1; remaining > 0; remaining--) {
      if (!env.isDbl()) {
        // Frame argument: fold in each column's precomputed maximum.
        final Vec[] cols = env.peekAry().vecs();
        for (int c = 0; c < cols.length; c++) {
          best = Math.max(best, cols[c].max());
        }
        env.pop();
        continue;
      }
      final double scalar = env.popDbl();
      if (Double.isNaN(scalar)) {
        sawNaScalar = true;
      } else {
        best = Math.max(best, scalar);
      }
    }
    // Nothing but NaN scalars contributed: report NaN instead of -Infinity.
    if (sawNaScalar && best == Double.NEGATIVE_INFINITY) {
      best = Double.NaN;
    }
    env.poppush(best);
  }
}
/**
 * {@code min}: minimum over scalars and frames without NA removal. A frame
 * column that contains any NA makes the running result NaN.
 */
class ASTMin extends ASTReducerOp {
  ASTMin( ) { super( Double.POSITIVE_INFINITY, false); }
  @Override String opStr() { return "min"; }
  @Override ASTOp make() { return new ASTMin(); }
  @Override double op(double d0, double d1) { return Math.min(d0, d1); }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    double best = Double.POSITIVE_INFINITY;
    for (int remaining = argcnt - 1; remaining > 0; remaining--) {
      if (env.isDbl()) {
        best = Math.min(best, env.popDbl());
        continue;
      }
      final Vec[] cols = env.peekAry().vecs();
      for (int c = 0; c < cols.length; c++) {
        if (cols[c].naCnt() > 0) {
          // A missing value anywhere in the column poisons the result; skip the rest of this frame.
          best = Double.NaN;
          break;
        }
        best = Math.min(best, cols[c].min());
      }
      env.pop();
    }
    env.poppush(best);
  }
}
/**
 * {@code max}: maximum over scalars and frames without NA removal. A frame
 * column that contains any NA makes the running result NaN.
 */
class ASTMax extends ASTReducerOp {
  ASTMax( ) { super( Double.NEGATIVE_INFINITY, false ); }
  @Override String opStr() { return "max"; }
  @Override ASTOp make() { return new ASTMax(); }
  @Override double op(double d0, double d1) { return Math.max(d0,d1); }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    double best = Double.NEGATIVE_INFINITY;
    for (int remaining = argcnt - 1; remaining > 0; remaining--) {
      if (env.isDbl()) {
        best = Math.max(best, env.popDbl());
        continue;
      }
      final Vec[] cols = env.peekAry().vecs();
      for (int c = 0; c < cols.length; c++) {
        if (cols[c].naCnt() > 0) {
          // A missing value anywhere in the column poisons the result; skip the rest of this frame.
          best = Double.NaN;
          break;
        }
        best = Math.max(best, cols[c].max());
      }
      env.pop();
    }
    env.poppush(best);
  }
}
// R like binary operator &&
/**
 * Scalar, short-circuit {@code &&}. Frame operands contribute only their first
 * element (row 0 of the first column).
 *
 * Three-valued result, with NaN standing for NA: 0 if either side is 0
 * (FALSE wins even against NA), otherwise NaN if either side is NaN,
 * otherwise 1.
 */
class ASTAND extends ASTOp {
  @Override String opStr() { return "&&"; }
  ASTAND( ) {
    super(new String[]{"", "x", "y"},
          new Type[]{Type.DBL,Type.dblary(),Type.dblary()},
          OPF_PREFIX,
          OPP_AND,
          OPA_RIGHT);
  }
  @Override ASTOp make() { return new ASTAND(); }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final double lhs = env.isAry(-2) ? env.ary(-2).vecs()[0].at(0) : env.dbl(-2);
    final double result;
    if (lhs == 0) {
      // Short-circuit: the right operand is not consulted.
      result = 0;
    } else {
      final double rhs = env.isAry(-1) ? env.ary(-1).vecs()[0].at(0) : env.dbl(-1);
      // Previously an NA on the left returned NA without looking at the right
      // side, so NA && FALSE yielded NA instead of FALSE.
      if (rhs == 0)                                       result = 0;
      else if (Double.isNaN(lhs) || Double.isNaN(rhs))    result = Double.NaN;
      else                                                result = 1;
    }
    // Drop the operator slot and both operands, then push the scalar result.
    env.pop(3);
    env.push(result);
  }
}
// R like binary operator ||
/**
 * Scalar, short-circuit {@code ||}. Frame operands contribute only their first
 * element (row 0 of the first column).
 *
 * Three-valued result, with NaN standing for NA: 1 if either side is a non-NaN
 * non-zero value, 0 if both sides are 0, otherwise NaN.
 */
class ASTOR extends ASTOp {
  @Override String opStr() { return "||"; }
  ASTOR( ) {
    super(new String[]{"", "x", "y"},
          new Type[]{Type.DBL,Type.dblary(),Type.dblary()},
          OPF_PREFIX,
          OPP_OR,
          OPA_RIGHT);
  }
  @Override ASTOp make() { return new ASTOR(); }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    double op1 = env.isAry(-2) ? env.ary(-2).vecs()[0].at(0) : env.dbl(-2);
    // Short-circuit: a definite TRUE on the left means the right side is not read.
    double op2 = !Double.isNaN(op1) && op1!=0 ? 1 :
                 env.isAry(-1) ? env.ary(-1).vecs()[0].at(0) : env.dbl(-1);
    // Drop the operator slot and both operands before pushing the result, exactly
    // as ASTAND does; without this the three entries were left on the stack
    // underneath the result.
    env.pop(3);
    if (!Double.isNaN(op2) && op2 != 0)
      op2 = 1;                       // TRUE on the right wins, even against NA
    else if (op2 == 0 && Double.isNaN(op1))
      op2 = Double.NaN;              // NA || FALSE is NA
    env.push(op2);
  }
}
// Brute force implementation of matrix multiply
/** {@code %*%}: matrix product of two frames, delegated to {@code DMatrix.mmul}. */
class ASTMMult extends ASTOp {
  @Override String opStr() {
    return "%*%";
  }
  ASTMMult( ) {
    super(new String[]{"", "x", "y"},
          new Type[]{Type.ARY,Type.ARY,Type.ARY},
          OPF_PREFIX,
          OPP_MUL,
          OPA_RIGHT);
  }
  @Override ASTOp make() {
    return new ASTMMult();
  }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    // Operands are read in place: left one below the top, right one on top.
    final Frame left = env.ary(-2);
    final Frame right = env.ary(-1);
    // Replace the operator slot and both operands with the product.
    env.poppush(3, DMatrix.mmul(left, right), null);
  }
}
// Matrix transpose t(x); a scalar argument is first wrapped in a 1x1 frame
class ASTMTrans extends ASTOp {
  ASTMTrans( ) {
    super(new String[]{"", "x"},
          new Type[]{Type.ARY,Type.dblary()},
          OPF_PREFIX,
          OPP_PREFIX,
          OPA_RIGHT);
  }
  @Override String opStr() { return "t"; }
  @Override ASTOp make() { return new ASTMTrans(); }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    // Frame argument: hand it straight to the distributed transpose.
    if( env.isAry(-1) ) {
      env.poppush(2,DMatrix.transpose(env.ary(-1)),null);
      return;
    }
    // Scalar argument: build a single-row, single-column frame holding it.
    Key vecKey = new Vec.VectorGroup().addVec();
    Futures pending = new Futures();
    AppendableVec appendable = new AppendableVec(vecKey);
    NewChunk onlyChunk = new NewChunk(appendable, 0);
    onlyChunk.addNum(env.dbl(-1));
    onlyChunk.close(0, pending);
    Vec single = appendable.close(pending);
    pending.blockForPending();
    single._domain = null;
    Frame wrapped = new Frame(new String[] {"C1"}, new Vec[] {single});
    env.poppush(2,new Matrix(wrapped).trans(),null);
  }
}
// Counterpart of R's seq_len(n): a one-column frame built by Vec.makeSeq(n)
class ASTSeqLen extends ASTOp {
  ASTSeqLen( ) {
    super(new String[]{"seq_len", "n"},
          new Type[]{Type.ARY,Type.DBL},
          OPF_PREFIX,
          OPP_PREFIX,
          OPA_RIGHT);
  }
  @Override String opStr() { return "seq_len"; }
  @Override ASTOp make() { return this; }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    // The requested length is truncated toward zero before validation.
    final long n = (long)env.popDbl();
    if( n < 1 ) {
      throw new IllegalArgumentException("Error in seq_len(" +n+"): argument must be coercible to positive integer");
    }
    final Vec seq = Vec.makeSeq(n);
    final Frame out = new Frame(new String[]{"c"}, new Vec[]{seq});
    // Only the function slot is left to replace.
    env.poppush(1, out, null);
  }
}
// The from:to operator; same algorithm as seq() with a fixed step of 1
class ASTColSeq extends ASTOp {
  ASTColSeq() {
    super(new String[]{":", "from", "to"},
          new Type[]{Type.dblary(), Type.DBL, Type.DBL},
          OPF_PREFIX,
          OPP_PREFIX,
          OPA_RIGHT);
  }
  @Override String opStr() { return ":"; }
  @Override ASTOp make() { return this; }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final double step = 1.0;
    // Arguments come off the stack in reverse order.
    final double to = env.popDbl();
    final double from = env.popDbl();
    final double span = to - from;

    // 0:0 yields the scalar itself.
    if( span == 0 && to == 0 ) {
      env.poppush(to);
      return;
    }

    final double steps = span/step;
    if( steps < 0 )
      throw new IllegalArgumentException("wrong sign in 'by' argument");
    if( steps > Double.MAX_VALUE )
      throw new IllegalArgumentException("'by' argument is much too small");

    // Endpoints that are relatively indistinguishable collapse to a scalar.
    final double relSpan = Math.abs(span)/Math.max(Math.abs(from), Math.abs(to));
    if( relSpan < 100*Double.MIN_VALUE ) {
      env.poppush(from);
      return;
    }

    // Materialize from, from+step, ... into a single-chunk vec.
    Key vecKey = new Vec.VectorGroup().addVec();
    Futures pending = new Futures();
    AppendableVec appendable = new AppendableVec(vecKey);
    NewChunk chunk = new NewChunk(appendable, 0);
    final int count = (int)steps + 1;
    int r = 0;
    while( r < count ) {
      chunk.addNum(from + r*step);
      r++;
    }
    // May need to adjust values = by > 0 ? min(values, to) : max(values, to)
    chunk.close(0, pending);
    Vec seq = appendable.close(pending);
    pending.blockForPending();
    seq._domain = null;
    env.poppush(1, new Frame(new String[] {"C1"}, new Vec[] {seq}), null);
  }
}
// seq(from, to, by), following the logic of R's generic seq method
class ASTSeq extends ASTOp {
  ASTSeq() {
    super(new String[]{"seq", "from", "to", "by"},
          new Type[]{Type.dblary(), Type.DBL, Type.DBL, Type.DBL},
          OPF_PREFIX,
          OPP_PREFIX,
          OPA_RIGHT);
  }
  @Override String opStr() { return "seq"; }
  @Override ASTOp make() { return this; }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    // Arguments come off the stack in reverse order: by, to, from.
    final double step = env.popDbl();
    final double to = env.popDbl();
    final double from = env.popDbl();
    final double span = to - from;

    // seq(0, 0, by) yields the scalar itself.
    if( span == 0 && to == 0 ) {
      env.poppush(to);
      return;
    }

    final double steps = span/step;
    if( steps < 0 )
      throw new IllegalArgumentException("wrong sign in 'by' argument");
    if( steps > Double.MAX_VALUE )
      throw new IllegalArgumentException("'by' argument is much too small");

    // Endpoints that are relatively indistinguishable collapse to a scalar.
    final double relSpan = Math.abs(span)/Math.max(Math.abs(from), Math.abs(to));
    if( relSpan < 100*Double.MIN_VALUE ) {
      env.poppush(from);
      return;
    }

    // Materialize from, from+step, ... into a single-chunk vec.
    Key vecKey = new Vec.VectorGroup().addVec();
    Futures pending = new Futures();
    AppendableVec appendable = new AppendableVec(vecKey);
    NewChunk chunk = new NewChunk(appendable, 0);
    final int count = (int)steps + 1;
    int r = 0;
    while( r < count ) {
      chunk.addNum(from + r*step);
      r++;
    }
    // May need to adjust values = by > 0 ? min(values, to) : max(values, to)
    chunk.close(0, pending);
    Vec seq = appendable.close(pending);
    pending.blockForPending();
    seq._domain = null;
    env.poppush(1, new Frame(new String[] {"C1"}, new Vec[] {seq}), null);
  }
}
// rep_len(x, length.out): a one-column frame built by Vec.makeConSeq(x, length.out)
class ASTRepLen extends ASTOp {
  @Override String opStr() { return "rep_len"; }
  ASTRepLen() { super(new String[]{"rep_len", "x", "length.out"},
                      new Type[]{Type.dblary(), Type.DBL, Type.DBL},
                      OPF_PREFIX,
                      OPP_PREFIX,
                      OPA_RIGHT);
  }
  @Override ASTOp make() { return this; }
  /**
   * Pops length.out and x from the stack and replaces the function slot with
   * the resulting single-column frame named "C1".
   *
   * @throws IllegalArgumentException if length.out truncates to a value <= 0
   * @throws RuntimeException (from H2O.unimpl()) if x is an array; only a
   *         scalar x is supported
   */
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    // The exception returned by H2O.unimpl() must actually be thrown: a bare
    // call discards it and would return with the stack left untouched.
    if(env.isAry(-2)) throw H2O.unimpl();
    long len = (long)env.popDbl();
    if(len <= 0)
      throw new IllegalArgumentException("Error in rep_len: argument length.out must be coercible to a positive integer");
    double x = env.popDbl();
    env.poppush(1,new Frame(new String[]{"C1"}, new Vec[]{Vec.makeConSeq(x, len)}),null);
  }
}
// Compute exact quantiles given a set of cutoffs, using multipass binning algo.
// Stack on entry: [fcn, x, probs]; both arguments are frames and each is
// reduced to one Vec via theVec(...) (the messages passed suggest theVec
// rejects frames with more than one column -- confirm against Frame.theVec).
// The result is a single-column frame named "Quantile" with one row per prob.
class ASTQtile extends ASTOp {
@Override String opStr() { return "quantile"; }
ASTQtile( ) {
super(new String[]{"quantile","x","probs"},
new Type[]{Type.ARY, Type.ARY, Type.ARY},
OPF_PREFIX,
OPP_PREFIX,
OPA_RIGHT);
}
@Override ASTQtile make() { return new ASTQtile(); }
@Override void apply(Env env, int argcnt, ASTApply apply) {
Frame x = env.ary(-2);
Vec xv = x .theVec("Argument #1 in Quantile contains more than 1 column.");
Vec pv = env.ary(-1).theVec("Argument #2 in Quantile contains more than 1 column.");
// Copy the requested probabilities locally, rejecting anything outside [0,1]
double p[] = new double[(int)pv.length()];
for (int i = 0; i < pv.length(); i++) {
if ((p[i]=pv.at((long)i)) < 0 || p[i] > 1)
throw new IllegalArgumentException("Quantile: probs must be in the range of [0, 1].");
}
if ( xv.isEnum() ) {
throw new IllegalArgumentException("Quantile: column type cannot be Enum.");
}
// create output vec, pre-filled with NaN; a slot is overwritten only when
// the corresponding quantile search reports _done
Vec res = pv.makeCon(Double.NaN);
final int MAX_ITERATIONS = 16;
final int MAX_QBINS = 1000; // less uses less memory, can take more passes
final boolean MULTIPASS = true; // approx in 1 pass if false
// Type 7 matches R default
final int INTERPOLATION = 7; // linear if quantile not exact on row. 2 uses mean.
// a little obtuse because reusing first pass object, if p has multiple thresholds
// since it's always the same (always had same valStart/End seed = vec min/max
// some MULTIPASS conditionals needed if we were going to make this work for approx or exact
final Quantiles[] qbins1 = new Quantiles.BinTask2(MAX_QBINS, xv.min(), xv.max()).doAll(xv)._qbins;
for( int i=0; i<p.length; i++ ) {
double quantile = p[i];
// need to pass a different threshold now for each finishUp!
qbins1[0].finishUp(xv, new double[]{quantile}, INTERPOLATION, MULTIPASS);
if( qbins1[0]._done ) {
res.set(i,qbins1[0]._pctile[0]);
} else {
// the 2-N map/reduces are here (with new start/ends. MULTIPASS is implied
Quantiles[] qbinsM = new Quantiles.BinTask2(MAX_QBINS, qbins1[0]._newValStart, qbins1[0]._newValEnd).doAll(xv)._qbins;
// Re-bin over the narrowed range until done or MAX_ITERATIONS is reached;
// if the loop runs out, res[i] keeps its initial NaN
for( int iteration = 2; iteration <= MAX_ITERATIONS; iteration++ ) {
qbinsM[0].finishUp(xv, new double[]{quantile}, INTERPOLATION, MULTIPASS);
if( qbinsM[0]._done ) {
res.set(i,qbinsM[0]._pctile[0]);
break;
}
// the 2-N map/reduces are here (with new start/ends. MULTIPASS is implied
qbinsM = new Quantiles.BinTask2(MAX_QBINS, qbinsM[0]._newValStart, qbinsM[0]._newValEnd).doAll(xv)._qbins;
}
}
}
// Close out the writes made through res.set(...) on chunk 0
res.chunkForChunkIdx(0).close(0,null);
res.postWrite();
env.poppush(argcnt, new Frame(new String[]{"Quantile"}, new Vec[]{res}), null);
}
}
// c(...): variable-length; flattens scalars and frames into one column "C1"
class ASTCat extends ASTOp {
  ASTCat( ) {
    super(new String[]{"cat","dbls"},
          new Type[]{Type.ARY,Type.varargs(Type.dblary())},
          OPF_PREFIX,
          OPP_PREFIX,
          OPA_RIGHT);
  }
  @Override String opStr() { return "c"; }
  @Override ASTOp make() {return new ASTCat();}
  // Row-wise form: the output is a copy of the input values.
  @Override double[] map(Env env, double[] in, double[] out) {
    if (out == null || out.length < in.length) out = new double[in.length];
    System.arraycopy(in, 0, out, 0, in.length);
    return out;
  }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    Key key = Vec.VectorGroup.VG_LEN1.addVecs(1)[0];
    AppendableVec av = new AppendableVec(key);
    NewChunk nc = new NewChunk(av,0);
    // Walk the arguments from deepest stack slot up to the top (slot -1).
    for( int slot = 1-argcnt; slot < 0; slot++ ) {
      if( !env.isAry(slot) ) {
        nc.addNum(env.dbl(slot));
        continue;
      }
      // Frame argument: append every column, one after another.
      for( Vec vec : env.ary(slot).vecs() ) {
        // NOTE(review): the value returned by H2O.unimpl() is not thrown here,
        // unlike `throw H2O.unimpl()` elsewhere in this file - confirm intent.
        if (vec.nChunks() > 1) H2O.unimpl();
        for (int r = 0; r < vec.length(); r++) nc.addNum(vec.at(r));
      }
    }
    nc.close(0,null);
    Vec v = av.close(null);
    env.pop(argcnt);
    env.push(new Frame(new String[]{"C1"}, new Vec[]{v}));
  }
}
// runif(dbls, seed): builds a one-column frame "rnd" of random values with the
// same number of rows as the input frame.
class ASTRunif extends ASTOp {
@Override String opStr() { return "runif"; }
ASTRunif() { super(new String[]{"runif","dbls","seed"},
new Type[]{Type.ARY,Type.ARY,Type.DBL},
OPF_PREFIX,
OPP_PREFIX,
OPA_RIGHT); }
@Override ASTOp make() {return new ASTRunif();}
@Override void apply(Env env, int argcnt, ASTApply apply) {
// A seed of -1 means "pick one from the clock"
double temp = env.popDbl();
final long seed = (temp == -1) ? System.currentTimeMillis() : (long)temp;
Frame fr = env.popAry();
String skey = env.key();
// Derive the new vec's chunk layout from the input's _espc array, trimmed
// to the first entry that covers numRows() and with the last entry set to
// exactly numRows()
long [] espc = fr.anyVec()._espc;
long rem = fr.numRows();
if(rem > espc[espc.length-1]) throw H2O.unimpl();
for(int i = 0; i < espc.length; ++i){
if(rem <= espc[i]){
espc = Arrays.copyOf(espc, i+1);
break;
}
}
espc[espc.length-1] = rem;
// Register the new vec and a constant-zero chunk for every layout slot
Vec randVec = new Vec(fr.anyVec().group().addVecs(1)[0],espc);
Futures fs = new Futures();
DKV.put(randVec._key,randVec, fs);
for(int i = 0; i < espc.length-1; ++i)
DKV.put(randVec.chunkKey(i),new C0DChunk(0,(int)(espc[i+1]-espc[i])),fs);
fs.blockForPending();
// Overwrite the zeros chunk by chunk; each chunk gets its own generator
new MRTask2() {
@Override public void map(Chunk c){
// NOTE(review): if cidx() is 0 for the first chunk, seed*cidx() is 0 there
// regardless of the seed supplied - confirm whether that is intended
Random rng = new Random(seed*c.cidx());
for(int i = 0; i < c._len; ++i)
// values are narrowed to float before being stored
c.set0(i, (float)rng.nextDouble());
}
}.doAll(randVec);
// Release the reference on the input frame, then swap fcn slot for result
env.subRef(fr,skey);
env.pop();
env.push(new Frame(new String[]{"rnd"},new Vec[]{randVec}));
}
}
// sd(ary): standard deviation of a single non-enum column, as a scalar
class ASTSdev extends ASTOp {
  ASTSdev() {
    super(new String[]{"sd", "ary"},
          new Type[]{Type.DBL,Type.ARY},
          OPF_PREFIX,
          OPP_PREFIX,
          OPA_RIGHT);
  }
  @Override String opStr() { return "sd"; }
  @Override ASTOp make() { return new ASTSdev(); }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    // Inspect the frame while it is still on the stack.
    final Vec[] cols = env.peekAry().vecs();
    if( cols.length > 1 ) {
      throw new IllegalArgumentException("sd does not apply to multiple cols.");
    }
    final Vec col = cols[0];
    if( col.isEnum() ) {
      throw new IllegalArgumentException("sd only applies to numeric vector.");
    }
    final double sigma = col.sigma();
    // Drop the frame, then replace the function slot with the scalar.
    env.pop();
    env.poppush(sigma);
  }
}
// var(ary): variance of a single column (pushed as a scalar) or the
// variance-covariance matrix of a multi-column frame (pushed as a frame that
// reuses the input column names). A scalar argument yields NaN.
class ASTVar extends ASTOp {
ASTVar() { super(new String[]{"var", "ary"}, new Type[]{Type.dblary(),Type.dblary()},
OPF_PREFIX,
OPP_PREFIX,
OPA_RIGHT); }
@Override String opStr() { return "var"; }
@Override ASTOp make() { return new ASTVar(); }
@Override void apply(Env env, int argcnt, ASTApply apply) {
if(env.isDbl()) {
// Scalar input: drop fcn + argument and answer NaN
env.pop(2); env.push(Double.NaN);
} else {
Frame fr = env.ary(-1);
String[] colnames = fr.names();
// Save standard deviations for later use
double[] sdev = new double[fr.numCols()];
for(int i = 0; i < fr.numCols(); i++)
sdev[i] = fr.vecs()[i].sigma();
// TODO: Might be more efficient to modify DataInfo to allow for separate standardization of mean and std dev
// Build the Gram matrix over the frame with the STANDARDIZE transform applied
DataInfo dinfo = new DataInfo(fr, 0, true, false, DataInfo.TransformType.STANDARDIZE);
GramTask tsk = new GramTask(null, dinfo, false, false).doAll(dinfo._adaptedFrame);
double[][] var = tsk._gram.getXX();
long nobs = tsk._nobs;
assert sdev.length == var.length;
assert sdev.length == var[0].length;
// Just push the scalar if input is a single col
if(var.length == 1 && var[0].length == 1) {
env.pop(2);
double x = var[0][0]*sdev[0]*sdev[0]; // Undo normalization of each col's standard deviation
x = x*nobs/(nobs-1); // Divide by n-1 rather than n so unbiased
env.push(x);
} else {
// Build output vecs for var-cov matrix
Key keys[] = Vec.VectorGroup.VG_LEN1.addVecs(var.length);
Vec[] vecs = new Vec[var.length];
// Output column i is filled from row i of the matrix (entries var[i][j])
for(int i = 0; i < var.length; i++) {
AppendableVec v = new AppendableVec(keys[i]);
NewChunk c = new NewChunk(v,0);
v._domain = null;
for (int j = 0; j < var[0].length; j++) {
double x = var[i][j]*sdev[i]*sdev[j]; // Undo normalization of each col's standard deviation
x = x*nobs/(nobs-1); // Divide by n-1 rather than n so unbiased
c.addNum(x);
}
c.close(0, null);
vecs[i] = v.close(null);
}
env.pop(2); env.push(new Frame(colnames, vecs));
}
}
}
}
// mean(ary): mean of a single non-enum column, as a scalar
class ASTMean extends ASTOp {
  ASTMean() {
    super(new String[]{"mean", "ary"},
          new Type[]{Type.DBL,Type.ARY},
          OPF_PREFIX,
          OPP_PREFIX,
          OPA_RIGHT);
  }
  @Override String opStr() { return "mean"; }
  @Override ASTOp make() { return new ASTMean(); }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    // Inspect the frame while it is still on the stack.
    final Vec[] cols = env.peekAry().vecs();
    if( cols.length > 1 ) {
      throw new IllegalArgumentException("mean does not apply to multiple cols.");
    }
    final Vec col = cols[0];
    if( col.isEnum() ) {
      throw new IllegalArgumentException("mean only applies to numeric vector.");
    }
    final double average = col.mean();
    // Drop the frame, then replace the function slot with the scalar.
    env.pop();
    env.poppush(average);
  }
  // Row-wise form: mean of the non-NaN inputs, written to out[0].
  @Override double[] map(Env env, double[] in, double[] out) {
    if (out == null || out.length < 1) out = new double[1];
    double total = 0;
    int seen = 0;
    for( int i = 0; i < in.length; i++ ) {
      final double v = in[i];
      if( Double.isNaN(v) ) continue;   // NaNs are skipped, not counted
      total += v;
      seen++;
    }
    out[0] = total/seen;
    return out;
  }
}
// median(ary): median of a single non-enum column, obtained via QuantilesPage
class ASTMedian extends ASTOp {
  ASTMedian() {
    super(new String[]{"median", "ary"},
          new Type[]{Type.DBL,Type.ARY},
          OPF_PREFIX,
          OPP_PREFIX,
          OPA_RIGHT);
  }
  @Override String opStr() { return "median"; }
  @Override ASTOp make() { return new ASTMedian(); }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    // Inspect the frame while it is still on the stack.
    final Frame fr = env.peekAry();
    final Vec[] cols = fr.vecs();
    if( cols.length > 1 ) {
      throw new IllegalArgumentException("median does not apply to multiple cols.");
    }
    if( cols[0].isEnum() ) {
      throw new IllegalArgumentException("median only applies to numeric vector.");
    }
    // Run the quantiles page with its defaults on this frame's column.
    final QuantilesPage page = new QuantilesPage();
    page.source_key = fr;
    page.column = fr.anyVec();
    page.invoke();
    final double median = page.result;
    // Drop the frame, then replace the function slot with the scalar.
    env.pop();
    env.poppush(median);
  }
}
// mode(ary): index of the most frequent level of a single enum column
class ASTMostCommon extends ASTOp {
  ASTMostCommon() {
    super(new String[]{"mode", "ary"},
          new Type[]{Type.DBL,Type.ARY},
          OPF_PREFIX,
          OPP_PREFIX,
          OPA_RIGHT);
  }
  @Override String opStr() { return "mode"; }
  @Override ASTOp make() { return new ASTMostCommon(); }
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final Frame fr = env.peekAry();
    final Vec[] cols = fr.vecs();
    if( cols.length > 1 ) {
      throw new IllegalArgumentException("mode does not apply to multiple cols.");
    }
    if( !cols[0].isEnum() ) {
      throw new IllegalArgumentException("mode only applies to factor columns.");
    }
    final Vec column = fr.anyVec();
    final String[] dom = column.domain();
    // Collect the levels present, then count occurrences of each.
    final long[][] levels = { new Vec.CollectDomain(column).doAll(new Frame(column)).domain() };
    final long[] tally = new ASTTable.Tabularize(levels).doAll(column)._counts[0];
    // First strictly-largest count wins; a level literally named "NA" is skipped.
    long best = -1;
    int mode = -1;
    for( int i = 0; i < tally.length; ++i ) {
      if( tally[i] <= best || dom[i].equals("NA") ) continue;
      best = tally[i];
      mode = i;
    }
    // Nothing qualified: fall back to the "NA" level's index, or NaN if absent.
    double mc;
    if( mode != -1 ) {
      mc = (double)mode;
    } else {
      final int naIdx = Arrays.asList(dom).indexOf("NA");
      mc = (naIdx == -1) ? Double.NaN : (double)naIdx;
    }
    env.pop();
    env.poppush(mc);
  }
}
// xorsum: XOR of the raw IEEE-754 bit patterns of the inputs
class ASTXorSum extends ASTReducerOp {
  ASTXorSum() { super(0,false); }
  @Override String opStr() { return "xorsum"; }
  @Override ASTOp make() { return new ASTXorSum(); }
  // Pairwise form: XOR the two bit patterns, then mask.
  @Override double op(double d0, double d1) {
    // Clearing the upper 4 bits (sign + top of the exponent) means the
    // exponent can never be all ones, so the result is never inf or nan.
    final long keepLow60 = 0x0fffffffffffffffL;
    final long mixed = Double.doubleToLongBits(d0) ^ Double.doubleToLongBits(d1);
    return Double.longBitsToDouble(mixed & keepLow60);
  }
  // Row-wise form: XOR every input's bit pattern, mask once at the end.
  @Override double[] map(Env env, double[] in, double[] out) {
    if (out == null || out.length < 1) out = new double[1];
    // for dp ieee 754, sign and exp are the high 12 bits.
    // We don't want infinity or nan, because h2o will return a string.
    long acc = 0;
    for( int i = 0; i < in.length; i++ ) {
      acc ^= Double.doubleToLongBits(in[i]);
    }
    final long keepLow60 = 0x0fffffffffffffffL;
    out[0] = Double.longBitsToDouble(acc & keepLow60);
    return out;
  }
}
// Selective return. If the selector is a double, just eval both args and
// return the selected one. If the selector is an array, then it must be
// compatible with argument arrays (if any), and the selection is done
// element-by-element.
class ASTIfElse extends ASTOp {
static final String VARS[] = new String[]{"ifelse","tst","true","false"};
// Fresh signature: three independent unbound argument types, with the return
// type built from all three via Type.anyary
static Type[] newsig() {
Type t1 = Type.unbound(), t2 = Type.unbound(), t3=Type.unbound();
return new Type[]{Type.anyary(new Type[]{t1,t2,t3}),t1,t2,t3};
}
ASTIfElse( ) { super(VARS, newsig(),OPF_INFIX,OPP_PREFIX,OPA_RIGHT); }
@Override ASTOp make() {return new ASTIfElse();}
@Override String opStr() { return "ifelse"; }
// Parse an infix trinary ?: operator
// Returns null when the next token is not '?'; otherwise parses the true and
// false expressions and reports an error if either is missing
static AST parse(Exec2 E, AST tst, boolean EOS) {
if( !E.peek('?',true) ) return null;
int x=E._x;
AST tru=E.xpeek(':',E._x,parseCXExpr(E,false));
if( tru == null ) E.throwErr("Missing expression in trinary",x);
x = E._x;
AST fal=parseCXExpr(E,EOS);
if( fal == null ) E.throwErr("Missing expression in trinary",x);
return ASTApply.make(new AST[]{new ASTIfElse(),tst,tru,fal},E,x);
}
// Stack on entry (deepest first): fcn, tst, true-value, false-value
@Override void apply(Env env, int argcnt, ASTApply apply) {
// All or none are functions
assert ( env.isFcn(-1) && env.isFcn(-2) && _t.ret().isFcn())
|| (!env.isFcn(-1) && !env.isFcn(-2) && !_t.ret().isFcn());
// If the result is an array, then one of the other of the two must be an
// array. , and this is a broadcast op.
assert !_t.isAry() || env.isAry(-1) || env.isAry(-2);
// Single selection? Then just pick slots
if( !env.isAry(-3) ) {
// tst==0 selects the false value (top of stack); otherwise the false value
// is dropped first so the true value is on top when it is moved down
if( env.dbl(-3)==0 ) env.pop_into_stk(-4);
else { env.pop(); env.pop_into_stk(-3); }
return;
}
Frame frtst=null, frtru= null, frfal= null;
double dtst= 0 , dtru= 0 , dfal= 0 ;
String kf, kt, kq;
boolean bothStr=false; // are both yes and no a string? ok that's easy to deal with...
String stru=null, sfal=null;
// Pop false, true, test (top of stack first), remembering each key.
// A string option is stood in for by a number: 0.0 for false, 1.0 for true
if( env.isAry() ) frfal= env.popAry();
else if( env.isDbl() && !env.isStr() ) { dfal = env.popDbl(); }
else if( env.isStr() ) { sfal=env.popStr(); dfal=0.0; }
kf = env.key();
if( env.isAry() ) frtru= env.popAry();
else if( env.isDbl() && !env.isStr() ) { dtru = env.popDbl(); }
else if( env.isStr() ) { stru=env.popStr(); dtru=1.0; }
kt = env.key();
if( env.isAry() ) frtst= env.popAry(); else dtst = env.popDbl();
kq = env.key();
bothStr= stru!=null&&sfal!=null; // bothStr==true => make domain [stru, sfal]
// Multi-selection
// Build a doAll frame
// Column layout of fr: test columns, then true columns (if a frame), then
// false columns (if a frame); the map below indexes in strides of ncols
Frame fr = new Frame(frtst); // Do-All frame
final int ncols = frtst.numCols(); // Result column count
final long nrows = frtst.numRows(); // Result row count
String names[]=null;
if( frtru !=null ) { // True is a Frame?
if( frtru.numCols() != ncols || frtru.numRows() != nrows )
throw new IllegalArgumentException("Arrays must be same size: "+frtst+" vs "+frtru);
fr.add(frtru,true);
names = frtru._names;
}
if( frfal !=null ) { // False is a Frame?
if( frfal.numCols() != ncols || frfal.numRows() != nrows )
throw new IllegalArgumentException("Arrays must be same size: "+frtst+" vs "+frfal);
fr.add(frfal,true);
names = frfal._names;
}
// NOTE(review): frtst has already been dereferenced above, so the null
// check here cannot be what guards against a null test frame
if( names==null && frtst!=null ) names = frtst._names;
final boolean t = frtru != null;
final boolean f = frfal != null;
final double fdtru = dtru;
final double fdfal = dfal;
String[][] domains=fr.domains();
// Levels ordered so index 0 (the false stand-in) maps to sfal, 1 to stru
if( bothStr ) domains[0] = new String[]{sfal,stru};
// Run a selection picking true/false across the frame
Frame fr2 = new MRTask2() {
@Override public void map( Chunk chks[], NewChunk nchks[] ) {
for( int i=0; i<nchks.length; i++ ) {
NewChunk n =nchks[i];
int off=i;
Chunk ctst= chks[off];
Chunk ctru= t ? chks[off+=ncols] : null;
Chunk cfal= f ? chks[off+=ncols] : null;
int rlen = ctst._len;
// An NA test row produces NA; otherwise non-zero picks the true side
for( int r=0; r<rlen; r++ )
if( ctst.isNA0(r) ) n.addNA();
else n.addNum(ctst.at0(r)!=0 ? (t ? ctru.at0(r) : fdtru) : (f ? cfal.at0(r) : fdfal));
}
}
}.doAll(ncols,fr).outputFrame(names,domains);
// Release references on the popped frames, then swap fcn slot for result
env.subRef(frtst,kq);
if( frtru != null ) env.subRef(frtru,kt);
if( frfal != null ) env.subRef(frfal,kf);
env.pop();
env.push(fr2);
}
}
// cut(ary, dbls): bins a single numeric column into labelled intervals.
// The second argument is either a bin count (scalar) or a column of break
// points (frame); the result is a one-column frame whose domain holds the
// interval labels.
class ASTCut extends ASTOp {
ASTCut() { super(new String[]{"cut", "ary", "dbls"},
new Type[]{Type.ARY, Type.ARY, Type.dblary()},
OPF_PREFIX,
OPP_PREFIX,
OPA_RIGHT); }
@Override String opStr() { return "cut"; }
@Override ASTOp make() {return new ASTCut();}
@Override void apply(Env env, int argcnt, ASTApply apply) {
if(env.isDbl()) {
// --- Scalar form: nbins equal-width bins spanning [min, max] ---
final int nbins = (int) Math.floor(env.popDbl());
if(nbins < 2)
throw new IllegalArgumentException("Number of intervals must be at least 2");
Frame fr = env.popAry();
String skey = env.key();
if(fr.vecs().length != 1 || fr.vecs()[0].isEnum())
throw new IllegalArgumentException("First argument must be a numeric column vector");
final double fmax = fr.vecs()[0].max();
final double fmin = fr.vecs()[0].min();
final double width = (fmax - fmin)/nbins;
if(width == 0) throw new IllegalArgumentException("Data vector is constant!");
// Note: I think R perturbs constant vecs slightly so it can still bin values
// Construct domain names from bins intervals
// The first label's lower bound is pushed 0.1% of the range below fmin
String[][] domains = new String[1][nbins];
domains[0][0] = "(" + String.valueOf(fmin - 0.001*(fmax-fmin)) + "," + String.valueOf(fmin + width) + "]";
for(int i = 1; i < nbins; i++)
domains[0][i] = "(" + String.valueOf(fmin + i*width) + "," + String.valueOf(fmin + (i+1)*width) + "]";
Frame fr2 = new MRTask2() {
@Override public void map(Chunk chk, NewChunk nchk) {
for(int r = 0; r < chk._len; r++) {
double x = chk.at0(r);
// The maximum is forced into the last bin; everything else is floor-binned
double n = x == fmax ? nbins-1 : Math.floor((x - fmin)/width);
nchk.addNum(n);
}
}
}.doAll(1,fr).outputFrame(fr._names, domains);
env.subRef(fr, skey);
env.pop();
env.push(fr2);
} else if(env.isAry()) {
// --- Vector form: explicit break points ---
Frame ary = env.popAry();
String skey1 = env.key();
if(ary.vecs().length != 1 || ary.vecs()[0].isEnum())
throw new IllegalArgumentException("Second argument must be a numeric column vector");
Vec brks = ary.vecs()[0];
// TODO: Check that num rows below some cutoff, else this will likely crash
// Remove duplicates and sort vector of breaks in ascending order
SortedSet<Double> temp = new TreeSet<Double>();
for(int i = 0; i < brks.length(); i++) temp.add(brks.at(i));
int cnt = 0; final double[] cutoffs = new double[temp.size()];
for(Double x : temp) { cutoffs[cnt] = x; cnt++; }
if(cutoffs.length < 2)
throw new IllegalArgumentException("Vector of breaks must have at least 2 unique values");
Frame fr = env.popAry();
String skey2 = env.key();
if(fr.vecs().length != 1 || fr.vecs()[0].isEnum())
throw new IllegalArgumentException("First argument must be a numeric column vector");
// Construct domain names from bin intervals
final int nbins = cutoffs.length-1;
String[][] domains = new String[1][nbins];
for(int i = 0; i < nbins; i++)
domains[0][i] = "(" + cutoffs[i] + "," + cutoffs[i+1] + "]";
Frame fr2 = new MRTask2() {
@Override public void map(Chunk chk, NewChunk nchk) {
for(int r = 0; r < chk._len; r++) {
double x = chk.at0(r);
// NaN, values at or below the first break, and values above the last
// break all map to NaN
if(Double.isNaN(x) || x <= cutoffs[0] || x > cutoffs[cutoffs.length-1])
nchk.addNum(Double.NaN);
else {
// Bin index is that of the first break >= x, minus one
for(int i = 1; i < cutoffs.length; i++) {
if(x <= cutoffs[i]) { nchk.addNum(i-1); break; }
}
}
}
}
}.doAll(1,fr).outputFrame(fr._names, domains);
env.subRef(ary, skey1);
env.subRef(fr, skey2);
env.pop();
env.push(fr2);
} else throw H2O.unimpl();
}
}
// findInterval(ary, vec, rightmost.closed): for each value of a single
// numeric column, emits an interval index relative to either one scalar
// cutoff or a sorted column of cutoffs. NaN inputs yield NaN.
class ASTfindInterval extends ASTOp {
ASTfindInterval() { super(new String[]{"findInterval", "ary", "vec", "rightmost.closed"},
new Type[]{Type.ARY, Type.ARY, Type.dblary(), Type.DBL},
OPF_PREFIX,
OPP_PREFIX,
OPA_RIGHT); }
@Override String opStr() { return "findInterval"; }
@Override ASTOp make() { return new ASTfindInterval(); }
@Override void apply(Env env, int argcnt, ASTApply apply) {
// rightmost.closed is on top of the stack; any non-zero value means true
final boolean rclosed = env.popDbl() == 0 ? false : true;
if(env.isDbl()) {
// --- Scalar form: a single cutoff, so the answer is 0 or 1 ---
final double cutoff = env.popDbl();
Frame fr = env.popAry();
String skey = env.key();
if(fr.vecs().length != 1 || fr.vecs()[0].isEnum())
throw new IllegalArgumentException("First argument must be a numeric column vector");
Frame fr2 = new MRTask2() {
@Override public void map(Chunk chk, NewChunk nchk) {
for(int r = 0; r < chk._len; r++) {
double x = chk.at0(r);
if(Double.isNaN(x))
nchk.addNum(Double.NaN);
else {
// With rclosed a value equal to the cutoff stays in interval 0
if(rclosed)
nchk.addNum(x > cutoff ? 1 : 0); // For rightmost.closed = TRUE
else
nchk.addNum(x >= cutoff ? 1 : 0);
}
}
}
}.doAll(1,fr).outputFrame(fr._names, fr.domains());
env.subRef(fr, skey);
env.pop();
env.push(fr2);
} else if(env.isAry()) {
// --- Vector form: a column of cutoffs ---
Frame ary = env.popAry();
String skey1 = env.key();
if(ary.vecs().length != 1 || ary.vecs()[0].isEnum())
throw new IllegalArgumentException("Second argument must be a numeric column vector");
Vec brks = ary.vecs()[0];
// TODO: Check that num rows below some cutoff, else this will likely crash
// Check if vector of cutoffs is sorted in weakly ascending order
// NOTE(review): a zero-length brks would index cutoffs[-1] below - confirm
// callers never pass an empty vector
final int len = (int)brks.length();
final double[] cutoffs = new double[len];
for(int i = 0; i < len-1; i++) {
if(brks.at(i) > brks.at(i+1))
throw new IllegalArgumentException("Second argument must be sorted in non-decreasing order");
cutoffs[i] = brks.at(i);
}
cutoffs[len-1] = brks.at(len-1);
Frame fr = env.popAry();
String skey2 = env.key();
if(fr.vecs().length != 1 || fr.vecs()[0].isEnum())
throw new IllegalArgumentException("First argument must be a numeric column vector");
Frame fr2 = new MRTask2() {
@Override public void map(Chunk chk, NewChunk nchk) {
for(int r = 0; r < chk._len; r++) {
double x = chk.at0(r);
if(Double.isNaN(x))
nchk.addNum(Double.NaN);
else {
// binarySearch returns the match index, or -(insertion point)-1 when
// x is absent; the insertion point is the number of cutoffs below x
double n = Arrays.binarySearch(cutoffs, x);
if(n < 0) nchk.addNum(-n-1);
// An exact hit on the last cutoff stays in the last interval when rclosed
else if(rclosed && n == len-1) nchk.addNum(n); // For rightmost.closed = TRUE
else nchk.addNum(n+1);
}
}
}
}.doAll(1,fr).outputFrame(fr._names, fr.domains());
env.subRef(ary, skey1);
env.subRef(fr, skey2);
env.pop();
env.push(fr2);
}
// NOTE(review): there is no final else; if the second argument is neither a
// double nor an array the method returns having popped only rightmost.closed
}
}
// factor(ary): converts a single column to an enum column if it is not one already
class ASTFactor extends ASTOp {
  ASTFactor() {
    super(new String[]{"factor", "ary"},
          new Type[]{Type.ARY, Type.ARY},
          OPF_PREFIX,
          OPP_PREFIX,OPA_RIGHT);
  }
  @Override String opStr() { return "factor"; }
  @Override ASTOp make() {return new ASTFactor();}
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    Frame result = env.peekAry();     // Ary on top of stack, keeps +1 refcnt
    String resultKey = env.peekKey();
    if( result.numCols() != 1 ) {
      throw new IllegalArgumentException("factor requires a single column");
    }
    final Vec col = result.vecs()[0];
    // Already an enum: pass the frame through unchanged, key and all.
    if( !col.isEnum() ) {
      final Vec asEnum = col.toEnum();
      if( asEnum != null ) {
        // A freshly built frame carries no key.
        result = new Frame(result._names, new Vec[]{asEnum});
        resultKey = null;
      }
    }
    env.poppush(2, result, resultKey);
  }
}
// as.numeric(ary): strips the domain from every column of the frame
class ASTNumeric extends ASTOp {
  ASTNumeric() {
    super(new String[]{"as.numeric", "ary"},
          new Type[]{Type.ARY, Type.ARY},
          OPF_PREFIX,
          OPP_PREFIX,OPA_RIGHT);
  }
  @Override String opStr() { return "as.numeric"; }
  @Override ASTOp make() {return new ASTNumeric();}
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    final Frame input = env.peekAry();   // Ary on top of stack, keeps +1 refcnt
    final String skey = env.peekKey();
    final int ncols = input.numCols();
    final Vec[] stripped = new Vec[ncols];
    for( int c = 0; c < ncols; ++c ) {
      final Vec src = input.vecs()[c];
      // For an enum column prefer its master vec when there is one.
      final Vec master = src.isEnum() ? src.masterVec() : null;
      final Vec chosen = (master != null) ? master : src;
      chosen._domain = null;
      stripped[c] = chosen;
    }
    env.poppush(2, new Frame(input._names, stripped), skey);
  }
}
// print(x, y...): appends every argument's text to env._sb and returns x
class ASTPrint extends ASTOp {
  // Return type is tied to the first argument; the rest are free varargs.
  static Type[] newsig() {
    final Type first = Type.unbound();
    return new Type[]{first, first, Type.varargs(Type.unbound())};
  }
  ASTPrint() {
    super(new String[]{"print", "x", "y..."},
          newsig(),
          OPF_PREFIX,
          OPP_PREFIX,OPA_RIGHT);
  }
  @Override String opStr() { return "print"; }
  @Override ASTOp make() {return new ASTPrint();}
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    // Visit arguments from the deepest (first) up to the top of stack (slot -1).
    for( int slot = 1-argcnt; slot < 0; slot++ ) {
      if( !env.isAry(slot) ) {
        env._sb.append(env.toString(env._sp+slot,true));
      } else {
        env._sb.append(env.ary(slot).toStringAll());
      }
    }
    env.pop(argcnt-2);          // Pop most args
    env.pop_into_stk(-2);       // Pop off fcn, returning 1st arg
  }
}
/**
 * R 'ls' command.
 *
 * Console-only for now: the matching key names are appended to the Env's
 * string buffer and the call itself evaluates to 0. No JSON response is
 * produced.
 */
class ASTLs extends ASTOp {
  ASTLs() {
    super(new String[]{"ls"},
          new Type[]{Type.DBL},
          OPF_PREFIX,
          OPP_PREFIX,
          OPA_RIGHT);
  }
  @Override String opStr() { return "ls"; }
  @Override ASTOp make() {return new ASTLs();}
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    for( Key key : H2O.KeySnapshot.globalSnapshot().keys() ) {
      // Skip keys that are not user-visible or that no longer resolve.
      if( !key.user_allowed() ) continue;
      if( H2O.get(key) == null ) continue;
      env._sb.append(key.toString());
    }
    // Pop the self-function and push a zero.
    env.pop();
    env.push(0.0);
  }
}