package water.exec;
import java.util.Arrays;
import water.Iced;
/** Typing system for a generic R-like parser.
* Supports Hindley-Milner style type inference.
* @author cliffc@0xdata.com
*/
// --------------------------------------------------------------------------
public class Type extends Iced {
  // ---- Type tags, stored in the low bits of _t ----
  final static private int UNBOUND= 0;  // Not yet constrained
  final static private int BOUND  = 1;  // Forwarded (union-find link) to _ts[0]
  final static private int DBL0   = 2;
  final static private int ARY0   = 3;
  final static private int FCN0   = 4;  // Return type in _ts[0], args in _ts[1...];
  final static private int DBLARY0= 5;  // Type is either DBL or ARY but not FCN
  final static private int ANYARY0= 6;  // Type is ARY if any ts[] is an ARY, else DBL
  final static private int STR0   = 7;
  final static private int VARARGS=32;  // OR'd onto last type in a fcn, allows zero or more of this type

  int _t;                         // One of the above #s, possibly OR'd with VARARGS
  static private int UNIQUE;      // Unique ID handy for debugging
  final int _x = UNIQUE++;        // Point in program where type is 1st defined.
  Type[] _ts; // null==prim, else fcn and _ts[0] is return, _ts[1+...] are arg types

  /** Builds a type with tag {@code t} and component types {@code ts}.
   *  With assertions enabled, {@code t} must not carry the VARARGS flag and,
   *  for functions, only the last component may carry it. */
  Type( int t, Type[] ts ) { assert varargs_clean(t,ts); _t=t; _ts=ts; }

  /** Builds a varargs type: same as {@link #Type(int, Type[])} and then sets
   *  the VARARGS flag.  The {@code f} argument is ignored; it only
   *  distinguishes this constructor's signature from the other one. */
  Type( int t, Type[] ts, float f ) { this(t,ts); _t|=VARARGS;}

  /** Recursively copies this type.  Every non-null component is copied in
   *  turn, so a component that appears in several slots ends up as several
   *  independent copies.  The VARARGS flag, if set, is carried over.
   *  @return a new Type with the same tag and freshly copied components */
  Type copy() {
    Type[] ts = null;
    if( _ts!=null ) {
      ts = _ts.clone();
      for( int i=0; i<ts.length; i++ )
        if( _ts[i]!=null ) ts[i] = _ts[i].copy();
    }
    // The constructor asserts a VARARGS-free tag, so strip the flag for the
    // constructor call and OR it back in afterwards.
    int vararg = _t&VARARGS;
    Type copy = new Type(_t&~VARARGS,ts);
    copy._t |= vararg;
    return copy;
  }

  // Check no varargs flags, except on the last type of functions
  private boolean varargs_clean( int t, Type ts[] ) {
    if( (t&VARARGS)!=0 ) return false; // Need to clean this upfront
    if( t!=FCN0 || ts==null ) return true;
    for( int i=0; i<ts.length-1; i++ )
      if( ts[i] != null && (ts[i]._t&VARARGS)!=0 )
        return false;
    return true;
  }

  // Make some base types
  static Type DBL = new Type(DBL0,null);
  static Type ARY = new Type(ARY0,null);
  static Type STR = new Type(STR0, null);

  /** @return a fresh unbound type; its single slot holds the target once bound */
  public static Type unbound() { return new Type(UNBOUND,new Type[1]); }
  /** @return a function type; {@code ts[0]} is the return type, the rest are arguments */
  public static Type fcn(Type[] ts) { return new Type(FCN0,ts); }
  /** @return a new type with the tag and components of {@code t} plus the VARARGS flag */
  public static Type varargs(Type t) { return new Type(t._t,t._ts,1f);}
  /** @return a fresh "DBL or ARY" type; its single slot holds the target once bound */
  public static Type dblary() { return new Type(DBLARY0,new Type[1]); }
  /** @return an "anyary" union over the given component types */
  public static Type anyary(Type ts[]) { return new Type(ANYARY0,ts); }

  /** Tarjan Union-Find lookup.  Follows a BOUND link, storing the result back
   *  into {@code _ts[0]} to shorten the chain, and simplifies an ANYARY
   *  result through {@link #findAnyAry()}.
   *  @return the representative type for this type */
  Type find() {
    Type t = this;
    if( _t==BOUND ) t=_ts[0]=_ts[0].find();
    if( t._t!=ANYARY0 ) return t;
    return t.findAnyAry();
  }

  // "anyary" was my 1st attempt at a Union-Type.  It's not going to work so
  // easily.  Need back-ptrs from the component types to the different
  // union-type flavors.  Then when union'ing a component, I can visit types
  // constructed from the component & union them also as needed.  For IfElse, I
  // need the True & False types, the Test type and the Result type.  These
  // combos are legal, and all others illegal:
  //   rez tst  T   F
  //    D   D   D   D
  //    A   A   D   A
  //    A   A   A   D
  //    A   A   A   A
  //    A   D   A   A
  //    F   D   F   F   // and all Fcns are union'd
  //
  //    DA  DA  D   DA  // a single Dbl is not constraining
  //    DA  DA  DA  D
  //    DA  D   DA  DA
  //    A   DA  A   A
  //    A   DA  A   DA  // Any array means the result is ary
  //    A   DA  DA  A
  //    A   A   DA  DA  // weird: at least one of DA must be an A
  //    A   DA  DA  DA  //
  //    DA  DA  DA  DA  // no functions
  //
  //    U   D   U   U   // could be all Fcns or any other mix
  //    U   DA  U   U   // Most general allowed type for IfElse

  // Simplify an ANYARY in place:
  //   Drop DBL's (and STR's), drop dups
  //   If any are ARY, can only be ARY or fail
  //   If FCNs, all must be equal
  //   Return any singular type.
  private Type findAnyAry() {
    int len=0;                  // Number of surviving components, compacted to the front of _ts
    Type fun=null;              // Most recent function component seen, if any
    for( int i=0; i<_ts.length; i++ ) {
      Type t = _ts[i].find();
      if( t._t == FCN0 && fun != null ) {
        // A later function is unified with the one already seen and not kept again
        t.union(fun);
        fun = t.find();
      } else {
        if( t._t == FCN0 ) fun = t;
        if( t._t != DBL0 && t._t != STR0 && // Keep non-DBL & non-STR
            !dupType(len,t) )               // But remove dups
          _ts[len++] = t;                   // len <= i, so unread slots are never overwritten
      }
    }
    // No more types?  Defaults to DBL
    if( len == 0 ) {
      if( _ts.length == 0 ) _ts = new Type[1]; // Empty anyary: make room for the forwarding slot
      _t=BOUND;
      return (_ts[0] = DBL);
    }
    // Single variant type?  Defaults to that type
    if( len == 1 ) { _t=BOUND; return _ts[0]; }
    if( len < _ts.length ) _ts = Arrays.copyOf(_ts, len);
    return this;
  }

  // True if t is already (by identity) among the first len entries of _ts
  private boolean dupType( int len, Type t ) {
    for( int j=0; j<len; j++ ) if( _ts[j]==t ) return true;
    return false;
  }

  /** Unifies this type with {@code t}.  Unbound and DBLARY types are bound by
   *  forwarding them to the other side; functions and anyarys are unified
   *  component by component.
   *  @param t the type to unify with
   *  @return true if the two types are compatible, false if they are not
   *  @throws RuntimeException via {@code water.H2O.unimpl()} for the ANYARY
   *          combinations that are not implemented */
  boolean union( Type t ) {
    Type ta=  find();
    Type tb=t.find();
    int tta = ta._t&(VARARGS-1); // Strip off varargs
    int ttb = tb._t&(VARARGS-1); // Strip off varargs
    if( ta==tb ) return true;
    if( (tta==   FCN0 && ttb==   FCN0) ||   // Functions are equal?
        (tta==ANYARY0 && ttb==ANYARY0) )    // AnyArys are equal?
      return unionStructural(ta,tb);
    if( tta==UNBOUND || (tta==DBLARY0 && tb.isDblAry()) ) { ta._t=BOUND; ta._ts[0]=tb; return true; }
    if( ttb==UNBOUND || (ttb==DBLARY0 && ta.isDblAry()) ) { tb._t=BOUND; tb._ts[0]=ta; return true; }
    if( tta==DBLARY0 && ttb==DBLARY0 ) { ta._t=BOUND; ta._ts[0]=tb; return true; }
    if( tta==ANYARY0 && ttb==DBLARY0 ) throw water.H2O.unimpl(); // ???
    if( tta==ANYARY0 && ttb==ARY0    ) throw water.H2O.unimpl(); // ?one of many must be an array?
    if( tta==ANYARY0 && ttb==DBL0    ) { // Force all to DBL
      boolean ok=true;
      // Non-short-circuit '&=': every member is still unified with DBL, and
      // the result is false as soon as any one of them cannot be.
      for( Type t2 : ta._ts ) ok &= Type.DBL.union(t2);
      return ok;
    }
    if( ttb==ANYARY0 ) throw water.H2O.unimpl();
    return tta==ttb;            // Equal after varargs stripping, else types are unequal
  }

  // Structural breakdown of function-type (or anyary) equality.
  // Made more complex by allowing varargs types.
  private static boolean unionStructural( Type ta, Type tb ) {
    Type t0 = ta, t1 = tb;      // Shorter type in t0
    if( ta._ts.length>tb._ts.length ) { t0=tb; t1=ta; }
    // Walk the shorter list, checking types
    boolean ok=true;
    int len=t0._ts.length;
    Type varargs=null;
    // Extra args in T1 can only be matched with a varargs repeat from T0
    if( len > 0 && len < t1._ts.length ) {
      varargs = t0._ts[len-1].find();
      if( (varargs._t&VARARGS)!=0 )
        len--;                  // Dont match the varargs arg in 1st loop
      else varargs=null;        // Else not a varargs
    }
    for( int i=0; i<len; i++ )  // Match all args
      if( !t0._ts[i].union(t1._ts[i]) )
        ok = false;             // Subtypes are unequal
    if( len == t1._ts.length ) return ok;
    // Also ok for a zero-length varargs in t1, and no arg in t0 - but only
    // if the args matched so far were themselves compatible.
    if( len == t1._ts.length-1 && (t1._ts[len].find()._t&VARARGS) != 0 )
      return ok;
    if( varargs==null ) return false;
    // Must be varargs:
    for( int i=len; i<t1._ts.length; i++ ) {
      int tvar = (varargs._t&(VARARGS-1));
      // Use a new unbound/dblary type per extra arg so that binding one
      // repeat does not constrain the others.
      Type rep = tvar==DBLARY0 ? dblary() : (tvar==UNBOUND ? unbound() : varargs);
      if( !rep.union(t1._ts[i]) )
        ok = false;             // Subtypes are unequal
    }
    return ok;
  }

  /** @return true if this type resolves to an unbound type */
  boolean isUnbound(){ Type t=find(); return t._t==UNBOUND; }
  /** @return true if this type resolves to ARY */
  boolean isAry()    { Type t=find(); return t._t==ARY0; }
  /** @return true if this type resolves to DBL */
  boolean isDbl()    { Type t=find(); return t._t==DBL0; }
  /** @return true if this type resolves to a function */
  boolean isFcn()    { Type t=find(); return t._t==FCN0; }
  // If clearly not a function.  False for unbound variables, which might
  // become "not a function" later.
  boolean isNotFun() { Type t=find(); return t._t==DBL0 || t._t==ARY0 || t._t==DBLARY0 || t._t==STR0; }
  /** @return true if this type resolves to exactly DBL or exactly ARY */
  boolean isDblAry() { Type t=find(); return t._t==DBL0 || t._t==ARY0; }
  /** @return true if this type resolves to STR */
  boolean isStr()    { Type t=find(); return t._t==STR0; }

  /** Return type of functions.  With assertions enabled, fails if this type
   *  does not resolve to a function.
   *  @return the resolved return type, taken from {@code _ts[0]} */
  public Type ret()  { Type t=find(); assert t._t == FCN0; return t._ts[0].find(); }

  /** Renders the type for debugging: primitives by name, unbound types as
   *  {@code @<id>}, functions as {@code ret(arg,arg)}, anyarys as
   *  {@code anyary{a,b,}}, with {@code ...} appended for varargs types. */
  @Override public String toString() {
    StringBuilder sb = new StringBuilder();
    switch( _t&(VARARGS-1) ) {
    case UNBOUND: sb.append('@').append(_x);       break;
    case BOUND:   sb.append(_ts[0].toString());    break;
    case DBL0:    sb.append("dbl");                break;
    case ARY0:    sb.append("ary");                break;
    case DBLARY0: sb.append("dblary");             break;
    case STR0:    sb.append("str");                break;
    case ANYARY0: {
      sb.append("anyary{");
      for( Type t : _ts ) sb.append(t).append(',');
      sb.append('}');
      break;
    }
    case FCN0: {
      sb.append(_ts[0]).append('(');
      for( int i=1; i<_ts.length-1; i++ )
        sb.append(_ts[i]).append(',');
      if( _ts.length > 1 ) sb.append(_ts[_ts.length-1]);
      sb.append(')');
      break;
    }
    default: throw water.H2O.unimpl();
    }
    if( (_t&VARARGS)!=0 ) sb.append("...");
    return sb.toString();
  }
}