package water.exec;
import water.Key;
import water.MRTask2;
import water.fvec.*;
import water.util.Utils;
import java.util.Arrays;
/**
 * The {@code table} operator: builds a contingency table of level counts for a
 * frame holding one or two integer columns.
 *
 * <p>With one input column the result has a {@code row.names} column listing the
 * observed levels and a single {@code Count} column. With two input columns the
 * result has the {@code row.names} column (levels of the first input column)
 * followed by one count column per level of the second input column.
 */
public class ASTTable extends ASTOp {
  ASTTable() {
    super(new String[]{"table", "ary"}, new Type[]{Type.ARY, Type.ARY},
          OPF_PREFIX,
          OPP_PREFIX,
          OPA_RIGHT);
  }

  @Override String opStr() { return "table"; }

  @Override ASTOp make() { return new ASTTable(); }

  /**
   * Reads the frame at stack position {@code -1}, tabulates it, and pushes the
   * resulting count frame.
   *
   * @throws IllegalArgumentException if the frame has more than two columns or
   *         any of its columns is not an integer vector
   */
  @Override void apply(Env env, int argcnt, ASTApply apply) {
    Frame fr = env.ary(-1);
    Vec[] inputs = fr.vecs();
    int ncol = inputs.length;
    if (ncol > 2) {
      throw new IllegalArgumentException("table does not apply to more than two cols.");
    }
    for (int i = 0; i < ncol; i++) {
      if (!inputs[i].isInt()) {
        throw new IllegalArgumentException("table only applies to integer vectors.");
      }
    }

    // Domain names used as row/column labels; an entry is null when the vec has
    // no domain, in which case the numeric levels found by CollectDomain are used.
    String[][] domains = new String[ncol][];
    long[][] levels = new long[ncol][];
    for (int i = 0; i < ncol; i++) {
      Vec v = inputs[i];
      // NOTE(review): Tabularize binary-searches these arrays, so this assumes
      // CollectDomain returns the levels sorted ascending -- confirm.
      levels[i] = new Vec.CollectDomain(v).doAll(new Frame(v)).domain();
      domains[i] = v.domain();
    }

    // counts[level1][level0]; the outer dimension is 1 for a single input column.
    long[][] counts = new Tabularize(levels).doAll(fr)._counts;

    // Build output vecs: one label column plus one column per row of counts.
    int nOut = counts.length + 1;
    Key[] keys = Vec.VectorGroup.VG_LEN1.addVecs(nOut);
    Vec[] vecs = new Vec[nOut];
    String[] colnames = new String[nOut];

    // Column 0: the levels of the first input column, carrying a copy of its
    // domain (if any).
    AppendableVec v0 = new AppendableVec(keys[0]);
    v0._domain = domains[0] == null ? null : domains[0].clone();
    NewChunk c0 = new NewChunk(v0, 0);
    for (int i = 0; i < levels[0].length; i++) {
      c0.addNum((double) levels[0][i]);
    }
    c0.close(0, null);
    vecs[0] = v0.close(null);
    colnames[0] = "row.names";
    if (ncol == 1) {
      colnames[1] = "Count";
    }

    // Columns 1..n: the counts for each level of the second input column (or the
    // single count column when there is only one input column).
    for (int level1 = 0; level1 < counts.length; level1++) {
      AppendableVec v = new AppendableVec(keys[level1 + 1]);
      NewChunk c = new NewChunk(v, 0);
      v._domain = null;
      for (int level0 = 0; level0 < counts[level1].length; level0++) {
        c.addNum((double) counts[level1][level0]);
      }
      c.close(0, null);
      vecs[level1 + 1] = v.close(null);
      if (ncol > 1) {
        // Name the column after the level: its domain string when the second
        // input column has a domain, otherwise the numeric level itself.
        colnames[level1 + 1] = domains[1] == null
            ? Long.toString(levels[1][level1])
            : domains[1][(int) (levels[1][level1])];
      }
    }

    // NOTE(review): removes two stack entries before pushing the result --
    // presumably the frame argument and the operator; confirm against Env.
    env.pop(2);
    env.push(new Frame(colnames, vecs));
  }

  /**
   * Counts how often each level (one column) or each pair of levels (two
   * columns) occurs. Rows with a missing value in either column are skipped.
   */
  public static class Tabularize extends MRTask2<Tabularize> {
    /** Per-column level values; each array is looked up with a binary search. */
    public final long[][] _domains;
    /**
     * Result, indexed {@code [level1][level0]}. The outer dimension is 1 when
     * only one column is tabulated.
     */
    public long[][] _counts;

    public Tabularize(long[][] dom) { super(); _domains = dom; }

    @Override public void map(Chunk[] cs) {
      assert cs.length == _domains.length;
      _counts = _domains.length == 1 ? new long[1][] : new long[_domains[1].length][];
      for (int i = 0; i < _counts.length; i++) {
        _counts[i] = new long[_domains[0].length];
      }
      for (int i = 0; i < cs[0]._len; i++) {
        if (cs[0].isNA0(i)) {
          continue;
        }
        long ds[] = _domains[0];
        int level0 = Arrays.binarySearch(ds, cs[0].at80(i));
        assert 0 <= level0 && level0 < ds.length : "l0="+level0+", len0="+ds.length+", min="+ds[0]+", max="+ds[ds.length-1];
        int level1;
        if (cs.length > 1) {
          if (cs[1].isNA0(i)) {
            continue;
          }
          // Search with the full long value, exactly as for the first column.
          // Narrowing to int first would truncate values outside the 32-bit
          // range and look up the wrong level.
          level1 = Arrays.binarySearch(_domains[1], cs[1].at80(i));
          assert 0 <= level1 && level1 < _domains[1].length;
        } else {
          level1 = 0;
        }
        _counts[level1][level0]++;
      }
    }

    // NOTE(review): presumably Utils.add sums that._counts into _counts
    // element-wise -- confirm against Utils.
    @Override public void reduce(Tabularize that) { Utils.add(_counts, that._counts); }
  }
}