package water.rapids.ast.prims.advmath;
import water.DKV;
import water.MRTask;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.Vec;
import water.rapids.Env;
import water.rapids.ast.AstPrimitive;
import water.rapids.ast.AstRoot;
import water.rapids.ast.prims.mungers.AstGroup;
import water.rapids.vals.ValFrame;
import water.util.IcedHashMap;
public class AstUnique extends AstPrimitive {
@Override
public String[] args() {
return new String[]{"ary"};
}
@Override
public int nargs() {
return 1 + 1;
} // (unique col)
@Override
public String str() {
return "unique";
}
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot asts[]) {
Frame fr = stk.track(asts[1].exec(env)).getFrame();
Vec vec0 = fr.vec(0);
Vec v;
if (fr.numCols() != 1)
throw new IllegalArgumentException("Unique applies to a single column only.");
if (vec0.isCategorical()) {
v = Vec.makeSeq(0, (long) vec0.domain().length, true);
v.setDomain(vec0.domain());
DKV.put(v);
} else {
UniqTask t = new UniqTask().doAll(fr);
int nUniq = t._uniq.size();
final AstGroup.G[] uniq = t._uniq.keySet().toArray(new AstGroup.G[nUniq]);
v = Vec.makeZero(nUniq, vec0.get_type());
new MRTask() {
@Override
public void map(Chunk c) {
int start = (int) c.start();
for (int i = 0; i < c._len; ++i) c.set(i, uniq[i + start]._gs[0]);
}
}.doAll(v);
}
return new ValFrame(new Frame(v));
}
private static class UniqTask extends MRTask<UniqTask> {
IcedHashMap<AstGroup.G, String> _uniq;
@Override
public void map(Chunk[] c) {
_uniq = new IcedHashMap<>();
AstGroup.G g = new AstGroup.G(1, null);
for (int i = 0; i < c[0]._len; ++i) {
g.fill(i, c, new int[]{0});
String s_old = _uniq.putIfAbsent(g, "");
if (s_old == null) g = new AstGroup.G(1, null);
}
}
@Override
public void reduce(UniqTask t) {
if (_uniq != t._uniq) {
IcedHashMap<AstGroup.G, String> l = _uniq;
IcedHashMap<AstGroup.G, String> r = t._uniq;
if (l.size() < r.size()) {
l = r;
r = _uniq;
} // larger on the left
for (AstGroup.G rg : r.keySet()) l.putIfAbsent(rg, ""); // loop over smaller set
_uniq = l;
t._uniq = null;
}
}
}
}