package water.rapids.ast.prims.assign;
import water.DKV;
import water.H2O;
import water.MRTask;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.Vec;
import water.parser.BufferedString;
import water.rapids.*;
import water.rapids.ast.AstParameter;
import water.rapids.ast.AstPrimitive;
import water.rapids.ast.AstRoot;
import water.rapids.ast.params.AstNum;
import water.rapids.ast.params.AstNumList;
import water.rapids.ast.prims.mungers.AstColSlice;
import water.rapids.vals.ValFrame;
import water.util.ArrayUtils;
import java.util.Arrays;
import static water.rapids.ast.prims.assign.AstRecAsgnHelper.*;
/**
* Rectangular assign into a row and column slice. The destination must
* already exist. The output is conceptually a new copy of the data, with a
* fresh Frame. Copy-On-Write optimizations lower the cost to be proportional
* to the over-written sections.
*/
public class AstRectangleAssign extends AstPrimitive {
/**
 * Lists the operand names of this primitive, in call order.
 *
 * @return a freshly allocated array holding the four operand names
 */
@Override
public String[] args() {
  final String[] operandNames = {"dst", "src", "col_expr", "row_expr"};
  return operandNames;
}
/**
 * Number of AST slots of the expression {@code (:= dst src col_expr row_expr)}.
 *
 * @return 5 (presumably the operator slot plus the four operands; {@code apply} reads
 *         the operands from slots 1 to 4)
 */
@Override
public int nargs() {
  return 1 + 4;
}
/**
 * Textual name of this primitive.
 *
 * @return the assignment operator token {@code ":="}
 */
@Override
public String str() {
  final String operator = ":=";
  return operator;
}
/**
 * Executes {@code (:= dst src col_expr row_expr)}: assigns {@code src} into the selected
 * columns and rows of {@code dst} and returns the resulting frame.
 * <p>
 * The source may evaluate to a number, a string or a frame. A NaN number is passed on as
 * {@code null}. Rows are selected either by an explicit number / number-list, or by a frame
 * whose first column acts as a row predicate; assigning a frame through a predicate is not
 * implemented.
 *
 * @param env  execution environment; its session is handed to the assignment helpers
 * @param stk  stack helper used to track the evaluated intermediate values
 * @param asts the AST nodes; slots 1 to 4 hold dst, src, col_expr and row_expr
 * @return a {@link ValFrame} wrapping a new Frame built over the (updated) destination columns
 * @throws IllegalArgumentException if the source is neither a frame, a number nor a string
 */
@Override
public ValFrame apply(Env env, Env.StackHelp stk, AstRoot[] asts) {
  Frame dst = stk.track(asts[1].exec(env)).getFrame();
  Val vsrc = stk.track(asts[2].exec(env));
  AstParameter col_list = (AstParameter) asts[3];
  // Column selection
  AstNumList cols_numlist = new AstNumList(col_list.columns(dst.names()));
  // Special for AstAssign: "empty" really means "all"
  if (cols_numlist.isEmpty()) cols_numlist = new AstNumList(0, dst.numCols());
  // Allow R-like number list expansion: negative column numbers mean exclusion
  int[] cols = AstColSlice.col_select(dst.names(), cols_numlist);
  // Any COW optimized path changes Vecs in dst._vecs, and so needs a
  // defensive copy. Any update-in-place path updates Chunks instead of
  // dst._vecs, and does not need a defensive copy. To make life easier,
  // just make the copy now.
  dst = new Frame(dst._names, dst.vecs().clone());
  // Assign over the column slice
  if (asts[4] instanceof AstNum || asts[4] instanceof AstNumList) { // Explicitly named row assignment
    AstNumList rows = (asts[4] instanceof AstNum)
        ? new AstNumList(((AstNum) asts[4]).getNum())
        : ((AstNumList) asts[4]);
    if (rows.isEmpty()) rows = new AstNumList(0, dst.numRows()); // Empty rows is really: all rows
    switch (vsrc.type()) {
      case Val.NUM:
        assign_frame_scalar(dst, cols, rows, nanToNull(vsrc.getNum()), env._ses);
        break;
      case Val.STR:
        assign_frame_scalar(dst, cols, rows, vsrc.getStr(), env._ses);
        break;
      case Val.FRM:
        assign_frame_frame(dst, cols, rows, vsrc.getFrame(), env._ses);
        break;
      default:
        throw unsupportedSource(vsrc);
    }
  } else { // Boolean assignment selection?
    // The row expression is evaluated before the source type is inspected.
    Frame rows = stk.track(asts[4].exec(env)).getFrame();
    switch (vsrc.type()) {
      case Val.NUM:
        assign_frame_scalar(dst, cols, rows, nanToNull(vsrc.getNum()), env._ses);
        break;
      case Val.STR:
        assign_frame_scalar(dst, cols, rows, vsrc.getStr(), env._ses);
        break;
      case Val.FRM:
        throw H2O.unimpl();
      default:
        throw unsupportedSource(vsrc);
    }
  }
  return new ValFrame(dst);
}
// Builds the error raised when the source value is of a type that cannot be assigned.
// String sources are accepted as well, so the message lists all three supported kinds.
private static IllegalArgumentException unsupportedSource(Val vsrc) {
  return new IllegalArgumentException("Source must be a Frame, Number or String, but found a " + vsrc.getClass());
}
/**
 * Rectangular copy of {@code src} into the selected columns and rows of {@code dst}.
 * <p>
 * Three strategies are used, in this order:
 * <ol>
 *   <li>all rows selected densely: the source Vecs replace the destination columns directly;</li>
 *   <li>at most one row, or at most 1000 cells: cells are written one by one;</li>
 *   <li>otherwise: the row list is sorted and an {@code AssignFrameFrameTask} does the copy.</li>
 * </ol>
 * The partial-update strategies (2 and 3) require matching column types and, for categoricals,
 * identical domains.
 *
 * @param dst  destination frame
 * @param cols destination column indices, one per source column
 * @param rows destination rows; sorted in place when the task-based path is taken
 * @param src  source frame; must have {@code cols.length} columns and {@code rows.cnt()} rows
 * @param ses  session used to obtain writable copies of the destination columns
 * @throws IllegalArgumentException on a shape, type or categorical-domain mismatch
 */
private void assign_frame_frame(Frame dst, int[] cols, AstNumList rows, Frame src, Session ses) {
  // Shape validation
  if (cols.length != src.numCols())
    throw new IllegalArgumentException("Source and destination frames must have the same count of columns");
  final long rowCount = rows.cnt();
  if (src.numRows() != rowCount)
    throw new IllegalArgumentException("Requires same count of rows in the number-list (" + rowCount + ") as in the source (" + src.numRows() + ")");

  // Every row is overwritten: plug the source columns straight into the destination.
  if (dst.numRows() == rowCount && rows.isDense()) {
    int srcCol = 0;
    for (int dstCol : cols)
      dst.replace(dstCol, src.vecs()[srcCol++]);
    if (dst._key != null) DKV.put(dst);
    return;
  }

  // Only part of each column changes, so source and destination must agree on type.
  final Vec[] dvecs = dst.vecs();
  final Vec[] svecs = src.vecs();
  for (int col = 0; col < cols.length; col++) {
    final int dstCol = cols[col];
    final Vec target = dvecs[dstCol];
    final Vec source = svecs[col];
    final int targetType = target.get_type();
    if (targetType != source.get_type())
      throw new IllegalArgumentException("Columns must be the same type; " +
          "column " + col + ", \'" + dst._names[dstCol] + "\', is of type " + target.get_type_str() +
          " and the source is " + source.get_type_str());
    if (targetType == Vec.T_CAT && !Arrays.equals(target.domain(), source.domain()))
      throw new IllegalArgumentException("Cannot assign to a categorical column with a different domain; " +
          "source column " + src._names[col] + ", target column " + dst._names[dstCol]);
  }

  // Both remaining strategies write in place, so get writable columns first.
  final boolean smallUpdate = rowCount <= 1 || (cols.length * rowCount) <= 1000;
  final Vec[] writable = ses.copyOnWrite(dst, cols);

  if (smallUpdate) {
    // Few cells: write them one at a time; source row r lands on destination row rowIds[r].
    final long[] rowIds = rows.expand8();
    for (int col = 0; col < svecs.length; col++) {
      final Vec source = svecs[col];
      final int dstCol = cols[col];
      if (source.get_type() == Vec.T_STR) {
        final BufferedString buffer = new BufferedString();
        for (int r = 0; r < rowIds.length; r++) {
          final BufferedString text = source.atStr(buffer, r);
          final String value = (text == null) ? null : text.toString();
          writable[dstCol].set(rowIds[r], value);
        }
      } else {
        for (int r = 0; r < rowIds.length; r++)
          writable[dstCol].set(rowIds[r], source.at(r));
      }
    }
    return;
  }

  // Many cells: hand only the selected columns to the task.
  final Vec[] targets = new Vec[cols.length];
  for (int k = 0; k < targets.length; k++)
    targets[k] = writable[cols[k]];
  rows.sort(); // Side-effect internal sort; needed for fast row lookup
  new AssignFrameFrameTask(rows, svecs).doAll(targets);
}
/**
 * Task that copies values from the source Vecs into the selected rows of the destination
 * chunks. For every destination row, {@code _rows.index(row)} gives the source row to read;
 * a negative index means the row is not selected and is skipped.
 */
private static class AssignFrameFrameTask extends RowSliceTask {
  private Vec[] _svecs; // source columns, parallel to the destination chunks handed to the task
  private AssignFrameFrameTask(AstNumList rows, Vec[] svecs) {
    super(rows);
    _svecs = svecs;
  }
  /**
   * Copies the selected rows of one destination chunk group.
   *
   * @param cs        destination chunks, one per assigned column
   * @param chkOffset first in-chunk row that may be selected
   */
  @Override
  void mapChunkSlice(Chunk[] cs, int chkOffset) {
    long start = cs[0].start();
    Chunk[] scs = null; // source chunks covering the most recently read source row
    BufferedString bStr = new BufferedString(); // scratch buffer, reused for every string read
    for (int i = chkOffset; i < cs[0]._len; ++i) {
      long idx = _rows.index(start + i);
      if (idx < 0) continue;
      // Reload the source chunks only when idx falls outside the cached range
      // [scs[0].start(), scs[0].start() + scs[0].len()). The lower-bound test was previously
      // inverted (start < idx), which forced a reload for nearly every row and would have
      // skipped the reload for an index below the cached chunk.
      if ((scs == null) || (idx < scs[0].start()) || (idx >= scs[0].start() + scs[0].len())) {
        int sChkIdx = _svecs[0].elem2ChunkIdx(idx);
        scs = new Chunk[_svecs.length];
        for (int j = 0; j < _svecs.length; j++) {
          scs[j] = _svecs[j].chunkForChunkIdx(sChkIdx);
        }
      }
      int si = (int) (idx - scs[0].start()); // row offset inside the source chunks
      for (int j = 0; j < cs.length; j++) {
        Chunk chk = cs[j];
        Chunk schk = scs[j];
        if (_svecs[j].get_type() == Vec.T_STR) {
          BufferedString s = schk.atStr(bStr, si);
          String value = s != null ? s.toString() : null;
          chk.set(i, value);
          // NOTE(review): the value is read back and a null is written a second time when the
          // first write did not read back as null; this looks like a workaround for some chunk
          // implementation - confirm before removing.
          BufferedString bss = chk.atStr(new BufferedString(), i);
          if (s == null && bss != null) {
            chk.set(i, value);
          }
        } else {
          chk.set(i, schk.atd(si));
        }
      }
    }
  }
}
/**
 * Assigns one scalar to the selected columns over an explicit list of rows.
 * <p>
 * When every row is selected densely and the scalar is a number, the columns are replaced by
 * a single constant Vec and no type compatibility check is made (the whole Vec is overwritten).
 * Otherwise the scalar must be compatible with every target column; a single row is written
 * directly, several rows through an {@code AssignFrameScalarTask}.
 *
 * @param dst  destination frame
 * @param cols destination column indices
 * @param rows destination rows; sorted in place when the task-based path is taken
 * @param src  scalar to assign; may be {@code null}
 * @param ses  session used to obtain writable copies of the destination columns
 * @throws IllegalArgumentException if the scalar cannot be stored in one of the target columns
 */
private void assign_frame_scalar(Frame dst, int[] cols, AstNumList rows, Object src, Session ses) {
  final long rowCount = rows.cnt();
  final boolean wholeColumns = dst.numRows() == rowCount && rows.isDense();

  // Bulk numeric fill: swap in a constant column instead of touching individual cells.
  if (wholeColumns && src instanceof Number) {
    final Vec anyVec = dst.anyVec();
    assert anyVec != null; // if anyVec was null, then dst.numRows() would have been 0
    final Vec constant = anyVec.makeCon((double) src);
    for (int k = 0; k < cols.length; k++)
      dst.replace(cols[k], constant);
    if (dst._key != null) DKV.put(dst);
    return;
  }

  // Cell-wise paths: refuse scalars the target columns cannot hold.
  for (int col : cols) {
    final Vec target = dst.vec(col);
    if (!isScalarCompatible(src, target))
      throw new IllegalArgumentException("Cannot assign value " + src + " into a vector of type " + target.get_type_str() + ".");
  }

  final Vec[] writable = ses.copyOnWrite(dst, cols);

  // Exactly one row: set the cells directly.
  if (rowCount == 1) {
    final long targetRow = (long) rows._bases[0];
    for (int col : cols) {
      final Vec target = writable[col];
      createValueSetter(target, src).setValue(target, targetRow);
    }
    return;
  }

  // Several rows: run the task over just the selected columns.
  final Vec[] targets = new Vec[cols.length];
  for (int k = 0; k < targets.length; k++)
    targets[k] = writable[cols[k]];
  rows.sort(); // Side-effect internal sort; needed for fast row lookup
  AssignFrameScalarTask.doAssign(rows, targets, src);
}
/**
 * Task that writes one scalar into the selected rows of a set of Vecs, using one
 * {@code ValueSetter} per target Vec.
 */
private static class AssignFrameScalarTask extends RowSliceTask {
  final ValueSetter[] _setters; // _setters[k] writes the value into the k-th target column
  AssignFrameScalarTask(AstNumList rows, Vec[] vecs, Object value) {
    super(rows);
    final ValueSetter[] setters = new ValueSetter[vecs.length];
    int k = 0;
    for (Vec vec : vecs)
      setters[k++] = createValueSetter(vec, value);
    _setters = setters;
  }
  /**
   * Writes the value into every selected row of the chunk group, starting at {@code chkOffset}.
   */
  @Override
  void mapChunkSlice(Chunk[] cs, int chkOffset) {
    final long firstRow = cs[0].start();
    for (int row = chkOffset; row < cs[0]._len; row++) {
      if (!_rows.has(firstRow + row)) continue; // row not selected
      for (int c = 0; c < cs.length; c++)
        _setters[c].setValue(cs[c], row);
    }
  }
  /**
   * Assigns a given value to the specified rows of the given Vecs.
   * @param rows row specification
   * @param dst target Vecs
   * @param src source Value
   */
  static void doAssign(AstNumList rows, Vec[] dst, Object src) {
    final AssignFrameScalarTask task = new AssignFrameScalarTask(rows, dst, src);
    task.doAll(dst);
  }
}
/**
 * Tells whether a scalar may be stored in a Vec of the given type.
 * <ul>
 *   <li>{@code null} is accepted by every Vec;</li>
 *   <li>a {@code Number} needs a numeric or time Vec;</li>
 *   <li>a {@code String} needs a string or UUID Vec, or a categorical Vec whose domain
 *       contains that string;</li>
 *   <li>any other object is rejected.</li>
 * </ul>
 *
 * @param scalar value to be assigned, possibly {@code null}
 * @param v      target Vec
 * @return {@code true} if the value can be assigned into {@code v}
 */
private boolean isScalarCompatible(Object scalar, Vec v) {
  if (scalar == null) return true;
  if (scalar instanceof Number) {
    final int type = v.get_type();
    return type == Vec.T_NUM || type == Vec.T_TIME;
  }
  if (scalar instanceof String) {
    final int type = v.get_type();
    if (type == Vec.T_CAT)
      return ArrayUtils.contains(v.domain(), (String) scalar);
    return type == Vec.T_STR || type == Vec.T_UUID;
  }
  return false;
}
/**
 * Boxes a double, mapping NaN to {@code null}.
 *
 * @param value number to convert
 * @return {@code null} when {@code value} is NaN, otherwise the boxed value
 */
private static Double nanToNull(double value) {
  if (Double.isNaN(value)) {
    return null;
  }
  return Double.valueOf(value);
}
/**
 * Assigns one scalar to the selected columns wherever the first column of {@code rows}
 * equals 1.
 * <p>
 * When that column is constant with value 1 and the scalar is a number, the selected columns
 * are replaced by a single constant Vec and no type compatibility check is made (the whole
 * Vec is overwritten). Otherwise the scalar must be compatible with every target column and
 * the update is done by a {@code ConditionalAssignTask}.
 *
 * @param dst  destination frame
 * @param cols destination column indices
 * @param rows frame whose first column is the row predicate; must have as many rows as {@code dst}
 * @param src  scalar to assign; may be {@code null}
 * @param ses  session used to obtain writable copies of the destination columns
 * @throws IllegalArgumentException if the row counts differ or the scalar cannot be stored in
 *                                  one of the target columns
 */
private void assign_frame_scalar(Frame dst, int[] cols, Frame rows, Object src, Session ses) {
  final Vec predicate = rows.vec(0);
  if (dst.numRows() != rows.numRows())
    throw new IllegalArgumentException("Frame " + dst._key + " has different number of rows than frame " + rows._key +
        " (" + dst.numRows() + " vs " + rows.numRows() + ").");

  // Predicate selects every row and the value is numeric: swap in a constant column.
  final boolean selectsEverything = predicate.isConst() && ((int) predicate.min() == 1);
  if (selectsEverything && src instanceof Number) {
    final Vec anyVec = dst.anyVec();
    assert anyVec != null;
    final Vec constant = anyVec.makeCon((double) src);
    for (int k = 0; k < cols.length; k++)
      dst.replace(cols[k], constant);
    if (dst._key != null) DKV.put(dst);
    return;
  }

  // Cell-wise path: refuse scalars the target columns cannot hold.
  for (int col : cols) {
    final Vec target = dst.vec(col);
    if (!isScalarCompatible(src, target))
      throw new IllegalArgumentException("Cannot assign value " + src + " into a vector of type " + target.get_type_str() + ".");
  }

  // Update just the selected columns, in place, on writable copies.
  final Vec[] writable = ses.copyOnWrite(dst, cols);
  final Vec[] targets = new Vec[cols.length];
  for (int k = 0; k < targets.length; k++)
    targets[k] = writable[cols[k]];
  ConditionalAssignTask.doAssign(targets, src, predicate);
}
/**
 * Task that writes one scalar into every row whose predicate value equals 1. The predicate
 * chunk is expected as the last chunk of each group; all preceding chunks are targets.
 */
private static class ConditionalAssignTask extends MRTask<ConditionalAssignTask> {
  final ValueSetter[] _setters; // _setters[k] writes the value into the k-th target column
  ConditionalAssignTask(Vec[] vecs, Object value) {
    final int targetCount = vecs.length;
    _setters = new ValueSetter[targetCount];
    for (int k = 0; k < targetCount; k++)
      _setters[k] = AstRecAsgnHelper.createValueSetter(vecs[k], value);
  }
  @Override
  public void map(Chunk[] cs) {
    final int predicateIdx = cs.length - 1;
    final Chunk predicate = cs[predicateIdx];
    for (int row = 0; row < cs[0]._len; row++) {
      if (predicate.at8(row) != 1) continue; // predicate not satisfied for this row
      for (int col = 0; col < predicateIdx; col++)
        _setters[col].setValue(cs[col], row);
    }
  }
  /**
   * Sets a given value to all cells where given predicateVec is true.
   * @param dst target Vecs
   * @param src source Value
   * @param predicateVec predicate Vec
   */
  static void doAssign(Vec[] dst, Object src, Vec predicateVec) {
    final int targetCount = dst.length;
    // Targets first, predicate appended as the final Vec.
    final Vec[] targetsAndPredicate = new Vec[targetCount + 1];
    targetsAndPredicate[targetCount] = predicateVec;
    System.arraycopy(dst, 0, targetsAndPredicate, 0, targetCount);
    final ConditionalAssignTask task = new ConditionalAssignTask(dst, src);
    task.doAll(targetsAndPredicate);
  }
}
/**
 * Base task for updates limited to a list of rows. Chunk groups lying entirely outside the
 * span between the smallest and largest selected row are skipped; for the others the subclass
 * is called with the in-chunk offset at which selected rows may start.
 */
private static abstract class RowSliceTask extends MRTask<RowSliceTask> {
  final AstNumList _rows;
  RowSliceTask(AstNumList rows) {
    _rows = rows;
  }
  @Override
  public void map(Chunk[] cs) {
    final long chunkStart = cs[0].start();
    final long chunkEnd = chunkStart + cs[0]._len;
    final long firstRow = (long) _rows.min();
    final long lastRow = (long) _rows.max() - 1; // exclusive max to inclusive max when stride == 1
    // The selected rows end before this chunk, or begin after it: nothing to do here.
    if (lastRow < chunkStart || firstRow > chunkEnd) return;
    // Start at the first selected row when it lies inside the chunk, else at the chunk start.
    final long sliceStart = Math.max(firstRow, chunkStart);
    mapChunkSlice(cs, (int) (sliceStart - chunkStart));
  }
  /**
   * Processes one chunk group.
   *
   * @param cs        the chunks of the group
   * @param chkOffset in-chunk row index from which selected rows may occur
   */
  abstract void mapChunkSlice(Chunk[] cs, int chkOffset);
}
}