package water.fvec;
import water.*;
import java.util.Arrays;
/**
* A NEW single distributed vector column.
*
* The NEW vector has no data, and takes no space. It supports distributed
* parallel writes to it, via calls to append2. Such writes happen in parallel
* and all writes are ordered. Writes *will* be local to the node doing them,
* specifically to allow control over locality. By default, writes will go
* to local-homed chunks with no compression; there is a final 'close' to the NEW
* vector which may do compression; the final 'close' will return some other
* Vec type. NEW Vectors do NOT support reads!
*/
public class AppendableVec extends Vec {
  // Temporary ESPC (elements-per-chunk), for uses which do not know the number
  // of Chunks up front.  Slot i holds the element count of chunk i (relative
  // to _chunkOff); trailing zero entries are trimmed at close time.
  public long _tmp_espc[];

  // Allow Chunks to have their final Chunk index (set at closing) offset by
  // this much.  Used by the Parser to fold together multi-file AppendableVecs.
  public final int _chunkOff;

  /** Make a NEW vector with a small default temp-ESPC and no chunk offset. */
  public AppendableVec( Key<Vec> key, byte type ) { this(key, new long[4], type, 0); }

  /** Make a NEW vector using the given (possibly shared, pre-sized) temp-ESPC
   *  array, with final chunk indices offset by {@code chunkOff}. */
  public AppendableVec( Key<Vec> key, long[] tmp_espc, byte type, int chunkOff) {
    super( key, -1/*no rowLayout yet*/, null, type );
    _tmp_espc = tmp_espc;
    _chunkOff = chunkOff;
  }

  // A NewVector chunk was "closed" - completed.  Add its info to the roll-up.
  // This call is made in parallel across all node-local created chunks, but is
  // not called distributed.
  synchronized void closeChunk( int cidx, int len ) {
    // The Parser will pre-allocate the _tmp_espc large enough (the Parser
    // knows how many final Chunks there will be up front).  Other users are
    // encouraged to set a "large enough" espc - and a shared one at that - to
    // avoid these copies.
    // Set the length into the temp ESPC at the Chunk index (accounting for _chunkOff)
    cidx -= _chunkOff;
    // Grow on demand; should not happen if espcs are preallocated and shared!
    // Math.max guards a zero-length starting array, which would otherwise
    // never grow (0<<1 == 0) and loop forever; for lengths >= 1 the doubling
    // is unchanged.
    while( cidx >= _tmp_espc.length )
      _tmp_espc = Arrays.copyOf(_tmp_espc, Math.max(1,_tmp_espc.length)<<1);
    _tmp_espc[cidx] = len;
  }

  /** Close all AppendableVecs (which must share one chunk layout), blocking
   *  until all pending K/V work completes.
   *  @return the closed, readable Vecs */
  public static Vec[] closeAll(AppendableVec [] avs) {
    Futures fs = new Futures();
    Vec [] res = closeAll(avs,fs);
    fs.blockForPending();
    return res;
  }

  /** Close all AppendableVecs (which must share one chunk layout), adding any
   *  pending K/V work to {@code fs}.  The row layout is computed once, from
   *  the first vector, and shared by all of them.
   *  @return the closed, readable Vecs */
  public static Vec[] closeAll(AppendableVec [] avs, Futures fs) {
    if( avs.length == 0 ) return new Vec[0]; // Nothing to close
    Vec [] res = new Vec[avs.length];
    final int rowLayout = avs[0].compute_rowLayout();
    for(int i = 0; i < avs.length; ++i)
      res[i] = avs[i].close(rowLayout,fs);
    return res;
  }

  // Class 'reduce' call on new vectors; to combine the roll-up info.
  // Called single-threaded from the M/R framework.
  public void reduce( AppendableVec nv ) {
    if( this == nv ) return;                // Trivially done
    if( _tmp_espc == nv._tmp_espc ) return; // Shared ESPC array: nothing to merge
    // Combine arrays of elements-per-chunk
    long e1[] = nv._tmp_espc;               // Shorter array of longs?
    if (e1.length > _tmp_espc.length) {     // Keep longer array
      e1 = _tmp_espc;                       // Keep the shorter one in e1
      _tmp_espc = nv._tmp_espc;             // Keep longer in the object
    }
    for( int i=0; i<e1.length; i++ )        // Copy non-zero elements over
      if( _tmp_espc[i]==0 && e1[i] != 0 )   // Read-filter (old code unconditionally did a R-M-W cycle)
        _tmp_espc[i] = e1[i];               // Only write if needed
  }

  /** Compute the row layout and close in one step. */
  public Vec layout_and_close(Futures fs) { return close(compute_rowLayout(),fs); }

  /** Roll the temp ESPC (ignoring trailing zero-length chunks) up into a
   *  cumulative start-element array and register it, returning the row-layout
   *  id used to close this vector. */
  public int compute_rowLayout() {
    int nchunk = _tmp_espc.length;
    while( nchunk > 1 && _tmp_espc[nchunk-1] == 0 )
      nchunk--;                 // Trim trailing empty chunks
    // Compute elems-per-chunk.
    // Roll-up elem counts, so espc[i] is the starting element# of chunk i.
    long espc[] = new long[nchunk+1]; // Shorter array
    long x=0;                   // Total row count so far
    for( int i=0; i<nchunk; i++ ) {
      espc[i] = x;              // Start elem# for chunk i
      x += _tmp_espc[i];        // Raise total elem count
    }
    espc[nchunk]=x;             // Total element count in last
    return ESPC.rowLayout(_key,espc);
  }

  // "Close" out a NEW vector - rewrite it to a plain Vec that supports random
  // reads, plus computes rows-per-chunk, min/max/mean, etc.
  public Vec close(int rowLayout, Futures fs) {
    // Compute #chunks: drop trailing empty chunks, deleting any chunk keys
    // that may have been created for them from the K/V store.
    int nchunk = _tmp_espc.length;
    DKV.remove(chunkKey(nchunk),fs); // remove potential trailing key
    while( nchunk > 1 && _tmp_espc[nchunk-1] == 0 ) {
      nchunk--;
      DKV.remove(chunkKey(nchunk),fs); // remove potential trailing key
    }
    // Replacement plain Vec for AppendableVec.
    Vec vec = new Vec(_key, rowLayout, domain(), _type);
    DKV.put(_key,vec,fs);       // Inject the header into the K/V store
    return vec;
  }

  // Default read/write behavior for AppendableVecs: write-only while building.
  @Override protected boolean readable() { return false; }
  @Override protected boolean writable() { return true ; }
  @Override public NewChunk chunkForChunkIdx(int cidx) { return new NewChunk(this,cidx); }

  // None of these are supposed to be called while building the new vector
  @Override public Value chunkIdx( int cidx ) { throw H2O.fail(); }
  @Override public long length() { throw H2O.fail(); }
  @Override public int nChunks() { throw H2O.fail(); }
  @Override public int elem2ChunkIdx( long i ) { throw H2O.fail(); }
  @Override protected long chunk2StartElem( int cidx ) { throw H2O.fail(); }
  @Override public long byteSize() { return 0; }
  @Override public String toString() { return "[AppendableVec, unknown size]"; }
}