package water.fvec;
import water.AutoBuffer;
import water.H2O;
import water.MemoryManager;
import water.UDP;
import java.util.Iterator;
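/**
* Created by tomasnykodym on 3/18/14.
* Sparse chunk: only some rows are physically stored, each as a (row id, integer value) entry;
* rows that have no stored entry read back as zero.
*/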
public class CXIChunk extends Chunk {
  /*
   * Layout of _mem, as written by the constructor and parsed by read():
   *   bytes [0,4)  : chunk length (number of rows), stored with UDP.set4 / read with UDP.get4
   *   byte  4      : _ridsz, byte width of every stored row id (2 or 4)
   *   byte  5      : _valsz, byte width of every stored value (0, 1, 2, 4 or 8)
   *   bytes [6,..) : entries of (_ridsz + _valsz) bytes each: row id immediately followed by the value
   * Lookups binary-search on the row ids, so entries are expected to be sorted by ascending row id.
   */
  protected transient int _sparseLen;   // Number of entries physically stored in this chunk
  protected transient int _valsz;       // byte size of stored value
  protected transient int _valsz_log;   // log2(_valsz), used as index into NAS (0 when _valsz is 0 or 1)
  protected transient int _ridsz;       // byte size of stored (chunk-relative) row nums
  protected static final int OFF = 6;   // byte offset of the first entry, i.e. size of the header
  // Offset of the entry most recently located by findOffset(); serves as a one-entry lookup cache.
  protected transient volatile int _lastOff = OFF;
  // NA sentinel for each value width, indexed by _valsz_log (1, 2, 4, 8 bytes).
  private static final long [] NAS = {C1Chunk._NA,C2Chunk._NA,C4Chunk._NA,C8Chunk._NA};

  /**
   * Wraps an already-populated entry buffer and writes the 6-byte header into it.
   *
   * @param len   number of rows in the chunk (stored and implicit); row ids use 4 bytes when
   *              {@code len >= 65535} and 2 bytes otherwise
   * @param nzs   number of entries stored in {@code buf}
   * @param valsz byte width of each stored value; must be 0, 1, 2, 4 or 8
   * @param buf   backing array; bytes 0..5 are overwritten with the header, and the remainder must
   *              be a whole number of entries (checked by assertion only)
   */
  protected CXIChunk(int len, int nzs, int valsz, byte [] buf){
    assert (valsz == 0 || valsz == 1 || valsz == 2 || valsz == 4 || valsz == 8);
    _len = len;
    _ridsz = len >= 65535?4:2;
    // Smallest log with (1 << log) >= valsz; exact for the permitted power-of-two widths.
    int log = 0;
    while((1 << log) < valsz)++log;
    assert valsz == 0 || (1 << log) == valsz;
    _valsz = valsz;
    _valsz_log = log;
    UDP.set4(buf,0,len);
    buf[4] = (byte) _ridsz;
    buf[5] = (byte) _valsz;
    _mem = buf;
    _sparseLen = nzs;
    assert (_mem.length - OFF) % (_valsz + _ridsz) == 0:"unexpected mem.length in sparse chunk: mem.length = " + (_mem.length - OFF) + ", val_sz = " + _valsz + ", rowId_sz = " + _ridsz;
  }

  @Override public final boolean isSparse() {return true;}

  /** @return number of entries physically stored in this chunk */
  @Override public final int sparseLen(){return _sparseLen;}

  /**
   * Copies the chunk-relative row ids of all stored entries into {@code arr}, in storage order.
   *
   * @param arr destination; must have room for at least {@link #sparseLen()} ids
   * @return number of ids written
   */
  @Override public final int nonzeros(int [] arr){
    final int count = sparseLen();
    final int stride = _valsz + _ridsz;
    int off = OFF;
    for(int i = 0; i < count; ++i, off += stride)
      arr[i] = getId(off);  // same decoding as every other row-id read in this class
    return count;
  }

  // In-place updates always report false.
  // NOTE(review): presumably this tells the caller the write was not applied in place — confirm against Chunk.
  @Override boolean set_impl(int idx, long l)   { return false; }
  @Override boolean set_impl(int idx, double d) { return false; }
  @Override boolean set_impl(int idx, float f )  { return false; }
  @Override boolean setNA_impl(int idx)         { return false; }

  /**
   * @return the stored value for row {@code idx}, or 0 when the row has no stored entry
   * @throws IllegalArgumentException if the stored value is the NA sentinel
   */
  @Override protected long at8_impl(int idx) {
    int off = findOffset(idx);
    if(getId(off) != idx)return 0;  // findOffset landed on a neighbour: row is an implicit zero
    long v = getIValue(off);
    if( v== NAS[_valsz_log])
      throw new IllegalArgumentException("at8 but value is missing");
    return v;
  }

  /** @return the stored value for row {@code idx} as a double, NaN for NA, 0 when the row is not stored */
  @Override protected double atd_impl(int idx) {
    int off = findOffset(idx);
    if(getId(off) != idx)return 0;
    long v = getIValue(off);
    return (v == NAS[_valsz_log])?Double.NaN:v;
  }

  /** @return true only if row {@code i} is stored and its value equals the NA sentinel */
  @Override protected boolean isNA_impl( int i ) {
    int off = findOffset(i);
    if(getId(off) != i)return false;
    return getIValue(off) == NAS[_valsz_log];
  }

  @Override boolean hasFloat () { return false; }

  @Override public String toString(){
    return getClass().getSimpleName() + "( start = " + _start + ", len = " + _len + " sparseLen = " + _sparseLen + " valSz = " + _valsz + " rIdSz = " + _ridsz + ")";
  }

  /**
   * Copies every stored entry into {@code nc}: row ids go to its indices array, values to its
   * mantissa array, and NA sentinels are recorded through {@code setNA_impl2}.
   *
   * @param nc chunk to fill; its sparse length, length and backing arrays are (re)set here
   * @return {@code nc}
   */
  @Override NewChunk inflate_impl(NewChunk nc) {
    final int slen = sparseLen();
    nc.set_sparseLen(slen);
    nc.set_len(len());
    nc.alloc_mantissa(slen);
    nc.alloc_exponent(slen);
    nc.alloc_indices(slen);
    final int stride = _ridsz + _valsz;
    int off = OFF;
    for( int i = 0; i < slen; ++i, off += stride) {
      nc.indices()[i] = getId(off);
      long v = getIValue(off);
      if(v == NAS[_valsz_log])
        nc.setNA_impl2(i);
      else
        nc.mantissa()[i] = v;
    }
    return nc;
  }

  // Read the (chunk-relative) row id of the entry starting at byte offset off.
  // Two-byte ids are masked with 0xFFFF so they are treated as unsigned.
  protected final int getId(int off){
    return _ridsz == 2
      ?UDP.get2(_mem,off)&0xFFFF
      :UDP.get4(_mem,off);
  }

  // Byte offset of the nth stored entry (0-based).
  private final int getOff(int n){return OFF + (_ridsz + _valsz)*n;}

  // Read the integer value of the entry starting at byte offset off; the value follows the row id.
  // One-byte values are masked with 0xFF (unsigned); any width other than 1, 2, 4, 8 is unimplemented here.
  protected final long getIValue(int off){
    switch(_valsz){
      case 1: return _mem[off+ _ridsz]&0xFF;
      case 2: return UDP.get2(_mem, off + _ridsz);
      case 4: return UDP.get4(_mem, off + _ridsz);
      case 8: return UDP.get8(_mem, off + _ridsz);
      default: throw H2O.unimpl();
    }
  }

  /**
   * Locates the entry for chunk-relative row {@code idx}.
   *
   * <p>The result is always a byte offset, never -1. If {@code idx} is stored, it is the offset of
   * its entry. Otherwise it is the offset of a neighbouring entry: the first entry when {@code idx}
   * precedes every stored id, else the entry with the greatest id below {@code idx}. Callers must
   * therefore compare {@code getId(result)} with {@code idx} to tell a hit from a miss.
   *
   * <p>When nothing is stored the result is 0, which is the header rather than an entry.
   * NOTE(review): callers then decode header bytes through getId(0); this looks intentional
   * (the header starts with the chunk length, which no valid row index equals) — confirm.
   *
   * @throws IndexOutOfBoundsException if {@code idx >= _len}
   */
  protected final int findOffset(int idx) {
    if(idx >= _len)throw new IndexOutOfBoundsException();
    int sparseLen = sparseLen();
    if(sparseLen == 0)return 0;
    final byte [] mem = _mem;
    if(idx <= getId(OFF)) // easy cut off accessing the zeros prior first nz
      return OFF;
    int last = mem.length - _ridsz - _valsz;
    if(idx >= getId(last)) // easy cut off accessing of the tail zeros
      return last;
    // Read the volatile cache exactly once so the checks below see one consistent offset.
    final int off = _lastOff;
    int lastIdx = getId(off);
    // check the last accessed elem + one after
    if( idx == lastIdx ) return off;
    if(idx > lastIdx){
      // check the next one (no need to check bounds: idx is below the last stored id here,
      // so the cached entry cannot be the last one)
      final int nextOff = off + _ridsz + _valsz;
      int nextId = getId(nextOff);
      if(idx < nextId) return off;   // idx falls in the gap right after the cached entry
      if(idx == nextId){
        _lastOff = nextOff;
        return nextOff;
      }
    }
    // binary search; invariant: id(lo) <= idx < id(hi), with hi == sparseLen acting as +infinity
    int lo=0, hi = sparseLen;
    while( lo+1 != hi ) {
      int mid = (hi+lo)>>>1;
      if( idx < getId(getOff(mid))) hi = mid;
      else lo = mid;
    }
    int y = getOff(lo);
    _lastOff = y;
    return y;
  }

  /** Serializes the chunk by writing the raw backing array (header included). */
  @Override public AutoBuffer write(AutoBuffer bb) { return bb.putA1(_mem, _mem.length); }

  /**
   * Re-initializes this chunk from a buffer: takes the byte array and rebuilds every transient
   * field from the 6-byte header and the array length.
   *
   * @return this
   */
  @Override public Chunk read(AutoBuffer bb) {
    _mem = bb.bufClose();
    _start = -1;
    _len = UDP.get4(_mem,0);
    _ridsz = _mem[4];
    _valsz = _mem[5];
    _sparseLen = (_mem.length - OFF) / (_valsz + _ridsz);
    assert (_mem.length - OFF) % (_valsz + _ridsz) == 0:"unexpected mem.length in sparse chunk: mem.length = " + (_mem.length - OFF) + ", val_sz = " + _valsz + ", rowId_sz = " + _ridsz;
    // floor(log2(_valsz)), 0 for widths 0 and 1
    int x = _valsz;
    int log = 0;
    while(x > 1){
      x = x >>> 1;
      ++log;
    }
    _valsz_log = log;
    // _mem was just replaced: drop the lookup cache so it cannot point into the previous buffer.
    _lastOff = OFF;
    return this;
  }

  /**
   * Returns the row id of the first stored entry strictly after row {@code rid}.
   *
   * @param rid chunk-relative row to search after; -1 starts from the beginning
   * @return the next stored row id, or {@code _len} when there is none
   * @throws IndexOutOfBoundsException if {@code rid >= _len}
   */
  @Override public final int nextNZ(int rid){
    if(_sparseLen == 0) {
      // Nothing is stored, so there is no entry at OFF to read; keep the bounds check findOffset applies.
      if(rid >= _len) throw new IndexOutOfBoundsException();
      return _len;
    }
    final int off = rid == -1?OFF:findOffset(rid);
    int x = getId(off);
    if(x > rid)return x;  // findOffset returned the first entry and rid precedes it
    if(off < _mem.length - _ridsz - _valsz)  // off is not the last entry: its successor is the answer
      return getId(off + _ridsz + _valsz);
    return _len;
  }

  /**
   * Cursor over a stored entry. {@code _off} is the byte offset of the current entry in
   * {@code _mem}; 0 means the cursor has not been advanced onto the first entry yet.
   */
  public abstract class Value {
    protected int _off = 0;
    public int rowInChunk(){return getId(_off);}
    public abstract long asLong();
    public abstract double asDouble();
    public abstract boolean isNA();
  }

  /**
   * Iterates over the stored entries in storage order. A single {@link Value} instance is reused:
   * every call to {@link #next()} moves it forward and returns the same object.
   */
  public final class SparseIterator implements Iterator<Value> {
    final Value _val;
    public SparseIterator(Value v){_val = v;}

    // Offset the cursor will have after the next advance.
    private int nextOff(){ return _val._off == 0 ? OFF : _val._off + (_ridsz + _valsz); }

    /** @return true while another stored entry lies within the backing array (false for an empty chunk) */
    @Override public final boolean hasNext(){return nextOff() < _mem.length;}

    /**
     * Advances the shared cursor by one entry.
     *
     * @throws java.util.NoSuchElementException if every stored entry has already been visited
     */
    @Override public final Value next(){
      final int off = nextOff();
      if(off >= _mem.length) throw new java.util.NoSuchElementException();
      _val._off = off;
      return _val;
    }

    @Override public final void remove(){throw new UnsupportedOperationException();}
  }

  /**
   * @return an iterator over the stored entries; NA handling matches {@code at8_impl},
   *         {@code atd_impl} and {@code isNA_impl} (sentinel looked up by {@code _valsz_log})
   */
  public Iterator<Value> values(){
    return new SparseIterator(new Value(){
      @Override public final long asLong(){
        long v = getIValue(_off);
        if(v == NAS[_valsz_log]) throw new IllegalArgumentException("at8 but value is missing");
        return v;
      }
      @Override public final double asDouble() {
        long v = getIValue(_off);
        return (v == NAS[_valsz_log])?Double.NaN:v;
      }
      @Override public final boolean isNA(){
        long v = getIValue(_off);
        return (v == NAS[_valsz_log]);
      }
    });
  }
}