package water.fvec;
import water.Job;
import water.Key;
import water.Value;
import water.exceptions.H2OIllegalArgumentException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
/**
 * A vector of plain bytes whose chunks are {@link C1NChunk}s.
 */
public class ByteVec extends Vec {

  /** Carriage-return byte; treated as a line terminator when trimming preview chunks. */
  static final byte CHAR_CR = 13;
  /** Line-feed byte; treated as a line terminator when trimming preview chunks. */
  static final byte CHAR_LF = 10;

  public ByteVec(Key key, int rowLayout) {
    super(key, rowLayout);
  }

  /** Same as the superclass lookup, with the result cast to {@link C1NChunk}. */
  @Override
  public C1NChunk chunkForChunkIdx(int cidx) {
    return (C1NChunk) super.chunkForChunkIdx(cidx);
  }

  /** Return column missing-element-count - ByteVecs do not allow any "missing elements". */
  @Override
  public long naCnt() {
    return 0;
  }

  /** Is all integers?  Yes, it's all bytes. */
  @Override
  public boolean isInt() {
    return true;
  }

  /**
   * Get an unspecified amount of initial bytes; typically a whole C1NChunk of
   * length Vec.DFLT_CHUNK_SIZE but no guarantees.  Useful for previewing the
   * start of large files.
   *
   * <p>The returned array is chunk 0's own {@code _mem} array, not a copy.
   *
   * @return array of initial bytes
   */
  public byte[] getFirstBytes() {
    return chunkForChunkIdx(0)._mem;
  }

  /**
   * Get the previewable bytes of a given chunk.  Useful for previewing
   * sections of files.
   *
   * <p>Chunk 0 is returned whole (its own {@code _mem} array, not a copy).
   * Any other chunk is trimmed to whole lines: the result is a copy that
   * starts at the chunk's first CR/LF byte (inclusive) and ends just before
   * its last CR/LF byte.
   *
   * @param chkIdx index of desired chunk
   * @return the bytes as described above, or {@code null} when a non-zero
   *         chunk does not contain two line terminators more than one byte apart
   * @throws H2OIllegalArgumentException if {@code chkIdx} is negative or not
   *         less than the number of chunks
   */
  public byte[] getPreviewChunkBytes(int chkIdx) {
    if (chkIdx < 0) {
      throw new H2OIllegalArgumentException("Asked for a negative chunk index: " + chkIdx);
    }
    if (chkIdx >= nChunks()) {
      throw new H2OIllegalArgumentException("Asked for chunk index beyond the number of chunks.");
    }
    if (chkIdx == 0) {
      return chunkForChunkIdx(chkIdx)._mem;
    }
    // Must eat partial lines.
    // FIXME: a hack to consume partial lines since each preview chunk is seen as cidx=0
    byte[] mem = chunkForChunkIdx(chkIdx)._mem;
    int first = 0;
    int last = mem.length - 1;
    // Advance to the first line terminator (or mem.length if there is none).
    while (first < mem.length && mem[first] != CHAR_CR && mem[first] != CHAR_LF) {
      first++;
    }
    // Retreat to the last line terminator, never moving before 'first'.
    while (last > first && mem[last] != CHAR_CR && mem[last] != CHAR_LF) {
      last--;
    }
    if (last - first > 1) {
      return Arrays.copyOfRange(mem, first, last);
    }
    return null;
  }

  /**
   * Open a stream view over the underlying data.
   *
   * <p>The stream reads the chunks in index order starting at chunk 0, and
   * the first chunk is requested before this method returns.  Each time a
   * new chunk is loaded and {@code job_key} is non-null,
   * {@code Job.update(chunk._len, job_key)} is called.
   *
   * <p>Back-channel: calling {@code read(null, off, len)} on the returned
   * stream does not read data.  It returns the number of chunks loaded so
   * far and, as a side effect, releases the cached memory of at most one
   * already-loaded chunk if that chunk's value is persisted.
   *
   * @param job_key key handed to {@code Job.update}; may be {@code null} to skip the update
   * @return a stream over this vector's bytes
   */
  public InputStream openStream(final Key job_key) {
    InputStream is = new InputStream() {
      private int _cidx;     // index of the next chunk to load
      private int _pidx;     // index of the next chunk to release through the back-channel
      private int _pos;      // read cursor into _c0._mem
      private C1NChunk _c0;  // chunk being read; null until one is loaded and again after close()

      /**
       * Bytes left in the current chunk.  When the current chunk is used up
       * (or none is loaded yet) this loads the next chunk first; returns 0
       * once no chunks remain.
       */
      @Override
      public int available() {
        if (_c0 == null || _pos >= _c0._len) {
          if (_cidx >= nChunks()) {
            return 0;
          }
          _c0 = chunkForChunkIdx(_cidx++);
          _pos = C1NChunk._OFF;
          if (job_key != null) {
            Job.update(_c0._len, job_key);
          }
        }
        return _c0._len - _pos;
      }

      /** Marks every chunk as consumed so that later reads report end of stream. */
      @Override
      public void close() {
        _cidx = nChunks();
        _c0 = null;
        _pos = 0;
      }

      @Override
      public int read() throws IOException {
        return available() == 0 ? -1 : 0xFF & _c0._mem[_pos++];
      }

      /**
       * Reads up to {@code len} bytes, never crossing a chunk boundary in a
       * single call.  A {@code null} buffer selects the back-channel
       * described on {@code openStream}.
       */
      @Override
      public int read(byte[] b, int off, int len) {
        if (b == null) { // Back-channel read of cidx
          if (_cidx > _pidx) { // Remove prev chunk from memory
            Value v = Value.STORE_get(chunkKey(_pidx++));
            if (v != null && v.isPersisted()) {
              v.freePOJO(); // Eagerly toss from memory
              v.freeMem();
            } // Else not found, or not on disk somewhere
          }
          return _cidx;
        }
        if (len == 0) {
          return 0; // InputStream contract: a zero-length read returns 0, even at end of stream
        }
        int avail = available();
        if (avail == 0) {
          return -1;
        }
        int n = Math.min(len, avail);
        System.arraycopy(_c0._mem, _pos, b, off, n);
        _pos += n;
        return n;
      }
    };
    try {
      // Prime the stream so the first chunk is loaded before it is handed out.
      is.available();
    } catch (IOException e) {
      // The static type InputStream declares IOException on available().
      throw new RuntimeException(e);
    }
    return is;
  }
}