package water.parser.parquet;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;
import water.fvec.Chunk;
import water.fvec.Vec;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
/**
 * Seekable and PositionedReadable implementation of InputStream backed by a Vec data source.
 *
 * <p>The bytes of the stream are obtained chunk by chunk through {@code Vec.chunkForRow(long)} and
 * {@code Chunk.asBytes()}; the stream length is {@code Vec.length()}. (Presumably the Vec holds raw
 * file bytes, one byte per row - confirm against the callers.)
 *
 * <p>State invariant: the logical stream position is always {@code _offset + _pos}. While a chunk
 * is buffered, {@code _offset} is the chunk start and {@code _pos} the index into {@code _buffer}.
 * While nothing is buffered, {@code _buffer} is empty, {@code _pos} is 0 and {@code _offset} alone
 * carries the position; the chunk is then fetched lazily by the next single-byte read.
 */
public class VecDataInputStream extends InputStream implements Seekable, PositionedReadable {

  /** Shared placeholder used whenever no chunk is currently buffered. */
  private static final byte[] EMPTY_BUFFER = new byte[0];

  /** Data source of this stream. */
  private final Vec _v;
  /** Bytes of the currently buffered chunk, or {@link #EMPTY_BUFFER}. */
  private byte[] _buffer;
  /** Stream position corresponding to index 0 of {@link #_buffer}. */
  private long _offset;
  /** Read index into {@link #_buffer}. */
  private int _pos;

  /**
   * Creates a stream positioned at the beginning of the given Vec. No data is fetched until the
   * first read.
   *
   * @param v the Vec to read from
   */
  public VecDataInputStream(Vec v) {
    this._v = v;
    flushBuffer(0L);
  }

  /** @return number of unread bytes left in the currently buffered chunk */
  private int buffAvailable() {
    return _buffer.length - _pos;
  }

  /** @return number of bytes between the current position and the end of the Vec (may be negative
   *          after seeking past the end) */
  private long globAvailable() {
    return _v.length() - (_offset + _pos);
  }

  /**
   * Buffers the chunk that contains {@code position} and points the stream at that position.
   *
   * @param position absolute stream position that must be readable after the call
   */
  private void fetchData(long position) {
    Chunk chk = _v.chunkForRow(position);
    _buffer = chk.asBytes();
    _offset = chk.start();
    _pos = (int) (position - _offset);
    assert _buffer.length > 0;
  }

  /**
   * Drops the buffered chunk and moves the stream to {@code position} without fetching any data.
   *
   * @param position new absolute stream position
   */
  private void flushBuffer(long position) {
    _buffer = EMPTY_BUFFER;
    _pos = 0;
    _offset = position;
  }

  /**
   * Reads a single byte and advances the position by one.
   *
   * @return the byte as a value in {@code 0..255}, or {@code -1} at the end of the Vec
   */
  @Override
  public int read() throws IOException {
    if (buffAvailable() <= 0) {
      if (globAvailable() <= 0L) {
        return -1;
      }
      fetchData(_offset + _pos);
    }
    return _buffer[_pos++] & 0xff;
  }

  /**
   * Reads up to {@code length} bytes from the current position and advances the position by the
   * number of bytes read.
   *
   * @param buffer destination array
   * @param offset index in {@code buffer} of the first byte written
   * @param length maximum number of bytes to read
   * @return number of bytes read, {@code 0} if {@code length} is not positive, or {@code -1} if the
   *         end of the Vec has been reached (as required by {@link InputStream#read(byte[], int, int)})
   */
  @Override
  public int read(byte[] buffer, int offset, int length) throws IOException {
    int read = read(_offset + _pos, buffer, offset, length);
    if (read <= 0) {
      // Nothing was transferred: 0 for an empty request, -1 for end of stream. Returning 0 at end
      // of stream would make loops that wait for -1 (e.g. DataInputStream.readFully) spin forever.
      return read;
    }
    int skipped = (int) skip(read);
    assert skipped == read;
    return read;
  }

  /**
   * Skips forward over up to {@code n} bytes, never moving past the end of the Vec.
   *
   * @param n requested number of bytes to skip
   * @return number of bytes actually skipped; {@code 0} if {@code n} is not positive or the stream
   *         is already at (or beyond) the end
   */
  @Override
  public long skip(long n) throws IOException {
    if (n <= 0L) {
      // InputStream contract: a negative request skips nothing.
      return 0L;
    }
    final long current = _offset + _pos;
    final long remaining = _v.length() - current;
    if (remaining <= 0L) {
      return 0L;
    }
    // Clamp before adding so that a huge n (e.g. Long.MAX_VALUE) cannot overflow the target.
    final long skipped = Math.min(n, remaining);
    final long target = current + skipped;
    if (inBuffer(target)) {
      seekInBuffer(target);
    } else {
      flushBuffer(target);
    }
    return skipped;
  }

  /**
   * Reads up to {@code length} bytes starting at the absolute {@code position}. The stream's own
   * position and buffered chunk are left untouched; bytes that happen to lie in the buffered chunk
   * are copied from it, all others are copied straight from the corresponding chunks of the Vec.
   *
   * @param position absolute position in the Vec to start reading from
   * @param buffer   destination array
   * @param offset   index in {@code buffer} of the first byte written
   * @param length   maximum number of bytes to read
   * @return number of bytes read (less than {@code length} only when the end of the Vec is hit),
   *         {@code 0} if {@code length} is not positive, or {@code -1} if {@code position} is at or
   *         beyond the end of the Vec
   */
  @Override
  public int read(final long position, byte[] buffer, int offset, int length) throws IOException {
    if (length <= 0) {
      return 0;
    }
    final long total = _v.length();
    if (position >= total) {
      return -1;
    }
    int loaded = 0;
    long currentPosition = position;
    while ((loaded < length) && (currentPosition < total)) {
      byte[] buff;
      int pos;
      if (inBuffer(currentPosition)) {
        buff = _buffer;
        pos = (int) (currentPosition - _offset);
      } else {
        Chunk chunk = _v.chunkForRow(currentPosition);
        buff = chunk.asBytes();
        pos = (int) (currentPosition - chunk.start());
      }
      // Copy whatever is smaller: the rest of this chunk or the rest of the request.
      int avail = Math.min(buff.length - pos, length - loaded);
      System.arraycopy(buff, pos, buffer, offset + loaded, avail);
      loaded += avail;
      currentPosition += avail;
    }
    return loaded;
  }

  /**
   * Reads exactly {@code length} bytes starting at the absolute {@code position}, without changing
   * the stream's own position.
   *
   * @throws EOFException if the Vec ends before {@code length} bytes could be read
   */
  @Override
  public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
    int loaded = read(position, buffer, offset, length);
    if (loaded != length) {
      throw new EOFException("Reached the end of the Vec while reading into buffer.");
    }
  }

  /**
   * Fills the whole {@code buffer} with bytes starting at the absolute {@code position}, without
   * changing the stream's own position.
   *
   * @throws EOFException if the Vec ends before the buffer could be filled
   */
  @Override
  public void readFully(long position, byte[] buffer) throws IOException {
    readFully(position, buffer, 0, buffer.length);
  }

  /**
   * Moves the stream to the given absolute position. If the position lies inside the buffered
   * chunk the buffer is kept, otherwise it is dropped and refetched lazily. Positions at or beyond
   * the end of the Vec are accepted; subsequent reads then report end of stream.
   *
   * @param position new absolute stream position
   * @throws EOFException if {@code position} is negative
   */
  @Override
  public void seek(long position) throws IOException {
    if (position < 0L) {
      // Fail here rather than later, when a read would try to look up a chunk for a negative row.
      throw new EOFException("Cannot seek to a negative position: " + position);
    }
    if (inBuffer(position)) {
      seekInBuffer(position);
    } else {
      flushBuffer(position);
    }
  }

  /** Repositions inside the buffered chunk; the caller must have checked {@link #inBuffer(long)}. */
  private void seekInBuffer(long position) {
    _pos = (int) (position - _offset);
  }

  /** @return {@code true} if {@code position} addresses a byte of the currently buffered chunk */
  private boolean inBuffer(long position) {
    return (position >= _offset) && (position < _offset + _buffer.length);
  }

  /** @return the current absolute position of the stream */
  @Override
  public long getPos() throws IOException {
    return _offset + _pos;
  }

  /**
   * Not supported by this stream.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public boolean seekToNewSource(long targetPos) throws IOException {
    throw new UnsupportedOperationException("Intentionally not implemented");
  }
}