package water;
import java.io.IOException;
import java.nio.channels.ByteChannel;
import java.sql.Timestamp;
import java.util.concurrent.*;
import static water.ExternalFrameUtils.writeToChannel;
/**
* <p>This class is used to read data from H2O Frames from non-H2O environments, such as Spark Executors.
* It is expected that the frame we want to read is already in the DKV. Checking for its presence is up to the
* user of this class.</p>
*
* <strong>Example usage of this class:</strong><br>
*
* <p>
* First we need to open the connection to H2O and initialize the reader:<br>
* <pre>
* {@code
* // specify indexes of columns we want to read data from
* int[] selectedColumnIndices = {0, 1};
* // specify expected types for the selected columns
* byte[] expectedTypes = {ExternalFrameHandler.EXPECTED_BOOL, ExternalFrameHandler.EXPECTED_INT};
* ByteChannel channel = ExternalFrameUtils.getConnection("ip:port");
* ExternalFrameReaderClient reader = new ExternalFrameReaderClient(channel, "frameName", 0, selectedColumnIndices, expectedTypes);
* }
* </pre>
* </p>
*
* <p>
* In the next step we can read the data we expect, in our case boolean and integer:<br>
* <pre>
* {@code
* int rowsRead = 0;
* while (rowsRead < reader.getNumRows()) {
*   boolean b = reader.readBoolean();
*   if (reader.isLastNA()) {
*     // it is NA
*   } else {
*     // it is value
*   }
*
*   int i = reader.readInt();
*   if (reader.isLastNA()) {
*     // it is NA
*   } else {
*     // it is value
*   }
*   rowsRead++;
* }
* }
* </pre>
* </p>
*
* <p>
* And at the end we need to make sure to force the code to wait for all data to be read:<br>
* <pre>
* {@code
* int timeoutInSeconds = 10; // maximum time to wait for the confirmation
* reader.waitUntilAllReceived(timeoutInSeconds);
* }
* </pre>
* </p>
*/
public final class ExternalFrameReaderClient {
    /** Whether the value returned by the most recent {@code readXxx} call was reported as NA. */
    private boolean isLastNA = false;
    /** Buffer all values are read from; created on top of {@link #channel} by {@link #initAndGetAb()}. */
    private final AutoBuffer ab;
    /** Name of the frame being read. */
    private final String frameKey;
    /** Index of the chunk being read. */
    private final int chunkIdx;
    /** Indices of the columns requested from the frame. */
    private final int[] selectedColumnIndices;
    /** Channel to the H2O node. */
    private final ByteChannel channel;
    /** Row count received from the node in reply to the download request; set by {@link #initAndGetAb()}. */
    private int numRows;
    /** Expected types sent to the node as part of the download request. */
    private final byte[] expectedTypes;

    /**
     * Creates the reader and immediately sends the download request over the channel
     * (see {@link #initAndGetAb()}), so the row count is available once construction finishes.
     *
     * @param channel channel to h2o node
     * @param frameKey name of frame we want to read from
     * @param chunkIdx chunk index from we want to read
     * @param selectedColumnIndices indices of columns we want to read from
     * @param expectedTypes expected types
     * @throws IOException propagated from the initial request/response exchange on the channel
     */
    public ExternalFrameReaderClient(ByteChannel channel, String frameKey, int chunkIdx, int[] selectedColumnIndices, byte[] expectedTypes) throws IOException {
        // All request parameters must be stored before initAndGetAb() runs, because it reads them.
        this.channel = channel;
        this.frameKey = frameKey;
        this.chunkIdx = chunkIdx;
        this.expectedTypes = expectedTypes;
        this.selectedColumnIndices = selectedColumnIndices;
        this.ab = initAndGetAb();
    }

    /**
     * @return the number of rows received from the node when the reader was initialized
     */
    public int getNumRows() {
        return numRows;
    }

    /**
     * Reads the next value as a boolean and updates the NA flag returned by {@link #isLastNA()}.
     *
     * @return the value read from the buffer
     */
    public boolean readBoolean() {
        boolean data = ab.getZ();
        isLastNA = ExternalFrameUtils.isNA(ab, data);
        return data;
    }

    /**
     * Reads the next value as a byte and updates the NA flag returned by {@link #isLastNA()}.
     *
     * @return the value read from the buffer
     */
    public byte readByte() {
        byte data = ab.get1();
        isLastNA = ExternalFrameUtils.isNA(ab, data);
        return data;
    }

    /**
     * Reads the next value as a char and updates the NA flag returned by {@link #isLastNA()}.
     *
     * @return the value read from the buffer
     */
    public char readChar() {
        char data = ab.get2();
        isLastNA = ExternalFrameUtils.isNA(ab, data);
        return data;
    }

    /**
     * Reads the next value as a short and updates the NA flag returned by {@link #isLastNA()}.
     *
     * @return the value read from the buffer
     */
    public short readShort() {
        short data = ab.get2s();
        isLastNA = ExternalFrameUtils.isNA(ab, data);
        return data;
    }

    /**
     * Reads the next value as an int and updates the NA flag returned by {@link #isLastNA()}.
     *
     * @return the value read from the buffer
     */
    public int readInt() {
        int data = ab.getInt();
        isLastNA = ExternalFrameUtils.isNA(ab, data);
        return data;
    }

    /**
     * Reads the next value as a long and updates the NA flag returned by {@link #isLastNA()}.
     *
     * @return the value read from the buffer
     */
    public long readLong() {
        long data = ab.get8();
        isLastNA = ExternalFrameUtils.isNA(ab, data);
        return data;
    }

    /**
     * Reads the next value as a float and updates the NA flag returned by {@link #isLastNA()}.
     * Unlike the integral readers, the NA check is given only the value itself, not the buffer.
     *
     * @return the value read from the buffer
     */
    public float readFloat() {
        float data = ab.get4f();
        isLastNA = ExternalFrameUtils.isNA(data);
        return data;
    }

    /**
     * Reads the next value as a double and updates the NA flag returned by {@link #isLastNA()}.
     * Unlike the integral readers, the NA check is given only the value itself, not the buffer.
     *
     * @return the value read from the buffer
     */
    public double readDouble() {
        double data = ab.get8d();
        isLastNA = ExternalFrameUtils.isNA(data);
        return data;
    }

    /**
     * Reads the next value as a String and updates the NA flag returned by {@link #isLastNA()}.
     *
     * @return the value read from the buffer
     */
    public String readString() {
        String data = ab.getStr();
        isLastNA = ExternalFrameUtils.isNA(ab, data);
        return data;
    }

    /**
     * Reads the next long from the buffer, wraps it in a {@link Timestamp} and updates the NA flag
     * returned by {@link #isLastNA()}. A {@code Timestamp} object is constructed for every call,
     * so callers have to consult {@link #isLastNA()} to recognize NA values.
     *
     * @return timestamp built from the long value read from the buffer
     */
    public Timestamp readTimestamp() {
        Timestamp data = new Timestamp(ab.get8());
        isLastNA = ExternalFrameUtils.isNA(ab, data);
        return data;
    }

    /**
     * This method is used to check if the last received value was marked as NA by H2O backend
     *
     * @return {@code true} if the most recent {@code readXxx} call flagged its value as NA
     */
    public boolean isLastNA() {
        return isLastNA;
    }

    /**
     * This method ensures the application waits for all bytes to be received before continuing in the
     * application's control flow.
     *
     * It has to be called at the end of reading.
     *
     * @param timeout timeout in seconds
     * @throws ExternalFrameConfirmationException if the confirmation does not arrive within the timeout,
     *         if the waiting thread is interrupted (the interrupt flag is restored before throwing),
     *         or if obtaining the confirmation fails; the underlying exception is attached as the cause
     */
    public void waitUntilAllReceived(int timeout) throws ExternalFrameConfirmationException {
        try {
            byte flag = ExternalFrameConfirmationCheck.getConfirmation(ab, timeout);
            // Sanity check only: it is evaluated solely when the JVM runs with assertions enabled.
            assert (flag == ExternalFrameHandler.CONFIRM_READING_DONE);
        } catch (TimeoutException e) {
            throw confirmationFailure("Timeout for confirmation exceeded!", e);
        } catch (InterruptedException e) {
            // Catching InterruptedException clears the interrupt status; restore it so that
            // code further up the stack can still observe the interruption.
            Thread.currentThread().interrupt();
            throw confirmationFailure("Confirmation thread interrupted!", e);
        } catch (ExecutionException e) {
            throw confirmationFailure("Confirmation failed!", e);
        }
    }

    /**
     * Builds the exception reported by {@link #waitUntilAllReceived(int)} and attaches the original
     * failure as its cause so the root problem is not lost in stack traces.
     */
    private static ExternalFrameConfirmationException confirmationFailure(String message, Throwable cause) {
        ExternalFrameConfirmationException failure = new ExternalFrameConfirmationException(message);
        failure.initCause(cause);
        return failure;
    }

    /**
     * Sends the download request (init byte, request type, frame key, chunk index, expected types and
     * selected column indices, in this order) over the channel, then opens a receiving buffer on the
     * same channel and reads the number of rows from it.
     *
     * @return the receiving buffer, positioned right after the row count
     * @throws IOException propagated from writing the request to the channel
     */
    private AutoBuffer initAndGetAb() throws IOException {
        AutoBuffer sentAb = new AutoBuffer();
        sentAb.put1(ExternalFrameHandler.INIT_BYTE);
        sentAb.put1(ExternalFrameHandler.DOWNLOAD_FRAME);
        sentAb.putStr(frameKey);
        sentAb.putInt(chunkIdx);
        sentAb.putA1(expectedTypes);
        sentAb.putA4(selectedColumnIndices);
        writeToChannel(sentAb, channel);
        AutoBuffer receiveAb = new AutoBuffer(channel, null);
        // once we send H2O all information it needs to prepare for reading, it sends us back number of rows
        this.numRows = receiveAb.getInt();
        return receiveAb;
    }
}