package edu.washington.escience.myria.storage;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.Objects;
import org.joda.time.DateTime;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import edu.washington.escience.myria.Schema;
import edu.washington.escience.myria.column.Column;
import edu.washington.escience.myria.column.builder.ColumnBuilder;
import edu.washington.escience.myria.column.builder.ColumnFactory;
import edu.washington.escience.myria.column.builder.WritableColumn;
import edu.washington.escience.myria.util.MyriaUtils;
/**
 * A simple collection of tuples that provides random access to the inner tuples and can be appended to. Once the user
 * is done appending, they may fetch the {@link #finalResult()} of the {@link TupleBuffer}, which finalizes it and
 * exposes its built contents as a list of {@link TupleBatch}. After finalizing, the {@link TupleBuffer} can no longer
 * have values appended to it.
 *
 * <p>
 * Tuples are appended one value at a time through the {@code putXXX} methods. A tuple only becomes visible (counted by
 * {@link #numTuples()}) once every column of the current row has received exactly one value.
 */
public class TupleBuffer implements ReadableTable, AppendableTable {
  /** Format of the emitted tuples. */
  private final Schema schema;
  /** Convenience constant; must match schema.numColumns() and currentBatch.size(). */
  private final int numColumns;
  /** List of completed TupleBatch objects. */
  private final List<TupleBatch> readyBatches;
  /** Internal state used to build up a TupleBatch. */
  private List<ColumnBuilder<?>> currentBatch;
  /** Internal state representing the number of columns that are ready in the current tuple. */
  private int numColumnsReady;
  /** Internal state representing which columns are ready in the current tuple. */
  private final BitSet columnsReady;
  /** Internal state representing the number of tuples in the in-progress TupleBatch. */
  private int currentBatchSize;
  /** Whether this buffer has been finalized. */
  private boolean finalized;
  /** The final results of this buffer. Null until finalized. */
  private ImmutableList<TupleBatch> finalBatches;
  /** The number of tuples in this buffer. */
  private int numTuples;
  /** Batch size: the number of tuples after which the in-progress batch is completed. */
  private int batchSize;

  /**
   * @return the size of the batches.
   */
  public int getBatchSize() {
    return batchSize;
  }

  /**
   * Constructs an empty TupleBuffer to hold tuples matching the specified Schema. The batch size is the one returned by
   * {@link TupleUtils#getBatchSize(Schema)} for the given schema.
   *
   * @param schema specifies the columns of the emitted TupleBatch objects.
   * @throws NullPointerException if {@code schema} is null.
   * @throws IllegalArgumentException if {@code schema} has no columns.
   */
  public TupleBuffer(final Schema schema) {
    this.schema = Objects.requireNonNull(schema, "schema");
    numColumns = schema.numColumns();
    Preconditions.checkArgument(numColumns > 0, "cannot create a buffer with no columns");
    readyBatches = new ArrayList<>();
    currentBatch = ColumnFactory.allocateColumns(schema);
    columnsReady = new BitSet(numColumns);
    numColumnsReady = 0;
    currentBatchSize = 0;
    finalized = false;
    numTuples = 0;
    batchSize = TupleUtils.getBatchSize(schema);
  }

  /**
   * Constructs an empty TupleBuffer to hold tuples matching the specified Schema, using an explicit batch size.
   *
   * @param schema specifies the columns of the emitted TupleBatch objects.
   * @param batchSize the number of tuples per completed batch; must be positive.
   * @throws NullPointerException if {@code schema} is null.
   * @throws IllegalArgumentException if {@code schema} has no columns or {@code batchSize} is not positive.
   */
  public TupleBuffer(final Schema schema, int batchSize) {
    this(schema);
    // Every getter divides by the batch size, and a batch can only complete once it holds exactly batchSize tuples,
    // so a zero or negative value would make the buffer unreadable. Fail fast here instead.
    Preconditions.checkArgument(batchSize > 0, "batch size must be positive, got %s", batchSize);
    this.batchSize = batchSize;
  }

  /**
   * Makes a batch of the tuples in the buffer and appends it to the internal list, then allocates fresh column
   * builders for the next batch. Requires the in-progress batch to be exactly full.
   */
  private void finishBatch() {
    Preconditions.checkState(
        currentBatchSize == batchSize,
        "cannot finish a batch with %s < %s rows ready",
        currentBatchSize,
        batchSize);
    finishBatchEvenIfSmall();
    currentBatch = ColumnFactory.allocateColumns(schema);
  }

  /**
   * Actually finish the batch. Does not ensure that the batch is full, and thus can only be used when finalizing.
   * Does nothing (beyond the state checks) when the in-progress batch is empty.
   */
  private void finishBatchEvenIfSmall() {
    Preconditions.checkState(
        numColumnsReady == 0,
        "cannot finish a batch with %s != 0 columns ready",
        numColumnsReady);
    Preconditions.checkState(!finalized, "cannot force finish a batch once finalized");
    if (currentBatchSize == 0) {
      return;
    }
    ImmutableList.Builder<Column<?>> columns = ImmutableList.builder();
    for (ColumnBuilder<?> cb : currentBatch) {
      columns.add(cb.build());
    }
    readyBatches.add(new TupleBatch(schema, columns.build()));
    currentBatchSize = 0;
  }

  @Override
  public final Schema getSchema() {
    return schema;
  }

  @Override
  public final int numTuples() {
    return numTuples;
  }

  /*
   * All typed getters below share the same addressing scheme: a global row index is split into the index of the batch
   * that holds it (row / batchSize) and the row within that batch (row % batchSize). Batches completed by
   * finishBatch() always hold exactly batchSize tuples, so this mapping is valid for readyBatches; any row beyond the
   * ready batches lives in the in-progress column builders.
   */

  @Override
  @Deprecated
  public final Object getObject(final int column, final int row) {
    Preconditions.checkElementIndex(row, numTuples());
    int batchIndex = row / batchSize;
    int localRow = row % batchSize;
    if (batchIndex < readyBatches.size()) {
      return readyBatches.get(batchIndex).getObject(column, localRow);
    }
    return currentBatch.get(column).getObject(localRow);
  }

  @Override
  public final boolean getBoolean(final int column, final int row) {
    Preconditions.checkElementIndex(row, numTuples());
    int batchIndex = row / batchSize;
    int localRow = row % batchSize;
    if (batchIndex < readyBatches.size()) {
      return readyBatches.get(batchIndex).getBoolean(column, localRow);
    }
    return currentBatch.get(column).getBoolean(localRow);
  }

  @Override
  public final DateTime getDateTime(final int column, final int row) {
    Preconditions.checkElementIndex(row, numTuples());
    int batchIndex = row / batchSize;
    int localRow = row % batchSize;
    if (batchIndex < readyBatches.size()) {
      return readyBatches.get(batchIndex).getDateTime(column, localRow);
    }
    return currentBatch.get(column).getDateTime(localRow);
  }

  @Override
  public final double getDouble(final int column, final int row) {
    Preconditions.checkElementIndex(row, numTuples());
    int batchIndex = row / batchSize;
    int localRow = row % batchSize;
    if (batchIndex < readyBatches.size()) {
      return readyBatches.get(batchIndex).getDouble(column, localRow);
    }
    return currentBatch.get(column).getDouble(localRow);
  }

  @Override
  public final float getFloat(final int column, final int row) {
    Preconditions.checkElementIndex(row, numTuples());
    int batchIndex = row / batchSize;
    int localRow = row % batchSize;
    if (batchIndex < readyBatches.size()) {
      return readyBatches.get(batchIndex).getFloat(column, localRow);
    }
    return currentBatch.get(column).getFloat(localRow);
  }

  @Override
  public final long getLong(final int column, final int row) {
    Preconditions.checkElementIndex(row, numTuples());
    int batchIndex = row / batchSize;
    int localRow = row % batchSize;
    if (batchIndex < readyBatches.size()) {
      return readyBatches.get(batchIndex).getLong(column, localRow);
    }
    return currentBatch.get(column).getLong(localRow);
  }

  @Override
  public final int getInt(final int column, final int row) {
    Preconditions.checkElementIndex(row, numTuples());
    int batchIndex = row / batchSize;
    int localRow = row % batchSize;
    if (batchIndex < readyBatches.size()) {
      return readyBatches.get(batchIndex).getInt(column, localRow);
    }
    return currentBatch.get(column).getInt(localRow);
  }

  @Override
  public final String getString(final int column, final int row) {
    Preconditions.checkElementIndex(row, numTuples());
    int batchIndex = row / batchSize;
    int localRow = row % batchSize;
    if (batchIndex < readyBatches.size()) {
      return readyBatches.get(batchIndex).getString(column, localRow);
    }
    return currentBatch.get(column).getString(localRow);
  }

  @Override
  public final ByteBuffer getBlob(final int column, final int row) {
    Preconditions.checkElementIndex(row, numTuples());
    int batchIndex = row / batchSize;
    int localRow = row % batchSize;
    if (batchIndex < readyBatches.size()) {
      return readyBatches.get(batchIndex).getBlob(column, localRow);
    }
    return currentBatch.get(column).getBlob(localRow);
  }

  @Override
  public final int numColumns() {
    return numColumns;
  }

  /*
   * All typed put methods below follow the same three steps: validate that the column may be written, append the
   * value to the in-progress column builder, then update the row-completion bookkeeping.
   */

  @Override
  public final void putBoolean(final int column, final boolean value) {
    checkPutIndex(column);
    currentBatch.get(column).appendBoolean(value);
    columnPut(column);
  }

  @Override
  public final void putDateTime(final int column, final DateTime value) {
    checkPutIndex(column);
    currentBatch.get(column).appendDateTime(value);
    columnPut(column);
  }

  @Override
  public final void putDouble(final int column, final double value) {
    checkPutIndex(column);
    currentBatch.get(column).appendDouble(value);
    columnPut(column);
  }

  @Override
  public final void putFloat(final int column, final float value) {
    checkPutIndex(column);
    currentBatch.get(column).appendFloat(value);
    columnPut(column);
  }

  @Override
  public final void putInt(final int column, final int value) {
    checkPutIndex(column);
    currentBatch.get(column).appendInt(value);
    columnPut(column);
  }

  @Override
  public final void putLong(final int column, final long value) {
    checkPutIndex(column);
    currentBatch.get(column).appendLong(value);
    columnPut(column);
  }

  @Override
  @Deprecated
  public final void putObject(final int column, final Object value) {
    checkPutIndex(column);
    currentBatch.get(column).appendObject(MyriaUtils.ensureObjectIsValidType(value));
    columnPut(column);
  }

  @Override
  public final void putString(final int column, final String value) {
    checkPutIndex(column);
    currentBatch.get(column).appendString(value);
    columnPut(column);
  }

  @Override
  public final void putBlob(final int column, final ByteBuffer value) {
    checkPutIndex(column);
    currentBatch.get(column).appendBlob(value);
    columnPut(column);
  }

  /**
   * Helper function: checks whether the specified column can be inserted into.
   *
   * @param column the column in which the value should be put.
   * @throws IllegalStateException if this buffer has been finalized, or if the column already holds a value for the
   *         current (incomplete) row.
   * @throws IndexOutOfBoundsException if {@code column} is not a valid column index.
   */
  private void checkPutIndex(final int column) {
    Preconditions.checkState(!finalized, "cannot append to a TupleBuffer once finalized");
    Preconditions.checkElementIndex(column, numColumns);
    Preconditions.checkState(
        !columnsReady.get(column), "need to fill up one row before starting new one");
  }

  /**
   * Helper function to update the internal state after a value has been inserted into the specified column. When the
   * value completes the current row, the tuple counters are advanced, and when that in turn fills the in-progress
   * batch, the batch is completed.
   *
   * @param column the column in which the value was put.
   */
  private void columnPut(final int column) {
    columnsReady.set(column, true);
    numColumnsReady++;
    if (numColumnsReady == numColumns) {
      currentBatchSize++;
      numTuples++;
      numColumnsReady = 0;
      columnsReady.clear();
      if (currentBatchSize == batchSize) {
        finishBatch();
      }
    }
  }

  /**
   * Finalizes this buffer (on the first call) and returns its contents. Subsequent calls return the same list.
   *
   * @return a list of all {@link TupleBatch}es in this buffer.
   * @throws IllegalStateException if the current row is only partially filled when the buffer is first finalized.
   */
  public ImmutableList<TupleBatch> finalResult() {
    if (finalized) {
      return finalBatches;
    }
    // Flush the (possibly partial) in-progress batch before freezing the result.
    finishBatchEvenIfSmall();
    finalBatches = ImmutableList.copyOf(readyBatches);
    finalized = true;
    return finalBatches;
  }

  @Override
  public ReadableColumn asColumn(final int column) {
    return new ReadableSubColumn(this, Preconditions.checkElementIndex(column, numColumns));
  }

  @Override
  public WritableColumn asWritableColumn(final int column) {
    // Validate the index up front, consistent with asColumn(), rather than on the first write.
    return new WritableSubColumn(this, Preconditions.checkElementIndex(column, numColumns));
  }

  /**
   * Append the specified value to the specified destination column in this TupleBuffer from the source column.
   *
   * @param destColumn which column in this TB the value will be inserted.
   * @param sourceColumn the column from which data will be retrieved.
   * @param sourceRow the row in the source column from which data will be retrieved.
   */
  public final void put(
      final int destColumn, final ReadableColumn sourceColumn, final int sourceRow) {
    // Validate the destination before the copy helper touches the source column.
    checkPutIndex(destColumn);
    TupleUtils.copyValue(sourceColumn, sourceRow, this, destColumn);
  }
}