package edu.washington.escience.myria.storage;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
import java.nio.ByteBuffer;
import org.joda.time.DateTime;
import com.google.common.base.Preconditions;
import edu.washington.escience.myria.MyriaConstants;
import edu.washington.escience.myria.Schema;
import edu.washington.escience.myria.column.Column;
import edu.washington.escience.myria.column.builder.ColumnBuilder;
import edu.washington.escience.myria.column.builder.ColumnFactory;
import edu.washington.escience.myria.column.builder.WritableColumn;
import edu.washington.escience.myria.util.MyriaUtils;
/**
 * Used for creating TupleBatch objects on the fly. A helper class used in, e.g., the Scatter operator. Currently it
 * doesn't support random access to a specific cell. Use TupleBuffer instead.
 *
 * <p>Tuples are accumulated column-by-column into a set of {@link ColumnBuilder}s. Once {@link #getBatchSize()} tuples
 * have been completed, the builders are turned into a {@link TupleBatch} that is appended to an internal list of ready
 * batches, and a fresh set of builders is allocated. This class performs no synchronization of its own.
 */
public class TupleBatchBuffer implements AppendableTable {
  /** Format of the emitted tuples. */
  private final Schema schema;
  /** Convenience constant; must match schema.numColumns() and currentBuildingColumns.size(). */
  private final int numColumns;
  /** List of completed TupleBatch objects, in the order in which they were completed or appended. */
  private final List<TupleBatch> readyTuples;
  /** Internal state used to build up a TupleBatch; one builder per column. */
  private List<ColumnBuilder<?>> currentBuildingColumns;
  /** Internal state representing which columns are ready in the current tuple. */
  private final BitSet columnsReady;
  /** Internal state representing the number of columns that are ready in the current tuple. */
  private int numColumnsReady;
  /** Internal state representing the number of tuples in the in-progress TupleBatch. */
  private int currentInProgressTuples;
  /** The last time (a {@link System#nanoTime()} reading) this buffer returned a TupleBatch. */
  private long lastPoppedTime;
  /** The total number of tuples in readyTuples. */
  private int readyTuplesNum;
  /** Number of tuples at which an in-progress batch is automatically finished. */
  private final int batchSize;

  /**
   * Constructs an empty TupleBatchBuffer to hold tuples matching the specified Schema.
   *
   * @param schema specified the columns of the emitted TupleBatch objects.
   * @throws NullPointerException if {@code schema} is null.
   */
  public TupleBatchBuffer(final Schema schema) {
    this.schema = Objects.requireNonNull(schema);
    readyTuples = new LinkedList<>();
    currentBuildingColumns = ColumnFactory.allocateColumns(schema);
    numColumns = schema.numColumns();
    columnsReady = new BitSet(numColumns);
    numColumnsReady = 0;
    currentInProgressTuples = 0;
    lastPoppedTime = System.nanoTime();
    readyTuplesNum = 0;
    batchSize = TupleUtils.getBatchSize(schema);
  }

  /**
   * @return the number of tuples per batch, as computed by {@code TupleUtils.getBatchSize} for this buffer's schema
   *         at construction time.
   */
  public int getBatchSize() {
    return batchSize;
  }

  /**
   * Append the tuple batch directly into readyTuples. Any in-progress batch is finished first so that tuple order is
   * preserved.
   *
   * @param tb the TB.
   * @throws NullPointerException if {@code tb} is null.
   * @throws IllegalStateException if the current tuple is only partially filled.
   */
  public final void appendTB(final TupleBatch tb) {
    /* Reject null before touching any state, so a bad argument does not flush the in-progress batch. */
    Objects.requireNonNull(tb, "tb");
    /* If we're currently building a batch, we better finish it before we append this one to the list. Otherwise
     * reordering will happen. */
    finishBatch();
    readyTuplesNum += tb.numTuples();
    readyTuples.add(tb);
  }

  /**
   * Helper function: checks whether the specified column can be inserted into.
   *
   * @param column the column in which the value should be put.
   * @throws IndexOutOfBoundsException if {@code column} is not a valid column index.
   * @throws IllegalStateException if {@code column} has already been filled in the current tuple.
   */
  private void checkPutIndex(final int column) {
    Preconditions.checkElementIndex(column, numColumns);
    Preconditions.checkState(
        !columnsReady.get(column),
        "need to fill up one row of TupleBatchBuffer before starting new one");
  }

  /**
   * Clear this TBB: drops all ready batches and all in-progress data, leaving the buffer empty and reusable.
   */
  public final void clear() {
    columnsReady.clear();
    /* Allocate fresh builders instead of clearing the list: an empty builder list would make every subsequent put
     * fail when it looks up the builder for its column. */
    currentBuildingColumns = ColumnFactory.allocateColumns(schema);
    currentInProgressTuples = 0;
    numColumnsReady = 0;
    readyTuples.clear();
    readyTuplesNum = 0;
  }

  /**
   * Helper function to update the internal state after a value has been inserted into the specified column.
   *
   * @param column the column in which the value was put.
   */
  private void columnPut(final int column) {
    columnsReady.set(column, true);
    numColumnsReady++;
    /* All columns are full, move to next line. */
    if (numColumnsReady == numColumns) {
      currentInProgressTuples++;
      numColumnsReady = 0;
      columnsReady.clear();
      /* See if the current batch is full and finish it if so. */
      if (currentInProgressTuples == batchSize) {
        finishBatch();
      }
    }
  }

  /**
   * Makes a batch of any tuples in the buffer and appends it to the internal list.
   *
   * @return true if any tuples were added.
   * @throws IllegalStateException if the current tuple is only partially filled.
   */
  private boolean finishBatch() {
    Preconditions.checkState(
        numColumnsReady == 0, "Cannot finish a batch with partially-completed tuples");
    if (currentInProgressTuples == 0) {
      return false;
    }
    /* Build the batch */
    final List<Column<?>> builtColumns = new ArrayList<>(currentBuildingColumns.size());
    for (final ColumnBuilder<?> cb : currentBuildingColumns) {
      builtColumns.add(cb.build());
    }
    final int batchTuples = currentInProgressTuples;
    readyTuples.add(new TupleBatch(schema, builtColumns, batchTuples));
    /* Update the metadata and refresh the building state. The tuple count comes from the same counter that was handed
     * to the TupleBatch, so it also works for a schema without columns. */
    readyTuplesNum += batchTuples;
    currentBuildingColumns = ColumnFactory.allocateColumns(schema);
    currentInProgressTuples = 0;
    return true;
  }

  /**
   * Return all tuples in this buffer. The data do not get removed.
   *
   * @return a {@code List<TupleBatch>} containing all complete tuples that have been inserted into this buffer: the
   *         ready batches followed, if there are in-progress tuples, by one batch built from a fork of the current
   *         builders.
   */
  public final List<TupleBatch> getAll() {
    final List<TupleBatch> output = new ArrayList<>(readyTuples.size() + 1);
    output.addAll(readyTuples);
    if (currentInProgressTuples > 0) {
      output.add(new TupleBatch(schema, getInProgressColumns(), currentInProgressTuples));
    }
    return output;
  }

  /**
   * Return the columns of all tuples in this buffer. The data do not get removed.
   *
   * @return a list with one entry per batch, each entry being that batch's list of data columns; the ready batches come
   *         first, followed by the in-progress columns if there are in-progress tuples.
   */
  public final List<List<? extends Column<?>>> getAllAsRawColumn() {
    final List<List<? extends Column<?>>> output = new ArrayList<>(readyTuples.size() + 1);
    for (final TupleBatch batch : readyTuples) {
      output.add(batch.getDataColumns());
    }
    if (currentInProgressTuples > 0) {
      output.add(getInProgressColumns());
    }
    return output;
  }

  /**
   * Get elapsed time since the last time when a TB is popped.
   *
   * @return the elapsed time, in nanoseconds, from lastPoppedTime to present
   */
  private long getElapsedTime() {
    return System.nanoTime() - lastPoppedTime;
  }

  /**
   * Build the in progress columns. The builders' states are untouched. They can keep building.
   *
   * @return the built in progress columns.
   */
  private List<Column<?>> getInProgressColumns() {
    final List<Column<?>> newColumns = new ArrayList<>(currentBuildingColumns.size());
    for (final ColumnBuilder<?> cb : currentBuildingColumns) {
      /* Build from a fork so that the live builder can continue to accept values. */
      newColumns.add(cb.forkNewBuilder().build());
    }
    return newColumns;
  }

  /**
   * @return the number of ready tuples.
   */
  public final int getReadyTuplesNum() {
    return readyTuplesNum;
  }

  /**
   * @return the Schema of the tuples in this buffer.
   */
  @Override
  public final Schema getSchema() {
    return schema;
  }

  /**
   * @return if there is filled TupleBatches ready for pop.
   */
  public final boolean hasFilledTB() {
    return !readyTuples.isEmpty();
  }

  /**
   * @return num columns.
   */
  @Override
  public final int numColumns() {
    return numColumns;
  }

  /**
   * @return the number of complete tuples stored in this TupleBatchBuffer, i.e. ready tuples plus in-progress tuples.
   */
  @Override
  public final int numTuples() {
    return readyTuplesNum + currentInProgressTuples;
  }

  /**
   * Pop the first ready TupleBatch if there is one; otherwise finish and pop the in-progress batch.
   *
   * @return pop filled and non-filled TupleBatch, or null if the buffer holds no complete tuples.
   * @throws IllegalStateException if the in-progress batch must be finished but its current tuple is only partially
   *         filled.
   */
  public final TupleBatch popAny() {
    /* popFilled() already refreshes lastPoppedTime when it returns a batch. */
    final TupleBatch filled = popFilled();
    if (filled != null) {
      return filled;
    }
    if (currentInProgressTuples > 0) {
      return popInProgress();
    }
    return null;
  }

  /**
   * Pop the first ready TupleBatch if there is one; otherwise finish and pop the in-progress batch, but only if at
   * least {@link MyriaConstants#PUSHING_TB_TIMEOUT} has elapsed since the last pop.
   *
   * @return pop filled and non-filled TupleBatch, or null if nothing is ready and the timeout has not yet elapsed.
   * @throws IllegalStateException if the in-progress batch must be finished but its current tuple is only partially
   *         filled.
   */
  public final TupleBatch popAnyUsingTimeout() {
    /* popFilled() already refreshes lastPoppedTime when it returns a batch. */
    final TupleBatch filled = popFilled();
    if (filled != null) {
      return filled;
    }
    if (currentInProgressTuples > 0 && getElapsedTime() >= MyriaConstants.PUSHING_TB_TIMEOUT) {
      return popInProgress();
    }
    return null;
  }

  /**
   * Helper function: finishes the in-progress batch and removes it from the ready list. Must only be called when the
   * ready list is empty and there is at least one in-progress tuple, so that the batch just finished is the one at
   * the head of the list.
   *
   * @return the batch built from the in-progress tuples.
   */
  private TupleBatch popInProgress() {
    final int size = currentInProgressTuples;
    finishBatch();
    updateLastPoppedTime();
    /* finishBatch() added these tuples to the ready count; take them out again since the batch leaves the buffer. */
    readyTuplesNum -= size;
    final TupleBatch batch = readyTuples.remove(0);
    Preconditions.checkState(size == batch.numTuples(), "Error with number of tuples");
    Preconditions.checkState(currentInProgressTuples == 0, "Error with in progress tuples");
    return batch;
  }

  /**
   * Extract and return the first complete TupleBatch in this Buffer.
   *
   * @return the first complete TupleBatch in this buffer, or null if none is ready.
   */
  public final TupleBatch popFilled() {
    if (readyTuples.isEmpty()) {
      return null;
    }
    updateLastPoppedTime();
    final TupleBatch batch = readyTuples.remove(0);
    readyTuplesNum -= batch.numTuples();
    return batch;
  }

  /**
   * Append the specified value to the specified column.
   *
   * @param column index of the column.
   * @param value value to be appended.
   * @deprecated this untyped variant appends {@code value} as a plain object; the typed put methods of this class
   *             (e.g. {@link #putInt(int, int)}, {@link #putString(int, String)}) are presumably the intended
   *             replacement.
   */
  @Deprecated
  public final void put(final int column, final Object value) {
    checkPutIndex(column);
    currentBuildingColumns.get(column).appendObject(value);
    columnPut(column);
  }

  /**
   * Append a complete tuple coming from two tuple batches: left and right. Used in join operators. The selected left
   * columns fill the first {@code leftAnswerColumns.length} columns of this buffer, and the selected right columns fill
   * the columns after them.
   *
   * @param leftTb the left tuple batch
   * @param leftIdx the index of the left tuple in the tuple batch
   * @param leftAnswerColumns an array that specifies which columns from the left tuple batch
   * @param rightTb the right tuple batch
   * @param rightIdx the index of the right tuple in the tuple batch
   * @param rightAnswerColumns an array that specifies which columns from the right tuple batch
   * @throws IllegalStateException if the current tuple is only partially filled.
   * @throws IllegalArgumentException if the number of selected columns does not equal {@link #numColumns()}.
   */
  public final void put(
      final TupleBatch leftTb,
      final int leftIdx,
      final int[] leftAnswerColumns,
      final TupleBatch rightTb,
      final int rightIdx,
      final int[] rightAnswerColumns) {
    /* This method writes into the builders directly and bypasses the columnsReady bookkeeping, so a half-written row
     * would be silently misaligned. Fail now rather than when the batch is eventually finished. */
    Preconditions.checkState(
        numColumnsReady == 0, "Cannot append a joined tuple while another tuple is partially filled");
    final int leftWidth = leftAnswerColumns.length;
    final int rightWidth = rightAnswerColumns.length;
    /* Every builder must receive exactly one value per tuple, otherwise the columns end up with different lengths. */
    if (leftWidth + rightWidth != numColumns) {
      throw new IllegalArgumentException(
          "Expected " + numColumns + " answer columns but got " + leftWidth + " + " + rightWidth);
    }
    final List<? extends Column<?>> leftColumns = leftTb.getDataColumns();
    for (int i = 0; i < leftWidth; ++i) {
      TupleUtils.copyValue(
          leftColumns.get(leftAnswerColumns[i]), leftIdx, currentBuildingColumns.get(i));
    }
    final List<? extends Column<?>> rightColumns = rightTb.getDataColumns();
    for (int i = 0; i < rightWidth; ++i) {
      TupleUtils.copyValue(
          rightColumns.get(rightAnswerColumns[i]),
          rightIdx,
          currentBuildingColumns.get(i + leftWidth));
    }
    currentInProgressTuples++;
    if (currentInProgressTuples == batchSize) {
      finishBatch();
    }
  }

  /**
   * Append the specified value to the specified destination column in this TupleBatchBuffer from the source column.
   * Delegates to {@code TupleUtils.copyValue} with this buffer as the destination table.
   *
   * @param destColumn which column in this TBB the value will be inserted.
   * @param sourceColumn the column from which data will be retrieved.
   * @param sourceRow the row in the source column from which data will be retrieved.
   */
  public final void appendFromColumn(
      final int destColumn, final ReadableColumn sourceColumn, final int sourceRow) {
    TupleUtils.copyValue(sourceColumn, sourceRow, this, destColumn);
  }

  /**
   * Append the referenced row from the source {@link TupleBatch} to this {@link TupleBatchBuffer}. Every column of
   * {@code tb} is appended in order, each to the next unfilled column of the current tuple.
   *
   * @param tb the source tuple batch.
   * @param row the row index.
   */
  public final void append(final TupleBatch tb, final int row) {
    final int sourceColumns = tb.numColumns();
    for (int col = 0; col < sourceColumns; ++col) {
      append(tb, col, row);
    }
  }

  /**
   * Append the referenced value from the source {@link TupleBatch} to this {@link TupleBatchBuffer}. The value goes into
   * the lowest-indexed column that has not yet been filled in the current tuple.
   *
   * @param tb the source tuple batch.
   * @param col the col index.
   * @param row the row index.
   */
  public final void append(final TupleBatch tb, final int col, final int row) {
    appendFromColumn(columnsReady.nextClearBit(0), tb.getDataColumns().get(col), row);
  }

  /**
   * Append the referenced value from the source {@link MutableTupleBuffer} to this {@link TupleBatchBuffer}. The value
   * goes into the lowest-indexed column that has not yet been filled in the current tuple.
   *
   * @param tuples the source tuple buffer.
   * @param col the column index.
   * @param row the row index.
   */
  public final void append(final MutableTupleBuffer tuples, final int col, final int row) {
    /* NOTE(review): the row offset within the source column is computed with THIS buffer's batch size; that presumably
     * relies on the source buffer using the same batch size -- confirm against MutableTupleBuffer. */
    appendFromColumn(columnsReady.nextClearBit(0), tuples.getColumn(col, row), row % batchSize);
  }

  /**
   * Append the referenced row from the source {@link MutableTupleBuffer} to this {@link TupleBatchBuffer}. Every column
   * of {@code tuples} is appended in order, each to the next unfilled column of the current tuple.
   *
   * @param tuples the source tuple buffer.
   * @param row the row index.
   */
  public final void append(final MutableTupleBuffer tuples, final int row) {
    final int sourceColumns = tuples.numColumns();
    for (int col = 0; col < sourceColumns; ++col) {
      append(tuples, col, row);
    }
  }

  @Override
  public final void putBoolean(final int column, final boolean value) {
    checkPutIndex(column);
    currentBuildingColumns.get(column).appendBoolean(value);
    columnPut(column);
  }

  @Override
  public final void putDateTime(final int column, final DateTime value) {
    checkPutIndex(column);
    currentBuildingColumns.get(column).appendDateTime(value);
    columnPut(column);
  }

  @Override
  public final void putDouble(final int column, final double value) {
    checkPutIndex(column);
    currentBuildingColumns.get(column).appendDouble(value);
    columnPut(column);
  }

  @Override
  public final void putFloat(final int column, final float value) {
    checkPutIndex(column);
    currentBuildingColumns.get(column).appendFloat(value);
    columnPut(column);
  }

  @Override
  public final void putInt(final int column, final int value) {
    checkPutIndex(column);
    currentBuildingColumns.get(column).appendInt(value);
    columnPut(column);
  }

  @Override
  public final void putLong(final int column, final long value) {
    checkPutIndex(column);
    currentBuildingColumns.get(column).appendLong(value);
    columnPut(column);
  }

  @Override
  @Deprecated
  public void putObject(final int column, final Object value) {
    checkPutIndex(column);
    currentBuildingColumns.get(column).appendObject(MyriaUtils.ensureObjectIsValidType(value));
    columnPut(column);
  }

  @Override
  public final void putString(final int column, final String value) {
    checkPutIndex(column);
    currentBuildingColumns.get(column).appendString(value);
    columnPut(column);
  }

  @Override
  public final void putBlob(final int column, final ByteBuffer value) {
    checkPutIndex(column);
    currentBuildingColumns.get(column).appendBlob(value);
    columnPut(column);
  }

  /**
   * Update lastPoppedTime to be the current time.
   */
  private void updateLastPoppedTime() {
    lastPoppedTime = System.nanoTime();
  }

  /**
   * Add the specified {@link TupleBatch} to this buffer. With {@code shallowCopy} the batch object itself is appended to
   * the list of ready batches (after finishing any in-progress batch); otherwise the batch is asked to copy its
   * contents into this buffer via {@code compactInto}.
   *
   * @param tupleBatch the tuple data to be added to this buffer.
   * @param shallowCopy shallow or deep copy of tupleBatch elements.
   */
  public void absorb(final TupleBatch tupleBatch, final boolean shallowCopy) {
    if (shallowCopy) {
      appendTB(tupleBatch);
    } else {
      tupleBatch.compactInto(this);
    }
  }

  @Override
  public WritableColumn asWritableColumn(final int column) {
    return new WritableSubColumn(this, column);
  }
}