package edu.washington.escience.myria.storage; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.BitSet; import java.util.List; import java.util.Objects; import org.joda.time.DateTime; import com.google.common.base.Preconditions; import edu.washington.escience.myria.Schema; import edu.washington.escience.myria.Type; import edu.washington.escience.myria.column.Column; import edu.washington.escience.myria.column.builder.ColumnBuilder; import edu.washington.escience.myria.column.builder.ColumnFactory; import edu.washington.escience.myria.column.builder.WritableColumn; import edu.washington.escience.myria.column.mutable.MutableColumn; import edu.washington.escience.myria.util.MyriaUtils; /** A simplified TupleBatchBuffer which supports random access. Designed for hash tables to use. */ public class MutableTupleBuffer implements ReadableTable, AppendableTable, Cloneable { /** Format of the emitted tuples. */ private final Schema schema; /** Convenience constant; must match schema.numColumns() and currentColumns.size(). */ private final int numColumns; /** List of completed TupleBatch objects. */ private final List<MutableColumn<?>[]> readyTuples; /** Internal state used to build up a TupleBatch. */ private ColumnBuilder<?>[] currentBuildingColumns; /** Internal state representing which columns are ready in the current tuple. */ private BitSet columnsReady; /** Internal state representing the number of columns that are ready in the current tuple. */ private int numColumnsReady; /** Internal state representing the number of tuples in the in-progress TupleBatch. */ private int currentInProgressTuples; /** Batch Size. */ private int batchSize; /** * @return batchSize for the tuple batch. */ public int getBatchSize() { return batchSize; } /** * Constructs an empty TupleBuffer to hold tuples matching the specified Schema. * * @param schema specified the columns of the emitted TupleBatch objects. */ public MutableTupleBuffer(final Schema schema) { this.schema = Objects.requireNonNull(schema); readyTuples = new ArrayList<MutableColumn<?>[]>(); currentBuildingColumns = ColumnFactory.allocateColumns(schema).toArray(new ColumnBuilder<?>[] {}); numColumns = schema.numColumns(); columnsReady = new BitSet(numColumns); numColumnsReady = 0; currentInProgressTuples = 0; batchSize = TupleUtils.getBatchSize(schema); } /** * clear this TBB. */ public final void clear() { columnsReady.clear(); currentBuildingColumns = null; currentInProgressTuples = 0; numColumnsReady = 0; readyTuples.clear(); } /** * Makes a batch of any tuples in the buffer and appends it to the internal list. */ private void finishBatch() { Preconditions.checkArgument(numColumnsReady == 0); Preconditions.checkArgument(currentInProgressTuples == batchSize); MutableColumn<?>[] buildingColumns = new MutableColumn<?>[numColumns]; int i = 0; for (ColumnBuilder<?> cb : currentBuildingColumns) { buildingColumns[i++] = cb.buildMutable(); } readyTuples.add(buildingColumns); currentBuildingColumns = ColumnFactory.allocateColumns(schema).toArray(new ColumnBuilder<?>[] {}); currentInProgressTuples = 0; } @Override public final Schema getSchema() { return schema; } @Override public final int numTuples() { return readyTuples.size() * batchSize + currentInProgressTuples; } @Override @Deprecated public final Object getObject(final int col, final int row) { return getColumn(col, row).getObject(getInColumnIndex(row)); } @Override public final boolean getBoolean(final int col, final int row) { return getColumn(col, row).getBoolean(getInColumnIndex(row)); } @Override public final double getDouble(final int col, final int row) { return getColumn(col, row).getDouble(getInColumnIndex(row)); } @Override public final float getFloat(final int col, final int row) { return getColumn(col, row).getFloat(getInColumnIndex(row)); } @Override public final long getLong(final int col, final int row) { return getColumn(col, row).getLong(getInColumnIndex(row)); } @Override public final int getInt(final int col, final int row) { return getColumn(col, row).getInt(getInColumnIndex(row)); } @Override public final String getString(final int col, final int row) { return getColumn(col, row).getString(getInColumnIndex(row)); } @Override public final DateTime getDateTime(final int col, final int row) { return getColumn(col, row).getDateTime(getInColumnIndex(row)); } @Override public final ByteBuffer getBlob(final int column, final int row) { int tupleBatchIndex = row / batchSize; int tupleIndex = row % batchSize; if (tupleBatchIndex > readyTuples.size() || tupleBatchIndex == readyTuples.size() && tupleIndex >= currentInProgressTuples) { throw new IndexOutOfBoundsException(); } if (tupleBatchIndex < readyTuples.size()) { return readyTuples.get(tupleBatchIndex)[column].getBlob(tupleIndex); } return currentBuildingColumns[column].getBlob(tupleIndex); } @Override public final int numColumns() { return numColumns; } @Override public final void putBoolean(final int column, final boolean value) { checkPutIndex(column); currentBuildingColumns[column].appendBoolean(value); columnPut(column); } @Override public final void putDateTime(final int column, final DateTime value) { checkPutIndex(column); currentBuildingColumns[column].appendDateTime(value); columnPut(column); } @Override public final void putDouble(final int column, final double value) { checkPutIndex(column); currentBuildingColumns[column].appendDouble(value); columnPut(column); } @Override public final void putFloat(final int column, final float value) { checkPutIndex(column); currentBuildingColumns[column].appendFloat(value); columnPut(column); } @Override public final void putInt(final int column, final int value) { checkPutIndex(column); currentBuildingColumns[column].appendInt(value); columnPut(column); } @Override public final void putLong(final int column, final long value) { checkPutIndex(column); currentBuildingColumns[column].appendLong(value); columnPut(column); } @Override @Deprecated public final void putObject(final int column, final Object value) { checkPutIndex(column); currentBuildingColumns[column].appendObject(MyriaUtils.ensureObjectIsValidType(value)); columnPut(column); } @Override public final void putString(final int column, final String value) { checkPutIndex(column); currentBuildingColumns[column].appendString(value); columnPut(column); } @Override public final void putBlob(final int column, final ByteBuffer value) { checkPutIndex(column); currentBuildingColumns[column].appendBlob(value); columnPut(column); } /** * Helper function: checks whether the specified column can be inserted into. * * @param column the column in which the value should be put. */ private void checkPutIndex(final int column) { Preconditions.checkElementIndex(column, numColumns); if (columnsReady.get(column)) { throw new RuntimeException( "Need to fill up one row of TupleBatchBuffer before starting new one"); } } /** * Helper function to update the internal state after a value has been inserted into the specified column. * * @param column the column in which the value was put. */ private void columnPut(final int column) { columnsReady.set(column, true); numColumnsReady++; if (numColumnsReady == numColumns) { currentInProgressTuples++; numColumnsReady = 0; columnsReady.clear(); if (currentInProgressTuples == batchSize) { finishBatch(); } } } /** * Append the specified value to the specified destination column in this TupleBatchBuffer from the source column. * * @param destColumn which column in this TB the value will be inserted. * @param sourceColumn the column from which data will be retrieved. * @param sourceRow the row in the source column from which data will be retrieved. */ public final void put( final int destColumn, final ReadableColumn sourceColumn, final int sourceRow) { TupleUtils.copyValue(sourceColumn, sourceRow, this, destColumn); } /** * Swap the specified values from sourceRow to destRow in this TupleBuffer from the given column. * * @param column which column in this TB the value will be inserted. * @param destRow the row in the dest column from which data will be retrieved. * @param sourceRow the row in the source column from which data will be retrieved. */ public final void swap(final int column, final int destRow, final int sourceRow) { final int numTuples = numTuples(); Preconditions.checkElementIndex(destRow, numTuples); Preconditions.checkElementIndex(sourceRow, numTuples); int destBatch = destRow / batchSize; int destBatchRow = destRow % batchSize; int sourceBatch = sourceRow / batchSize; int sourceBatchRow = sourceRow % batchSize; ReplaceableColumn sourceColumn; if (sourceBatch < readyTuples.size()) { sourceColumn = readyTuples.get(sourceBatch)[column]; } else { sourceColumn = currentBuildingColumns[column]; } ReplaceableColumn destColumn; if (destBatch < readyTuples.size()) { destColumn = readyTuples.get(destBatch)[column]; } else { destColumn = currentBuildingColumns[column]; } Type t = getSchema().getColumnType(column); switch (t) { case LONG_TYPE: long long1 = sourceColumn.getLong(sourceBatchRow); long long2 = destColumn.getLong(destBatchRow); sourceColumn.replaceLong(long2, sourceBatchRow); destColumn.replaceLong(long1, destBatchRow); break; case INT_TYPE: int int1 = sourceColumn.getInt(sourceBatchRow); int int2 = destColumn.getInt(destBatchRow); sourceColumn.replaceInt(int2, sourceBatchRow); destColumn.replaceInt(int1, destBatchRow); break; case DOUBLE_TYPE: double double1 = sourceColumn.getDouble(sourceBatchRow); double double2 = destColumn.getDouble(destBatchRow); sourceColumn.replaceDouble(double2, sourceBatchRow); destColumn.replaceDouble(double1, destBatchRow); break; case FLOAT_TYPE: float float1 = sourceColumn.getFloat(sourceBatchRow); float float2 = destColumn.getFloat(destBatchRow); sourceColumn.replaceFloat(float2, sourceBatchRow); destColumn.replaceFloat(float1, destBatchRow); break; case BOOLEAN_TYPE: boolean boolean1 = sourceColumn.getBoolean(sourceBatchRow); boolean boolean2 = destColumn.getBoolean(destBatchRow); sourceColumn.replaceBoolean(boolean2, sourceBatchRow); destColumn.replaceBoolean(boolean1, destBatchRow); break; case STRING_TYPE: String string1 = sourceColumn.getString(sourceBatchRow); String string2 = destColumn.getString(destBatchRow); sourceColumn.replaceString(string2, sourceBatchRow); destColumn.replaceString(string1, destBatchRow); break; case DATETIME_TYPE: DateTime date1 = sourceColumn.getDateTime(sourceBatchRow); DateTime date2 = destColumn.getDateTime(destBatchRow); sourceColumn.replaceDateTime(date2, sourceBatchRow); destColumn.replaceDateTime(date1, destBatchRow); break; case BLOB_TYPE: ByteBuffer bb1 = sourceColumn.getBlob(sourceBatchRow); ByteBuffer bb2 = destColumn.getBlob(destBatchRow); sourceColumn.replaceBlob(bb2, sourceBatchRow); destColumn.replaceBlob(bb1, destBatchRow); break; } } /** * @param row the row index. */ private void checkRowIndex(final int row) { if (row >= numTuples()) { throw new IndexOutOfBoundsException(); } } /** * @param column the column index. * @param row the row index. * @return a Replaceable Column that stores the given field. */ public final ReplaceableColumn getColumn(final int column, final int row) { checkRowIndex(row); int tupleBatchIndex = row / batchSize; if (tupleBatchIndex < readyTuples.size()) { return readyTuples.get(tupleBatchIndex)[column]; } else { return currentBuildingColumns[column]; } } /** * @param destColumn the destination column. * @param destRow the row. * @param value the replacement. */ public final void replaceInt(final int destColumn, final int destRow, final int value) { getColumn(destColumn, destRow).replaceInt(value, getInColumnIndex(destRow)); } /** * @param destColumn the destination column. * @param destRow the row. * @param value the replacement. */ public final void replaceLong(final int destColumn, final int destRow, final long value) { getColumn(destColumn, destRow).replaceLong(value, getInColumnIndex(destRow)); } /** * @param destColumn the destination column. * @param destRow the row. * @param value the replacement. */ public final void replaceFloat(final int destColumn, final int destRow, final float value) { getColumn(destColumn, destRow).replaceFloat(value, getInColumnIndex(destRow)); } /** * @param destColumn the destination column. * @param destRow the row. * @param value the replacement. */ public final void replaceDouble(final int destColumn, final int destRow, final double value) { getColumn(destColumn, destRow).replaceDouble(value, getInColumnIndex(destRow)); } /** * @param destColumn the destination column. * @param destRow the row. * @param value the replacement. */ public final void replaceString(final int destColumn, final int destRow, final String value) { getColumn(destColumn, destRow).replaceString(value, getInColumnIndex(destRow)); } /** * @param destColumn the destination column. * @param destRow the row. * @param value the replacement. */ public final void replaceByteBuffer( final int destColumn, final int destRow, final ByteBuffer value) { getColumn(destColumn, destRow).replaceBlob(value, getInColumnIndex(destRow)); } /** * Replace the specified value to the specified destination column in this TupleBuffer from the source column. * * @param destColumn which column in this TB the value will be inserted. * @param destRow the row in the dest column from which data will be retrieved. * @param sourceColumn the column from which data will be retrieved. * @param sourceRow the row in the source column from which data will be retrieved. */ public final void replace( final int destColumn, final int destRow, final ReadableColumn sourceColumn, final int sourceRow) { checkRowIndex(destRow); int tupleIndex = getInColumnIndex(destRow); ReplaceableColumn dest = getColumn(destColumn, destRow); switch (dest.getType()) { case BOOLEAN_TYPE: dest.replaceBoolean(sourceColumn.getBoolean(sourceRow), tupleIndex); break; case DATETIME_TYPE: dest.replaceDateTime(sourceColumn.getDateTime(sourceRow), tupleIndex); break; case DOUBLE_TYPE: dest.replaceDouble(sourceColumn.getDouble(sourceRow), tupleIndex); break; case FLOAT_TYPE: dest.replaceFloat(sourceColumn.getFloat(sourceRow), tupleIndex); break; case INT_TYPE: dest.replaceInt(sourceColumn.getInt(sourceRow), tupleIndex); break; case LONG_TYPE: dest.replaceLong(sourceColumn.getLong(sourceRow), tupleIndex); break; case STRING_TYPE: dest.replaceString(sourceColumn.getString(sourceRow), tupleIndex); break; case BLOB_TYPE: dest.replaceBlob(sourceColumn.getBlob(sourceRow), tupleIndex); break; } } /** * Return all tuples in this buffer. The data do not get removed. * * @return a List<TupleBatch> containing all complete tuples that have been inserted into this buffer. */ public final List<TupleBatch> getAll() { final List<TupleBatch> output = new ArrayList<TupleBatch>(); for (final MutableColumn<?>[] mutableColumns : readyTuples) { List<Column<?>> columns = new ArrayList<Column<?>>(); for (MutableColumn<?> mutableColumn : mutableColumns) { columns.add(mutableColumn.toColumn()); } output.add(new TupleBatch(schema, columns, batchSize)); } if (currentInProgressTuples > 0) { output.add(new TupleBatch(schema, getInProgressColumns(), currentInProgressTuples)); } return output; } /** * Build the in progress columns. The builders' states are untouched. They can keep building. * * @return the built in progress columns. */ private List<Column<?>> getInProgressColumns() { List<Column<?>> newColumns = new ArrayList<Column<?>>(currentBuildingColumns.length); for (ColumnBuilder<?> cb : currentBuildingColumns) { newColumns.add(cb.forkNewBuilder().build()); } return newColumns; } @Override public MutableTupleBuffer clone() { MutableTupleBuffer ret = new MutableTupleBuffer(getSchema()); ret.columnsReady = (BitSet) columnsReady.clone(); ret.numColumnsReady = numColumnsReady; ret.currentInProgressTuples = currentInProgressTuples; for (MutableColumn<?>[] columns : readyTuples) { MutableColumn<?>[] tmp = new MutableColumn<?>[columns.length]; for (int i = 0; i < columns.length; ++i) { tmp[i] = columns[i].clone(); } ret.readyTuples.add(tmp); } for (int i = 0; i < currentBuildingColumns.length; ++i) { ret.currentBuildingColumns[i] = currentBuildingColumns[i].forkNewBuilder(); } return ret; } @Override public ReadableColumn asColumn(final int column) { return new ReadableSubColumn(this, Preconditions.checkElementIndex(column, numColumns)); } @Override public WritableColumn asWritableColumn(final int column) { return new WritableSubColumn(this, column); } /** * Get the in-column row index of the given row index of the whole tuple buffer. * * @param row * @return the in-column row index */ public int getInColumnIndex(final int row) { return row % batchSize; } }