/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.raptor.storage;
import com.facebook.presto.orc.OrcDataSource;
import com.facebook.presto.orc.OrcRecordReader;
import com.facebook.presto.orc.memory.AggregatedMemoryContext;
import com.facebook.presto.spi.Page;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.UpdatablePageSource;
import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.block.BlockBuilder;
import com.facebook.presto.spi.block.LazyBlock;
import com.facebook.presto.spi.block.LazyBlockLoader;
import com.facebook.presto.spi.block.RunLengthEncodedBlock;
import com.facebook.presto.spi.type.Type;
import com.google.common.collect.ImmutableList;
import io.airlift.slice.Slice;
import java.io.IOException;
import java.util.BitSet;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import static com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE;
import static com.facebook.presto.raptor.RaptorErrorCode.RAPTOR_ERROR;
import static com.facebook.presto.spi.predicate.Utils.nativeValueToBlock;
import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static io.airlift.slice.Slices.utf8Slice;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;
/**
 * Page source that produces pages from an {@link OrcRecordReader} and records
 * row ids marked for deletion so they can be handed to a {@link ShardRewriter}.
 *
 * <p>Each output channel is described by a column index. Non-negative indexes
 * are read lazily from the record reader. The negative sentinel indexes
 * declared below are synthesized by this class instead of being read from the
 * file.
 */
public class OrcPageSource
        implements UpdatablePageSource
{
    /** Sentinel column index: the channel is a constant block holding a single null value. */
    public static final int NULL_COLUMN = -1;
    /** Sentinel column index: the channel holds consecutive values starting at the reader's file position. */
    public static final int ROWID_COLUMN = -2;
    /** Sentinel column index: the channel is a constant block holding the shard UUID as a UTF-8 string. */
    public static final int SHARD_UUID_COLUMN = -3;
    /** Sentinel column index: the channel is a constant block holding the bucket number, or null when absent. */
    public static final int BUCKET_NUMBER_COLUMN = -4;

    private final Optional<ShardRewriter> shardRewriter;
    private final OrcRecordReader recordReader;
    private final OrcDataSource orcDataSource;
    // Bit i is set when row id i has been passed to deleteRows().
    private final BitSet rowsToDelete;
    private final List<Long> columnIds;
    private final List<Type> types;
    // Non-null entries are pre-built run-length-encoded constants for synthetic columns.
    private final Block[] constantBlocks;
    private final int[] columnIndexes;
    private final AggregatedMemoryContext systemMemoryContext;

    // Incremented on every getNextPage() call; lazy loaders use it to detect stale loads.
    private int batchId;
    private boolean closed;

    /**
     * Creates a page source over the given record reader.
     *
     * @param shardRewriter rewriter used by {@link #finish()}; must be present for {@code finish()} to succeed
     * @param recordReader reader supplying batches and column blocks
     * @param orcDataSource data source queried for read-byte and read-time statistics
     * @param columnIds ids of the output columns (reported by {@link #toString()})
     * @param columnTypes types of the output columns, parallel to {@code columnIds}
     * @param columnIndexes reader column index, or one of the negative sentinel constants, per output column
     * @param shardUuid value emitted for {@link #SHARD_UUID_COLUMN} channels
     * @param bucketNumber value emitted for {@link #BUCKET_NUMBER_COLUMN} channels; null is emitted when empty
     * @param systemMemoryContext memory context reported by {@link #getSystemMemoryUsage()}
     * @throws NullPointerException if a required argument is null
     * @throws IllegalArgumentException if the three column lists differ in size
     * @throws ArithmeticException if the reader's file row count does not fit in an int
     */
    public OrcPageSource(
            Optional<ShardRewriter> shardRewriter,
            OrcRecordReader recordReader,
            OrcDataSource orcDataSource,
            List<Long> columnIds,
            List<Type> columnTypes,
            List<Integer> columnIndexes,
            UUID shardUuid,
            OptionalInt bucketNumber,
            AggregatedMemoryContext systemMemoryContext)
    {
        this.shardRewriter = requireNonNull(shardRewriter, "shardRewriter is null");
        this.recordReader = requireNonNull(recordReader, "recordReader is null");
        this.orcDataSource = requireNonNull(orcDataSource, "orcDataSource is null");

        this.rowsToDelete = new BitSet(toIntExact(recordReader.getFileRowCount()));

        // These would fail with an anonymous NPE on size() below; name the offending argument instead.
        requireNonNull(columnIds, "columnIds is null");
        requireNonNull(columnTypes, "columnTypes is null");
        requireNonNull(columnIndexes, "columnIndexes is null");
        checkArgument(columnIds.size() == columnTypes.size(), "ids and types mismatch");
        checkArgument(columnIds.size() == columnIndexes.size(), "ids and indexes mismatch");
        int size = columnIds.size();

        this.columnIds = ImmutableList.copyOf(columnIds);
        this.types = ImmutableList.copyOf(columnTypes);

        this.constantBlocks = new Block[size];
        this.columnIndexes = new int[size];

        requireNonNull(shardUuid, "shardUuid is null");

        for (int i = 0; i < size; i++) {
            int columnIndex = columnIndexes.get(i);
            this.columnIndexes[i] = columnIndex;
            Type columnType = columnTypes.get(i);

            if (columnIndex == NULL_COLUMN) {
                constantBlocks[i] = buildSingleValueBlock(columnType, null);
            }
            else if (columnIndex == SHARD_UUID_COLUMN) {
                constantBlocks[i] = buildSingleValueBlock(columnType, utf8Slice(shardUuid.toString()));
            }
            else if (columnIndex == BUCKET_NUMBER_COLUMN) {
                if (bucketNumber.isPresent()) {
                    constantBlocks[i] = buildSingleValueBlock(columnType, (long) bucketNumber.getAsInt());
                }
                else {
                    constantBlocks[i] = buildSingleValueBlock(columnType, null);
                }
            }
            // ROWID_COLUMN and real columns are materialized per batch in getNextPage().
        }

        this.systemMemoryContext = requireNonNull(systemMemoryContext, "systemMemoryContext is null");
    }

    /** Returns the split length reported by the record reader. */
    @Override
    public long getTotalBytes()
    {
        return recordReader.getSplitLength();
    }

    /** Returns the number of bytes read so far, as reported by the data source. */
    @Override
    public long getCompletedBytes()
    {
        return orcDataSource.getReadBytes();
    }

    /** Returns the read time in nanoseconds, as reported by the data source. */
    @Override
    public long getReadTimeNanos()
    {
        return orcDataSource.getReadTimeNanos();
    }

    /** Returns true once this page source has been closed, including after the last batch. */
    @Override
    public boolean isFinished()
    {
        return closed;
    }

    /**
     * Advances the reader by one batch and returns the corresponding page.
     *
     * <p>Constant columns are served as regions of the pre-built blocks, the
     * row id column is built eagerly, and all other columns are wrapped in
     * lazy blocks that read from the record reader on first access.
     *
     * @return the next page, or null when the reader has no more batches (the source is then closed)
     * @throws PrestoException if reading fails; the source is closed before the exception propagates
     */
    @Override
    public Page getNextPage()
    {
        try {
            batchId++;
            int batchSize = recordReader.nextBatch();
            if (batchSize <= 0) {
                close();
                return null;
            }
            long filePosition = recordReader.getFilePosition();

            Block[] blocks = new Block[columnIndexes.length];
            for (int fieldId = 0; fieldId < blocks.length; fieldId++) {
                if (constantBlocks[fieldId] != null) {
                    blocks[fieldId] = constantBlocks[fieldId].getRegion(0, batchSize);
                }
                else if (columnIndexes[fieldId] == ROWID_COLUMN) {
                    blocks[fieldId] = buildSequenceBlock(filePosition, batchSize);
                }
                else {
                    Type type = types.get(fieldId);
                    blocks[fieldId] = new LazyBlock(batchSize, new OrcBlockLoader(columnIndexes[fieldId], type));
                }
            }

            return new Page(batchSize, blocks);
        }
        catch (PrestoException e) {
            // Already carries an error code (e.g. thrown by close()); do not bury it in a second wrapper.
            closeWithSuppression(e);
            throw e;
        }
        catch (IOException | RuntimeException e) {
            closeWithSuppression(e);
            throw new PrestoException(RAPTOR_ERROR, e);
        }
    }

    /**
     * Marks this source finished and closes the record reader.
     *
     * <p>Calling this more than once has no effect after the first call, so the
     * reader is never closed twice (for example when the end-of-data close fails
     * and the error path closes again).
     *
     * @throws PrestoException if closing the record reader fails
     */
    @Override
    public void close()
    {
        if (closed) {
            return;
        }
        closed = true;

        try {
            recordReader.close();
        }
        catch (IOException e) {
            throw new PrestoException(RAPTOR_ERROR, e);
        }
    }

    @Override
    public String toString()
    {
        // NOTE: the "columnNames" label actually carries the column ids; kept as-is for output compatibility.
        return toStringHelper(this)
                .add("columnNames", columnIds)
                .add("types", types)
                .toString();
    }

    /**
     * Records every row id in the given block as deleted.
     *
     * @param rowIds block of BIGINT row ids
     * @throws ArithmeticException if a row id does not fit in an int
     */
    @Override
    public void deleteRows(Block rowIds)
    {
        int positionCount = rowIds.getPositionCount();
        for (int position = 0; position < positionCount; position++) {
            long rowId = BIGINT.getLong(rowIds, position);
            rowsToDelete.set(toIntExact(rowId));
        }
    }

    /**
     * Hands the accumulated deletions to the shard rewriter.
     *
     * @return the future returned by the rewriter
     * @throws IllegalStateException if no shard rewriter was supplied
     */
    @Override
    public CompletableFuture<Collection<Slice>> finish()
    {
        checkState(shardRewriter.isPresent(), "shardRewriter is missing");
        return shardRewriter.get().rewrite(rowsToDelete);
    }

    /** Returns the byte count reported by the system memory context. */
    @Override
    public long getSystemMemoryUsage()
    {
        return systemMemoryContext.getBytes();
    }

    /**
     * Closes this source, attaching any runtime failure from close() to the
     * given throwable as a suppressed exception instead of letting it mask it.
     */
    private void closeWithSuppression(Throwable throwable)
    {
        requireNonNull(throwable, "throwable is null");
        try {
            close();
        }
        catch (RuntimeException e) {
            // Self-suppression not permitted
            if (throwable != e) {
                throwable.addSuppressed(e);
            }
        }
    }

    /** Builds a BIGINT block containing {@code start, start + 1, ..., start + count - 1}. */
    private static Block buildSequenceBlock(long start, int count)
    {
        BlockBuilder builder = BIGINT.createFixedSizeBlockBuilder(count);
        for (int i = 0; i < count; i++) {
            BIGINT.writeLong(builder, start + i);
        }
        return builder.build();
    }

    /**
     * Builds a run-length-encoded block repeating {@code value} for
     * {@code MAX_BATCH_SIZE} positions; getNextPage() slices it to the batch size.
     */
    private static Block buildSingleValueBlock(Type type, Object value)
    {
        Block block = nativeValueToBlock(type, value);
        return new RunLengthEncodedBlock(block, MAX_BATCH_SIZE);
    }

    /**
     * Loader that reads one column of the current batch from the record reader
     * the first time its lazy block is accessed.
     */
    private final class OrcBlockLoader
            implements LazyBlockLoader<LazyBlock>
    {
        // Batch that was current when this loader was created.
        private final int expectedBatchId = batchId;
        private final int columnIndex;
        private final Type type;
        private boolean loaded;

        public OrcBlockLoader(int columnIndex, Type type)
        {
            this.columnIndex = columnIndex;
            this.type = requireNonNull(type, "type is null");
        }

        /**
         * Reads the column block and installs it in {@code lazyBlock}; subsequent calls are no-ops.
         *
         * @throws IllegalStateException if getNextPage() has been called again since this loader was created
         * @throws PrestoException if the read fails with an I/O error
         */
        @Override
        public final void load(LazyBlock lazyBlock)
        {
            if (loaded) {
                return;
            }

            checkState(
                    batchId == expectedBatchId,
                    "lazy block for column %s loaded after the reader advanced (expected batch %s, current batch %s)",
                    columnIndex,
                    expectedBatchId,
                    batchId);

            try {
                Block block = recordReader.readBlock(type, columnIndex);
                lazyBlock.setBlock(block);
            }
            catch (IOException e) {
                throw new PrestoException(RAPTOR_ERROR, e);
            }

            loaded = true;
        }
    }
}