/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.presto.hive.orc; import com.facebook.presto.hive.HiveColumnHandle; import com.facebook.presto.orc.OrcCorruptionException; import com.facebook.presto.orc.OrcDataSource; import com.facebook.presto.orc.OrcRecordReader; import com.facebook.presto.orc.memory.AggregatedMemoryContext; import com.facebook.presto.spi.ConnectorPageSource; import com.facebook.presto.spi.Page; import com.facebook.presto.spi.PrestoException; import com.facebook.presto.spi.block.Block; import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.block.BlockBuilderStatus; import com.facebook.presto.spi.block.LazyBlock; import com.facebook.presto.spi.block.LazyBlockLoader; import com.facebook.presto.spi.type.Type; import com.facebook.presto.spi.type.TypeManager; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import java.io.IOException; import java.util.List; import static com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR; import static com.facebook.presto.hive.HiveErrorCode.HIVE_BAD_DATA; import static com.facebook.presto.hive.HiveErrorCode.HIVE_CURSOR_ERROR; import static com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE; import static com.google.common.base.MoreObjects.toStringHelper; import static com.google.common.base.Preconditions.checkState; import static java.util.Objects.requireNonNull; public class OrcPageSource implements ConnectorPageSource { private static final int NULL_ENTRY_SIZE = 0; private final OrcRecordReader recordReader; private final OrcDataSource orcDataSource; private final List<String> columnNames; private final List<Type> types; private final Block[] constantBlocks; private final int[] hiveColumnIndexes; private int batchId; private boolean closed; private final AggregatedMemoryContext systemMemoryContext; public OrcPageSource( OrcRecordReader recordReader, OrcDataSource orcDataSource, List<HiveColumnHandle> columns, TypeManager typeManager, AggregatedMemoryContext systemMemoryContext) { this.recordReader = requireNonNull(recordReader, "recordReader is null"); this.orcDataSource = requireNonNull(orcDataSource, "orcDataSource is null"); int size = requireNonNull(columns, "columns is null").size(); this.constantBlocks = new Block[size]; this.hiveColumnIndexes = new int[size]; ImmutableList.Builder<String> namesBuilder = ImmutableList.builder(); ImmutableList.Builder<Type> typesBuilder = ImmutableList.builder(); for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) { HiveColumnHandle column = columns.get(columnIndex); checkState(column.getColumnType() == REGULAR, "column type must be regular"); String name = column.getName(); Type type = typeManager.getType(column.getTypeSignature()); namesBuilder.add(name); typesBuilder.add(type); hiveColumnIndexes[columnIndex] = column.getHiveColumnIndex(); if (!recordReader.isColumnPresent(column.getHiveColumnIndex())) { BlockBuilder blockBuilder = type.createBlockBuilder(new BlockBuilderStatus(), MAX_BATCH_SIZE, NULL_ENTRY_SIZE); for (int i = 0; i < MAX_BATCH_SIZE; i++) { blockBuilder.appendNull(); } constantBlocks[columnIndex] = blockBuilder.build(); } } types = typesBuilder.build(); columnNames = namesBuilder.build(); this.systemMemoryContext = requireNonNull(systemMemoryContext, "systemMemoryContext is null"); } @Override public long getTotalBytes() { return recordReader.getSplitLength(); } @Override public long getCompletedBytes() { return orcDataSource.getReadBytes(); } @Override public long getReadTimeNanos() { return orcDataSource.getReadTimeNanos(); } @Override public boolean isFinished() { return closed; } @Override public Page getNextPage() { try { batchId++; int batchSize = recordReader.nextBatch(); if (batchSize <= 0) { close(); return null; } Block[] blocks = new Block[hiveColumnIndexes.length]; for (int fieldId = 0; fieldId < blocks.length; fieldId++) { Type type = types.get(fieldId); if (constantBlocks[fieldId] != null) { blocks[fieldId] = constantBlocks[fieldId].getRegion(0, batchSize); } else { blocks[fieldId] = new LazyBlock(batchSize, new OrcBlockLoader(hiveColumnIndexes[fieldId], type)); } } return new Page(batchSize, blocks); } catch (PrestoException e) { closeWithSuppression(e); throw e; } catch (IOException | RuntimeException e) { closeWithSuppression(e); throw new PrestoException(HIVE_CURSOR_ERROR, e); } } @Override public void close() { // some hive input formats are broken and bad things can happen if you close them multiple times if (closed) { return; } closed = true; try { recordReader.close(); } catch (IOException e) { throw Throwables.propagate(e); } } @Override public String toString() { return toStringHelper(this) .add("columnNames", columnNames) .add("types", types) .toString(); } @Override public long getSystemMemoryUsage() { return systemMemoryContext.getBytes(); } protected void closeWithSuppression(Throwable throwable) { requireNonNull(throwable, "throwable is null"); try { close(); } catch (RuntimeException e) { // Self-suppression not permitted if (throwable != e) { throwable.addSuppressed(e); } } } private final class OrcBlockLoader implements LazyBlockLoader<LazyBlock> { private final int expectedBatchId = batchId; private final int columnIndex; private final Type type; private boolean loaded; public OrcBlockLoader(int columnIndex, Type type) { this.columnIndex = columnIndex; this.type = requireNonNull(type, "type is null"); } @Override public final void load(LazyBlock lazyBlock) { if (loaded) { return; } checkState(batchId == expectedBatchId); try { Block block = recordReader.readBlock(type, columnIndex); lazyBlock.setBlock(block); } catch (IOException e) { if (e instanceof OrcCorruptionException) { throw new PrestoException(HIVE_BAD_DATA, e); } throw new PrestoException(HIVE_CURSOR_ERROR, e); } loaded = true; } } }