/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.hive;
import com.facebook.presto.hive.metastore.Column;
import com.facebook.presto.hive.metastore.Partition;
import com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore;
import com.facebook.presto.hive.metastore.Table;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.ConnectorSplitSource;
import com.facebook.presto.spi.ConnectorTableLayoutHandle;
import com.facebook.presto.spi.FixedSplitSource;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.TableNotFoundException;
import com.facebook.presto.spi.connector.ConnectorSplitManager;
import com.facebook.presto.spi.connector.ConnectorTransactionHandle;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import io.airlift.concurrent.BoundedExecutor;
import org.apache.hadoop.hive.metastore.ProtectMode;
import javax.inject.Inject;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.RejectedExecutionException;
import java.util.function.Function;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_INVALID_METADATA;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_METASTORE_ERROR;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH;
import static com.facebook.presto.hive.HivePartition.UNPARTITIONED_ID;
import static com.facebook.presto.hive.HiveUtil.checkCondition;
import static com.facebook.presto.hive.metastore.MetastoreUtil.makePartName;
import static com.facebook.presto.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR;
import static com.facebook.presto.spi.StandardErrorCode.SERVER_SHUTTING_DOWN;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Strings.isNullOrEmpty;
import static com.google.common.collect.Iterables.concat;
import static com.google.common.collect.Iterables.getOnlyElement;
import static com.google.common.collect.Iterables.transform;
import static java.lang.Math.min;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static org.apache.hadoop.hive.metastore.ProtectMode.getProtectModeFromString;
public class HiveSplitManager
implements ConnectorSplitManager
{
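    /**
     * Table or partition parameter that marks the object as offline for Presto.
     * A non-empty value is surfaced to the user as the offline reason.
     */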
public static final String PRESTO_OFFLINE = "presto_offline";
private final String connectorId;
private final Function<HiveTransactionHandle, SemiTransactionalHiveMetastore> metastoreProvider;
private final NamenodeStats namenodeStats;
private final HdfsEnvironment hdfsEnvironment;
private final DirectoryLister directoryLister;
private final Executor executor;
private final CoercionPolicy coercionPolicy;
private final int maxOutstandingSplits;
private final int minPartitionBatchSize;
private final int maxPartitionBatchSize;
private final int maxInitialSplits;
private final boolean recursiveDfsWalkerEnabled;
@Inject
public HiveSplitManager(
HiveConnectorId connectorId,
HiveClientConfig hiveClientConfig,
Function<HiveTransactionHandle, SemiTransactionalHiveMetastore> metastoreProvider,
NamenodeStats namenodeStats,
HdfsEnvironment hdfsEnvironment,
DirectoryLister directoryLister,
@ForHiveClient ExecutorService executorService,
CoercionPolicy coercionPolicy)
{
this(connectorId,
metastoreProvider,
namenodeStats,
hdfsEnvironment,
directoryLister,
new BoundedExecutor(executorService, hiveClientConfig.getMaxSplitIteratorThreads()),
coercionPolicy,
hiveClientConfig.getMaxOutstandingSplits(),
hiveClientConfig.getMinPartitionBatchSize(),
hiveClientConfig.getMaxPartitionBatchSize(),
hiveClientConfig.getMaxInitialSplits(),
hiveClientConfig.getRecursiveDirWalkerEnabled()
);
}
public HiveSplitManager(
HiveConnectorId connectorId,
Function<HiveTransactionHandle, SemiTransactionalHiveMetastore> metastoreProvider,
NamenodeStats namenodeStats,
HdfsEnvironment hdfsEnvironment,
DirectoryLister directoryLister,
Executor executor,
CoercionPolicy coercionPolicy,
int maxOutstandingSplits,
int minPartitionBatchSize,
int maxPartitionBatchSize,
int maxInitialSplits,
boolean recursiveDfsWalkerEnabled)
{
this.connectorId = requireNonNull(connectorId, "connectorId is null").toString();
        this.metastoreProvider = requireNonNull(metastoreProvider, "metastoreProvider is null");
this.namenodeStats = requireNonNull(namenodeStats, "namenodeStats is null");
this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
this.directoryLister = requireNonNull(directoryLister, "directoryLister is null");
this.executor = new ErrorCodedExecutor(executor);
this.coercionPolicy = requireNonNull(coercionPolicy, "coercionPolicy is null");
checkArgument(maxOutstandingSplits >= 1, "maxOutstandingSplits must be at least 1");
this.maxOutstandingSplits = maxOutstandingSplits;
this.minPartitionBatchSize = minPartitionBatchSize;
this.maxPartitionBatchSize = maxPartitionBatchSize;
this.maxInitialSplits = maxInitialSplits;
this.recursiveDfsWalkerEnabled = recursiveDfsWalkerEnabled;
}
@Override
public ConnectorSplitSource getSplits(ConnectorTransactionHandle transaction, ConnectorSession session, ConnectorTableLayoutHandle layoutHandle)
{
HiveTableLayoutHandle layout = (HiveTableLayoutHandle) layoutHandle;
List<HivePartition> partitions = layout.getPartitions().get();
HivePartition partition = Iterables.getFirst(partitions, null);
if (partition == null) {
return new FixedSplitSource(ImmutableList.of());
}
SchemaTableName tableName = partition.getTableName();
List<HiveBucketing.HiveBucket> buckets = partition.getBuckets();
Optional<HiveBucketHandle> bucketHandle = layout.getBucketHandle();
        // sort partitions in descending order of partition id, so that the
        // (typically most recent) partitions are processed first
        partitions = Ordering.natural().onResultOf(HivePartition::getPartitionId).reverse().sortedCopy(partitions);
SemiTransactionalHiveMetastore metastore = metastoreProvider.apply((HiveTransactionHandle) transaction);
Optional<Table> table = metastore.getTable(tableName.getSchemaName(), tableName.getTableName());
if (!table.isPresent()) {
throw new TableNotFoundException(tableName);
}
Iterable<HivePartitionMetadata> hivePartitions = getPartitionMetadata(metastore, table.get(), tableName, partitions, bucketHandle.map(HiveBucketHandle::toBucketProperty));
HiveSplitLoader hiveSplitLoader = new BackgroundHiveSplitLoader(
connectorId,
table.get(),
hivePartitions,
bucketHandle,
buckets,
session,
hdfsEnvironment,
namenodeStats,
directoryLister,
executor,
maxPartitionBatchSize,
maxInitialSplits,
recursiveDfsWalkerEnabled);
HiveSplitSource splitSource = new HiveSplitSource(maxOutstandingSplits, hiveSplitLoader, executor);
hiveSplitLoader.start(splitSource);
return splitSource;
}
private Iterable<HivePartitionMetadata> getPartitionMetadata(SemiTransactionalHiveMetastore metastore, Table table, SchemaTableName tableName, List<HivePartition> hivePartitions, Optional<HiveBucketProperty> bucketProperty)
{
if (hivePartitions.isEmpty()) {
return ImmutableList.of();
}
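        // an unpartitioned table is exposed as a single synthetic partition with
        // UNPARTITIONED_ID; there is no metastore Partition object to fetch for it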
if (hivePartitions.size() == 1) {
HivePartition firstPartition = getOnlyElement(hivePartitions);
if (firstPartition.getPartitionId().equals(UNPARTITIONED_ID)) {
return ImmutableList.of(new HivePartitionMetadata(firstPartition, Optional.empty(), ImmutableMap.of()));
}
}
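        // fetch partition metadata from the metastore lazily, in exponentially
        // growing batches: small initial batches let split loading start quickly,
        // while later, larger batches amortize the per-call metastore overhead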
Iterable<List<HivePartition>> partitionNameBatches = partitionExponentially(hivePartitions, minPartitionBatchSize, maxPartitionBatchSize);
Iterable<List<HivePartitionMetadata>> partitionBatches = transform(partitionNameBatches, partitionBatch -> {
Map<String, Optional<Partition>> batch = metastore.getPartitionsByNames(
tableName.getSchemaName(),
tableName.getTableName(),
Lists.transform(partitionBatch, HivePartition::getPartitionId));
ImmutableMap.Builder<String, Partition> partitionBuilder = ImmutableMap.builder();
for (Map.Entry<String, Optional<Partition>> entry : batch.entrySet()) {
if (!entry.getValue().isPresent()) {
                    throw new PrestoException(HIVE_METASTORE_ERROR, "Partition metadata not available: " + entry.getKey());
}
partitionBuilder.put(entry.getKey(), entry.getValue().get());
}
Map<String, Partition> partitions = partitionBuilder.build();
            // the metastore must return an entry for every requested partition name
            if (partitionBatch.size() != partitions.size()) {
                throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Expected %s partitions but found %s", partitionBatch.size(), partitions.size()));
            }
ImmutableList.Builder<HivePartitionMetadata> results = ImmutableList.builder();
for (HivePartition hivePartition : partitionBatch) {
Partition partition = partitions.get(hivePartition.getPartitionId());
if (partition == null) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, "Partition not loaded: " + hivePartition);
}
                // verify the partition is not marked offline
String protectMode = partition.getParameters().get(ProtectMode.PARAMETER_NAME);
String partName = makePartName(table.getPartitionColumns(), partition.getValues());
if (protectMode != null && getProtectModeFromString(protectMode).offline) {
throw new PartitionOfflineException(tableName, partName, false, null);
}
String prestoOffline = partition.getParameters().get(PRESTO_OFFLINE);
if (!isNullOrEmpty(prestoOffline)) {
throw new PartitionOfflineException(tableName, partName, true, prestoOffline);
}
                // Verify that the partition schema matches the table schema.
                // Adding or dropping columns from the end of the table without
                // modifying existing partitions is allowed, but every column
                // that exists in both the table and the partition must have the
                // same type, or a type that the coercion policy can convert.
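                // Illustrative example: an older partition might declare a column
                // as 'int' while the table now declares it as 'bigint'; whether
                // that widening is readable is decided by the coercion policy below.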
List<Column> tableColumns = table.getDataColumns();
List<Column> partitionColumns = partition.getColumns();
if ((tableColumns == null) || (partitionColumns == null)) {
throw new PrestoException(HIVE_INVALID_METADATA, format("Table '%s' or partition '%s' has null columns", tableName, partName));
}
ImmutableMap.Builder<Integer, HiveType> columnCoercions = ImmutableMap.builder();
for (int i = 0; i < min(partitionColumns.size(), tableColumns.size()); i++) {
HiveType tableType = tableColumns.get(i).getType();
HiveType partitionType = partitionColumns.get(i).getType();
if (!tableType.equals(partitionType)) {
if (!coercionPolicy.canCoerce(partitionType, tableType)) {
throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("" +
"There is a mismatch between the table and partition schemas. " +
"The types are incompatible and cannot be coerced. " +
"The column '%s' in table '%s' is declared as type '%s', " +
"but partition '%s' declared column '%s' as type '%s'.",
tableColumns.get(i).getName(),
tableName,
tableType,
partName,
partitionColumns.get(i).getName(),
partitionType));
}
columnCoercions.put(i, partitionType);
}
}
Optional<HiveBucketProperty> partitionBucketProperty = partition.getStorage().getBucketProperty();
checkCondition(
partitionBucketProperty.equals(bucketProperty),
                        HIVE_PARTITION_SCHEMA_MISMATCH,
"Hive table (%s) bucketing property (%s) does not match partition (%s) bucketing property (%s)",
hivePartition.getTableName(),
bucketProperty,
hivePartition.getPartitionId(),
partitionBucketProperty);
results.add(new HivePartitionMetadata(hivePartition, Optional.of(partition), columnCoercions.build()));
}
return results.build();
});
return concat(partitionBatches);
}
/**
 * Partitions the given list into batches whose sizes increase exponentially
 * (powers of 2), starting at {@code minBatchSize} and capped at {@code maxBatchSize}.
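 * <p>
 * For example, with {@code minBatchSize = 10} and {@code maxBatchSize = 100},
 * the batch sizes are 10, 20, 40, 80, 100, 100, ...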
*/
private static <T> Iterable<List<T>> partitionExponentially(List<T> values, int minBatchSize, int maxBatchSize)
{
return () -> new AbstractIterator<List<T>>()
{
private int currentSize = minBatchSize;
private final Iterator<T> iterator = values.iterator();
@Override
protected List<T> computeNext()
{
if (!iterator.hasNext()) {
return endOfData();
}
int count = 0;
ImmutableList.Builder<T> builder = ImmutableList.builder();
while (iterator.hasNext() && count < currentSize) {
builder.add(iterator.next());
++count;
}
currentSize = min(maxBatchSize, currentSize * 2);
return builder.build();
}
};
}
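    /**
     * Wraps an {@link Executor} so that a {@link RejectedExecutionException},
     * typically thrown after the underlying pool has been shut down, surfaces
     * as a {@link PrestoException} with {@code SERVER_SHUTTING_DOWN}.
     */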
private static class ErrorCodedExecutor
implements Executor
{
private final Executor delegate;
private ErrorCodedExecutor(Executor delegate)
{
this.delegate = requireNonNull(delegate, "delegate is null");
}
@Override
public void execute(Runnable command)
{
try {
delegate.execute(command);
}
catch (RejectedExecutionException e) {
throw new PrestoException(SERVER_SHUTTING_DOWN, "Server is shutting down", e);
}
}
}
}