/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive;

import com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore;
import com.facebook.presto.hive.metastore.Table;
import com.facebook.presto.spi.ColumnHandle;
import com.facebook.presto.spi.ConnectorTableHandle;
import com.facebook.presto.spi.Constraint;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.SchemaTableName;
import com.facebook.presto.spi.TableNotFoundException;
import com.facebook.presto.spi.predicate.Domain;
import com.facebook.presto.spi.predicate.NullableValue;
import com.facebook.presto.spi.predicate.TupleDomain;
import com.facebook.presto.spi.predicate.ValueSet;
import com.facebook.presto.spi.type.Type;
import com.facebook.presto.spi.type.TypeManager;
import com.google.common.base.Predicates;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import io.airlift.slice.Slice;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.metastore.ProtectMode;
import org.joda.time.DateTimeZone;

import javax.inject.Inject;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static com.facebook.presto.hive.HiveBucketing.HiveBucket;
import static com.facebook.presto.hive.HiveBucketing.getHiveBucketHandle;
import static com.facebook.presto.hive.HiveBucketing.getHiveBucketNumbers;
import static com.facebook.presto.hive.HiveErrorCode.HIVE_EXCEEDED_PARTITION_LIMIT;
import static com.facebook.presto.hive.HiveUtil.getPartitionKeyColumnHandles;
import static com.facebook.presto.hive.HiveUtil.parsePartitionValue;
import static com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Predicates.not;
import static com.google.common.base.Strings.isNullOrEmpty;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
import static org.apache.hadoop.hive.metastore.ProtectMode.getProtectModeFromString;

public class HivePartitionManager
{
    public static final String PRESTO_OFFLINE = "presto_offline";

    private static final String PARTITION_VALUE_WILDCARD = "";

    private final String connectorId;
    private final DateTimeZone timeZone;
    private final boolean assumeCanonicalPartitionKeys;
    private final int maxPartitions;
    private final int domainCompactionThreshold;
    private final TypeManager typeManager;

    @Inject
    public HivePartitionManager(
            HiveConnectorId connectorId,
            TypeManager typeManager,
            HiveClientConfig hiveClientConfig)
    {
        this(connectorId,
                typeManager,
                hiveClientConfig.getDateTimeZone(),
                hiveClientConfig.isAssumeCanonicalPartitionKeys(),
                hiveClientConfig.getMaxPartitionsPerScan(),
                hiveClientConfig.getDomainCompactionThreshold());
    }

    public HivePartitionManager(
            HiveConnectorId connectorId,
            TypeManager typeManager,
            DateTimeZone timeZone,
            boolean assumeCanonicalPartitionKeys,
            int maxPartitions,
            int domainCompactionThreshold)
    {
        this.connectorId = requireNonNull(connectorId, "connectorId is null").toString();
        this.timeZone = requireNonNull(timeZone, "timeZone is null");
        this.assumeCanonicalPartitionKeys = assumeCanonicalPartitionKeys;
        checkArgument(maxPartitions >= 1, "maxPartitions must be at least 1");
        this.maxPartitions = maxPartitions;
        checkArgument(domainCompactionThreshold >= 1, "domainCompactionThreshold must be at least 1");
        this.domainCompactionThreshold = domainCompactionThreshold;
        this.typeManager = requireNonNull(typeManager, "typeManager is null");
    }

    public HivePartitionResult getPartitions(SemiTransactionalHiveMetastore metastore, ConnectorTableHandle tableHandle, Constraint<ColumnHandle> constraint)
    {
        HiveTableHandle hiveTableHandle = (HiveTableHandle) tableHandle;
        TupleDomain<ColumnHandle> effectivePredicate = constraint.getSummary();

        SchemaTableName tableName = hiveTableHandle.getSchemaTableName();
        Table table = getTable(metastore, tableName);
        Optional<HiveBucketHandle> hiveBucketHandle = getHiveBucketHandle(connectorId, table);

        List<HiveColumnHandle> partitionColumns = getPartitionKeyColumnHandles(connectorId, table);
        List<HiveBucket> buckets = getHiveBucketNumbers(table, effectivePredicate);
        TupleDomain<HiveColumnHandle> compactEffectivePredicate = toCompactTupleDomain(effectivePredicate, domainCompactionThreshold);

        if (effectivePredicate.isNone()) {
            return new HivePartitionResult(partitionColumns, ImmutableList.of(), TupleDomain.none(), TupleDomain.none(), hiveBucketHandle);
        }

        if (partitionColumns.isEmpty()) {
            return new HivePartitionResult(
                    partitionColumns,
                    ImmutableList.of(new HivePartition(tableName, compactEffectivePredicate, buckets)),
                    effectivePredicate,
                    TupleDomain.none(),
                    hiveBucketHandle);
        }

        List<Type> partitionTypes = partitionColumns.stream()
                .map(column -> typeManager.getType(column.getTypeSignature()))
                .collect(toList());

        List<String> partitionNames = getFilteredPartitionNames(metastore, tableName, partitionColumns, effectivePredicate);

        // do a final pass to filter based on fields that could not be used to filter the partitions
        int partitionCount = 0;
        ImmutableList.Builder<HivePartition> partitions = ImmutableList.builder();
        for (String partitionName : partitionNames) {
            Optional<Map<ColumnHandle, NullableValue>> values = parseValuesAndFilterPartition(partitionName, partitionColumns, partitionTypes, constraint);

            if (values.isPresent()) {
                if (partitionCount == maxPartitions) {
                    throw new PrestoException(HIVE_EXCEEDED_PARTITION_LIMIT, format(
                            "Query over table '%s' can potentially read more than %s partitions",
                            hiveTableHandle.getSchemaTableName(),
                            maxPartitions));
                }
                partitionCount++;
                partitions.add(new HivePartition(tableName, compactEffectivePredicate, partitionName, values.get(), buckets));
            }
        }

        // All partition key domains will be fully evaluated, so we don't need to include those
        TupleDomain<ColumnHandle> remainingTupleDomain = TupleDomain.withColumnDomains(Maps.filterKeys(effectivePredicate.getDomains().get(), not(Predicates.in(partitionColumns))));
        TupleDomain<ColumnHandle> enforcedTupleDomain = TupleDomain.withColumnDomains(Maps.filterKeys(effectivePredicate.getDomains().get(), Predicates.in(partitionColumns)));
        return new HivePartitionResult(partitionColumns, partitions.build(), remainingTupleDomain, enforcedTupleDomain, hiveBucketHandle);
    }

    private static TupleDomain<HiveColumnHandle> toCompactTupleDomain(TupleDomain<ColumnHandle> effectivePredicate, int threshold)
    {
        checkArgument(effectivePredicate.getDomains().isPresent());

        ImmutableMap.Builder<HiveColumnHandle, Domain> builder = ImmutableMap.builder();
        for (Map.Entry<ColumnHandle, Domain> entry : effectivePredicate.getDomains().get().entrySet()) {
            HiveColumnHandle hiveColumnHandle = (HiveColumnHandle) entry.getKey();

            ValueSet values = entry.getValue().getValues();
            ValueSet compactValueSet = values.getValuesProcessor().<Optional<ValueSet>>transform(
                    ranges -> ranges.getRangeCount() > threshold ? Optional.of(ValueSet.ofRanges(ranges.getSpan())) : Optional.empty(),
                    discreteValues -> discreteValues.getValues().size() > threshold ? Optional.of(ValueSet.all(values.getType())) : Optional.empty(),
                    allOrNone -> Optional.empty())
                    .orElse(values);
            builder.put(hiveColumnHandle, Domain.create(compactValueSet, entry.getValue().isNullAllowed()));
        }
        return TupleDomain.withColumnDomains(builder.build());
    }

    private Optional<Map<ColumnHandle, NullableValue>> parseValuesAndFilterPartition(String partitionName, List<HiveColumnHandle> partitionColumns, List<Type> partitionTypes, Constraint<ColumnHandle> constraint)
    {
        List<String> partitionValues = extractPartitionKeyValues(partitionName);

        Map<ColumnHandle, Domain> domains = constraint.getSummary().getDomains().get();
        ImmutableMap.Builder<ColumnHandle, NullableValue> builder = ImmutableMap.builder();
        for (int i = 0; i < partitionColumns.size(); i++) {
            HiveColumnHandle column = partitionColumns.get(i);
            NullableValue parsedValue = parsePartitionValue(partitionName, partitionValues.get(i), partitionTypes.get(i), timeZone);

            Domain allowedDomain = domains.get(column);
            if (allowedDomain != null && !allowedDomain.includesNullableValue(parsedValue.getValue())) {
                return Optional.empty();
            }
            builder.put(column, parsedValue);
        }
        Map<ColumnHandle, NullableValue> values = builder.build();

        if (!constraint.predicate().test(values)) {
            return Optional.empty();
        }

        return Optional.of(values);
    }

    private Table getTable(SemiTransactionalHiveMetastore metastore, SchemaTableName tableName)
    {
        Optional<Table> target = metastore.getTable(tableName.getSchemaName(), tableName.getTableName());
        if (!target.isPresent()) {
            throw new TableNotFoundException(tableName);
        }
        Table table = target.get();

        String protectMode = table.getParameters().get(ProtectMode.PARAMETER_NAME);
        if (protectMode != null && getProtectModeFromString(protectMode).offline) {
            throw new TableOfflineException(tableName, false, null);
        }

        String prestoOffline = table.getParameters().get(PRESTO_OFFLINE);
        if (!isNullOrEmpty(prestoOffline)) {
            throw new TableOfflineException(tableName, true, prestoOffline);
        }

        return table;
    }

    private List<String> getFilteredPartitionNames(SemiTransactionalHiveMetastore metastore, SchemaTableName tableName, List<HiveColumnHandle> partitionKeys, TupleDomain<ColumnHandle> effectivePredicate)
    {
        checkArgument(effectivePredicate.getDomains().isPresent());

        List<String> filter = new ArrayList<>();
        for (HiveColumnHandle partitionKey : partitionKeys) {
            Domain domain = effectivePredicate.getDomains().get().get(partitionKey);
            if (domain != null && domain.isNullableSingleValue()) {
                Object value = domain.getNullableSingleValue();
                if (value == null) {
                    filter.add(HivePartitionKey.HIVE_DEFAULT_DYNAMIC_PARTITION);
                }
                else if (value instanceof Slice) {
                    filter.add(((Slice) value).toStringUtf8());
                }
                else if ((value instanceof Boolean) || (value instanceof Double) || (value instanceof Long)) {
                    if (assumeCanonicalPartitionKeys) {
                        filter.add(value.toString());
                    }
                    else {
                        // Hive treats '0', 'false', and 'False' the same.
                        // However, the metastore differentiates between these.
                        filter.add(PARTITION_VALUE_WILDCARD);
                    }
                }
                else {
                    throw new PrestoException(NOT_SUPPORTED, "Only Boolean, Double and Long partition keys are supported");
                }
            }
            else {
                filter.add(PARTITION_VALUE_WILDCARD);
            }
        }

        // fetch the partition names
        return metastore.getPartitionNamesByParts(tableName.getSchemaName(), tableName.getTableName(), filter)
                .orElseThrow(() -> new TableNotFoundException(tableName));
    }

    public static List<String> extractPartitionKeyValues(String partitionName)
    {
        ImmutableList.Builder<String> values = ImmutableList.builder();

        boolean inKey = true;
        int valueStart = -1;
        for (int i = 0; i < partitionName.length(); i++) {
            char current = partitionName.charAt(i);
            if (inKey) {
                checkArgument(current != '/', "Invalid partition spec: %s", partitionName);
                if (current == '=') {
                    inKey = false;
                    valueStart = i + 1;
                }
            }
            else if (current == '/') {
                checkArgument(valueStart != -1, "Invalid partition spec: %s", partitionName);
                values.add(FileUtils.unescapePathName(partitionName.substring(valueStart, i)));
                inKey = true;
                valueStart = -1;
            }
        }
        checkArgument(!inKey, "Invalid partition spec: %s", partitionName);
        values.add(FileUtils.unescapePathName(partitionName.substring(valueStart, partitionName.length())));

        return values.build();
    }
}