/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.data.management.copy.hive;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;

import com.google.common.base.Optional;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.io.Closer;

import gobblin.data.management.copy.CopyEntity;
import gobblin.data.management.copy.CopyableFile;
import gobblin.data.management.copy.entities.PostPublishStep;
import gobblin.data.management.copy.entities.PrePublishStep;
import gobblin.hive.HiveRegisterStep;
import gobblin.hive.metastore.HiveMetaStoreUtils;
import gobblin.hive.spec.HiveSpec;
import gobblin.hive.spec.SimpleHiveSpec;
import gobblin.metrics.event.EventSubmitter;
import gobblin.metrics.event.MultiTimingEvent;
import gobblin.util.commit.DeleteFileCommitStep;

import lombok.Getter;
import lombok.extern.slf4j.Slf4j;


/**
 * A {@link HiveFileSet} for Hive partitions. Creates {@link CopyEntity}s for a single Hive partition.
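 * The generated entities include a pre-publish step deleting stale files at the target, a
 * {@link CopyableFile} per file to copy, and a post-publish step registering the partition in the
 * target Hive metastore.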
 */
@Getter
@Slf4j
public class HivePartitionFileSet extends HiveFileSet {

  private HiveCopyEntityHelper hiveCopyEntityHelper;
  private final Partition partition;
  private final Properties properties;
  private Optional<Partition> existingTargetPartition;
  private final EventSubmitter eventSubmitter;

  public HivePartitionFileSet(HiveCopyEntityHelper hiveCopyEntityHelper, Partition partition, Properties properties) {
    super(partition.getCompleteName(), hiveCopyEntityHelper.getDataset());
    this.hiveCopyEntityHelper = hiveCopyEntityHelper;
    this.partition = partition;
    this.properties = properties;
    this.existingTargetPartition =
        Optional.fromNullable(this.hiveCopyEntityHelper.getTargetPartitions().get(this.partition.getValues()));
    this.eventSubmitter =
        new EventSubmitter.Builder(this.hiveCopyEntityHelper.getDataset().getMetricContext(), "hive.dataset.copy")
            .addMetadata("Partition", this.partition.getName()).build();
  }

  @Override
  protected Collection<CopyEntity> generateCopyEntities() throws IOException {
    try (Closer closer = Closer.create()) {
      MultiTimingEvent multiTimer = closer.register(new MultiTimingEvent(this.eventSubmitter, "PartitionCopy", true));

      int stepPriority = 0;
      String fileSet = HiveCopyEntityHelper.gson.toJson(this.partition.getValues());

      List<CopyEntity> copyEntities = Lists.newArrayList();
      stepPriority = hiveCopyEntityHelper.addSharedSteps(copyEntities, fileSet, stepPriority);

      multiTimer.nextStage(HiveCopyEntityHelper.Stages.COMPUTE_TARGETS);
      Path targetPath = hiveCopyEntityHelper.getTargetLocation(hiveCopyEntityHelper.getDataset().fs,
          hiveCopyEntityHelper.getTargetFs(), this.partition.getDataLocation(), Optional.of(this.partition));
      Partition targetPartition = getTargetPartition(this.partition, targetPath);

      multiTimer.nextStage(HiveCopyEntityHelper.Stages.EXISTING_PARTITION);
      if (this.existingTargetPartition.isPresent()) {
        hiveCopyEntityHelper.getTargetPartitions().remove(this.partition.getValues());
        try {
          checkPartitionCompatibility(targetPartition, this.existingTargetPartition.get());
        } catch (IOException ioe) {
          if (hiveCopyEntityHelper.getExistingEntityPolicy() != HiveCopyEntityHelper.ExistingEntityPolicy.REPLACE_PARTITIONS) {
            log.error("Source and target partitions are not compatible. Aborting copy of partition " + this.partition,
                ioe);
            return Lists.newArrayList();
          }
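          // Policy allows replacing existing partitions: warn, then schedule deregistration of the
          // incompatible target partition so it can be re-registered after the copy.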
          log.warn("Source and target partitions are not compatible. Will override target partition: "
              + ioe.getMessage());
          log.debug("Incompatibility details: ", ioe);
          stepPriority = hiveCopyEntityHelper.addPartitionDeregisterSteps(copyEntities, fileSet, stepPriority,
              hiveCopyEntityHelper.getTargetTable(), this.existingTargetPartition.get());
          this.existingTargetPartition = Optional.absent();
        }
      }

      multiTimer.nextStage(HiveCopyEntityHelper.Stages.PARTITION_SKIP_PREDICATE);
      if (hiveCopyEntityHelper.getFastPartitionSkip().isPresent()
          && hiveCopyEntityHelper.getFastPartitionSkip().get().apply(this)) {
        log.info(String.format("Skipping copy of partition %s due to fast partition skip predicate.",
            this.partition.getCompleteName()));
        return Lists.newArrayList();
      }

      HiveSpec partitionHiveSpec = new SimpleHiveSpec.Builder<>(targetPath)
          .withTable(HiveMetaStoreUtils.getHiveTable(hiveCopyEntityHelper.getTargetTable().getTTable()))
          .withPartition(Optional.of(HiveMetaStoreUtils.getHivePartition(targetPartition.getTPartition()))).build();
      HiveRegisterStep register = new HiveRegisterStep(hiveCopyEntityHelper.getTargetURI(), partitionHiveSpec,
          hiveCopyEntityHelper.getHiveRegProps());
      copyEntities.add(new PostPublishStep(fileSet, Maps.<String, String> newHashMap(), register, stepPriority++));

      multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_LOCATIONS);
      HiveLocationDescriptor sourceLocation =
          HiveLocationDescriptor.forPartition(this.partition, hiveCopyEntityHelper.getDataset().fs, this.properties);
      HiveLocationDescriptor desiredTargetLocation =
          HiveLocationDescriptor.forPartition(targetPartition, hiveCopyEntityHelper.getTargetFs(), this.properties);
      Optional<HiveLocationDescriptor> existingTargetLocation = this.existingTargetPartition.isPresent()
          ? Optional.of(HiveLocationDescriptor.forPartition(this.existingTargetPartition.get(),
              hiveCopyEntityHelper.getTargetFs(), this.properties))
          : Optional.<HiveLocationDescriptor> absent();

      multiTimer.nextStage(HiveCopyEntityHelper.Stages.FULL_PATH_DIFF);
      HiveCopyEntityHelper.DiffPathSet diffPathSet = HiveCopyEntityHelper.fullPathDiff(sourceLocation,
          desiredTargetLocation, existingTargetLocation, Optional.<Partition> absent(), multiTimer,
          hiveCopyEntityHelper);

      multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_DELETE_UNITS);
      if (diffPathSet.pathsToDelete.size() > 0) {
        DeleteFileCommitStep deleteStep = DeleteFileCommitStep.fromPaths(hiveCopyEntityHelper.getTargetFs(),
            diffPathSet.pathsToDelete, hiveCopyEntityHelper.getDataset().properties);
        copyEntities.add(new PrePublishStep(fileSet, Maps.<String, String> newHashMap(), deleteStep, stepPriority++));
      }

      multiTimer.nextStage(HiveCopyEntityHelper.Stages.CREATE_COPY_UNITS);
      for (CopyableFile.Builder builder : hiveCopyEntityHelper.getCopyableFilesFromPaths(diffPathSet.filesToCopy,
          hiveCopyEntityHelper.getConfiguration(), Optional.of(this.partition))) {
        copyEntities.add(builder.fileSet(fileSet).checksum(new byte[0]).build());
      }

      log.info("Created {} copy entities for partition {}", copyEntities.size(), this.partition.getCompleteName());

      return copyEntities;
    }
  }

  private Partition getTargetPartition(Partition originPartition, Path targetLocation) throws IOException {
    try {
      Partition targetPartition = new Partition(this.hiveCopyEntityHelper.getTargetTable(),
          originPartition.getTPartition().deepCopy());
      targetPartition.getTable().setDbName(this.hiveCopyEntityHelper.getTargetDatabase());
      targetPartition.getTPartition().setDbName(this.hiveCopyEntityHelper.getTargetDatabase());
      targetPartition.getTPartition().putToParameters(HiveDataset.REGISTERER, HiveCopyEntityHelper.GOBBLIN_DISTCP);
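      // Record when Gobblin distcp generated this registration, so consumers can tell fresh copies apart.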
      targetPartition.getTPartition().putToParameters(HiveDataset.REGISTRATION_GENERATION_TIME_MILLIS,
          Long.toString(this.hiveCopyEntityHelper.getStartTime()));
      targetPartition.setLocation(targetLocation.toString());
      targetPartition.getTPartition().unsetCreateTime();
      return targetPartition;
    } catch (HiveException he) {
      throw new IOException(he);
    }
  }

  private static void checkPartitionCompatibility(Partition desiredTargetPartition, Partition existingTargetPartition)
      throws IOException {
    if (!desiredTargetPartition.getDataLocation().equals(existingTargetPartition.getDataLocation())) {
      throw new IOException(
          String.format("Desired target location %s and already registered target location %s do not agree.",
              desiredTargetPartition.getDataLocation(), existingTargetPartition.getDataLocation()));
    }
  }
}