/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.compliance.retention; import java.io.IOException; import java.sql.SQLException; import java.util.Arrays; import java.util.List; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import com.google.common.base.Optional; import com.google.common.base.Preconditions; import lombok.extern.slf4j.Slf4j; import gobblin.compliance.ComplianceConfigurationKeys; import gobblin.compliance.HivePartitionVersion; import gobblin.compliance.HiveProxyQueryExecutor; import gobblin.compliance.purger.HivePurgerQueryTemplate; import gobblin.compliance.utils.PartitionUtils; import gobblin.compliance.utils.ProxyUtils; import gobblin.configuration.State; import gobblin.data.management.retention.dataset.CleanableDataset; import gobblin.data.management.version.DatasetVersion; import gobblin.util.HadoopUtils; import static gobblin.compliance.purger.HivePurgerQueryTemplate.getDropPartitionQuery; import static gobblin.compliance.purger.HivePurgerQueryTemplate.getUseDbQuery; /** * Class to move/clean backups/staging partitions. 
* <p>Depending on the version, the reaper only drops the Hive partition metadata, deletes the data of a
* staging version, or moves the data of a backup version to the backup dir and registers it in the backup db.
*
* @author adsharma
*/
@Slf4j
public class HivePartitionVersionRetentionReaper extends HivePartitionVersionRetentionRunner {
  /** File system obtained through {@link ProxyUtils#getOwnerFs} for the version owner; set by {@link #clean()}. */
  private FileSystem versionOwnerFs;
  /** When true, {@link #clean()} logs and returns without touching data or metadata. */
  private final boolean simulate;
  /** Owner of the version being cleaned, as reported by the version itself. */
  private final Optional<String> versionOwner;
  /** Value of the required {@link ComplianceConfigurationKeys#BACKUP_OWNER} property. */
  private final Optional<String> backUpOwner;

  /**
   * Creates a reaper for a single partition version.
   *
   * @param dataset dataset the version belongs to
   * @param version version to clean; must be a {@link HivePartitionVersion}
   * @param nonDeletableVersionLocations locations passed through to the parent runner
   * @param state job state; must contain {@link ComplianceConfigurationKeys#BACKUP_OWNER}
   * @throws IllegalArgumentException if the backup owner property is missing
   */
  public HivePartitionVersionRetentionReaper(CleanableDataset dataset, DatasetVersion version,
      List<String> nonDeletableVersionLocations, State state) {
    super(dataset, version, nonDeletableVersionLocations, state);
    this.versionOwner = ((HivePartitionVersion) this.datasetVersion).getOwner();
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.BACKUP_OWNER),
        "Missing required property " + ComplianceConfigurationKeys.BACKUP_OWNER);
    this.backUpOwner = Optional.fromNullable(this.state.getProp(ComplianceConfigurationKeys.BACKUP_OWNER));
    this.simulate = this.state.getPropAsBoolean(ComplianceConfigurationKeys.COMPLIANCE_JOB_SIMULATE,
        ComplianceConfigurationKeys.DEFAULT_COMPLIANCE_JOB_SIMULATE);
  }

  /**
   * Cleans this version according to the following rules, checked in order:
   * <ul>
   *   <li>If simulate is set to true, nothing is changed and the method simply returns.</li>
   *   <li>If the version points to a non-existing location, only the partition metadata is dropped.</li>
   *   <li>If the version points to the same location as the dataset, the data is kept and only the
   *       partition metadata is dropped.</li>
   *   <li>If the version is a staging version, its data is deleted and its metadata is dropped.</li>
   *   <li>If the version is a backup version, it is registered in the backup db, its data is moved to the
   *       backup dir and its current metadata is dropped.</li>
   *   <li>Otherwise only the partition metadata is dropped.</li>
   * </ul>
   * The query executor is closed when the method finishes.
   *
   * @throws IOException if a file system operation or a Hive query fails, including a failed creation of
   *         the backup dir or a failed move of the data
   */
  @Override
  public void clean() throws IOException {
    HivePartitionRetentionVersion version = (HivePartitionRetentionVersion) this.datasetVersion;
    String completeName = version.datasetURN();

    // Simulation must never modify data or metadata, so it is checked before anything else.
    if (this.simulate) {
      log.info("Simulate is set to true. Won't move the version {}", completeName);
      return;
    }

    Path versionLocation = version.getLocation();
    Path datasetLocation = ((CleanableHivePartitionDataset) this.cleanableDataset).getLocation();
    State state = new State(this.state);
    this.versionOwnerFs = ProxyUtils.getOwnerFs(state, this.versionOwner);

    try (HiveProxyQueryExecutor queryExecutor =
        ProxyUtils.getQueryExecutor(state, this.versionOwner, this.backUpOwner)) {
      if (!this.versionOwnerFs.exists(versionLocation)) {
        log.info("Data versionLocation doesn't exist. Metadata will be dropped for the version {}", completeName);
      } else if (datasetLocation.toString().equalsIgnoreCase(versionLocation.toString())) {
        log.info(
            "Dataset location is same as version location. Won't delete the data but metadata will be dropped for the version {}",
            completeName);
      } else if (completeName.contains(ComplianceConfigurationKeys.STAGING)) {
        log.info("Deleting data from version {}", completeName);
        if (!this.versionOwnerFs.delete(versionLocation, true)) {
          log.warn("Could not delete data at {} for version {}", versionLocation, completeName);
        }
      } else if (completeName.contains(ComplianceConfigurationKeys.BACKUP)) {
        executeAlterQueries(queryExecutor);
        moveDataToBackup(versionLocation);
      }
      executeDropVersionQueries(queryExecutor);
    }
  }

  /**
   * Moves the data of a backup version under the parent of the new version location and sets the
   * permissions of that directory. Fails instead of silently continuing, so that the source metadata
   * is not dropped when the data could not be moved.
   */
  private void moveDataToBackup(Path versionLocation) throws IOException {
    Path newVersionLocation = getNewVersionLocation();
    Path newVersionLocationParent = newVersionLocation.getParent();

    log.info("Creating new dir {}", newVersionLocationParent);
    if (!this.versionOwnerFs.mkdirs(newVersionLocationParent)) {
      throw new IOException("Failed to create dir " + newVersionLocationParent);
    }

    log.info("Moving data from {} to {}", versionLocation, newVersionLocation);
    // NOTE(review): the rename target is the parent dir while the log reports the full new location;
    // presumably this relies on the file system moving the source into the existing dir - confirm.
    if (!this.versionOwnerFs.rename(versionLocation, newVersionLocationParent)) {
      throw new IOException("Failed to move data from " + versionLocation + " to " + newVersionLocationParent);
    }

    // Full access for user and group, none for others.
    FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.NONE);
    HadoopUtils.setPermissions(newVersionLocationParent, this.versionOwner, this.backUpOwner, this.versionOwnerFs,
        permission);
  }

  // These methods are not implemented by this class

  /** No-op. */
  @Override
  public void preCleanAction() {
  }

  /** No-op. */
  @Override
  public void postCleanAction() {
  }

  /**
   * Registers this version in the backup db: switches to the backup db, runs the create-table query for
   * the backup table and adds a partition pointing to the new version location. All queries are executed
   * as the backup owner.
   *
   * @throws IllegalArgumentException if {@link ComplianceConfigurationKeys#BACKUP_DB} is not set
   * @throws IOException wrapping any {@link SQLException} raised by the queries
   */
  private void executeAlterQueries(HiveProxyQueryExecutor queryExecutor) throws IOException {
    HivePartitionRetentionVersion version = (HivePartitionRetentionVersion) this.datasetVersion;
    String partitionSpecString = PartitionUtils.getPartitionSpecString(version.getSpec());
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.BACKUP_DB),
        "Missing required property " + ComplianceConfigurationKeys.BACKUP_DB);
    String backUpDb = this.state.getProp(ComplianceConfigurationKeys.BACKUP_DB);
    String backUpTableName = getCompleteTableName(version);
    try {
      queryExecutor.executeQuery(getUseDbQuery(backUpDb), this.backUpOwner);
      queryExecutor.executeQuery(HivePurgerQueryTemplate
          .getCreateTableQuery(backUpDb + "." + backUpTableName, version.getDbName(), version.getTableName(),
              getBackUpTableLocation(version)), this.backUpOwner);

      // The file format is only passed to the add-partition query when explicitly requested.
      Optional<String> fileFormat = Optional.absent();
      if (this.state.getPropAsBoolean(ComplianceConfigurationKeys.SPECIFY_PARTITION_FORMAT,
          ComplianceConfigurationKeys.DEFAULT_SPECIFY_PARTITION_FORMAT)) {
        fileFormat = version.getFileFormat();
      }
      queryExecutor.executeQuery(HivePurgerQueryTemplate
          .getAddPartitionQuery(backUpTableName, partitionSpecString, fileFormat,
              Optional.fromNullable(getNewVersionLocation().toString())), this.backUpOwner);
    } catch (SQLException e) {
      throw new IOException(e);
    }
  }

  /**
   * Drops the partition of this version from its table, executing the queries as the version owner.
   *
   * @throws IOException wrapping any {@link SQLException} raised by the queries
   */
  private void executeDropVersionQueries(HiveProxyQueryExecutor queryExecutor) throws IOException {
    HivePartitionRetentionVersion version = (HivePartitionRetentionVersion) this.datasetVersion;
    String partitionSpec = PartitionUtils.getPartitionSpecString(version.getSpec());
    try {
      queryExecutor.executeQuery(getUseDbQuery(version.getDbName()), this.versionOwner);
      queryExecutor.executeQuery(getDropPartitionQuery(version.getTableName(), partitionSpec), this.versionOwner);
    } catch (SQLException e) {
      throw new IOException(e);
    }
  }

  /** Returns the timestamp reported by this version. */
  private String getVersionTimeStamp() {
    return ((HivePartitionRetentionVersion) this.datasetVersion).getTimeStamp();
  }

  /** Returns the name used for the backup table, which is the table name of the given version. */
  private String getCompleteTableName(HivePartitionVersion version) {
    return version.getTableName();
  }

  /**
   * Returns the backup table location, built as {@code <backup dir>/<table name>}.
   *
   * @throws IllegalArgumentException if {@link ComplianceConfigurationKeys#BACKUP_DIR} is not set
   */
  private String getBackUpTableLocation(HivePartitionVersion version) {
    Preconditions.checkArgument(this.state.contains(ComplianceConfigurationKeys.BACKUP_DIR),
        "Missing required property " + ComplianceConfigurationKeys.BACKUP_DIR);
    return StringUtils
        .join(Arrays.asList(this.state.getProp(ComplianceConfigurationKeys.BACKUP_DIR), getCompleteTableName(version)),
            '/');
  }

  /** Returns the new version location, built as {@code <backup table location>/<timestamp>/<version name>}. */
  private Path getNewVersionLocation() {
    HivePartitionVersion version = (HivePartitionRetentionVersion) this.datasetVersion;
    String backUpTableLocation = getBackUpTableLocation(version);
    return new Path(
        StringUtils.join(Arrays.asList(backUpTableLocation, getVersionTimeStamp(), version.getName()), '/'));
  }
}