/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.compliance.retention; import java.io.IOException; import java.sql.SQLException; import java.util.List; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import com.google.common.base.Optional; import lombok.extern.slf4j.Slf4j; import gobblin.compliance.ComplianceConfigurationKeys; import gobblin.compliance.HiveProxyQueryExecutor; import gobblin.compliance.purger.HivePurgerQueryTemplate; import gobblin.compliance.utils.PartitionUtils; import gobblin.compliance.utils.ProxyUtils; import gobblin.configuration.State; import gobblin.data.management.retention.dataset.CleanableDataset; import gobblin.data.management.version.DatasetVersion; import gobblin.util.HadoopUtils; /** * A version cleaner for the {@link HivePartitionRetentionVersion}. * * A version will be considered as clean only if it's metadata no longer exists in the db and the data * it was pointing to no longer exists. 
*
* @author adsharma
*/
@Slf4j
public class HivePartitionVersionRetentionCleaner extends HivePartitionVersionRetentionRunner {
  /** File system obtained for the version owner; assigned at the start of every {@link #clean()} call. */
  private FileSystem fs;

  /** When true, {@link #clean()} must neither delete data nor drop partition metadata. */
  private final boolean simulate;

  /** Owner of the version, as reported by the {@link HivePartitionRetentionVersion}. */
  private final Optional<String> versionOwner;

  /**
   * Creates a cleaner for a single partition version.
   *
   * @param dataset the dataset the version belongs to
   * @param version the version to clean; it is cast to {@link HivePartitionRetentionVersion}
   * @param nonDeletableVersionLocations locations whose data must never be deleted
   * @param state job state; {@link ComplianceConfigurationKeys#COMPLIANCE_JOB_SIMULATE} is read from it
   */
  public HivePartitionVersionRetentionCleaner(CleanableDataset dataset, DatasetVersion version,
      List<String> nonDeletableVersionLocations, State state) {
    super(dataset, version, nonDeletableVersionLocations, state);
    this.versionOwner = ((HivePartitionRetentionVersion) this.datasetVersion).getOwner();
    this.simulate = this.state.getPropAsBoolean(ComplianceConfigurationKeys.COMPLIANCE_JOB_SIMULATE,
        ComplianceConfigurationKeys.DEFAULT_COMPLIANCE_JOB_SIMULATE);
  }

  /**
   * Cleans the version: optionally deletes its data, then drops its partition.
   *
   * <ul>
   *   <li>If the version location does not exist, only the partition is dropped.</li>
   *   <li>If the version location equals the dataset location (case-insensitive comparison),
   *       only the partition is dropped.</li>
   *   <li>If the version location is one of the non deletable version locations,
   *       only the partition is dropped.</li>
   *   <li>If the version location has content, the data is deleted recursively and then the
   *       partition is dropped.</li>
   *   <li>Otherwise (location exists but has no content) only the partition is dropped.</li>
   * </ul>
   *
   * If simulate is set to true, the decision above is still logged, but the method returns before
   * deleting any data or dropping any partition. The query executor is closed when this method exits.
   *
   * @throws IOException if a file system call fails, if the data could not be deleted, or if a
   *         drop query fails with a {@link SQLException}
   */
  @Override
  public void clean() throws IOException {
    HivePartitionRetentionVersion partitionVersion = (HivePartitionRetentionVersion) this.datasetVersion;
    Path versionLocation = partitionVersion.getLocation();
    Path datasetLocation = ((CleanableHivePartitionDataset) this.cleanableDataset).getLocation();
    String completeName = partitionVersion.datasetURN();
    State state = new State(this.state);

    this.fs = ProxyUtils.getOwnerFs(state, this.versionOwner);
    try (HiveProxyQueryExecutor queryExecutor = ProxyUtils.getQueryExecutor(state, this.versionOwner)) {
      log.info("Trying to clean version " + completeName);

      // Decide first, mutate later: this keeps every destructive step behind the simulate guard below.
      boolean deleteData = false;
      if (!this.fs.exists(versionLocation)) {
        log.info("Data versionLocation doesn't exist. Metadata will be dropped for the version " + completeName);
      } else if (datasetLocation.toString().equalsIgnoreCase(versionLocation.toString())) {
        log.info(
            "Dataset location is same as version location. Won't delete the data but metadata will be dropped for the version "
                + completeName);
      } else if (this.nonDeletableVersionLocations.contains(versionLocation.toString())) {
        log.info(
            "This version corresponds to the non deletable version. Won't delete the data but metadata will be dropped for the version "
                + completeName);
      } else if (HadoopUtils.hasContent(this.fs, versionLocation)) {
        deleteData = true;
      }

      // Simulate must be side-effect free: previously only the data-deletion branch honored it,
      // so partitions were still dropped in every other branch.
      if (this.simulate) {
        log.info("Simulate is set to true. Won't delete the partition " + completeName);
        return;
      }

      if (deleteData) {
        log.info("Deleting data from the version " + completeName);
        // Do not drop the metadata if the data is still there; that would orphan the data.
        // The exists() re-check tolerates a false return caused by the path vanishing concurrently.
        if (!this.fs.delete(versionLocation, true) && this.fs.exists(versionLocation)) {
          throw new IOException(
              "Failed to delete data at " + versionLocation + " for the version " + completeName);
        }
      }
      executeDropVersionQueries(queryExecutor);
    }
  }

  // These methods are not implemented by this class
  @Override
  public void preCleanAction() {
  }

  @Override
  public void postCleanAction() {
  }

  /**
   * Runs the "use database" query followed by the "drop partition" query for this version,
   * both on behalf of the version owner.
   *
   * @param queryExecutor executor used to run the queries; the caller is responsible for closing it
   * @throws IOException wrapping any {@link SQLException} raised by the executor
   */
  private void executeDropVersionQueries(HiveProxyQueryExecutor queryExecutor) throws IOException {
    HivePartitionRetentionVersion partitionVersion = (HivePartitionRetentionVersion) this.datasetVersion;
    String dbName = partitionVersion.getDbName();
    String tableName = partitionVersion.getTableName();
    String partitionSpec = PartitionUtils.getPartitionSpecString(partitionVersion.getSpec());
    try {
      queryExecutor.executeQuery(HivePurgerQueryTemplate.getUseDbQuery(dbName), this.versionOwner);
      queryExecutor
          .executeQuery(HivePurgerQueryTemplate.getDropPartitionQuery(tableName, partitionSpec), this.versionOwner);
    } catch (SQLException e) {
      throw new IOException(e);
    }
  }
}