/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package gobblin.compliance.purger;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.metastore.api.FieldSchema;

import com.google.common.base.Optional;

import gobblin.compliance.utils.PartitionUtils;


/**
 * This class creates all queries required by {@link HivePurgerConverter}.
 *
 * @author adsharma
 */
public class HivePurgerQueryTemplate {
  /**
   * Use the staging db for creating staging tables.
   * Alter queries don't work with the dbName.tableName syntax.
   */
  public static String getUseDbQuery(String dbName) {
    return "USE " + dbName;
  }

  public static String getAutoConvertJoinProperty() {
    return "SET hive.auto.convert.join=false";
  }

  /**
   * Allows the insert query to specify only partition column names instead of a full partition spec.
   */
  public static String getDynamicPartitionNonStrictModeProperty() {
    return "SET hive.exec.dynamic.partition.mode=nonstrict";
  }

  /**
   * To get around OOM errors, we need to set hive.optimize.sort.dynamic.partition to true so that the data is
   * sorted by partition key and only one partition needs to be accessed at a time.
   */
  public static String getOptimizeSortDynamicPartition() {
    return "SET hive.optimize.sort.dynamic.partition=true";
  }

  /**
   * If the staging table doesn't exist, this will create it.
   */
  public static String getCreateTableQuery(String completeNewTableName, String likeTableDbName, String likeTableName,
      String location) {
    return getCreateTableQuery(completeNewTableName, likeTableDbName, likeTableName) + " LOCATION " + PartitionUtils
        .getQuotedString(location);
  }

  public static String getCreateTableQuery(String completeNewTableName, String likeTableDbName, String likeTableName) {
    return "CREATE EXTERNAL TABLE IF NOT EXISTS " + completeNewTableName + " LIKE " + likeTableDbName + "."
        + likeTableName;
  }

  /**
   * This query will create a partition in the staging table and insert the records whose compliance id is not
   * contained in the compliance id table.
   */
  public static String getInsertQuery(PurgeableHivePartitionDataset dataset) {
    return "INSERT OVERWRITE" + " TABLE " + dataset.getCompleteStagingTableName() + " PARTITION (" + PartitionUtils
        .getPartitionSpecString(dataset.getSpec()) + ")" + " SELECT /*+MAPJOIN(b) */ " + getCommaSeparatedColumnNames(
        dataset.getCols(), "a.") + " FROM " + dataset.getDbName() + "." + dataset.getTableName() + " a LEFT JOIN "
        + dataset.getComplianceIdTable() + " b" + " ON a." + dataset.getComplianceField() + "=b." + dataset
        .getComplianceId() + " WHERE b." + dataset.getComplianceId() + " IS NULL AND " + getWhereClauseForPartition(
        dataset.getSpec(), "a.");
  }
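
  // Illustration only (not from the original source): for a hypothetical dataset backed by tracking.event,
  // with compliance field "memberId", a compliance id table "compliance.ids" keyed on column "id", and
  // partition spec [datepartition : '2016-01-01-00'], getInsertQuery above builds a query of roughly this shape:
  //
  //   INSERT OVERWRITE TABLE <completeStagingTableName>
  //   PARTITION (<output of PartitionUtils.getPartitionSpecString(spec)>)
  //   SELECT /*+MAPJOIN(b) */ a.col1, a.col2, ...
  //   FROM tracking.event a LEFT JOIN compliance.ids b ON a.memberId=b.id
  //   WHERE b.id IS NULL AND a.datepartition='2016-01-01-00'
  //
  // Rows whose compliance id matches an entry in the compliance id table are filtered out; the remaining
  // rows are written into the staging partition.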
  public static String getAddPartitionQuery(String tableName, String partitionSpec, Optional<String> fileFormat,
      Optional<String> location) {
    String query = "ALTER TABLE " + tableName + " ADD IF NOT EXISTS" + " PARTITION (" + partitionSpec + ")";
    if (fileFormat.isPresent()) {
      query = query + " FILEFORMAT " + fileFormat.get();
    }
    if (location.isPresent()) {
      query = query + " LOCATION " + PartitionUtils.getQuotedString(location.get());
    }
    return query;
  }

  public static String getAlterTableLocationQuery(String tableName, String partitionSpec, String location) {
    return "ALTER TABLE " + tableName + " PARTITION (" + partitionSpec + ")" + " SET LOCATION " + PartitionUtils
        .getQuotedString(location);
  }

  public static String getDropTableQuery(String dbName, String tableName) {
    return "DROP TABLE IF EXISTS " + dbName + "." + tableName;
  }

  public static String getDropPartitionQuery(String tableName, String partitionSpec) {
    return "ALTER TABLE " + tableName + " DROP IF EXISTS" + " PARTITION (" + partitionSpec + ")";
  }

  public static String getUpdatePartitionMetadataQuery(String dbName, String tableName, String partitionSpec) {
    return "ANALYZE TABLE " + dbName + "." + tableName + " PARTITION (" + partitionSpec + ") COMPUTE STATISTICS";
  }

  /**
   * Will return all the queries needed to populate the staging table partition.
   * This won't include the alter table partition location query.
   */
  public static List<String> getPurgeQueries(PurgeableHivePartitionDataset dataset) {
    List<String> queries = new ArrayList<>();
    queries.add(getUseDbQuery(dataset.getStagingDb()));
    queries.add(getInsertQuery(dataset));
    return queries;
  }

  public static List<String> getCreateStagingTableQuery(PurgeableHivePartitionDataset dataset) {
    List<String> queries = new ArrayList<>();
    queries.add(getUseDbQuery(dataset.getStagingDb()));
    queries.add(getAutoConvertJoinProperty());
    queries.add(getCreateTableQuery(dataset.getCompleteStagingTableName(), dataset.getDbName(), dataset.getTableName(),
        dataset.getStagingTableLocation()));
    Optional<String> fileFormat = Optional.absent();
    if (dataset.getSpecifyPartitionFormat()) {
      fileFormat = dataset.getFileFormat();
    }
    queries.add(getAddPartitionQuery(dataset.getCompleteStagingTableName(),
        PartitionUtils.getPartitionSpecString(dataset.getSpec()), fileFormat, Optional.<String>absent()));
    return queries;
  }

  /**
   * Will return all the queries needed to have a backup table partition pointing to the original partition data
   * location.
   */
  public static List<String> getBackupQueries(PurgeableHivePartitionDataset dataset) {
    List<String> queries = new ArrayList<>();
    queries.add(getUseDbQuery(dataset.getDbName()));
    queries.add(getCreateTableQuery(dataset.getCompleteBackupTableName(), dataset.getDbName(), dataset.getTableName()));
    Optional<String> fileFormat = Optional.absent();
    if (dataset.getSpecifyPartitionFormat()) {
      fileFormat = dataset.getFileFormat();
    }
    queries.add(
        getAddPartitionQuery(dataset.getBackupTableName(), PartitionUtils.getPartitionSpecString(dataset.getSpec()),
            fileFormat, Optional.fromNullable(dataset.getOriginalPartitionLocation())));
    return queries;
  }
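
  // Sketch (not from the original source) of how a caller such as HivePurgerConverter might sequence these
  // query groups for one purgeable partition; the exact ordering and error handling live in the converter and
  // writer, not in this class:
  //
  //   1. getBackupQueries(dataset)                         -- point a backup table partition at the original data
  //   2. getCreateStagingTableQuery(dataset)               -- create the staging table and its partition
  //   3. getPurgeQueries(dataset)                          -- rewrite non-purgeable rows into the staging partition
  //   4. getAlterOriginalPartitionLocationQueries(dataset) -- point the original partition at the staged data
  //   5. getDropStagingTableQuery(dataset)                 -- drop the staging partition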
  /**
   * Will return all the queries needed to alter the location of the table partition.
   * The alter table partition query doesn't work with the dbName.tableName syntax.
   */
  public static List<String> getAlterOriginalPartitionLocationQueries(PurgeableHivePartitionDataset dataset) {
    List<String> queries = new ArrayList<>();
    queries.add(getUseDbQuery(dataset.getDbName()));
    String partitionSpecString = PartitionUtils.getPartitionSpecString(dataset.getSpec());
    queries.add(
        getAlterTableLocationQuery(dataset.getTableName(), partitionSpecString, dataset.getStagingPartitionLocation()));
    queries.add(getUpdatePartitionMetadataQuery(dataset.getDbName(), dataset.getTableName(), partitionSpecString));
    return queries;
  }

  public static List<String> getDropStagingTableQuery(PurgeableHivePartitionDataset dataset) {
    List<String> queries = new ArrayList<>();
    queries.add(getUseDbQuery(dataset.getStagingDb()));
    queries.add(
        getDropPartitionQuery(dataset.getStagingTableName(), PartitionUtils.getPartitionSpecString(dataset.getSpec())));
    return queries;
  }

  /**
   * This method builds the where clause for the insertion query.
   * If prefix is "a.", then it builds a.datepartition='2016-01-01-00' AND a.size='12345'
   * from [datepartition : '2016-01-01-00', size : '12345'].
   */
  public static String getWhereClauseForPartition(Map<String, String> spec, String prefix) {
    StringBuilder sb = new StringBuilder();
    for (Map.Entry<String, String> entry : spec.entrySet()) {
      if (!sb.toString().isEmpty()) {
        sb.append(" AND ");
      }
      sb.append(prefix + entry.getKey());
      sb.append("=");
      sb.append(PartitionUtils.getQuotedString(entry.getValue()));
    }
    return sb.toString();
  }

  public static String getCommaSeparatedColumnNames(List<FieldSchema> cols, String prefix) {
    StringBuilder sb = new StringBuilder();
    for (FieldSchema fs : cols) {
      if (!sb.toString().isEmpty()) {
        sb.append(", ");
      }
      sb.append(prefix);
      sb.append(fs.getName());
    }
    return sb.toString();
  }
}