/* * Copyright 2015 herd contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.finra.herd.service.helper; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.collections4.MultiValuedMap; import org.apache.commons.collections4.multimap.ArrayListValuedHashMap; import org.apache.commons.io.Charsets; import org.apache.commons.lang3.BooleanUtils; import org.apache.commons.lang3.CharUtils; import org.apache.commons.lang3.StringUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import org.springframework.util.Assert; import org.finra.herd.core.helper.ConfigurationHelper; import org.finra.herd.dao.StorageFileDao; import org.finra.herd.dao.StorageUnitDao; import org.finra.herd.model.ObjectNotFoundException; import org.finra.herd.model.api.xml.BusinessObjectDataDdlOutputFormatEnum; import org.finra.herd.model.api.xml.BusinessObjectDataDdlRequest; import org.finra.herd.model.api.xml.BusinessObjectDataKey; import org.finra.herd.model.api.xml.BusinessObjectFormat; import org.finra.herd.model.api.xml.BusinessObjectFormatDdlRequest; import org.finra.herd.model.api.xml.BusinessObjectFormatKey; import 
org.finra.herd.model.api.xml.SchemaColumn; import org.finra.herd.model.dto.ConfigurationValue; import org.finra.herd.model.dto.HivePartitionDto; import org.finra.herd.model.jpa.BusinessObjectDataEntity; import org.finra.herd.model.jpa.BusinessObjectDataStatusEntity; import org.finra.herd.model.jpa.BusinessObjectFormatEntity; import org.finra.herd.model.jpa.CustomDdlEntity; import org.finra.herd.model.jpa.FileTypeEntity; import org.finra.herd.model.jpa.StorageEntity; import org.finra.herd.model.jpa.StoragePlatformEntity; import org.finra.herd.model.jpa.StorageUnitEntity; /** * The DDL generator for Hive 13. */ @Component @SuppressFBWarnings(value = "VA_FORMAT_STRING_USES_NEWLINE", justification = "We will use the standard carriage return character.") public class Hive13DdlGenerator extends DdlGenerator { /** * The partition key value for business object data without partitioning. */ public static final String NO_PARTITIONING_PARTITION_KEY = "partition"; /** * The partition value for business object data without partitioning. */ public static final String NO_PARTITIONING_PARTITION_VALUE = "none"; /** * Hive file format for ORC files. */ public static final String ORC_HIVE_FILE_FORMAT = "ORC"; /** * Hive file format for PARQUET files. */ public static final String PARQUET_HIVE_FILE_FORMAT = "PARQUET"; /** * Hive file format for text files. 
*/
    public static final String TEXT_HIVE_FILE_FORMAT = "TEXTFILE";

    /**
     * Matches a single quote character that is not immediately preceded by a backslash. The pattern is compiled once and shared, since compiled
     * {@link Pattern} instances are immutable and safe for use by multiple threads.
     */
    private static final Pattern UNESCAPED_SINGLE_QUOTE_PATTERN = Pattern.compile("(?<!\\\\)(')");

    @Autowired
    private BusinessObjectDataDaoHelper businessObjectDataDaoHelper;

    @Autowired
    private BusinessObjectDataHelper businessObjectDataHelper;

    @Autowired
    private BusinessObjectFormatHelper businessObjectFormatHelper;

    @Autowired
    private ConfigurationHelper configurationHelper;

    @Autowired
    private S3KeyPrefixHelper s3KeyPrefixHelper;

    @Autowired
    private StorageFileDao storageFileDao;

    @Autowired
    private StorageFileHelper storageFileHelper;

    @Autowired
    private StorageHelper storageHelper;

    @Autowired
    private StorageUnitDao storageUnitDao;

    @Autowired
    private StorageUnitHelper storageUnitHelper;

    /**
     * Escapes single quote characters, if not already escaped, with an extra backslash. A single quote is treated as already escaped when the character
     * immediately before it is a backslash; such quotes are left untouched.
     *
     * @param string the input text
     *
     * @return the output text with all single quote characters escaped by an extra backslash
     */
    public String escapeSingleQuotes(String string)
    {
        // In replacement-string syntax "\\" stands for one literal backslash, so every matched quote becomes the two characters: backslash, quote.
        return UNESCAPED_SINGLE_QUOTE_PATTERN.matcher(string).replaceAll("\\\\'");
    }

    /**
     * Generates the create table Hive 13 DDL as per specified business object data DDL request.
* * @param request the business object data DDL request * @param businessObjectFormatEntity the business object format entity * @param customDdlEntity the optional custom DDL entity * @param storageNames the list of storage names * @param storageEntities the list of storage entities * @param s3BucketNames the map of storage entities to the relative S3 bucket names * * @return the create table Hive DDL */ @Override public String generateCreateTableDdl(BusinessObjectDataDdlRequest request, BusinessObjectFormatEntity businessObjectFormatEntity, CustomDdlEntity customDdlEntity, List<String> storageNames, List<StorageEntity> storageEntities, Map<StorageEntity, String> s3BucketNames) { // Get business object format key from the request. BusinessObjectFormatKey businessObjectFormatKey = new BusinessObjectFormatKey(request.getNamespace(), request.getBusinessObjectDefinitionName(), request.getBusinessObjectFormatUsage(), request.getBusinessObjectFormatFileType(), request.getBusinessObjectFormatVersion()); // Build partition filters based on the specified partition value filters. // We do validate that all specified storages are of "S3" storage platform type, so we specify S3 storage platform type in // the call below, so we select storage units only from all S3 storages, when the specified list of storages is empty. List<List<String>> partitionFilters = businessObjectDataDaoHelper .buildPartitionFilters(request.getPartitionValueFilters(), request.getPartitionValueFilter(), businessObjectFormatKey, request.getBusinessObjectDataVersion(), storageNames, StoragePlatformEntity.S3, null, businessObjectFormatEntity); // If the partitionKey="partition" and partitionValue="none", then DDL should // return a DDL which treats business object data as a table, not a partition. 
boolean isPartitioned = !businessObjectFormatEntity.getPartitionKey().equalsIgnoreCase(NO_PARTITIONING_PARTITION_KEY) || partitionFilters.size() != 1 || !partitionFilters.get(0).get(0).equalsIgnoreCase(NO_PARTITIONING_PARTITION_VALUE); // Generate the create table Hive 13 DDL. GenerateDdlRequest generateDdlRequest = new GenerateDdlRequest(); generateDdlRequest.allowMissingData = request.isAllowMissingData(); generateDdlRequest.businessObjectDataVersion = request.getBusinessObjectDataVersion(); generateDdlRequest.businessObjectFormatEntity = businessObjectFormatEntity; generateDdlRequest.businessObjectFormatVersion = request.getBusinessObjectFormatVersion(); generateDdlRequest.customDdlEntity = customDdlEntity; generateDdlRequest.includeAllRegisteredSubPartitions = request.isIncludeAllRegisteredSubPartitions(); generateDdlRequest.includeDropPartitions = request.isIncludeDropPartitions(); generateDdlRequest.includeDropTableStatement = request.isIncludeDropTableStatement(); generateDdlRequest.includeIfNotExistsOption = request.isIncludeIfNotExistsOption(); generateDdlRequest.isPartitioned = isPartitioned; generateDdlRequest.partitionFilters = partitionFilters; generateDdlRequest.s3BucketNames = s3BucketNames; generateDdlRequest.storageEntities = storageEntities; generateDdlRequest.storageNames = storageNames; generateDdlRequest.suppressScanForUnregisteredSubPartitions = request.isSuppressScanForUnregisteredSubPartitions(); generateDdlRequest.tableName = request.getTableName(); return generateCreateTableDdlHelper(generateDdlRequest); } /** * Generates the create table Hive 13 DDL as per specified business object format DDL request. 
* * @param request the business object format DDL request * @param businessObjectFormatEntity the business object format entity * @param customDdlEntity the optional custom DDL entity * * @return the create table Hive DDL */ @Override public String generateCreateTableDdl(BusinessObjectFormatDdlRequest request, BusinessObjectFormatEntity businessObjectFormatEntity, CustomDdlEntity customDdlEntity) { // If the partitionKey="partition", then DDL should return a DDL which treats business object data as a table, not a partition. Boolean isPartitioned = !businessObjectFormatEntity.getPartitionKey().equalsIgnoreCase(NO_PARTITIONING_PARTITION_KEY); // Generate the create table Hive 13 DDL. GenerateDdlRequest generateDdlRequest = new GenerateDdlRequest(); generateDdlRequest.businessObjectFormatEntity = businessObjectFormatEntity; generateDdlRequest.customDdlEntity = customDdlEntity; generateDdlRequest.isPartitioned = isPartitioned; generateDdlRequest.tableName = request.getTableName(); generateDdlRequest.includeDropTableStatement = request.isIncludeDropTableStatement(); generateDdlRequest.includeIfNotExistsOption = request.isIncludeIfNotExistsOption(); return generateCreateTableDdlHelper(generateDdlRequest); } @Override public String generateReplaceColumnsStatement(BusinessObjectFormatDdlRequest request, BusinessObjectFormatEntity businessObjectFormatEntity) { BusinessObjectFormat businessObjectFormat = businessObjectFormatHelper.createBusinessObjectFormatFromEntity(businessObjectFormatEntity); assertSchemaColumnsNotEmpty(businessObjectFormat, businessObjectFormatEntity); StringBuilder builder = new StringBuilder(34); builder.append("ALTER TABLE `"); builder.append(request.getTableName()); builder.append("` REPLACE COLUMNS (\n"); builder.append(generateDdlColumns(businessObjectFormatEntity, businessObjectFormat)); return builder.toString().trim() + ';'; } /** * Gets the DDL character value based on the specified configured character value. 
This method supports UTF-8 encoded strings and will "Hive" escape any * non-ASCII printable characters using '\(value)'. * * @param string the configured character value. * * @return the DDL character value. */ public String getDdlCharacterValue(String string) { return getDdlCharacterValue(string, false); } /** * Gets the DDL character value based on the specified configured character value. This method supports UTF-8 encoded strings and will "Hive" escape any * non-ASCII printable characters using '\(value)'. * * @param string the configured character value. * @param escapeSingleBackslash specifies if we need to escape a single backslash character with an extra backslash * * @return the DDL character value. */ public String getDdlCharacterValue(String string, boolean escapeSingleBackslash) { // Assume the empty string for the return value. StringBuilder returnValueStringBuilder = new StringBuilder(); // If we have an actual character, set the return value based on our rules. if (StringUtils.isNotEmpty(string)) { // Convert the string to UTF-8 so we can the proper characters that were sent via XML. String utf8String = new String(string.getBytes(Charsets.UTF_8), Charsets.UTF_8); // Loop through each character and add each one to the return value. for (int i = 0; i < utf8String.length(); i++) { // Default to the character itself. Character character = string.charAt(i); String nextValue = character.toString(); // If the character isn't ASCII printable, then "Hive" escape it. if (!CharUtils.isAsciiPrintable(character)) { // If the character is unprintable, then display it as the ASCII octal value in \000 format. nextValue = String.format("\\%03o", (int) character); } // Add this character to the return value. returnValueStringBuilder.append(nextValue); } // Check if we need to escape a single backslash character with an extra backslash. 
if (escapeSingleBackslash && returnValueStringBuilder.toString().equals("\\")) { returnValueStringBuilder.append('\\'); } } // Return the value. return returnValueStringBuilder.toString(); } @Override public BusinessObjectDataDdlOutputFormatEnum getDdlOutputFormat() { return BusinessObjectDataDdlOutputFormatEnum.HIVE_13_DDL; } /** * Gets a list of Hive partitions. For single level partitioning, no auto-discovery of sub-partitions (sub-directories) is needed - the business object data * will be represented by a single Hive partition instance. For multiple level partitioning, this method performs an auto-discovery of all sub-partitions * (sub-directories) and creates a Hive partition object instance for each partition. * * @param businessObjectDataKey the business object data key. * @param autoDiscoverableSubPartitionColumns the auto-discoverable sub-partition columns. * @param s3KeyPrefix the S3 key prefix. * @param storageFiles the storage files. * @param businessObjectDataEntity the business object data entity. * @param storageName the storage name. * * @return the list of Hive partitions */ public List<HivePartitionDto> getHivePartitions(BusinessObjectDataKey businessObjectDataKey, List<SchemaColumn> autoDiscoverableSubPartitionColumns, String s3KeyPrefix, Collection<String> storageFiles, BusinessObjectDataEntity businessObjectDataEntity, String storageName) { // We are using linked hash map to preserve the order of the discovered partitions. LinkedHashMap<List<String>, HivePartitionDto> linkedHashMap = new LinkedHashMap<>(); Pattern pattern = getHivePathPattern(autoDiscoverableSubPartitionColumns); for (String storageFile : storageFiles) { // Remove S3 key prefix from the file path. Please note that the storage files are already validated to start with S3 key prefix. String relativeFilePath = storageFile.substring(s3KeyPrefix.length()); // Try to match the relative file path to the expected subpartition folders. 
Matcher matcher = pattern.matcher(relativeFilePath); Assert.isTrue(matcher.matches(), String.format("Registered storage file or directory does not match the expected Hive sub-directory pattern. " + "Storage: {%s}, file/directory: {%s}, business object data: {%s}, S3 key prefix: {%s}, pattern: {^%s$}", storageName, storageFile, businessObjectDataHelper.businessObjectDataEntityAltKeyToString(businessObjectDataEntity), s3KeyPrefix, pattern.pattern())); // Add the top level partition value. HivePartitionDto newHivePartition = new HivePartitionDto(); newHivePartition.getPartitionValues().add(businessObjectDataKey.getPartitionValue()); newHivePartition.getPartitionValues().addAll(businessObjectDataKey.getSubPartitionValues()); // Extract relative partition values. for (int i = 1; i <= matcher.groupCount(); i++) { newHivePartition.getPartitionValues().add(matcher.group(i)); } // Remove the trailing "/" plus an optional file name from the file path and store the result string as this partition relative path. newHivePartition.setPath(relativeFilePath.replaceAll("/[^/]*$", "")); // Check if we already have that partition discovered - that would happen if partition contains multiple data files. HivePartitionDto hivePartition = linkedHashMap.get(newHivePartition.getPartitionValues()); if (hivePartition != null) { // Partition is already discovered, so just validate that the relative file paths match. Assert.isTrue(hivePartition.getPath().equals(newHivePartition.getPath()), String.format( "Found two different locations for the same Hive partition. Storage: {%s}, business object data: {%s}, " + "S3 key prefix: {%s}, path[1]: {%s}, path[2]: {%s}", storageName, businessObjectDataHelper.businessObjectDataEntityAltKeyToString(businessObjectDataEntity), s3KeyPrefix, hivePartition.getPath(), newHivePartition.getPath())); } else { // Add this partition to the hash map of discovered partitions. 
linkedHashMap.put(newHivePartition.getPartitionValues(), newHivePartition); } } List<HivePartitionDto> hivePartitions = new ArrayList<>(); hivePartitions.addAll(linkedHashMap.values()); return hivePartitions; } /** * Gets a pattern to match Hive partition sub-directories. * * @param partitionColumns the list of partition columns * * @return the newly created pattern to match Hive partition sub-directories. */ public Pattern getHivePathPattern(List<SchemaColumn> partitionColumns) { StringBuilder sb = new StringBuilder(26); // For each partition column, add a regular expression to match "<COLUMN_NAME|COLUMN-NAME>=<VALUE>" sub-directory. for (SchemaColumn partitionColumn : partitionColumns) { String partitionColumnName = partitionColumn.getName(); // We are using a non-capturing group for the partition column names here - this is done by adding "?:" to the beginning of a capture group. sb.append("\\/(?:"); sb.append(Matcher.quoteReplacement(partitionColumnName)); // Please note that for subpartition folder, we do support partition column names having all underscores replaced with hyphens. sb.append('|'); sb.append(Matcher.quoteReplacement(partitionColumnName.replace("_", "-"))); sb.append(")=([^/]+)"); } // Add a regular expression for a trailing "/" and an optional file name. sb.append("\\/[^/]*"); // We do a case-insensitive match for partition column names. return Pattern.compile(sb.toString(), Pattern.CASE_INSENSITIVE); } /** * Asserts that there exists at least one column specified in the business object format schema. * * @param businessObjectFormat The {@link BusinessObjectFormat} containing schema columns. * @param businessObjectFormatEntity The entity used to generate the error message. 
*/ private void assertSchemaColumnsNotEmpty(BusinessObjectFormat businessObjectFormat, BusinessObjectFormatEntity businessObjectFormatEntity) { Assert.notEmpty(businessObjectFormat.getSchema().getColumns(), String.format("No schema columns specified for business object format {%s}.", businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(businessObjectFormatEntity))); } /** * Generates and append to the string builder the create table Hive 13 DDL as per specified parameters. */ private String generateCreateTableDdlHelper(GenerateDdlRequest generateDdlRequest) { // TODO: We might want to consider using a template engine such as Velocity to generate this DDL so we don't wind up just doing string manipulation. StringBuilder sb = new StringBuilder(); // For custom DDL, we would need to substitute the custom DDL tokens with their relative values. HashMap<String, String> replacements = new HashMap<>(); // Validate that partition values passed in the list of partition filters do not contain '/' character. if (generateDdlRequest.isPartitioned && !CollectionUtils.isEmpty(generateDdlRequest.partitionFilters)) { // Validate that partition values do not contain '/' characters. for (List<String> partitionFilter : generateDdlRequest.partitionFilters) { for (String partitionValue : partitionFilter) { Assert.doesNotContain(partitionValue, "/", String.format("Partition value \"%s\" can not contain a '/' character.", partitionValue)); } } } // Get business object format model object to directly access schema columns and partitions. BusinessObjectFormat businessObjectFormat = businessObjectFormatHelper.createBusinessObjectFormatFromEntity(generateDdlRequest.businessObjectFormatEntity); // Validate that we have at least one column specified in the business object format schema. 
assertSchemaColumnsNotEmpty(businessObjectFormat, generateDdlRequest.businessObjectFormatEntity); if (generateDdlRequest.isPartitioned) { // Validate that we have at least one partition column specified in the business object format schema. Assert.notEmpty(businessObjectFormat.getSchema().getPartitions(), String.format("No schema partitions specified for business object format {%s}.", businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(generateDdlRequest.businessObjectFormatEntity))); // Validate that partition column names do not contain '/' characters. for (SchemaColumn partitionColumn : businessObjectFormat.getSchema().getPartitions()) { Assert.doesNotContain(partitionColumn.getName(), "/", String .format("Partition column name \"%s\" can not contain a '/' character. Business object format: {%s}", partitionColumn.getName(), businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(generateDdlRequest.businessObjectFormatEntity))); } } // Add drop table if requested. if (BooleanUtils.isTrue(generateDdlRequest.includeDropTableStatement)) { sb.append(String.format("DROP TABLE IF EXISTS `%s`;\n\n", generateDdlRequest.tableName)); } // Depending on the flag, prepare "if not exists" option text or leave it an empty string. String ifNotExistsOption = BooleanUtils.isTrue(generateDdlRequest.includeIfNotExistsOption) ? "IF NOT EXISTS " : ""; // Only generate the create table DDL statement, if custom DDL was not specified. if (generateDdlRequest.customDdlEntity == null) { generateStandardBaseDdl(generateDdlRequest, sb, businessObjectFormat, ifNotExistsOption); } else { // Use the custom DDL in place of the create table statement. sb.append(String.format("%s\n\n", generateDdlRequest.customDdlEntity.getDdl())); // We need to substitute the relative custom DDL token with an actual table name. 
replacements.put(TABLE_NAME_CUSTOM_DDL_TOKEN, generateDdlRequest.tableName); } // Add alter table statements only if the list of partition filters is not empty - this is applicable to generating DDL for business object data only. if (!CollectionUtils.isEmpty(generateDdlRequest.partitionFilters)) { processPartitionFiltersForGenerateDdl(generateDdlRequest, sb, replacements, generateDdlRequest.businessObjectFormatEntity, businessObjectFormat, ifNotExistsOption); } // Add a location statement with a token if this is format dll that does not use custom ddl. else if (!generateDdlRequest.isPartitioned && generateDdlRequest.customDdlEntity == null) { // Since custom DDL is not specified, there are no partition values, and this table is not partitioned, add a LOCATION clause with a token. sb.append(String.format("LOCATION '%s';", NON_PARTITIONED_TABLE_LOCATION_CUSTOM_DDL_TOKEN)); } // Trim to remove unnecessary end-of-line characters, if any, from the end of the generated DDL. String resultDdl = sb.toString().trim(); // For custom DDL, substitute the relative custom DDL tokens with their values. if (generateDdlRequest.customDdlEntity != null) { for (Map.Entry<String, String> entry : replacements.entrySet()) { String token = entry.getKey(); String value = entry.getValue(); resultDdl = resultDdl.replaceAll(Pattern.quote(token), value); } } return resultDdl; } /** * Generates the DDL column definitions based on the given business object format. The generated column definitions look like: * <p/> * <pre> * `COL_NAME1` VARCHAR(2) COMMENT 'some comment', * `COL_NAME2` VARCHAR(2), * `ORIG_COL_NAME3` DATE * ) * </pre> * <p/> * Each column definition is indented using 4 spaces. If a column is also a partition, the text 'ORIG_' will be prefixed in the column name. Note the * closing parenthesis at the end of the statement. 
* * @param businessObjectFormatEntity The persistent entity of business object format * @param businessObjectFormat The {@link BusinessObjectFormat} * * @return String containing the generated column definitions. */ private String generateDdlColumns(BusinessObjectFormatEntity businessObjectFormatEntity, BusinessObjectFormat businessObjectFormat) { StringBuilder sb = new StringBuilder(); // Add schema columns. Boolean firstRow = true; for (SchemaColumn schemaColumn : businessObjectFormat.getSchema().getColumns()) { if (!firstRow) { sb.append(",\n"); } else { firstRow = false; } // Add a schema column declaration. Check if a schema column is also a partition column and prepend "ORGNL_" prefix if this is the case. sb.append(String.format(" `%s%s` %s%s", (!CollectionUtils.isEmpty(businessObjectFormat.getSchema().getPartitions()) && businessObjectFormat.getSchema().getPartitions().contains(schemaColumn) ? "ORGNL_" : ""), schemaColumn.getName(), getHiveDataType(schemaColumn, businessObjectFormatEntity), StringUtils.isNotBlank(schemaColumn.getDescription()) ? String.format(" COMMENT '%s'", escapeSingleQuotes(schemaColumn.getDescription())) : "")); } sb.append(")\n"); return sb.toString(); } private void generateStandardBaseDdl(GenerateDdlRequest generateDdlRequest, StringBuilder sb, BusinessObjectFormat businessObjectFormat, String ifNotExistsOption) { // Please note that we escape table name and all column names to avoid Hive reserved words in DDL statement generation. sb.append(String.format("CREATE EXTERNAL TABLE %s`%s` (\n", ifNotExistsOption, generateDdlRequest.tableName)); // Add schema columns. sb.append(generateDdlColumns(generateDdlRequest.businessObjectFormatEntity, businessObjectFormat)); if (generateDdlRequest.isPartitioned) { // Add a partitioned by clause. sb.append("PARTITIONED BY ("); // List all partition columns. 
List<String> partitionColumnDeclarations = new ArrayList<>(); for (SchemaColumn partitionColumn : businessObjectFormat.getSchema().getPartitions()) { partitionColumnDeclarations .add(String.format("`%s` %s", partitionColumn.getName(), getHiveDataType(partitionColumn, generateDdlRequest.businessObjectFormatEntity))); } sb.append(StringUtils.join(partitionColumnDeclarations, ", ")); sb.append(")\n"); } // We output delimiter character, escape character, and null value only when they are defined in the business object format schema. sb.append("ROW FORMAT DELIMITED"); if (!StringUtils.isEmpty(generateDdlRequest.businessObjectFormatEntity.getDelimiter())) { // Note that the escape character is only output when the delimiter is present. sb.append(String.format(" FIELDS TERMINATED BY '%s'%s", escapeSingleQuotes(getDdlCharacterValue(generateDdlRequest.businessObjectFormatEntity.getDelimiter(), true)), StringUtils.isEmpty(generateDdlRequest.businessObjectFormatEntity.getEscapeCharacter()) ? "" : String.format(" ESCAPED BY '%s'", escapeSingleQuotes(getDdlCharacterValue(generateDdlRequest.businessObjectFormatEntity.getEscapeCharacter(), true))))); } sb.append( String.format(" NULL DEFINED AS '%s'\n", escapeSingleQuotes(getDdlCharacterValue(generateDdlRequest.businessObjectFormatEntity.getNullValue())))); // If this table is not partitioned, then STORED AS clause will be followed by LOCATION. Otherwise, the CREATE TABLE is complete. sb.append( String.format("STORED AS %s%s\n", getHiveFileFormat(generateDdlRequest.businessObjectFormatEntity), generateDdlRequest.isPartitioned ? ";\n" : "")); } /** * Returns the corresponding Hive data type per specified schema column entity. 
* * @param schemaColumn the schema column that we want to get the corresponding Hive data type for * @param businessObjectFormatEntity the business object format entity that schema column belongs to * * @return the Hive data type * @throws IllegalArgumentException if schema column data type is not supported */ private String getHiveDataType(SchemaColumn schemaColumn, BusinessObjectFormatEntity businessObjectFormatEntity) { String hiveDataType; if (schemaColumn.getType().equalsIgnoreCase("TINYINT") || schemaColumn.getType().equalsIgnoreCase("SMALLINT") || schemaColumn.getType().equalsIgnoreCase("INT") || schemaColumn.getType().equalsIgnoreCase("BIGINT") || schemaColumn.getType().equalsIgnoreCase("FLOAT") || schemaColumn.getType().equalsIgnoreCase("DOUBLE") || schemaColumn.getType().equalsIgnoreCase("TIMESTAMP") || schemaColumn.getType().equalsIgnoreCase("DATE") || schemaColumn.getType().equalsIgnoreCase("STRING") || schemaColumn.getType().equalsIgnoreCase("BOOLEAN") || schemaColumn.getType().equalsIgnoreCase("BINARY")) { hiveDataType = schemaColumn.getType().toUpperCase(); } else if (schemaColumn.getType().equalsIgnoreCase("DECIMAL") || schemaColumn.getType().equalsIgnoreCase("NUMBER")) { hiveDataType = StringUtils.isNotBlank(schemaColumn.getSize()) ? 
String.format("DECIMAL(%s)", schemaColumn.getSize()) : "DECIMAL"; } else if (schemaColumn.getType().equalsIgnoreCase("VARCHAR") || schemaColumn.getType().equalsIgnoreCase("CHAR")) { hiveDataType = String.format("%s(%s)", schemaColumn.getType().toUpperCase(), schemaColumn.getSize()); } else if (schemaColumn.getType().equalsIgnoreCase("VARCHAR2")) { hiveDataType = String.format("VARCHAR(%s)", schemaColumn.getSize()); } else { throw new IllegalArgumentException(String .format("Column \"%s\" has an unsupported data type \"%s\" in the schema for business object format {%s}.", schemaColumn.getName(), schemaColumn.getType(), businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(businessObjectFormatEntity))); } return hiveDataType; } /** * Returns the corresponding Hive file format. * * @param businessObjectFormatEntity the business object format entity that schema column belongs to * * @return the Hive file format * @throws IllegalArgumentException if business object format file type is not supported */ private String getHiveFileFormat(BusinessObjectFormatEntity businessObjectFormatEntity) { String fileFormat = businessObjectFormatEntity.getFileType().getCode(); String hiveFileFormat; if (fileFormat.equalsIgnoreCase(FileTypeEntity.BZ_FILE_TYPE) || fileFormat.equalsIgnoreCase(FileTypeEntity.GZ_FILE_TYPE) || fileFormat.equalsIgnoreCase(FileTypeEntity.TXT_FILE_TYPE)) { hiveFileFormat = TEXT_HIVE_FILE_FORMAT; } else if (fileFormat.equalsIgnoreCase(FileTypeEntity.PARQUET_FILE_TYPE)) { hiveFileFormat = PARQUET_HIVE_FILE_FORMAT; } else if (fileFormat.equalsIgnoreCase(FileTypeEntity.ORC_FILE_TYPE)) { hiveFileFormat = ORC_HIVE_FILE_FORMAT; } else { throw new IllegalArgumentException(String.format("Unsupported format file type for business object format {%s}.", businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(businessObjectFormatEntity))); } return hiveFileFormat; } /** * Processes partition filters for DDL generation as per generate DDL request. 
* * @param generateDdlRequest the generate DDL request * @param sb the string builder to be updated with the "alter table add partition" statements * @param replacements the hash map of string values to be used to substitute the custom DDL tokens with their actual values * @param businessObjectFormatEntity the business object format entity * @param businessObjectFormat the business object format * @param ifNotExistsOption specifies if generated DDL contains "if not exists" option */ private void processPartitionFiltersForGenerateDdl(GenerateDdlRequest generateDdlRequest, StringBuilder sb, HashMap<String, String> replacements, BusinessObjectFormatEntity businessObjectFormatEntity, BusinessObjectFormat businessObjectFormat, String ifNotExistsOption) { // Get the business object format key from the entity. BusinessObjectFormatKey businessObjectFormatKey = businessObjectFormatHelper.getBusinessObjectFormatKey(generateDdlRequest.businessObjectFormatEntity); // Override the business object format version with the original (optional) value from the request. businessObjectFormatKey.setBusinessObjectFormatVersion(generateDdlRequest.businessObjectFormatVersion); // Retrieve a list of storage unit entities for the specified list of partition filters. The entities will be sorted by partition values and storages. // For a non-partitioned table, there should only exist a single business object data entity (with partitionValue equals to "none"). We do validate that // all specified storages are of "S3" storage platform type, so we specify S3 storage platform type in the herdDao call below, so we select storage // units only from all S3 storages, when the specified list of storages is empty. We also specify to select only "available" storage units. 
List<StorageUnitEntity> storageUnitEntities = storageUnitDao .getStorageUnitsByPartitionFiltersAndStorages(businessObjectFormatKey, generateDdlRequest.partitionFilters, generateDdlRequest.businessObjectDataVersion, BusinessObjectDataStatusEntity.VALID, generateDdlRequest.storageNames, StoragePlatformEntity.S3, null, true); // Exclude duplicate business object data per specified list of storage names. // If storage names are not specified, the method fails on business object data instances registered with multiple storages. storageUnitEntities = excludeDuplicateBusinessObjectData(storageUnitEntities, generateDdlRequest.storageNames, generateDdlRequest.storageEntities); // Build a list of matched partition filters. Please note that each request partition // filter might result in multiple available business object data entities. List<List<String>> matchedAvailablePartitionFilters = new ArrayList<>(); List<List<String>> availablePartitions = new ArrayList<>(); for (StorageUnitEntity storageUnitEntity : storageUnitEntities) { BusinessObjectDataKey businessObjectDataKey = businessObjectDataHelper.getBusinessObjectDataKey(storageUnitEntity.getBusinessObjectData()); matchedAvailablePartitionFilters .add(businessObjectDataHelper.getPartitionFilter(businessObjectDataKey, generateDdlRequest.partitionFilters.get(0))); availablePartitions.add(businessObjectDataHelper.getPrimaryAndSubPartitionValues(businessObjectDataKey)); } // If request specifies to include all registered sub-partitions, fail if any of "non-available" registered sub-partitions are found. 
if (generateDdlRequest.businessObjectDataVersion == null && BooleanUtils.isTrue(generateDdlRequest.includeAllRegisteredSubPartitions) && !CollectionUtils.isEmpty(matchedAvailablePartitionFilters)) { notAllowNonAvailableRegisteredSubPartitions(businessObjectFormatKey, matchedAvailablePartitionFilters, availablePartitions, generateDdlRequest.storageNames); } // Fail on any missing business object data unless the flag is set to allow missing business object data. if (!BooleanUtils.isTrue(generateDdlRequest.allowMissingData)) { // Get a list of unmatched partition filters. List<List<String>> unmatchedPartitionFilters = new ArrayList<>(generateDdlRequest.partitionFilters); unmatchedPartitionFilters.removeAll(matchedAvailablePartitionFilters); // Throw an exception if we have any unmatched partition filters. if (!unmatchedPartitionFilters.isEmpty()) { // Get the first unmatched partition filter and throw exception. List<String> unmatchedPartitionFilter = getFirstUnmatchedPartitionFilter(unmatchedPartitionFilters); throw new ObjectNotFoundException( String.format("Business object data {namespace: \"%s\", businessObjectDefinitionName: \"%s\", businessObjectFormatUsage: \"%s\", " + "businessObjectFormatFileType: \"%s\", businessObjectFormatVersion: %d, partitionValue: \"%s\", " + "subpartitionValues: \"%s\", businessObjectDataVersion: %d} is not available in \"%s\" storage(s).", businessObjectFormatKey.getNamespace(), businessObjectFormatKey.getBusinessObjectDefinitionName(), businessObjectFormatKey.getBusinessObjectFormatUsage(), businessObjectFormatKey.getBusinessObjectFormatFileType(), businessObjectFormatKey.getBusinessObjectFormatVersion(), unmatchedPartitionFilter.get(0), StringUtils.join(unmatchedPartitionFilter.subList(1, unmatchedPartitionFilter.size()), ","), generateDdlRequest.businessObjectDataVersion, StringUtils.join(generateDdlRequest.storageNames, ","))); } } // We still need to close/complete the create table statement when there is no custom DDL, // the 
table is non-partitioned, and there is no business object data found. if (generateDdlRequest.customDdlEntity == null && !generateDdlRequest.isPartitioned && CollectionUtils.isEmpty(storageUnitEntities)) { // Add a LOCATION clause with a token. sb.append(String.format("LOCATION '%s';", NON_PARTITIONED_TABLE_LOCATION_CUSTOM_DDL_TOKEN)); } // The table is partitioned, custom DDL is specified, or there is at least one business object data instance found. else { // If drop partitions flag is set and the table is partitioned, drop partitions specified by the partition filters. if (generateDdlRequest.isPartitioned && BooleanUtils.isTrue(generateDdlRequest.includeDropPartitions)) { // Add a drop partition statement for each partition filter entry. for (List<String> partitionFilter : generateDdlRequest.partitionFilters) { sb.append(String.format("ALTER TABLE `%s` DROP IF EXISTS PARTITION (", generateDdlRequest.tableName)); // Specify all partition column values as per this partition filter. List<String> partitionKeyValuePairs = new ArrayList<>(); for (int i = 0; i < partitionFilter.size(); i++) { if (StringUtils.isNotBlank(partitionFilter.get(i))) { // We cannot hit ArrayIndexOutOfBoundsException on getPartitions() since partitionFilter would // not have a value set at an index that is greater or equal than the number of partitions in the schema. String partitionColumnName = businessObjectFormat.getSchema().getPartitions().get(i).getName(); partitionKeyValuePairs.add(String.format("`%s`='%s'", partitionColumnName, partitionFilter.get(i))); } } sb.append(StringUtils.join(partitionKeyValuePairs, ", ")); sb.append(");\n"); } sb.append('\n'); } // Process storage unit entities. if (!CollectionUtils.isEmpty(storageUnitEntities)) { processStorageUnitsForGenerateDdl(generateDdlRequest, sb, replacements, businessObjectFormatEntity, businessObjectFormat, ifNotExistsOption, storageUnitEntities); } } } /** * Gets a first unmatched partition filter from the list of unmatched filters. 
* * @param unmatchedPartitionFilters the list of unmatchedPartitionFilters * * @return the first unmatched partition filter */ private List<String> getFirstUnmatchedPartitionFilter(List<List<String>> unmatchedPartitionFilters) { // Get the first unmatched partition filter from the list. List<String> unmatchedPartitionFilter = unmatchedPartitionFilters.get(0); // Replace all null partition values with an empty string. for (int i = 0; i < unmatchedPartitionFilter.size(); i++) { if (unmatchedPartitionFilter.get(i) == null) { unmatchedPartitionFilter.set(i, ""); } } return unmatchedPartitionFilter; } /** * Adds the relative "alter table add partition" statements for each storage unit entity. Please note that each request partition value might result in * multiple available storage unit entities (subpartitions). * * @param sb the string builder to be updated with the "alter table add partition" statements * @param replacements the hash map of string values to be used to substitute the custom DDL tokens with their actual values * @param businessObjectFormatEntity the business object format entity * @param businessObjectFormat the business object format * @param ifNotExistsOption specifies if generated DDL contains "if not exists" option * @param storageUnitEntities the list of storage unit entities */ private void processStorageUnitsForGenerateDdl(GenerateDdlRequest generateDdlRequest, StringBuilder sb, HashMap<String, String> replacements, BusinessObjectFormatEntity businessObjectFormatEntity, BusinessObjectFormat businessObjectFormat, String ifNotExistsOption, List<StorageUnitEntity> storageUnitEntities) { // If flag is not set to suppress scan for unregistered sub-partitions, retrieve all storage // file paths for the relative storage units loaded in a multi-valued map for easy access. MultiValuedMap<Integer, String> storageUnitIdToStorageFilePathsMap = BooleanUtils.isTrue(generateDdlRequest.suppressScanForUnregisteredSubPartitions) ? 
new ArrayListValuedHashMap<>() : storageFileDao.getStorageFilePathsByStorageUnitIds(storageUnitHelper.getStorageUnitIds(storageUnitEntities)); // Process all available business object data instances. for (StorageUnitEntity storageUnitEntity : storageUnitEntities) { // Get business object data key and S3 key prefix for this business object data. BusinessObjectDataKey businessObjectDataKey = businessObjectDataHelper.getBusinessObjectDataKey(storageUnitEntity.getBusinessObjectData()); String s3KeyPrefix = s3KeyPrefixHelper .buildS3KeyPrefix(storageUnitEntity.getStorage(), storageUnitEntity.getBusinessObjectData().getBusinessObjectFormat(), businessObjectDataKey); // If flag is set to suppress scan for unregistered sub-partitions, use the directory path or the S3 key prefix // as the partition's location, otherwise, use storage files to discover all unregistered sub-partitions. Collection<String> storageFilePaths = new ArrayList<>(); if (BooleanUtils.isTrue(generateDdlRequest.suppressScanForUnregisteredSubPartitions)) { // Validate the directory path value if it is present. if (storageUnitEntity.getDirectoryPath() != null) { Assert.isTrue(storageUnitEntity.getDirectoryPath().equals(s3KeyPrefix), String.format( "Storage directory path \"%s\" registered with business object data {%s} " + "in \"%s\" storage does not match the expected S3 key prefix \"%s\".", storageUnitEntity.getDirectoryPath(), businessObjectDataHelper.businessObjectDataEntityAltKeyToString(storageUnitEntity.getBusinessObjectData()), storageUnitEntity.getStorage().getName(), s3KeyPrefix)); } // Add the S3 key prefix to the list of storage files. // We add a trailing '/' character to the prefix, since it represents a directory. storageFilePaths.add(StringUtils.appendIfMissing(s3KeyPrefix, "/")); } else { // Retrieve storage file paths registered with this business object data in the specified storage. storageFilePaths = storageUnitIdToStorageFilePathsMap.containsKey(storageUnitEntity.getId()) ? 
storageUnitIdToStorageFilePathsMap.get(storageUnitEntity.getId()) : new ArrayList<>(); // Validate storage file paths registered with this business object data in the specified storage. // The validation check below is required even if we have no storage files registered. storageFileHelper .validateStorageFiles(storageFilePaths, s3KeyPrefix, storageUnitEntity.getBusinessObjectData(), storageUnitEntity.getStorage().getName()); // If there are no storage files registered for this storage unit, we should use the storage directory path value. if (storageFilePaths.isEmpty()) { // Validate that directory path value is present and it matches the S3 key prefix. Assert.isTrue(storageUnitEntity.getDirectoryPath() != null && storageUnitEntity.getDirectoryPath().startsWith(s3KeyPrefix), String.format( "Storage directory path \"%s\" registered with business object data {%s} " + "in \"%s\" storage does not match the expected S3 key prefix \"%s\".", storageUnitEntity.getDirectoryPath(), businessObjectDataHelper.businessObjectDataEntityAltKeyToString(storageUnitEntity.getBusinessObjectData()), storageUnitEntity.getStorage().getName(), s3KeyPrefix)); // Add storage directory path the empty storage files list. // We add a trailing '/' character to the path, since it represents a directory. storageFilePaths.add(storageUnitEntity.getDirectoryPath() + "/"); } } // Retrieve the s3 bucket name. String s3BucketName = getS3BucketName(storageUnitEntity.getStorage(), generateDdlRequest.s3BucketNames); // For partitioned table, add the relative partitions to the generated DDL. if (generateDdlRequest.isPartitioned) { // If flag is set to suppress scan for unregistered sub-partitions, validate that the number of primary and sub-partition values specified for // the business object data equals to the number of partition columns defined in schema for the format selected for DDL generation. 
if (BooleanUtils.isTrue(generateDdlRequest.suppressScanForUnregisteredSubPartitions)) { int businessObjectDataRegisteredPartitions = 1 + CollectionUtils.size(businessObjectDataKey.getSubPartitionValues()); Assert.isTrue(businessObjectFormat.getSchema().getPartitions().size() == businessObjectDataRegisteredPartitions, String.format("Number of primary and sub-partition values (%d) specified for the business object data is not equal to " + "the number of partition columns (%d) defined in the schema of the business object format selected for DDL generation. " + "Business object data: {%s}, business object format: {%s}", businessObjectDataRegisteredPartitions, businessObjectFormat.getSchema().getPartitions().size(), businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey), businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(businessObjectFormatEntity))); } // Otherwise, since the format version selected for DDL generation might not match the relative business object format version that business // object data is registered against, validate that the number of sub-partition values specified for the business object data is less than // the number of partition columns defined in schema for the format selected for DDL generation. else { Assert.isTrue(businessObjectFormat.getSchema().getPartitions().size() > CollectionUtils.size(businessObjectDataKey.getSubPartitionValues()), String.format("Number of subpartition values specified for the business object data is greater than or equal to " + "the number of partition columns defined in the schema of the business object format selected for DDL generation. " + "Business object data: {%s}, business object format: {%s}", businessObjectDataHelper.businessObjectDataKeyToString(businessObjectDataKey), businessObjectFormatHelper.businessObjectFormatEntityAltKeyToString(businessObjectFormatEntity))); } // Get partition information. 
For multiple level partitioning, auto-discover subpartitions (subdirectories) not already included into the S3 key // prefix. Each discovered partition requires a standalone "add partition" clause. Please note that due to the above validation check, there // should be no auto discoverable sub-partition columns, when flag is set to suppress scan for unregistered sub-partitions. List<SchemaColumn> autoDiscoverableSubPartitionColumns = businessObjectFormat.getSchema().getPartitions() .subList(1 + CollectionUtils.size(businessObjectDataKey.getSubPartitionValues()), businessObjectFormat.getSchema().getPartitions().size()); for (HivePartitionDto hivePartition : getHivePartitions(businessObjectDataKey, autoDiscoverableSubPartitionColumns, s3KeyPrefix, storageFilePaths, storageUnitEntity.getBusinessObjectData(), storageUnitEntity.getStorage().getName())) { sb.append(String.format("ALTER TABLE `%s` ADD %sPARTITION (", generateDdlRequest.tableName, ifNotExistsOption)); // Specify all partition column values. List<String> partitionKeyValuePairs = new ArrayList<>(); for (int i = 0; i < businessObjectFormat.getSchema().getPartitions().size(); i++) { String partitionColumnName = businessObjectFormat.getSchema().getPartitions().get(i).getName(); String partitionValue = hivePartition.getPartitionValues().get(i); partitionKeyValuePairs.add(String.format("`%s`='%s'", partitionColumnName, partitionValue)); } sb.append(StringUtils.join(partitionKeyValuePairs, ", ")); sb.append(String.format(") LOCATION 's3n://%s/%s%s';\n", s3BucketName, s3KeyPrefix, StringUtils.isNotBlank(hivePartition.getPath()) ? hivePartition.getPath() : "")); } } else // This is a non-partitioned table. { // Get location for this non-partitioned table. String tableLocation = String.format("s3n://%s/%s", s3BucketName, s3KeyPrefix); if (generateDdlRequest.customDdlEntity == null) { // Since custom DDL was not specified and this table is not partitioned, add a LOCATION clause. 
// This is the last line in the non-partitioned table DDL. sb.append(String.format("LOCATION '%s';", tableLocation)); } else { // Since custom DDL was used for a non-partitioned table, substitute the relative custom DDL token with the actual table location. replacements.put(NON_PARTITIONED_TABLE_LOCATION_CUSTOM_DDL_TOKEN, tableLocation); } } } } /** * Gets an S3 bucket name for the specified storage entity. The method memorizes the responses for performance reasons. * * @param storageEntity the storage entity * @param s3BucketNames the map of storage names to their relative S3 bucket names * * @return the S3 bucket name */ private String getS3BucketName(StorageEntity storageEntity, Map<StorageEntity, String> s3BucketNames) { String s3BucketName; // If bucket name was already retrieved for this storage, use it. if (s3BucketNames.containsKey(storageEntity)) { s3BucketName = s3BucketNames.get(storageEntity); } // Otherwise, retrieve the S3 bucket name attribute value and store it in memory. Please note that it is required, so we pass in a "true" flag. 
else { s3BucketName = storageHelper .getStorageAttributeValueByName(configurationHelper.getProperty(ConfigurationValue.S3_ATTRIBUTE_NAME_BUCKET_NAME), storageEntity, true); s3BucketNames.put(storageEntity, s3BucketName); } return s3BucketName; } /** * Parameters grouping for {@link Hive13DdlGenerator#generateCreateTableDdlHelper(GenerateDdlRequest)} */ private static class GenerateDdlRequest { private Boolean allowMissingData; private Integer businessObjectDataVersion; private BusinessObjectFormatEntity businessObjectFormatEntity; private Integer businessObjectFormatVersion; private CustomDdlEntity customDdlEntity; private Boolean includeAllRegisteredSubPartitions; private Boolean includeDropPartitions; private Boolean includeDropTableStatement; private Boolean includeIfNotExistsOption; private Boolean isPartitioned; private List<List<String>> partitionFilters; private Map<StorageEntity, String> s3BucketNames; private List<StorageEntity> storageEntities; private List<String> storageNames; private Boolean suppressScanForUnregisteredSubPartitions; private String tableName; } /** * Eliminate storage units that belong to the same business object data by picking storage unit registered in a storage listed earlier in the list of * storage names specified in the request. If storage names are not specified, simply fail on business object data instances registered with multiple * storages. 
* * @param storageUnitEntities the list of storage unit entities * @param storageNames the list of storage names * @param storageEntities the list of storage entities * * @return the updated list of storage unit entities * @throws IllegalArgumentException on business object data being registered in multiple storages and storage names are not specified to resolve this */ protected List<StorageUnitEntity> excludeDuplicateBusinessObjectData(List<StorageUnitEntity> storageUnitEntities, List<String> storageNames, List<StorageEntity> storageEntities) throws IllegalArgumentException { // If storage names are not specified, fail on business object data instances registered with multiple storages. // Otherwise, in a case when the same business object data is registered with multiple storages, // pick storage unit registered in a storage listed earlier in the list of storage names specified in the request. Map<BusinessObjectDataEntity, StorageUnitEntity> businessObjectDataToStorageUnitMap = new LinkedHashMap<>(); for (StorageUnitEntity storageUnitEntity : storageUnitEntities) { BusinessObjectDataEntity businessObjectDataEntity = storageUnitEntity.getBusinessObjectData(); if (businessObjectDataToStorageUnitMap.containsKey(businessObjectDataEntity)) { // Duplicate business object data is found, so check if storage names are specified. if (CollectionUtils.isEmpty(storageNames)) { // Fail on business object data registered in multiple storages. throw new IllegalArgumentException(String.format("Found business object data registered in more than one storage. " + "Please specify storage(s) in the request to resolve this. Business object data {%s}", businessObjectDataHelper.businessObjectDataEntityAltKeyToString(businessObjectDataEntity))); } else { // Replace the storage unit entity if it belongs to a "higher priority" storage. 
StorageEntity currentStorageEntity = businessObjectDataToStorageUnitMap.get(businessObjectDataEntity).getStorage(); int currentStorageIndex = storageEntities.indexOf(currentStorageEntity); int newStorageIndex = storageEntities.indexOf(storageUnitEntity.getStorage()); if (newStorageIndex < currentStorageIndex) { businessObjectDataToStorageUnitMap.put(storageUnitEntity.getBusinessObjectData(), storageUnitEntity); } } } else { businessObjectDataToStorageUnitMap.put(storageUnitEntity.getBusinessObjectData(), storageUnitEntity); } } return new ArrayList<>(businessObjectDataToStorageUnitMap.values()); } /** * Searches for and fails on any of "non-available" registered sub-partitions as per list of "matched" partition filters. * * @param businessObjectFormatKey the business object format key * @param matchedAvailablePartitionFilters the list of "matched" partition filters * @param availablePartitions the list of already discovered "available" partitions, where each partition consists of primary and optional sub-partition * values * @param storageNames the list of storage names */ protected void notAllowNonAvailableRegisteredSubPartitions(BusinessObjectFormatKey businessObjectFormatKey, List<List<String>> matchedAvailablePartitionFilters, List<List<String>> availablePartitions, List<String> storageNames) { // Query all matched partition filters to discover any non-available registered sub-partitions. Retrieve latest business object data per list of // matched filters regardless of business object data and/or storage unit statuses. This is done to discover all registered sub-partitions regardless // of business object data or storage unit statuses. We do validate that all specified storages are of "S3" storage platform type, so we specify S3 // storage platform type in the herdDao call below, so we select storage units only from all S3 storages, when the specified list of storages is empty. 
// We want to select any existing storage units regardless of their status, so we pass "false" for selectOnlyAvailableStorageUnits parameter. List<StorageUnitEntity> matchedNotAvailableStorageUnitEntities = storageUnitDao .getStorageUnitsByPartitionFiltersAndStorages(businessObjectFormatKey, matchedAvailablePartitionFilters, null, null, storageNames, StoragePlatformEntity.S3, null, false); // Exclude all storage units with business object data having "DELETED" status. matchedNotAvailableStorageUnitEntities = storageUnitHelper.excludeBusinessObjectDataStatus(matchedNotAvailableStorageUnitEntities, BusinessObjectDataStatusEntity.DELETED); // Exclude all already discovered "available" partitions. Please note that, since we got here, the list of matched partitions can not be empty. matchedNotAvailableStorageUnitEntities = storageUnitHelper.excludePartitions(matchedNotAvailableStorageUnitEntities, availablePartitions); // Fail on any "non-available" registered sub-partitions. if (!CollectionUtils.isEmpty(matchedNotAvailableStorageUnitEntities)) { // Get the business object data key for the first "non-available" registered sub-partition. 
BusinessObjectDataKey businessObjectDataKey = businessObjectDataHelper.getBusinessObjectDataKey(matchedNotAvailableStorageUnitEntities.get(0).getBusinessObjectData()); throw new ObjectNotFoundException( String.format("Business object data {namespace: \"%s\", businessObjectDefinitionName: \"%s\", businessObjectFormatUsage: \"%s\", " + "businessObjectFormatFileType: \"%s\", businessObjectFormatVersion: %d, partitionValue: \"%s\", " + "subpartitionValues: \"%s\", businessObjectDataVersion: %d} is not available in \"%s\" storage(s).", businessObjectFormatKey.getNamespace(), businessObjectFormatKey.getBusinessObjectDefinitionName(), businessObjectFormatKey.getBusinessObjectFormatUsage(), businessObjectFormatKey.getBusinessObjectFormatFileType(), businessObjectFormatKey.getBusinessObjectFormatVersion(), businessObjectDataKey.getPartitionValue(), StringUtils.join(businessObjectDataKey.getSubPartitionValues(), ","), businessObjectDataKey.getBusinessObjectDataVersion(), StringUtils.join(storageNames, ","))); } } }