package com.thinkbiganalytics.feedmgr.rest.model.schema; /*- * #%L * thinkbig-feed-manager-rest-model * %% * Copyright (C) 2017 ThinkBig Analytics * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.annotation.JsonSerialize; import com.thinkbiganalytics.discovery.model.DefaultField; import com.thinkbiganalytics.discovery.model.DefaultTableSchema; import com.thinkbiganalytics.discovery.schema.Field; import com.thinkbiganalytics.discovery.schema.TableSchema; import com.thinkbiganalytics.metadata.MetadataField; import com.thinkbiganalytics.policy.rest.model.FieldPolicy; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.List; import java.util.stream.Collectors; /** */ @JsonIgnoreProperties(ignoreUnknown = true) public class TableSetup { private static final Logger log = LoggerFactory.getLogger(TableSetup.class); @MetadataField(description = "Nifi property name 'table_column_specs'") public String fieldStructure; @Deprecated //this is now referenced in the sourceTableSchema.name //${metadata.table.existingTableName} will still work, but it is advised to switch it to ${metadata.table.sourceTableSchema.name} public String existingTableName; @JsonSerialize(as = DefaultTableSchema.class) @JsonDeserialize(as = DefaultTableSchema.class) private TableSchema tableSchema; @JsonSerialize(as = DefaultTableSchema.class) @JsonDeserialize(as = DefaultTableSchema.class) private TableSchema sourceTableSchema; @JsonSerialize(as = DefaultTableSchema.class) @JsonDeserialize(as = DefaultTableSchema.class) private TableSchema feedTableSchema; private String method; private String description = ""; private List<FieldPolicy> fieldPolicies; private List<PartitionField> partitions; private String tableType; @MetadataField private String incrementalDateField; @MetadataField(description = "Source Field to be used when incrementally querying Table Data ") private String sourceTableIncrementalDateField; private TableOptions options; @MetadataField(description = "Hive Row Format String for the Feed Table (example: ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LINES TERMINATED BY '\\n' STORED AS\n TEXTFILE ") private String feedFormat; @MetadataField(description = "Format of the Destination Table storage. Supported Values are: [STORED AS PARQUET, STORED AS ORC]") private String targetFormat; @MetadataField(description = "Destination Hive Table Properties string (i.e. tblproperties(\"orc.compress\"=\"SNAPPY\") ") private String targetTblProperties; @MetadataField(description = "Strategy for merging data into the destination. Supported Values are [Sync, Merge, Dedupe and Merge]") private String targetMergeStrategy; @MetadataField(description = "JSON array of FieldPolicy objects") private String fieldPoliciesJson; @MetadataField(description = "Nifi propety name 'elasticsearch.columns'") private String fieldIndexString; @MetadataField(description = "Nifi property name 'table_partition_specs'") private String partitionStructure; @MetadataField(description = "Nifi property name 'partition_specs'") private String partitionSpecs; @MetadataField(description = "List of destination (feed table) field names separated by a new line") private String fieldsString; @MetadataField(description = "List of source table field names separated by a new line") private String sourceFields; @MetadataField(description = "List of source table field names separated by a comma") private String sourceFieldsCommaString; @MetadataField(description = "Structure of the feed table") private String feedFieldStructure; @MetadataField(description = "List of fields that can be null separated by a comma") private String nullableFields; @MetadataField(description = "List of fields that are primary keys separated by a comma") private String primaryKeyFields; public String getDescription() { return description; } public void setDescription(String description) { this.description = description; } public List<FieldPolicy> getFieldPolicies() { return fieldPolicies; } public void setFieldPolicies(List<FieldPolicy> fieldPolicies) { this.fieldPolicies = fieldPolicies; } public List<PartitionField> getPartitions() { return partitions; } public void setPartitions(List<PartitionField> partitions) { this.partitions = partitions; } public String getPartitionStructure() { return partitionStructure; } public void setPartitionStructure(String partitionStructure) { this.partitionStructure = partitionStructure; } public String getFieldStructure() { return fieldStructure; } public void setFieldStructure(String fieldStructure) { this.fieldStructure = fieldStructure; } public String getFieldsString() { return fieldsString; } public void setFieldsString(String fieldsString) { this.fieldsString = fieldsString; } private void setStringBuffer(StringBuffer sb, String name, String separator) { if (StringUtils.isNotBlank(sb.toString())) { sb.append(separator); } sb.append(name); } private Field getFieldForName(String name) { if (tableSchema != null && tableSchema.getFields() != null) { return tableSchema.getFields().stream().filter(field -> field.getName().equalsIgnoreCase(name)).findFirst().orElse(null); } return null; } /** * Ensure that the partition sourceFieldDataType matches the referencing source datatype * This is needed for the partitoins with the "val" as it needs to use that datatype */ private void ensurePartitionSourceDataTypes() { if (partitions != null) { partitions.stream().forEach(partition -> { Field field = getFieldForName(partition.getSourceField()); if (field != null) { partition.setSourceDataType(field.getDataTypeWithPrecisionAndScale()); } }); } } @JsonIgnore public void simplifyFieldPoliciesForSerialization() { if (fieldPolicies != null) { getFieldPolicies().stream().forEach(fieldPolicy -> { if (fieldPolicy.getStandardization() != null) { fieldPolicy.getStandardization().stream().forEach(policy -> policy.simplifyForSerialization()); } if (fieldPolicy.getValidation() != null) { fieldPolicy.getValidation().stream().forEach(policy -> policy.simplifyForSerialization()); } boolean isPartitionColumn = getPartitions().stream().anyMatch(partitionArrayItem -> partitionArrayItem.getSourceField().equalsIgnoreCase(fieldPolicy.getFieldName())); fieldPolicy.setPartitionColumn(isPartitionColumn); }); } } @JsonIgnore public void updateFieldStringData() { StringBuffer fieldsString = new StringBuffer(); StringBuffer nullableFieldsString = new StringBuffer(); StringBuffer primaryKeyFieldsString = new StringBuffer(); if (tableSchema != null && tableSchema.getFields() != null) { for (Field field : tableSchema.getFields()) { setStringBuffer(fieldsString, field.getName(), "\n"); if (field.isNullable()) { setStringBuffer(nullableFieldsString, field.getName(), ","); } if (field.isPrimaryKey()) { setStringBuffer(primaryKeyFieldsString, field.getName(), ","); } } } setFieldsString(fieldsString.toString()); setNullableFields(nullableFieldsString.toString()); setPrimaryKeyFields(primaryKeyFieldsString.toString()); } /** * ensure the source names are set to some value */ private void ensureSourceTableSchemaFieldNames() { if (sourceTableSchema != null && sourceTableSchema.getFields() != null) { long nullFields = sourceTableSchema.getFields().stream().filter(field -> StringUtils.isBlank(field.getName())).count(); //if the source fields are all null and the counts match that from the dest table, reset the source to the dest names if (nullFields == sourceTableSchema.getFields().size() && tableSchema.getFields() != null && tableSchema.getFields().size() == sourceTableSchema.getFields().size()) { //reset the names to be that of the dest table? List<String> names = tableSchema.getFields().stream().map(f -> f.getName()).collect(Collectors.toList()); for (int i = 0; i < tableSchema.getFields().size(); i++) { Field f = sourceTableSchema.getFields().get(i); if (f instanceof DefaultField) { ((DefaultField) f).setName(names.get(i)); } } } } } @JsonIgnore public void updateSourceFieldsString() { StringBuffer sb = new StringBuffer(); if (sourceTableSchema != null && sourceTableSchema.getFields() != null) { for (Field field : sourceTableSchema.getFields()) { setStringBuffer(sb, field.getName(), "\n"); } setSourceFields(sb.toString()); } } @JsonIgnore public void updateSourceFieldsCommaString() { StringBuffer sb = new StringBuffer(); if (sourceTableSchema != null && sourceTableSchema.getFields() != null) { for (Field field : sourceTableSchema.getFields()) { setStringBuffer(sb, field.getName(), ","); } setSourceFieldsCommaString(sb.toString()); } } @JsonIgnore public String getFieldStructure(TableSchema schema) { StringBuffer sb = new StringBuffer(); if (schema != null && schema.getFields() != null) { for (Field field : schema.getFields()) { if (StringUtils.isNotBlank(sb.toString())) { sb.append("\n"); } sb.append(field.asFieldStructure()); } } return sb.toString(); } @JsonIgnore public void updateFieldStructure() { setFieldStructure(getFieldStructure(tableSchema)); } @JsonIgnore public void updateFeedStructure() { setFeedFieldStructure(getFieldStructure(feedTableSchema)); } @JsonIgnore public void updateFieldIndexString() { StringBuffer sb = new StringBuffer(); if (tableSchema != null && tableSchema.getFields() != null && fieldPolicies != null) { int idx = 0; for (FieldPolicy field : fieldPolicies) { if (field.isIndex() && StringUtils.isNotBlank(sb.toString())) { sb.append(","); } if (field.isIndex()) { sb.append(tableSchema.getFields().get(idx).getName()); } idx++; } } fieldIndexString = sb.toString(); } @JsonIgnore public void updateFieldPolicyNames() { if (tableSchema != null && tableSchema.getFields() != null && fieldPolicies != null && fieldPolicies.size() == tableSchema.getFields().size()) { int idx = 0; for (FieldPolicy field : fieldPolicies) { field.setFieldName(tableSchema.getFields().get(idx).getName()); idx++; } } } @JsonIgnore public void updatePartitionStructure() { StringBuffer sb = new StringBuffer(); if (partitions != null) { for (PartitionField field : partitions) { if (StringUtils.isNotBlank(sb.toString())) { sb.append("\n"); } sb.append(field.asPartitionStructure()); } } setPartitionStructure(sb.toString()); } @JsonIgnore public void updatePartitionSpecs() { StringBuffer sb = new StringBuffer(); if (partitions != null) { for (PartitionField field : partitions) { if (StringUtils.isNotBlank(sb.toString())) { sb.append("\n"); } sb.append(field.asPartitionSpec()); } } setPartitionSpecs(sb.toString()); } @JsonIgnore private void updateFieldPolicyJson() { ObjectMapper mapper = new ObjectMapper(); String json = "[]"; try { simplifyFieldPoliciesForSerialization(); json = mapper.writeValueAsString(getFieldPolicies()); } catch (JsonProcessingException e) { throw new RuntimeException(e); } fieldPoliciesJson = json; } private void updateTargetTblProperties() { this.targetTblProperties = ""; //build based upon compression options if (options != null && StringUtils.isNotBlank(options.getCompressionFormat()) && !"NONE".equalsIgnoreCase(options.getCompressionFormat())) { if ("STORED AS PARQUET".equalsIgnoreCase(getTargetFormat())) { this.targetTblProperties = "tblproperties(\"parquet.compression\"=\"" + options.getCompressionFormat() + "\")"; } else if ("STORED AS ORC".equalsIgnoreCase(getTargetFormat())) { this.targetTblProperties = "tblproperties(\"orc.compress\"=\"" + options.getCompressionFormat() + "\")"; } else { log.warn("Compression enabled with unsupported target format: {}", getTargetFormat()); } } } public void ensureNotEmpty() { if (StringUtils.isBlank(sourceFields)) { sourceFields = "NA"; } if (StringUtils.isBlank(sourceFieldsCommaString)) { sourceFieldsCommaString = "NA"; } if (sourceTableSchema != null) { if (StringUtils.isBlank(sourceTableSchema.getName())) { sourceTableSchema.setName("NA"); } } } public void updateMetadataFieldValues() { ensurePartitionSourceDataTypes(); updatePartitionStructure(); updateFieldStructure(); updateFeedStructure(); updateFieldStringData(); ensureSourceTableSchemaFieldNames(); updateSourceFieldsString(); updateSourceFieldsCommaString(); updateFieldIndexString(); updatePartitionSpecs(); updateFieldPolicyNames(); updateFieldPolicyJson(); updateTargetTblProperties(); ensureNotEmpty(); } public String getMethod() { return method; } public void setMethod(String method) { this.method = method; } public String getTableType() { return tableType; } public void setTableType(String tableType) { this.tableType = tableType; } public TableOptions getOptions() { return options; } public void setOptions(TableOptions options) { this.options = options; } public String getPartitionSpecs() { return partitionSpecs; } public void setPartitionSpecs(String partitionSpecs) { this.partitionSpecs = partitionSpecs; } public TableSchema getTableSchema() { return tableSchema; } public void setTableSchema(TableSchema tableSchema) { this.tableSchema = tableSchema; } public String getFieldIndexString() { return fieldIndexString; } public void setFieldIndexString(String fieldIndexString) { this.fieldIndexString = fieldIndexString; } public String getExistingTableName() { return existingTableName; } public void setExistingTableName(String existingTableName) { this.existingTableName = existingTableName; } public String getIncrementalDateField() { return incrementalDateField; } public void setIncrementalDateField(String incrementalDateField) { this.incrementalDateField = incrementalDateField; } public TableSchema getSourceTableSchema() { return sourceTableSchema; } public void setSourceTableSchema(TableSchema sourceTableSchema) { this.sourceTableSchema = sourceTableSchema; } public TableSchema getFeedTableSchema() { return feedTableSchema; } public void setFeedTableSchema(TableSchema feedTableSchema) { this.feedTableSchema = feedTableSchema; } public String getSourceFields() { return sourceFields; } public void setSourceFields(String sourceFields) { this.sourceFields = sourceFields; } public String getNullableFields() { return nullableFields; } public void setNullableFields(String nullableFields) { this.nullableFields = nullableFields; } public String getPrimaryKeyFields() { return primaryKeyFields; } public void setPrimaryKeyFields(String primaryKeyFields) { this.primaryKeyFields = primaryKeyFields; } public String getFieldPoliciesJson() { return fieldPoliciesJson; } public void setFieldPoliciesJson(String fieldPoliciesJson) { this.fieldPoliciesJson = fieldPoliciesJson; } public String getFeedFormat() { return feedFormat; } public void setFeedFormat(String feedFormat) { this.feedFormat = feedFormat; } public String getTargetFormat() { return targetFormat; } public void setTargetFormat(String targetFormat) { this.targetFormat = targetFormat; } public String getTargetTblProperties() { return targetTblProperties; } public void setTargetTblProperties(String targetTblProperties) { this.targetTblProperties = targetTblProperties; } public String getSourceTableIncrementalDateField() { return sourceTableIncrementalDateField; } public void setSourceTableIncrementalDateField(String sourceTableIncrementalDateField) { this.sourceTableIncrementalDateField = sourceTableIncrementalDateField; } public String getTargetMergeStrategy() { return targetMergeStrategy; } public void setTargetMergeStrategy(String targetMergeStrategy) { this.targetMergeStrategy = targetMergeStrategy; } public String getSourceFieldsCommaString() { return sourceFieldsCommaString; } public void setSourceFieldsCommaString(String sourceFieldsCommaString) { this.sourceFieldsCommaString = sourceFieldsCommaString; } public String getFeedFieldStructure() { return feedFieldStructure; } public void setFeedFieldStructure(String feedFieldStructure) { this.feedFieldStructure = feedFieldStructure; } }