package com.thinkbiganalytics.util;
/*-
* #%L
* thinkbig-nifi-core-processors
* %%
* Copyright (C) 2017 ThinkBig Analytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.thinkbiganalytics.hive.util.HiveUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
import java.nio.file.Path;
import java.util.HashSet;
import java.util.Set;
/**
 * Specifications for managed Hive tables.
 *
 * <p>Each constant bundles the flags that drive generation of the DDL fragments for one kind of table:
 * the table-name suffix, whether it is partitioned by {@code processing_dttm}, which storage
 * specification it uses, whether its columns are created as strings, whether a reject-reason column is
 * appended, whether it is external, and whether a {@code processing_dttm} column is appended.
 */
public enum TableType {

    /** Suffix {@code feed}; feed-partitioned, raw storage spec, string columns, external. */
    FEED("feed", true, false, true, false, true),

    /** Suffix {@code valid}; feed-partitioned, target storage spec, typed columns, not external. */
    VALID("valid", true, true, false, false, false),

    /** Suffix {@code invalid}; feed-partitioned, target storage spec, string columns plus the reject-reason column. */
    INVALID("invalid", true, true, true, true, false),

    /** No suffix; uses the caller-supplied partitions, target storage spec, appends a {@code processing_dttm} column. */
    MASTER("", false, true, false, false, false, true),

    /** Suffix {@code profile}; feed-partitioned, target storage spec, string columns, not external. */
    PROFILE("profile", true, true, true, false, false);

    // Enum constants are shared singletons, so their configuration is immutable.
    private final String tableSuffix;
    private final boolean useTargetStorageSpec;
    private final boolean strings;
    private final boolean feedPartition;
    private final boolean addReasonCode;
    private final boolean external;
    private final boolean appendProcessingDttmField;

    /**
     * Creates a table type with every flag given explicitly.
     *
     * @param suffix                    suffix appended to the entity name (empty for none)
     * @param feedPartition             {@code true} to partition by {@code processing_dttm} instead of the supplied partitions
     * @param useTargetStorageSpec      {@code true} to use the target format and table properties rather than the raw ones
     * @param strings                   {@code true} if columns default to string types (see {@link #isStrings(String)})
     * @param addReasonCode             {@code true} to append the {@code dlp_reject_reason} column
     * @param external                  value reported by {@link #isExternal()}
     * @param appendProcessingDttmField {@code true} to append a {@code processing_dttm} column to the column list
     */
    TableType(String suffix, boolean feedPartition, boolean useTargetStorageSpec, boolean strings, boolean addReasonCode, boolean external,
              boolean appendProcessingDttmField) {
        this.tableSuffix = suffix;
        this.feedPartition = feedPartition;
        this.useTargetStorageSpec = useTargetStorageSpec;
        this.strings = strings;
        this.addReasonCode = addReasonCode;
        this.external = external;
        this.appendProcessingDttmField = appendProcessingDttmField;
    }

    /**
     * Creates a table type that does not append a {@code processing_dttm} column to the column list.
     */
    TableType(String suffix, boolean feedPartition, boolean useTargetStorageSpec, boolean strings, boolean addReasonCode, boolean external) {
        this(suffix, feedPartition, useTargetStorageSpec, strings, addReasonCode, external, false);
    }

    /**
     * Derives the table name for an entity.
     *
     * @param entity the entity name
     * @return {@code entity} followed by {@code _<suffix>}, or just {@code entity} when this type has no suffix
     */
    public String deriveTablename(String entity) {
        if (StringUtils.isEmpty(tableSuffix)) {
            return entity + "";
        }
        return entity + "_" + tableSuffix;
    }

    /**
     * Derives the qualified table name by passing the trimmed source and the derived table name
     * (built from the trimmed entity) to {@link HiveUtils#quoteIdentifier}.
     *
     * @param source the source name; must not be {@code null}
     * @param entity the entity name; must not be {@code null}
     * @return the result of {@code HiveUtils.quoteIdentifier}
     * @throws NullPointerException if {@code source} or {@code entity} is {@code null}
     */
    public String deriveQualifiedName(String source, String entity) {
        Validate.notNull(source, "source expected");
        Validate.notNull(entity, "entity expected");
        String tableName = deriveTablename(entity.trim());
        return HiveUtils.quoteIdentifier(source.trim(), tableName);
    }

    /**
     * Derives the {@code LOCATION} clause for the table.
     *
     * @param tableLocation the base location
     * @param source        the source name, resolved beneath {@code tableLocation}
     * @param entity        the entity name, resolved beneath the source
     * @return {@code " LOCATION '<path>'"} where the path is base/source/entity/suffix
     * @throws NullPointerException if any argument is {@code null}
     */
    public String deriveLocationSpecification(Path tableLocation, String source, String entity) {
        Validate.notNull(tableLocation, "tableLocation expected");
        Validate.notNull(source, "source expected");
        Validate.notNull(entity, "entity expected");

        Path tablePath = tableLocation.resolve(source).resolve(entity).resolve(tableSuffix);
        // NOTE(review): presumably restores the "//" of a URI scheme (e.g. hdfs://) that was collapsed
        // to a single slash when the location was held as a Path - confirm against callers.
        String location = tablePath.toString().replace(":/", "://");
        return " LOCATION '" + location + "'";
    }

    /**
     * Determines whether the columns of this table should be created as strings.
     *
     * @param feedFormatOptions the feed format options, may be {@code null}
     * @return {@code false} if this type does not use strings, or if the upper-cased options contain
     *         {@code " PARQUET"}, {@code " ORC"}, {@code " AVRO"} or {@code "JSON"}; {@code true} otherwise
     */
    public boolean isStrings(String feedFormatOptions) {
        if (!strings || feedFormatOptions == null) {
            return strings;
        }
        // Hack for now. Need a better way to identify if this is text file (no schema enforced or schema enforced)
        String upperOptions = feedFormatOptions.toUpperCase();
        // Note: "JSON" is matched without the leading space the other keywords require.
        boolean structured = upperOptions.contains(" PARQUET")
                             || upperOptions.contains(" ORC")
                             || upperOptions.contains(" AVRO")
                             || upperOptions.contains("JSON");
        // Structured file so we will use native types
        return !structured;
    }

    /**
     * Derives the comma-separated column list for the table.
     *
     * <p>Unless this type is feed-partitioned, columns whose name matches a partition column are left out.
     * The {@code dlp_reject_reason} and {@code processing_dttm} columns are appended when this type is
     * configured for them. A separator is only written between entries, so the result never starts with
     * a comma even when no regular column is emitted.
     *
     * @param columns           the column specifications; must not be {@code null}
     * @param partitionColumns  the partition columns, may be {@code null} or empty
     * @param feedFormatOptions the feed format options passed to {@link #isStrings(String)}
     * @return the column list
     * @throws NullPointerException if {@code columns} is {@code null}
     */
    public String deriveColumnSpecification(ColumnSpec[] columns, ColumnSpec[] partitionColumns, String feedFormatOptions) {
        Validate.notNull(columns, "columns expected");
        boolean allStrings = isStrings(feedFormatOptions);

        Set<String> partitionNames = new HashSet<>();
        if (!feedPartition && partitionColumns != null) {
            for (ColumnSpec partition : partitionColumns) {
                partitionNames.add(partition.getName());
            }
        }

        StringBuilder sql = new StringBuilder();
        boolean first = true;
        for (ColumnSpec spec : columns) {
            if (partitionNames.contains(spec.getName())) {
                continue;
            }
            if (!first) {
                sql.append(", ");
            }
            sql.append(spec.toCreateSQL(allStrings));
            first = false;
        }

        // Handle the special case for writing error reason in invalid table
        if (addReasonCode) {
            if (!first) {
                sql.append(", ");
            }
            sql.append("dlp_reject_reason string ");
            first = false;
        }
        if (appendProcessingDttmField) {
            if (!first) {
                sql.append(", ");
            }
            sql.append("processing_dttm string");
        }
        return sql.toString();
    }

    /**
     * Derive the STORED AS clause for the table
     *
     * @param rawSpecification    the clause for the raw specification
     * @param targetSpecification the target specification
     * @return {@code targetSpecification} if this type uses the target storage spec, otherwise {@code rawSpecification}
     */
    public String deriveFormatSpecification(String rawSpecification, String targetSpecification) {
        // String.valueOf keeps the long-standing behaviour of rendering a null specification as "null".
        return String.valueOf(isUseTargetStorageSpec() ? targetSpecification : rawSpecification);
    }

    /**
     * @return {@code true} if this type uses the target storage specification and table properties
     */
    public boolean isUseTargetStorageSpec() {
        return useTargetStorageSpec;
    }

    /**
     * @return {@code true} if this type is partitioned by {@code processing_dttm}
     */
    public boolean isFeedPartition() {
        return feedPartition;
    }

    /**
     * Derives the {@code PARTITIONED BY} clause for the table.
     *
     * @param partitions the partition columns, may be {@code null} or empty; ignored for feed-partitioned types
     * @return the {@code processing_dttm} partition clause for feed-partitioned types; otherwise a clause
     *         listing {@code partitions}, or an empty string when there are none
     */
    public String derivePartitionSpecification(ColumnSpec[] partitions) {
        if (feedPartition) {
            return " PARTITIONED BY (`processing_dttm` string) ";
        }
        if (partitions == null || partitions.length == 0) {
            return "";
        }
        StringBuilder sql = new StringBuilder(" PARTITIONED BY (");
        for (int idx = 0; idx < partitions.length; idx++) {
            if (idx > 0) {
                sql.append(", ");
            }
            sql.append(partitions[idx].toPartitionSQL());
        }
        return sql.append(") ").toString();
    }

    /**
     * Derives the table properties for the table.
     *
     * @param targetTableProperties the target table properties
     * @return {@code targetTableProperties} if this type uses the target storage spec, otherwise an empty string
     */
    public String deriveTableProperties(String targetTableProperties) {
        return isUseTargetStorageSpec() ? targetTableProperties : "";
    }

    /**
     * @return the {@code external} flag this type was configured with
     */
    public boolean isExternal() {
        return external;
    }
}