package com.thinkbiganalytics.util; /*- * #%L * thinkbig-nifi-core-processors * %% * Copyright (C) 2017 ThinkBig Analytics * %% * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * #L% */ import com.thinkbiganalytics.hive.util.HiveUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedReader; import java.io.IOException; import java.io.StringReader; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.Vector; import java.util.stream.Collectors; import javax.annotation.Nonnull; /** * Represents a partition specification for a target table */ public class PartitionSpec implements Cloneable { private static final Logger log = LoggerFactory.getLogger(PartitionSpec.class); private List<PartitionKey> keys; public PartitionSpec(PartitionKey... partitionKeys) { super(); keys = Arrays.asList(partitionKeys); } /** * Creates partition keys from a string specification in format: field|type|formula\n * format, e.g. * year|string|year(hired) * month|int|month(hired) * country|int|country **/ public PartitionSpec(String spec) { super(); keys = new Vector<>(); if (!StringUtils.isEmpty(spec)) { try (BufferedReader br = new BufferedReader(new StringReader(spec))) { String line = null; while ((line = br.readLine()) != null) { PartitionKey partitionKey = PartitionKey.createFromString(line); if (partitionKey != null) { keys.add(partitionKey); } } } catch (IOException e) { throw new RuntimeException("Failed to process specification [" + spec + "]"); } } } public static void main(String[] args) { PartitionKey key1 = new PartitionKey("country", "string", "country"); PartitionKey key2 = new PartitionKey("year", "int", "year(hired)"); PartitionKey key3 = new PartitionKey("month", "int", "month(hired)"); PartitionSpec spec = new PartitionSpec(key1, key2, key3); String[] selectFields = new String[]{"id", "name", "company", "zip", "phone", "email", "hired"}; String selectSQL = StringUtils.join(selectFields, ","); String[] values = new String[]{"USA", "2015", "4"}; String targetSqlWhereClause = spec.toTargetSQLWhere(values); String sourceSqlWhereClause = spec.toSourceSQLWhere(values); String partitionClause = spec.toPartitionSpec(values); /* insert overwrite table employee partition (year=2015,month=10,country='USA') select id, name, company, zip, phone, email, hired from employee_feed where year(hired)=2015 and month(hired)=10 and country='USA' union distinct select id, name, company, zip, phone, email, hired from employee where year=2015 and month=10 and country='USA' */ String targetTable = "employee"; String sourceTable = "employee_feed"; String sqlWhere = "employee_feed"; StringBuffer sb = new StringBuffer(); sb.append("insert overwrite table ").append(targetTable).append(" ") .append(partitionClause) .append(" select ").append(selectSQL) .append(" from ").append(sourceTable).append(" ") .append(" where ") .append(sourceSqlWhereClause) .append(" union distinct ") .append(" select ").append(selectSQL) .append(" from ").append(targetTable).append(" ") .append(" where ") .append(targetSqlWhereClause); log.info(sb.toString()); } public Set<String> getKeyNames() { HashSet<String> keySet = new HashSet<>(); for (PartitionKey partitionKey : keys) { keySet.add(partitionKey.getKey()); } return keySet; } public boolean isNonPartitioned() { return keys.size() == 0; } /** * Generates a where clause against the target table using the partition keys */ public String toTargetSQLWhere(String[] values) { String[] parts = new String[keys.size()]; for (int i = 0; i < keys.size(); i++) { parts[i] = keys.get(i).toTargetSQLWhere(values[i]); } return StringUtils.join(parts, " and "); } public String toSourceSQLWhere(String[] values) { String[] parts = new String[keys.size()]; for (int i = 0; i < keys.size(); i++) { parts[i] = keys.get(i).toSourceSQLWhere(values[i]); } return StringUtils.join(parts, " and "); } public String toPartitionSpec(String[] values) { String[] parts = new String[keys.size()]; for (int i = 0; i < keys.size(); i++) { parts[i] = keys.get(i).toPartitionNameValue(values[i]); } return "partition (" + StringUtils.join(parts, ",") + ")"; } public String toDynamicPartitionSpec() { String[] parts = new String[keys.size()]; for (int i = 0; i < keys.size(); i++) { parts[i] = keys.get(i).getKeyWithAlias(); } return "partition (" + toPartitionSelectSQL() + ")"; } public String toPartitionSelectSQL() { String[] parts = new String[keys.size()]; for (int i = 0; i < keys.size(); i++) { parts[i] = keys.get(i).getKeyWithAlias(); } return StringUtils.join(parts, ","); } public String toDynamicSelectSQLSpec() { String[] parts = new String[keys.size()]; for (int i = 0; i < keys.size(); i++) { parts[i] = keys.get(i).getFormulaWithAlias() + " " + keys.get(i).getKeyForSql(); } return StringUtils.join(parts, ","); } /** * Generates a select statement that will find all unique data partitions in the source table. * * @param sourceSchema the schema or database name of the source table * @param sourceTable the source table name * @param feedPartitionValue the source processing partition value */ public String toDistinctSelectSQL(@Nonnull final String sourceSchema, @Nonnull final String sourceTable, @Nonnull final String feedPartitionValue) { final String keysWithAliases = keys.stream() .map(PartitionKey::getFormulaWithAlias) .collect(Collectors.joining(", ")); return "select " + keysWithAliases + ", count(0) as `tb_cnt` from " + HiveUtils.quoteIdentifier(sourceSchema, sourceTable) + " where `processing_dttm` = " + HiveUtils.quoteString(feedPartitionValue) + " group by " + keysWithAliases; } public PartitionSpec newForAlias(String alias) { return new PartitionSpec(PartitionKey.partitionKeysForTableAlias(this.keys.toArray(new PartitionKey[0]), alias)); } }