/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.explore.table;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.data.schema.UnsupportedTypeException;
import co.cask.cdap.api.dataset.lib.Partitioning;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.common.utils.ProjectInfo;
import co.cask.cdap.data2.dataset2.lib.partitioned.FieldTypes;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.apache.twill.filesystem.Location;
import java.util.Map;
import javax.annotation.Nullable;
/**
* Builds create table statements for Hive. Table DDL we support is of the form:
*
* CREATE EXTERNAL TABLE IF NOT EXISTS [db_name.]table_name
* [(col_name data_type, ...)]
* [COMMENT table_comment]
* [PARTITIONED BY (col_name data_type, ...)]
* [
* [ROW FORMAT row_format]
* [STORED AS file_format]
* | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]
* ]
* [LOCATION hdfs_path]
* [TBLPROPERTIES (property_name=property_value, ...)]
*
*
* row_format
* : DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]]
* | SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
*
* file_format
* : SEQUENCEFILE
* | TEXTFILE -- (Default, depending on hive.default.fileformat configuration)
* | RCFILE -- (Note: Available in Hive 0.6.0 and later)
* | ORC -- (Note: Available in Hive 0.11.0 and later)
* | PARQUET -- (Note: Available in Hive 0.13.0 and later)
* | AVRO -- (Note: Available in Hive 0.14.0 and later)
* | INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
*
* We only support a subset of what Hive can do. For example, there is no support for SKEWED BY or CLUSTERED BY.
*/
public class CreateStatementBuilder {
  // separator placed between list items (columns, properties) in the generated DDL
  private static final String ITEM_SEPARATOR = ", ";

  private final String name;
  private final String hiveTableName;
  private String hiveSchema;
  private String location;
  private String tableComment;
  private String rowFormat;
  private Partitioning partitioning;
  private Map<String, String> tableProperties;

  /**
   * Creates a builder for a table. The table properties initially contain only the required CDAP properties.
   *
   * @param name the value stored under the CDAP name table property
   * @param hiveTableName the table name used in the CREATE statement, written as given (no quoting is applied)
   */
  public CreateStatementBuilder(String name, String hiveTableName) {
    // 'name' must be assigned before addRequiredTableProperties is called, because that method reads it
    this.name = name;
    this.hiveTableName = hiveTableName;
    this.tableProperties = addRequiredTableProperties(Maps.<String, String>newHashMap());
  }

  /**
   * Set the schema for the table. Throws an exception if it is not valid for Hive.
   *
   * @param schema the schema to convert into a Hive column list
   * @return this builder
   * @throws UnsupportedTypeException declared by the schema conversion; presumably raised for types that
   *                                  have no Hive equivalent
   */
  public CreateStatementBuilder setSchema(Schema schema) throws UnsupportedTypeException {
    // the converted schema is used as is; unlike setSchema(String), no parentheses are added here
    this.hiveSchema = SchemaConverter.toHiveSchema(schema);
    return this;
  }

  /**
   * Set the hive schema for the table. Should be of the form "column_name column_type, ...".
   * The given string is wrapped in parentheses by this method.
   *
   * @param hiveSchema the column list, without surrounding parentheses
   * @return this builder
   */
  public CreateStatementBuilder setSchema(String hiveSchema) {
    this.hiveSchema = "(" + hiveSchema + ")";
    return this;
  }

  /**
   * Set table properties. CDAP name and version must not be in the given properties, as they are added by the builder.
   * This replaces any table properties set earlier.
   *
   * @param tableProperties the properties to use in addition to the required CDAP properties
   * @return this builder
   */
  public CreateStatementBuilder setTableProperties(Map<String, String> tableProperties) {
    this.tableProperties = addRequiredTableProperties(tableProperties);
    return this;
  }

  /**
   * Set the location of the Hive table.
   *
   * @param location the location, written into the LOCATION clause as given; null or empty omits the clause
   * @return this builder
   */
  public CreateStatementBuilder setLocation(String location) {
    this.location = location;
    return this;
  }

  /**
   * Set the location of the Hive table, using the string form of the location's URI.
   *
   * @param location the location of the table
   * @return this builder
   */
  public CreateStatementBuilder setLocation(Location location) {
    this.location = location.toURI().toString();
    return this;
  }

  /**
   * Set partitions of the Hive table.
   *
   * @param partitioning the partitioning whose fields become the PARTITIONED BY columns; if it is null or
   *                     has no fields, the clause is omitted
   * @return this builder
   */
  public CreateStatementBuilder setPartitioning(Partitioning partitioning) {
    this.partitioning = partitioning;
    return this;
  }

  /**
   * Set a comment for the Hive table. Single quotes in the comment are escaped when the statement is built,
   * so the comment should be passed unescaped.
   *
   * @param tableComment the comment; null or empty omits the COMMENT clause
   * @return this builder
   */
  public CreateStatementBuilder setTableComment(String tableComment) {
    this.tableComment = tableComment;
    return this;
  }

  /**
   * Set the row format serde without properties.
   *
   * @param rowFormatSerde the serde class name
   * @return this builder
   * @throws IllegalArgumentException if a row format has already been set
   */
  public CreateStatementBuilder setRowFormatSerde(String rowFormatSerde) {
    return setRowFormatSerde(rowFormatSerde, null);
  }

  /**
   * Set the row format serde with properties. Corresponds to using:
   * ROW FORMAT SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, ...)].
   *
   * @param rowFormatSerde the serde class name, written inside single quotes as given
   * @param serdeProperties optional serde properties; null or empty omits the WITH SERDEPROPERTIES clause
   * @return this builder
   * @throws IllegalArgumentException if a row format has already been set
   */
  public CreateStatementBuilder setRowFormatSerde(String rowFormatSerde,
                                                  @Nullable Map<String, String> serdeProperties) {
    Preconditions.checkArgument(rowFormat == null, "row format can only be set once.");
    StringBuilder strBuilder = new StringBuilder()
      .append("SERDE '")
      .append(rowFormatSerde)
      .append("'");
    if (serdeProperties != null && !serdeProperties.isEmpty()) {
      strBuilder.append(" WITH SERDEPROPERTIES ");
      appendMap(strBuilder, serdeProperties);
    }
    this.rowFormat = strBuilder.toString();
    return this;
  }

  /**
   * Set the row format using delimited by. Corresponds to using:
   * ROW FORMAT DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]]
   * The escapedBy char can only be given if terminatedBy is not null; it is ignored otherwise.
   *
   * @param terminatedBy the field delimiter; null or empty omits the FIELDS TERMINATED BY clause
   * @param escapedBy the escape character; null or empty omits the ESCAPED BY clause
   * @return this builder
   * @throws IllegalArgumentException if a row format has already been set
   */
  public CreateStatementBuilder setRowFormatDelimited(@Nullable String terminatedBy, @Nullable String escapedBy) {
    Preconditions.checkArgument(rowFormat == null, "row format can only be set once.");
    StringBuilder strBuilder = new StringBuilder()
      .append("DELIMITED");
    if (terminatedBy != null && !terminatedBy.isEmpty()) {
      strBuilder.append(" FIELDS TERMINATED BY '")
        .append(terminatedBy)
        .append("'");
      // ESCAPED BY is only valid as part of the FIELDS TERMINATED BY clause
      if (escapedBy != null && !escapedBy.isEmpty()) {
        strBuilder.append(" ESCAPED BY '")
          .append(escapedBy)
          .append("'");
      }
    }
    this.rowFormat = strBuilder.toString();
    return this;
  }

  /**
   * Builds a create statement for a custom (non-native) Hive table. This means it uses a storage handler
   * and has optional serde properties. For example:
   *
   * CREATE EXTERNAL TABLE IF NOT EXISTS nn
   * STORED BY 'co.cask.cdap.hive.datasets.DatasetStorageHandler'
   * WITH SERDEPROPERTIES ('cdap.name'='nn', 'cdap.namespace'='default')
   * LOCATION '<uri>'
   * TBLPROPERTIES ( ... )
   *
   * @param storageHandler the storage handler class name, written inside single quotes as given
   * @param serdeProperties optional serde properties; null or empty omits the WITH SERDEPROPERTIES clause
   * @return the create statement
   */
  public String buildWithStorageHandler(String storageHandler, @Nullable Map<String, String> serdeProperties) {
    StringBuilder strBuilder = startBuild()
      .append(" STORED BY '")
      .append(storageHandler)
      .append("'");
    if (serdeProperties != null && !serdeProperties.isEmpty()) {
      strBuilder.append(" WITH SERDEPROPERTIES ");
      appendMap(strBuilder, serdeProperties);
    }
    return finishBuild(strBuilder);
  }

  /**
   * Builds a create statement for a native Hive table which is stored as a native Hive file format.
   * For example:
   * CREATE EXTERNAL TABLE IF NOT EXISTS nn
   * [ PARTITIONED BY (field type, ...) ]
   * ROW FORMAT SERDE '<serde class>'
   * STORED AS avro
   * LOCATION '<uri>'
   * TBLPROPERTIES ('avro.schema.literal'='...')
   *
   * @param fileFormat the file format keyword, written without quotes
   * @return the create statement
   */
  public String buildWithFileFormat(String fileFormat) {
    StringBuilder strBuilder = startBuild()
      .append(" STORED AS ")
      .append(fileFormat);
    return finishBuild(strBuilder);
  }

  /**
   * Builds a create statement for a native Hive table which is stored as a file format that uses an input format
   * and output format.
   * For example:
   * CREATE EXTERNAL TABLE IF NOT EXISTS nn
   * [ PARTITIONED BY (field type, ...) ]
   * ROW FORMAT SERDE '<serde class>'
   * STORED AS INPUTFORMAT '<input format class>'
   * OUTPUTFORMAT '<output format class>'
   * LOCATION '<uri>'
   * TBLPROPERTIES ('avro.schema.literal'='...')
   *
   * @param inputFormat the input format class name, written inside single quotes as given
   * @param outputFormat the output format class name, written inside single quotes as given
   * @return the create statement
   */
  public String buildWithFormats(String inputFormat, String outputFormat) {
    StringBuilder strBuilder = startBuild()
      .append(" STORED AS INPUTFORMAT '")
      .append(inputFormat)
      .append("' OUTPUTFORMAT '")
      .append(outputFormat)
      .append("'");
    return finishBuild(strBuilder);
  }

  // Returns an immutable copy of the given map with the properties required for every CDAP Hive table added.
  private Map<String, String> addRequiredTableProperties(Map<String, String> map) {
    return ImmutableMap.<String, String>builder().putAll(map)
      .put(Constants.Explore.CDAP_NAME, name)
      .put(Constants.Explore.CDAP_VERSION, ProjectInfo.getVersion().toString())
      .build();
  }

  /**
   * Start the create statement.
   * CREATE EXTERNAL TABLE IF NOT EXISTS [db_name.]table_name
   * [(col_name data_type, ...)]
   * [COMMENT table_comment]
   * [PARTITIONED BY (col_name data_type, ...)]
   * [ROW FORMAT row_format]
   */
  private StringBuilder startBuild() {
    StringBuilder strBuilder = new StringBuilder()
      .append("CREATE EXTERNAL TABLE IF NOT EXISTS ")
      .append(hiveTableName);

    // yeah... schema is not always required.
    if (hiveSchema != null) {
      strBuilder.append(" ").append(hiveSchema);
    }

    if (tableComment != null && !tableComment.isEmpty()) {
      // escape quotes so that a comment containing ' does not terminate the literal early
      strBuilder.append(" COMMENT '")
        .append(escapeSingleQuotes(tableComment))
        .append("'");
    }

    if (partitioning != null && !partitioning.getFields().isEmpty()) {
      strBuilder.append(" PARTITIONED BY (");
      // the separator is empty for the first field, so no trailing separator has to be removed
      String separator = "";
      for (Map.Entry<String, Partitioning.FieldType> entry : partitioning.getFields().entrySet()) {
        strBuilder.append(separator)
          .append(entry.getKey())
          .append(" ")
          .append(FieldTypes.toHiveType(entry.getValue()));
        separator = ITEM_SEPARATOR;
      }
      strBuilder.append(")");
    }

    if (rowFormat != null) {
      strBuilder.append(" ROW FORMAT ").append(rowFormat);
    }
    return strBuilder;
  }

  /**
   * Finish the create statement:
   * ...
   * [LOCATION hdfs_path]
   * [TBLPROPERTIES (property_name=property_value, ...)]
   */
  private String finishBuild(StringBuilder strBuilder) {
    if (location != null && !location.isEmpty()) {
      strBuilder.append(" LOCATION '")
        .append(location)
        .append("'");
    }

    // table properties is never empty because of required cdap properties
    strBuilder.append(" TBLPROPERTIES ");
    appendMap(strBuilder, tableProperties);
    return strBuilder.toString();
  }

  // Appends the contents of the map as ('key'='val', ...). Also escapes any single quotes in the map.
  // An empty map is appended as "()" and leaves the text already in the builder untouched.
  private static void appendMap(StringBuilder strBuilder, Map<String, String> map) {
    strBuilder.append("(");
    String separator = "";
    for (Map.Entry<String, String> entry : map.entrySet()) {
      strBuilder.append(separator)
        .append("'")
        .append(escapeSingleQuotes(entry.getKey()))
        .append("'='")
        .append(escapeSingleQuotes(entry.getValue()))
        .append("'");
      separator = ITEM_SEPARATOR;
    }
    strBuilder.append(")");
  }

  // Prefixes every single quote with a backslash so the text can be placed inside a single-quoted literal.
  // Uses literal (non-regex) replacement; other characters, including backslashes, are left unchanged.
  private static String escapeSingleQuotes(String text) {
    return text.replace("'", "\\'");
  }
}