/* * Copyright © 2015 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.explore.table; import co.cask.cdap.api.data.schema.Schema; import co.cask.cdap.api.dataset.lib.Partitioning; import co.cask.cdap.common.conf.Constants; import co.cask.cdap.common.utils.ProjectInfo; import co.cask.cdap.hive.stream.StreamStorageHandler; import com.google.common.collect.ImmutableMap; import org.junit.Assert; import org.junit.Test; import java.util.Map; /** * */ public class CreateStatementBuilderTest { @Test public void testStorageHandlerCreate() throws Exception { String expected = "CREATE EXTERNAL TABLE IF NOT EXISTS stream_purchases " + "(f1 string, f2 int, f3 double, f4 boolean, f5 float, f6 binary) COMMENT 'CDAP Stream' " + "STORED BY 'co.cask.cdap.hive.stream.StreamStorageHandler' " + "WITH SERDEPROPERTIES ('explore.stream.name'='purchases', 'explore.stream.namespace'='default') " + "LOCATION 'hdfs://namenode/my/path' " + "TBLPROPERTIES ('somekey'='someval', 'cdap.name'='purchases', " + "'cdap.version'='" + ProjectInfo.getVersion().toString() + "')"; Schema schema = Schema.recordOf( "stuff", Schema.Field.of("f1", Schema.of(Schema.Type.STRING)), Schema.Field.of("f2", Schema.of(Schema.Type.INT)), Schema.Field.of("f3", Schema.of(Schema.Type.DOUBLE)), Schema.Field.of("f4", Schema.of(Schema.Type.BOOLEAN)), Schema.Field.of("f5", Schema.of(Schema.Type.FLOAT)), Schema.Field.of("f6", Schema.of(Schema.Type.BYTES))); Map<String, String> serdeProperties = ImmutableMap.of( Constants.Explore.STREAM_NAME, "purchases", Constants.Explore.STREAM_NAMESPACE, "default"); String actual = new CreateStatementBuilder("purchases", "stream_purchases") .setSchema(schema) .setLocation("hdfs://namenode/my/path") .setTableProperties(ImmutableMap.of("somekey", "someval")) .setTableComment("CDAP Stream") .buildWithStorageHandler(StreamStorageHandler.class.getName(), serdeProperties); Assert.assertEquals(expected, actual); } @Test public void testRowDelimitedCreate() throws Exception { String expected = "CREATE EXTERNAL TABLE IF NOT EXISTS dataset_myfiles " + "(f1 string, f2 int, f3 double, f4 binary, f5 array<int>) COMMENT 'CDAP Dataset' " + "PARTITIONED BY (f1 STRING, f2 INT) " + "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' " + "STORED AS TEXTFILE LOCATION 'hdfs://namenode/my/path' " + "TBLPROPERTIES ('cdap.name'='myfiles', 'cdap.version'='" + ProjectInfo.getVersion().toString() + "')"; String hiveSchema = "f1 string, f2 int, f3 double, f4 binary, f5 array<int>"; Partitioning partitioning = Partitioning.builder() .addStringField("f1") .addIntField("f2") .build(); String actual = new CreateStatementBuilder("myfiles", "dataset_myfiles") .setSchema(hiveSchema) .setLocation("hdfs://namenode/my/path") .setTableComment("CDAP Dataset") .setPartitioning(partitioning) .setRowFormatDelimited(",", null) .buildWithFileFormat("TEXTFILE"); Assert.assertEquals(expected, actual); } @Test public void testRowSerdeCreate() throws Exception { String expected = "CREATE EXTERNAL TABLE IF NOT EXISTS dataset_myfiles " + "(f1 string, f2 int, f3 double, f4 binary, f5 array<int>) COMMENT 'CDAP Dataset' " + "PARTITIONED BY (f1 STRING, f2 INT) " + "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe' " + "WITH SERDEPROPERTIES ('input.regex'='escapeme!\\'') " + "STORED AS TEXTFILE LOCATION 'hdfs://namenode/my/path' " + "TBLPROPERTIES ('cdap.name'='myfiles', 'cdap.version'='" + ProjectInfo.getVersion().toString() + "')"; String hiveSchema = "f1 string, f2 int, f3 double, f4 binary, f5 array<int>"; Partitioning partitioning = Partitioning.builder() .addStringField("f1") .addIntField("f2") .build(); String actual = new CreateStatementBuilder("myfiles", "dataset_myfiles") .setSchema(hiveSchema) .setLocation("hdfs://namenode/my/path") .setTableComment("CDAP Dataset") .setPartitioning(partitioning) .setRowFormatSerde("org.apache.hadoop.hive.serde2.RegexSerDe", ImmutableMap.of("input.regex", "escapeme!'")) .buildWithFileFormat("TEXTFILE"); Assert.assertEquals(expected, actual); } @Test public void testRowSerdeFormatsCreate() throws Exception { Schema schema = Schema.recordOf( "record", Schema.Field.of("f1", Schema.of(Schema.Type.STRING)), Schema.Field.of("f2", Schema.of(Schema.Type.INT)), Schema.Field.of("f3", Schema.of(Schema.Type.DOUBLE)) ); String expected = "CREATE EXTERNAL TABLE IF NOT EXISTS dataset_myfiles (f1 string, f2 int, f3 double) " + "COMMENT 'CDAP Dataset' " + "PARTITIONED BY (f1 STRING, f2 INT) " + "ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' " + "STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' " + "OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' " + "LOCATION 'hdfs://namenode/my/path' " + "TBLPROPERTIES ('avro.schema.literal'='" + schema.toString() + "', " + "'cdap.name'='myfiles', 'cdap.version'='" + ProjectInfo.getVersion().toString() + "')"; Partitioning partitioning = Partitioning.builder() .addStringField("f1") .addIntField("f2") .build(); String actual = new CreateStatementBuilder("myfiles", "dataset_myfiles") .setSchema(schema) .setTableProperties(ImmutableMap.of("avro.schema.literal", schema.toString())) .setLocation("hdfs://namenode/my/path") .setTableComment("CDAP Dataset") .setPartitioning(partitioning) .setRowFormatSerde("org.apache.hadoop.hive.serde2.avro.AvroSerDe") .buildWithFormats("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat", "org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat"); Assert.assertEquals(expected, actual); } }