/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io.parquet;
import static org.apache.hadoop.hive.ql.io.parquet.HiveParquetSchemaTestUtils.createHiveColumnsFrom;
import static org.apache.hadoop.hive.ql.io.parquet.HiveParquetSchemaTestUtils.createHiveTypeInfoFrom;
import static org.apache.hadoop.hive.ql.io.parquet.HiveParquetSchemaTestUtils.testConversion;
import static org.junit.Assert.assertEquals;
import java.util.List;
import org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.Type.Repetition;
import org.junit.Test;
public class TestHiveSchemaConverter {
@Test
public void testSimpleType() throws Exception {
testConversion(
"a,b,c,d",
"int,bigint,double,boolean",
"message hive_schema {\n"
+ " optional int32 a;\n"
+ " optional int64 b;\n"
+ " optional double c;\n"
+ " optional boolean d;\n"
+ "}\n");
}
@Test
public void testSpecialIntType() throws Exception {
testConversion(
"a,b",
"tinyint,smallint",
"message hive_schema {\n"
+ " optional int32 a (INT_8);\n"
+ " optional int32 b (INT_16);\n"
+ "}\n");
}
@Test
public void testDecimalType() throws Exception {
testConversion(
"a",
"decimal(5,2)",
"message hive_schema {\n"
+ " optional fixed_len_byte_array(3) a (DECIMAL(5,2));\n"
+ "}\n");
}
@Test
public void testCharType() throws Exception {
testConversion(
"a",
"char(5)",
"message hive_schema {\n"
+ " optional binary a (UTF8);\n"
+ "}\n");
}
@Test
public void testVarcharType() throws Exception {
testConversion(
"a",
"varchar(10)",
"message hive_schema {\n"
+ " optional binary a (UTF8);\n"
+ "}\n");
}
@Test
public void testDateType() throws Exception {
testConversion(
"a",
"date",
"message hive_schema {\n"
+ " optional int32 a (DATE);\n"
+ "}\n");
}
@Test
public void testArray() throws Exception {
testConversion("arrayCol",
"array<int>",
"message hive_schema {\n"
+ " optional group arrayCol (LIST) {\n"
+ " repeated group bag {\n"
+ " optional int32 array_element;\n"
+ " }\n"
+ " }\n"
+ "}\n");
}
@Test
public void testArrayDecimal() throws Exception {
testConversion("arrayCol",
"array<decimal(5,2)>",
"message hive_schema {\n"
+ " optional group arrayCol (LIST) {\n"
+ " repeated group bag {\n"
+ " optional fixed_len_byte_array(3) array_element (DECIMAL(5,2));\n"
+ " }\n"
+ " }\n"
+ "}\n");
}
@Test
public void testStruct() throws Exception {
testConversion("structCol",
"struct<a:int,b:double,c:boolean,d:decimal(5,2)>",
"message hive_schema {\n"
+ " optional group structCol {\n"
+ " optional int32 a;\n"
+ " optional double b;\n"
+ " optional boolean c;\n"
+ " optional fixed_len_byte_array(3) d (DECIMAL(5,2));\n"
+ " }\n"
+ "}\n");
}
@Test
public void testMap() throws Exception {
testConversion("mapCol",
"map<string,string>",
"message hive_schema {\n"
+ " optional group mapCol (MAP) {\n"
+ " repeated group map (MAP_KEY_VALUE) {\n"
+ " required binary key;\n"
+ " optional binary value;\n"
+ " }\n"
+ " }\n"
+ "}\n");
}
@Test
public void testMapDecimal() throws Exception {
testConversion("mapCol",
"map<string,decimal(5,2)>",
"message hive_schema {\n"
+ " optional group mapCol (MAP) {\n"
+ " repeated group map (MAP_KEY_VALUE) {\n"
+ " required binary key;\n"
+ " optional fixed_len_byte_array(3) value (DECIMAL(5,2));\n"
+ " }\n"
+ " }\n"
+ "}\n");
}
@Test
public void testMapOriginalType() throws Exception {
final String hiveColumnTypes = "map<string,string>";
final String hiveColumnNames = "mapCol";
final List<String> columnNames = createHiveColumnsFrom(hiveColumnNames);
final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(hiveColumnTypes);
final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
// this messageType only has one optional field, whose name is mapCol, original Type is MAP
assertEquals(1, messageTypeFound.getFieldCount());
org.apache.parquet.schema.Type topLevel = messageTypeFound.getFields().get(0);
assertEquals("mapCol",topLevel.getName());
assertEquals(OriginalType.MAP, topLevel.getOriginalType());
assertEquals(Repetition.OPTIONAL, topLevel.getRepetition());
assertEquals(1, topLevel.asGroupType().getFieldCount());
org.apache.parquet.schema.Type secondLevel = topLevel.asGroupType().getFields().get(0);
//there is one repeated field for mapCol, the field name is "map" and its original Type is MAP_KEY_VALUE;
assertEquals("map", secondLevel.getName());
assertEquals(OriginalType.MAP_KEY_VALUE, secondLevel.getOriginalType());
assertEquals(Repetition.REPEATED, secondLevel.getRepetition());
}
}