/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.serde2.avro;

import com.google.common.io.Resources;
import org.junit.Assert;
import org.apache.avro.Schema;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Tests for {@link TypeInfoToSchema}: each test converts a single Hive column type into an
 * Avro record schema and compares the schema's JSON string against the expected JSON.
 */
public class TestTypeInfoToSchema {

  private static final Logger LOGGER = LoggerFactory.getLogger(TestTypeInfoToSchema.class);

  /** Name of the single column used by every generated record schema. */
  private static final List<String> COLUMN_NAMES = Arrays.asList("testCol");

  private static final TypeInfo STRING = TypeInfoFactory.getPrimitiveTypeInfo(
      serdeConstants.STRING_TYPE_NAME);
  private static final TypeInfo INT = TypeInfoFactory.getPrimitiveTypeInfo(
      serdeConstants.INT_TYPE_NAME);
  private static final TypeInfo BOOLEAN = TypeInfoFactory.getPrimitiveTypeInfo(
      serdeConstants.BOOLEAN_TYPE_NAME);
  private static final TypeInfo LONG = TypeInfoFactory.getPrimitiveTypeInfo(
      serdeConstants.BIGINT_TYPE_NAME);
  private static final TypeInfo FLOAT = TypeInfoFactory.getPrimitiveTypeInfo(
      serdeConstants.FLOAT_TYPE_NAME);
  private static final TypeInfo DOUBLE = TypeInfoFactory.getPrimitiveTypeInfo(
      serdeConstants.DOUBLE_TYPE_NAME);
  private static final TypeInfo BINARY = TypeInfoFactory.getPrimitiveTypeInfo(
      serdeConstants.BINARY_TYPE_NAME);
  private static final TypeInfo BYTE = TypeInfoFactory.getPrimitiveTypeInfo(
      serdeConstants.TINYINT_TYPE_NAME);
  private static final TypeInfo SHORT = TypeInfoFactory.getPrimitiveTypeInfo(
      serdeConstants.SMALLINT_TYPE_NAME);
  private static final TypeInfo VOID = TypeInfoFactory.getPrimitiveTypeInfo(
      serdeConstants.VOID_TYPE_NAME);
  private static final TypeInfo DATE = TypeInfoFactory.getPrimitiveTypeInfo(
      serdeConstants.DATE_TYPE_NAME);
  private static final TypeInfo TIMESTAMP = TypeInfoFactory.getPrimitiveTypeInfo(
      serdeConstants.TIMESTAMP_TYPE_NAME);

  private static final int PRECISION = 4;
  private static final int SCALE = 2;
  private static final TypeInfo DECIMAL = TypeInfoFactory.getPrimitiveTypeInfo(
      new DecimalTypeInfo(PRECISION, SCALE).getQualifiedName());

  private static final int CHAR_LEN = 5;
  private static final TypeInfo CHAR = TypeInfoFactory.getPrimitiveTypeInfo(
      new CharTypeInfo(CHAR_LEN).getQualifiedName());
  private static final TypeInfo VARCHAR = TypeInfoFactory.getPrimitiveTypeInfo(
      new VarcharTypeInfo(CHAR_LEN).getQualifiedName());

  /** Converter under test; a fresh instance is created before every test. */
  private TypeInfoToSchema typeInfoToSchema;

  /**
   * Converts a one-column table whose column has the given type and returns the resulting
   * Avro schema rendered as a JSON string.
   *
   * @param columnType Hive type of the single test column
   * @return the string form of the schema produced by the converter
   */
  private String getAvroSchemaString(TypeInfo columnType) {
    return typeInfoToSchema.convert(
        COLUMN_NAMES,
        Arrays.asList(columnType),
        Arrays.asList(""),
        "org.apache.hive.avro.testing",
        "avrotest",
        "This is to test hive-avro").toString();
  }

  /**
   * Builds the expected record schema JSON with {@code specificSchema} used verbatim as the
   * type of the single field.
   *
   * @param specificSchema JSON fragment describing the field's type
   * @return the full expected record schema as a JSON string
   */
  private String genSchemaWithoutNull(String specificSchema) {
    return "{"
        + "\"type\":\"record\","
        + "\"name\":\"avrotest\","
        + "\"namespace\":\"org.apache.hive.avro.testing\","
        + "\"doc\":\"This is to test hive-avro\","
        + "\"fields\":["
        + "{\"name\":\"testCol\","
        + "\"type\":" + specificSchema + ","
        + "\"doc\":\"\","
        + "\"default\":null}"
        + "]}";
  }

  /**
   * Builds the expected record schema JSON with the field's type wrapped in a
   * {@code ["null", <specificSchema>]} union.
   *
   * @param specificSchema JSON fragment describing the non-null branch of the field's type
   * @return the full expected record schema as a JSON string
   */
  private String genSchema(String specificSchema) {
    return genSchemaWithoutNull("[\"null\"," + specificSchema + "]");
  }

  /**
   * Reads a classpath resource as UTF-8 text and removes all line breaks from it.
   *
   * <p>Both {@code \r} and {@code \n} are stripped so that the result does not depend on the
   * platform line separator or on how the resource file was checked out. The stream is closed
   * before returning.
   *
   * @param resourceName name of the resource, resolved via {@link Resources#getResource(String)}
   * @return the resource content with every carriage return and line feed removed
   * @throws IOException if the resource cannot be opened or read
   */
  private static String readSchemaResource(String resourceName) throws IOException {
    try (InputStream in = Resources.getResource(resourceName).openStream()) {
      return IOUtils.toString(in, StandardCharsets.UTF_8)
          .replace("\r", "")
          .replace("\n", "");
    }
  }

  @Before
  public void setUp() {
    typeInfoToSchema = new TypeInfoToSchema();
  }

  /** A string column is expected to become a nullable Avro {@code string}. */
  @Test
  public void createAvroStringSchema() {
    final String specificSchema = "\"string\"";
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for string's avro schema failed",
        expectedSchema, getAvroSchemaString(STRING));
  }

  /** A binary column is expected to become a nullable Avro {@code bytes}. */
  @Test
  public void createAvroBinarySchema() {
    final String specificSchema = "\"bytes\"";
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for binary's avro schema failed",
        expectedSchema, getAvroSchemaString(BINARY));
  }

  /** A tinyint column is expected to become a nullable Avro {@code int}. */
  @Test
  public void createAvroBytesSchema() {
    final String specificSchema = "\"int\"";
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for bytes's avro schema failed",
        expectedSchema, getAvroSchemaString(BYTE));
  }

  /** A smallint column is expected to become a nullable Avro {@code int}. */
  @Test
  public void createAvroShortSchema() {
    final String specificSchema = "\"int\"";
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for short's avro schema failed",
        expectedSchema, getAvroSchemaString(SHORT));
  }

  /** An int column is expected to become a nullable Avro {@code int}. */
  @Test
  public void createAvroIntSchema() {
    final String specificSchema = "\"int\"";
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for int's avro schema failed",
        expectedSchema, getAvroSchemaString(INT));
  }

  /** A bigint column is expected to become a nullable Avro {@code long}. */
  @Test
  public void createAvroLongSchema() {
    final String specificSchema = "\"long\"";
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for long's avro schema failed",
        expectedSchema, getAvroSchemaString(LONG));
  }

  /** A float column is expected to become a nullable Avro {@code float}. */
  @Test
  public void createAvroFloatSchema() {
    final String specificSchema = "\"float\"";
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for float's avro schema failed",
        expectedSchema, getAvroSchemaString(FLOAT));
  }

  /** A double column is expected to become a nullable Avro {@code double}. */
  @Test
  public void createAvroDoubleSchema() {
    final String specificSchema = "\"double\"";
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for double's avro schema failed",
        expectedSchema, getAvroSchemaString(DOUBLE));
  }

  /** A boolean column is expected to become a nullable Avro {@code boolean}. */
  @Test
  public void createAvroBooleanSchema() {
    final String specificSchema = "\"boolean\"";
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for boolean's avro schema failed",
        expectedSchema, getAvroSchemaString(BOOLEAN));
  }

  /** A void column is expected to become a plain Avro {@code null}, not a union. */
  @Test
  public void createAvroVoidSchema() {
    final String specificSchema = "\"null\"";
    String expectedSchema = genSchemaWithoutNull(specificSchema);

    Assert.assertEquals("Test for void's avro schema failed",
        expectedSchema, getAvroSchemaString(VOID));
  }

  /** A decimal column is expected to become {@code bytes} with a {@code decimal} logical type. */
  @Test
  public void createAvroDecimalSchema() {
    final String specificSchema = "{"
        + "\"type\":\"bytes\","
        + "\"logicalType\":\"decimal\","
        + "\"precision\":" + PRECISION + ","
        + "\"scale\":" + SCALE + "}";
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for decimal's avro schema failed",
        expectedSchema, getAvroSchemaString(DECIMAL));
  }

  /** A char column is expected to become {@code string} with a {@code char} logical type. */
  @Test
  public void createAvroCharSchema() {
    final String specificSchema = "{"
        + "\"type\":\"string\","
        + "\"logicalType\":\"char\","
        + "\"maxLength\":" + CHAR_LEN + "}";
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for char's avro schema failed",
        expectedSchema, getAvroSchemaString(CHAR));
  }

  /** A varchar column is expected to become {@code string} with a {@code varchar} logical type. */
  @Test
  public void createAvroVarcharSchema() {
    final String specificSchema = "{"
        + "\"type\":\"string\","
        + "\"logicalType\":\"varchar\","
        + "\"maxLength\":" + CHAR_LEN + "}";
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for varchar's avro schema failed",
        expectedSchema, getAvroSchemaString(VARCHAR));
  }

  /** A date column is expected to become {@code int} with a {@code date} logical type. */
  @Test
  public void createAvroDateSchema() {
    final String specificSchema = "{"
        + "\"type\":\"int\","
        + "\"logicalType\":\"date\"}";
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for date in avro schema failed",
        expectedSchema, getAvroSchemaString(DATE));
  }

  /**
   * A timestamp column is expected to become {@code long} with a {@code timestamp-millis}
   * logical type.
   */
  @Test
  public void createAvroTimestampSchema() {
    final String specificSchema = "{"
        + "\"type\":\"long\","
        + "\"logicalType\":\"timestamp-millis\"}";
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for timestamp in avro schema failed",
        expectedSchema, getAvroSchemaString(TIMESTAMP));
  }

  /** A list of strings is expected to become a nullable array of nullable strings. */
  @Test
  public void createAvroListSchema() {
    ListTypeInfo listTypeInfo = new ListTypeInfo();
    listTypeInfo.setListElementTypeInfo(STRING);

    final String specificSchema = Schema.createArray(Schema.createUnion(Arrays.asList(
        Schema.create(Schema.Type.NULL),
        Schema.create(Schema.Type.STRING)))).toString();
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for list's avro schema failed",
        expectedSchema, getAvroSchemaString(listTypeInfo));
  }

  /** A string-to-int map is expected to become a nullable map with nullable int values. */
  @Test
  public void createAvroMapSchema() {
    MapTypeInfo mapTypeInfo = new MapTypeInfo();
    mapTypeInfo.setMapKeyTypeInfo(STRING);
    mapTypeInfo.setMapValueTypeInfo(INT);

    final String specificSchema = Schema.createMap(Schema.createUnion(Arrays.asList(
        Schema.create(Schema.Type.NULL),
        Schema.create(Schema.Type.INT)))).toString();
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for map's avro schema failed",
        expectedSchema, getAvroSchemaString(mapTypeInfo));
  }

  /** A union of int, float and string is expected to gain a leading {@code null} branch. */
  @Test
  public void createAvroUnionSchema() {
    UnionTypeInfo unionTypeInfo = new UnionTypeInfo();
    unionTypeInfo.setAllUnionObjectTypeInfos(Arrays.asList(INT, FLOAT, STRING));

    final String specificSchema = Schema.createUnion(
        Arrays.asList(
            Schema.create(Schema.Type.NULL),
            Schema.create(Schema.Type.INT),
            Schema.create(Schema.Type.FLOAT),
            Schema.create(Schema.Type.STRING))).toString();
    String expectedSchema = genSchemaWithoutNull(specificSchema);

    Assert.assertEquals("Test for union's avro schema failed",
        expectedSchema, getAvroSchemaString(unionTypeInfo));
  }

  /** A union containing only void is expected to become a union with a single null branch. */
  @Test
  public void createAvroUnionSchemaOfNull() {
    UnionTypeInfo unionTypeInfo = new UnionTypeInfo();
    unionTypeInfo.setAllUnionObjectTypeInfos(Arrays.asList(VOID));

    final String specificSchema = Schema.createUnion(
        Arrays.asList(
            Schema.create(Schema.Type.NULL))).toString();
    String expectedSchema = genSchemaWithoutNull(specificSchema);

    Assert.assertEquals("Test for union's avro schema failed",
        expectedSchema, getAvroSchemaString(unionTypeInfo));
  }

  /** A union containing only string is expected to become a null/string union. */
  @Test
  public void createAvroUnionSchemaOfOne() {
    UnionTypeInfo unionTypeInfo = new UnionTypeInfo();
    unionTypeInfo.setAllUnionObjectTypeInfos(Arrays.asList(STRING));

    final String specificSchema = Schema.createUnion(
        Arrays.asList(
            Schema.create(Schema.Type.NULL),
            Schema.create(Schema.Type.STRING))).toString();
    String expectedSchema = genSchemaWithoutNull(specificSchema);

    Assert.assertEquals("Test for union's avro schema failed",
        expectedSchema, getAvroSchemaString(unionTypeInfo));
  }

  /**
   * A union that already lists void last is expected to yield a single {@code null} branch,
   * placed first.
   */
  @Test
  public void createAvroUnionSchemaWithNull() {
    UnionTypeInfo unionTypeInfo = new UnionTypeInfo();
    unionTypeInfo.setAllUnionObjectTypeInfos(Arrays.asList(INT, FLOAT, STRING, VOID));

    final String specificSchema = Schema.createUnion(
        Arrays.asList(
            Schema.create(Schema.Type.NULL),
            Schema.create(Schema.Type.INT),
            Schema.create(Schema.Type.FLOAT),
            Schema.create(Schema.Type.STRING))).toString();
    String expectedSchema = genSchemaWithoutNull(specificSchema);

    Assert.assertEquals("Test for union's avro schema failed",
        expectedSchema, getAvroSchemaString(unionTypeInfo));
  }

  /**
   * A struct with fourteen fields ({@code field1} .. {@code field14}) is compared against the
   * schema stored in the {@code avro-struct.avsc} resource.
   */
  @Test
  public void createAvroStructSchema() throws IOException {
    final int fieldCount = 14;
    ArrayList<String> names = new ArrayList<String>(fieldCount);
    for (int i = 1; i <= fieldCount; i++) {
      names.add("field" + i);
    }

    // Order matters: the i-th type belongs to "field<i>".
    ArrayList<TypeInfo> typeInfos = new ArrayList<TypeInfo>(Arrays.asList(
        STRING, CHAR, VARCHAR, BINARY, BYTE, SHORT, INT,
        LONG, FLOAT, DOUBLE, BOOLEAN, DECIMAL, DATE, VOID));

    StructTypeInfo structTypeInfo = new StructTypeInfo();
    structTypeInfo.setAllStructFieldNames(names);
    structTypeInfo.setAllStructFieldTypeInfos(typeInfos);
    LOGGER.info("structTypeInfo is {}", structTypeInfo);

    final String specificSchema = readSchemaResource("avro-struct.avsc");
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for struct's avro schema failed",
        expectedSchema, getAvroSchemaString(structTypeInfo));
  }

  /**
   * A struct that contains another struct is compared against the schema stored in the
   * {@code avro-nested-struct.avsc} resource.
   */
  @Test
  public void createAvroNestedStructSchema() throws IOException {
    StructTypeInfo structTypeInfo = new StructTypeInfo();
    structTypeInfo.setAllStructFieldNames(
        new ArrayList<String>(Arrays.asList("field1", "field2")));
    structTypeInfo.setAllStructFieldTypeInfos(
        new ArrayList<TypeInfo>(Arrays.asList(STRING, INT)));

    StructTypeInfo superStructTypeInfo = new StructTypeInfo();
    superStructTypeInfo.setAllStructFieldNames(
        new ArrayList<String>(Arrays.asList("superfield1", "superfield2")));
    superStructTypeInfo.setAllStructFieldTypeInfos(
        new ArrayList<TypeInfo>(Arrays.asList(STRING, structTypeInfo)));

    final String specificSchema = readSchemaResource("avro-nested-struct.avsc");
    String expectedSchema = genSchema(specificSchema);

    Assert.assertEquals("Test for nested struct's avro schema failed",
        expectedSchema, getAvroSchemaString(superStructTypeInfo));
  }
}