/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package parquet.thrift; import static org.junit.Assert.assertEquals; import static parquet.schema.MessageTypeParser.parseMessageType; import org.apache.thrift.TBase; import org.junit.Test; import parquet.schema.MessageType; import parquet.schema.MessageTypeParser; import parquet.thrift.projection.FieldProjectionFilter; import parquet.thrift.projection.ThriftProjectionException; import parquet.thrift.struct.ThriftType; import parquet.thrift.struct.ThriftType.StructType; import com.twitter.data.proto.tutorial.thrift.AddressBook; import com.twitter.data.proto.tutorial.thrift.Person; import com.twitter.elephantbird.thrift.test.TestStructInMap; public class TestThriftSchemaConverter { @Test public void testToMessageType() throws Exception { String expected = "message ParquetSchema {\n" + " optional group persons (LIST) = 1 {\n" + " repeated group persons_tuple {\n" + " required group name = 1 {\n" + " optional binary first_name (UTF8) = 1;\n" + " optional binary last_name (UTF8) = 2;\n" + " }\n" + " optional int32 id = 2;\n" + " optional binary email (UTF8) = 3;\n" + " optional group phones (LIST) = 4 {\n" + " repeated group phones_tuple {\n" + " optional binary number (UTF8) = 1;\n" + " optional binary type (ENUM) = 2;\n" + " }\n" + " }\n" + " }\n" + " }\n" + "}"; ThriftSchemaConverter schemaConverter = new ThriftSchemaConverter(); final MessageType converted = schemaConverter.convert(AddressBook.class); assertEquals(MessageTypeParser.parseMessageType(expected), converted); } @Test public void testToProjectedThriftType() { shouldGetProjectedSchema("name/first_name", "message ParquetSchema {" + " required group name = 1 {" + " optional binary first_name (UTF8) = 1;" + " }}", Person.class); shouldGetProjectedSchema("name/first_name;name/last_name", "message ParquetSchema {" + " required group name = 1 {" + " optional binary first_name (UTF8) = 1;" + " optional binary last_name (UTF8) = 2;" + " }}", Person.class); shouldGetProjectedSchema("name/{first,last}_name;", "message ParquetSchema {" + " required group name = 1 {" + " optional binary first_name (UTF8) = 1;" + " optional binary last_name (UTF8) = 2;" + " }}", Person.class); shouldGetProjectedSchema("name/*", "message ParquetSchema {" + " required group name = 1 {" + " optional binary first_name (UTF8) = 1;" + " optional binary last_name (UTF8) = 2;" + " }" + "}", Person.class); shouldGetProjectedSchema("name/*", "message ParquetSchema {" + " required group name = 1 {" + " optional binary first_name (UTF8) = 1;" + " optional binary last_name (UTF8) = 2;" + " }" + "}", Person.class); shouldGetProjectedSchema("*/*_name", "message ParquetSchema {" + " required group name = 1 {" + " optional binary first_name (UTF8) = 1;" + " optional binary last_name (UTF8) = 2;" + " }" + "}", Person.class); shouldGetProjectedSchema("name/first_*", "message ParquetSchema {" + " required group name = 1 {" + " optional binary first_name (UTF8) = 1;" + " }" + "}", Person.class); shouldGetProjectedSchema("*/*", "message ParquetSchema {" + " required group name = 1 {" + " optional binary first_name (UTF8) = 1;" + " optional binary last_name (UTF8) = 2;" + "} " + " optional group phones (LIST) = 4 {" + " repeated group phones_tuple {" + " optional binary number (UTF8) = 1;" + " optional binary type (ENUM) = 2;" + " }" + "}}", Person.class); // MessageType mapSchema= MessageTypeParser.parseMessageType() } /* Original message type, before projection message TestStructInMap { optional binary name(UTF8); optional group names(MAP) { repeated group map(MAP_KEY_VALUE) { required binary key(UTF8); optional group value { optional group name { optional binary first_name(UTF8); optional binary last_name(UTF8); } optional group phones(MAP) { repeated group map(MAP_KEY_VALUE) { required binary key(ENUM); optional binary value(UTF8); } } } } } } */ @Test public void testProjectMapThriftType() { //project nested map shouldGetProjectedSchema("name;names/key*;names/value/**", "message ParquetSchema {\n" + " optional binary name (UTF8) = 1;\n" + " optional group names (MAP) = 2 {\n" + " repeated group map (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " optional group value {\n" + " optional group name = 1 {\n" + " optional binary first_name (UTF8) = 1;\n" + " optional binary last_name (UTF8) = 2;\n" + " }\n" + " optional group phones (MAP) = 2 {\n" + " repeated group map (MAP_KEY_VALUE) {\n" + " required binary key (ENUM);\n" + " optional binary value (UTF8);\n" + " }\n" + " }\n" + " }\n" + " }\n" + " }\n" + "}", TestStructInMap.class); //project only one level of nested map shouldGetProjectedSchema("name;names/key;names/value/name/*", "message ParquetSchema {\n" + " optional binary name (UTF8) = 1;\n" + " optional group names (MAP) = 2 {\n" + " repeated group map (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " optional group value {\n" + " optional group name = 1 {\n" + " optional binary first_name (UTF8) = 1;\n" + " optional binary last_name (UTF8) = 2;\n" + " }\n" + " }\n" + " }\n" + " }\n" + "}", TestStructInMap.class); } @Test public void testProjectOnlyKeyInMap() { shouldGetProjectedSchema("name;names/key","message ParquetSchema {\n" + " optional binary name (UTF8) = 1;\n" + " optional group names (MAP) = 2 {\n" + " repeated group map (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " }\n" + " }\n" + "}",TestStructInMap.class); } @Test(expected = ThriftProjectionException.class) public void testProjectOnlyValueInMap() { System.out.println(getFilteredSchema("name;names/value/**", TestStructInMap.class)); } private void shouldGetProjectedSchema(String filterDesc, String expectedSchemaStr, Class<? extends TBase<?,?>> thriftClass) { MessageType requestedSchema = getFilteredSchema(filterDesc, thriftClass); MessageType expectedSchema = parseMessageType(expectedSchemaStr); assertEquals(expectedSchema, requestedSchema); } private MessageType getFilteredSchema(String filterDesc, Class<? extends TBase<?,?>> thriftClass) { FieldProjectionFilter fieldProjectionFilter = new FieldProjectionFilter(filterDesc); return new ThriftSchemaConverter(fieldProjectionFilter).convert(thriftClass); } @Test public void testToThriftType() throws Exception { ThriftSchemaConverter schemaConverter = new ThriftSchemaConverter(); final StructType converted = schemaConverter.toStructType(AddressBook.class); final String json = converted.toJSON(); System.out.println(json); final ThriftType fromJSON = StructType.fromJSON(json); assertEquals(json, fromJSON.toJSON()); } }