/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.io.parquet;

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.junit.Assert;
import org.junit.Test;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.Types;

import static org.apache.parquet.schema.OriginalType.*;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.*;

/**
 * Tests for MAP-annotated Parquet groups.
 *
 * <p>Each test builds a message schema with {@link Types}, writes one message
 * through a {@link RecordConsumer} using {@code writeDirect}, reads the file
 * back with {@code read}, compares the single resulting record with a
 * hand-built {@link ArrayWritable}, and finally passes the record to
 * {@code deserialize} together with the matching Hive column name and type.
 */
public class TestMapStructures extends AbstractTestParquetDirect {

  /**
   * Emits one UTF8 string value wrapped in its startField/endField pair.
   *
   * @param rc consumer receiving the events
   * @param name field name
   * @param index field position inside the enclosing group
   * @param value string to write
   */
  private static void addStringField(RecordConsumer rc, String name, int index, String value) {
    rc.startField(name, index);
    rc.addBinary(Binary.fromString(value));
    rc.endField(name, index);
  }

  /**
   * Emits one int value wrapped in its startField/endField pair.
   *
   * @param rc consumer receiving the events
   * @param name field name
   * @param index field position inside the enclosing group
   * @param value int to write
   */
  private static void addIntField(RecordConsumer rc, String name, int index, int value) {
    rc.startField(name, index);
    rc.addInteger(value);
    rc.endField(name, index);
  }

  /**
   * Emits one double value wrapped in its startField/endField pair.
   *
   * @param rc consumer receiving the events
   * @param name field name
   * @param index field position inside the enclosing group
   * @param value double to write
   */
  private static void addDoubleField(RecordConsumer rc, String name, int index, double value) {
    rc.startField(name, index);
    rc.addDouble(value);
    rc.endField(name, index);
  }

  /**
   * Reads {@code file}, checks that it holds exactly one record equal to
   * {@code expected}, and hands that record to {@code deserialize} for a
   * single column.
   *
   * @param file Parquet file produced by {@code writeDirect}
   * @param expected record the file is expected to contain
   * @param columnName Hive column name passed to {@code deserialize}
   * @param columnType Hive type string passed to {@code deserialize}
   * @throws Exception if reading or deserializing fails
   */
  private void verifySingleRecord(Path file, ArrayWritable expected,
      String columnName, String columnType) throws Exception {
    List<ArrayWritable> records = read(file);
    Assert.assertEquals("Should have only one record", 1, records.size());
    assertEquals("Should match expected record", expected, records.get(0));
    deserialize(records.get(0),
        Arrays.asList(columnName),
        Arrays.asList(columnType));
  }

  /** Map of required string key to required int value. */
  @Test
  public void testStringMapRequiredPrimitive() throws Exception {
    Path test = writeDirect("StringMapRequiredPrimitive",
        Types.buildMessage()
            .optionalGroup().as(MAP)
                .repeatedGroup()
                    .required(BINARY).as(UTF8).named("key")
                    .required(INT32).named("value")
                    .named("key_value")
                .named("votes")
            .named("StringMapRequiredPrimitive"),
        new TestArrayCompatibility.DirectWriter() {
          @Override
          public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("votes", 0);

            rc.startGroup();
            rc.startField("key_value", 0);

            rc.startGroup();
            addStringField(rc, "key", 0, "lettuce");
            addIntField(rc, "value", 1, 34);
            rc.endGroup();

            rc.startGroup();
            addStringField(rc, "key", 0, "cabbage");
            addIntField(rc, "value", 1, 18);
            rc.endGroup();

            rc.endField("key_value", 0);
            rc.endGroup();

            rc.endField("votes", 0);
            rc.endMessage();
          }
        });

    ArrayWritable expected = list(
        record(new Text("lettuce"), new IntWritable(34)),
        record(new Text("cabbage"), new IntWritable(18)));

    verifySingleRecord(test, expected, "votes", "map<string,int>");
  }

  /** Map of required string key to optional int value; one entry has no value. */
  @Test
  public void testStringMapOptionalPrimitive() throws Exception {
    Path test = writeDirect("StringMapOptionalPrimitive",
        Types.buildMessage()
            .optionalGroup().as(MAP)
                .repeatedGroup()
                    .required(BINARY).as(UTF8).named("key")
                    .optional(INT32).named("value")
                    .named("key_value")
                .named("votes")
            .named("StringMapOptionalPrimitive"),
        new TestArrayCompatibility.DirectWriter() {
          @Override
          public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("votes", 0);

            rc.startGroup();
            rc.startField("key_value", 0);

            rc.startGroup();
            addStringField(rc, "key", 0, "lettuce");
            addIntField(rc, "value", 1, 34);
            rc.endGroup();

            rc.startGroup();
            addStringField(rc, "key", 0, "kale");
            // no value for kale
            rc.endGroup();

            rc.startGroup();
            addStringField(rc, "key", 0, "cabbage");
            addIntField(rc, "value", 1, 18);
            rc.endGroup();

            rc.endField("key_value", 0);
            rc.endGroup();

            rc.endField("votes", 0);
            rc.endMessage();
          }
        });

    ArrayWritable expected = list(
        record(new Text("lettuce"), new IntWritable(34)),
        record(new Text("kale"), null),
        record(new Text("cabbage"), new IntWritable(18)));

    verifySingleRecord(test, expected, "votes", "map<string,int>");
  }

  /** Map of string key to optional list of optional strings. */
  @Test
  public void testStringMapOfOptionalArray() throws Exception {
    // tests a multimap structure
    Path test = writeDirect("StringMapOfOptionalArray",
        Types.buildMessage()
            .optionalGroup().as(MAP)
                .repeatedGroup()
                    .required(BINARY).as(UTF8).named("key")
                    .optionalGroup().as(LIST)
                        .repeatedGroup()
                            .optional(BINARY).as(UTF8).named("element")
                            .named("list")
                        .named("value")
                    .named("key_value")
                .named("examples")
            .named("StringMapOfOptionalArray"),
        new TestArrayCompatibility.DirectWriter() {
          @Override
          public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("examples", 0);

            rc.startGroup();
            rc.startField("key_value", 0);

            rc.startGroup();
            addStringField(rc, "key", 0, "green");
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("list", 0);
            rc.startGroup();
            addStringField(rc, "element", 0, "lettuce");
            rc.endGroup();
            rc.startGroup();
            addStringField(rc, "element", 0, "kale");
            rc.endGroup();
            rc.startGroup();
            // adds a null element
            rc.endGroup();
            rc.endField("list", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();

            rc.startGroup();
            addStringField(rc, "key", 0, "brown");
            // no values array
            rc.endGroup();

            rc.endField("key_value", 0);
            rc.endGroup();

            rc.endField("examples", 0);
            rc.endMessage();
          }
        });

    ArrayWritable expected = list(
        record(new Text("green"),
            record(new Text("lettuce"), new Text("kale"), null)),
        record(new Text("brown"), null));

    verifySingleRecord(test, expected, "examples", "map<string,array<string>>");
  }

  /** Map of string key to optional list of optional ints. */
  @Test
  public void testStringMapOfOptionalIntArray() throws Exception {
    // tests a multimap structure for PARQUET-26
    Path test = writeDirect("StringMapOfOptionalIntArray",
        Types.buildMessage()
            .optionalGroup().as(MAP)
                .repeatedGroup()
                    .required(BINARY).as(UTF8).named("key")
                    .optionalGroup().as(LIST)
                        .repeatedGroup()
                            .optional(INT32).named("element")
                            .named("list")
                        .named("value")
                    .named("key_value")
                .named("examples")
            .named("StringMapOfOptionalIntArray"),
        new TestArrayCompatibility.DirectWriter() {
          @Override
          public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("examples", 0);

            rc.startGroup();
            rc.startField("key_value", 0);

            rc.startGroup();
            addStringField(rc, "key", 0, "low");
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("list", 0);
            rc.startGroup();
            addIntField(rc, "element", 0, 34);
            rc.endGroup();
            rc.startGroup();
            addIntField(rc, "element", 0, 35);
            rc.endGroup();
            rc.startGroup();
            // adds a null element
            rc.endGroup();
            rc.endField("list", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();

            rc.startGroup();
            addStringField(rc, "key", 0, "high");
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("list", 0);
            rc.startGroup();
            addIntField(rc, "element", 0, 340);
            rc.endGroup();
            rc.startGroup();
            addIntField(rc, "element", 0, 360);
            rc.endGroup();
            rc.endField("list", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();

            rc.endField("key_value", 0);
            rc.endGroup();

            rc.endField("examples", 0);
            rc.endMessage();
          }
        });

    ArrayWritable expected = list(
        record(new Text("low"),
            record(new IntWritable(34), new IntWritable(35), null)),
        record(new Text("high"),
            record(new IntWritable(340), new IntWritable(360))));

    verifySingleRecord(test, expected, "examples", "map<string,array<int>>");
  }

  /** Map whose key is a struct of two ints and whose value is an optional double. */
  @Test
  public void testMapWithComplexKey() throws Exception {
    Path test = writeDirect("MapWithComplexKey",
        Types.buildMessage()
            .optionalGroup().as(MAP)
                .repeatedGroup()
                    .requiredGroup()
                        .required(INT32).named("x")
                        .required(INT32).named("y")
                        .named("key")
                    .optional(DOUBLE).named("value")
                    .named("key_value")
                .named("matrix")
            .named("MapWithComplexKey"),
        new TestArrayCompatibility.DirectWriter() {
          @Override
          public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("matrix", 0);

            rc.startGroup();
            rc.startField("key_value", 0);

            rc.startGroup();
            rc.startField("key", 0);
            rc.startGroup();
            addIntField(rc, "x", 0, 7);
            addIntField(rc, "y", 1, 22);
            rc.endGroup();
            rc.endField("key", 0);
            addDoubleField(rc, "value", 1, 3.14);
            rc.endGroup();

            rc.endField("key_value", 0);
            rc.endGroup();

            rc.endField("matrix", 0);
            rc.endMessage();
          }
        });

    ArrayWritable expected = list(record(
        record(new IntWritable(7), new IntWritable(22)),
        new DoubleWritable(3.14)));

    // The value column is DOUBLE in the schema above, so the Hive type is double.
    verifySingleRecord(test, expected, "matrix", "map<struct<x:int,y:int>,double>");
  }

  /** Map whose key is an optional double and whose value is a struct of two ints. */
  @Test
  public void testDoubleMapWithStructValue() throws Exception {
    Path test = writeDirect("DoubleMapWithStructValue",
        Types.buildMessage()
            .optionalGroup().as(MAP)
                .repeatedGroup()
                    .optional(DOUBLE).named("key")
                    .optionalGroup()
                        .required(INT32).named("x")
                        .required(INT32).named("y")
                        .named("value")
                    .named("key_value")
                .named("approx")
            .named("DoubleMapWithStructValue"),
        new TestArrayCompatibility.DirectWriter() {
          @Override
          public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("approx", 0);

            rc.startGroup();
            rc.startField("key_value", 0);

            rc.startGroup();
            addDoubleField(rc, "key", 0, 3.14);
            rc.startField("value", 1);
            rc.startGroup();
            addIntField(rc, "x", 0, 7);
            addIntField(rc, "y", 1, 22);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();

            rc.endField("key_value", 0);
            rc.endGroup();

            rc.endField("approx", 0);
            rc.endMessage();
          }
        });

    ArrayWritable expected = list(record(
        new DoubleWritable(3.14),
        record(new IntWritable(7), new IntWritable(22))));

    // The key column is DOUBLE in the schema above, so the Hive type is double.
    verifySingleRecord(test, expected, "approx", "map<double,struct<x:int,y:int>>");
  }

  /** Map of string key to a nested map of string key to required int value. */
  @Test
  public void testNestedMap() throws Exception {
    // File name matches the message name, like every other test in this class.
    Path test = writeDirect("NestedMap",
        Types.buildMessage()
            .optionalGroup().as(MAP)
                .repeatedGroup()
                    .optional(BINARY).as(UTF8).named("key")
                    .optionalGroup().as(MAP)
                        .repeatedGroup()
                            .optional(BINARY).as(UTF8).named("key")
                            .required(INT32).named("value")
                            .named("key_value")
                        .named("value")
                    .named("key_value")
                .named("map_of_maps")
            .named("NestedMap"),
        new TestArrayCompatibility.DirectWriter() {
          @Override
          public void write(RecordConsumer rc) {
            rc.startMessage();
            rc.startField("map_of_maps", 0);

            rc.startGroup();
            rc.startField("key_value", 0);

            rc.startGroup();
            addStringField(rc, "key", 0, "a");
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
            addStringField(rc, "key", 0, "b");
            addIntField(rc, "value", 1, 1);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();

            rc.startGroup();
            addStringField(rc, "key", 0, "b");
            rc.startField("value", 1);
            rc.startGroup();
            rc.startField("key_value", 0);
            rc.startGroup();
            addStringField(rc, "key", 0, "a");
            addIntField(rc, "value", 1, -1);
            rc.endGroup();
            rc.startGroup();
            addStringField(rc, "key", 0, "b");
            addIntField(rc, "value", 1, -2);
            rc.endGroup();
            rc.endField("key_value", 0);
            rc.endGroup();
            rc.endField("value", 1);
            rc.endGroup();

            rc.endField("key_value", 0);
            rc.endGroup();

            rc.endField("map_of_maps", 0);
            rc.endMessage();
          }
        });

    ArrayWritable expected = list(
        record(new Text("a"), record(
            record(new Text("b"), new IntWritable(1)))),
        record(new Text("b"), record(
            record(new Text("a"), new IntWritable(-1)),
            record(new Text("b"), new IntWritable(-2))))
    );

    verifySingleRecord(test, expected, "map_of_maps", "map<string,map<string,int>>");
  }
}