/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.io.parquet;

import java.util.Properties;

import junit.framework.TestCase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ParquetHiveRecord;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Writable;

public class TestParquetSerDe extends TestCase {

  public void testParquetHiveSerDe() throws Throwable {
    try {
      // Create the SerDe
      System.out.println("test: testParquetHiveSerDe");

      final ParquetHiveSerDe serDe = new ParquetHiveSerDe();
      final Configuration conf = new Configuration();
      final Properties tbl = createProperties();
      SerDeUtils.initializeSerDe(serDe, conf, tbl, null);

      // Data
      final Writable[] arr = new Writable[9];

      // Primitive types
      arr[0] = new ByteWritable((byte) 123);
      arr[1] = new ShortWritable((short) 456);
      arr[2] = new IntWritable(789);
      arr[3] = new LongWritable(1000L);
      arr[4] = new DoubleWritable((double) 5.3);
      arr[5] = new BytesWritable("hive and hadoop and parquet. Big family.".getBytes("UTF-8"));
      arr[6] = new BytesWritable("parquetSerde binary".getBytes("UTF-8"));

      // Map column: three (key_i, i) pairs
      final Writable[] map = new Writable[3];
      for (int i = 0; i < 3; ++i) {
        final Writable[] pair = new Writable[2];
        pair[0] = new BytesWritable(("key_" + i).getBytes("UTF-8"));
        pair[1] = new IntWritable(i);
        map[i] = new ArrayWritable(Writable.class, pair);
      }
      arr[7] = new ArrayWritable(Writable.class, map);

      // List column: five string elements
      final Writable[] array = new Writable[5];
      for (int i = 0; i < 5; ++i) {
        array[i] = new BytesWritable(("elem_" + i).getBytes("UTF-8"));
      }
      arr[8] = new ArrayWritable(Writable.class, array);

      final ArrayWritable arrWritable = new ArrayWritable(Writable.class, arr);

      // Test
      deserializeAndSerializeLazySimple(serDe, arrWritable);
      System.out.println("test: testParquetHiveSerDe - OK");

    } catch (final Throwable e) {
      e.printStackTrace();
      throw e;
    }
  }

  public void testParquetHiveSerDeComplexTypes() throws Throwable {
    // Initialize
    ParquetHiveSerDe serDe = new ParquetHiveSerDe();
    Configuration conf = new Configuration();
    Properties tblProperties = new Properties();

    tblProperties.setProperty(serdeConstants.LIST_COLUMNS, "a,s");
    tblProperties.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,struct<a:int,b:string>");
    conf.set(ColumnProjectionUtils.READ_NESTED_COLUMN_PATH_CONF_STR, "s.b");

    serDe.initialize(conf, tblProperties);

    // Generate test data: column "a" is null, column "s" is a struct whose
    // projected field "b" holds the bytes "foo"
    Writable[] wb = new Writable[1];
    wb[0] = new BytesWritable("foo".getBytes("UTF-8"));
    Writable[] ws = new Writable[2];
    ws[0] = null;
    ArrayWritable awb = new ArrayWritable(Writable.class, wb);
    ws[1] = awb;
    ArrayWritable aws = new ArrayWritable(Writable.class, ws);

    // Inspect the test data
    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
    StructField s = soi.getStructFieldRef("s");
    assertEquals(awb, soi.getStructFieldData(aws, s));
    StructObjectInspector boi = (StructObjectInspector) s.getFieldObjectInspector();
    StructField b = boi.getStructFieldRef("b");
    assertEquals(wb[0], boi.getStructFieldData(awb, b));
  }

  private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe,
      final ArrayWritable t) throws SerDeException {

    // Get the row structure
    final StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();

    // Deserialize
    final Object row = serDe.deserialize(t);
    assertEquals("deserialization gives the wrong object class", row.getClass(), ArrayWritable.class);
    assertEquals("size correct after deserialization", serDe.getSerDeStats().getRawDataSize(),
        t.get().length);
    assertEquals("deserialization gives the wrong object", t, row);

    // Serialize
    final ParquetHiveRecord serializedArr = (ParquetHiveRecord) serDe.serialize(row, oi);
    assertEquals("size correct after serialization", serDe.getSerDeStats().getRawDataSize(),
        ((ArrayWritable) serializedArr.getObject()).get().length);
    assertTrue("serialized object should be equal to starting object",
        arrayWritableEquals(t, (ArrayWritable) serializedArr.getObject()));
  }

  private Properties createProperties() {
    final Properties tbl = new Properties();

    // Set the configuration parameters
    tbl.setProperty("columns", "abyte,ashort,aint,along,adouble,astring,abinary,amap,alist");
    tbl.setProperty("columns.types",
        "tinyint:smallint:int:bigint:double:string:binary:map<string,int>:array<string>");
    tbl.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
    return tbl;
  }

  public static boolean arrayWritableEquals(final ArrayWritable a1, final ArrayWritable a2) {
    final Writable[] a1Arr = a1.get();
    final Writable[] a2Arr = a2.get();

    if (a1Arr.length != a2Arr.length) {
      return false;
    }

    for (int i = 0; i < a1Arr.length; ++i) {
      if (a1Arr[i] instanceof ArrayWritable) {
        if (!(a2Arr[i] instanceof ArrayWritable)) {
          return false;
        }
        if (!arrayWritableEquals((ArrayWritable) a1Arr[i], (ArrayWritable) a2Arr[i])) {
          return false;
        }
      } else {
        if (!a1Arr[i].equals(a2Arr[i])) {
          return false;
        }
      }
    }
    return true;
  }
}