/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.serde2.columnar; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.TreeMap; import junit.framework.Assert; import junit.framework.TestCase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.CrossMapEqualComparer; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.SimpleMapEqualComparer; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.LongWritable; public class TestLazyBinaryColumnarSerDe extends TestCase { private static class InnerStruct { public InnerStruct(Integer i, Long l) { mInt = i; mLong = l; } Integer mInt; Long mLong; } private static class OuterStruct { Byte mByte; Short mShort; Integer mInt; Long mLong; Float mFloat; Double mDouble; String mString; byte[] mBA; List<InnerStruct> mArray; Map<String, InnerStruct> mMap; InnerStruct mStruct; } public void testSerDe() throws SerDeException { StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA); String cols = ObjectInspectorUtils.getFieldNames(oi); Properties props = new Properties(); props.setProperty(serdeConstants.LIST_COLUMNS, cols); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi)); LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe(); SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); OuterStruct outerStruct = new OuterStruct(); outerStruct.mByte = 1; outerStruct.mShort = 2; outerStruct.mInt = 3; outerStruct.mLong = 4l; outerStruct.mFloat = 5.01f; outerStruct.mDouble = 6.001d; outerStruct.mString = "seven"; outerStruct.mBA = new byte[]{'2'}; InnerStruct is1 = new InnerStruct(8, 9l); InnerStruct is2 = new InnerStruct(10, 11l); outerStruct.mArray = new ArrayList<InnerStruct>(2); outerStruct.mArray.add(is1); outerStruct.mArray.add(is2); outerStruct.mMap = new TreeMap<String, InnerStruct>(); outerStruct.mMap.put(new String("twelve"), new InnerStruct(13, 14l)); outerStruct.mMap.put(new String("fifteen"), new InnerStruct(16, 17l)); outerStruct.mStruct = new InnerStruct(18, 19l); BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi); ObjectInspector out_oi = serde.getObjectInspector(); Object out_o = serde.deserialize(braw); if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new CrossMapEqualComparer())) { System.out.println("expected = " + SerDeUtils.getJSONString(outerStruct, oi)); System.out.println("actual = " + SerDeUtils.getJSONString(out_o, out_oi)); fail("Deserialized object does not compare"); } } public void testSerDeEmpties() throws SerDeException { StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA); String cols = ObjectInspectorUtils.getFieldNames(oi); Properties props = new Properties(); props.setProperty(serdeConstants.LIST_COLUMNS, cols); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi)); LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe(); SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); OuterStruct outerStruct = new OuterStruct(); outerStruct.mByte = 101; outerStruct.mShort = 2002; outerStruct.mInt = 3003; outerStruct.mLong = 4004l; outerStruct.mFloat = 5005.01f; outerStruct.mDouble = 6006.001d; outerStruct.mString = ""; outerStruct.mBA = new byte[]{'a'}; outerStruct.mArray = new ArrayList<InnerStruct>(); outerStruct.mMap = new TreeMap<String, InnerStruct>(); outerStruct.mStruct = new InnerStruct(180018, 190019l); BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi); ObjectInspector out_oi = serde.getObjectInspector(); Object out_o = serde.deserialize(braw); if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new SimpleMapEqualComparer())) { System.out.println("expected = " + SerDeUtils.getJSONString(outerStruct, oi)); System.out.println("actual = " + SerDeUtils.getJSONString(out_o, out_oi)); fail("Deserialized object does not compare"); } } public void testLazyBinaryColumnarSerDeWithEmptyBinary() throws SerDeException { StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA); String cols = ObjectInspectorUtils.getFieldNames(oi); Properties props = new Properties(); props.setProperty(serdeConstants.LIST_COLUMNS, cols); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi)); LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe(); SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); OuterStruct outerStruct = new OuterStruct(); outerStruct.mByte = 101; outerStruct.mShort = 2002; outerStruct.mInt = 3003; outerStruct.mLong = 4004l; outerStruct.mFloat = 5005.01f; outerStruct.mDouble = 6006.001d; outerStruct.mString = ""; outerStruct.mBA = new byte[]{}; outerStruct.mArray = new ArrayList<InnerStruct>(); outerStruct.mMap = new TreeMap<String, InnerStruct>(); outerStruct.mStruct = new InnerStruct(180018, 190019l); try{ serde.serialize(outerStruct, oi); } catch (RuntimeException re){ assertEquals(re.getMessage(), "LazyBinaryColumnarSerde cannot serialize a non-null " + "zero length binary field. Consider using either LazyBinarySerde or ColumnarSerde."); return; } assert false; } public void testSerDeOuterNulls() throws SerDeException { StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA); String cols = ObjectInspectorUtils.getFieldNames(oi); Properties props = new Properties(); props.setProperty(serdeConstants.LIST_COLUMNS, cols); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi)); LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe(); SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); OuterStruct outerStruct = new OuterStruct(); BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi); ObjectInspector out_oi = serde.getObjectInspector(); Object out_o = serde.deserialize(braw); if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new SimpleMapEqualComparer())) { System.out.println("expected = " + SerDeUtils.getJSONString(outerStruct, oi)); System.out.println("actual = " + SerDeUtils.getJSONString(out_o, out_oi)); fail("Deserialized object does not compare"); } } public void testSerDeInnerNulls() throws SerDeException { StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA); String cols = ObjectInspectorUtils.getFieldNames(oi); Properties props = new Properties(); props.setProperty(serdeConstants.LIST_COLUMNS, cols); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi)); LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe(); SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); OuterStruct outerStruct = new OuterStruct(); outerStruct.mByte = 1; outerStruct.mShort = 2; outerStruct.mInt = 3; outerStruct.mLong = 4l; outerStruct.mFloat = 5.01f; outerStruct.mDouble = 6.001d; outerStruct.mString = "seven"; outerStruct.mBA = new byte[]{'3'}; InnerStruct is1 = new InnerStruct(null, 9l); InnerStruct is2 = new InnerStruct(10, null); outerStruct.mArray = new ArrayList<InnerStruct>(2); outerStruct.mArray.add(is1); outerStruct.mArray.add(is2); outerStruct.mMap = new HashMap<String, InnerStruct>(); outerStruct.mMap.put(null, new InnerStruct(13, 14l)); outerStruct.mMap.put(new String("fifteen"), null); outerStruct.mStruct = new InnerStruct(null, null); BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi); ObjectInspector out_oi = serde.getObjectInspector(); Object out_o = serde.deserialize(braw); if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new SimpleMapEqualComparer())) { System.out.println("expected = " + SerDeUtils.getJSONString(outerStruct, oi)); System.out.println("actual = " + SerDeUtils.getJSONString(out_o, out_oi)); fail("Deserialized object does not compare"); } } private static class BeforeStruct { Long l1; Long l2; } private static class AfterStruct { Long l1; Long l2; Long l3; } /** * HIVE-5788 * <p> * Background: in cases of "add column", table metadata changes but data does not. Columns * missing from the data but which are required by metadata are interpreted as null. * <p> * This tests the use-case of altering columns of a table with already some data, then adding more data * in the new schema, and seeing if this serde can to read both types of data from the resultant table. * @throws SerDeException */ public void testHandlingAlteredSchemas() throws SerDeException { StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(BeforeStruct.class, ObjectInspectorOptions.JAVA); String cols = ObjectInspectorUtils.getFieldNames(oi); Properties props = new Properties(); props.setProperty(serdeConstants.LIST_COLUMNS, cols); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi)); // serialize some data in the schema before it is altered. LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe(); SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); BeforeStruct bs1 = new BeforeStruct(); bs1.l1 = 1L; bs1.l2 = 2L; BytesRefArrayWritable braw1 = (BytesRefArrayWritable) serde.serialize(bs1, oi); // alter table add column: change the metadata oi = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(AfterStruct.class, ObjectInspectorOptions.JAVA); cols = ObjectInspectorUtils.getFieldNames(oi); props = new Properties(); props.setProperty(serdeConstants.LIST_COLUMNS, cols); props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi)); serde = new LazyBinaryColumnarSerDe(); SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); // serialize some data in the schema after it is altered. AfterStruct as = new AfterStruct(); as.l1 = 11L; as.l2 = 12L; as.l3 = 13L; BytesRefArrayWritable braw2 = (BytesRefArrayWritable) serde.serialize(as, oi); // fetch operator serde = new LazyBinaryColumnarSerDe(); SerDeUtils.initializeSerDe(serde, new Configuration(), props, null); //fetch the row inserted before schema is altered and verify LazyBinaryColumnarStruct struct1 = (LazyBinaryColumnarStruct) serde .deserialize(braw1); oi = (StructObjectInspector) serde.getObjectInspector(); List<Object> objs1 = oi.getStructFieldsDataAsList(struct1); Assert.assertEquals(((LongWritable) objs1.get(0)).get(), 1L); Assert.assertEquals(((LongWritable) objs1.get(1)).get(), 2L); Assert.assertNull(objs1.get(2)); //fetch the row inserted after schema is altered and verify LazyBinaryColumnarStruct struct2 = (LazyBinaryColumnarStruct) serde .deserialize(braw2); List<Object> objs2 = struct2.getFieldsAsList(); Assert.assertEquals(((LongWritable) objs2.get(0)).get(), 11L); Assert.assertEquals(((LongWritable) objs2.get(1)).get(), 12L); Assert.assertEquals(((LongWritable) objs2.get(2)).get(), 13L); } }