/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.serde2.lazybinary; import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Random; import junit.framework.TestCase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct; import org.apache.hadoop.hive.serde2.binarysortable.TestBinarySortableSerDe; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.BytesWritable; /** * TestLazyBinarySerDe. * */ public class TestLazyBinarySerDe extends TestCase { /** * Generate a random struct array. * * @param r * random number generator * @return an struct array */ static List<MyTestInnerStruct> getRandStructArray(Random r) { int length = r.nextInt(10); ArrayList<MyTestInnerStruct> result = new ArrayList<MyTestInnerStruct>( length); for (int i = 0; i < length; i++) { MyTestInnerStruct ti = new MyTestInnerStruct(r.nextInt(), r.nextInt()); result.add(ti); } return result; } /** * Initialize the LazyBinarySerDe. * * @param fieldNames * table field names * @param fieldTypes * table field types * @return the initialized LazyBinarySerDe * @throws Throwable */ private SerDe getSerDe(String fieldNames, String fieldTypes) throws Throwable { Properties schema = new Properties(); schema.setProperty(Constants.LIST_COLUMNS, fieldNames); schema.setProperty(Constants.LIST_COLUMN_TYPES, fieldTypes); LazyBinarySerDe serde = new LazyBinarySerDe(); serde.initialize(new Configuration(), schema); return serde; } /** * Test the LazyBinarySerDe. * * @param rows * array of structs to be serialized * @param rowOI * array of struct object inspectors * @param serde * the serde * @throws Throwable */ private void testLazyBinarySerDe(Object[] rows, ObjectInspector rowOI, SerDe serde) throws Throwable { ObjectInspector serdeOI = serde.getObjectInspector(); // Try to serialize BytesWritable bytes[] = new BytesWritable[rows.length]; for (int i = 0; i < rows.length; i++) { BytesWritable s = (BytesWritable) serde.serialize(rows[i], rowOI); bytes[i] = new BytesWritable(); bytes[i].set(s); } // Try to deserialize Object[] deserialized = new Object[rows.length]; for (int i = 0; i < rows.length; i++) { deserialized[i] = serde.deserialize(bytes[i]); if (0 != ObjectInspectorUtils.compare(rows[i], rowOI, deserialized[i], serdeOI)) { System.out.println("structs[" + i + "] = " + SerDeUtils.getJSONString(rows[i], rowOI)); System.out.println("deserialized[" + i + "] = " + SerDeUtils.getJSONString(deserialized[i], serdeOI)); System.out.println("serialized[" + i + "] = " + TestBinarySortableSerDe.hexString(bytes[i])); assertEquals(rows[i], deserialized[i]); } } } /** * Compare two structs that have different number of fields. We just compare * the first few common fields, ignoring the fields existing in one struct but * not the other. * * @see ObjectInspectorUtils#compare(Object, ObjectInspector, Object, * ObjectInspector) */ int compareDiffSizedStructs(Object o1, ObjectInspector oi1, Object o2, ObjectInspector oi2) { StructObjectInspector soi1 = (StructObjectInspector) oi1; StructObjectInspector soi2 = (StructObjectInspector) oi2; List<? extends StructField> fields1 = soi1.getAllStructFieldRefs(); List<? extends StructField> fields2 = soi2.getAllStructFieldRefs(); int minimum = Math.min(fields1.size(), fields2.size()); for (int i = 0; i < minimum; i++) { int result = ObjectInspectorUtils.compare(soi1.getStructFieldData(o1, fields1.get(i)), fields1.get(i).getFieldObjectInspector(), soi2 .getStructFieldData(o2, fields2.get(i)), fields2.get(i) .getFieldObjectInspector()); if (result != 0) { return result; } } return 0; } /** * Test shorter schema deserialization where a bigger struct is serialized and * it is then deserialized with a smaller struct. Here the serialized struct * has 10 fields and we deserialized to a struct of 9 fields. */ private void testShorterSchemaDeserialization(Random r) throws Throwable { StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA); String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1); String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1); SerDe serde1 = getSerDe(fieldNames1, fieldTypes1); serde1.getObjectInspector(); StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA); String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2); String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2); SerDe serde2 = getSerDe(fieldNames2, fieldTypes2); ObjectInspector serdeOI2 = serde2.getObjectInspector(); int num = 100; for (int itest = 0; itest < num; itest++) { int randField = r.nextInt(11); Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); String st = randField > 6 ? null : TestBinarySortableSerDe .getRandString(r); MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r .nextInt(5) - 2, r.nextInt(5) - 2); List<Integer> li = randField > 8 ? null : TestBinarySortableSerDe .getRandIntegerArray(r); ByteArrayRef ba = TestBinarySortableSerDe.getRandBA(r, itest); Map<String, List<MyTestInnerStruct>> mp = new HashMap<String, List<MyTestInnerStruct>>(); String key = TestBinarySortableSerDe.getRandString(r); List<MyTestInnerStruct> value = randField > 10 ? null : getRandStructArray(r); mp.put(key, value); String key1 = TestBinarySortableSerDe.getRandString(r); mp.put(key1, null); String key2 = TestBinarySortableSerDe.getRandString(r); List<MyTestInnerStruct> value2 = getRandStructArray(r); mp.put(key2, value2); MyTestClassBigger input = new MyTestClassBigger(b, s, n, l, f, d, st, is, li, ba, mp); BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1); Object output = serde2.deserialize(bw); if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) { System.out.println("structs = " + SerDeUtils.getJSONString(input, rowOI1)); System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2)); System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw)); assertEquals(input, output); } } } /** * Test shorter schema deserialization where a bigger struct is serialized and * it is then deserialized with a smaller struct. Here the serialized struct * has 9 fields and we deserialized to a struct of 8 fields. */ private void testShorterSchemaDeserialization1(Random r) throws Throwable { StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA); String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1); String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1); SerDe serde1 = getSerDe(fieldNames1, fieldTypes1); serde1.getObjectInspector(); StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(MyTestClassSmaller.class, ObjectInspectorOptions.JAVA); String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2); String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2); SerDe serde2 = getSerDe(fieldNames2, fieldTypes2); ObjectInspector serdeOI2 = serde2.getObjectInspector(); int num = 100; for (int itest = 0; itest < num; itest++) { int randField = r.nextInt(10); Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); String st = randField > 6 ? null : TestBinarySortableSerDe .getRandString(r); MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r .nextInt(5) - 2, r.nextInt(5) - 2); List<Integer> li = randField > 8 ? null : TestBinarySortableSerDe .getRandIntegerArray(r); ByteArrayRef ba = TestBinarySortableSerDe.getRandBA(r, itest); MyTestClass input = new MyTestClass(b, s, n, l, f, d, st, is, li, ba); BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1); Object output = serde2.deserialize(bw); if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) { System.out.println("structs = " + SerDeUtils.getJSONString(input, rowOI1)); System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2)); System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw)); assertEquals(input, output); } } } /** * Test longer schema deserialization where a smaller struct is serialized and * it is then deserialized with a bigger struct Here the serialized struct has * 9 fields and we deserialized to a struct of 10 fields. */ void testLongerSchemaDeserialization(Random r) throws Throwable { StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA); String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1); String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1); SerDe serde1 = getSerDe(fieldNames1, fieldTypes1); serde1.getObjectInspector(); StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA); String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2); String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2); SerDe serde2 = getSerDe(fieldNames2, fieldTypes2); ObjectInspector serdeOI2 = serde2.getObjectInspector(); int num = 100; for (int itest = 0; itest < num; itest++) { int randField = r.nextInt(10); Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); String st = randField > 6 ? null : TestBinarySortableSerDe .getRandString(r); MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r .nextInt(5) - 2, r.nextInt(5) - 2); List<Integer> li = randField > 8 ? null : TestBinarySortableSerDe .getRandIntegerArray(r); ByteArrayRef ba = TestBinarySortableSerDe.getRandBA(r, itest); MyTestClass input = new MyTestClass(b, s, n, l, f, d, st, is, li,ba); BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1); Object output = serde2.deserialize(bw); if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) { System.out.println("structs = " + SerDeUtils.getJSONString(input, rowOI1)); System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2)); System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw)); assertEquals(input, output); } } } /** * Test longer schema deserialization where a smaller struct is serialized and * it is then deserialized with a bigger struct Here the serialized struct has * 8 fields and we deserialized to a struct of 9 fields. */ void testLongerSchemaDeserialization1(Random r) throws Throwable { StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(MyTestClassSmaller.class, ObjectInspectorOptions.JAVA); String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1); String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1); SerDe serde1 = getSerDe(fieldNames1, fieldTypes1); serde1.getObjectInspector(); StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA); String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2); String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2); SerDe serde2 = getSerDe(fieldNames2, fieldTypes2); ObjectInspector serdeOI2 = serde2.getObjectInspector(); int num = 100; for (int itest = 0; itest < num; itest++) { int randField = r.nextInt(9); Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); String st = randField > 6 ? null : TestBinarySortableSerDe .getRandString(r); MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r .nextInt(5) - 2, r.nextInt(5) - 2); MyTestClassSmaller input = new MyTestClassSmaller(b, s, n, l, f, d, st, is); BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1); Object output = serde2.deserialize(bw); if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) { System.out.println("structs = " + SerDeUtils.getJSONString(input, rowOI1)); System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2)); System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw)); assertEquals(input, output); } } } void testLazyBinaryMap(Random r) throws Throwable { StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(MyTestClassBigger.class, ObjectInspectorOptions.JAVA); String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); SerDe serde = getSerDe(fieldNames, fieldTypes); ObjectInspector serdeOI = serde.getObjectInspector(); StructObjectInspector soi1 = (StructObjectInspector) serdeOI; List<? extends StructField> fields1 = soi1.getAllStructFieldRefs(); LazyBinaryMapObjectInspector lazympoi = (LazyBinaryMapObjectInspector) fields1 .get(10).getFieldObjectInspector(); ObjectInspector lazympkeyoi = lazympoi.getMapKeyObjectInspector(); ObjectInspector lazympvalueoi = lazympoi.getMapValueObjectInspector(); StructObjectInspector soi2 = rowOI; List<? extends StructField> fields2 = soi2.getAllStructFieldRefs(); MapObjectInspector inputmpoi = (MapObjectInspector) fields2.get(10) .getFieldObjectInspector(); ObjectInspector inputmpkeyoi = inputmpoi.getMapKeyObjectInspector(); ObjectInspector inputmpvalueoi = inputmpoi.getMapValueObjectInspector(); int num = 100; for (int testi = 0; testi < num; testi++) { Map<String, List<MyTestInnerStruct>> mp = new LinkedHashMap<String, List<MyTestInnerStruct>>(); int randFields = r.nextInt(10); for (int i = 0; i < randFields; i++) { String key = TestBinarySortableSerDe.getRandString(r); int randField = r.nextInt(10); List<MyTestInnerStruct> value = randField > 4 ? null : getRandStructArray(r); mp.put(key, value); } MyTestClassBigger input = new MyTestClassBigger(null, null, null, null, null, null, null, null, null, null, mp); BytesWritable bw = (BytesWritable) serde.serialize(input, rowOI); Object output = serde.deserialize(bw); Object lazyobj = soi1.getStructFieldData(output, fields1.get(10)); Map<?, ?> outputmp = lazympoi.getMap(lazyobj); if (outputmp.size() != mp.size()) { throw new RuntimeException("Map size changed from " + mp.size() + " to " + outputmp.size() + " after serialization!"); } for (Map.Entry<?, ?> entryinput : mp.entrySet()) { boolean bEqual = false; for (Map.Entry<?, ?> entryoutput : outputmp.entrySet()) { // find the same key if (0 == ObjectInspectorUtils.compare(entryoutput.getKey(), lazympkeyoi, entryinput.getKey(), inputmpkeyoi)) { if (0 != ObjectInspectorUtils.compare(entryoutput.getValue(), lazympvalueoi, entryinput.getValue(), inputmpvalueoi)) { assertEquals(entryoutput.getValue(), entryinput.getValue()); } else { bEqual = true; } break; } } if (!bEqual) { throw new RuntimeException( "Could not find matched key in deserialized map : " + entryinput.getKey()); } } } } /** * The test entrance function. * * @throws Throwable */ public void testLazyBinarySerDe() throws Throwable { try { System.out.println("Beginning Test TestLazyBinarySerDe:"); // generate the data int num = 1000; Random r = new Random(1234); MyTestClass rows[] = new MyTestClass[num]; for (int i = 0; i < num; i++) { int randField = r.nextInt(10); Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt()); Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt()); Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); String st = randField > 6 ? null : TestBinarySortableSerDe .getRandString(r); MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r .nextInt(5) - 2, r.nextInt(5) - 2); List<Integer> li = randField > 8 ? null : TestBinarySortableSerDe .getRandIntegerArray(r); ByteArrayRef ba = TestBinarySortableSerDe.getRandBA(r, i); MyTestClass t = new MyTestClass(b, s, n, l, f, d, st, is, li, ba); rows[i] = t; } StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory .getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA); String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); // call the tests // 1/ test LazyBinarySerDe testLazyBinarySerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes)); // 2/ test LazyBinaryMap testLazyBinaryMap(r); // 3/ test serialization and deserialization with different schemas testShorterSchemaDeserialization(r); // 4/ test serialization and deserialization with different schemas testLongerSchemaDeserialization(r); // 5/ test serialization and deserialization with different schemas testShorterSchemaDeserialization1(r); // 6/ test serialization and deserialization with different schemas testLongerSchemaDeserialization1(r); System.out.println("Test TestLazyBinarySerDe passed!"); } catch (Throwable e) { e.printStackTrace(); throw e; } } }