/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.serde2.avro;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
import org.junit.Test;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

/**
 * Round-trip tests for {@link AvroDeserializer}.
 *
 * Each test builds a {@link GenericData.Record}, pushes it through
 * {@code Utils.serializeAndDeserializeRecord}, deserializes the result into a
 * row, and then checks the row both directly and through the object
 * inspectors produced by {@link AvroObjectInspectorGenerator}.
 */
public class TestAvroDeserializer {
  private static final GenericData GENERIC_DATA = GenericData.get();

  /**
   * Validates {@code record} against {@code s}, round-trips it through
   * serialization and deserializes it into a single-column row.
   *
   * @param aoig   generator supplying the column names and types for {@code s}
   * @param s      schema the record was built from; also used as reader schema
   * @param record the record to round-trip
   * @return the deserialized row, already asserted to hold exactly one column
   * @throws SerDeException if deserialization fails
   * @throws IOException    if the serialization round trip fails
   */
  @SuppressWarnings("unchecked") // deserialize() returns Object; the row is a list of column values
  private static List<Object> deserializeRow(AvroObjectInspectorGenerator aoig, Schema s,
      GenericData.Record record) throws SerDeException, IOException {
    assertTrue(GENERIC_DATA.validate(s, record));
    AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record);

    AvroDeserializer de = new AvroDeserializer();
    List<Object> row =
        (List<Object>) de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
    assertEquals(1, row.size());
    return row;
  }

  /** A field of Avro type "null" must come back as null behind a VoidObjectInspector. */
  @Test
  public void canDeserializeVoidType() throws IOException, SerDeException {
    String schemaString = "{\n"
        + " \"type\": \"record\", \n"
        + " \"name\": \"nullTest\",\n"
        + " \"fields\" : [\n"
        + " {\"name\": \"isANull\", \"type\": \"null\"}\n"
        + " ]\n"
        + "}";
    Schema s = Schema.parse(schemaString);
    GenericData.Record record = new GenericData.Record(s);
    record.put("isANull", null);

    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    List<Object> row = deserializeRow(aoig, s, record);

    Object theVoidObject = row.get(0);
    assertNull(theVoidObject);

    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
    StructField fieldRef = oi.getStructFieldRef("isANull");

    Object shouldBeNull = oi.getStructFieldData(row, fieldRef);
    assertNull(shouldBeNull);
    assertTrue(fieldRef.getFieldObjectInspector() instanceof VoidObjectInspector);
  }

  /** A map of string keys to long values survives the round trip. */
  @Test
  public void canDeserializeMapsWithPrimitiveKeys() throws SerDeException, IOException {
    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.MAP_WITH_PRIMITIVE_VALUE_TYPE);
    GenericData.Record record = new GenericData.Record(s);

    Map<String, Long> m = new Hashtable<String, Long>();
    m.put("one", 1L);
    m.put("two", 2L);
    m.put("three", 3L);
    record.put("aMap", m);
    System.out.println("record = " + record);

    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    List<Object> row = deserializeRow(aoig, s, record);

    Object theMapObject = row.get(0);
    assertTrue(theMapObject instanceof Map);
    Map<?, ?> theMap = (Map<?, ?>) theMapObject;

    // Verify the raw object that's been created
    assertEquals(1L, theMap.get("one"));
    assertEquals(2L, theMap.get("two"));
    assertEquals(3L, theMap.get("three"));

    // Verify that the provided object inspector can pull out these same values
    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();

    List<Object> z = oi.getStructFieldsDataAsList(row);
    assertEquals(1, z.size());

    StructField fieldRef = oi.getStructFieldRef("amap");
    Map<?, ?> theMap2 = (Map<?, ?>) oi.getStructFieldData(row, fieldRef);
    assertEquals(1L, theMap2.get("one"));
    assertEquals(2L, theMap2.get("two"));
    assertEquals(3L, theMap2.get("three"));
  }

  /** An array of strings survives the round trip, element order included. */
  @Test
  public void canDeserializeArrays() throws SerDeException, IOException {
    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.ARRAY_WITH_PRIMITIVE_ELEMENT_TYPE);
    GenericData.Record record = new GenericData.Record(s);

    List<String> list = new ArrayList<String>();
    list.add("Eccleston");
    list.add("Tennant");
    list.add("Smith");
    record.put("anArray", list);
    System.out.println("Array-backed record = " + record);

    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    List<Object> row = deserializeRow(aoig, s, record);

    Object theArrayObject = row.get(0);
    assertTrue(theArrayObject instanceof List);
    List<?> theList = (List<?>) theArrayObject;

    // Verify the raw object that's been created
    assertEquals("Eccleston", theList.get(0));
    assertEquals("Tennant", theList.get(1));
    assertEquals("Smith", theList.get(2));

    // Now go the correct way, through objectinspectors
    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
    StructField fieldRefToArray = oi.getStructFieldRef("anArray");

    Object anArrayData = oi.getStructFieldData(row, fieldRefToArray);
    StandardListObjectInspector anArrayOI =
        (StandardListObjectInspector) fieldRefToArray.getFieldObjectInspector();
    assertEquals(3, anArrayOI.getListLength(anArrayData));

    JavaStringObjectInspector elementOI =
        (JavaStringObjectInspector) anArrayOI.getListElementObjectInspector();

    Object firstElement = anArrayOI.getListElement(anArrayData, 0);
    assertEquals("Eccleston", elementOI.getPrimitiveJavaObject(firstElement));
    assertTrue(firstElement instanceof String);

    Object secondElement = anArrayOI.getListElement(anArrayData, 1);
    assertEquals("Tennant", elementOI.getPrimitiveJavaObject(secondElement));
    assertTrue(secondElement instanceof String);

    Object thirdElement = anArrayOI.getListElement(anArrayData, 2);
    assertEquals("Smith", elementOI.getPrimitiveJavaObject(thirdElement));
    assertTrue(thirdElement instanceof String);
  }

  /** A nested record is exposed as a struct whose fields keep their names, order and values. */
  @Test
  public void canDeserializeRecords() throws SerDeException, IOException {
    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.RECORD_SCHEMA);
    GenericData.Record record = new GenericData.Record(s);
    GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema());
    innerRecord.put("int1", 42);
    innerRecord.put("boolean1", true);
    innerRecord.put("long1", 42432234234L);
    record.put("aRecord", innerRecord);

    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    List<Object> row = deserializeRow(aoig, s, record);

    Object theRecordObject = row.get(0);
    System.out.println("theRecordObject = " + theRecordObject.getClass().getCanonicalName());

    // The original record was lost in the deserialization, so just go the
    // correct way, through objectinspectors
    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
    List<? extends StructField> allStructFieldRefs = oi.getAllStructFieldRefs();
    assertEquals(1, allStructFieldRefs.size());
    StructField fieldRefForaRecord = allStructFieldRefs.get(0);
    assertEquals("arecord", fieldRefForaRecord.getFieldName());
    Object innerRecord2 = oi.getStructFieldData(row, fieldRefForaRecord);

    // Extract innerRecord field refs
    StandardStructObjectInspector innerRecord2OI =
        (StandardStructObjectInspector) fieldRefForaRecord.getFieldObjectInspector();

    List<? extends StructField> allStructFieldRefs1 = innerRecord2OI.getAllStructFieldRefs();
    assertEquals(3, allStructFieldRefs1.size());
    assertEquals("int1", allStructFieldRefs1.get(0).getFieldName());
    assertEquals("boolean1", allStructFieldRefs1.get(1).getFieldName());
    assertEquals("long1", allStructFieldRefs1.get(2).getFieldName());

    // Result intentionally unused: the call only has to complete without throwing.
    innerRecord2OI.getStructFieldsDataAsList(innerRecord2);

    assertEquals(42,
        innerRecord2OI.getStructFieldData(innerRecord2, allStructFieldRefs1.get(0)));
    assertEquals(true,
        innerRecord2OI.getStructFieldData(innerRecord2, allStructFieldRefs1.get(1)));
    assertEquals(42432234234L,
        innerRecord2OI.getStructFieldData(innerRecord2, allStructFieldRefs1.get(2)));
  }

  /**
   * What {@link #unionTester} hands back: the union's object inspector, the
   * value held by the union, and the union object itself.
   */
  private static class ResultPair { // Because Pairs give Java the vapors.
    public final ObjectInspector oi;
    public final Object value;
    public final Object unionObject;

    private ResultPair(ObjectInspector oi, Object value, Object unionObject) {
      this.oi = oi;
      this.value = value;
      this.unionObject = unionObject;
    }
  }

  /** Both branches of the union are deserialized with the expected value and tag. */
  @Test
  public void canDeserializeUnions() throws SerDeException, IOException {
    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.UNION_SCHEMA);
    GenericData.Record record = new GenericData.Record(s);

    record.put("aUnion", "this is a string");

    ResultPair result = unionTester(s, record);
    assertTrue(result.value instanceof String);
    assertEquals("this is a string", result.value);
    UnionObjectInspector uoi = (UnionObjectInspector) result.oi;
    assertEquals(1, uoi.getTag(result.unionObject));

    // Now the other union possibility
    record = new GenericData.Record(s);
    record.put("aUnion", 99);

    result = unionTester(s, record);
    assertTrue(result.value instanceof Integer);
    assertEquals(99, result.value);
    uoi = (UnionObjectInspector) result.oi;
    assertEquals(0, uoi.getTag(result.unionObject));
  }

  /**
   * Round-trips {@code record} and extracts its single union column.
   *
   * @param s      schema containing exactly one union field
   * @param record record whose union field has been populated
   * @return the union's inspector, the value it holds, and the union object
   */
  private ResultPair unionTester(Schema s, GenericData.Record record)
      throws SerDeException, IOException {
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    List<Object> row = deserializeRow(aoig, s, record);

    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    assertEquals(1, fieldRefs.size());
    StructField fieldRef = fieldRefs.get(0);
    assertEquals("aunion", fieldRef.getFieldName());
    Object theUnion = oi.getStructFieldData(row, fieldRef);

    assertTrue(fieldRef.getFieldObjectInspector() instanceof UnionObjectInspector);
    UnionObjectInspector fieldObjectInspector =
        (UnionObjectInspector) fieldRef.getFieldObjectInspector();
    Object value = fieldObjectInspector.getField(theUnion);

    return new ResultPair(fieldObjectInspector, value, theUnion);
  }

  // Enums are one of two types we fudge for Hive. Enums go in, Strings come out.
  @Test
  public void canDeserializeEnums() throws SerDeException, IOException {
    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.ENUM_SCHEMA);
    GenericData.Record record = new GenericData.Record(s);

    record.put("baddies", "DALEKS");

    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    List<Object> row = deserializeRow(aoig, s, record);

    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
    List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
    assertEquals(1, fieldRefs.size());
    StructField fieldRef = fieldRefs.get(0);

    assertEquals("baddies", fieldRef.getFieldName());

    Object theStringObject = oi.getStructFieldData(row, fieldRef);
    assertTrue(fieldRef.getFieldObjectInspector() instanceof StringObjectInspector);
    StringObjectInspector soi = (StringObjectInspector) fieldRef.getFieldObjectInspector();

    String finalValue = soi.getPrimitiveJavaObject(theStringObject);
    assertEquals("DALEKS", finalValue);
  }

  // Fixed doesn't exist in Hive. Fixeds go in, lists of bytes go out.
  @Test
  public void canDeserializeFixed() throws SerDeException, IOException {
    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.FIXED_SCHEMA);
    GenericData.Record record = new GenericData.Record(s);

    // Explicit charset so the byte count does not depend on the platform default.
    byte[] bytes = "ANANCIENTBLUEBOX".getBytes("UTF-8");
    // The Fixed datum carries the schema of the "hash" field, not of the enclosing record.
    record.put("hash", new GenericData.Fixed(s.getField("hash").schema(), bytes));

    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    List<Object> row = deserializeRow(aoig, s, record);

    Object theArrayObject = row.get(0);
    assertTrue(theArrayObject instanceof List);
    List<?> theList = (List<?>) theArrayObject;

    // Verify the raw object that's been created
    for (int i = 0; i < bytes.length; i++) {
      assertEquals(bytes[i], theList.get(i));
    }

    // Now go the correct way, through objectinspectors
    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
    List<Object> fieldsDataAsList = oi.getStructFieldsDataAsList(row);
    assertEquals(1, fieldsDataAsList.size());
    StructField fieldRef = oi.getStructFieldRef("hash");

    List<?> theList2 = (List<?>) oi.getStructFieldData(row, fieldRef);
    for (int i = 0; i < bytes.length; i++) {
      assertEquals(bytes[i], theList2.get(i));
    }
  }

  /** An Avro bytes field comes back as a list holding the same byte values. */
  @Test
  public void canDeserializeBytes() throws SerDeException, IOException {
    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.BYTES_SCHEMA);
    GenericData.Record record = new GenericData.Record(s);

    // Explicit charset so the byte count does not depend on the platform default.
    byte[] bytes = "ANANCIENTBLUEBOX".getBytes("UTF-8");

    // A freshly wrapped buffer already starts at position zero.
    ByteBuffer bb = ByteBuffer.wrap(bytes);
    record.put("bytesField", bb);

    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    List<Object> row = deserializeRow(aoig, s, record);

    Object theArrayObject = row.get(0);
    assertTrue(theArrayObject instanceof List);

    // Now go the correct way, through objectinspectors
    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
    List<Object> fieldsDataAsList = oi.getStructFieldsDataAsList(row);
    assertEquals(1, fieldsDataAsList.size());
    StructField fieldRef = oi.getStructFieldRef("bytesField");

    List<?> theList2 = (List<?>) oi.getStructFieldData(row, fieldRef);
    for (int i = 0; i < bytes.length; i++) {
      assertEquals(bytes[i], theList2.get(i));
    }
  }

  /** A nullable string field round-trips both with a value and with null. */
  @Test
  public void canDeserializeNullableTypes() throws IOException, SerDeException {
    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.NULLABLE_STRING_SCHEMA);
    GenericData.Record record = new GenericData.Record(s);
    record.put("nullableString", "this is a string");

    verifyNullableType(record, s, "this is a string");

    record = new GenericData.Record(s);
    record.put("nullableString", null);
    verifyNullableType(record, s, null);
  }

  /**
   * Round-trips {@code record} and checks that its single nullable string
   * column reads back as {@code expected} through a StringObjectInspector.
   *
   * @param record   record with the nullable string field populated
   * @param s        schema the record was built from
   * @param expected the string the column must hold, or null if it must be null
   */
  private void verifyNullableType(GenericData.Record record, Schema s, String expected)
      throws SerDeException, IOException {
    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
    List<Object> row = deserializeRow(aoig, s, record);

    Object rowElement = row.get(0);

    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
    List<Object> fieldsDataAsList = oi.getStructFieldsDataAsList(row);
    assertEquals(1, fieldsDataAsList.size());
    StructField fieldRef = oi.getStructFieldRef("nullablestring");
    ObjectInspector fieldObjectInspector = fieldRef.getFieldObjectInspector();
    StringObjectInspector soi = (StringObjectInspector) fieldObjectInspector;

    if (expected == null) {
      assertNull(soi.getPrimitiveJavaObject(rowElement));
    } else {
      // Compare against the caller's expectation rather than a hard-coded literal.
      assertEquals(expected, soi.getPrimitiveJavaObject(rowElement));
    }
  }
}