/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.hive.serde;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.internal.io.ReflectionSchemaGenerator;
import co.cask.cdap.internal.io.SchemaGenerator;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.junit.Assert;
import org.junit.Test;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
import java.util.UUID;
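/**
 * Tests that {@link ObjectDeserializer} translates Java objects and {@link StructuredRecord}s
 * into the object representations expected for the corresponding Hive types.
 */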
public class ObjectDeserializerTest {
private static final SchemaGenerator schemaGenerator = new ReflectionSchemaGenerator();
/**
 * Fixture record that, per the original author's note, contains all types our Schema allows.
 * <p>
 * The tests generate a {@link Schema} from this class through {@code schemaGenerator} and set the
 * same field names on a {@link StructuredRecord} builder, so the field names and types declared
 * here are part of the fixture. {@code HiveSimpleRecord} holds the Hive-side counterpart of each
 * field.
 */
private static class SimpleRecord {
final boolean booleanField = Boolean.TRUE;
final byte byteField = Byte.MAX_VALUE;
final char charField = Character.MAX_VALUE;
final short shortField = Short.MAX_VALUE;
final int intField = Integer.MAX_VALUE;
final long longField = Long.MAX_VALUE;
final float floatField = Float.MAX_VALUE;
final double doubleField = Double.MAX_VALUE;
final String stringField = "foobar";
final byte[] bytesField = new byte[] { 1, 2, 3 };
final ByteBuffer byteBufferField = ByteBuffer.wrap(new byte[] { 4, 5, 6 });
final UUID uuidField = UUID.fromString("92633f3c-f358-47ef-89bd-2d8dd59a600d");
// unlike the other fields, the URI and URL are supplied by the caller through the constructor
// NOTE(review): presumably because creating them can throw checked exceptions -- confirm
URI uriField;
URL urlField;
final int[] intsField = new int[] { 1, 2, 3 };
// stores the given URI and URL; every other field keeps its inline value
private SimpleRecord(URI uri, URL url) {
this.uriField = uri;
this.urlField = url;
}
}
/**
 * Hive version of {@link SimpleRecord}, with each field converted to the type the tests expect
 * after translation for Hive: byte, char and short become int, ByteBuffer and UUID become
 * byte arrays, and URI and URL become strings.
 * <p>
 * {@link #getFieldNames()}, {@link #getFieldTypes()} and {@link #getAsList()} are parallel lists:
 * position {@code i} in each describes the same field. The three binary fields are grouped
 * directly before the trailing int list because {@code assertSimpleRecordEquals} locates them
 * by their distance from the end of the list.
 */
private static class HiveSimpleRecord {
  // All fields are assigned exactly once, from the source record, in the constructor.
  final boolean booleanField;
  final int byteField;
  final int charField;
  final int shortField;
  final int intField;
  final long longField;
  final float floatField;
  final double doubleField;
  final String stringField;
  final byte[] bytesField;
  final byte[] byteBufferField;
  final byte[] uuidField;
  final String uriField;
  final String urlField;
  final int[] intsField;

  /**
   * Builds the Hive-side counterpart of the given record.
   *
   * @param r the record whose field values are converted
   */
  private HiveSimpleRecord(SimpleRecord r) {
    this.booleanField = r.booleanField;
    // byte, char and short are widened to int
    this.byteField = r.byteField;
    this.charField = r.charField;
    this.shortField = r.shortField;
    this.intField = r.intField;
    this.longField = r.longField;
    this.floatField = r.floatField;
    this.doubleField = r.doubleField;
    this.stringField = r.stringField;
    this.bytesField = r.bytesField;
    this.byteBufferField = Bytes.toBytes(r.byteBufferField);
    this.uuidField = Bytes.toBytes(r.uuidField);
    this.uriField = r.uriField.toString();
    this.urlField = r.urlField.toString();
    this.intsField = r.intsField;
  }

  /**
   * Returns the field names, all lowercase to simulate hive.
   *
   * @return a new mutable list of field names, in the same order as {@link #getFieldTypes()}
   */
  private static List<String> getFieldNames() {
    return Lists.newArrayList(
      "booleanfield",
      "bytefield",
      "charfield",
      "shortfield",
      "intfield",
      "longfield",
      "floatfield",
      "doublefield",
      "stringfield",
      "urifield",
      "urlfield",
      "bytesfield",
      "bytebufferfield",
      "uuidfield",
      "intsfield"
    );
  }

  /**
   * Returns the Hive type of every field.
   *
   * @return a new mutable list of types, in the same order as {@link #getFieldNames()}
   */
  private static List<TypeInfo> getFieldTypes() {
    return Lists.newArrayList(
      TypeInfoFactory.booleanTypeInfo,
      TypeInfoFactory.intTypeInfo,
      TypeInfoFactory.intTypeInfo,
      TypeInfoFactory.intTypeInfo,
      TypeInfoFactory.intTypeInfo,
      TypeInfoFactory.longTypeInfo,
      TypeInfoFactory.floatTypeInfo,
      TypeInfoFactory.doubleTypeInfo,
      TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.stringTypeInfo,
      TypeInfoFactory.binaryTypeInfo,
      TypeInfoFactory.binaryTypeInfo,
      TypeInfoFactory.binaryTypeInfo,
      TypeInfoFactory.getListTypeInfo(TypeInfoFactory.intTypeInfo)
    );
  }

  /**
   * Returns the field values of this record.
   *
   * @return a new mutable list of values, in the same order as {@link #getFieldNames()};
   *         the last element is the raw {@code int[]} rather than a list
   */
  private List<Object> getAsList() {
    return Lists.<Object>newArrayList(
      booleanField,
      byteField,
      charField,
      shortField,
      intField,
      longField,
      floatField,
      doubleField,
      stringField,
      uriField,
      urlField,
      bytesField,
      byteBufferField,
      uuidField,
      intsField
    );
  }
}
/**
 * Fixture record holding a map, a list and a nested {@link SimpleRecord}. The static helpers
 * describe the Hive columns for the record and {@link #getAsList()} gives the values the
 * tests expect after translation.
 */
public static class NestedRecord {
  final Map<Integer, String> mapField;
  final List<Boolean> listField;
  final SimpleRecord record;

  /**
   * Populates the record with fixed sample values.
   *
   * @throws URISyntaxException if the sample URI cannot be parsed
   * @throws MalformedURLException if the sample URL cannot be parsed
   */
  public NestedRecord() throws URISyntaxException, MalformedURLException {
    mapField = ImmutableMap.of(1, "1", 2, "2", 3, "3");
    listField = Lists.newArrayList(true, false, false);
    URI sampleUri = new URI("http://abc.com");
    URL sampleUrl = new URL("http://123.com");
    record = new SimpleRecord(sampleUri, sampleUrl);
  }

  /** Returns the lowercase Hive column names, one per field. */
  private static List<String> getFieldNames() {
    List<String> columnNames = Lists.newArrayList();
    columnNames.add("mapfield");
    columnNames.add("listfield");
    columnNames.add("record");
    return columnNames;
  }

  /** Returns the Hive type of each column, in the same order as {@link #getFieldNames()}. */
  private static List<TypeInfo> getFieldTypes() {
    TypeInfo mapType = TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
    TypeInfo listType = TypeInfoFactory.getListTypeInfo(TypeInfoFactory.booleanTypeInfo);
    TypeInfo structType =
      TypeInfoFactory.getStructTypeInfo(HiveSimpleRecord.getFieldNames(), HiveSimpleRecord.getFieldTypes());
    return Lists.<TypeInfo>newArrayList(mapType, listType, structType);
  }

  /** Returns the expected values; the nested record appears as the list of its Hive-side values. */
  private List<Object> getAsList() {
    List<Object> nestedValues = new HiveSimpleRecord(record).getAsList();
    return Lists.<Object>newArrayList(mapField, listField, nestedValues);
  }
}
/**
 * Checks that values whose Java type already matches the Hive column type come back equal to
 * the input, and that byte data comes back as a {@code Byte[]} when the column type is
 * array&lt;tinyint&gt;.
 */
@Test
public void testIdentityTranslations() throws Exception {
  List<String> fieldNames = Lists.newArrayList("dummy-name");

  // string
  ObjectDeserializer stringDeserializer = new ObjectDeserializer(
    fieldNames, Lists.<TypeInfo>newArrayList(TypeInfoFactory.stringTypeInfo), Schema.of(Schema.Type.STRING));
  Object stringResult = stringDeserializer.deserialize("foobar");
  Assert.assertEquals("foobar", stringResult);

  // int
  ObjectDeserializer intDeserializer = new ObjectDeserializer(
    fieldNames, Lists.<TypeInfo>newArrayList(TypeInfoFactory.intTypeInfo), Schema.of(Schema.Type.INT));
  Object intResult = intDeserializer.deserialize(Integer.MIN_VALUE);
  Assert.assertEquals(Integer.MIN_VALUE, intResult);

  // long
  ObjectDeserializer longDeserializer = new ObjectDeserializer(
    fieldNames, Lists.<TypeInfo>newArrayList(TypeInfoFactory.longTypeInfo), Schema.of(Schema.Type.LONG));
  Object longResult = longDeserializer.deserialize(Long.MAX_VALUE);
  Assert.assertEquals(Long.MAX_VALUE, longResult);

  // boolean
  ObjectDeserializer booleanDeserializer = new ObjectDeserializer(
    fieldNames, Lists.<TypeInfo>newArrayList(TypeInfoFactory.booleanTypeInfo), Schema.of(Schema.Type.BOOLEAN));
  Boolean booleanResult = (Boolean) booleanDeserializer.deserialize(true);
  Assert.assertTrue(booleanResult);

  // float
  ObjectDeserializer floatDeserializer = new ObjectDeserializer(
    fieldNames, Lists.<TypeInfo>newArrayList(TypeInfoFactory.floatTypeInfo), Schema.of(Schema.Type.FLOAT));
  Object floatResult = floatDeserializer.deserialize(3.14f);
  Assert.assertEquals(3.14f, floatResult);

  // double
  ObjectDeserializer doubleDeserializer = new ObjectDeserializer(
    fieldNames, Lists.<TypeInfo>newArrayList(TypeInfoFactory.doubleTypeInfo), Schema.of(Schema.Type.DOUBLE));
  Object doubleResult = doubleDeserializer.deserialize(3.14);
  Assert.assertEquals(3.14, doubleResult);

  // bytes
  ObjectDeserializer bytesDeserializer = new ObjectDeserializer(
    fieldNames, Lists.<TypeInfo>newArrayList(TypeInfoFactory.binaryTypeInfo), Schema.of(Schema.Type.BYTES));
  byte[] bytesResult = (byte[]) bytesDeserializer.deserialize(new byte[] { 1, 2, 3 });
  Assert.assertArrayEquals(new byte[] { 1, 2, 3 }, bytesResult);

  // array<tinyint>: the same result is expected for a byte array and for a ByteBuffer
  ObjectDeserializer tinyintArrayDeserializer = new ObjectDeserializer(
    fieldNames,
    Lists.<TypeInfo>newArrayList(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.byteTypeInfo)),
    Schema.of(Schema.Type.BYTES));
  Byte[] fromByteArray = (Byte[]) tinyintArrayDeserializer.deserialize(new byte[] { 1, 2, 3 });
  Assert.assertArrayEquals(new Byte[] { 1, 2, 3 }, fromByteArray);
  Byte[] fromByteBuffer = (Byte[]) tinyintArrayDeserializer.deserialize(ByteBuffer.wrap(new byte[] { 1, 2, 3 }));
  Assert.assertArrayEquals(new Byte[] { 1, 2, 3 }, fromByteBuffer);
}
/**
 * Checks that byte, char and short values are translated to their int value when the Hive
 * column type is int.
 */
@Test
public void testIntFieldTranslations() throws Exception {
  List<String> fieldNames = Lists.newArrayList("dummy-name");
  List<TypeInfo> fieldTypes = Lists.<TypeInfo>newArrayList(TypeInfoFactory.intTypeInfo);
  ObjectDeserializer translator = new ObjectDeserializer(fieldNames, fieldTypes, Schema.of(Schema.Type.INT));

  Byte maxByte = Byte.MAX_VALUE;
  Character maxChar = Character.MAX_VALUE;
  Short maxShort = Short.MAX_VALUE;

  // widen each expected value to int up front so the comparison is against an Integer
  int expectedFromByte = maxByte;
  int expectedFromChar = maxChar;
  int expectedFromShort = maxShort;

  Assert.assertEquals(expectedFromByte, translator.deserialize(maxByte));
  Assert.assertEquals(expectedFromChar, translator.deserialize(maxChar));
  Assert.assertEquals(expectedFromShort, translator.deserialize(maxShort));
}
/**
 * Checks that both a URL and a URI are translated to their string form when the Hive column
 * type is string.
 */
@Test
public void testURLAndURITranslation() throws Exception {
  List<String> fieldNames = Lists.newArrayList("dummy-name");
  List<TypeInfo> fieldTypes = Lists.<TypeInfo>newArrayList(TypeInfoFactory.stringTypeInfo);
  ObjectDeserializer translator = new ObjectDeserializer(fieldNames, fieldTypes, Schema.of(Schema.Type.STRING));

  String address = "http://abc.com/123";

  Object fromUrl = translator.deserialize(new URL(address));
  Assert.assertEquals(address, fromUrl);

  Object fromUri = translator.deserialize(new URI(address));
  Assert.assertEquals(address, fromUri);
}
/**
 * Checks that a ByteBuffer is translated to a byte array containing only the bytes from the
 * buffer's current position onward.
 */
@Test
public void testByteBufferTranslation() throws Exception {
  List<String> fieldNames = Lists.newArrayList("dummy-name");
  List<TypeInfo> fieldTypes = Lists.<TypeInfo>newArrayList(TypeInfoFactory.binaryTypeInfo);
  ObjectDeserializer translator = new ObjectDeserializer(fieldNames, fieldTypes, Schema.of(Schema.Type.BYTES));

  ByteBuffer buffer = ByteBuffer.wrap(new byte[] { 1, 2, 3 });

  // position 0: the whole content is expected
  byte[] wholeContent = (byte[]) translator.deserialize(buffer);
  Assert.assertArrayEquals(new byte[] { 1, 2, 3 }, wholeContent);

  // consume one byte; bytes before the position must not be copied
  buffer.get();
  byte[] remainingContent = (byte[]) translator.deserialize(buffer);
  Assert.assertArrayEquals(new byte[] { 2, 3 }, remainingContent);
}
/**
 * Checks that a UUID is translated to the byte array produced by {@code Bytes.toBytes(UUID)}
 * and that the original UUID can be recovered from those bytes.
 */
@Test
public void testUUIDTranslation() throws Exception {
  List<String> fieldNames = Lists.newArrayList("dummy-name");
  List<TypeInfo> fieldTypes = Lists.<TypeInfo>newArrayList(TypeInfoFactory.binaryTypeInfo);
  ObjectDeserializer translator = new ObjectDeserializer(fieldNames, fieldTypes, Schema.of(Schema.Type.BYTES));

  UUID original = UUID.randomUUID();
  byte[] uuidBytes = (byte[]) translator.deserialize(original);

  Assert.assertArrayEquals(Bytes.toBytes(original), uuidBytes);
  UUID roundTripped = Bytes.toUUID(uuidBytes);
  Assert.assertEquals(original, roundTripped);
}
/**
 * Checks that a list of strings is translated to an equal list when the Hive column type is
 * a list of strings.
 */
@Test
public void testListTranslation() throws Exception {
  List<String> fieldNames = Lists.newArrayList("dummy-name");
  List<TypeInfo> fieldTypes =
    Lists.<TypeInfo>newArrayList(TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo));
  Schema stringArraySchema = Schema.arrayOf(Schema.of(Schema.Type.STRING));
  ObjectDeserializer translator = new ObjectDeserializer(fieldNames, fieldTypes, stringArraySchema);

  List<String> words = Lists.newArrayList("foo", "bar", "baz");
  Object translatedWords = translator.deserialize(words);
  Assert.assertEquals(words, translatedWords);
}
/**
 * Checks that map keys and values are both translated: Character keys become their int value
 * and URL values become strings when the Hive column type is map&lt;int,string&gt;.
 */
@Test
public void testMapTranslation() throws Exception {
  List<String> fieldNames = Lists.newArrayList("dummy-name");
  List<TypeInfo> fieldTypes = Lists.<TypeInfo>newArrayList(
    TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo));
  Schema intToStringSchema = Schema.mapOf(Schema.of(Schema.Type.INT), Schema.of(Schema.Type.STRING));
  ObjectDeserializer translator = new ObjectDeserializer(fieldNames, fieldTypes, intToStringSchema);

  char letterKey = 'a';
  char digitKey = '1';

  Map<Character, URL> urlsByChar = Maps.newHashMap();
  urlsByChar.put(letterKey, new URL("http://abc.com"));
  urlsByChar.put(digitKey, new URL("http://123.com"));

  Map<Integer, String> stringsByInt = Maps.newHashMap();
  stringsByInt.put((int) letterKey, "http://abc.com");
  stringsByInt.put((int) digitKey, "http://123.com");

  Object translatedMap = translator.deserialize(urlsByChar);
  Assert.assertEquals(stringsByInt, translatedMap);
}
/**
 * Checks that a {@link SimpleRecord} object is flattened into the list of values described by
 * {@link HiveSimpleRecord}.
 */
@Test
public void testFlattenSimpleRecord() throws Exception {
  SimpleRecord input = new SimpleRecord(new URI("http://abc.com"), new URL("http://123.com"));

  // the Hive-side record supplies the expected values
  List<Object> expectedValues = new HiveSimpleRecord(input).getAsList();

  List<String> columnNames = HiveSimpleRecord.getFieldNames();
  List<TypeInfo> columnTypes = HiveSimpleRecord.getFieldTypes();
  Schema recordSchema = schemaGenerator.generate(SimpleRecord.class);
  ObjectDeserializer translator = new ObjectDeserializer(columnNames, columnTypes, recordSchema);

  List<Object> actualValues = translator.translateRecord(input);
  assertSimpleRecordEquals(expectedValues, actualValues);
}
/**
 * Checks that a {@link StructuredRecord} carrying the same values as a {@link SimpleRecord} is
 * flattened into the list of values described by {@link HiveSimpleRecord}.
 */
@Test
public void testFlattenSimpleStructuredRecord() throws Exception {
  SimpleRecord simpleRecord = new SimpleRecord(new URI("http://abc.com"), new URL("http://123.com"));
  // generate the schema once, with the shared generator, and use it for both the record and the deserializer
  Schema schema = schemaGenerator.generate(SimpleRecord.class);
  StructuredRecord structuredRecord = StructuredRecord.builder(schema)
    .set("booleanField", simpleRecord.booleanField)
    .set("byteField", simpleRecord.byteField)
    .set("charField", simpleRecord.charField)
    .set("shortField", simpleRecord.shortField)
    .set("intField", simpleRecord.intField)
    .set("longField", simpleRecord.longField)
    .set("floatField", simpleRecord.floatField)
    .set("doubleField", simpleRecord.doubleField)
    .set("stringField", simpleRecord.stringField)
    .set("bytesField", simpleRecord.bytesField)
    .set("byteBufferField", simpleRecord.byteBufferField)
    .set("uuidField", simpleRecord.uuidField)
    .set("uriField", simpleRecord.uriField)
    .set("urlField", simpleRecord.urlField)
    .set("intsField", simpleRecord.intsField)
    .build();

  // create the Hive version of the record
  HiveSimpleRecord hiveSimpleRecord = new HiveSimpleRecord(simpleRecord);
  List<String> fieldNames = HiveSimpleRecord.getFieldNames();
  List<TypeInfo> fieldTypes = HiveSimpleRecord.getFieldTypes();
  List<Object> expected = hiveSimpleRecord.getAsList();

  // flatten the StructuredRecord into a list of objects
  ObjectDeserializer translator = new ObjectDeserializer(fieldNames, fieldTypes, schema);
  List<Object> translated = translator.translateRecord(structuredRecord);
  assertSimpleRecordEquals(expected, translated);
}
/**
 * Checks that a {@link NestedRecord} is flattened into its map, its list and, for the nested
 * record, a list of that record's translated values.
 */
@SuppressWarnings("unchecked")
@Test
public void testNestedRecord() throws Exception {
  NestedRecord input = new NestedRecord();

  List<String> columnNames = NestedRecord.getFieldNames();
  List<TypeInfo> columnTypes = NestedRecord.getFieldTypes();
  Schema recordSchema = schemaGenerator.generate(NestedRecord.class);
  ObjectDeserializer translator = new ObjectDeserializer(columnNames, columnTypes, recordSchema);

  List<Object> actualValues = translator.translateRecord(input);
  List<Object> expectedValues = input.getAsList();

  // the map (index 0) and the list (index 1) can be compared directly
  for (int column = 0; column < 2; column++) {
    Assert.assertEquals(expectedValues.get(column), actualValues.get(column));
  }

  // the nested record (index 2) is a list that contains byte arrays, so it needs the dedicated comparison
  List<Object> expectedNested = (List<Object>) expectedValues.get(2);
  List<Object> actualNested = (List<Object>) actualValues.get(2);
  assertSimpleRecordEquals(expectedNested, actualNested);
}
/**
 * Asserts that a translated simple record equals the expected one.
 * <p>
 * Both lists must have the same layout: every element except the last four is compared with
 * {@code equals}, the three elements before the last are byte arrays, and the last element is
 * an {@code int[]} in {@code expected} and a list of integers in {@code actual}.
 *
 * @param expected the expected values, as returned by {@code HiveSimpleRecord.getAsList()}
 * @param actual the values produced by the deserializer
 */
@SuppressWarnings("unchecked")
private void assertSimpleRecordEquals(List<Object> expected, List<Object> actual) {
  // check the size first: otherwise extra translated fields would pass unnoticed and missing ones
  // would show up as an IndexOutOfBoundsException instead of an assertion failure
  Assert.assertEquals(expected.size(), actual.size());

  int intListIndex = expected.size() - 1;
  int firstBinaryIndex = expected.size() - 4;

  // compare the non-array fields
  Assert.assertEquals(expected.subList(0, firstBinaryIndex), actual.subList(0, firstBinaryIndex));

  // compare the byte array fields, which need element-wise comparison
  for (int i = firstBinaryIndex; i < intListIndex; i++) {
    Assert.assertArrayEquals((byte[]) expected.get(i), (byte[]) actual.get(i));
  }

  // compare the int array field, which becomes a list
  List<Integer> actualInts = (List<Integer>) actual.get(intListIndex);
  int[] expectedInts = (int[]) expected.get(intListIndex);
  Assert.assertEquals(expectedInts.length, actualInts.size());
  for (int i = 0; i < expectedInts.length; i++) {
    Assert.assertEquals(expectedInts[i], (int) actualInts.get(i));
  }
}
}