/*
* Copyright © 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.format;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.format.FormatSpecification;
import co.cask.cdap.api.data.format.Formats;
import co.cask.cdap.api.data.format.RecordFormat;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.flow.flowlet.StreamEvent;
import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.hash.Hashing;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.EncoderFactory;
import org.junit.Assert;
import org.junit.Test;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
*
*/
/**
 * Tests that Avro-encoded {@link StreamEvent} bodies are correctly decoded into
 * {@link StructuredRecord}s by the Avro record format, covering flat records,
 * nested records, repeated reads, and schema projection.
 */
public class AvroRecordFormatTest {

  @Test
  public void testMultipleReads() throws Exception {
    // One initialized format instance must be safely reusable across several reads.
    Schema cdapSchema = Schema.recordOf("record", Schema.Field.of("x", Schema.of(Schema.Type.INT)));
    org.apache.avro.Schema avroSchema = toAvroSchema(cdapSchema);
    FormatSpecification spec =
      new FormatSpecification(Formats.AVRO, cdapSchema, Collections.<String, String>emptyMap());
    RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(spec);

    GenericRecord avroRecord = new GenericRecordBuilder(avroSchema).set("x", 5).build();
    StructuredRecord result = format.read(asStreamEvent(avroRecord));
    Assert.assertEquals(5, result.get("x"));

    // Second read through the same format instance.
    avroRecord = new GenericRecordBuilder(avroSchema).set("x", 10).build();
    result = format.read(asStreamEvent(avroRecord));
    Assert.assertEquals(10, result.get("x"));
  }

  @Test
  public void testFlatRecord() throws Exception {
    // Exercise every simple type plus array, map, and nullable (union) fields.
    Schema cdapSchema = Schema.recordOf(
      "record",
      Schema.Field.of("int", Schema.of(Schema.Type.INT)),
      Schema.Field.of("long", Schema.of(Schema.Type.LONG)),
      Schema.Field.of("boolean", Schema.of(Schema.Type.BOOLEAN)),
      Schema.Field.of("bytes", Schema.of(Schema.Type.BYTES)),
      Schema.Field.of("double", Schema.of(Schema.Type.DOUBLE)),
      Schema.Field.of("float", Schema.of(Schema.Type.FLOAT)),
      Schema.Field.of("string", Schema.of(Schema.Type.STRING)),
      Schema.Field.of("array", Schema.arrayOf(Schema.of(Schema.Type.INT))),
      Schema.Field.of("map", Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.INT))),
      Schema.Field.of("nullable", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL))),
      Schema.Field.of("nullable2", Schema.unionOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.NULL)))
    );
    org.apache.avro.Schema avroSchema = toAvroSchema(cdapSchema);
    GenericRecord avroRecord = new GenericRecordBuilder(avroSchema)
      .set("int", Integer.MAX_VALUE)
      .set("long", Long.MAX_VALUE)
      .set("boolean", false)
      .set("bytes", Charsets.UTF_8.encode("hello world"))
      .set("double", Double.MAX_VALUE)
      .set("float", Float.MAX_VALUE)
      .set("string", "foo bar")
      .set("array", Lists.newArrayList(1, 2, 3))
      .set("map", ImmutableMap.of("k1", 1, "k2", 2))
      .set("nullable", null)
      .set("nullable2", "Hello")
      .build();

    FormatSpecification spec = new FormatSpecification(
      Formats.AVRO,
      cdapSchema,
      Collections.<String, String>emptyMap()
    );
    RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(spec);
    StructuredRecord result = format.read(asStreamEvent(avroRecord));

    Assert.assertEquals(Integer.MAX_VALUE, result.get("int"));
    Assert.assertEquals(Long.MAX_VALUE, result.get("long"));
    Assert.assertFalse((Boolean) result.get("boolean"));
    Assert.assertArrayEquals(Bytes.toBytes("hello world"), Bytes.toBytes((ByteBuffer) result.get("bytes")));
    Assert.assertEquals(Double.MAX_VALUE, result.get("double"));
    Assert.assertEquals(Float.MAX_VALUE, result.get("float"));
    Assert.assertEquals("foo bar", result.get("string"));
    Assert.assertEquals(Lists.newArrayList(1, 2, 3), result.get("array"));
    assertMapEquals(ImmutableMap.<String, Object>of("k1", 1, "k2", 2), (Map<Object, Object>) result.get("map"));
    Assert.assertNull(result.get("nullable"));
    Assert.assertEquals("Hello", result.get("nullable2"));
  }

  @Test
  public void testNestedRecord() throws Exception {
    // A record field whose value is itself a record must come back as a StructuredRecord.
    Schema innerSchema = Schema.recordOf(
      "inner",
      Schema.Field.of("int", Schema.of(Schema.Type.INT)),
      Schema.Field.of("double", Schema.of(Schema.Type.DOUBLE)),
      Schema.Field.of("array", Schema.arrayOf(Schema.of(Schema.Type.FLOAT))),
      Schema.Field.of("map", Schema.mapOf(Schema.of(Schema.Type.STRING), Schema.of(Schema.Type.STRING)))
    );
    Schema outerSchema = Schema.recordOf(
      "record",
      Schema.Field.of("int", Schema.of(Schema.Type.INT)),
      Schema.Field.of("record", innerSchema));

    org.apache.avro.Schema avroInnerSchema = toAvroSchema(innerSchema);
    org.apache.avro.Schema avroOuterSchema = toAvroSchema(outerSchema);
    GenericRecord avroRecord = new GenericRecordBuilder(avroOuterSchema)
      .set("int", Integer.MAX_VALUE)
      .set("record",
           new GenericRecordBuilder(avroInnerSchema)
             .set("int", 5)
             .set("double", 3.14159)
             .set("array", ImmutableList.of(1.0f, 2.0f))
             .set("map", ImmutableMap.of("key", "value"))
             .build())
      .build();

    FormatSpecification spec = new FormatSpecification(
      Formats.AVRO,
      outerSchema,
      Collections.<String, String>emptyMap()
    );
    RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(spec);
    StructuredRecord result = format.read(asStreamEvent(avroRecord));

    Assert.assertEquals(Integer.MAX_VALUE, result.get("int"));
    StructuredRecord inner = result.get("record");
    Assert.assertEquals(5, inner.get("int"));
    Assert.assertEquals(3.14159, inner.get("double"));
    List<Float> floats = inner.get("array");
    Assert.assertEquals(ImmutableList.of(1.0f, 2.0f), floats);
    Map<String, String> strings = inner.get("map");
    Assert.assertEquals(ImmutableMap.of("key", "value"), strings);
  }

  @Test
  public void testSchemaProjection() throws Exception {
    Schema sourceSchema = Schema.recordOf("source",
                                          Schema.Field.of("id", Schema.of(Schema.Type.INT)),
                                          Schema.Field.of("name", Schema.nullableOf(Schema.of(Schema.Type.STRING))));
    Schema readSchema = Schema.recordOf("read", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));

    FormatSpecification spec = new FormatSpecification(Formats.AVRO, readSchema,
                                                       ImmutableMap.<String, String>of());
    RecordFormat<StreamEvent, StructuredRecord> format = RecordFormats.createInitializedFormat(spec);

    // Convert an event that has schema associated
    GenericRecord avroRecord = new GenericRecordBuilder(toAvroSchema(sourceSchema))
      .set("id", 1)
      .set("name", "value")
      .build();
    StructuredRecord projected = format.read(asStreamEvent(avroRecord, true));
    Assert.assertEquals(avroRecord.get("name").toString(), projected.get("name").toString());

    // Convert an event that has no schema associated. The record must be written with the read schema.
    avroRecord = new GenericRecordBuilder(toAvroSchema(readSchema))
      .set("name", "value2")
      .build();
    projected = format.read(asStreamEvent(avroRecord));
    Assert.assertEquals(avroRecord.get("name").toString(), projected.get("name").toString());
  }

  /**
   * Converts a CDAP schema to an Avro schema by parsing its string form
   * (relies on the CDAP schema's JSON representation being Avro-parseable).
   */
  private org.apache.avro.Schema toAvroSchema(Schema cdapSchema) {
    return new org.apache.avro.Schema.Parser().parse(cdapSchema.toString());
  }

  /**
   * Compares map contents key by key. Needed because Avro decodes strings as its
   * own Utf8 type, so keys are normalized through {@code toString()} before lookup.
   */
  private void assertMapEquals(Map<String, Object> expected, Map<Object, Object> actual) {
    Assert.assertEquals(expected.size(), actual.size());
    for (Map.Entry<Object, Object> entry : actual.entrySet()) {
      Assert.assertEquals(expected.get(entry.getKey().toString()), entry.getValue());
    }
  }

  /**
   * Serializes the given record into a schema-less stream event.
   */
  private StreamEvent asStreamEvent(GenericRecord record) throws IOException {
    return asStreamEvent(record, false);
  }

  /**
   * Serializes the given record with Avro binary encoding and wraps the bytes in a
   * {@link StreamEvent}. When {@code writeSchema} is true, the writer schema and its
   * MD5 hash are attached as event headers so the format can resolve the schema.
   */
  private StreamEvent asStreamEvent(GenericRecord record, boolean writeSchema) throws IOException {
    ByteArrayOutputStream bodyStream = new ByteArrayOutputStream();
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(record.getSchema());
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(bodyStream, null);
    datumWriter.write(record, encoder);
    encoder.flush();
    bodyStream.close();

    Map<String, String> headers = Maps.newHashMap();
    if (writeSchema) {
      String schemaJson = record.getSchema().toString();
      headers.put(AvroRecordFormat.SCHEMA, schemaJson);
      headers.put(AvroRecordFormat.SCHEMA_HASH,
                  Hashing.md5().hashString(schemaJson, Charsets.UTF_8).toString());
    }
    return new StreamEvent(headers, ByteBuffer.wrap(bodyStream.toByteArray()));
  }
}