package com.linkedin.databus.client;
/*
*
* Copyright 2013 LinkedIn Corp. All rights reserved
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.WritableByteChannel;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.JsonEncoder;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificRecord;
import org.apache.log4j.Logger;
import org.codehaus.jackson.JsonEncoding;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerationException;
import org.codehaus.jackson.JsonGenerator;
import com.linkedin.databus.client.pub.DbusEventDecoder;
import com.linkedin.databus.core.DatabusRuntimeException;
import com.linkedin.databus.core.DbusEvent;
import com.linkedin.databus.core.DbusEventPart;
import com.linkedin.databus2.schemas.SchemaId;
import com.linkedin.databus2.schemas.SchemaRegistryService;
import com.linkedin.databus2.schemas.VersionedSchema;
import com.linkedin.databus2.schemas.VersionedSchemaSet;
public class DbusEventAvroDecoder implements DbusEventDecoder
{
/** Fully-qualified name of this class; also used as the name of {@link #LOG}. */
public static final String MODULE = DbusEventAvroDecoder.class.getName();
/** log4j logger shared by all instances of this class. */
public static final Logger LOG = Logger.getLogger(MODULE);
/** JSON field name under which dumpEventValueInJSON writes the event's source id. */
public static final String SRC_ID_FIELD_NAME = "srcId";
/** JSON field name under which dumpEventValueInJSON writes the decoded event payload. */
public static final String VALUE_FIELD_NAME = "value";
/** JSON field name under which dumpEventValueInJSON writes the event's opcode. */
public static final String OPCODE_FIELD_NAME = "opCode";
// BinaryDecoder is not thread-safe, so every thread keeps its own instance in a ThreadLocal.
// The stored decoder is handed back to DecoderFactory.createBinaryDecoder() as the object to
// reuse on each decode call.
private static final ThreadLocal<BinaryDecoder> binDecoder = new ThreadLocal<BinaryDecoder>();
/** Schemas used to decode event payloads; looked up by the schema id carried in each event. */
private final VersionedSchemaSet _schemaSet;
/** Schemas used to decode event metadata; null when no metadata schemas were supplied. */
private final VersionedSchemaSet _metadataSchemaSet;
/**
 * Creates a decoder that knows payload schemas only; no metadata schema set is configured.
 *
 * @param schemaSet schemas used to decode event payloads
 */
public DbusEventAvroDecoder(VersionedSchemaSet schemaSet)
{
  // Delegate to the two-argument constructor with an absent metadata schema set.
  this(schemaSet, (VersionedSchemaSet) null);
}
/**
 * Creates a decoder for event payloads and, optionally, event metadata.
 *
 * @param schemaSet          schemas used to decode event payloads
 * @param metadataSchemaSet  schemas used to decode event metadata; may be null, in which case
 *                           the metadata-schema lookups of this class return null
 */
public DbusEventAvroDecoder(VersionedSchemaSet schemaSet, VersionedSchemaSet metadataSchemaSet)
{
  this._schemaSet = schemaSet;
  this._metadataSchemaSet = metadataSchemaSet;
}
/**
 * Decodes the payload of a Databus event into an Avro generic record.
 *
 * <p>The writer schema is looked up in the payload schema set using the 16-byte schema id
 * carried by the event. The payload bytes are then handed to
 * {@code getGenericRecord(byte[], Schema, GenericRecord)}.
 *
 * @param e      the event whose payload is to be decoded
 * @param reuse  record object to be reused by the Avro reader; may be null
 * @return the decoded record, or null if the byte-level decode failed (the failure is logged
 *         by the byte[] overload)
 * @throws DatabusRuntimeException if no schema is registered for the event's schema id
 */
@Override
public GenericRecord getGenericRecord(DbusEvent e, GenericRecord reuse)
{
  byte[] md5 = new byte[16];
  e.schemaId(md5);
  SchemaId schemaId = new SchemaId(md5);
  VersionedSchema writerSchema = _schemaSet.getById(schemaId);
  if (null == writerSchema)
  {
    LOG.error("Unable to find schema for id " + schemaId + "; event = " + e);
    throw new DatabusRuntimeException("No schema available to decode event " + e);
  }

  ByteBuffer valueBuffer = e.value();
  final int valueLength = valueBuffer.remaining();
  byte[] valueBytes;
  if (valueBuffer.hasArray())
  {
    // ByteBuffer.array() exposes the WHOLE backing array; the payload itself starts at
    // arrayOffset() + position() and is remaining() bytes long.
    final byte[] backing = valueBuffer.array();
    final int start = valueBuffer.arrayOffset() + valueBuffer.position();
    if (0 == start && backing.length == valueLength)
    {
      // The buffer window covers the entire array: safe to use it without copying.
      valueBytes = backing;
    }
    else
    {
      // Sliced/offset buffer: copy just the payload window so decoding starts at the
      // right byte.
      valueBytes = new byte[valueLength];
      System.arraycopy(backing, start, valueBytes, 0, valueLength);
    }
  }
  else
  {
    valueBytes = new byte[valueLength];
    valueBuffer.get(valueBytes);
  }
  return getGenericRecord(valueBytes, writerSchema.getSchema(), reuse);
}
/**
 * Decodes the payload of a Databus event into a newly obtained Avro generic record.
 * Equivalent to calling {@code getGenericRecord(e, null)}.
 *
 * @param e  DbusEvent to be converted to a generic record
 * @return the GenericRecord for the DbusEvent's payload
 */
public GenericRecord getGenericRecord(DbusEvent e)
{
  // No record object is offered for reuse.
  final GenericRecord noReuse = null;
  return getGenericRecord(e, noReuse);
}
/**
 * Decodes Avro binary data into a generic record using the given schema.
 *
 * <p>The calling thread's previously stored {@link BinaryDecoder} (if any) is offered to the
 * decoder factory for reuse, and the decoder returned by the factory is stored back for the
 * next call. Any exception raised while decoding is logged and swallowed; in that case this
 * method returns null.
 *
 * @param valueBytes  byte[] to be converted to a generic record
 * @param schema      schema of the input record
 * @param reuse       record object handed to the datum reader for reuse; may be null
 * @return GenericRecord for the given byte array + schema combo, or null if decoding failed
 *
 * TODO: Add a getGenericRecord(InputStream data, Schema schema, GenericRecord reuse)
 * variant; it can use DecoderFactory.createBinaryDecoder(InputStream, BinaryDecoder)
 * and will allow us to use something like org.apache.avro.ipc.ByteBufferInputStream
 * to avoid the data copy to a temp array. (https://rb.corp.linkedin.com/r/172879/)
 */
public GenericRecord getGenericRecord(byte[] valueBytes, Schema schema, GenericRecord reuse)
{
  try
  {
    final BinaryDecoder decoder =
        DecoderFactory.defaultFactory().createBinaryDecoder(valueBytes, binDecoder.get());
    binDecoder.set(decoder);
    final GenericDatumReader<GenericRecord> datumReader =
        new GenericDatumReader<GenericRecord>(schema);
    return datumReader.read(reuse, decoder);
  }
  catch (Exception ex) // IOException, ArrayIndexOutOfBoundsException, ...
  {
    // Best effort: report the problem and signal the failure to the caller with null.
    LOG.error("getGenericRecord Avro error: " + ex.getMessage(), ex);
    return null;
  }
}
/**
 * Deserializes the metadata (if any) of a Databus event to an Avro GenericRecord. This method
 * is for INTERNAL USE ONLY (by Espresso and Databus). It is NOT a stable API and may change
 * without warning!
 *
 * @param e      the Databus event whose metadata is to be decoded
 * @param reuse  an existing {@link org.apache.avro.generic.GenericRecord} object where the
 *               deserialized values will be written to. The object can be <b>null</b>, in
 *               which case a new object will be allocated.
 * @return {@link org.apache.avro.generic.GenericRecord} object with the deserialized data, or
 *         null if no metadata exists, if the metadata bytes could not be read from the buffer,
 *         or if the Avro decode failed (failures are logged). Returned in <b>reuse</b> if
 *         provided, else in a newly allocated object.
 * @throws DatabusRuntimeException if event contains metadata but schema to decode it is missing
 */
public GenericRecord getMetadata(DbusEvent e, GenericRecord reuse)
{
  DbusEventPart metadataPart = e.getPayloadMetadataPart();
  ByteBuffer dataBuffer = null;
  if (null == metadataPart || null == (dataBuffer = metadataPart.getData()) || dataBuffer.remaining() <= 0)
  {
    // Guarded so that the event is not stringified when debug logging is off.
    if (LOG.isDebugEnabled())
    {
      LOG.debug("No metadata for event " + e);
    }
    return null;
  }

  VersionedSchema schema = getMetadataSchema(metadataPart);
  if (null == schema)
  {
    throw new DatabusRuntimeException("No schema available to decode metadata for event " + e);
  }

  final int dataLength = dataBuffer.remaining();
  byte[] dataBytes;
  if (dataBuffer.hasArray())
  {
    // ByteBuffer.array() exposes the WHOLE backing array; the metadata itself starts at
    // arrayOffset() + position() and is remaining() bytes long.
    final byte[] backing = dataBuffer.array();
    final int start = dataBuffer.arrayOffset() + dataBuffer.position();
    if (0 == start && backing.length == dataLength)
    {
      // The buffer window covers the entire array: safe to use it without copying.
      dataBytes = backing;
    }
    else
    {
      // Sliced/offset buffer: copy just the metadata window so decoding starts at the
      // right byte.
      dataBytes = new byte[dataLength];
      System.arraycopy(backing, start, dataBytes, 0, dataLength);
    }
  }
  else
  {
    dataBytes = new byte[dataLength];
    try
    {
      dataBuffer.get(dataBytes);
    }
    catch (BufferUnderflowException ex)
    {
      LOG.error("metadata buffer error (remaining = " + dataBuffer.remaining() + ") for event " + e, ex);
      return null;
    }
  }
  return getGenericRecord(dataBytes, schema.getSchema(), reuse);
}
/**
 * Decodes the payload of a Databus event into an Avro specific record of the requested class.
 *
 * <p>The record is read with the writer schema registered for the event's schema id and the
 * schema reported by the target object.
 *
 * @param e            the event whose payload is to be decoded
 * @param reuse        object to decode into; if null, a new instance of {@code targetClass} is
 *                     created through its no-argument constructor
 * @param targetClass  class of the record to return
 * @return the decoded record; null if {@code reuse} was null and a new instance could not be
 *         created; the target object itself if an IOException occurred while decoding (the
 *         error is logged -- NOTE(review): its contents may then be incomplete)
 * @throws DatabusRuntimeException if no schema is registered for the event's schema id
 */
@Override
public <T extends SpecificRecord> T getTypedValue(DbusEvent e, T reuse, Class<T> targetClass)
{
  T target = reuse;
  if (target == null)
  {
    try
    {
      target = targetClass.newInstance();
    }
    catch (InstantiationException instEx)
    {
      LOG.error("getTypedValue class instantiation error (" + instEx.getMessage() + ") for event " + e, instEx);
      return null;
    }
    catch (IllegalAccessException accessEx)
    {
      LOG.error("getTypedValue access error (" + accessEx.getMessage() + ") for event " + e, accessEx);
      return null;
    }
  }

  // Resolve the writer schema from the 16-byte schema id stored in the event.
  final byte[] idBytes = new byte[16];
  e.schemaId(idBytes);
  final SchemaId id = new SchemaId(idBytes);
  final VersionedSchema writerSchema = _schemaSet.getById(id);
  if (writerSchema == null)
  {
    LOG.error("Unable to find schema for id " + id + "; event = " + e);
    throw new DatabusRuntimeException("No schema available to decode event " + e);
  }

  // Copy the payload out of the event buffer.
  final ByteBuffer payload = e.value();
  final byte[] payloadBytes = new byte[payload.remaining()];
  payload.get(payloadBytes);

  try
  {
    final BinaryDecoder decoder =
        DecoderFactory.defaultFactory().createBinaryDecoder(payloadBytes, binDecoder.get());
    binDecoder.set(decoder);
    final SpecificDatumReader<SpecificRecord> datumReader =
        new SpecificDatumReader<SpecificRecord>(writerSchema.getSchema(), target.getSchema());
    final SpecificRecord decoded = datumReader.read(target, decoder);
    return targetClass.cast(decoded);
  }
  catch (IOException ioEx)
  {
    LOG.error("getTypedValue IO error (" + ioEx.getMessage() + ") for event " + e, ioEx);
    return target;
  }
}
/**
 * Writes a UTF-8 JSON rendering of an event to the given stream. The JSON object contains the
 * event's source id, opcode and partition id, followed by the Avro-decoded payload under
 * {@link #VALUE_FIELD_NAME}.
 *
 * <p>I/O errors raised while decoding or writing are logged and not propagated.
 *
 * @param e    the event to dump
 * @param out  stream that receives the JSON text
 * @throws DatabusRuntimeException if no schema is registered for the event's schema id
 */
public void dumpEventValueInJSON(DbusEvent e, OutputStream out)
{
  byte[] md5 = new byte[16];
  e.schemaId(md5);
  SchemaId schemaId = new SchemaId(md5);
  VersionedSchema sourceSchema = _schemaSet.getById(schemaId);
  if (null == sourceSchema)
  {
    // Fail with a descriptive error, as getGenericRecord() does, instead of hitting a
    // NullPointerException when the schema is dereferenced below.
    LOG.error("Unable to find schema for id " + schemaId + "; event = " + e);
    throw new DatabusRuntimeException("No schema available to decode event " + e);
  }

  ByteBuffer valueBuffer = e.value();
  byte[] valueBytes = new byte[valueBuffer.remaining()];
  valueBuffer.get(valueBytes);
  try {
    Schema schema = sourceSchema.getSchema();
    DatumReader<Object> reader = new GenericDatumReader<Object>(schema);
    binDecoder.set(DecoderFactory.defaultFactory().createBinaryDecoder(valueBytes, binDecoder.get()));
    Object datum = reader.read(null, binDecoder.get());
    DatumWriter<Object> writer = new GenericDatumWriter<Object>(schema);
    JsonGenerator g = new JsonFactory().createJsonGenerator(out, JsonEncoding.UTF8);
    // write the src ID
    g.writeStartObject();
    g.writeFieldName(SRC_ID_FIELD_NAME);
    g.writeNumber(e.getSourceId());
    g.writeFieldName(OPCODE_FIELD_NAME);
    g.writeString(e.getOpcode().toString());
    g.writeFieldName("partId");
    g.writeNumber(Integer.valueOf(e.getPartitionId()));
    g.writeFieldName(VALUE_FIELD_NAME);
    // The decoded payload is re-encoded as JSON straight into the same generator.
    writer.write(datum, new JsonEncoder(schema, g));
    // NOTE(review): more end-object calls are issued here than objects are opened directly by
    // this method. Per the original author's note, the Avro JsonEncoder may sometimes leave
    // closing braces unwritten; the last call is therefore allowed to fail.
    g.writeEndObject();
    g.writeEndObject();
    try {
      g.writeEndObject();
    }
    catch (JsonGenerationException e_json) {
      // Ignored on purpose: the Avro JsonEncoder may sometimes be missing two '}'.
    }
    g.flush();
  } catch (IOException e1) {
    LOG.error("event value serialization error; event = " + e, e1);
  }
}
/**
 * Writes a UTF-8 JSON rendering of an event, followed by a newline, to the given channel.
 * The JSON is first rendered into memory by the OutputStream overload and then written out.
 *
 * <p>I/O errors are logged and not propagated.
 *
 * @param e             the event to dump
 * @param writeChannel  channel that receives the JSON text
 */
@Override
public void dumpEventValueInJSON(DbusEvent e, WritableByteChannel writeChannel)
{
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  try {
    dumpEventValueInJSON(e, baos);
    baos.write("\n".getBytes("UTF-8"));
    ByteBuffer writeBuffer = ByteBuffer.wrap(baos.toByteArray());
    // A single write() is allowed to consume only part of the buffer, so keep writing until
    // everything has been handed to the channel; otherwise the output could be truncated.
    while (writeBuffer.hasRemaining())
    {
      writeChannel.write(writeBuffer);
    }
  } catch (IOException e1) {
    LOG.error("event value serialization error; event = " + e, e1);
  }
}
/**
 * Looks up the payload schema of an event by the 16-byte schema id stored in the event.
 *
 * @param e DatabusEvent
 * @return Avro Schema, sourceName, version tuple describing the payload data appearing in 'e';
 *         whatever the schema set returns for an id it does not know (callers in this class
 *         treat that result as possibly null)
 */
@Override
public VersionedSchema getPayloadSchema(DbusEvent e)
{
  final byte[] idBytes = new byte[16];
  e.schemaId(idBytes);
  return _schemaSet.getById(new SchemaId(idBytes));
}
/**
 * Gives subclasses access to the payload schema set this decoder was constructed with.
 *
 * @return the payload schema set
 */
protected VersionedSchemaSet getSchemaSet()
{
  return this._schemaSet;
}
/**
 * Returns the single version of the metadata schema specified in the given event's header.
 * For INTERNAL USE ONLY (by Espresso and Databus). This is not a stable API and may change
 * without warning!
 *
 * @param e  DbusEvent
 * @return {AvroSchema, "metadata-source", version} tuple for given event 'e' with
 *         metadata-schema-id; null if event contains no metadata
 * @throws DatabusRuntimeException if event contains metadata but schema to decode it is missing
 */
public VersionedSchema getMetadataSchema(DbusEvent e)
{
  final DbusEventPart part = e.getPayloadMetadataPart();
  if (part != null)
  {
    final VersionedSchema found = getMetadataSchema(part);
    if (found != null)
    {
      return found;
    }
    // Metadata is present, but there is nothing to decode it with.
    throw new DatabusRuntimeException("No schema available to decode metadata for event " + e);
  }

  LOG.debug("No metadata for event " + e);
  return null;
}
/**
 * Returns the single version of the metadata schema specified in the metadata portion of an
 * event's header. For INTERNAL USE ONLY (by Espresso and Databus). This is not a stable
 * API and may change without warning!
 *
 * @param metadataPart  metadata portion of a DbusEvent
 * @return {AvroSchema, "metadata-source", version} tuple for metadataPart, or null if it
 *         is not available (including when no metadata schema set is configured)
 */
public VersionedSchema getMetadataSchema(DbusEventPart metadataPart)
{
  // The schema digest is only read when a metadata schema set exists.
  return (null == _metadataSchemaSet)
      ? null
      : _metadataSchemaSet.getById(new SchemaId(metadataPart.getSchemaDigest()));
}
/**
 * Returns the specified version of the metadata schema. For INTERNAL USE ONLY (by Espresso
 * and Databus). This is not a stable API and may change without warning!
 *
 * @param version  version number of the desired metadata schema
 * @return {AvroSchema, "metadata-source", version} tuple for given metadata schema version;
 *         null if none exists
 */
public VersionedSchema getMetadataSchema(short version)
{
  if (null == _metadataSchemaSet)
  {
    // No metadata schemas were configured for this decoder.
    return null;
  }
  return _metadataSchemaSet.getSchemaByNameVersion(SchemaRegistryService.DEFAULT_METADATA_SCHEMA_SOURCE, version);
}
/**
 * Returns the latest version of the metadata schema. For INTERNAL USE ONLY (by Espresso
 * and Databus). This is not a stable API and may change without warning!
 *
 * @return {AvroSchema, "metadata-source", version} tuple of highest-numbered version of
 *         metadata schema; null if none exists
 */
VersionedSchema getLatestMetadataSchema()
{
  return (null == _metadataSchemaSet)
      ? null
      : _metadataSchemaSet.getLatestVersionByName(SchemaRegistryService.DEFAULT_METADATA_SCHEMA_SOURCE);
}
/**
 * Writes the decoded metadata of an event to the given channel as the UTF-8 bytes of the
 * record's {@code toString()} text followed by a newline. Nothing is written when
 * {@code getMetadata(e, null)} returns null.
 *
 * <p>I/O errors are logged and not propagated.
 *
 * @param e             the event whose metadata is to be dumped
 * @param writeChannel  channel that receives the metadata text
 */
public void dumpMetadata(DbusEvent e, FileChannel writeChannel)
{
  GenericRecord genericRecord = this.getMetadata(e, null);
  if (genericRecord == null) // no metadata
  {
    return;
  }

  try
  {
    String metadataInfo = genericRecord.toString() + "\n";
    ByteBuffer writeBuffer = ByteBuffer.wrap(metadataInfo.getBytes("UTF-8"));
    // A single write() is allowed to consume only part of the buffer, so keep writing until
    // everything has been handed to the channel; otherwise the output could be truncated.
    while (writeBuffer.hasRemaining())
    {
      writeChannel.write(writeBuffer);
    }
  }
  catch (IOException e1) // also covers UnsupportedEncodingException, which is an IOException
  {
    LOG.error("event metadata serialization error; event = " + e + "; metadata = " + genericRecord, e1);
  }
}
}