/*
* Copyright 2013 Produban
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.produban.openbus.serialization;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;
import org.apache.log4j.Logger;
/**
* A Kafka Serializer class that encode a Kafka Message into Avro binary format with the schema embedded
*/
public class AvroSerializer {
static final Logger logger = Logger.getLogger(AvroSerializer.class);
private Schema schema;
private String[] fields;
/**
* Create a new Serializer from an byte stream representing an Avro schema and a list of Avro field names.
* @param schemaIs Input stream containing an Avro schema that will be embedded.
* @param fields list of field names that will be included in the Avro format. Those have to be present in the schema
*/
public AvroSerializer(InputStream schemaIs, String[] fields){
try {
this.schema = new Schema.Parser().parse(schemaIs);
}
catch (IOException e) {
throw new RuntimeException("Could not parse Avro schema from provided InputStream", e);
}
this.fields = fields;
}
/**
* Create a new Serializer from an Avro schema and a list of Avro field names.
* @param schema Avro schema that will be embedded
* @param fields list of field names that will be included in the Avro message. Those have to be present in the schema
*/
public AvroSerializer(Schema schema, String[] fields) {
this.schema = schema;
this.fields = fields;
}
/**
* Takes an array of Avro field values and encodes it in Avro binary format, according to the defined
* field list and schema.
* @param values an array of field values that are to be persisted
* @return an array of bytes that represents the encoded field values and has the Avro schema embedded.
*/
public byte[] serialize(Object[] values) {
ByteArrayOutputStream os = new ByteArrayOutputStream();
DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(writer);
try {
dataFileWriter.create(schema, os);
GenericRecord datum = new GenericData.Record(schema);
//String[] values = str.split(delimiter);
int i=0;
for(Object value : values){
datum.put(this.fields[i++], value);
}
dataFileWriter.append(datum);
dataFileWriter.close();
logger.debug("encoded string: " + os.toString());
os.close();
} catch (IOException e) {
throw new RuntimeException("Error trying to encode provided data into Avro format with provided schema", e);
}
logger.debug("serialized byte array size: " + os.size());
return os.toByteArray();
}
}