/*
* Copyright 2014 CyberVision, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.kaaproject.kaa.server.flume.sink.hdfs;

import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.COMPRESSION_CODEC;
import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.DEFAULT_COMPRESSION_CODEC;
import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.DEFAULT_SYNC_INTERVAL_BYTES;
import static org.apache.flume.serialization.AvroEventSerializerConfigurationConstants.SYNC_INTERVAL_BYTES;
import static org.kaaproject.kaa.server.common.log.shared.RecordWrapperSchemaGenerator.RECORD_HEADER_FIELD;
import static org.kaaproject.kaa.server.common.log.shared.RecordWrapperSchemaGenerator.generateRecordWrapperSchema;

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.DecoderFactory;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.FlumeException;
import org.apache.flume.conf.Configurable;
import org.apache.flume.serialization.EventSerializer;
import org.kaaproject.kaa.server.common.log.shared.RecordWrapperSchemaGenerator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;
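
/**
 * Flume {@link EventSerializer} that writes Kaa log records to an output stream as an
 * Avro data file. Each {@link KaaRecordEvent} body is decoded with the Avro schema
 * resolved for its {@link KaaSinkKey} and wrapped into the record wrapper schema
 * produced by {@link RecordWrapperSchemaGenerator} before being appended.
 *
 * <p>A minimal configuration sketch, assuming the serializer is wired through a standard
 * Flume serializer sub-context (agent and sink names and property values below are
 * illustrative; the sync interval and compression codec keys come from Flume's
 * AvroEventSerializerConfigurationConstants):
 *
 * <pre>
 * agent.sinks.kaaHdfsSink.serializer = org.kaaproject.kaa.server.flume.sink.hdfs.AvroKaaEventSerializer$Builder
 * agent.sinks.kaaHdfsSink.serializer.syncIntervalBytes = 2048000
 * agent.sinks.kaaHdfsSink.serializer.compressionCodec = snappy
 * </pre>
 */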
public class AvroKaaEventSerializer
implements EventSerializer, Configurable, EventConstants {
private static final Logger LOG = LoggerFactory.getLogger(AvroKaaEventSerializer.class);
// Cache of resolved Avro schemas, keyed by Kaa sink key and shared across serializer instances.
private static final Map<KaaSinkKey, Schema> schemaCache = new HashMap<>();
private final OutputStream out;
private DatumReader<GenericRecord> datumReader;
private BinaryDecoder binaryDecoder;
private DatumWriter<Object> writer = null;
private DataFileWriter<Object> dataFileWriter = null;
private GenericRecord wrapperRecord;
private int syncIntervalBytes;
private String compressionCodec;
private AvroSchemaSource schemaSource;
private AvroKaaEventSerializer(OutputStream out) {
this.out = out;
this.schemaSource = new AvroSchemaSource();
}
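/**
 * Reads the sync interval and compression codec from the serializer context and lets the
 * {@link AvroSchemaSource} pick up its own settings from the same context.
 */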
@Override
public void configure(Context context) {
syncIntervalBytes = context.getInteger(SYNC_INTERVAL_BYTES,
DEFAULT_SYNC_INTERVAL_BYTES);
compressionCodec = context.getString(COMPRESSION_CODEC,
DEFAULT_COMPRESSION_CODEC);
schemaSource.configure(context);
}
@Override
public void afterCreate() throws IOException {
// no-op
}
@Override
public void afterReopen() throws IOException {
// A DataFileWriter cannot be initialized without writing the schema,
// so reopening an existing file for append is not supported.
throw new UnsupportedOperationException(
"Avro API doesn't support append");
}
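/**
 * Appends a single event. The underlying {@link DataFileWriter} is lazily initialized from
 * the first event's headers; the event body is then decoded as a generic Avro record and
 * written inside the record wrapper together with the Kaa record header.
 */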
@Override
public void write(Event event) throws IOException {
if (dataFileWriter == null) {
initialize(event);
}
if (!(event instanceof KaaRecordEvent)) {
throw new IOException("Not instance of KaaRecordEvent!");
}
KaaRecordEvent kaaRecordEvent = (KaaRecordEvent) event;
binaryDecoder = DecoderFactory.get().binaryDecoder(kaaRecordEvent.getBody(), binaryDecoder);
GenericRecord recordData = datumReader.read(null, binaryDecoder);
wrapperRecord.put(RECORD_HEADER_FIELD, kaaRecordEvent.getRecordHeader());
wrapperRecord.put(RecordWrapperSchemaGenerator.RECORD_DATA_FIELD, recordData);
dataFileWriter.append(wrapperRecord);
}
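/**
 * Resolves the Avro schema for the event (shared cache first, then the configured schema
 * source, finally a schema literal from the event headers), builds the record wrapper
 * schema and opens the {@link DataFileWriter} on the target stream with the configured
 * sync interval and compression codec.
 */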
private void initialize(Event event) throws IOException {
Schema schema;
KaaSinkKey key = new KaaSinkKey(event.getHeaders());
schema = schemaCache.get(key);
if (schema == null) {
try {
schema = schemaSource.loadByKey(key);
} catch (Exception ex) {
LOG.error("Unable to load schema by key {}", key);
LOG.error("Caused by: ", ex);
throw new FlumeException("Could not find schema for event "
+ event);
}
schemaCache.put(key, schema);
}
// Fall back to a schema literal embedded in the event headers if the schema source did not provide one.
if (schema == null) {
String schemaString = event.getHeaders().get(
AVRO_SCHEMA_LITERAL_HEADER);
if (schemaString == null) {
throw new FlumeException("Could not find schema for event "
+ event);
}
schema = new Schema.Parser().parse(schemaString);
}
datumReader = new GenericDatumReader<>(schema);
Schema wrapperSchema = generateRecordWrapperSchema(schema.toString());
writer = new GenericDatumWriter<>(wrapperSchema);
dataFileWriter = new DataFileWriter<>(writer);
dataFileWriter.setSyncInterval(syncIntervalBytes);
try {
CodecFactory codecFactory = CodecFactory
.fromString(compressionCodec);
dataFileWriter.setCodec(codecFactory);
} catch (AvroRuntimeException ex) {
LOG.warn("Unable to instantiate avro codec with name ("
+ compressionCodec
+ "). Compression disabled. Exception follows.", ex);
}
dataFileWriter.create(wrapperSchema, out);
wrapperRecord = new GenericData.Record(wrapperSchema);
}
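/**
 * Flushes buffered records to the underlying stream via {@link DataFileWriter#flush()}.
 */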
@Override
public void flush() throws IOException {
if (dataFileWriter != null) {
dataFileWriter.flush();
}
}
@Override
public void beforeClose() throws IOException {
// no-op
}
@Override
public boolean supportsReopen() {
return false;
}
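/**
 * Factory Flume uses to create the serializer. A programmatic usage sketch (the context
 * values, output stream and event below are hypothetical; in a running agent Flume builds
 * and drives the serializer itself):
 *
 * <pre>
 * Context ctx = new Context();
 * ctx.put("compressionCodec", "deflate");
 * EventSerializer serializer = new AvroKaaEventSerializer.Builder().build(ctx, outputStream);
 * serializer.afterCreate();
 * serializer.write(kaaRecordEvent); // must be a KaaRecordEvent
 * serializer.flush();
 * serializer.beforeClose();
 * </pre>
 */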
public static class Builder implements EventSerializer.Builder {
@Override
public EventSerializer build(Context context, OutputStream out) {
AvroKaaEventSerializer writer = new AvroKaaEventSerializer(out);
writer.configure(context);
return writer;
}
}
}