/*
* Copyright 2015 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.hpg.bigdata.core.io.avro;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
/**
 * Serializes objects to Avro binary form, producing one {@link ByteBuffer} per encoded element.
 *
 * <p>A single encoder and a single in-memory output buffer are created in the constructor and
 * reused for every element and every call to {@link #encode(List)}. The class performs no
 * synchronization, so an instance must not be used by several threads at the same time.
 *
 * <p>Created by hpccoll1 on 02/04/15.
 *
 * @param <T> type of the elements to encode
 */
public class AvroEncoder<T> {

    /** Initial capacity, in bytes, of the reusable output buffer. */
    public static final int SIZE = 1000000;

    private final DatumWriter<T> datumWriter;
    private final Encoder encoder;
    private final ByteArrayOutputStream byteArrayOutputStream;
    private final boolean abortOnFail;
    private int encodeFails = 0;

    /**
     * Creates an encoder that aborts on the first element that fails to encode.
     *
     * @param schema Avro schema used to write the elements
     */
    public AvroEncoder(Schema schema) {
        this(schema, true);
    }

    /**
     * Creates an encoder.
     *
     * @param schema      Avro schema used to write the elements
     * @param abortOnFail if {@code true}, {@link #encode(List)} rethrows the first encoding error;
     *                    if {@code false}, failing elements are counted, reported and skipped
     */
    public AvroEncoder(Schema schema, boolean abortOnFail) {
        this.abortOnFail = abortOnFail;
        this.datumWriter = new GenericDatumWriter<>(schema);
        this.byteArrayOutputStream = new ByteArrayOutputStream(SIZE); //Initialize with 1MB
        this.encoder = EncoderFactory.get().binaryEncoder(byteArrayOutputStream, null);
    }

    /**
     * Encodes every element of the batch into its own buffer.
     *
     * <p>When {@code abortOnFail} is {@code false}, an element that cannot be written is skipped:
     * the failure counter is incremented and the exception message is printed to standard error,
     * so the returned list may be shorter than {@code batch}.
     *
     * @param batch elements to encode
     * @return the encoded elements, in the iteration order of {@code batch}, without the skipped ones
     * @throws IOException if writing or flushing fails; when {@code abortOnFail} is {@code true}
     *                     any unchecked exception raised while writing an element is rethrown as well
     */
    public List<ByteBuffer> encode(List<T> batch) throws IOException {
        List<ByteBuffer> encoded = new ArrayList<>(batch.size());
        for (T elem : batch) {
            try {
                datumWriter.write(elem, encoder);
            } catch (Exception e) {
                if (abortOnFail) {
                    // The encoder and the buffer outlive this call: drop whatever the failed
                    // element already wrote, otherwise it would be prepended to the first
                    // element of the next encode() call.
                    try {
                        discardPartialOutput();
                    } catch (IOException | RuntimeException cleanupError) {
                        // Keep the original failure as the primary exception.
                        e.addSuppressed(cleanupError);
                    }
                    throw e;
                }
                encodeFails++;
                System.err.println(e.getMessage());
                discardPartialOutput();
                continue;
            }
            encoder.flush();
            encoded.add(ByteBuffer.wrap(byteArrayOutputStream.toByteArray()));
            byteArrayOutputStream.reset();
        }
        return encoded;
    }

    /**
     * Returns how many elements have been skipped because they failed to encode.
     *
     * @return number of failed elements accumulated over all calls to {@link #encode(List)}
     */
    public int getEncodeFails() {
        return encodeFails;
    }

    /**
     * Tells whether an encoding error aborts the whole batch.
     *
     * @return {@code true} if {@link #encode(List)} rethrows the first encoding error
     */
    public boolean isAbortOnFail() {
        return abortOnFail;
    }

    /**
     * Flushes the bytes pending in the encoder and throws them away together with the current
     * content of the output buffer, even if the flush itself fails.
     */
    private void discardPartialOutput() throws IOException {
        try {
            encoder.flush();
        } finally {
            byteArrayOutputStream.reset();
        }
    }
}