package org.opencb.hpg.bigdata.core.converters.variation;
import com.google.protobuf.GeneratedMessage;
import org.opencb.biodata.tools.variant.converters.Converter;
import org.opencb.commons.run.ParallelTaskRunner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
/**
* Created on 12/10/15.
*
* @author Jacobo Coll <jacobo167@gmail.com>
*/
public class ProtoEncoderTask<T extends GeneratedMessage> implements ParallelTaskRunner.Task<CharSequence, ByteBuffer> {
private final Converter<CharSequence, T> converter;
private final int bufferSize;
private int maxBufferSize;
private static final int LOG_BATCH_SIZE = 1000;
private static AtomicLong numConverts = new AtomicLong(0);
private static Logger logger = LoggerFactory.getLogger(ProtoEncoderTask.class.toString());
public ProtoEncoderTask(Converter<CharSequence, T> converter, int bufferSize) {
this.converter = converter;
this.bufferSize = bufferSize;
this.maxBufferSize = bufferSize;
}
public static class ByteBufferOutputStream extends ByteArrayOutputStream {
public ByteBufferOutputStream() {
}
public ByteBufferOutputStream(int size) {
super(size);
}
public ByteBuffer toByteBuffer() {
return ByteBuffer.wrap(buf, 0, count);
}
}
@Override
public void pre() {
maxBufferSize = bufferSize;
}
@Override
public List<ByteBuffer> apply(List<CharSequence> batch) {
List<T> converted = converter.apply(batch);
ByteBufferOutputStream outputStream = new ByteBufferOutputStream(maxBufferSize);
try {
for (T element : converted) {
element.writeDelimitedTo(outputStream);
}
logProgress(batch.size());
ByteBuffer byteBuffer = outputStream.toByteBuffer();
maxBufferSize = Math.max(maxBufferSize, byteBuffer.array().length);
return Collections.singletonList(byteBuffer);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
private void logProgress(int size) {
long num = numConverts.getAndAdd(size);
long batch = num / LOG_BATCH_SIZE;
long newBatch = (num + size) / LOG_BATCH_SIZE;
logger.debug("Another batch of: " + size);
if (batch != newBatch) {
logger.info("Num processed variants: " + newBatch * LOG_BATCH_SIZE);
}
}
}