/*
* Copyright (c) 2013, the authors.
*
* This file is part of 'DXFS'.
*
* DXFS is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* DXFS is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with DXFS. If not, see <http://www.gnu.org/licenses/>.
*/
package nextflow.fs.dx;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Phaser;
import java.util.concurrent.TimeUnit;
import nextflow.fs.dx.api.DxApi;
import nextflow.fs.dx.api.DxHttpClient;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.InputStreamEntity;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sun.nio.ch.DirectBuffer;
/**
 * Gathers the written data into a byte buffer; when the buffer is full it is handed
 * over for upload in the background and a new (or recycled) buffer is given to the
 * writer, so that writing can continue while the previous chunk is being uploaded.
 * <p>
 * {@code write}, {@code flush} and {@code close} are not synchronized: a stream
 * instance must not be shared between several writer threads.
 *
 * @author Paolo Di Tommaso <paolo.ditommaso@gmail.com>
 */
public class DxUploadOutputStream extends OutputStream {

    private static final Logger log = LoggerFactory.getLogger(DxUploadOutputStream.class);

    private static final int _1MB = 1024 * 1024;

    /**
     * Minimum size of each chunk (5M)
     *
     * Read more https://wiki.dnanexus.com/API-Specification-v1.0.0/Files#API-method%3A-%2Ffile-xxxx%2Fupload
     */
    final static int MIN_CHUNK_SIZE = 5 * _1MB;

    /**
     * Maximum size of each chunk (500M)
     */
    final static long MAX_CHUNK_SIZE = 500 * _1MB;

    /**
     * Default buffer capacity (20MB)
     */
    final static private int defaultCapacity = 20 * _1MB;

    /* Set when the close method is invoked. It signals the watcher to stop waiting for new chunks */
    private volatile boolean closed;

    // note: no need to be synchronized, it is incremented only by the watcher thread and
    // read by close() only after the phaser has advanced
    private int chunkCount = 0;

    /**
     * The first error raised by a background upload, if any. It is reported to the
     * writer as an {@code IOException} by {@code write} and {@code close}.
     */
    private volatile Throwable uploadFailure;

    /**
     * The file-id to be uploaded.
     */
    final String fileId;

    /**
     * Instead of allocating a new buffer for each chunk recycle them, putting
     * a buffer instance into this queue when its upload has terminated
     */
    final private Queue<ByteBuffer> bufferPool = new ConcurrentLinkedQueue<ByteBuffer>();

    /**
     * The executor service (thread pool) which manages the upload in background
     */
    final private ExecutorService executor;

    /**
     * DnaNexus API wrapper
     */
    final private DxApi remote;

    /**
     * Chunks filled by the writer and waiting to be picked up by the watcher thread
     */
    final private BlockingQueue<ByteBuffer> queue;

    /**
     * Sync phaser: lets {@code close} wait for the watcher and for every pending upload
     */
    final private Phaser phaser;

    /**
     * The current working buffer. It is {@code null} right after a flush, until the
     * next write obtains a new one.
     */
    private ByteBuffer buf;

    /**
     * Initialize the uploader output stream for the specified file
     *
     * @param fileId The target DnaNexus file
     * @param remote The DnaNexus API wrapper object
     * @param maxForks Capacity of the queue holding the chunks waiting to be picked up for
     *                 upload; must be greater than zero.
     *                 NOTE(review): uploads run on a cached thread pool, so this value alone
     *                 does not cap the number of concurrent uploads -- confirm the intent.
     */
    public DxUploadOutputStream(String fileId, DxApi remote, int maxForks) {
        this.fileId = fileId;
        this.queue = new ArrayBlockingQueue<>(maxForks);
        this.phaser = new Phaser();
        this.remote = remote;
        this.buf = allocate();
        this.executor = Executors.newCachedThreadPool();
        checkCapacity();
        start();
    }

    /**
     * Initialize the uploader output stream for the specified file, using
     * {@code 5} as the {@code maxForks} value.
     *
     * @param fileId The target DnaNexus file
     * @param remote The DnaNexus API wrapper object
     */
    public DxUploadOutputStream(String fileId, DxApi remote) {
        this(fileId, remote, 5);
    }

    /**
     * Create a new byte buffer to hold a chunk of data. Override to use a custom
     * buffer capacity or allocation strategy.
     *
     * @return The {@code ByteBuffer} instance
     */
    protected ByteBuffer allocate() {
        return ByteBuffer.allocateDirect(defaultCapacity);
    }

    /**
     * Check that the capacity of the current buffer is within the min and max limits
     *
     * @throws IllegalStateException when the capacity is outside the allowed range
     */
    final protected void checkCapacity() {
        if( buf == null ) return;
        if( buf.capacity() < MIN_CHUNK_SIZE ) {
            throw new IllegalStateException("Buffer capacity cannot be less than: " + MIN_CHUNK_SIZE);
        }
        if( buf.capacity() > MAX_CHUNK_SIZE ) {
            throw new IllegalStateException("Buffer capacity cannot be greater than: " + MAX_CHUNK_SIZE);
        }
    }

    /**
     * Called when the working buffer is full or missing. It does two things:
     * <ol>
     * <li>Flush the current buffer i.e. schedule it for upload</li>
     * <li>Get a new buffer to continue the out streaming</li>
     * </ol>
     */
    final protected void swapBuffer() {
        // send out the current buffer
        flush();
        // try to reuse a buffer from the pool
        buf = bufferPool.poll();
        if( buf != null ) {
            buf.clear();
        }
        else {
            // allocate a new buffer
            buf = allocate();
            checkCapacity();
        }
    }

    /**
     * Flush the current buffer content scheduling it for upload. It blocks while the
     * chunk queue is full.
     * <p>
     * NOTE(review): flushing a partially filled buffer before the end of the stream
     * schedules a chunk that may be smaller than {@link #MIN_CHUNK_SIZE}; confirm that
     * the remote API accepts it.
     *
     * @throws IllegalStateException when the thread is interrupted while waiting for
     *         room in the chunk queue
     */
    @Override
    public void flush() {
        log.trace("File: {} > Flushing buffer", fileId);
        // when the buffer is empty nothing to do
        if( buf == null || buf.position()==0 ) { return; }
        buf.flip();
        try {
            queue.put(buf);
        }
        catch( InterruptedException e ) {
            // restore the interrupt status for the caller
            Thread.currentThread().interrupt();
            throw new IllegalStateException(e);
        }
        // the buffer now belongs to the uploader; the next write will get a new one
        buf = null;
    }

    /**
     * Append a single byte to the working buffer, swapping it when full.
     *
     * @param b The byte to write (only the low-order eight bits are used)
     * @throws IOException when the stream is closed or a previous chunk upload failed
     */
    @Override
    public void write(int b) throws IOException {
        ensureWritable();
        buf.put((byte) b);
    }

    /**
     * Append {@code length} bytes to the stream, copying them in slices no larger
     * than the space left in the working buffer.
     *
     * @param bytes The data
     * @param offset The start offset in the data
     * @param length The number of bytes to write
     * @throws IOException when the stream is closed or a previous chunk upload failed
     * @throws IndexOutOfBoundsException when the range is outside the array
     */
    @Override
    public void write(byte[] bytes, int offset, int length) throws IOException {
        if( bytes == null ) {
            throw new NullPointerException();
        }
        if( offset < 0 || length < 0 || length > bytes.length - offset ) {
            throw new IndexOutOfBoundsException();
        }
        int from = offset;
        int left = length;
        while( left > 0 ) {
            ensureWritable();
            final int count = Math.min(left, buf.remaining());
            buf.put(bytes, from, count);
            from += count;
            left -= count;
        }
    }

    /*
     * Make sure there is a working buffer with at least one free byte
     */
    private void ensureWritable() throws IOException {
        if( closed ) {
            throw new IOException("Stream closed -- file: " + fileId);
        }
        // 'buf' is null after an explicit flush
        if( buf == null || !buf.hasRemaining() ) {
            // do not keep buffering data when a chunk has already been lost
            checkUploadFailure();
            swapBuffer();
        }
    }

    /*
     * Report to the writer an error raised by a background upload
     */
    private void checkUploadFailure() throws IOException {
        final Throwable failure = uploadFailure;
        if( failure != null ) {
            throw new IOException("File: " + fileId + " > chunk upload failed", failure);
        }
    }

    /*
     * Keep track of the first upload error only
     */
    private synchronized void recordFailure(Throwable error) {
        if( uploadFailure == null ) {
            uploadFailure = error;
        }
    }

    /**
     * Start the uploading process
     */
    private void start() {
        log.trace("Starting upload process");
        // register one party for the thread invoking 'close' and one for the 'watcher'.
        // Both are registered *before* the task is submitted, so that the watcher can
        // never deregister a party that has not been registered yet
        phaser.bulkRegister(2);

        Runnable watcher = new Runnable() {
            @Override
            public void run() {
                try {
                    dequeueAndSubmit();
                }
                finally {
                    phaser.arriveAndDeregister();
                }
            }
        };

        // submit the task for execution
        executor.submit(watcher);
    }

    /*
     * Wait for a chunk in the queue, take it and submit for upload.
     * The loop terminates when the stream is closed and the queue is drained
     */
    private void dequeueAndSubmit() {
        log.trace("Entering received loop");
        while( !closed || queue.size()>0 ) {
            try {
                final ByteBuffer buffer = queue.poll(1, TimeUnit.SECONDS);
                if( buffer == null ) {
                    // timeout elapsed with no chunk available: check the exit condition again
                    continue;
                }
                log.trace("File: {} > Received a buffer -- limit: {}", fileId, buffer.limit());
                executor.submit( consumeBuffer0(buffer, ++chunkCount) );
            }
            catch (InterruptedException e) {
                // keep looping: the chunks already queued must still be uploaded
                log.trace("File: {} > Got an interrupted exception while waiting new chunk to upload -- cause: {}", fileId, e.getMessage());
            }
        }
        log.trace("Exiting received loop");
    }

    /**
     * Wrap the upload of a chunk of data into a task. A phaser party is registered
     * when the task is created and released when the task terminates.
     *
     * @param buffer The buffer to be uploaded
     * @param chunkIndex The index count
     * @return The task uploading the chunk
     */
    private Runnable consumeBuffer0(final ByteBuffer buffer, final int chunkIndex) {
        phaser.register();
        return new Runnable() {
            @Override
            public void run() {
                try {
                    consumeBuffer(buffer, chunkIndex);
                }
                catch (IOException | RuntimeException e) {
                    log.debug("File: {} > Error for chunk: {} -- cause: {}", fileId, chunkIndex, e.getMessage());
                    // the exception thrown below is captured by the executor 'Future' that
                    // nobody reads, so save the error to report it to the writer
                    recordFailure(e);
                    throw new IllegalStateException(e);
                }
                finally {
                    phaser.arriveAndDeregister();
                }
            }
        };
    }

    /**
     * Upload a chunk of data: ask the remote API for the upload url and authorization
     * header, then 'post' the buffer content to that url. The buffer is put back in the
     * pool when the upload terminates, whatever the outcome.
     *
     * @param buffer The buffer to be uploaded
     * @param chunkIndex The index count
     * @throws IOException when the upload request fails
     */
    private void consumeBuffer(final ByteBuffer buffer, final int chunkIndex) throws IOException {
        log.debug("File: {} > uploading chunk: {}", fileId, chunkIndex);
        try {
            // request to upload a new chunk
            // note: dnanexus upload chunk index is 1-based
            Map<String,Object> upload = remote.fileUpload(fileId, chunkIndex);
            log.trace("File: {} > chunk [{}] > FileUpload: {}", fileId, chunkIndex, upload);

            // the response provides the url where 'post' the chunk and the
            // 'authorization' code
            String url = (String)upload.get("url");
            @SuppressWarnings("unchecked")
            Map<String,Object> headers = (Map<String,Object>)upload.get("headers");
            String auth = (String)headers.get("Authorization");

            // create a 'post' request to upload the stuff
            HttpPost post = new HttpPost(url);
            post.setHeader("Authorization", auth);

            log.trace("File: {} > chunk [{}] > buffer limit: {}; remaining: {}", fileId, chunkIndex, buffer.limit(), buffer.remaining());
            HttpEntity payload = new InputStreamEntity(new ByteBufferBackedInputStream(buffer), buffer.limit());
            post.setEntity(payload);

            HttpEntity entity = DxHttpClient.getInstance().http().execute(post).getEntity();
            // a response is allowed to carry no entity
            String response = entity != null ? EntityUtils.toString(entity, "UTF-8") : null;
            log.trace("File: {} > chunk [{}] > post response: {}", fileId, chunkIndex, response);

            log.trace("File: {} > completed upload chunk: {}", fileId, chunkIndex);
        }
        finally {
            // put the 'buffer' in the pool, so that it can be recycled and disposed on close
            bufferPool.offer(buffer);
        }
    }

    /**
     * Close the output stream, waiting for the upload process to complete. The remote
     * file is closed only when at least one chunk has been uploaded and no upload failed.
     * Invoking this method on a stream already closed has no effect.
     *
     * @throws IOException when a chunk upload failed or the remote file cannot be closed
     */
    @Override
    public void close() throws IOException {
        if( closed ) {
            return;
        }
        log.trace("Entering close");

        try {
            // flush current buffer
            flush();
        }
        finally {
            // signal the watcher to terminate even when the flush fails
            closed = true;
        }

        // wait for on-going uploads to finish
        phaser.arriveAndAwaitAdvance();
        log.trace("Phaser advanced");
        executor.shutdown();

        try {
            // do not close a file for which a chunk is missing
            checkUploadFailure();

            // DnaNexus api raises an error when trying to close a file for which no chunks have been uploaded
            if( chunkCount>0 ) {
                // dx file close
                log.trace("Closing DX file");
                remote.fileClose(fileId);
            }
        }
        finally {
            disposeBuffers();
        }

        log.trace("File: {} > closed -- {} chunks processed", fileId, chunkCount);
    }

    /*
     * Release the memory of the pooled buffers and of the working buffer, if any
     */
    private void disposeBuffers() {
        // the working buffer is still here when nothing has been written since the last swap
        if( buf != null ) {
            bufferPool.offer(buf);
            buf = null;
        }

        ByteBuffer item;
        while( (item = bufferPool.poll()) != null ) {
            if( item instanceof DirectBuffer ) {
                ((DirectBuffer) item).cleaner().clean();
            }
        }
    }
}