/*
* Data Hub Service (DHuS) - For Space data distribution.
* Copyright (C) 2016 GAEL Systems
*
* This file is part of DHuS software sources.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package fr.gael.dhus.util.http;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;
import java.security.MessageDigest;
import java.util.Formatter;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
 * This class manages a pool of interruptible daemon threads which use instances of
 * {@link InterruptibleHttpClient} to download data.
 *
 * <p>Backed by a {@link ThreadPoolExecutor}, it creates only daemon threads and uses an unbounded
 * {@link LinkedBlockingDeque} to store tasks.
 *
 * <p>It stores the downloaded data in the temp directory given at construction time or, if none
 * was given, in the default temp directory, using
 * {@link Files#createTempFile(String, String, java.nio.file.attribute.FileAttribute...)}.
 *
 * <p>If the HTTP response carries a {@code Content-Disposition} header with a {@code filename}
 * property, this name will be used to name the output file.
 */
public final class ParallelizedDownloadManager
{
   /** Log. */
   private static final Logger LOGGER = LogManager.getLogger(ParallelizedDownloadManager.class);

   /** Suffix of the temporary files receiving the downloaded data. */
   private static final String TMP_FILE_SUFFIX = ".sync-data";

   /** Pattern for the filename property in the Content-Disposition HTTP Header field. */
   private static final Pattern FILENAME_PATTERN =
         Pattern.compile("filename=\"(.+?)\"", Pattern.CASE_INSENSITIVE);

   /** Thread pool. */
   private final ThreadPoolExecutor threadPool;

   /** One HTTP client can be used by many concurrent threads. */
   private final InterruptibleHttpClient http_client;

   /** Directory receiving the downloaded files, {@code null} to use the default temp directory. */
   private final Path tempDir;

   /**
    * Creates a new Manager.
    *
    * @param core_pool_size  the number of threads to keep in the pool, even if they are idle.
    *
    * @param max_pool_size   the maximum number of threads to allow in the pool.
    *                        Note: as the task queue is unbounded, a {@link ThreadPoolExecutor}
    *                        never creates more than {@code core_pool_size} threads.
    *
    * @param keep_alive      when the number of threads is greater than the core, this is the
    *                        maximum time that excess idle threads will wait for new tasks before
    *                        terminating.
    *
    * @param time_unit       the time unit for the keepAliveTime argument.
    *
    * @param client_producer a custom http client provider to use custom http clients.
    *                        may be null.
    *
    * @param temp_dir        base path for incomplete files (temporary directory).
    *                        may be null.
    *
    * @throws IllegalArgumentException if {@code temp_dir} is not null and is not a directory,
    *                        or if the pool parameters are rejected by {@link ThreadPoolExecutor}.
    */
   public ParallelizedDownloadManager(int core_pool_size, int max_pool_size,
         long keep_alive, TimeUnit time_unit, HttpAsyncClientProducer client_producer,
         Path temp_dir)
   {
      // Fail fast on a bad argument, before any pool or HTTP client gets created
      if (temp_dir != null && !Files.isDirectory(temp_dir))
      {
         throw new IllegalArgumentException("Given temp dir is not a dir");
      }
      this.tempDir = temp_dir;

      BlockingQueue<Runnable> work_queue = new LinkedBlockingDeque<>();
      this.threadPool = new ThreadPoolExecutor(core_pool_size, max_pool_size, keep_alive,
            time_unit, work_queue, new DaemonThreadFactory());

      this.http_client = (client_producer != null)
            ? new InterruptibleHttpClient(client_producer)
            : new InterruptibleHttpClient();
   }

   /**
    * Calls `shutdownNow` on the {@link ThreadPoolExecutor} backing this manager.
    * @see ThreadPoolExecutor#shutdownNow()
    */
   public void shutdownNow()
   {
      this.threadPool.shutdownNow();
   }

   /**
    * The manager will create and add a download task to its task executor.
    * @param url_to_download url to download.
    * @return a Future holding a {@link DownloadResult} describing the downloaded data.
    */
   public Future<DownloadResult> download(String url_to_download)
   {
      return this.threadPool.submit(new DownloadTask(url_to_download));
   }

   /** Result type for {@link #download(String)}. */
   public static class DownloadResult
   {
      /** Path to downloaded data. */
      public final Path data;

      /** Content-Type of downloaded data. */
      public final String dataType;

      /** Content-Length of downloaded data. */
      public final long dataSize;

      /** MD5 sum of downloaded data. */
      public final byte[] md5sum;

      /**
       * Create new instance, sets public fields.
       * @param data see {@link #data}.
       * @param dataType see {@link #dataType}.
       * @param dataSize see {@link #dataSize}.
       * @param md5sum see {@link #md5sum}.
       */
      public DownloadResult(Path data, String dataType, long dataSize, byte[] md5sum)
      {
         this.data = data;
         this.dataType = dataType;
         this.dataSize = dataSize;
         this.md5sum = md5sum;
      }
   }

   // vvv Private classes vvv

   /** Download the given url, saves the data to a file */
   private class DownloadTask implements Callable<DownloadResult>
   {
      private final String urlToDownload;

      /** Create a new DownloadTask with an URL to download. */
      public DownloadTask(String url_to_download)
      {
         this.urlToDownload = url_to_download;
      }

      /**
       * In-thread code: downloads the URL into a temporary file, renames that file if the
       * server supplied a filename, and deletes the file if anything goes wrong.
       *
       * @return the path, Content-Type, Content-Length and MD5 sum of the downloaded data.
       * @throws IOException if the remote host did not answer with HTTP 200, or on I/O error.
       * @throws IllegalStateException if the filename supplied by the server is not a plain
       *                               file name.
       * @throws Exception any other error raised by the HTTP client.
       */
      @Override
      public DownloadResult call() throws Exception
      {
         Path out_file_path = (tempDir != null)
               ? Files.createTempFile(tempDir, null, TMP_FILE_SUFFIX)
               : Files.createTempFile(null, TMP_FILE_SUFFIX);

         try (FileChannel output = FileChannel.open(out_file_path, StandardOpenOption.WRITE))
         {
            // Computes the data's md5 sum on the fly
            MessageDigest md = MessageDigest.getInstance("MD5");
            DigestIWC decorator = new DigestIWC(md, output);

            long start = System.currentTimeMillis();
            HttpResponse response = http_client.interruptibleGet(this.urlToDownload, decorator);
            LOGGER.debug("Downloaded '{}' in {} ms",
                  this.urlToDownload, System.currentTimeMillis() - start);

            // If the response's status code is not 200, something wrong happened
            int status = response.getStatusLine().getStatusCode();
            if (status != HttpStatus.SC_OK)
            {
               throw new IOException(String.format(
                     "Cannot download from %s, remote host returned message '%s' (HTTP%d)",
                     this.urlToDownload, response.getStatusLine().getReasonPhrase(), status));
            }

            // The filename is optional: keep the temporary name when the server gives none
            String filename = filenameFromContentDisposition(response);
            if (filename != null)
            {
               // Closed before the rename, presumably so that no handle is left open on the
               // file being moved -- TODO confirm against DigestIWC
               decorator.close();
               out_file_path = renameDownloadedFile(out_file_path, filename);
            }

            // NOTE(review): assumes the response has an entity carrying a Content-Type; if it
            // does not, the resulting NullPointerException goes through the cleanup below.
            return new DownloadResult(
                  out_file_path,
                  response.getEntity().getContentType().getValue(),
                  response.getEntity().getContentLength(),
                  md.digest());
         }
         catch (Exception e)
         {
            // cleanup if an error occured, without losing the original error
            try
            {
               Files.deleteIfExists(out_file_path);
            }
            catch (IOException | RuntimeException cleanup_error)
            {
               e.addSuppressed(cleanup_error);
            }
            throw e;
         }
      }

      /**
       * Gets the filename from the HTTP header field `Content-Disposition'.
       *
       * @param response the HTTP response to inspect.
       * @return the filename, or {@code null} if the header is absent, empty, or has no
       *         filename property.
       */
      private String filenameFromContentDisposition(HttpResponse response)
      {
         // getFirstHeader returns null for a missing header, hence the guard
         if (!response.containsHeader("Content-Disposition"))
         {
            return null;
         }
         String contdis = response.getFirstHeader("Content-Disposition").getValue();
         if (contdis == null || contdis.isEmpty())
         {
            return null;
         }
         Matcher m = FILENAME_PATTERN.matcher(contdis);
         if (!m.find())
         {
            return null;
         }
         String filename = m.group(1);
         return (filename == null || filename.isEmpty()) ? null : filename;
      }

      /**
       * Renames the downloaded file, keeping it in the same directory.
       *
       * @param downloaded path to the downloaded file.
       * @param filename new name, as supplied by the remote host (not trusted).
       * @return the new path of the downloaded file.
       * @throws IllegalStateException if {@code filename} is not a plain file name.
       * @throws IOException if the file cannot be moved.
       */
      private Path renameDownloadedFile(Path downloaded, String filename) throws IOException
      {
         Path rpath = Paths.get(filename);
         boolean plain_name = !rpath.isAbsolute() && rpath.getNameCount() == 1;
         // "." and ".." count as a single name but designate directories
         if (plain_name)
         {
            String name = rpath.getFileName().toString();
            plain_name = !".".equals(name) && !"..".equals(name);
         }
         if (!plain_name)
         {
            String msg = String.format("invalid filename '%s' from %s",
                  filename, this.urlToDownload);
            throw new IllegalStateException(msg);
         }
         Path dest = downloaded.resolveSibling(rpath);
         Files.move(downloaded, dest, StandardCopyOption.ATOMIC_MOVE);
         return dest;
      }
   }

   /** Creates only daemon threads. */
   private static final class DaemonThreadFactory implements ThreadFactory
   {
      @Override
      public Thread newThread(Runnable r)
      {
         Thread thread = new Thread(r, "DownloadThread");
         thread.setDaemon(true);
         return thread;
      }
   }
}