package net.sourceforge.seqware.common.util.filetools;
import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.HttpMethod;
import com.amazonaws.Protocol;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.retry.PredefinedRetryPolicies;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.GeneratePresignedUrlRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.transfer.Transfer;
import com.amazonaws.services.s3.transfer.Transfer.TransferState;
import com.amazonaws.services.s3.transfer.TransferManager;
import com.amazonaws.services.s3.transfer.Upload;
import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;
import io.seqware.pipeline.SqwKeys;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.Key;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.crypto.Cipher;
import javax.crypto.CipherInputStream;
import javax.crypto.CipherOutputStream;
import javax.crypto.spec.SecretKeySpec;
import net.sourceforge.seqware.common.module.FileMetadata;
import net.sourceforge.seqware.common.util.Log;
import net.sourceforge.seqware.common.util.configtools.ConfigTools;
import org.apache.commons.codec.binary.Base64;
//import org.apache.hadoop.conf.Configuration;
//import org.apache.hadoop.fs.FSDataInputStream;
//import org.apache.hadoop.fs.FSDataOutputStream;
//import org.apache.hadoop.fs.FileSystem;
//import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
//import org.apache.hadoop.fs.Path;
//import org.apache.hadoop.io.IOUtils;
/**
* <p>
* ProvisionFilesUtil class.
* </p>
*
* @author boconnor
* @version $Id: $Id
*/
public class ProvisionFilesUtil {
// number of times a failed read/write on the copy loop is retried before giving up
protected final int READ_ATTEMPTS = 1000;
// size in bytes of the current input; set by the getSourceReader helpers
protected long inputSize = 0L;
// current byte offset in the input stream; used to resume after read errors
protected long position = 0L;
// bare file name of the input, derived by the getSourceReader helpers
protected String fileName = "";
// the raw input path/URL as passed to getSourceReader
protected String originalFileName = "";
// set only when the input is a local file; enables the file-based S3 upload path
protected File inputFile = null;
// symmetric keys decoded from BASE64 strings (see DATA_ENCRYPTION_ALGORITHM)
protected Key dataEncryptionKey = null;
protected Key dataDecryptionKey = null;
// when true, progress information is printed to stdout
private boolean verbose;
// S3 client cached by the presigned-URL and download helpers
private AmazonS3Client s3;
// cipher algorithm used for both the encryption and decryption keys
private static final String DATA_ENCRYPTION_ALGORITHM = "DESede";
// retry budget for symlink creation
private static final int MAXRETRY = 3;
/**
 * Default ctor.
 */
public ProvisionFilesUtil() {
}
/**
 * Set verbose mode ctor.
 *
 * @param verbose
 *            true to print progress information to stdout
 */
public ProvisionFilesUtil(boolean verbose) {
    // pass the caller's flag through; this used to be hard-coded to true,
    // which silently ignored the argument
    this.setVerbose(verbose);
}
/**
 * Gets the file name. Available after the getSourceReader has been invoked.
 *
 * @return the bare name of the file currently being provisioned
 */
public String getFileName() {
    return this.fileName;
}
/**
 * Creates a symlink pointing at {@code input}, retrying up to {@code MAXRETRY} times,
 * then verifies the link actually exists on disk.
 *
 * @param output
 *            target directory (or full target path when {@code fullOutputPath} is true)
 * @param fullOutputPath
 *            when true, {@code output} is the complete link path; otherwise the current
 *            {@code fileName} is appended to it
 * @param input
 *            the path the symlink should point at
 * @return true if the link exists after the attempts, false otherwise
 */
public boolean createSymlink(String output, boolean fullOutputPath, String input) {
    // work out the link location up front so the retry loop and the final
    // existence check agree on the same path
    String target = output + File.separator + fileName;
    if (fullOutputPath) {
        target = output;
    }
    boolean retry = true;
    int retryCount = 0;
    while (retry && retryCount < ProvisionFilesUtil.MAXRETRY) {
        try {
            // ProcessBuilder passes each argument intact, unlike Runtime.exec(String)
            // which tokenizes on whitespace and breaks paths containing spaces
            ProcessBuilder pb = new ProcessBuilder("ln", "-s", input, target);
            Log.debug("ln -s " + input + " " + target);
            Process result = pb.start();
            try {
                // see if the command worked
                if (result.waitFor() == 0) {
                    // then we're done and can exit the retry loop
                    retry = false;
                }
            } catch (InterruptedException e) {
                // restore the interrupt flag so callers can observe the interruption
                Thread.currentThread().interrupt();
                Log.error(e.getMessage());
            }
        } catch (IOException e) {
            Log.error(e.getMessage());
        }
        retryCount++;
    }
    // now check to see if the file is actually there!
    File outputFilePath = new File(target);
    if (outputFilePath.exists()) {
        return true;
    }
    Log.error("Output file does not exist! " + outputFilePath.getAbsolutePath());
    return false;
}
/**
 * Builds a decryption {@link javax.crypto.Cipher} from a BASE64-encoded key string.
 *
 * @param decryptKey
 *            BASE64-encoded key material
 * @return the initialized cipher, or null if initialization failed
 */
public Cipher getDecryptCipher(String decryptKey) {
    setDataDecryptionKeyString(decryptKey);
    try {
        return createDecryptCipherInternal();
    } catch (Exception e) {
        Log.error(e.getMessage());
        return null;
    }
}
/**
 * Builds an encryption {@link javax.crypto.Cipher} from a BASE64-encoded key string.
 *
 * @param encryptKey
 *            BASE64-encoded key material
 * @return the initialized cipher, or null if initialization failed
 */
public Cipher getEncryptCipher(String encryptKey) {
    setDataEncryptionKeyString(encryptKey);
    try {
        return createEncryptCipherInternal();
    } catch (Exception e) {
        Log.error(e.getMessage());
        return null;
    }
}
/**
 * Convenience overload of copyToFile: directory-style output, no ciphers, and no
 * source metadata to verify against.
 */
public File copyToFile(BufferedInputStream reader, String output, int bufLen, String input) {
    return this.copyToFile(reader, output, false, bufLen, input, null, null, null);
}
/**
 * Copy reader into output using Cipher.
 *
 * Streams the input byte-by-byte to the destination file, optionally passing the data
 * through a decryption and/or encryption cipher, retrying failed reads (by re-opening
 * the source at the last good position) and failed writes up to READ_ATTEMPTS times.
 * After the copy, the destination size (and optionally md5sum) is compared against the
 * source.
 *
 * @param reader
 *            a {@link java.io.BufferedInputStream} object.
 * @param output
 *            a {@link java.lang.String} object.
 * @param fullOutputPath
 *            when true, output is the complete destination path; otherwise fileName is appended
 * @param bufLen
 *            a int.
 * @param input
 *            a {@link java.lang.String} object; used to re-open the source on read errors.
 * @param metadata
 *            store the file size and md5sum here after checking the destination
 * @return written File object, or null on any unrecoverable failure
 * @param decryptCipher
 *            a {@link javax.crypto.Cipher} object.
 * @param encryptCipher
 *            a {@link javax.crypto.Cipher} object.
 */
public File copyToFile(BufferedInputStream reader, String output, boolean fullOutputPath, int bufLen, String input,
        Cipher decryptCipher, Cipher encryptCipher, FileMetadata metadata) {
    OutputStream writer;
    // then it's a remote input (or we want to copy regardless) just a
    // directory output
    // figure out the output
    File outputObj = new File(output + File.separator + fileName);
    if (fullOutputPath) {
        outputObj = new File(output);
    }
    // make sure the destination directory hierarchy exists
    outputObj.getParentFile().mkdirs();
    // now write input to output
    try {
        int attempts = 0; // READ_ATTEMPTS
        writer = new FileOutputStream(outputObj);
        // add decryption to the output stream
        if (decryptCipher != null) {
            writer = new CipherOutputStream(writer, decryptCipher);
        }
        // add encryption to the output stream
        if (encryptCipher != null) {
            writer = new CipherOutputStream(writer, encryptCipher);
        }
        // wrap in a buffered stream
        writer = new BufferedOutputStream(writer, bufLen);
        boolean cont = true;
        // count of positions, every thousand do an update
        // (positionCount advances once per byte copied in this loop)
        long positionCount = 0;
        while (cont) {
            int data = -1;
            positionCount++;
            // calculate stats: print progress roughly every 0.1% of the input
            int divisions = (int) (this.inputSize / 1000);
            if (positionCount > 0 && divisions > 0 && positionCount % divisions == 0 && isVerbose()) {
                float percent = (positionCount * 100.0f) / this.inputSize;
                System.out.printf(" + completed: %.2f", percent);
                System.out.print("%\r");
            }
            // read one byte, retrying by re-opening the source at this.position on failure
            while (true) {
                try {
                    data = reader.read();
                    this.position++;
                    if (data == -1) {
                        // end of stream: exit the outer copy loop
                        cont = false;
                    }
                    break;
                } catch (IOException e) {
                    attempts++;
                    Log.error("There has been an exception while reading the stream: " + e.getMessage());
                    if (attempts > this.READ_ATTEMPTS) {
                        Log.error("Giving up after " + attempts + " attempts!");
                        return null;
                    }
                    Log.error("Trying to recover from read or write error, opening the reader at position " + this.position);
                    // FIXME: notice I'm assuming this is a problem with the reader
                    try {
                        reader.close();
                    } catch (IOException e1) {
                        Log.error(e1.getMessage());
                    }
                    try {
                        Thread.sleep(2000);
                    } catch (java.lang.InterruptedException e2) {
                        Log.error("thread sleep failed: " + e2.getMessage());
                    }
                    // re-open the source stream at the last successfully read offset
                    reader = getSourceReader(input, bufLen, this.position);
                    if (reader == null) {
                        return null;
                    }
                }
            }
            // write the byte, retrying in place on failure (skipped once EOF was seen)
            while (true && cont) {
                try {
                    writer.write(data);
                    break;
                } catch (IOException e) {
                    attempts++;
                    Log.error("There has been an exception while writing the stream: " + e.getMessage());
                    if (attempts > this.READ_ATTEMPTS) {
                        Log.error("Giving up after " + attempts + " attempts!");
                        return null;
                    }
                    try {
                        Thread.sleep(2000);
                    } catch (java.lang.InterruptedException e2) {
                        Log.error("thread sleep failed: " + e2.getMessage());
                    }
                }
            }
            // possibly print out status
        }
        // print newline
        Log.stdout("");
        // close reader/writer
        reader.close();
        writer.close();
    } catch (FileNotFoundException e) {
        Log.error(e.getMessage());
        return null;
    } catch (IOException e) {
        Log.error(e.getMessage());
        return null;
    }
    // now that the copy is complete, make sure the file out and the input size are equal
    // (skipped when a cipher is in play, since the output size legitimately differs)
    if (this.inputSize != outputObj.length() && decryptCipher == null && encryptCipher == null) {
        Log.error("The output file size of " + outputObj.length() + " and the input file size of " + this.inputSize
                + " do not match so the file provisioning failed!");
        return null;
    }
    // do the calculations to record file size and md5sum to see if they are consistent between source and destination
    FileMetadata outputMetadata = new FileMetadata();
    if (decryptCipher == null && encryptCipher == null && metadata != null) {
        Map<String, String> settings = ConfigTools.getSettings();
        // the md5 check is on by default and can be disabled via the settings file
        if (settings.containsKey(SqwKeys.SW_PROVISION_FILES_MD5.getSettingKey())) {
            String value = settings.get(SqwKeys.SW_PROVISION_FILES_MD5.getSettingKey());
            boolean usemd5 = Boolean.valueOf(value);
            if (usemd5) {
                calculateInputMetadata(outputObj.getAbsolutePath(), outputMetadata);
            } else {
                return outputObj;
            }
        } else {
            calculateInputMetadata(outputObj.getAbsolutePath(), outputMetadata);
        }
        // verify source and destination values. this does the file size check again but we'll probably delete the former code given a
        // chance
        if (!Objects.equals(metadata.getSize(), outputMetadata.getSize())) {
            Log.error("The output file size of " + outputMetadata.getSize() + " and the input file size of " + metadata.getSize()
                    + " do not match so the file provisioning failed!");
            return null;
        }
        if (!Objects.equals(metadata.getMd5sum(), outputMetadata.getMd5sum())) {
            Log.error("The output md5sum of " + outputMetadata.getMd5sum() + " and the input md5sum of " + metadata.getMd5sum()
                    + " do not match so the file provisioning failed!");
            return null;
        }
    }
    return outputObj;
}
/**
 * Not supported yet.
 *
 * @return always false; HTTP PUT is not implemented
 */
public boolean putToHttp() {
    // TODO: not going to support HTTP PUT initially
    Log.warn("HTTP upload not yet supported");
    return false;
}
// public boolean putToHDFS(InputStream reader, String output, boolean fullOutputPath){
// return(putToHDFS(reader, output, fullOutputPath, null, null));
// }
//
// /**
// * The output path is an HDFS URL that look like
// * hdfs://<host>/<path>/<filename>
// *
// * TODO:
// *
// * 1) encryption/decryption
// *
// * @param reader
// * @param output
// * @param decryptCipher
// * @param encryptCipher
// * @return
// */
// public boolean putToHDFS(InputStream reader, String output, boolean fullOutputPath, Cipher decryptCipher, Cipher encryptCipher) {
// try {
//
// // the final URL
// String outputStr = null;
//
// // first, try and figure out if the output is a dir or an actual file
// if (fullOutputPath) {
// outputStr = output;
// } else {
// if (output.endsWith("/")) {
// outputStr = output + this.getFileName();
// } else {
// outputStr = output + "/" + this.getFileName();
// }
// }
//
// // Hadoop stuff
// Configuration conf = new Configuration();
// // FIXME: is this OK to pass in the complete URL?
// FileSystem fs = FileSystem.get(URI.create(outputStr), conf);
// Path outputPath = new Path(outputStr);
//
// // delete if it already exists
// if (fs.exists(outputPath) && !fs.isDirectory(outputPath)) {
// // delete it and re-upload
// fs.delete(outputPath, false);
// }
//
// OutputStream out = fs.create(outputPath);
// IOUtils.copyBytes(reader, out, 4096, true);
//
// // Close all the file descripters
// reader.close();
// out.close();
// fs.close();
// return(true);
//
// } catch (IOException ex) {
// Logger.getLogger(ProvisionFilesUtil.class.getName()).log(Level.SEVERE, "There was a problem in putToHDFS", ex);
// }
//
// return(false);
//
// }
/**
 * Copy file using reader into output, with the AWS SDK default client tuning.
 *
 * @param reader
 *            a {@link java.io.BufferedInputStream} object.
 * @param output
 *            a {@link java.lang.String} object.
 * @param fullOutputPath
 *            when true, output already names the final key
 * @return true if OK
 */
public boolean putToS3(BufferedInputStream reader, String output, boolean fullOutputPath) {
    // delegate with the SDK defaults for timeouts, connections, and retries
    return putToS3(reader, output, fullOutputPath, ClientConfiguration.DEFAULT_SOCKET_TIMEOUT,
            ClientConfiguration.DEFAULT_MAX_CONNECTIONS, PredefinedRetryPolicies.DEFAULT_MAX_ERROR_RETRY,
            ClientConfiguration.DEFAULT_SOCKET_TIMEOUT);
}
/**
 * Copy file using reader into output with explicit client tuning and no ciphers.
 *
 * @param reader
 *            the source stream
 * @param output
 *            the s3:// destination
 * @param fullOutputPath
 *            when true, output already names the final key
 * @param connectionTimeout
 *            client connection timeout in milliseconds
 * @param maxConnections
 *            maximum open HTTP connections
 * @param maxErrorRetry
 *            maximum retries on retryable errors
 * @param socketTimeout
 *            socket timeout in milliseconds
 * @return true if OK
 */
public boolean putToS3(BufferedInputStream reader, String output, boolean fullOutputPath, int connectionTimeout, int maxConnections,
        int maxErrorRetry, int socketTimeout) {
    // delegate to the full variant with no decryption or encryption
    return putToS3(reader, output, fullOutputPath, connectionTimeout, maxConnections, maxErrorRetry, socketTimeout, null, null);
}
/**
 * Copy file using reader into output, optionally passing the data through ciphers,
 * with the AWS SDK default client tuning.
 *
 * @param reader
 *            a {@link java.io.InputStream} object.
 * @param output
 *            a {@link java.lang.String} object.
 * @param fullOutputPath
 *            when true, output already names the final key
 * @param decryptCipher
 *            a {@link javax.crypto.Cipher} object.
 * @param encryptCipher
 *            a {@link javax.crypto.Cipher} object.
 * @return true if OK
 */
public boolean putToS3(InputStream reader, String output, boolean fullOutputPath, Cipher decryptCipher, Cipher encryptCipher) {
    // delegate with the SDK defaults for timeouts, connections, and retries
    return putToS3(reader, output, fullOutputPath, ClientConfiguration.DEFAULT_SOCKET_TIMEOUT,
            ClientConfiguration.DEFAULT_MAX_CONNECTIONS, PredefinedRetryPolicies.DEFAULT_MAX_ERROR_RETRY,
            ClientConfiguration.DEFAULT_SOCKET_TIMEOUT, decryptCipher, encryptCipher);
}
/**
 * Uploads the stream (or, when possible, the local input file) to the s3:// destination
 * encoded in {@code output}, using the TransferManager for multipart uploads.
 *
 * Credentials may be embedded in the URL as s3://&lt;access&gt;:&lt;secret&gt;@&lt;bucket&gt;/&lt;key&gt;;
 * otherwise they are read from the .seqware/settings file. All objects are stored with
 * S3 server-side AES-256 encryption.
 *
 * @param reader
 *            the source stream; closed unused when the local-file fast path is taken
 * @param output
 *            the s3:// destination URL
 * @param fullOutputPath
 *            when true, the key in the URL is used verbatim instead of appending fileName
 * @param connectionTimeout
 *            client connection timeout in milliseconds
 * @param maxConnections
 *            maximum open HTTP connections
 * @param maxErrorRetry
 *            maximum retries on retryable errors
 * @param socketTimeout
 *            socket timeout in milliseconds
 * @param decryptCipher
 *            optional cipher applied to the stream before upload
 * @param encryptCipher
 *            optional cipher applied to the stream before upload
 * @return true if the upload succeeded
 */
public boolean putToS3(InputStream reader, String output, boolean fullOutputPath, int connectionTimeout, int maxConnections,
        int maxErrorRetry, int socketTimeout, Cipher decryptCipher, Cipher encryptCipher) {
    // can encode the access key and secret key within the URL
    // see http://www.cs.rutgers.edu/~watrous/user-pass-url.html
    Pattern p = Pattern.compile("s3://(\\S+):(\\S+)@(\\S+)");
    Matcher m = p.matcher(output);
    boolean result = m.find();
    String accessKey;
    String secretKey;
    String stringURL = output;
    if (result) {
        accessKey = m.group(1);
        secretKey = m.group(2);
        // strip the credentials back out of the URL
        stringURL = "s3://" + m.group(3);
    } else {
        // get the access/secret key from the .seqware/settings file
        try {
            HashMap<String, String> settings = (HashMap<String, String>) ConfigTools.getSettings();
            accessKey = settings.get(SqwKeys.AWS_ACCESS_KEY.getSettingKey());
            secretKey = settings.get(SqwKeys.AWS_SECRET_KEY.getSettingKey());
        } catch (Exception e) {
            Log.error(e.getMessage());
            return false;
        }
    }
    if (accessKey == null || secretKey == null) {
        Log.error("Couldn't find access or secret key for S3 output so will exit!");
        return false;
    }
    // parse out the bucket and key
    p = Pattern.compile("s3://([^/]+)/*(\\S*)");
    m = p.matcher(stringURL);
    result = m.find();
    if (result) {
        String bucket = m.group(1);
        String key = m.group(2);
        if (key == null) {
            key = "";
        }
        if (key.endsWith("/")) {
            // then add fileName to the target
            key = key + fileName;
        } else if (!key.endsWith(fileName) && !fullOutputPath) {
            // then add a / then fileName to the target
            key = key + "/" + fileName;
        }
        ObjectMetadata omd = new ObjectMetadata();
        // this is the size of what's being read
        omd.setContentLength(this.inputSize);
        // just encrypt everything via Server-Side encryption, see
        // http://docs.amazonwebservices.com/AmazonS3/latest/dev/SSEUsingJavaSDK.html
        omd.setServerSideEncryption(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
        BasicAWSCredentials credentials = new BasicAWSCredentials(accessKey, secretKey);
        ClientConfiguration config = new ClientConfiguration();
        config.setConnectionTimeout(connectionTimeout);
        config.setMaxConnections(maxConnections);
        config.setMaxErrorRetry(maxErrorRetry);
        config.setProtocol(Protocol.HTTPS);
        config.setSocketTimeout(socketTimeout);
        // NOTE(review): this local deliberately shadows the instance field of the same name
        AmazonS3Client s3 = new AmazonS3Client(credentials, config);
        TransferManager tm = new TransferManager(s3);
        // if reading from a local file and not decrypting or encrypting then we can use the API call below that works on a file
        if (this.inputFile != null && decryptCipher == null && encryptCipher == null) {
            // just go ahead and close this, won't use it
            try {
                reader.close();
            } catch (IOException e1) {
                Log.error(e1.getMessage());
            }
            Log.info("S3 WRITES: BUCKET: " + bucket + " KEY: " + key + " INPUT FILE: " + inputFile);
            Upload upload = tm.upload(bucket, key, this.inputFile);
            boolean uploadStatus = (waitForS3Upload(upload));
            if (!uploadStatus) {
                Log.error("The S3 upload returned false!");
                tm.shutdownNow();
                return (false);
            }
            // now that the copy is complete, make sure the file out and the input size are equal
            try {
                ObjectMetadata om = s3.getObjectMetadata(bucket, key);
                if (this.inputSize != om.getContentLength()) {
                    Log.error("The S3 output file size of " + om.getContentLength() + " and the input file size of " + this.inputSize
                            + " do not match so the file provisioning failed!");
                    tm.shutdownNow();
                    return (false);
                }
            } catch (Exception e) {
                // size check is best-effort: a metadata failure here does not fail the upload
                Log.error("Can't get metadata on key: " + key + " bucket: " + bucket);
            }
        } else {
            // add decryption to the reader
            if (decryptCipher != null) {
                reader = new CipherInputStream(reader, decryptCipher);
            }
            // add encryption to the output stream
            if (encryptCipher != null) {
                reader = new CipherInputStream(reader, encryptCipher);
            }
            // trigger the upload
            Transfer myUpload = tm.upload(bucket, key, reader, omd);
            boolean uploadStatus = waitForS3Upload(myUpload);
            if (!uploadStatus) {
                tm.shutdownNow();
                Log.error("S3 Upload failed:" + myUpload);
                return (false);
            }
        }
        // need to shut down the transfer manager
        tm.shutdownNow();
        // this is how to do it without multipart, not usable for large files!
        // s3.putObject(bucket, key, reader, new ObjectMetadata());
    } else {
        Log.error("Unable to parse a bucket and file name from " + stringURL
                + " it should be in the form s3://<bucket>/<key>/ or s3://<bucket>/");
        return false;
    }
    return true;
}
/**
 * Blocks until the given S3 transfer finishes, polling every two seconds and
 * optionally printing progress.
 *
 * @param myUpload
 *            the in-flight transfer to monitor
 * @return true if the transfer completed, false on failure or interruption
 */
private boolean waitForS3Upload(Transfer myUpload) {
    try {
        // FIXME: this doesn't try to reconnect via a new reader if things go badly
        while (!myUpload.isDone()) {
            if (isVerbose()) {
                float percent = (myUpload.getProgress().getBytesTransfered() * 100.0f) / this.inputSize;
                System.out.printf(" + completed: %.2f", percent);
                System.out.print("%\r");
                Log.info("Transfer: " + myUpload.getDescription());
                Log.info(" - State: " + myUpload.getState());
                Log.info(" - Progress: " + myUpload.getProgress().getBytesTransfered() + " of " + this.inputSize);
            }
            // bail out early if the SDK reports the transfer as failed
            if (myUpload.getState() == TransferState.Failed) {
                Log.error("Failure Uploading: " + myUpload.getDescription());
                return false;
            }
            Thread.sleep(2000);
        }
        if (isVerbose()) {
            System.out.print("\n");
        }
    } catch (InterruptedException e) {
        // restore the interrupt status so callers can react to the interruption
        Thread.currentThread().interrupt();
        Log.error(e.getMessage());
        return false;
    }
    return true;
}
/**
 * Generates a short-lived (10 second) presigned HTTP URL for an s3:// input, taking
 * credentials from the URL itself (s3://&lt;access&gt;:&lt;secret&gt;@...) or, failing
 * that, from the .seqware/settings file.
 *
 * @param input
 *            a {@link java.lang.String} object.
 * @return a {@link java.net.URL} object, or null if credentials or the bucket/key
 *         could not be determined
 */
public URL getS3Url(String input) {
    URL url = null;
    if (input.startsWith("s3://")) {
        Pattern p = Pattern.compile("s3://(\\S+):(\\S+)@(\\S+)");
        Matcher m = p.matcher(input);
        boolean result = m.find();
        String accessKey = null;
        String secretKey = null;
        String localUrl = input;
        if (result) {
            accessKey = m.group(1);
            secretKey = m.group(2);
            localUrl = "s3://" + m.group(3);
        } else {
            // get the access/secret key from the .seqware/settings file
            try {
                HashMap<String, String> settings = (HashMap<String, String>) ConfigTools.getSettings();
                accessKey = settings.get(SqwKeys.AWS_ACCESS_KEY.getSettingKey());
                secretKey = settings.get(SqwKeys.AWS_SECRET_KEY.getSettingKey());
            } catch (Exception e) {
                // log through the project logger, consistent with the other
                // credential lookups in this class (was e.printStackTrace())
                Log.error(e.getMessage());
                return null;
            }
        }
        if (accessKey == null || secretKey == null) {
            return null;
        }
        // parse out the bucket and key
        p = Pattern.compile("s3://([^/]+)/(\\S+)");
        m = p.matcher(localUrl);
        if (m.find()) {
            String bucket = m.group(1);
            String key = m.group(2);
            // now get this from S3; the URL expires 10 seconds from now
            s3 = new AmazonS3Client(new BasicAWSCredentials(accessKey, secretKey));
            url = s3.generatePresignedUrl(new GeneratePresignedUrlRequest(bucket, key, HttpMethod.GET).withExpiration(new Date(
                    new Date().getTime() + 10000)));
        }
    }
    return url;
}
/**
 * Generates a short-lived (10 second) presigned HTTP URL for an s3:// input, using the
 * supplied credentials unless the URL itself embeds an access/secret key pair.
 *
 * @param input
 *            a {@link java.lang.String} object.
 * @param accessKey
 *            a {@link java.lang.String} object.
 * @param secretKey
 *            a {@link java.lang.String} object.
 * @return a {@link java.net.URL} object, or null if no bucket/key could be parsed
 */
public URL getS3Url(String input, String accessKey, String secretKey) {
    // credentials embedded in the URL override the supplied ones
    Matcher credentials = Pattern.compile("s3://(\\S+):(\\S+)@(\\S+)").matcher(input);
    String bareUrl = input;
    if (credentials.find()) {
        accessKey = credentials.group(1);
        secretKey = credentials.group(2);
        bareUrl = "s3://" + credentials.group(3);
    }
    // parse out the bucket and key
    Matcher bucketAndKey = Pattern.compile("s3://([^/]+)/(\\S+)").matcher(bareUrl);
    if (!bucketAndKey.find()) {
        return null;
    }
    String bucket = bucketAndKey.group(1);
    String key = bucketAndKey.group(2);
    // ask S3 for a presigned GET URL that expires 10 seconds from now
    s3 = new AmazonS3Client(new BasicAWSCredentials(accessKey, secretKey));
    return s3.generatePresignedUrl(new GeneratePresignedUrlRequest(bucket, key, HttpMethod.GET)
            .withExpiration(new Date(new Date().getTime() + 10000)));
}
/**
 * Opens a buffered reader for the given input, dispatching on the URL scheme
 * (s3://, http:// or https://, otherwise a local file path). This attempts to
 * resume if passed in startPosition &gt; 0.
 *
 * @param input
 *            a {@link java.lang.String} object.
 * @param bufLen
 *            a int.
 * @param startPosition
 *            a long.
 * @return reader of input file, or null if it could not be opened
 */
public BufferedInputStream getSourceReader(String input, int bufLen, long startPosition) {
    this.originalFileName = input;
    this.inputFile = null;
    BufferedInputStream stream;
    if (input.startsWith("s3://")) {
        stream = getS3InputStream(input, bufLen, startPosition);
    } else if (input.startsWith("http://") || input.startsWith("https://")) {
        stream = getHttpInputStream(input, bufLen, startPosition);
        // } else if (input.startsWith("hdfs://")) { stream = getHDFSInputStream(input, bufLen, startPosition); }
    } else {
        stream = getFileInputStream(input, startPosition);
    }
    return stream;
}
/**
 * Opens a buffered reader on a local file, recording its name and size in the
 * instance fields, and advances to startPosition to support resumed copies.
 *
 * @param input
 *            path to the local file
 * @param startPosition
 *            byte offset to resume from (0 for a fresh read)
 * @return the opened reader, or null on error
 */
private BufferedInputStream getFileInputStream(String input, long startPosition) {
    try {
        this.inputFile = new File(input);
        // File.getName() handles platform separators correctly, unlike
        // splitting the absolute path on "/"
        fileName = inputFile.getName();
        this.inputSize = inputFile.length();
        BufferedInputStream reader = new BufferedInputStream(new FileInputStream(inputFile));
        // InputStream.skip() is allowed to skip fewer bytes than requested,
        // so loop until we have really advanced to startPosition (or hit EOF)
        long remaining = startPosition;
        while (remaining > 0) {
            long skipped = reader.skip(remaining);
            if (skipped <= 0) {
                break;
            }
            remaining -= skipped;
        }
        return reader;
    } catch (FileNotFoundException e) {
        Log.error(e.getMessage());
        return null;
    } catch (IOException e) {
        Log.error(e.getMessage());
        return null;
    }
}
/**
 * Opens a buffered reader on an http(s) URL, supporting user:pass@ credentials in the
 * URL (sent as HTTP Basic auth) and resumption via a Range request header.
 *
 * @param input
 *            a {@link java.lang.String} object.
 * @param bufLen
 *            a int.
 * @param startPosition
 *            a long; byte offset to resume from.
 * @return a {@link java.io.BufferedInputStream} object, or null on error.
 */
public BufferedInputStream getHttpInputStream(String input, int bufLen, long startPosition) {
    BufferedInputStream reader = null;
    // pull optional credentials out of URLs of the form http(s)://user:pass@host/...
    Pattern p = Pattern.compile("(https*)://(\\S+):(\\S+)@(\\S+)");
    Matcher m = p.matcher(input);
    boolean result = m.find();
    String protocol;
    String user = null;
    String pass = null;
    String stringURL = input;
    if (result) {
        protocol = m.group(1);
        user = m.group(2);
        pass = m.group(3);
        stringURL = protocol + "://" + m.group(4);
    }
    URL urlObj = null;
    try {
        urlObj = new URL(stringURL);
        URLConnection urlConn = urlObj.openConnection();
        if (user != null && pass != null) {
            String userPassword = user + ":" + pass;
            String encoding = Base64.encodeBase64String(userPassword.getBytes());
            urlConn.setRequestProperty("Authorization", "Basic " + encoding);
        }
        // deal with resumption, look at
        // http://stackoverflow.com/questions/6237079/resume-http-file-download-in-java
        urlConn.setRequestProperty("Range", "bytes=" + startPosition + "-");
        // download data and write out
        p = Pattern.compile("://([^/]+)/(\\S+)");
        m = p.matcher(stringURL);
        result = m.find();
        if (result) {
            // String host = m.group(1);
            String path = m.group(2);
            String[] paths = path.split("/");
            this.fileName = paths[paths.length - 1];
            // getContentLengthLong avoids the int overflow of getContentLength
            // for responses larger than 2 GB
            this.inputSize = urlConn.getContentLengthLong();
            reader = new BufferedInputStream(urlConn.getInputStream(), bufLen);
        } else {
            Log.error("getHttpInputStream doesn't know how to deal with URL: " + stringURL);
            return null;
        }
    } catch (MalformedURLException e) {
        Log.error(e.getMessage());
        return null;
    } catch (IOException e) {
        Log.error(e.getMessage());
        return null;
    }
    return reader;
}
// public BufferedInputStream getHDFSInputStream(String input, int bufLen, long startPosition) {
//
// try {
//
// BufferedInputStream reader = null;
//
// // figure out the filename
// String[] path = input.split("/");
// this.fileName = path[path.length - 1];
//
// // Hadoop stuff
// Configuration conf = new Configuration();
// // FIXME: is this OK to pass in the complete URL?
// FileSystem fs = FileSystem.get(URI.create(input), conf);
// Path inputPath = new Path(input);
//
// // should exist and not be a directory
// if (!fs.exists(inputPath) || fs.isDirectory(inputPath)) {
// Log.error("Can't read the input file "+input+", it is either missing or a directory!");
// return(null);
// }
//
// // open the HDFS input stream
// reader = new BufferedInputStream(fs.open(inputPath, bufLen));
// this.inputSize = fs.getFileStatus(inputPath).getBlockSize();
//
// // move forward if given an offset
// if (startPosition > 0) {
// reader.skip(startPosition);
// }
//
// // and just return this
// return(reader);
//
// } catch (IOException ex) {
// Logger.getLogger(ProvisionFilesUtil.class.getName()).log(Level.SEVERE, null, ex);
// }
//
// // default null
// return(null);
// }
/**
 * Opens a buffered reader on an S3 object, recording the file name and size in the
 * instance fields, and starting the download at startPosition to support resumption.
 *
 * @param input
 *            an s3://&lt;bucket&gt;/&lt;key&gt; URL (credentials already stripped).
 * @param bufLen
 *            a int.
 * @param startPosition
 *            a long; byte offset to resume from.
 * @param accessKey
 *            a {@link java.lang.String} object.
 * @param secretKey
 *            a {@link java.lang.String} object.
 * @return a {@link java.io.BufferedInputStream} object, or null on error.
 */
public BufferedInputStream getS3InputStream(String input, int bufLen, long startPosition, String accessKey, String secretKey) {
    BufferedInputStream reader = null;
    S3Object object = null;
    // now get this from S3
    s3 = new AmazonS3Client(new BasicAWSCredentials(accessKey, secretKey));
    // parse out the bucket and key
    Pattern p = Pattern.compile("s3://([^/]+)/(\\S+)");
    Matcher m = p.matcher(input);
    boolean result = m.find();
    if (result) {
        String bucket = m.group(1);
        String key = m.group(2);
        // now figure out the actual file name from the input
        String[] paths = key.split("/");
        this.fileName = paths[paths.length - 1];
        try {
            // read the size via a metadata-only request; the previous code issued a
            // full getObject here and leaked its never-closed content stream
            this.inputSize = s3.getObjectMetadata(bucket, key).getContentLength();
            // restart the download from a fixed offset via a ranged GET
            GetObjectRequest gor = new GetObjectRequest(bucket, key);
            gor.setRange(startPosition, inputSize);
            object = s3.getObject(gor);
            reader = new BufferedInputStream(object.getObjectContent(), bufLen);
        } catch (AmazonServiceException e) {
            Log.error(e.getMessage());
            return null;
        } catch (AmazonClientException e) {
            Log.error(e.getMessage());
            return null;
        }
    } else {
        Log.error("Couldn't figure out the bucket and key from the URL provided: " + input);
        return null;
    }
    return reader;
}
/**
 * Opens a buffered reader on an S3 object, resolving credentials either from the URL
 * itself (s3://&lt;access&gt;:&lt;secret&gt;@...) or from the .seqware/settings file.
 *
 * @param input
 *            a {@link java.lang.String} object.
 * @param bufLen
 *            a int.
 * @param startPosition
 *            a long.
 * @return a {@link java.io.BufferedInputStream} object, or null on error.
 */
public BufferedInputStream getS3InputStream(String input, int bufLen, long startPosition) {
    // credentials may be embedded in the URL
    // see http://www.cs.rutgers.edu/~watrous/user-pass-url.html
    Matcher credMatcher = Pattern.compile("s3://(\\S+):(\\S+)@(\\S+)").matcher(input);
    String accessKey = null;
    String secretKey = null;
    String cleanUrl = input;
    if (credMatcher.find()) {
        accessKey = credMatcher.group(1);
        secretKey = credMatcher.group(2);
        cleanUrl = "s3://" + credMatcher.group(3);
    } else {
        // no keys in the URL, so pull the access/secret key from the .seqware/settings file
        try {
            HashMap<String, String> settings = (HashMap<String, String>) ConfigTools.getSettings();
            accessKey = settings.get(SqwKeys.AWS_ACCESS_KEY.getSettingKey());
            secretKey = settings.get(SqwKeys.AWS_SECRET_KEY.getSettingKey());
        } catch (Exception e) {
            Log.error(e.getMessage());
            return null;
        }
    }
    if (accessKey == null || secretKey == null) {
        Log.error("Couldn't continue because missing S3 access key and/or secret key");
        return null;
    }
    return getS3InputStream(cleanUrl, bufLen, startPosition, accessKey, secretKey);
}
// utils
/**
 * Sets data encryption key.
 *
 * @param value
 *            BASE64-encoded key
 */
public void setDataEncryptionKeyString(String value) {
    // decode the BASE64 text into raw key material for the DESede cipher
    byte[] keyBytes = getBase64().decode(value);
    dataEncryptionKey = new SecretKeySpec(keyBytes, DATA_ENCRYPTION_ALGORITHM);
}
/**
 * Sets data decryption key.
 *
 * @param value
 *            BASE64-encoded key
 */
public void setDataDecryptionKeyString(String value) {
    // decode the BASE64 text into raw key material for the DESede cipher
    byte[] keyBytes = getBase64().decode(value);
    dataDecryptionKey = new SecretKeySpec(keyBytes, DATA_ENCRYPTION_ALGORITHM);
}
// Base64 codec with an effectively unlimited line length and an empty line
// separator, so decode() accepts key strings with or without line breaks
private static Base64 getBase64() {
    return new Base64(Integer.MAX_VALUE, new byte[0]);
}
/**
 * Reports whether progress information is printed to stdout.
 *
 * @return a boolean.
 */
public boolean isVerbose() {
    return this.verbose;
}
/**
 * Enables or disables printing of progress information to stdout.
 *
 * @param verbose
 *            a boolean.
 */
public void setVerbose(boolean verbose) {
    this.verbose = verbose;
}
// Builds a DESede cipher in decrypt mode from the previously-set decryption key.
// Declares the broad Exception because Cipher.getInstance/init throw several
// checked security exceptions; callers catch Exception.
private Cipher createDecryptCipherInternal() throws Exception {
    Cipher cipher = Cipher.getInstance(DATA_ENCRYPTION_ALGORITHM);
    cipher.init(Cipher.DECRYPT_MODE, dataDecryptionKey);
    return cipher;
}
// Builds a DESede cipher in encrypt mode from the previously-set encryption key.
// Declares the broad Exception because Cipher.getInstance/init throw several
// checked security exceptions; callers catch Exception.
private Cipher createEncryptCipherInternal() throws Exception {
    Cipher cipher = Cipher.getInstance(DATA_ENCRYPTION_ALGORITHM);
    cipher.init(Cipher.ENCRYPT_MODE, dataEncryptionKey);
    return cipher;
}
/**
 * Creates abstract pathname: a freshly created, timestamped directory under
 * folderStore/email with the (trimmed) file name appended.
 *
 * @param folderStore
 *            a {@link java.lang.String} object.
 * @param email
 *            a {@link java.lang.String} object.
 * @param fileName
 *            a {@link java.lang.String} object.
 * @return a {@link java.lang.String} object.
 */
public static String createTargetPath(String folderStore, String email, String fileName) {
    // reuse the directory-creation logic; only the trailing file name differs.
    // this was previously a full copy of createTargetDirectory's body.
    return createTargetDirectory(folderStore, email) + fileName.trim();
}
/**
 * Creates abstract pathname: a freshly created, timestamped directory under
 * folderStore/email, returned with a trailing separator.
 *
 * @param folderStore
 *            a {@link java.lang.String} object.
 * @param email
 *            a {@link java.lang.String} object.
 * @return a {@link java.lang.String} object.
 */
public static String createTargetDirectory(String folderStore, String email) {
    // a millisecond-resolution timestamp keeps successive downloads separate
    String stamp = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date());
    String separator = java.io.File.separator;
    String dirPath = folderStore + email + separator + stamp + separator;
    java.io.File dir = new java.io.File(dirPath);
    if (!dir.exists()) {
        dir.mkdirs();
    }
    return dirPath;
}
/**
 * Determines the size in bytes of the file behind a path, which may be a
 * local filesystem path, an http(s) URL (optionally carrying inline
 * "user:pass@" credentials), or an s3:// URL (optionally carrying inline
 * "accessKey:secretKey@" credentials; otherwise AWS keys are read from the
 * SeqWare settings).
 *
 * @param path
 *            a local path, http(s) URL, or s3:// URL.
 * @return the length in bytes; 0 when an s3 path cannot be parsed into
 *         bucket/key; -1 when an HTTP server reports no content length.
 * @throws java.lang.Exception
 *             if the URL is malformed, the resource is unreachable, or S3
 *             access fails; also {@link IllegalStateException} when a local
 *             path does not exist.
 */
public static long getFileSize(String path) throws Exception {
    if (path.startsWith("http://") || path.startsWith("https://")) {
        // Pull optional inline basic-auth credentials out of the URL.
        Pattern p = Pattern.compile("(https*)://(\\S+):(\\S+)@(\\S+)");
        Matcher m = p.matcher(path);
        boolean result = m.find();
        String protocol = null;
        String user = null;
        String pass = null;
        String stringURL = path;
        if (result) {
            protocol = m.group(1);
            user = m.group(2);
            pass = m.group(3);
            stringURL = protocol + "://" + m.group(4);
        }
        try {
            URL urlObj = new URL(stringURL);
            URLConnection urlConn = urlObj.openConnection();
            if (user != null && pass != null) {
                String userPassword = user + ":" + pass;
                String encoding = Base64.encodeBase64String(userPassword.getBytes());
                urlConn.setRequestProperty("Authorization", "Basic " + encoding);
            }
            // getContentLengthLong (not getContentLength) so resources of
            // 2GB or more do not overflow the int return value.
            return urlConn.getContentLengthLong();
        } catch (MalformedURLException e) {
            Log.stderr(e.getMessage());
            e.printStackTrace();
            throw e;
        } catch (IOException e) {
            Log.stderr(e.getMessage());
            e.printStackTrace();
            throw e;
        }
    } else if (path.startsWith("s3://")) {
        String accessKey = null;
        String secretKey = null;
        // Credentials are either embedded in the URL or taken from settings.
        Pattern p = Pattern.compile("s3://(\\S+):(\\S+)@(\\S+)");
        Matcher m = p.matcher(path);
        boolean result = m.find();
        String stringURL = path;
        if (result) {
            accessKey = m.group(1);
            secretKey = m.group(2);
            stringURL = "s3://" + m.group(3);
        } else {
            try {
                HashMap<String, String> settings = (HashMap<String, String>) ConfigTools.getSettings();
                accessKey = settings.get(SqwKeys.AWS_ACCESS_KEY.getSettingKey());
                secretKey = settings.get(SqwKeys.AWS_SECRET_KEY.getSettingKey());
            } catch (Exception e) {
                e.printStackTrace();
                throw e;
            }
        }
        // now get this from S3
        AmazonS3Client s3 = new AmazonS3Client(new BasicAWSCredentials(accessKey, secretKey));
        // parse out the bucket and key
        p = Pattern.compile("s3://([^/]+)/(\\S+)");
        m = p.matcher(stringURL);
        result = m.find();
        if (result) {
            String bucket = m.group(1);
            String key = m.group(2);
            try {
                // A metadata (HEAD-style) request is enough for the length;
                // the previous getObject() call started downloading the object
                // body and leaked the unclosed S3Object HTTP stream.
                ObjectMetadata om = s3.getObjectMetadata(bucket, key);
                return om.getContentLength();
            } catch (AmazonServiceException e) {
                e.printStackTrace();
                throw e;
            } catch (AmazonClientException e) {
                e.printStackTrace();
                throw e;
            }
        } else {
            return 0;
        }
    } else {
        // Plain local filesystem path. (An HDFS branch was once sketched here
        // in commented-out form but never implemented.)
        File file = new File(path);
        if (!file.exists()) {
            throw new IllegalStateException("File not exist " + path);
        }
        return file.length();
    }
}
/**
 * Returns the stored original file name.
 *
 * @return a {@link java.lang.String} object, possibly {@code null} if never set.
 */
public String getOriginalFileName() {
    return this.originalFileName;
}
/**
 * Records the original file name.
 *
 * @param originalFileName
 *            a {@link java.lang.String} object.
 */
public void setOriginalFileName(String originalFileName) {
    this.originalFileName = originalFileName;
}
/**
 * Computes the size and MD5 checksum of a local input file and records both
 * on the supplied metadata object. When {@code metadata} is {@code null} an
 * error is logged and the method returns without doing any work.
 *
 * @param input
 *            path to the local input file.
 * @param metadata
 *            the metadata record to populate; may be {@code null}.
 * @throws RuntimeException
 *             if the file's size or checksum cannot be computed.
 */
public static void calculateInputMetadata(String input, FileMetadata metadata) {
    if (metadata == null) {
        Log.error("Could not calculate md5sum or size, no metadata provided");
        return;
    }
    // calculate and store source metadata information about input file
    Path inputPath = Paths.get(input);
    try {
        metadata.setSize(Files.size(inputPath));
    } catch (IOException ex) {
        throw new RuntimeException("Could not calculate size of input file", ex);
    }
    try {
        // Stream the file through an MD5 digest; replaces the deprecated Guava
        // Hashing.md5()/Files.hash pair with the JDK equivalent.
        java.security.MessageDigest digest = java.security.MessageDigest.getInstance("MD5");
        try (InputStream in = new BufferedInputStream(Files.newInputStream(inputPath))) {
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1) {
                digest.update(buffer, 0, read);
            }
        }
        // Zero-padded lowercase hex, matching Guava's HashCode.toString() output.
        String md5 = String.format("%032x", new java.math.BigInteger(1, digest.digest()));
        Log.info("MD5: " + md5);
        metadata.setMd5sum(md5);
    } catch (IOException | java.security.NoSuchAlgorithmException ex) {
        throw new RuntimeException("Could not calculate md5sum for input file", ex);
    }
}
}