package net.sourceforge.seqware.common.util.filetools; import com.amazonaws.AmazonClientException; import com.amazonaws.AmazonServiceException; import com.amazonaws.ClientConfiguration; import com.amazonaws.HttpMethod; import com.amazonaws.Protocol; import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.retry.PredefinedRetryPolicies; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.model.GeneratePresignedUrlRequest; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectMetadata; import com.amazonaws.services.s3.model.S3Object; import com.amazonaws.services.s3.transfer.Transfer; import com.amazonaws.services.s3.transfer.Transfer.TransferState; import com.amazonaws.services.s3.transfer.TransferManager; import com.amazonaws.services.s3.transfer.Upload; import com.google.common.hash.HashCode; import com.google.common.hash.Hashing; import io.seqware.pipeline.SqwKeys; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.security.Key; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.Objects; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.crypto.Cipher; import javax.crypto.CipherInputStream; import javax.crypto.CipherOutputStream; import javax.crypto.spec.SecretKeySpec; import net.sourceforge.seqware.common.module.FileMetadata; import net.sourceforge.seqware.common.util.Log; import net.sourceforge.seqware.common.util.configtools.ConfigTools; import 
org.apache.commons.codec.binary.Base64; //import org.apache.hadoop.conf.Configuration; //import org.apache.hadoop.fs.FSDataInputStream; //import org.apache.hadoop.fs.FSDataOutputStream; //import org.apache.hadoop.fs.FileSystem; //import org.apache.hadoop.fs.FsUrlStreamHandlerFactory; //import org.apache.hadoop.fs.Path; //import org.apache.hadoop.io.IOUtils; /** * <p> * ProvisionFilesUtil class. * </p> * * @author boconnor * @version $Id: $Id */ public class ProvisionFilesUtil { protected final int READ_ATTEMPTS = 1000; protected long inputSize = 0L; protected long position = 0L; protected String fileName = ""; protected String originalFileName = ""; protected File inputFile = null; protected Key dataEncryptionKey = null; protected Key dataDecryptionKey = null; private boolean verbose; private AmazonS3Client s3; private static final String DATA_ENCRYPTION_ALGORITHM = "DESede"; private static final int MAXRETRY = 3; /** * Default ctor. */ public ProvisionFilesUtil() { } ; /** * Set verbose mode ctor. * * @param verbose * a boolean. */ public ProvisionFilesUtil(boolean verbose) { this.setVerbose(true); } /** * Gets the file name. Available after the getSourceReader has been invoked. * * @return String representation of the proceeded file name */ public String getFileName() { return fileName; } /** * Creates symlink of input to output. * * @param output * a {@link java.lang.String} object. * @param fullOutputPath * @param input * a {@link java.lang.String} object. * @return a boolean. 
*/ public boolean createSymlink(String output, boolean fullOutputPath, String input) { boolean retry = true; int retryCount = 0; while (retry && retryCount < ProvisionFilesUtil.MAXRETRY) { try { // no point is having an input file stream since just make a sym link Runtime rt = Runtime.getRuntime(); Process result; // FIXME: in JDK 7 this will be replaced with an API call String exe = "ln" + " -s " + input + " " + output + File.separator + fileName; if (fullOutputPath) { exe = "ln" + " -s " + input + " " + output; } Log.debug(exe); result = rt.exec(exe); try { // see if the command worked if (result.waitFor() == 0) { // then we're done and can exit the retry loop retry = false; } } catch (Exception e) { // see http://www.javaworld.com/jw-12-2000/jw-1229-traps.html?page=2 for info on this exception Log.error(e.getMessage()); } } catch (IOException e) { Log.error(e.getMessage()); } retryCount++; } // now check to see if the file is actually there! File outputFilePath = new File(output + File.separator + fileName); if (fullOutputPath) { outputFilePath = new File(output); } if (outputFilePath.exists()) { return true; } else { Log.error("Output file does not exist! " + outputFilePath.getAbsolutePath()); return false; } } /** * Gets cipher by DecryptKey. * * @param decryptKey * a {@link java.lang.String} object. * @return Cipher object */ public Cipher getDecryptCipher(String decryptKey) { Cipher cipher; setDataDecryptionKeyString(decryptKey); try { cipher = createDecryptCipherInternal(); } catch (Exception e) { Log.error(e.getMessage()); cipher = null; } return cipher; } /** * Gets cipher by EncryptKey. * * @param encryptKey * a {@link java.lang.String} object. 
* @return Cipher object */ public Cipher getEncryptCipher(String encryptKey) { Cipher cipher; setDataEncryptionKeyString(encryptKey); try { cipher = createEncryptCipherInternal(); } catch (Exception e) { Log.error(e.getMessage()); cipher = null; } return cipher; } public File copyToFile(BufferedInputStream reader, String output, int bufLen, String input) { return copyToFile(reader, output, false, bufLen, input, null, null, null); } /** * Copy reader into output using Cipher. * * @param reader * a {@link java.io.BufferedInputStream} object. * @param output * a {@link java.lang.String} object. * @param fullOutputPath * @param bufLen * a int. * @param input * a {@link java.lang.String} object. * @param metadata * store the file size and md5sum here after checking the destination * @return written File object * @param decryptCipher * a {@link javax.crypto.Cipher} object. * @param encryptCipher * a {@link javax.crypto.Cipher} object. */ public File copyToFile(BufferedInputStream reader, String output, boolean fullOutputPath, int bufLen, String input, Cipher decryptCipher, Cipher encryptCipher, FileMetadata metadata) { OutputStream writer; // then it's a remote input (or we want to copy regardless) just a // directory output // figure out the output File outputObj = new File(output + File.separator + fileName); if (fullOutputPath) { outputObj = new File(output); } outputObj.getParentFile().mkdirs(); // now write input to output try { int attempts = 0; // READ_ATTEMPTS writer = new FileOutputStream(outputObj); // add decryption to the output stream if (decryptCipher != null) { writer = new CipherOutputStream(writer, decryptCipher); } // add encryption to the output stream if (encryptCipher != null) { writer = new CipherOutputStream(writer, encryptCipher); } // wrap in a buffered stream writer = new BufferedOutputStream(writer, bufLen); boolean cont = true; // count of positions, every thousand do an update long positionCount = 0; while (cont) { int data = -1; 
positionCount++; // calculate stats int divisions = (int) (this.inputSize / 1000); if (positionCount > 0 && divisions > 0 && positionCount % divisions == 0 && isVerbose()) { float percent = (positionCount * 100.0f) / this.inputSize; System.out.printf(" + completed: %.2f", percent); System.out.print("%\r"); } while (true) { try { data = reader.read(); this.position++; if (data == -1) { cont = false; } break; } catch (IOException e) { attempts++; Log.error("There has been an exception while reading the stream: " + e.getMessage()); if (attempts > this.READ_ATTEMPTS) { Log.error("Giving up after " + attempts + " attempts!"); return null; } Log.error("Trying to recover from read or write error, opening the reader at position " + this.position); // FIXME: notice I'm assuming this is a problem with the reader try { reader.close(); } catch (IOException e1) { Log.error(e1.getMessage()); } try { Thread.sleep(2000); } catch (java.lang.InterruptedException e2) { Log.error("thread sleep failed: " + e2.getMessage()); } reader = getSourceReader(input, bufLen, this.position); if (reader == null) { return null; } } } while (true && cont) { try { writer.write(data); break; } catch (IOException e) { attempts++; Log.error("There has been an exception while writing the stream: " + e.getMessage()); if (attempts > this.READ_ATTEMPTS) { Log.error("Giving up after " + attempts + " attempts!"); return null; } try { Thread.sleep(2000); } catch (java.lang.InterruptedException e2) { Log.error("thread sleep failed: " + e2.getMessage()); } } } // possibly print out status } // print newline Log.stdout(""); // close reader/writer reader.close(); writer.close(); } catch (FileNotFoundException e) { Log.error(e.getMessage()); return null; } catch (IOException e) { Log.error(e.getMessage()); return null; } // now that the copy is complete, make sure the file out and the input size are equal if (this.inputSize != outputObj.length() && decryptCipher == null && encryptCipher == null) { Log.error("The 
output file size of " + outputObj.length() + " and the input file size of " + this.inputSize + " do not match so the file provisioning failed!"); return null; } // do the calculations to record file size and md5sum to see if they are consistent between source and destination FileMetadata outputMetadata = new FileMetadata(); if (decryptCipher == null && encryptCipher == null && metadata != null) { Map<String, String> settings = ConfigTools.getSettings(); if (settings.containsKey(SqwKeys.SW_PROVISION_FILES_MD5.getSettingKey())) { String value = settings.get(SqwKeys.SW_PROVISION_FILES_MD5.getSettingKey()); boolean usemd5 = Boolean.valueOf(value); if (usemd5) { calculateInputMetadata(outputObj.getAbsolutePath(), outputMetadata); } else { return outputObj; } } else { calculateInputMetadata(outputObj.getAbsolutePath(), outputMetadata); } // verify source and destination values. this does the file size check again but we'll probably delete the former code given a // chance if (!Objects.equals(metadata.getSize(), outputMetadata.getSize())) { Log.error("The output file size of " + outputMetadata.getSize() + " and the input file size of " + metadata.getSize() + " do not match so the file provisioning failed!"); return null; } if (!Objects.equals(metadata.getMd5sum(), outputMetadata.getMd5sum())) { Log.error("The output md5sum of " + outputMetadata.getMd5sum() + " and the input md5sum of " + metadata.getMd5sum() + " do not match so the file provisioning failed!"); return null; } } return outputObj; } /** * Not supported yet. * * @return a boolean. 
*/ public boolean putToHttp() { // TODO: not going to support HTTP PUT initially Log.warn("HTTP upload not yet supported"); return (false); } // public boolean putToHDFS(InputStream reader, String output, boolean fullOutputPath){ // return(putToHDFS(reader, output, fullOutputPath, null, null)); // } // // /** // * The output path is an HDFS URL that look like // * hdfs://<host>/<path>/<filename> // * // * TODO: // * // * 1) encryption/decryption // * // * @param reader // * @param output // * @param decryptCipher // * @param encryptCipher // * @return // */ // public boolean putToHDFS(InputStream reader, String output, boolean fullOutputPath, Cipher decryptCipher, Cipher encryptCipher) { // try { // // // the final URL // String outputStr = null; // // // first, try and figure out if the output is a dir or an actual file // if (fullOutputPath) { // outputStr = output; // } else { // if (output.endsWith("/")) { // outputStr = output + this.getFileName(); // } else { // outputStr = output + "/" + this.getFileName(); // } // } // // // Hadoop stuff // Configuration conf = new Configuration(); // // FIXME: is this OK to pass in the complete URL? // FileSystem fs = FileSystem.get(URI.create(outputStr), conf); // Path outputPath = new Path(outputStr); // // // delete if it already exists // if (fs.exists(outputPath) && !fs.isDirectory(outputPath)) { // // delete it and re-upload // fs.delete(outputPath, false); // } // // OutputStream out = fs.create(outputPath); // IOUtils.copyBytes(reader, out, 4096, true); // // // Close all the file descripters // reader.close(); // out.close(); // fs.close(); // return(true); // // } catch (IOException ex) { // Logger.getLogger(ProvisionFilesUtil.class.getName()).log(Level.SEVERE, "There was a problem in putToHDFS", ex); // } // // return(false); // // } /** * Copy file using reader into output. * * @param reader * a {@link java.io.BufferedInputStream} object. * @param output * a {@link java.lang.String} object. 
* @param fullOutputPath * @return true if OK */ public boolean putToS3(BufferedInputStream reader, String output, boolean fullOutputPath) { return (putToS3(reader, output, fullOutputPath, ClientConfiguration.DEFAULT_SOCKET_TIMEOUT, ClientConfiguration.DEFAULT_MAX_CONNECTIONS, PredefinedRetryPolicies.DEFAULT_MAX_ERROR_RETRY, ClientConfiguration.DEFAULT_SOCKET_TIMEOUT)); } /** * * @param reader * @param output * @param fullOutputPath * @param connectionTimeout * @param maxConnections * @param maxErrorRetry * @param socketTimeout * @return */ public boolean putToS3(BufferedInputStream reader, String output, boolean fullOutputPath, int connectionTimeout, int maxConnections, int maxErrorRetry, int socketTimeout) { return (putToS3(reader, output, fullOutputPath, connectionTimeout, maxConnections, maxErrorRetry, socketTimeout, null, null)); } /** * Copy file using reader into output. * * @param reader * a {@link java.io.InputStream} object. * @param output * a {@link java.lang.String} object. * @param fullOutputPath * @param decryptCipher * a {@link javax.crypto.Cipher} object. * @param encryptCipher * a {@link javax.crypto.Cipher} object. 
* @return true if OK */ public boolean putToS3(InputStream reader, String output, boolean fullOutputPath, Cipher decryptCipher, Cipher encryptCipher) { return putToS3(reader, output, fullOutputPath, ClientConfiguration.DEFAULT_SOCKET_TIMEOUT, ClientConfiguration.DEFAULT_MAX_CONNECTIONS, PredefinedRetryPolicies.DEFAULT_MAX_ERROR_RETRY, ClientConfiguration.DEFAULT_SOCKET_TIMEOUT, decryptCipher, encryptCipher); } /** * * @param reader * @param output * @param fullOutputPath * @param connectionTimeout * @param maxConnections * @param maxErrorRetry * @param socketTimeout * @param decryptCipher * @param encryptCipher * @return */ public boolean putToS3(InputStream reader, String output, boolean fullOutputPath, int connectionTimeout, int maxConnections, int maxErrorRetry, int socketTimeout, Cipher decryptCipher, Cipher encryptCipher) { // can encode the access key and secret key within the URL // see http://www.cs.rutgers.edu/~watrous/user-pass-url.html Pattern p = Pattern.compile("s3://(\\S+):(\\S+)@(\\S+)"); Matcher m = p.matcher(output); boolean result = m.find(); String accessKey; String secretKey; String stringURL = output; if (result) { accessKey = m.group(1); secretKey = m.group(2); stringURL = "s3://" + m.group(3); } else { // get the access/secret key from the .seqware/settings file try { HashMap<String, String> settings = (HashMap<String, String>) ConfigTools.getSettings(); accessKey = settings.get(SqwKeys.AWS_ACCESS_KEY.getSettingKey()); secretKey = settings.get(SqwKeys.AWS_SECRET_KEY.getSettingKey()); } catch (Exception e) { Log.error(e.getMessage()); return false; } } if (accessKey == null || secretKey == null) { Log.error("Couldn't find access or secret key for S3 output so will exit!"); return false; } // parse out the bucket and key p = Pattern.compile("s3://([^/]+)/*(\\S*)"); m = p.matcher(stringURL); result = m.find(); if (result) { String bucket = m.group(1); String key = m.group(2); if (key == null) { key = ""; } if (key.endsWith("/")) { // then add 
fileName to the target key = key + fileName; } else if (!key.endsWith(fileName) && !fullOutputPath) { // then add a / then fileName to the target key = key + "/" + fileName; } ObjectMetadata omd = new ObjectMetadata(); // this is the size of what's being read omd.setContentLength(this.inputSize); // just encrypt everything via Server-Side encryption, see // http://docs.amazonwebservices.com/AmazonS3/latest/dev/SSEUsingJavaSDK.html omd.setServerSideEncryption(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); BasicAWSCredentials credentials = new BasicAWSCredentials(accessKey, secretKey); ClientConfiguration config = new ClientConfiguration(); config.setConnectionTimeout(connectionTimeout); config.setMaxConnections(maxConnections); config.setMaxErrorRetry(maxErrorRetry); config.setProtocol(Protocol.HTTPS); config.setSocketTimeout(socketTimeout); AmazonS3Client s3 = new AmazonS3Client(credentials, config); TransferManager tm = new TransferManager(s3); // if reading from a local file and not decrypting or encrypting then we can use the API call below that works on a file if (this.inputFile != null && decryptCipher == null && encryptCipher == null) { // just go ahead and close this, won't use it try { reader.close(); } catch (IOException e1) { Log.error(e1.getMessage()); } Log.info("S3 WRITES: BUCKET: " + bucket + " KEY: " + key + " INPUT FILE: " + inputFile); Upload upload = tm.upload(bucket, key, this.inputFile); boolean uploadStatus = (waitForS3Upload(upload)); if (!uploadStatus) { Log.error("The S3 upload returned false!"); tm.shutdownNow(); return (false); } // now that the copy is complete, make sure the file out and the input size are equal try { ObjectMetadata om = s3.getObjectMetadata(bucket, key); if (this.inputSize != om.getContentLength()) { Log.error("The S3 output file size of " + om.getContentLength() + " and the input file size of " + this.inputSize + " do not match so the file provisioning failed!"); tm.shutdownNow(); return (false); } } catch (Exception 
e) { Log.error("Can't get metadata on key: " + key + " bucket: " + bucket); } } else { // add decryption to the reader if (decryptCipher != null) { reader = new CipherInputStream(reader, decryptCipher); } // add encryption to the output stream if (encryptCipher != null) { reader = new CipherInputStream(reader, encryptCipher); } // trigger the upload Transfer myUpload = tm.upload(bucket, key, reader, omd); boolean uploadStatus = waitForS3Upload(myUpload); if (!uploadStatus) { tm.shutdownNow(); Log.error("S3 Upload failed:" + myUpload); return (false); } } // need to shut down the transfer manager tm.shutdownNow(); // this is how to do it without multipart, not usable for large files! // s3.putObject(bucket, key, reader, new ObjectMetadata()); } else { Log.error("Unable to parse a bucket and file name from " + stringURL + " it should be in the form s3://<bucket>/<key>/ or s3://<bucket>/"); return false; } return true; } private boolean waitForS3Upload(Transfer myUpload) { boolean success = true; try { // FIXME: this doesn't try to reconnect via a new reader if things go badly while (myUpload.isDone() == false) { if (isVerbose()) { float percent = (myUpload.getProgress().getBytesTransfered() * 100.0f) / this.inputSize; System.out.printf(" + completed: %.2f", percent); System.out.print("%\r"); Log.info("Transfer: " + myUpload.getDescription()); Log.info(" - State: " + myUpload.getState()); Log.info(" - Progress: " + myUpload.getProgress().getBytesTransfered() + " of " + this.inputSize); } // Do work while we wait for our upload to complete... if (myUpload.getState() == TransferState.Failed) { Log.error("Failure Uploading: " + myUpload.getDescription()); return false; } Thread.sleep(2000); } if (isVerbose()) { System.out.print("\n"); } } catch (InterruptedException e) { Log.error(e.getMessage()); return false; } return (success); } /** * <p> * getS3Url. * </p> * * @param input * a {@link java.lang.String} object. * @return a {@link java.net.URL} object. 
*/ public URL getS3Url(String input) { URL url = null; if (input.startsWith("s3://")) { Pattern p = Pattern.compile("s3://(\\S+):(\\S+)@(\\S+)"); Matcher m = p.matcher(input); boolean result = m.find(); String accessKey = null; String secretKey = null; String localUrl = input; if (result) { accessKey = m.group(1); secretKey = m.group(2); localUrl = "s3://" + m.group(3); } else { // get the access/secret key from the .seqware/settings file try { HashMap<String, String> settings = (HashMap<String, String>) ConfigTools.getSettings(); accessKey = settings.get(SqwKeys.AWS_ACCESS_KEY.getSettingKey()); secretKey = settings.get(SqwKeys.AWS_SECRET_KEY.getSettingKey()); } catch (Exception e) { e.printStackTrace(); return null; } } if (accessKey == null || secretKey == null) { return null; } // parse out the bucket and key p = Pattern.compile("s3://([^/]+)/(\\S+)"); m = p.matcher(localUrl); if (m.find()) { String bucket = m.group(1); String key = m.group(2); // now get this from S3 s3 = new AmazonS3Client(new BasicAWSCredentials(accessKey, secretKey)); url = s3.generatePresignedUrl(new GeneratePresignedUrlRequest(bucket, key, HttpMethod.GET).withExpiration(new Date( new Date().getTime() + 10000))); } } return url; } /** * <p> * getS3Url. * </p> * * @param input * a {@link java.lang.String} object. * @param accessKey * a {@link java.lang.String} object. * @param secretKey * a {@link java.lang.String} object. * @return a {@link java.net.URL} object. 
*/ public URL getS3Url(String input, String accessKey, String secretKey) { URL url = null; Pattern p = Pattern.compile("s3://(\\S+):(\\S+)@(\\S+)"); Matcher m = p.matcher(input); boolean result = m.find(); String stringUrl = input; if (result) { accessKey = m.group(1); secretKey = m.group(2); stringUrl = "s3://" + m.group(3); } // parse out the bucket and key p = Pattern.compile("s3://([^/]+)/(\\S+)"); m = p.matcher(stringUrl); if (m.find()) { String bucket = m.group(1); String key = m.group(2); // now get this from S3 s3 = new AmazonS3Client(new BasicAWSCredentials(accessKey, secretKey)); url = s3.generatePresignedUrl(new GeneratePresignedUrlRequest(bucket, key, HttpMethod.GET).withExpiration(new Date(new Date() .getTime() + 10000))); } return url; } /** * This attempts to resume if passed in startPosition > 0. * * @param input * a {@link java.lang.String} object. * @param bufLen * a int. * @param startPosition * a long. * @return reader of input file */ public BufferedInputStream getSourceReader(String input, int bufLen, long startPosition) { this.originalFileName = input; BufferedInputStream reader = null; this.inputFile = null; if (input.startsWith("s3://")) { reader = getS3InputStream(input, bufLen, startPosition); } else if (input.startsWith("http://") || input.startsWith("https://")) { reader = getHttpInputStream(input, bufLen, startPosition); }/** * else if (input.startsWith("hdfs://")) { reader = getHDFSInputStream(input, bufLen, startPosition); } */ else { reader = getFileInputStream(input, startPosition); } return reader; } private BufferedInputStream getFileInputStream(String input, long startPosition) { BufferedInputStream reader = null; try { this.inputFile = new File(input); String[] paths = inputFile.getAbsolutePath().split("/"); fileName = paths[paths.length - 1]; this.inputSize = inputFile.length(); reader = new BufferedInputStream(new FileInputStream(new File(input))); // does this actually work? 
// see // http://download.oracle.com/javase/1.4.2/docs/api/java/io/InputStream.html#skip%28long%29 reader.skip(startPosition); } catch (FileNotFoundException e) { Log.error(e.getMessage()); return null; } catch (IOException e) { Log.error(e.getMessage()); return null; } return reader; } /** * <p> * getHttpInputStream. * </p> * * @param input * a {@link java.lang.String} object. * @param bufLen * a int. * @param startPosition * a long. * @return a {@link java.io.BufferedInputStream} object. */ public BufferedInputStream getHttpInputStream(String input, int bufLen, long startPosition) { BufferedInputStream reader = null; Pattern p = Pattern.compile("(https*)://(\\S+):(\\S+)@(\\S+)"); Matcher m = p.matcher(input); boolean result = m.find(); String protocol; String user = null; String pass = null; String stringURL = input; if (result) { protocol = m.group(1); user = m.group(2); pass = m.group(3); stringURL = protocol + "://" + m.group(4); } URL urlObj = null; try { urlObj = new URL(stringURL); URLConnection urlConn = urlObj.openConnection(); if (user != null && pass != null) { String userPassword = user + ":" + pass; String encoding = Base64.encodeBase64String(userPassword.getBytes()); urlConn.setRequestProperty("Authorization", "Basic " + encoding); } // deal with resumption, look at // http://stackoverflow.com/questions/6237079/resume-http-file-download-in-java urlConn.setRequestProperty("Range", "bytes=" + startPosition + "-"); // download data and write out p = Pattern.compile("://([^/]+)/(\\S+)"); m = p.matcher(stringURL); result = m.find(); if (result) { // String host = m.group(1); String path = m.group(2); String[] paths = path.split("/"); this.fileName = paths[paths.length - 1]; this.inputSize = urlConn.getContentLength(); reader = new BufferedInputStream(urlConn.getInputStream(), bufLen); } else { Log.error("getHttpInputStream doesn't know how to deal with URL: " + stringURL); return null; } } catch (MalformedURLException e) { Log.error(e.getMessage()); 
return null; } catch (IOException e) { Log.error(e.getMessage()); return null; } return reader; } // public BufferedInputStream getHDFSInputStream(String input, int bufLen, long startPosition) { // // try { // // BufferedInputStream reader = null; // // // figure out the filename // String[] path = input.split("/"); // this.fileName = path[path.length - 1]; // // // Hadoop stuff // Configuration conf = new Configuration(); // // FIXME: is this OK to pass in the complete URL? // FileSystem fs = FileSystem.get(URI.create(input), conf); // Path inputPath = new Path(input); // // // should exist and not be a directory // if (!fs.exists(inputPath) || fs.isDirectory(inputPath)) { // Log.error("Can't read the input file "+input+", it is either missing or a directory!"); // return(null); // } // // // open the HDFS input stream // reader = new BufferedInputStream(fs.open(inputPath, bufLen)); // this.inputSize = fs.getFileStatus(inputPath).getBlockSize(); // // // move forward if given an offset // if (startPosition > 0) { // reader.skip(startPosition); // } // // // and just return this // return(reader); // // } catch (IOException ex) { // Logger.getLogger(ProvisionFilesUtil.class.getName()).log(Level.SEVERE, null, ex); // } // // // default null // return(null); // } /** * <p> * getS3InputStream. * </p> * * @param input * a {@link java.lang.String} object. * @param bufLen * a int. * @param startPosition * a long. * @param accessKey * a {@link java.lang.String} object. * @param secretKey * a {@link java.lang.String} object. * @return a {@link java.io.BufferedInputStream} object. 
*/ public BufferedInputStream getS3InputStream(String input, int bufLen, long startPosition, String accessKey, String secretKey) { BufferedInputStream reader = null; S3Object object = null; // now get this from S3 s3 = new AmazonS3Client(new BasicAWSCredentials(accessKey, secretKey)); // parse out the bucket and key Pattern p = Pattern.compile("s3://([^/]+)/(\\S+)"); Matcher m = p.matcher(input); boolean result = m.find(); if (result) { String bucket = m.group(1); String key = m.group(2); // now figure out the actual file name from the input String[] paths = key.split("/"); this.fileName = paths[paths.length - 1]; try { // I hope this works to restart downloads from fixed locations GetObjectRequest gor = new GetObjectRequest(bucket, key); this.inputSize = s3.getObject(gor).getObjectMetadata().getContentLength(); gor.setRange(startPosition, inputSize); object = s3.getObject(gor); reader = new BufferedInputStream(object.getObjectContent(), bufLen); } catch (AmazonServiceException e) { Log.error(e.getMessage()); return null; } catch (AmazonClientException e) { Log.error(e.getMessage()); return null; } } else { Log.error("Couldn't figure out the bucket and key from the URL provided: " + input); return null; } return reader; } /** * <p> * getS3InputStream. * </p> * * @param input * a {@link java.lang.String} object. * @param bufLen * a int. * @param startPosition * a long. * @return a {@link java.io.BufferedInputStream} object. 
*/ public BufferedInputStream getS3InputStream(String input, int bufLen, long startPosition) { String accessKey = null; String secretKey = null; // can encode the access key and secret key within the URL // see http://www.cs.rutgers.edu/~watrous/user-pass-url.html Pattern p = Pattern.compile("s3://(\\S+):(\\S+)@(\\S+)"); Matcher m = p.matcher(input); boolean result = m.find(); String stringURL = input; if (result) { accessKey = m.group(1); secretKey = m.group(2); stringURL = "s3://" + m.group(3); } // if the access and secret access keys are not found in the URL then pull from settings file if (!result) { // get the access/secret key from the .seqware/settings file try { HashMap<String, String> settings = (HashMap<String, String>) ConfigTools.getSettings(); accessKey = settings.get(SqwKeys.AWS_ACCESS_KEY.getSettingKey()); secretKey = settings.get(SqwKeys.AWS_SECRET_KEY.getSettingKey()); } catch (Exception e) { Log.error(e.getMessage()); return null; } } if (accessKey == null || secretKey == null) { Log.error("Couldn't continue because missing S3 access key and/or secret key"); return null; } return getS3InputStream(stringURL, bufLen, startPosition, accessKey, secretKey); } // utils /** * Sets data encryption key. * * @param value * BASE64-encoded key */ public void setDataEncryptionKeyString(String value) { byte[] bytes = getBase64().decode(value); dataEncryptionKey = new SecretKeySpec(bytes, DATA_ENCRYPTION_ALGORITHM); } /** * Sets data decryption key. * * @param value * BASE64-encoded key */ public void setDataDecryptionKeyString(String value) { byte[] bytes = getBase64().decode(value); dataDecryptionKey = new SecretKeySpec(bytes, DATA_ENCRYPTION_ALGORITHM); } private static Base64 getBase64() { return new Base64(Integer.MAX_VALUE, new byte[0]); } /** * <p> * isVerbose. * </p> * * @return a boolean. */ public boolean isVerbose() { return verbose; } /** * Enable class verbose mode. * * @param verbose * a boolean. 
*/ public void setVerbose(boolean verbose) { this.verbose = verbose; } private Cipher createDecryptCipherInternal() throws Exception { Cipher cipher = Cipher.getInstance(DATA_ENCRYPTION_ALGORITHM); cipher.init(Cipher.DECRYPT_MODE, dataDecryptionKey); return cipher; } private Cipher createEncryptCipherInternal() throws Exception { Cipher cipher = Cipher.getInstance(DATA_ENCRYPTION_ALGORITHM); cipher.init(Cipher.ENCRYPT_MODE, dataEncryptionKey); return cipher; } /** * Creates abstract pathname. * * @param folderStore * a {@link java.lang.String} object. * @param email * a {@link java.lang.String} object. * @param fileName * a {@link java.lang.String} object. * @return a {@link java.lang.String} object. */ public static String createTargetPath(String folderStore, String email, String fileName) { String fileDownlodName = fileName.trim(); String separator = java.io.File.separator; Date dateNow = new Date(); SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmssSSS"); StringBuilder strNow = new StringBuilder(dateFormat.format(dateNow)); String pathCurrDir = (new StringBuilder()).append(folderStore).append(email).append(separator).append(strNow).append(separator) .toString(); java.io.File currDir = new java.io.File(pathCurrDir); if (!currDir.exists()) { currDir.mkdirs(); } String targetPath = (new StringBuilder()).append(pathCurrDir).append(fileDownlodName).toString(); return targetPath; } /** * Creates abstract pathname. * * @param folderStore * a {@link java.lang.String} object. * @param email * a {@link java.lang.String} object. * @return a {@link java.lang.String} object. 
*/
    public static String createTargetDirectory(String folderStore, String email) {
        String separator = java.io.File.separator;
        // Millisecond-resolution timestamp used as the name of the innermost directory.
        String timestamp = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date());
        // No separator is inserted between folderStore and email; callers presumably pass
        // folderStore with a trailing separator already -- TODO confirm against callers.
        String targetPath = folderStore + email + separator + timestamp + separator;

        java.io.File targetDir = new java.io.File(targetPath);
        if (!targetDir.exists() && !targetDir.mkdirs() && !targetDir.isDirectory()) {
            // Previously a failed mkdirs() went unnoticed; the path is still returned so
            // existing callers keep working, but the failure is now reported.
            Log.stderr("Could not create target directory " + targetPath);
        }
        return targetPath;
    }

    /** Matches http(s) URLs that embed credentials: protocol://user:password@rest. */
    private static final Pattern HTTP_CREDENTIALS_URL = Pattern.compile("(https*)://(\\S+):(\\S+)@(\\S+)");

    /** Matches S3 URLs that embed credentials: s3://accessKey:secretKey@rest. */
    private static final Pattern S3_CREDENTIALS_URL = Pattern.compile("s3://(\\S+):(\\S+)@(\\S+)");

    /** Splits a credential-free S3 URL into bucket (group 1) and key (group 2). */
    private static final Pattern S3_BUCKET_AND_KEY = Pattern.compile("s3://([^/]+)/(\\S+)");

    /**
     * <p>
     * Returns the size in bytes of the resource that <code>path</code> points to.
     * </p>
     * <ul>
     * <li><code>http://</code> / <code>https://</code>: the content length reported by the server; optional
     * <code>user:password@</code> credentials in the URL are sent as HTTP Basic authentication.</li>
     * <li><code>s3://</code>: the content length from the object's metadata; credentials are taken from the URL
     * (<code>s3://accessKey:secretKey@bucket/key</code>) or, if absent, from the SeqWare settings.</li>
     * <li>anything else: treated as a local file path.</li>
     * </ul>
     *
     * @param path
     *            a {@link java.lang.String} object.
     * @return the size in bytes; for HTTP whatever {@link URLConnection#getContentLengthLong()} reports; 0 for an S3
     *         URL from which no bucket and key could be parsed.
     * @throws java.lang.Exception
     *             on a malformed URL, an I/O or S3 failure, a failure reading the settings, or (as
     *             {@link IllegalStateException}) when a local file does not exist.
     */
    public static long getFileSize(String path) throws Exception {
        if (path.startsWith("http://") || path.startsWith("https://")) {
            return httpContentLength(path);
        } else if (path.startsWith("s3://")) {
            return s3ObjectSize(path);
        }
        // HDFS support is currently disabled; the former branch is kept for reference:
        // else if (path.startsWith("hdfs://")) {
        //     Configuration conf = new Configuration();
        //     // FIXME: is this OK to pass in the complete URL?
        //     FileSystem fs = FileSystem.get(URI.create(path), conf);
        //     Path hdfsPath = new Path(path);
        //     return(fs.getFileStatus(hdfsPath).getBlockSize());
        // }
        File file = new File(path);
        if (!file.exists()) {
            throw new IllegalStateException("File not exist " + path);
        }
        return file.length();
    }

    /** Asks the HTTP(S) server for the content length of the given URL, using Basic auth if the URL embeds credentials. */
    private static long httpContentLength(String path) throws IOException {
        String user = null;
        String pass = null;
        String stringURL = path;

        Matcher m = HTTP_CREDENTIALS_URL.matcher(path);
        if (m.find()) {
            user = m.group(2);
            pass = m.group(3);
            // Rebuild the URL without the credentials part.
            stringURL = m.group(1) + "://" + m.group(4);
        }

        try {
            URLConnection urlConn = new URL(stringURL).openConnection();
            if (user != null && pass != null) {
                String userPassword = user + ":" + pass;
                String encoding = Base64.encodeBase64String(userPassword.getBytes());
                urlConn.setRequestProperty("Authorization", "Basic " + encoding);
            }
            // The long variant is required: getContentLength() is an int and cannot
            // represent bodies larger than Integer.MAX_VALUE bytes.
            return urlConn.getContentLengthLong();
        } catch (IOException e) {
            // Also covers MalformedURLException, which was handled identically before.
            Log.stderr(e.getMessage());
            e.printStackTrace();
            throw e;
        }
    }

    /** Looks up the content length of an S3 object from its metadata, without downloading the object. */
    private static long s3ObjectSize(String path) throws Exception {
        String accessKey;
        String secretKey;
        String stringURL = path;

        Matcher credentials = S3_CREDENTIALS_URL.matcher(path);
        if (credentials.find()) {
            accessKey = credentials.group(1);
            secretKey = credentials.group(2);
            stringURL = "s3://" + credentials.group(3);
        } else {
            // No credentials in the URL: fall back to the configured keys.
            try {
                Map<String, String> settings = ConfigTools.getSettings();
                accessKey = settings.get(SqwKeys.AWS_ACCESS_KEY.getSettingKey());
                secretKey = settings.get(SqwKeys.AWS_SECRET_KEY.getSettingKey());
            } catch (Exception e) {
                e.printStackTrace();
                throw e;
            }
        }

        // The client is created before the URL is parsed, as before, so credential
        // problems surface even for URLs that lack a key.
        AmazonS3Client s3 = new AmazonS3Client(new BasicAWSCredentials(accessKey, secretKey));

        Matcher location = S3_BUCKET_AND_KEY.matcher(stringURL);
        if (!location.find()) {
            return 0;
        }
        String bucket = location.group(1);
        String key = location.group(2);
        try {
            // Metadata-only request: getObject() would open the object's content stream,
            // which then has to be closed to release the underlying connection.
            return s3.getObjectMetadata(bucket, key).getContentLength();
        } catch (AmazonClientException e) {
            // Also covers AmazonServiceException, which was handled identically before.
            e.printStackTrace();
            throw e;
        }
    }

    /**
     * <p>
     * Getter for the field <code>originalFileName</code>.
     * </p>
     *
     * @return a {@link java.lang.String} object.
     */
    public String getOriginalFileName() {
        return originalFileName;
    }

    /**
     * <p>
     * Setter for the field <code>originalFileName</code>.
     * </p>
     *
     * @param originalFileName
     *            a {@link java.lang.String} object.
     */
    public void setOriginalFileName(String originalFileName) {
        this.originalFileName = originalFileName;
    }

    /**
     * <p>
     * Stores the size and the MD5 checksum of a local input file in the given metadata object.
     * </p>
     * If <code>metadata</code> is null an error is logged and nothing else happens.
     *
     * @param input
     *            path of the local input file.
     * @param metadata
     *            receives the size and md5sum; may be null.
     * @throws RuntimeException
     *             wrapping the {@link IOException} if the size or the checksum cannot be determined.
     */
    public static void calculateInputMetadata(String input, FileMetadata metadata) throws RuntimeException {
        if (metadata == null) {
            Log.error("Could not calculate md5sum or size, no metadata provided");
            return;
        }

        Path inputPath = Paths.get(input);

        try {
            metadata.setSize(Files.size(inputPath));
        } catch (IOException ex) {
            throw new RuntimeException("Could not calculate size of input file", ex);
        }

        try {
            // Fully qualified: java.nio.file.Files is the imported Files in this file.
            HashCode hc = com.google.common.io.Files.hash(inputPath.toFile(), Hashing.md5());
            Log.info("MD5: " + hc.toString());
            metadata.setMd5sum(hc.toString());
        } catch (IOException ex) {
            throw new RuntimeException("Could not calculate md5sum for input file", ex);
        }
    }
}