package net.sourceforge.seqware.pipeline.modules.utilities; import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.model.Bucket; import com.amazonaws.services.s3.model.CannedAccessControlList; import com.amazonaws.services.s3.model.ListObjectsRequest; import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.S3ObjectSummary; import io.seqware.pipeline.SqwKeys; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.StringWriter; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; import java.util.regex.Matcher; import java.util.regex.Pattern; import joptsimple.OptionException; import joptsimple.OptionParser; import joptsimple.OptionSet; import net.sourceforge.seqware.common.module.ReturnValue; import net.sourceforge.seqware.common.util.Log; import net.sourceforge.seqware.common.util.configtools.ConfigTools; import net.sourceforge.seqware.pipeline.module.Module; import net.sourceforge.seqware.pipeline.module.ModuleInterface; import org.openide.util.lookup.ServiceProvider; /** * * Purpose: * * This module simply lists files at the S3 URL you provide. It's less useful in workflows and more likely to be used by end-users to see * what's in S3. * * @author boconnor * @since 20110928 * @version $Id: $Id */ @ServiceProvider(service = ModuleInterface.class) public class S3ListFiles extends Module { protected OptionSet options = null; protected String accessKey = null; protected String secretKey = null; private static final String[] Q = new String[] { "", "K", "M", "G", "T", "P", "E" }; /** * <p> * getOptionParser. * </p> * * @return a {@link joptsimple.OptionParser} object. */ @Override protected OptionParser getOptionParser() { OptionParser parser = new OptionParser(); parser.acceptsAll(Arrays.asList("s3-url", "u"), "Optional: a URL of the form s3://<bucket>/<path>/<file>").withRequiredArg() .describedAs("S3 path"); parser.acceptsAll(Arrays.asList("list-buckets", "l"), "Optional: list all the buckets you own."); parser.accepts( "reset-owner-permissions", "Optional: this will give the bucket owner full read/write permissions, useful if many different people have been writing to the same bucket."); parser.acceptsAll(Arrays.asList("tab-output-file", "t"), "Optional: tab-formated output file.").withRequiredArg() .describedAs("file path"); parser.acceptsAll(Arrays.asList("search-local-dir", "s"), "Optional: attempt to match files in S3 with files in this local directory.").withRequiredArg() .describedAs("directory path"); parser.accepts("in-bytes", "Optional: flag, if set values print in bytes rather than human friendsly"); return (parser); } /** * <p> * get_syntax. * </p> * * @return a {@link java.lang.String} object. */ @Override public String get_syntax() { OptionParser parser = getOptionParser(); StringWriter output = new StringWriter(); try { parser.printHelpOn(output); return (output.toString()); } catch (IOException e) { e.printStackTrace(); return (e.getMessage()); } } /** * {@inheritDoc} * * Not implemented * * @return */ @Override public ReturnValue do_test() { return new ReturnValue(ReturnValue.SUCCESS); } /** * {@inheritDoc} * * Just makes sure the param was passed in. * * @return */ @Override public ReturnValue do_verify_parameters() { ReturnValue ret = new ReturnValue(); ret.setExitStatus(ReturnValue.SUCCESS); try { OptionParser parser = getOptionParser(); options = parser.parse(this.getParameters().toArray(new String[this.getParameters().size()])); } catch (OptionException e) { ret.setStderr(e.getMessage() + System.getProperty("line.separator") + this.get_syntax()); ret.setExitStatus(ReturnValue.INVALIDPARAMETERS); e.printStackTrace(); return ret; } // Must specify input, output and binary file if (!options.has("s3-url") && !options.has("list-buckets")) { ret.setStderr("Must specify a --s3-url and/or --list-buckets option" + System.getProperty("line.separator") + this.get_syntax()); ret.setExitStatus(ReturnValue.INVALIDPARAMETERS); return ret; } return ret; } /** * {@inheritDoc} * * @return */ @Override public ReturnValue do_verify_input() { ReturnValue ret = new ReturnValue(); ret.setExitStatus(ReturnValue.SUCCESS); List<String> inputs = (List<String>) options.valuesOf("s3-url"); for (String input : inputs) { Pattern p = Pattern.compile("s3://(\\S+):(\\S+)@(\\S+)"); Matcher m = p.matcher(input); boolean result = m.find(); String url = input; if (result) { accessKey = m.group(1); secretKey = m.group(2); url = "s3://" + m.group(3); } } if (accessKey == null || secretKey == null) { try { HashMap<String, String> settings = (HashMap<String, String>) ConfigTools.getSettings(); accessKey = settings.get(SqwKeys.AWS_ACCESS_KEY.getSettingKey()); secretKey = settings.get(SqwKeys.AWS_SECRET_KEY.getSettingKey()); } catch (Exception e) { e.printStackTrace(); return null; } } if (accessKey == null || "".equals(accessKey) || secretKey == null || "".equals(secretKey)) { ret.setExitStatus(ReturnValue.INVALIDPARAMETERS); ret.setStderr(S3DeleteFiles.NEED_BOTH_AWS_SETTINGS); return ret; } return ret; } /** * {@inheritDoc} * * @return */ @Override public ReturnValue do_run() { ReturnValue ret = new ReturnValue(); ret.setExitStatus(ReturnValue.SUCCESS); // stores local file info HashMap<String, HashMap<String, Long>> localFiles = new HashMap<>(); // stores remote file info HashMap<String, HashMap<String, Long>> remoteFiles = new HashMap<>(); BufferedWriter tabWriter = null; if (options.has("tab-output-file")) { String tabOutFileStr = (String) options.valueOf("tab-output-file"); File tabOutFile = new File(tabOutFileStr); try { tabWriter = new BufferedWriter(new FileWriter(tabOutFile)); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } if (options.has("list-buckets") || options.has("l")) { AmazonS3 s3 = new AmazonS3Client(new BasicAWSCredentials(accessKey, secretKey)); boolean first = true; long allSize = 0L; for (Bucket bucket : s3.listBuckets()) { if (first) { first = false; Log.stdout("\nMY BUCKETS:\n"); } System.out.print(" - " + bucket.getName()); ObjectListing objectListing = s3.listObjects(new ListObjectsRequest().withBucketName(bucket.getName())); long totalSize = 0L; do { for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) { totalSize += objectSummary.getSize(); allSize += objectSummary.getSize(); } objectListing = s3.listNextBatchOfObjects(objectListing); } while (objectListing.isTruncated()); Log.stdout(" size=" + getAsString(totalSize)); } Log.stdout("\nTOTAL SIZE: " + getAsString(allSize) + "\n"); } // crawl the local filesystem and store in hash if (options.has("search-local-dir")) { List<String> searchDirs = (List<String>) options.valuesOf("search-local-dir"); for (String dir : searchDirs) { HashMap<String, Long> files = new HashMap<>(); findFiles(dir, dir, files); localFiles.put(dir, files); } } // now loop across every S3 bucket long allSize = 0L; List<String> inputs = (List<String>) options.valuesOf("s3-url"); for (String input : inputs) { if (input.startsWith("s3://")) { // for the time being will encode the access key and secret key within the // URL // see http://www.cs.rutgers.edu/~watrous/user-pass-url.html Pattern p = Pattern.compile("s3://(\\S+):(\\S+)@(\\S+)"); Matcher m = p.matcher(input); boolean result = m.find(); String url = input; if (result) { accessKey = m.group(1); secretKey = m.group(2); url = "s3://" + m.group(3); } else { // get the access/secret key from the .seqware/settings file try { HashMap<String, String> settings = (HashMap<String, String>) ConfigTools.getSettings(); accessKey = settings.get(SqwKeys.AWS_ACCESS_KEY.getSettingKey()); secretKey = settings.get(SqwKeys.AWS_SECRET_KEY.getSettingKey()); } catch (Exception e) { e.printStackTrace(); return null; } } if (accessKey == null || secretKey == null) { ret.setExitStatus(ReturnValue.INVALIDPARAMETERS); ret.setStderr(S3DeleteFiles.NEED_BOTH_AWS_SETTINGS); return ret; } // now get this from S3 AmazonS3 s3 = new AmazonS3Client(new BasicAWSCredentials(accessKey, secretKey)); // parse out the bucket and key p = Pattern.compile("s3://([^/]+)/*(\\S*)"); m = p.matcher(url); result = m.find(); if (result) { String bucket = m.group(1); String key = m.group(2); String bucketOwner = ""; /* sample code */ if (options.has("reset-owner-permissions")) { bucketOwner = s3.getBucketAcl(bucket).getOwner().getDisplayName(); Log.stdout("Bucket Owner: " + bucketOwner); } ObjectListing objectListing = s3.listObjects(new ListObjectsRequest().withBucketName(bucket).withPrefix(key)); long totalSize = 0L; boolean first = true; do { for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) { if (first) { first = false; if (key == null || "".equals(key)) { Log.stdout("\nLISTING BUCKET: " + bucket + "\n"); } else { Log.stdout("\nLISTING BUCKET: " + bucket + " AND KEY PREFIX: " + key + "\n"); } } totalSize += objectSummary.getSize(); allSize += objectSummary.getSize(); Log.stdout(" * " + objectSummary.getKey() + " " + "size=" + getAsString(objectSummary.getSize()) // " last_modified=" + objectSummary.getLastModified() + // " owner=" + objectSummary.getOwner().getDisplayName() ); // if I need to print a tab file make sure I save file information if (options.has("tab-output-file")) { try { HashMap<String, Long> bucketMap = remoteFiles.get(bucket); if (bucketMap == null) { bucketMap = new HashMap<>(); remoteFiles.put(bucket, bucketMap); } bucketMap.put(objectSummary.getKey(), objectSummary.getSize()); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } // if I'm searching local dir print if there's a match if (options.has("search-local-dir")) { for (String localDir : localFiles.keySet()) { HashMap<String, Long> currHash = localFiles.get(localDir); if (currHash.containsKey(objectSummary.getKey())) { Log.stdout(" -> matches local file " + localDir + "/" + objectSummary.getKey()); break; } } } if (options.has("reset-owner-permissions")) { try { Log.stdout(" resetting bucket owner (" + bucketOwner + ") permissions for file owned by " + objectSummary.getOwner().getDisplayName()); s3.setObjectAcl(bucket, objectSummary.getKey(), CannedAccessControlList.BucketOwnerFullControl); } catch (Exception e) { Log.error(" unable to reset permissions", e); } } } objectListing = s3.listNextBatchOfObjects(objectListing); } while (objectListing.isTruncated()); Log.stdout("\nBUCKET SIZE: " + getAsString(totalSize) + "\n"); } else { ret.setExitStatus(ReturnValue.FAILURE); ret.setStderr("Problems connecting to S3"); return ret; } } else { ret.setExitStatus(ReturnValue.FAILURE); ret.setStderr("You need to provide URLs that conform to the standard s3://<bucket>/<path>/<file>"); return ret; } } if (allSize > 0 && inputs.size() > 1) { Log.stdout("TOTAL SIZE: " + getAsString(allSize) + "\n"); } // now print everything out to local file if specified if (options.has("tab-output-file") && tabWriter != null) { try { tabWriter.write("# Remote_Files\n"); tabWriter.write("# Remote\tS3_Bucket\tS3_Key\tS3_Size\tLocal\tLocal_Root\tLocal_Path\tLocal_Size\tSize_Equal\n"); for (String remoteBucket : remoteFiles.keySet()) { HashMap<String, Long> remote = remoteFiles.get(remoteBucket); for (String file : remote.keySet()) { tabWriter.write("Remote\t" + remoteBucket + "\t" + file + "\t" + remote.get(file)); for (String localDir : localFiles.keySet()) { HashMap<String, Long> currHash = localFiles.get(localDir); if (currHash.containsKey(file)) { tabWriter.write("\tLocal\t" + localDir + "\t" + file + "\t" + currHash.get(file)); if (!remote.get(file).equals(currHash.get(file))) { tabWriter.write("\tNotEqualSize!"); } else { tabWriter.write("\tEqualSize!"); } break; } } tabWriter.write("\n"); } } // also print local file info if (options.has("search-local-dir")) { tabWriter.write("# Local_Files\n"); tabWriter.write("# Local\tLocal_Root\tLocal_Path\tLocal_Size\tRemote\tS3_Bucket\tS3_Key\tS3_Size\tSize_Equal\n"); for (String localDir : localFiles.keySet()) { HashMap<String, Long> currHash = localFiles.get(localDir); for (String localFile : currHash.keySet()) { tabWriter.write("Local\t" + localDir + "\t" + localFile + "\t" + currHash.get(localFile)); for (String remoteBucket : remoteFiles.keySet()) { HashMap<String, Long> remote = remoteFiles.get(remoteBucket); if (remote.containsKey(localFile)) { tabWriter.write("\tRemote\t" + remoteBucket + "\t" + localFile + "\t" + remote.get(localFile)); if (!remote.get(localFile).equals(currHash.get(localFile))) { tabWriter.write("\tNotEqualSize!"); } else { tabWriter.write("\tEqualSize!"); } break; } } tabWriter.write("\n"); } } } tabWriter.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } return ret; } private void findFiles(String rootDir, String file, HashMap<String, Long> fileMap) { File currFile = new File(file); if (currFile != null && currFile.canRead() && currFile.exists()) { if (currFile.isDirectory()) { File[] children = currFile.listFiles(); for (File child : children) { findFiles(rootDir, child.getPath(), fileMap); } } else { long size = currFile.length(); String path = currFile.getPath(); path = path.replace(rootDir, ""); if (path.startsWith("/")) { path = path.substring(1); } // Log.info(path); fileMap.put(path, size); } } } private String getAsString(long bytes) { if (!options.has("in-bytes")) { for (int i = 6; i > 0; i--) { double step = Math.pow(1024, i); if (bytes > step) return String.format("%3.1f%s", bytes / step, Q[i]); } } return Long.toString(bytes); } /** * {@inheritDoc} * * @return */ @Override public ReturnValue do_verify_output() { // TODO: should verify output, especially is they are local files! ReturnValue ret = new ReturnValue(); ret.setExitStatus(ReturnValue.SUCCESS); return ret; } /** * <p> * init. * </p> * * @return a {@link net.sourceforge.seqware.common.module.ReturnValue} object. */ @Override public ReturnValue init() { ReturnValue ret = new ReturnValue(); ret.setReturnValue(ReturnValue.SUCCESS); Logger logger = Logger.getLogger("com.amazonaws"); logger.setLevel(Level.SEVERE); return ret; } /** * <p> * clean_up. * </p> * * @return a {@link net.sourceforge.seqware.common.module.ReturnValue} object. */ @Override public ReturnValue clean_up() { ReturnValue ret = new ReturnValue(); ret.setReturnValue(ReturnValue.SUCCESS); return ret; } }