package com.amazonaws.services.dynamodbv2.json.demo.mars;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;

import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient;
import com.amazonaws.services.dynamodbv2.json.demo.mars.util.ConfigParser;
import com.amazonaws.services.dynamodbv2.json.demo.mars.util.DynamoDBManager;
import com.amazonaws.services.dynamodbv2.json.demo.mars.util.MarsDynamoDBManager;
import com.amazonaws.services.dynamodbv2.json.demo.mars.worker.DynamoDBImageWorker;
import com.amazonaws.services.dynamodbv2.json.demo.mars.worker.DynamoDBJSONRootWorker;
import com.amazonaws.services.dynamodbv2.json.demo.mars.worker.DynamoDBMissionWorker;
import com.amazonaws.services.dynamodbv2.json.demo.mars.worker.DynamoDBSolWorker;
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;

/**
 * <p>
 * Downloads mars image JSON and ingests it into DynamoDB.
 * </p>
 * <p>
 * Makes use of 2 DynamoDB Tables:
 * </p>
 * <ul>
 * <li>Resource table: stores ETags for manifests and image resources.</li>
 * <li>Image table: stores the images from the Mars rover missions.</li>
 * </ul>
 * <p>
 * An instance is single-use: the manifest thread pool created by the constructor is shut down during
 * {@link #run()}, so {@link #run()} must not be invoked more than once per instance.
 * </p>
 */
public class ImageIngester implements Runnable {
    /**
     * Logger for the {@link ImageIngester}.
     */
    public static final Logger LOGGER = Logger.getLogger(ImageIngester.class.getName());

    // Configuration constants
    /**
     * Default number of threads for a thread pool.
     */
    public static final int DEFAULT_THREADS = 1;
    /**
     * Properties key for number of threads in the pool for {@link DynamoDBJSONRootWorker}s and
     * {@link DynamoDBMissionWorker}s.
     */
    public static final String CONFIG_NUM_MANIFEST_THREADS = "ingester.manifest.threads";
    /**
     * Properties key for number of threads in the pool for {@link DynamoDBSolWorker}s.
     */
    public static final String CONFIG_NUM_SOL_THREADS = "ingester.sol.threads";
    /**
     * Properties key for number of threads in the pool for {@link DynamoDBImageWorker}s.
     */
    public static final String CONFIG_NUM_IMAGE_THREADS = "ingester.image.threads";
    /**
     * Flag for whether resources should be tracked by ETag in a DynamoDB table.
     */
    public static final String CONFIG_TRACK_RESOURCES = "ingester.track-resources";
    /**
     * Default behavior is to not track resources.
     */
    public static final boolean DEFAULT_TRACK_RESOURCES = false;
    /**
     * Flag for whether to store image thumbnails in the DynamoDB table.
     */
    public static final String CONFIG_STORE_THUMBNAILS = "ingester.store-thumbnails";
    /**
     * Default is to not include thumbnails.
     */
    public static final boolean DEFAULT_STORE_THUMBNAILS = false;
    /**
     * Properties key for the amount of time to wait between checking asynchronous tasks for completion.
     */
    public static final String CONFIG_WAIT_TIME = "ingester.waitTime";
    /**
     * Default amount of time (in milliseconds) to wait between checking asynchronous tasks for completion.
     */
    public static final long DEFAULT_WAIT_TIME = 20 * 1000; // 20 seconds
    /**
     * Properties key for the connect timeout when retrieving an HTTP resource.
     */
    public static final String CONFIG_CONNECT_TIMEOUT = "ingester.timeout";
    /**
     * Default connect timeout when retrieving an HTTP resource.
     */
    public static final int DEFAULT_CONNECT_TIMEOUT = 1000;
    /**
     * Properties key for image thumbnail width in pixels.
     */
    public static final String CONFIG_THUMBNAIL_WIDTH = "ingester.image.thumbnail.width";
    /**
     * Properties key for image thumbnail height in pixels.
     */
    public static final String CONFIG_THUMBNAIL_HEIGHT = "ingester.image.thumbnail.height";
    /**
     * Default value for image thumbnail width in pixels.
     */
    public static final int DEFAULT_THUMBNAIL_WIDTH = 100;
    /**
     * Default value for image thumbnail height in pixels.
     */
    public static final int DEFAULT_THUMBNAIL_HEIGHT = 100;
    /**
     * Properties key for DynamoDB endpoint.
     */
    public static final String CONFIG_ENDPOINT = "dynamodb.endpoint";
    /**
     * Properties key for the JSON root URL.
     */
    public static final String CONFIG_JSON_ROOT = "JSON.root";
    /**
     * Properties key for the DynamoDB resource table name.
     */
    public static final String CONFIG_RESOURCE_TABLE = "dynamodb.resource";
    /**
     * Properties key for the DynamoDB image table name.
     */
    public static final String CONFIG_IMAGE_TABLE = "dynamodb.image";
    /**
     * Properties key for the resource table create flag.
     */
    public static final String CONFIG_RESOURCE_TABLE_CREATE = "dynamodb.resource.create";
    /**
     * Properties key for the resource table read capacity units.
     */
    public static final String CONFIG_RESOURCE_TABLE_RCU = "dynamodb.resource.readCapacityUnits";
    /**
     * Properties key for the resource table write capacity units.
     */
    public static final String CONFIG_RESOURCE_TABLE_WCU = "dynamodb.resource.writeCapacityUnits";
    /**
     * Properties key for the image table create flag.
     */
    public static final String CONFIG_IMAGE_TABLE_CREATE = "dynamodb.image.create";
    /**
     * Properties key for the image table read capacity units.
     */
    public static final String CONFIG_IMAGE_TABLE_RCU = "dynamodb.image.readCapacityUnits";
    /**
     * Properties key for the image table write capacity units.
     */
    public static final String CONFIG_IMAGE_TABLE_WCU = "dynamodb.image.writeCapacityUnits";
    /**
     * Properties key for the read capacity units of the time global secondary index on the image table.
     */
    public static final String CONFIG_IMAGE_TABLE_TIME_GSI_RCU =
        "dynamodb.image.globalSecondaryIndex.time.readCapacityUnits";
    /**
     * Properties key for the write capacity units of the time global secondary index on the image table.
     */
    public static final String CONFIG_IMAGE_TABLE_TIME_GSI_WCU =
        "dynamodb.image.globalSecondaryIndex.time.writeCapacityUnits";
    /**
     * Properties key for the read capacity units of the vote global secondary index on the image table.
     */
    public static final String CONFIG_IMAGE_TABLE_VOTE_GSI_RCU =
        "dynamodb.image.globalSecondaryIndex.vote.readCapacityUnits";
    /**
     * Properties key for the write capacity units of the vote global secondary index on the image table.
     */
    public static final String CONFIG_IMAGE_TABLE_VOTE_GSI_WCU =
        "dynamodb.image.globalSecondaryIndex.vote.writeCapacityUnits";
    /**
     * Required String properties in configuration.
     */
    private static final String[] REQUIRED_STRING_CONFIGURATIONS = {CONFIG_ENDPOINT, CONFIG_JSON_ROOT,
        CONFIG_IMAGE_TABLE};
    /**
     * Required Boolean properties in configuration.
     */
    private static final String[] REQUIRED_BOOLEAN_CONFIGURATIONS = {CONFIG_IMAGE_TABLE_CREATE};
    /**
     * Required Integer properties in configuration.
     */
    private static final String[] REQUIRED_INTEGER_CONFIGURATIONS = {};
    /**
     * Required Long properties in configuration.
     */
    private static final String[] REQUIRED_LONG_CONFIGURATIONS = {};

    /**
     * Creates an {@link ImageIngester} with a {@link DefaultAWSCredentialsProviderChain} and runs it on a new
     * thread.
     *
     * @param args
     *            The command line arguments for locating the application configuration.
     */
    public static void main(final String[] args) {
        try {
            new Thread(new ImageIngester(args, new DefaultAWSCredentialsProviderChain())).start();
        } catch (final ExitException e) {
            LOGGER.warning("Exiting: " + e.getMessage());
        } catch (final HelpException e) {
            assert true; // Do nothing except quit
        }
    }

    /**
     * Reads a read/write capacity unit pair from the configuration and wraps it in a
     * {@link ProvisionedThroughput}. The read key is parsed before the write key.
     *
     * @param config
     *            Configuration containing the capacity settings
     * @param readKey
     *            Properties key for the read capacity units
     * @param writeKey
     *            Properties key for the write capacity units
     * @return the provisioned throughput described by the two properties
     * @throws ExitException
     *             Error parsing the configuration
     */
    private static ProvisionedThroughput parseThroughput(final Properties config, final String readKey,
        final String writeKey) throws ExitException {
        final long readCapacityUnits = ConfigParser.parseLong(config, readKey);
        final long writeCapacityUnits = ConfigParser.parseLong(config, writeKey);
        return new ProvisionedThroughput(readCapacityUnits, writeCapacityUnits);
    }

    /**
     * Checks for required DynamoDB tables. If the user specifies, this method will create the tables and block until
     * they have an ACTIVE TableStatus. If after these actions the tables are not setup properly, the program will exit.
     *
     * @param dynamoDB
     *            {@link AmazonDynamoDB} to use to create DynamoDB tables
     * @param config
     *            Configuration containing table creation parameters.
     * @throws ExitException
     *             Error parsing the configuration, or the tables could not be brought to a usable state
     */
    private static void setupTables(final AmazonDynamoDB dynamoDB, final Properties config) throws ExitException {
        final boolean trackResources = ConfigParser.parseBoolean(config, CONFIG_TRACK_RESOURCES,
            DEFAULT_TRACK_RESOURCES);

        // The resource (ETag) table is only needed when resource tracking is enabled.
        String eTagTable = null;
        boolean eTagTableExists = false;
        if (trackResources) {
            eTagTable = ConfigParser.parseString(config, CONFIG_RESOURCE_TABLE);
            final boolean createETagTable = ConfigParser.parseBoolean(config, CONFIG_RESOURCE_TABLE_CREATE);
            if (DynamoDBManager.doesTableExist(dynamoDB, eTagTable)) {
                LOGGER.info("Resource table " + eTagTable + " exists");
                eTagTableExists = true;
            } else if (createETagTable) {
                try {
                    final ProvisionedThroughput eTagTablePT = parseThroughput(config, CONFIG_RESOURCE_TABLE_RCU,
                        CONFIG_RESOURCE_TABLE_WCU);
                    MarsDynamoDBManager.createResourceTable(dynamoDB, eTagTable, eTagTablePT);
                    eTagTableExists = true;
                } catch (final Exception e) {
                    // Creation failure is reported here; the final check below turns it into an ExitException.
                    LOGGER.log(Level.SEVERE, "Could not create resource table " + eTagTable + ": " + e.getMessage(),
                        e);
                }
            } else {
                LOGGER.warning("Resource table " + eTagTable + " does not exist");
            }
        }

        final String imageTable = ConfigParser.parseString(config, CONFIG_IMAGE_TABLE);
        final boolean createImageTable = ConfigParser.parseBoolean(config, CONFIG_IMAGE_TABLE_CREATE);
        boolean imageTableExists = false;
        if (DynamoDBManager.doesTableExist(dynamoDB, imageTable)) {
            LOGGER.info("Image table " + imageTable + " exists");
            imageTableExists = true;
        } else if (createImageTable) {
            try {
                final ProvisionedThroughput imageTablePT = parseThroughput(config, CONFIG_IMAGE_TABLE_RCU,
                    CONFIG_IMAGE_TABLE_WCU);
                final ProvisionedThroughput imageTableTimeGSIPT = parseThroughput(config,
                    CONFIG_IMAGE_TABLE_TIME_GSI_RCU, CONFIG_IMAGE_TABLE_TIME_GSI_WCU);
                final ProvisionedThroughput imageTableVoteGSIPT = parseThroughput(config,
                    CONFIG_IMAGE_TABLE_VOTE_GSI_RCU, CONFIG_IMAGE_TABLE_VOTE_GSI_WCU);
                MarsDynamoDBManager.createImageTable(dynamoDB, imageTable, imageTablePT, imageTableTimeGSIPT,
                    imageTableVoteGSIPT);
                imageTableExists = true;
            } catch (final Exception e) {
                // Creation failure is reported here; the final check below turns it into an ExitException.
                LOGGER.log(Level.SEVERE, "Could not create image table " + imageTable + ": " + e.getMessage(), e);
            }
        } else {
            LOGGER.warning("Image table " + imageTable + " does not exist");
        }

        // Only wait for ACTIVE status when every table that is needed is known to exist.
        boolean eTagTableActive = false;
        boolean imageTableActive = false;
        if ((!trackResources || eTagTableExists) && imageTableExists) {
            try {
                if (trackResources) {
                    DynamoDBManager.waitForTableToBecomeActive(dynamoDB, eTagTable);
                    eTagTableActive = true;
                }
                DynamoDBManager.waitForTableToBecomeActive(dynamoDB, imageTable);
                imageTableActive = true;
            } catch (final IllegalStateException e) {
                LOGGER.log(Level.SEVERE, "Table did not become active: " + e.getMessage(), e);
            }
        }
        if ((trackResources && !eTagTableActive) || !imageTableActive) {
            throw new ExitException("Tables are not set up properly");
        }
    }

    // State variables
    /**
     * Configuration for the {@link ImageIngester} application.
     */
    private final Properties config;
    /**
     * DynamoDB table for reading and writing image ETags.
     */
    private final String resourceTable;
    /**
     * DynamoDB table for persisting images.
     */
    private final String imageTable;
    /**
     * {@link AmazonDynamoDB} for accessing Amazon Web Services resources.
     */
    private final AmazonDynamoDB dynamoDB;
    /**
     * Wait time between checks for asynchronous tasks to complete.
     */
    private final long waitTime;
    /**
     * Timeout for retrieving HTTP URL resources.
     */
    private final int connectTimeout;
    /**
     * Thread pool for {@link DynamoDBJSONRootWorker} and {@link DynamoDBMissionWorker}.
     */
    private final ExecutorService manifestPool;
    /**
     * Thread pool for {@link DynamoDBSolWorker}s. Created lazily while processing missions; {@code null} until then.
     */
    private ExecutorService solPool;
    /**
     * Thread pool for {@link DynamoDBImageWorker}s. Created lazily while processing sols; {@code null} until then.
     */
    private ExecutorService imagePool;

    /**
     * Constructs a {@link ImageIngester} with the specified command line arguments and Amazon Web Services credentials
     * provider.
     *
     * @param args
     *            Command line arguments for retrieving the configuration
     * @param credentialsProvider
     *            Amazon Web Services credentials provider
     * @throws ExitException
     *             Error parsing configuration
     */
    public ImageIngester(final String[] args, final AWSCredentialsProvider credentialsProvider) throws ExitException {
        // Parse command line arguments to locate configuration file
        final ImageIngesterCLI cli = new ImageIngesterCLI(args);
        config = cli.getConfig();
        // Validate the configuration file
        ConfigParser.validateConfig(config, REQUIRED_STRING_CONFIGURATIONS, REQUIRED_BOOLEAN_CONFIGURATIONS,
            REQUIRED_INTEGER_CONFIGURATIONS, REQUIRED_LONG_CONFIGURATIONS);
        // Parse configuration settings
        resourceTable = ConfigParser.parseString(config, CONFIG_RESOURCE_TABLE);
        imageTable = ConfigParser.parseString(config, CONFIG_IMAGE_TABLE);
        waitTime = ConfigParser.parseLong(config, CONFIG_WAIT_TIME, DEFAULT_WAIT_TIME);
        connectTimeout = ConfigParser.parseInteger(config, CONFIG_CONNECT_TIMEOUT, DEFAULT_CONNECT_TIMEOUT);
        final String endpoint = ConfigParser.parseString(config, CONFIG_ENDPOINT);
        final int numManifestThreads = ConfigParser.parseInteger(config, CONFIG_NUM_MANIFEST_THREADS,
            DEFAULT_THREADS);
        // Setup state
        dynamoDB = new AmazonDynamoDBClient(credentialsProvider);
        dynamoDB.setEndpoint(endpoint);
        manifestPool = Executors.newFixedThreadPool(numManifestThreads);
    }

    /**
     * Sleeps for the configured wait time between two polls of asynchronous tasks.
     *
     * @param activity
     *            Short description of what is being waited for, used in the error message
     * @throws ExitException
     *             if the thread is interrupted while sleeping; the interrupt flag is restored first
     */
    private void pause(final String activity) throws ExitException {
        try {
            Thread.sleep(waitTime);
        } catch (final InterruptedException e) {
            // Restore the flag so callers further up can observe the interruption, then abort the ingestion:
            // continuing to poll would make every subsequent sleep fail immediately.
            Thread.currentThread().interrupt();
            throw new ExitException("Interrupted while " + activity, e);
        }
    }

    /**
     * Stops all thread pools owned by this ingester, cancelling any work that is still queued or running.
     */
    private void shutdownPools() {
        manifestPool.shutdownNow();
        if (solPool != null) {
            solPool.shutdownNow();
        }
        if (imagePool != null) {
            imagePool.shutdownNow();
        }
    }

    /**
     * Waits for all {@link DynamoDBImageWorker} tasks to complete. Finished futures are removed from the supplied
     * collection as they are observed.
     *
     * @param imageFutures
     *            Collection of futures corresponding to {@link DynamoDBImageWorker} tasks
     * @throws ExitException
     *             if the thread is interrupted while waiting
     */
    private void awaitTermination(final Collection<Future<?>> imageFutures) throws ExitException {
        while (!imageFutures.isEmpty()) {
            final Iterator<Future<?>> it = imageFutures.iterator();
            while (it.hasNext()) {
                if (it.next().isDone()) {
                    it.remove();
                }
            }
            // Only sleep when there is still something to wait for.
            if (!imageFutures.isEmpty()) {
                LOGGER.info(imageFutures.size() + " images left to process");
                pause("waiting for images to be processed");
            }
        }
        LOGGER.info("Ingestion completed.");
    }

    /**
     * <p>
     * Submits a {@link DynamoDBMissionWorker} for each mission. Gets results from mission futures as they become
     * available and submits a new {@link DynamoDBSolWorker} to process each sol in the mission. Returns a future for
     * each sol that contains an {@link ArrayNode} of all the images in the sol
     * </p>
     * <p>
     * If there is an error parsing a mission, a warning is logged and the mission is skipped.
     * </p>
     * <p>
     * The manifest pool is shut down once all mission tasks have been submitted, and the sol pool is shut down once
     * all sol tasks have been submitted.
     * </p>
     *
     * @param topLevelManifests
     *            Map of mission to its manifest URL
     *
     * @return futures for each sol that will provide an {@link ArrayNode} of images in the sol
     * @throws ExitException
     *             Error parsing configuration, or the thread was interrupted while waiting
     */
    private Collection<Future<ArrayNode>> processMissions(final Map<String, String> topLevelManifests)
        throws ExitException {
        final Collection<Future<Map<Integer, String>>> missions = new ArrayList<>();
        final Collection<Future<ArrayNode>> solFutures = new ArrayList<>();
        // Submit task for each mission
        for (final Entry<String, String> manifest : topLevelManifests.entrySet()) {
            final String resource = manifest.getValue();
            final DynamoDBMissionWorker worker = new DynamoDBMissionWorker(resource, connectTimeout);
            final Future<Map<Integer, String>> future = manifestPool.submit(worker);
            missions.add(future);
        }
        // No further manifest work will be submitted; already-submitted tasks keep running.
        manifestPool.shutdown();

        final int numSolThreads = ConfigParser.parseInteger(config, CONFIG_NUM_SOL_THREADS, DEFAULT_THREADS);
        solPool = Executors.newFixedThreadPool(numSolThreads);
        // Process all mission futures
        while (!missions.isEmpty()) {
            final Iterator<Future<Map<Integer, String>>> it = missions.iterator();
            while (it.hasNext()) {
                final Future<Map<Integer, String>> missionFuture = it.next();
                if (!missionFuture.isDone()) {
                    continue;
                }
                try {
                    final Map<Integer, String> mission = missionFuture.get();
                    // Submit task for each sol in the mission
                    for (final String solURL : mission.values()) {
                        final DynamoDBSolWorker worker = new DynamoDBSolWorker(solURL, connectTimeout);
                        final Future<ArrayNode> future = solPool.submit(worker);
                        solFutures.add(future);
                    }
                } catch (final InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw new ExitException("Interrupted while processing missions", e);
                } catch (final ExecutionException e) {
                    // Skip mission if there was an error, but report warning
                    LOGGER.log(Level.WARNING, "Skipping mission: " + e.getMessage(), e);
                } finally {
                    // Future is processed (successful or error), remove from collection
                    it.remove();
                }
            }
            // Wait a bit for the remaining tasks to finish before checking again
            if (!missions.isEmpty()) {
                pause("waiting for missions to be processed");
            }
        }
        solPool.shutdown();
        return solFutures;
    }

    /**
     * Retrieves and parses the root JSON to get Mars mission manifests.
     *
     * @return Map of mission to manifest URL
     * @throws ExitException
     *             if the parser cannot access or process the root manifest, or the thread is interrupted while
     *             waiting for it
     */
    private Map<String, String> processRootJSON() throws ExitException {
        // Parse parameters from configuration
        final String rootURL = ConfigParser.parseString(config, CONFIG_JSON_ROOT);
        // Get and parse top level manifest
        final DynamoDBJSONRootWorker rootWorker = new DynamoDBJSONRootWorker(rootURL, connectTimeout);
        final Future<Map<String, String>> rootFuture = manifestPool.submit(rootWorker);
        try {
            return rootFuture.get();
        } catch (final InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new ExitException("Could not process root JSON", e);
        } catch (final ExecutionException e) {
            throw new ExitException("Could not process root JSON", e);
        }
    }

    /**
     * <p>
     * Gets the results from each sol future as they become available and creates a {@link DynamoDBImageWorker} for
     * each image contained in the sol. The image workers are submitted to the image pool only after every sol future
     * has been processed, and the image pool is then shut down so it accepts no further work.
     * </p>
     * <p>
     * If there is an error processing a sol, a warning is logged and the sol is skipped.
     * </p>
     *
     * @param solFutures
     *            Futures for each sol that provides an {@link ArrayNode} that contains the images from the sol
     * @return Collection of {@link Future}s for monitoring {@link DynamoDBImageWorker} progress
     * @throws ExitException
     *             Error parsing configuration, or the thread was interrupted while waiting
     */
    private Collection<Future<?>> processSolFutures(final Collection<Future<ArrayNode>> solFutures)
        throws ExitException {
        final Collection<DynamoDBImageWorker> workers = new ArrayList<>();
        final Collection<Future<?>> imageFutures = new ArrayList<>();

        // These settings do not change between sols, so they are read once up front.
        final int numImageThreads = ConfigParser.parseInteger(config, CONFIG_NUM_IMAGE_THREADS, DEFAULT_THREADS);
        final int thumbnailWidth = ConfigParser.parseInteger(config, CONFIG_THUMBNAIL_WIDTH,
            DEFAULT_THUMBNAIL_WIDTH);
        final int thumbnailHeight = ConfigParser.parseInteger(config, CONFIG_THUMBNAIL_HEIGHT,
            DEFAULT_THUMBNAIL_HEIGHT);
        final boolean trackResources = ConfigParser.parseBoolean(config, CONFIG_TRACK_RESOURCES,
            DEFAULT_TRACK_RESOURCES);
        final boolean storeThumbnails = ConfigParser.parseBoolean(config, CONFIG_STORE_THUMBNAILS,
            DEFAULT_STORE_THUMBNAILS);

        imagePool = Executors.newFixedThreadPool(numImageThreads);
        // Process all sol futures
        while (!solFutures.isEmpty()) {
            final Iterator<Future<ArrayNode>> it = solFutures.iterator();
            while (it.hasNext()) {
                final Future<ArrayNode> solFuture = it.next();
                if (!solFuture.isDone()) {
                    continue;
                }
                try {
                    final ArrayNode images = solFuture.get();
                    // Create a task for each image in the sol
                    for (final JsonNode image : images) {
                        if (!image.isObject()) {
                            LOGGER.warning("Unexpected image: " + image);
                            continue;
                        }
                        workers.add(new DynamoDBImageWorker(dynamoDB, imageTable, resourceTable, (ObjectNode) image,
                            connectTimeout, thumbnailWidth, thumbnailHeight, trackResources, storeThumbnails));
                    }
                } catch (final InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw new ExitException("Interrupted while processing sols", e);
                } catch (final ExecutionException e) {
                    // Skip sol if there was an error, but report warning
                    LOGGER.log(Level.WARNING, "Skipping sol: " + e.getMessage(), e);
                } finally {
                    // Future is processed (successful or error), remove from collection
                    it.remove();
                }
            }
            // Wait a bit for the remaining tasks to finish before checking again
            if (!solFutures.isEmpty()) {
                LOGGER.info(solFutures.size() + " sols remaining");
                pause("waiting for sols to be processed");
            }
        }
        LOGGER.info("All sols processed.");
        for (final DynamoDBImageWorker worker : workers) {
            imageFutures.add(imagePool.submit(worker));
        }
        imagePool.shutdown();
        return imageFutures;
    }

    /**
     * Runs the full ingestion: sets up the tables, reads the root manifest, processes missions and sols, and waits for
     * all image tasks to finish. On failure the reason is logged. In every case all thread pools are stopped before
     * returning so that no worker thread outlives the run.
     */
    @Override
    public void run() {
        try {
            setupTables(dynamoDB, config);
            final Map<String, String> missions = processRootJSON();
            final Collection<Future<ArrayNode>> imageArrayFutures = processMissions(missions);
            final Collection<Future<?>> imageFutures = processSolFutures(imageArrayFutures);
            awaitTermination(imageFutures);
        } catch (final ExitException e) {
            LOGGER.warning("Exiting: " + e.getMessage());
        } finally {
            shutdownPools();
        }
    }
}