/* * Copyright 2013-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Amazon Software License (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at * * http://aws.amazon.com/asl/ * * or in the "license" file accompanying this file. This file is distributed * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing * permissions and limitations under the License. */ package com.amazonaws.hbase.kinesis; import java.io.IOException; import java.io.InputStream; import java.util.Properties; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.amazonaws.hbase.connector.EMRHBaseKinesisConnectorConfiguration; import com.amazonaws.hbase.kinesis.utils.EMRUtils; import com.amazonaws.hbase.kinesis.utils.HBaseUtils; import com.amazonaws.hbase.kinesis.utils.KinesisUtils; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClient; import com.amazonaws.services.kinesis.connectors.KinesisConnectorExecutorBase; /** * This class defines the execution of a Amazon Kinesis Connector. * */ public abstract class KinesisConnectorExecutor<T, U> extends KinesisConnectorExecutorBase<T, U> { private static final Log LOG = LogFactory.getLog(KinesisConnectorExecutor.class); private static final String EMR_CLUSTER_IDENTIFIER = "emrClusterIdentifier"; private static final String EMR_CLUSTER_NAME = "emrClusterName"; //Create EMR cluster properties public static final String EMR_ENDPOINT = "emrEndpoint"; private static final String EMR_NUMBER_OF_NODES = "emrCoreNumberOfNodes"; private static final String EMR_AMI_VERSION = "emrAmiVersion"; private static final String EMR_MASTER_INSTANCE_TYPE = "emrMasterInstanceType"; private static final String EMR_CORE_INSTANCE_TYPE = "emrCoreInstanceType"; private static final String EMR_LOG_URI = "emrLogURI"; private static final String EC2_KEYPAIR = "ec2Keypair"; private static final int DEFAULT_EMR_NUMBER_OF_NODES = 2; // Create Stream Source constants private static final String CREATE_STREAM_SOURCE = "createStreamSource"; private static final String LOOP_OVER_STREAM_SOURCE = "loopOverStreamSource"; private static final boolean DEFAULT_CREATE_STREAM_SOURCE = false; private static final boolean DEFAULT_LOOP_OVER_STREAM_SOURCE = false; private static final String INPUT_STREAM_FILE = "inputStreamFile"; // Class variables protected final EMRHBaseKinesisConnectorConfiguration config; private final Properties properties; /** * Create a new KinesisConnectorExecutor based on the provided configuration (*.propertes) file. * * @param configFile * The name of the configuration file to look for on the classpath */ public KinesisConnectorExecutor(String configFile) { InputStream configStream = Thread.currentThread().getContextClassLoader().getResourceAsStream(configFile); if (configStream == null) { String msg = "Could not find resource " + configFile + " in the classpath"; throw new IllegalStateException(msg); } properties = new Properties(); try { properties.load(configStream); configStream.close(); } catch (IOException e) { String msg = "Could not load properties file " + configFile + " from classpath"; throw new IllegalStateException(msg, e); } this.config = new EMRHBaseKinesisConnectorConfiguration(properties, getAWSCredentialsProvider()); setupAWSResources(); setupInputStream(); // Initialize executor with configurations super.initialize(config); } /** * Returns an {@link AWSCredentialsProvider} with the permissions necessary to accomplish all specified * tasks. At the minimum it will require read permissions for Amazon Kinesis. Additional read permissions * and write permissions may be required based on the Pipeline used. * * @return */ public AWSCredentialsProvider getAWSCredentialsProvider() { return new DefaultAWSCredentialsProviderChain(); } /** * Setup necessary AWS resources for the sample. By default, the Executor will create any * AWS resources specified in the configuration file. */ private void setupAWSResources() { KinesisUtils.createInputStream(config); createEMRCluster(properties.getProperty(EMR_CLUSTER_IDENTIFIER), properties.getProperty(EMR_CLUSTER_NAME), properties.getProperty(EMR_AMI_VERSION), properties.getProperty(EC2_KEYPAIR), properties.getProperty(EMR_MASTER_INSTANCE_TYPE), properties.getProperty(EMR_CORE_INSTANCE_TYPE), properties.getProperty(EMR_LOG_URI), parseInt(EMR_NUMBER_OF_NODES, DEFAULT_EMR_NUMBER_OF_NODES, properties)); } /** * Helper method to spawn the {@link StreamSource} in a separate thread. */ private void setupInputStream() { if (parseBoolean(CREATE_STREAM_SOURCE, DEFAULT_CREATE_STREAM_SOURCE, properties)) { String inputFile = properties.getProperty(INPUT_STREAM_FILE); StreamSource streamSource; if (config.BATCH_RECORDS_IN_PUT_REQUEST) { streamSource = new BatchedStreamSource(config, inputFile, parseBoolean(LOOP_OVER_STREAM_SOURCE, DEFAULT_LOOP_OVER_STREAM_SOURCE, properties)); } else { streamSource = new StreamSource(config, inputFile, parseBoolean(LOOP_OVER_STREAM_SOURCE, DEFAULT_LOOP_OVER_STREAM_SOURCE, properties)); } Thread streamSourceThread = new Thread(streamSource); LOG.info("Starting stream source."); streamSourceThread.start(); } } /** * Helper class to create and Amazon EMR cluster with HBase installed on that cluster * * @param clusterIdentifier - cluster id if one exists * @param clusterName - name you want associated with this cluster * @param amiVersion - version of AMI that you wish to use for your HBase cluster * @param keypair - you need a keypair to SSH into the cluster * @param masterInstanceType - Amazon EC2 instance type for your master node * @param coreInstanceType - Amazon Ec2 instance tyoe for your core nodes * @param logUri - Specify a bucket for your EMR logs * @param numberOfNodes - total number of nodes in your cluster including the master node */ private void createEMRCluster(String clusterIdentifier, String clusterName, String amiVersion, String keypair, String masterInstanceType, String coreInstanceType, String logUri, int numberOfNodes) { // Make sure the EMR cluster is available AmazonElasticMapReduceClient emrClient = new AmazonElasticMapReduceClient(config.AWS_CREDENTIALS_PROVIDER); emrClient.setEndpoint(config.EMR_ENDPOINT); String clusterid = clusterIdentifier; if (!EMRUtils.clusterExists(emrClient, clusterIdentifier)) { clusterid = EMRUtils.createCluster(emrClient, clusterIdentifier, amiVersion, keypair, masterInstanceType, coreInstanceType, logUri, numberOfNodes); } // Update the emr cluster id and public DNS properties config.EMR_CLUSTER_IDENTIFIER = clusterid; config.EMR_CLUSTER_PUBLIC_DNS = EMRUtils.getPublicDns(emrClient, clusterid); //make sure table exists if (!HBaseUtils.tablesExists(config.HBASE_TABLE_NAME, config.EMR_CLUSTER_PUBLIC_DNS, config.HBASE_REST_PORT)){ HBaseUtils.createTable(config.HBASE_TABLE_NAME, config.EMR_CLUSTER_PUBLIC_DNS, config.HBASE_REST_PORT); } } /** * Helper method used to parse boolean properties. * * @param property * The String key for the property * @param defaultValue * The default value for the boolean property * @param properties * The properties file to get property from * @return property from property file, or if it is not specified, the default value */ private static boolean parseBoolean(String property, boolean defaultValue, Properties properties) { return Boolean.parseBoolean(properties.getProperty(property, Boolean.toString(defaultValue))); } /** * Helper method used to parse integer properties. * * @param property * The String key for the property * @param defaultValue * The default value for the integer property * @param properties * The properties file to get property from * @return property from property file, or if it is not specified, the default value */ private static int parseInt(String property, int defaultValue, Properties properties) { return Integer.parseInt(properties.getProperty(property, Integer.toString(defaultValue))); } }