/*
* Copyright 2013-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Amazon Software License (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://aws.amazon.com/asl/
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.amazonaws.hbase.kinesis.utils;
import java.util.ArrayList;
import java.util.List;
import java.util.ListIterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
import com.amazonaws.services.elasticmapreduce.model.BootstrapActionConfig;
import com.amazonaws.services.elasticmapreduce.model.Cluster;
import com.amazonaws.services.elasticmapreduce.model.ClusterSummary;
import com.amazonaws.services.elasticmapreduce.model.Command;
import com.amazonaws.services.elasticmapreduce.model.DescribeClusterRequest;
import com.amazonaws.services.elasticmapreduce.model.DescribeClusterResult;
import com.amazonaws.services.elasticmapreduce.model.DescribeJobFlowsRequest;
import com.amazonaws.services.elasticmapreduce.model.DescribeJobFlowsResult;
import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;
import com.amazonaws.services.elasticmapreduce.model.JobFlowDetail;
import com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesConfig;
import com.amazonaws.services.elasticmapreduce.model.JobFlowInstancesDetail;
import com.amazonaws.services.elasticmapreduce.model.ListBootstrapActionsRequest;
import com.amazonaws.services.elasticmapreduce.model.ListBootstrapActionsResult;
import com.amazonaws.services.elasticmapreduce.model.ListClustersResult;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowRequest;
import com.amazonaws.services.elasticmapreduce.model.RunJobFlowResult;
import com.amazonaws.services.elasticmapreduce.model.ScriptBootstrapActionConfig;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import com.amazonaws.services.elasticmapreduce.util.StepFactory;
/**
 * Static helpers for launching and inspecting Apache HBase clusters on Amazon EMR
 * through an {@link AmazonElasticMapReduce} client.
 */
public class EMRUtils {
    private static final Log LOG = LogFactory.getLog(EMRUtils.class);

    /** Delay between two consecutive state polls while waiting for a newly launched cluster. */
    private static final long STATE_POLL_INTERVAL_MILLIS = 10 * 1000L;

    /** Name of the bootstrap action that installs HBase; also used to recognise HBase clusters. */
    private static final String INSTALL_HBASE_ACTION_NAME = "Install HBase";

    /**
     * Returns a usable Apache HBase cluster on Amazon EMR, launching a new one when necessary.
     * <p>
     * If {@code clusterIdentifier} names an existing cluster that {@link #clusterExists} accepts,
     * that identifier is returned unchanged. Otherwise a new job flow is started with the HBase
     * bootstrap action, a debugging step, an HBase start step and an HBase backup-schedule step,
     * and this method blocks (polling every 10 seconds) until the cluster reports the
     * {@code WAITING} state.
     * <p>
     * If the new cluster reaches {@code TERMINATED_WITH_ERRORS} the JVM is terminated via
     * {@code System.exit(-1)}.
     *
     * @param client - AmazonElasticMapReduce client that interfaces directly with the Amazon EMR Web Service
     * @param clusterIdentifier - identifier of an existing cluster; may be null or empty to force a launch
     * @param amiVersion - AMI version to use for launching this cluster
     * @param keypair - A keypair for SSHing into the Amazon EMR master node
     * @param masterInstanceType - Master node Amazon EC2 instance type
     * @param coreInstanceType - core nodes Amazon EC2 instance type
     * @param logUri - An Amazon S3 location for the cluster logs
     * @param numberOfNodes - total number of nodes in this cluster including master node; must be at least 1
     * @return the identifier of the existing cluster, or the job flow id of the newly created one
     * @throws RuntimeException if a required argument is missing, if {@code numberOfNodes} is less
     *         than 1, if the existing cluster has no HBase bootstrap action, or if the calling
     *         thread is interrupted while waiting for the cluster
     */
    public static String createCluster(AmazonElasticMapReduce client,
            String clusterIdentifier,
            String amiVersion,
            String keypair,
            String masterInstanceType,
            String coreInstanceType,
            String logUri,
            int numberOfNodes) {
        if (clusterExists(client, clusterIdentifier)) {
            LOG.info("Cluster " + clusterIdentifier + " is available");
            return clusterIdentifier;
        }

        // Error checking
        requireNonEmpty(amiVersion, "ERROR: Please specify an AMI Version");
        requireNonEmpty(keypair, "ERROR: Please specify a valid Amazon Key Pair");
        requireNonEmpty(masterInstanceType, "ERROR: Please specify a Master Instance Type");
        requireNonEmpty(logUri, "ERROR: Please specify a valid Amazon S3 bucket for your logs.");
        // The master node counts as a node, so zero nodes is not a valid cluster size.
        if (numberOfNodes < 1) {
            throw new RuntimeException("ERROR: Please specify at least 1 node");
        }

        RunJobFlowRequest request = new RunJobFlowRequest()
                .withAmiVersion(amiVersion)
                .withBootstrapActions(new BootstrapActionConfig()
                        .withName(INSTALL_HBASE_ACTION_NAME)
                        .withScriptBootstrapAction(new ScriptBootstrapActionConfig()
                                .withPath("s3://elasticmapreduce/bootstrap-actions/setup-hbase")))
                .withName("Job Flow With HBAse Actions")
                .withSteps(new StepConfig() // enable debugging step
                                .withName("Enable debugging")
                                .withActionOnFailure("TERMINATE_CLUSTER")
                                .withHadoopJarStep(new StepFactory().newEnableDebuggingStep()),
                        // Start HBase step - after installing it with a bootstrap action
                        createStepConfig("Start HBase", "TERMINATE_CLUSTER", "/home/hadoop/lib/hbase.jar", getHBaseArgs()),
                        // add HBase backup step
                        createStepConfig("Modify backup schedule", "TERMINATE_JOB_FLOW", "/home/hadoop/lib/hbase.jar", getHBaseBackupArgs()))
                .withLogUri(logUri)
                .withInstances(new JobFlowInstancesConfig()
                        .withEc2KeyName(keypair)
                        .withInstanceCount(numberOfNodes)
                        .withKeepJobFlowAliveWhenNoSteps(true)
                        .withMasterInstanceType(masterInstanceType)
                        .withSlaveInstanceType(coreInstanceType));

        RunJobFlowResult result = client.runJobFlow(request);
        String jobFlowId = result.getJobFlowId();

        // Poll until the cluster is idle and ready. Comparisons are written constant-first
        // because clusterState() may return null.
        String state = clusterState(client, jobFlowId);
        while (!"WAITING".equalsIgnoreCase(state)) {
            if ("TERMINATED_WITH_ERRORS".equalsIgnoreCase(state)) {
                LOG.error("Could not create EMR Cluster");
                System.exit(-1);
            }
            LOG.info(jobFlowId + " is " + state + ". Waiting for cluster to become available.");
            try {
                Thread.sleep(STATE_POLL_INTERVAL_MILLIS);
            } catch (InterruptedException e) {
                // Restore the flag and stop waiting; continuing would spin without sleeping.
                Thread.currentThread().interrupt();
                throw new RuntimeException("Interrupted while waiting for cluster " + jobFlowId
                        + " to become available", e);
            }
            state = clusterState(client, jobFlowId);
        }

        LOG.info("Created cluster " + jobFlowId);
        LOG.info("Cluster " + jobFlowId + " is available");
        return jobFlowId;
    }

    /**
     * Helper method to determine if a usable Amazon EMR HBase cluster exists.
     * <p>
     * The cluster must appear in the result of {@code client.listClusters()}, must have the
     * "Install HBase" bootstrap action, and must be in the {@code RUNNING} or {@code WAITING} state.
     * <p>
     * NOTE(review): {@code listClusters()} is called once without a marker, so presumably only the
     * first page of results is examined - confirm against the SDK documentation.
     *
     * @param client
     *            The {@link AmazonElasticMapReduce} client with read permissions
     * @param clusterIdentifier
     *            The Amazon EMR cluster to check; null or empty yields false
     * @return true if the Amazon EMR cluster exists and is running or waiting, otherwise false
     * @throws RuntimeException if the cluster is listed but HBase is not installed on it
     */
    public static boolean clusterExists(AmazonElasticMapReduce client, String clusterIdentifier) {
        if (clusterIdentifier != null && !clusterIdentifier.isEmpty()) {
            ListClustersResult clustersList = client.listClusters();
            // A for-each visits every summary, including the last one, and copes with an empty list.
            for (ClusterSummary summary : clustersList.getClusters()) {
                if (!clusterIdentifier.equals(summary.getId())) {
                    continue;
                }
                DescribeClusterRequest describeClusterRequest = new DescribeClusterRequest().withClusterId(clusterIdentifier);
                DescribeClusterResult result = client.describeCluster(describeClusterRequest);
                if (result != null) {
                    Cluster cluster = result.getCluster();
                    // check if HBase is installed on this cluster
                    if (!isHBaseInstalled(client, cluster.getId())) {
                        throw new RuntimeException("ERROR: Apache HBase is not installed on this cluster!!");
                    }
                    String state = cluster.getStatus().getState();
                    LOG.info(clusterIdentifier + " is " + state + ". ");
                    if ("RUNNING".equalsIgnoreCase(state) || "WAITING".equalsIgnoreCase(state)) {
                        LOG.info("The cluster with id " + clusterIdentifier + " exists and is " + state);
                        return true;
                    }
                }
            }
        }
        LOG.info("The cluster with id " + clusterIdentifier + " does not exist");
        return false;
    }

    /**
     * Helper method to determine the Amazon EMR cluster state.
     *
     * @param client
     *            The {@link AmazonElasticMapReduce} client with read permissions
     * @param clusterIdentifier
     *            The Amazon EMR cluster to get the state of - e.g. j-2A98VJHDSU48M
     * @return The String representation of the Amazon EMR cluster state, or null when the
     *         describe call yields no result
     */
    public static String clusterState(AmazonElasticMapReduce client, String clusterIdentifier) {
        DescribeClusterRequest describeClusterRequest = new DescribeClusterRequest().withClusterId(clusterIdentifier);
        DescribeClusterResult result = client.describeCluster(describeClusterRequest);
        if (result != null) {
            return result.getCluster().getStatus().getState();
        }
        return null;
    }

    /**
     * Helper method to determine the master node public DNS of an Amazon EMR cluster.
     * The first job flow returned for {@code clusterId} is used.
     *
     * @param client - The {@link AmazonElasticMapReduce} client with read permissions
     * @param clusterId - unique identifier for this cluster
     * @return public DNS name of the master node
     */
    public static String getPublicDns(AmazonElasticMapReduce client, String clusterId) {
        DescribeJobFlowsResult describeJobFlows = client.describeJobFlows(new DescribeJobFlowsRequest().withJobFlowIds(clusterId));
        List<JobFlowDetail> jobFlows = describeJobFlows.getJobFlows();
        JobFlowDetail jobflow = jobFlows.get(0);
        JobFlowInstancesDetail instancesDetail = jobflow.getInstances();
        String masterPublicDns = instancesDetail.getMasterPublicDnsName();
        LOG.info("EMR cluster public DNS is " + masterPublicDns);
        return masterPublicDns;
    }

    /**
     * Helper method to determine if HBase is installed on this cluster, i.e. whether one of the
     * cluster's bootstrap actions is named "Install HBase" (case-insensitive).
     *
     * @param client - The {@link AmazonElasticMapReduce} client with read permissions
     * @param clusterId - unique identifier for this cluster
     * @return true if the HBase bootstrap action is present, otherwise false
     */
    private static boolean isHBaseInstalled(AmazonElasticMapReduce client, String clusterId) {
        ListBootstrapActionsResult bootstrapActions = client.listBootstrapActions(new ListBootstrapActionsRequest()
                .withClusterId(clusterId));
        for (Command command : bootstrapActions.getBootstrapActions()) {
            if (INSTALL_HBASE_ACTION_NAME.equalsIgnoreCase(command.getName())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Throws a {@link RuntimeException} carrying {@code message} when {@code value} is null or empty.
     *
     * @param value - the argument to validate
     * @param message - the error message to report
     */
    private static void requireNonEmpty(String value, String message) {
        if (value == null || value.isEmpty()) {
            throw new RuntimeException(message);
        }
    }

    /**
     * This is a helper method for creating step configuration information.
     *
     * @param stepName - a custom name to label this step
     * @param actionOnFailure - action to take when the step fails, e.g. TERMINATE_CLUSTER
     * @param jarPath - path to jar file - could be on S3 or local file system
     * @param args - list of Java args to configure custom step
     * @return a step configuration that runs the given jar with the given arguments
     */
    private static StepConfig createStepConfig(String stepName, String actionOnFailure, String jarPath, List<String> args) {
        return new StepConfig()
                .withName(stepName)
                .withActionOnFailure(actionOnFailure)
                .withHadoopJarStep(new HadoopJarStepConfig()
                        .withJar(jarPath)
                        .withArgs(args));
    }

    /**
     * Helper method to construct the arguments of the "Start HBase" step.
     *
     * @return A list of HBase arguments: the main class followed by {@code --start-master}
     */
    private static List<String> getHBaseArgs() {
        List<String> hbaseArgs = new ArrayList<String>();
        hbaseArgs.add("emr.hbase.backup.Main");
        hbaseArgs.add("--start-master");
        return hbaseArgs;
    }

    /**
     * Helper method to construct the arguments of the "Modify backup schedule" step.
     * The backup directory is a fixed S3 location and the incremental interval is 1 hour.
     *
     * @return A list of HBase backup arguments
     */
    private static List<String> getHBaseBackupArgs() {
        List<String> hbaseArgs = new ArrayList<String>();
        hbaseArgs.add("emr.hbase.backup.Main");
        hbaseArgs.add("--set-scheduled-backup");
        hbaseArgs.add("true");
        hbaseArgs.add("--backup-dir");
        hbaseArgs.add("s3://wdhbase/backups/kinesiscluster");
        hbaseArgs.add("--incremental-backup-time-interval");
        hbaseArgs.add("1");
        hbaseArgs.add("--incremental-backup-time-unit");
        hbaseArgs.add("hours");
        hbaseArgs.add("--start-time");
        hbaseArgs.add("now");
        hbaseArgs.add("--consistent");
        return hbaseArgs;
    }
}