/*
* Copyright 2015 herd contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.finra.herd.dao.helper;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.regex.Pattern;
import com.amazonaws.services.elasticmapreduce.model.ActionOnFailure;
import com.amazonaws.services.elasticmapreduce.model.Cluster;
import com.amazonaws.services.elasticmapreduce.model.ClusterSummary;
import com.amazonaws.services.elasticmapreduce.model.Configuration;
import com.amazonaws.services.elasticmapreduce.model.EbsBlockDevice;
import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;
import com.amazonaws.services.elasticmapreduce.model.InstanceFleet;
import com.amazonaws.services.elasticmapreduce.model.InstanceFleetProvisioningSpecifications;
import com.amazonaws.services.elasticmapreduce.model.InstanceFleetStatus;
import com.amazonaws.services.elasticmapreduce.model.InstanceFleetTimeline;
import com.amazonaws.services.elasticmapreduce.model.InstanceTypeSpecification;
import com.amazonaws.services.elasticmapreduce.model.ListInstanceFleetsResult;
import com.amazonaws.services.elasticmapreduce.model.SpotProvisioningSpecification;
import com.amazonaws.services.elasticmapreduce.model.StepConfig;
import com.amazonaws.services.elasticmapreduce.model.VolumeSpecification;
import com.amazonaws.services.securitytoken.model.Credentials;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.util.Assert;
import org.springframework.util.CollectionUtils;
import org.finra.herd.core.HerdDateUtils;
import org.finra.herd.dao.EmrDao;
import org.finra.herd.dao.StsDao;
import org.finra.herd.model.api.xml.EmrClusterEbsBlockDevice;
import org.finra.herd.model.api.xml.EmrClusterInstanceFleet;
import org.finra.herd.model.api.xml.EmrClusterInstanceFleetProvisioningSpecifications;
import org.finra.herd.model.api.xml.EmrClusterInstanceFleetStateChangeReason;
import org.finra.herd.model.api.xml.EmrClusterInstanceFleetStatus;
import org.finra.herd.model.api.xml.EmrClusterInstanceFleetTimeline;
import org.finra.herd.model.api.xml.EmrClusterInstanceTypeConfiguration;
import org.finra.herd.model.api.xml.EmrClusterInstanceTypeSpecification;
import org.finra.herd.model.api.xml.EmrClusterSpotProvisioningSpecification;
import org.finra.herd.model.api.xml.EmrClusterVolumeSpecification;
import org.finra.herd.model.api.xml.InstanceDefinitions;
import org.finra.herd.model.api.xml.Parameter;
import org.finra.herd.model.dto.AwsParamsDto;
import org.finra.herd.model.dto.ConfigurationValue;
import org.finra.herd.model.jpa.TrustingAccountEntity;
/**
* A helper class that provides EMR functions.
*/
@Component
public class EmrHelper extends AwsHelper
{
// DAO used by this helper to look up EMR clusters, either by ID or (active clusters only) by name.
@Autowired
private EmrDao emrDao;
// DAO used to obtain temporary security credentials when a role in another AWS account is assumed.
@Autowired
private StsDao stsDao;
// Helper used to retrieve the trusting account entity (and its role ARN) for a given AWS account ID.
@Autowired
private TrustingAccountDaoHelper trustingAccountDaoHelper;
/**
 * Returns EMR cluster name constructed according to the template defined.
 * <p>
 * The template is read from the {@code EMR_CLUSTER_NAME_TEMPLATE} configuration value. When no template is configured, a default of the form
 * {@code <d>namespace<d>.<d>emrDefinitionName<d>.<d>clusterName<d>} is used, where {@code <d>} is the configured {@code TEMPLATE_TOKEN_DELIMITER} (for
 * example {@code ~namespace~.~emrDefinitionName~.~clusterName~}).
 * <p>
 * Tokens and their values are substituted literally: neither the delimiter nor the supplied values are interpreted as regular expressions, so characters
 * such as {@code $}, {@code \} or {@code |} are safe to use in both.
 *
 * @param namespaceCd the namespace code value.
 * @param emrDefinitionName the EMR definition name value.
 * @param clusterName the cluster name value.
 *
 * @return the cluster name.
 * @throws NullPointerException if a token that is present in the template maps to a {@code null} value.
 */
public String buildEmrClusterName(String namespaceCd, String emrDefinitionName, String clusterName)
{
    // Set the token delimiter based on the environment configuration.
    String tokenDelimiter = configurationHelper.getProperty(ConfigurationValue.TEMPLATE_TOKEN_DELIMITER);

    // Setup the individual token names (using the configured delimiter).
    String namespaceToken = tokenDelimiter + "namespace" + tokenDelimiter;
    String emrDefinitionToken = tokenDelimiter + "emrDefinitionName" + tokenDelimiter;
    String clusterNameToken = tokenDelimiter + "clusterName" + tokenDelimiter;

    // Populate a map with the tokens mapped to the actual values.
    Map<String, String> pathToTokenValueMap = new HashMap<>();
    pathToTokenValueMap.put(namespaceToken, namespaceCd);
    pathToTokenValueMap.put(emrDefinitionToken, emrDefinitionName);
    pathToTokenValueMap.put(clusterNameToken, clusterName);

    // Set the default EMR cluster name tokenized template, e.g.
    // ~namespace~.~emrDefinitionName~.~clusterName~
    String defaultClusterNameTemplate = namespaceToken + "." + emrDefinitionToken + "." + clusterNameToken;

    // Get the EMR cluster name template from the environment, but use the default if one isn't configured.
    // This gives us the ability to customize/change the format post deployment.
    String emrClusterName = configurationHelper.getProperty(ConfigurationValue.EMR_CLUSTER_NAME_TEMPLATE);
    if (emrClusterName == null)
    {
        emrClusterName = defaultClusterNameTemplate;
    }

    // Substitute the tokens with the actual values.
    for (Map.Entry<String, String> mapEntry : pathToTokenValueMap.entrySet())
    {
        String token = mapEntry.getKey();

        // Only touch tokens that actually occur in the template, so that an unused token with a null value stays harmless.
        if (emrClusterName.contains(token))
        {
            // Literal replacement: String.replaceAll() would treat the token as a regular expression and would interpret
            // '$' and '\' in the value as group references/escapes, failing or corrupting the name for such inputs.
            emrClusterName = emrClusterName.replace(token, mapEntry.getValue());
        }
    }

    return emrClusterName;
}
/**
 * Resolves the ID of an active EMR cluster from a cluster ID and/or a cluster name.
 * <p>
 * At least one of the cluster ID and the cluster name must be non-blank; blank values are treated as not specified and specified values are trimmed before
 * use. When the ID is specified, the cluster is looked up by ID, must exist and must be in one of the configured active states; if the name is specified
 * as well, it must match the actual cluster name ignoring case. When only the name is specified, the active cluster is looked up by that name and must
 * exist.
 *
 * @param emrClusterId EMR cluster ID
 * @param emrClusterName EMR cluster name
 * @param accountId the account Id that EMR cluster is running under
 *
 * @return The cluster ID
 */
public String getActiveEmrClusterId(String emrClusterId, String emrClusterName, String accountId)
{
    final boolean hasClusterId = StringUtils.isNotBlank(emrClusterId);
    final boolean hasClusterName = StringUtils.isNotBlank(emrClusterName);
    Assert.isTrue(hasClusterId || hasClusterName, "One of EMR cluster ID or EMR cluster name must be specified.");

    final AwsParamsDto awsParams = getAwsParamsDtoByAcccountId(accountId);

    // Name-only lookup: the DAO is asked for an active cluster with the given name.
    if (!hasClusterId)
    {
        final String requestedName = emrClusterName.trim();
        final ClusterSummary activeCluster = emrDao.getActiveEmrClusterByName(requestedName, awsParams);
        Assert.notNull(activeCluster, String.format("The cluster with name \"%s\" does not exist.", requestedName));
        return activeCluster.getId();
    }

    // ID lookup: the cluster must exist...
    final String requestedId = emrClusterId.trim();
    final Cluster cluster = emrDao.getEmrClusterById(requestedId, awsParams);
    Assert.notNull(cluster, String.format("The cluster with ID \"%s\" does not exist.", requestedId));

    // ...must be in an active state...
    final String currentState = cluster.getStatus().getState();
    final boolean active = isActiveEmrState(currentState);
    final String notActiveMessage = String
        .format("The cluster with ID \"%s\" is not active. The cluster state must be in one of %s. Current state is \"%s\"", requestedId,
            Arrays.toString(getActiveEmrClusterStates()), currentState);
    Assert.isTrue(active, notActiveMessage);

    // ...and, when a name was supplied too, must carry that name (case-insensitive).
    if (hasClusterName)
    {
        final String expectedName = emrClusterName.trim();
        final String actualName = cluster.getName();
        final boolean nameMatches = actualName.equalsIgnoreCase(expectedName);
        final String nameMismatchMessage = String
            .format("The cluster with ID \"%s\" does not match the expected name \"%s\". The actual name is \"%s\".", cluster.getId(), expectedName,
                actualName);
        Assert.isTrue(nameMatches, nameMismatchMessage);
    }

    return cluster.getId();
}
/**
 * Builds the AWS parameters DTO to use for the given account. When the account ID is blank, the default parameters are returned unchanged; otherwise the
 * default parameters are updated with temporary credentials for cross-account access to the (trimmed) account ID.
 *
 * @param accountId account Id
 *
 * @return AwsParamsDto
 */
public AwsParamsDto getAwsParamsDtoByAcccountId(String accountId)
{
    final AwsParamsDto params = getAwsParamsDto();

    // No account specified: stay with the default connection parameters.
    if (StringUtils.isBlank(accountId))
    {
        return params;
    }

    updateAwsParamsForCrossAccountAccess(params, accountId.trim());
    return params;
}
/**
 * Gets the EMR DAO used by this helper.
 *
 * @return the EMR DAO
 */
public EmrDao getEmrDao()
{
return emrDao;
}
/**
 * Sets the EMR DAO used by this helper, replacing the currently held instance.
 *
 * @param emrDao the EMR DAO to use
 */
public void setEmrDao(EmrDao emrDao)
{
this.emrDao = emrDao;
}
/**
 * Creates the StepConfig describing a Hadoop jar step.
 * <p>
 * The step name and the jar location are trimmed; the main class is passed through as is. The step continues on failure only when
 * {@code isContinueOnError} is {@code true}; otherwise (including {@code null}) the action on failure is to cancel and wait. Arguments are attached to the
 * step only when the supplied list is not empty.
 *
 * @param stepName the step name.
 * @param jarLocation the location of jar.
 * @param mainClass the main class.
 * @param scriptArguments the arguments.
 * @param isContinueOnError indicate what to do on error.
 *
 * @return the stepConfig.
 */
public StepConfig getEmrHadoopJarStepConfig(String stepName, String jarLocation, String mainClass, List<String> scriptArguments, Boolean isContinueOnError)
{
    // Cancel-and-wait is the default; the caller may opt into continuing on failure.
    final ActionOnFailure failureAction = Boolean.TRUE.equals(isContinueOnError) ? ActionOnFailure.CONTINUE : ActionOnFailure.CANCEL_AND_WAIT;

    final StepConfig stepConfig = new StepConfig().withName(stepName.trim()).withActionOnFailure(failureAction);

    final HadoopJarStepConfig hadoopJarStep = new HadoopJarStepConfig().withJar(jarLocation.trim()).withMainClass(mainClass);

    // Arguments are optional and only set when there is at least one.
    if (!CollectionUtils.isEmpty(scriptArguments))
    {
        hadoopJarStep.withArgs(scriptArguments.toArray(new String[scriptArguments.size()]));
    }

    return stepConfig.withHadoopJarStep(hadoopJarStep);
}
/**
 * Builds the S3 staging location by concatenating, in this order, the configured S3 URL protocol, the staging bucket name, the S3 URL path delimiter and
 * the staging resource base.
 *
 * @return the s3 managed location.
 */
public String getS3StagingLocation()
{
    final String urlProtocol = configurationHelper.getProperty(ConfigurationValue.S3_URL_PROTOCOL);
    final String stagingBucketName = configurationHelper.getProperty(ConfigurationValue.S3_STAGING_BUCKET_NAME);
    final String pathDelimiter = configurationHelper.getProperty(ConfigurationValue.S3_URL_PATH_DELIMITER);
    final String stagingResourceBase = configurationHelper.getProperty(ConfigurationValue.S3_STAGING_RESOURCE_BASE);

    return urlProtocol + stagingBucketName + pathDelimiter + stagingResourceBase;
}
/**
 * Tells whether the supplied EMR status is one of the configured active EMR cluster states. The comparison is an exact (case-sensitive) string match; a
 * {@code null} status is never active.
 *
 * @param status the EMR status
 *
 * @return whether the given EMR status is active
 */
public boolean isActiveEmrState(String status)
{
    for (String activeState : getActiveEmrClusterStates())
    {
        if (activeState.equals(status))
        {
            return true;
        }
    }

    return false;
}
/**
 * Tells whether the supplied InstanceDefinitions carries no instance definitions at all, i.e. it is {@code null} or its master, core and task instances
 * are all {@code null}.
 *
 * @param instanceDefinitions the instance group definitions from the EMR cluster definition
 *
 * @return whether the given InstanceDefinitions is empty
 */
public boolean isInstanceDefinitionsEmpty(InstanceDefinitions instanceDefinitions)
{
    if (instanceDefinitions == null)
    {
        return true;
    }

    // Any single populated instance definition makes the whole object non-empty.
    if (instanceDefinitions.getMasterInstances() != null)
    {
        return false;
    }
    if (instanceDefinitions.getCoreInstances() != null)
    {
        return false;
    }
    return instanceDefinitions.getTaskInstances() == null;
}
/**
 * Returns the EMR cluster states that are considered active, obtained by splitting the {@code EMR_VALID_STATES} configuration value on the configured
 * {@code FIELD_DATA_DELIMITER}.
 *
 * @return the active EMR cluster states
 */
private String[] getActiveEmrClusterStates()
{
    String emrStatesString = configurationHelper.getProperty(ConfigurationValue.EMR_VALID_STATES);
    String fieldDataDelimiter = configurationHelper.getProperty(ConfigurationValue.FIELD_DATA_DELIMITER);

    // Quote the whole delimiter so it is always matched literally. Prefixing it with a single backslash only escapes its first
    // character and turns an alphanumeric delimiter into a regex escape sequence (e.g. "\t" or "\d").
    return emrStatesString.split(Pattern.quote(fieldDataDelimiter));
}
/**
 * Replaces the credentials held by the AWS parameters DTO with temporary credentials obtained by assuming the role registered for the given trusting
 * account.
 *
 * @param awsParamsDto the AWS connection parameters
 * @param accountId the AWS account number
 */
private void updateAwsParamsForCrossAccountAccess(AwsParamsDto awsParamsDto, String accountId)
{
    // Look up the trusting account for the (trimmed) account ID and take its role ARN.
    final TrustingAccountEntity trustingAccount = trustingAccountDaoHelper.getTrustingAccountEntity(accountId.trim());
    final String roleArn = trustingAccount.getRoleArn();

    // Assume the role under a random session name. The role session lasts 3600 seconds (1 hour).
    final int sessionDurationSeconds = 3600;
    final String sessionName = UUID.randomUUID().toString();
    final Credentials temporaryCredentials = stsDao.getTemporarySecurityCredentials(awsParamsDto, sessionName, roleArn, sessionDurationSeconds, null);

    // Switch the DTO over to the temporary credentials.
    awsParamsDto.setAwsAccessKeyId(temporaryCredentials.getAccessKeyId());
    awsParamsDto.setAwsSecretKey(temporaryCredentials.getSecretAccessKey());
    awsParamsDto.setSessionToken(temporaryCredentials.getSessionToken());
}
/**
 * Converts the instance fleets returned by an AWS "list instance fleets" call into a list of EmrClusterInstanceFleet objects. {@code null} entries in the
 * AWS fleet list are skipped.
 *
 * @param awsInstanceFleetsResult AWS Instance Fleets result
 *
 * @return list of EmrClusterInstanceFleet, or {@code null} when the AWS result is {@code null} or contains no instance fleets
 */
public List<EmrClusterInstanceFleet> buildEmrClusterInstanceFleetFromAwsResult(ListInstanceFleetsResult awsInstanceFleetsResult)
{
    // Nothing to convert: keep returning null (rather than an empty list) as callers may rely on it.
    if (awsInstanceFleetsResult == null || CollectionUtils.isEmpty(awsInstanceFleetsResult.getInstanceFleets()))
    {
        return null;
    }

    // Parameterized list (the raw ArrayList used previously caused an unchecked assignment).
    List<EmrClusterInstanceFleet> emrInstanceFleets = new ArrayList<>();

    for (InstanceFleet awsInstanceFleet : awsInstanceFleetsResult.getInstanceFleets())
    {
        if (awsInstanceFleet != null)
        {
            EmrClusterInstanceFleet emrInstanceFleet = new EmrClusterInstanceFleet();
            emrInstanceFleet.setId(awsInstanceFleet.getId());
            emrInstanceFleet.setName(awsInstanceFleet.getName());
            emrInstanceFleet.setInstanceFleetType(awsInstanceFleet.getInstanceFleetType());
            emrInstanceFleet.setTargetOnDemandCapacity(awsInstanceFleet.getTargetOnDemandCapacity());
            emrInstanceFleet.setTargetSpotCapacity(awsInstanceFleet.getTargetSpotCapacity());
            emrInstanceFleet.setProvisionedOnDemandCapacity(awsInstanceFleet.getProvisionedOnDemandCapacity());
            emrInstanceFleet.setProvisionedSpotCapacity(awsInstanceFleet.getProvisionedSpotCapacity());

            // Nested AWS structures are converted by the dedicated helper methods.
            emrInstanceFleet.setInstanceTypeSpecifications(getInstanceTypeSpecifications(awsInstanceFleet.getInstanceTypeSpecifications()));
            emrInstanceFleet.setLaunchSpecifications(getLaunchSpecifications(awsInstanceFleet.getLaunchSpecifications()));
            emrInstanceFleet.setInstanceFleetStatus(getEmrClusterInstanceFleetStatus(awsInstanceFleet.getStatus()));

            emrInstanceFleets.add(emrInstanceFleet);
        }
    }

    return emrInstanceFleets;
}
/**
 * Converts an AWS instance fleet status into an EmrClusterInstanceFleetStatus, copying the state and, when present, the state change reason and the
 * timeline.
 *
 * @param instanceFleetStatus AWS object
 *
 * @return EmrClusterInstanceFleetStatus, or {@code null} when the AWS object is {@code null}
 */
protected EmrClusterInstanceFleetStatus getEmrClusterInstanceFleetStatus(InstanceFleetStatus instanceFleetStatus)
{
    if (instanceFleetStatus == null)
    {
        return null;
    }

    final EmrClusterInstanceFleetStatus result = new EmrClusterInstanceFleetStatus();
    result.setState(instanceFleetStatus.getState());

    // The state change reason is optional.
    if (instanceFleetStatus.getStateChangeReason() != null)
    {
        final EmrClusterInstanceFleetStateChangeReason reason = new EmrClusterInstanceFleetStateChangeReason();
        reason.setCode(instanceFleetStatus.getStateChangeReason().getCode());
        reason.setMessage(instanceFleetStatus.getStateChangeReason().getMessage());
        result.setStateChangeReason(reason);
    }

    // The timeline is optional as well; its dates are converted to XML Gregorian calendar values.
    final InstanceFleetTimeline awsTimeline = instanceFleetStatus.getTimeline();
    if (awsTimeline != null)
    {
        final EmrClusterInstanceFleetTimeline timeline = new EmrClusterInstanceFleetTimeline();
        timeline.setCreationDateTime(HerdDateUtils.getXMLGregorianCalendarValue(awsTimeline.getCreationDateTime()));
        timeline.setEndDateTime(HerdDateUtils.getXMLGregorianCalendarValue(awsTimeline.getEndDateTime()));
        timeline.setReadyDateTime(HerdDateUtils.getXMLGregorianCalendarValue(awsTimeline.getReadyDateTime()));
        result.setTimeline(timeline);
    }

    return result;
}
/**
 * Converts AWS instance fleet provisioning specifications into EmrClusterInstanceFleetProvisioningSpecifications, carrying over the spot specification.
 *
 * @param instanceFleetProvisioningSpecifications AWS object
 *
 * @return EmrClusterInstanceFleetProvisioningSpecifications, or {@code null} when the AWS object is {@code null}
 */
protected EmrClusterInstanceFleetProvisioningSpecifications getLaunchSpecifications(
    InstanceFleetProvisioningSpecifications instanceFleetProvisioningSpecifications)
{
    if (instanceFleetProvisioningSpecifications == null)
    {
        return null;
    }

    final EmrClusterInstanceFleetProvisioningSpecifications launchSpecifications = new EmrClusterInstanceFleetProvisioningSpecifications();
    final SpotProvisioningSpecification awsSpotSpecification = instanceFleetProvisioningSpecifications.getSpotSpecification();
    launchSpecifications.setSpotSpecification(getSpotSpecification(awsSpotSpecification));
    return launchSpecifications;
}
/**
 * Converts an AWS spot provisioning specification into an EmrClusterSpotProvisioningSpecification, copying the timeout duration, the timeout action and
 * the block duration.
 *
 * @param spotProvisioningSpecification AWS object
 *
 * @return EmrClusterSpotProvisioningSpecification, or {@code null} when the AWS object is {@code null}
 */
protected EmrClusterSpotProvisioningSpecification getSpotSpecification(SpotProvisioningSpecification spotProvisioningSpecification)
{
    if (spotProvisioningSpecification == null)
    {
        return null;
    }

    final EmrClusterSpotProvisioningSpecification spotSpecification = new EmrClusterSpotProvisioningSpecification();
    spotSpecification.setTimeoutDurationMinutes(spotProvisioningSpecification.getTimeoutDurationMinutes());
    spotSpecification.setTimeoutAction(spotProvisioningSpecification.getTimeoutAction());
    spotSpecification.setBlockDurationMinutes(spotProvisioningSpecification.getBlockDurationMinutes());
    return spotSpecification;
}
/**
 * Converts a list of AWS EBS block devices into a list of EmrClusterEbsBlockDevice objects. {@code null} entries of the AWS list are skipped.
 *
 * @param ebsBlockDevices AWS object
 *
 * @return list of EmrClusterEbsBlockDevice, or {@code null} when the AWS list is {@code null} or empty
 */
protected List<EmrClusterEbsBlockDevice> getEbsBlockDevices(List<EbsBlockDevice> ebsBlockDevices)
{
    if (CollectionUtils.isEmpty(ebsBlockDevices))
    {
        return null;
    }

    final List<EmrClusterEbsBlockDevice> converted = new ArrayList<>();
    for (EbsBlockDevice awsDevice : ebsBlockDevices)
    {
        if (awsDevice == null)
        {
            continue;
        }

        final EmrClusterEbsBlockDevice device = new EmrClusterEbsBlockDevice();
        device.setDevice(awsDevice.getDevice());
        device.setVolumeSpecification(getVolumeSpecification(awsDevice.getVolumeSpecification()));
        converted.add(device);
    }
    return converted;
}
/**
 * Converts an AWS volume specification into an EmrClusterVolumeSpecification, copying the volume type, the IOPS and the size in GB.
 *
 * @param volumeSpecification AWS object
 *
 * @return EmrClusterVolumeSpecification, or {@code null} when the AWS object is {@code null}
 */
protected EmrClusterVolumeSpecification getVolumeSpecification(VolumeSpecification volumeSpecification)
{
    if (volumeSpecification == null)
    {
        return null;
    }

    final EmrClusterVolumeSpecification converted = new EmrClusterVolumeSpecification();
    converted.setVolumeType(volumeSpecification.getVolumeType());
    converted.setIops(volumeSpecification.getIops());
    converted.setSizeInGB(volumeSpecification.getSizeInGB());
    return converted;
}
/**
 * Converts a list of AWS instance type specifications into a list of EmrClusterInstanceTypeSpecification objects. {@code null} entries of the AWS list
 * are skipped.
 *
 * @param awsInstanceTypeConfigs AWS object
 *
 * @return list of EmrClusterInstanceTypeSpecification, or {@code null} when the AWS list is {@code null} or empty
 */
protected List<EmrClusterInstanceTypeSpecification> getInstanceTypeSpecifications(List<InstanceTypeSpecification> awsInstanceTypeConfigs)
{
    if (CollectionUtils.isEmpty(awsInstanceTypeConfigs))
    {
        return null;
    }

    final List<EmrClusterInstanceTypeSpecification> converted = new ArrayList<>();
    for (InstanceTypeSpecification awsSpecification : awsInstanceTypeConfigs)
    {
        if (awsSpecification == null)
        {
            continue;
        }

        final EmrClusterInstanceTypeSpecification specification = new EmrClusterInstanceTypeSpecification();
        specification.setInstanceType(awsSpecification.getInstanceType());
        specification.setWeightedCapacity(awsSpecification.getWeightedCapacity());
        specification.setBidPrice(awsSpecification.getBidPrice());
        specification.setBidPriceAsPercentageOfOnDemandPrice(awsSpecification.getBidPriceAsPercentageOfOnDemandPrice());
        specification.setEbsBlockDevices(getEbsBlockDevices(awsSpecification.getEbsBlockDevices()));
        specification.setEbsOptimized(awsSpecification.getEbsOptimized());
        specification.setConfigurations(getConfigurations(awsSpecification.getConfigurations()));
        converted.add(specification);
    }
    return converted;
}
/**
 * Converts a list of AWS configurations into a list of EmrClusterInstanceTypeConfiguration objects. Nested configurations are converted recursively and
 * {@code null} entries of the AWS list are skipped.
 *
 * @param configurations AWS configuration object list
 *
 * @return list of EmrClusterInstanceTypeConfiguration, or {@code null} when the AWS list is {@code null} or empty
 */
protected List<EmrClusterInstanceTypeConfiguration> getConfigurations(List<Configuration> configurations)
{
    if (CollectionUtils.isEmpty(configurations))
    {
        return null;
    }

    final List<EmrClusterInstanceTypeConfiguration> converted = new ArrayList<>();
    for (Configuration awsConfiguration : configurations)
    {
        if (awsConfiguration == null)
        {
            continue;
        }

        final EmrClusterInstanceTypeConfiguration typeConfiguration = new EmrClusterInstanceTypeConfiguration();
        typeConfiguration.setClassification(awsConfiguration.getClassification());
        // Recurse into the nested configurations.
        typeConfiguration.setConfigurations(getConfigurations(awsConfiguration.getConfigurations()));
        typeConfiguration.setProperties(getParameterList(awsConfiguration.getProperties()));
        converted.add(typeConfiguration);
    }
    return converted;
}
/**
 * Converts a map of properties into a list of parameters, one parameter per map entry built from the entry's key and value.
 *
 * @param properties properties
 *
 * @return list of parameters, or {@code null} when the map is {@code null} or empty
 */
protected List<Parameter> getParameterList(Map<String, String> properties)
{
    if (CollectionUtils.isEmpty(properties))
    {
        return null;
    }

    final List<Parameter> converted = new ArrayList<>();
    for (Map.Entry<String, String> property : properties.entrySet())
    {
        converted.add(new Parameter(property.getKey(), property.getValue()));
    }
    return converted;
}
}