package com.sequenceiq.cloudbreak.service.cluster.flow;
import static com.sequenceiq.cloudbreak.common.type.CloudConstants.BYOS;
import static com.sequenceiq.cloudbreak.service.PollingResult.isExited;
import static com.sequenceiq.cloudbreak.service.PollingResult.isFailure;
import static com.sequenceiq.cloudbreak.service.PollingResult.isSuccess;
import static com.sequenceiq.cloudbreak.service.PollingResult.isTimeout;
import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationService.AMBARI_POLLING_INTERVAL;
import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationService.MAX_ATTEMPTS_FOR_AMBARI_SERVER_STARTUP;
import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationService.MAX_ATTEMPTS_FOR_HOSTS;
import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationType.INSTALL_AMBARI_PROGRESS_STATE;
import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationType.SMOKE_TEST_AMBARI_PROGRESS_STATE;
import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationType.START_AMBARI_PROGRESS_STATE;
import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationType.START_OPERATION_STATE;
import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationType.STOP_AMBARI_PROGRESS_STATE;
import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationType.UPSCALE_AMBARI_PROGRESS_STATE;
import static java.util.Collections.singletonMap;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Resource;
import javax.inject.Inject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.sequenceiq.ambari.client.AmbariClient;
import com.sequenceiq.ambari.client.AmbariConnectionException;
import com.sequenceiq.cloudbreak.api.model.AdlsFileSystemConfiguration;
import com.sequenceiq.cloudbreak.api.model.FileSystemConfiguration;
import com.sequenceiq.cloudbreak.api.model.FileSystemType;
import com.sequenceiq.cloudbreak.api.model.InstanceStatus;
import com.sequenceiq.cloudbreak.api.model.Status;
import com.sequenceiq.cloudbreak.client.HttpClientConfig;
import com.sequenceiq.cloudbreak.cloud.model.HDPRepo;
import com.sequenceiq.cloudbreak.cloud.scheduler.CancellationException;
import com.sequenceiq.cloudbreak.common.type.CloudConstants;
import com.sequenceiq.cloudbreak.common.type.HostMetadataState;
import com.sequenceiq.cloudbreak.common.type.ResourceType;
import com.sequenceiq.cloudbreak.controller.BadRequestException;
import com.sequenceiq.cloudbreak.core.CloudbreakException;
import com.sequenceiq.cloudbreak.core.CloudbreakImageNotFoundException;
import com.sequenceiq.cloudbreak.core.CloudbreakSecuritySetupException;
import com.sequenceiq.cloudbreak.core.ClusterException;
import com.sequenceiq.cloudbreak.core.bootstrap.service.OrchestratorTypeResolver;
import com.sequenceiq.cloudbreak.domain.Blueprint;
import com.sequenceiq.cloudbreak.domain.Cluster;
import com.sequenceiq.cloudbreak.domain.FileSystem;
import com.sequenceiq.cloudbreak.domain.HostGroup;
import com.sequenceiq.cloudbreak.domain.HostMetadata;
import com.sequenceiq.cloudbreak.domain.InstanceGroup;
import com.sequenceiq.cloudbreak.domain.InstanceMetaData;
import com.sequenceiq.cloudbreak.domain.KerberosConfig;
import com.sequenceiq.cloudbreak.domain.RDSConfig;
import com.sequenceiq.cloudbreak.domain.Stack;
import com.sequenceiq.cloudbreak.domain.Topology;
import com.sequenceiq.cloudbreak.domain.TopologyRecord;
import com.sequenceiq.cloudbreak.repository.ClusterRepository;
import com.sequenceiq.cloudbreak.repository.HostMetadataRepository;
import com.sequenceiq.cloudbreak.repository.InstanceMetaDataRepository;
import com.sequenceiq.cloudbreak.repository.RdsConfigRepository;
import com.sequenceiq.cloudbreak.repository.StackRepository;
import com.sequenceiq.cloudbreak.service.CloudbreakServiceException;
import com.sequenceiq.cloudbreak.service.ClusterComponentConfigProvider;
import com.sequenceiq.cloudbreak.service.PollingResult;
import com.sequenceiq.cloudbreak.service.PollingService;
import com.sequenceiq.cloudbreak.service.TlsSecurityService;
import com.sequenceiq.cloudbreak.service.cluster.AmbariAuthenticationProvider;
import com.sequenceiq.cloudbreak.service.cluster.AmbariClientProvider;
import com.sequenceiq.cloudbreak.service.cluster.AmbariOperationFailedException;
import com.sequenceiq.cloudbreak.service.cluster.HadoopConfigurationService;
import com.sequenceiq.cloudbreak.service.cluster.flow.blueprint.AutoRecoveryConfigProvider;
import com.sequenceiq.cloudbreak.service.cluster.flow.blueprint.BlueprintConfigurationEntry;
import com.sequenceiq.cloudbreak.service.cluster.flow.blueprint.BlueprintProcessor;
import com.sequenceiq.cloudbreak.service.cluster.flow.blueprint.DruidSupersetConfigProvider;
import com.sequenceiq.cloudbreak.service.cluster.flow.blueprint.RDSConfigProvider;
import com.sequenceiq.cloudbreak.service.cluster.flow.blueprint.SmartSenseConfigProvider;
import com.sequenceiq.cloudbreak.service.cluster.flow.blueprint.ZeppelinConfigProvider;
import com.sequenceiq.cloudbreak.service.cluster.flow.filesystem.FileSystemConfigurator;
import com.sequenceiq.cloudbreak.service.cluster.flow.kerberos.KerberosContainerDnResolver;
import com.sequenceiq.cloudbreak.service.cluster.flow.kerberos.KerberosDomainResolver;
import com.sequenceiq.cloudbreak.service.cluster.flow.kerberos.KerberosHostResolver;
import com.sequenceiq.cloudbreak.service.cluster.flow.kerberos.KerberosLdapResolver;
import com.sequenceiq.cloudbreak.service.cluster.flow.kerberos.KerberosPrincipalResolver;
import com.sequenceiq.cloudbreak.service.cluster.flow.kerberos.KerberosRealmResolver;
import com.sequenceiq.cloudbreak.service.cluster.flow.kerberos.KerberosTypeResolver;
import com.sequenceiq.cloudbreak.service.events.CloudbreakEventService;
import com.sequenceiq.cloudbreak.service.hostgroup.HostGroupService;
import com.sequenceiq.cloudbreak.service.image.ImageService;
import com.sequenceiq.cloudbreak.service.messages.CloudbreakMessagesService;
import com.sequenceiq.cloudbreak.service.stack.flow.AmbariStartupListenerTask;
import com.sequenceiq.cloudbreak.service.stack.flow.AmbariStartupPollerObject;
import com.sequenceiq.cloudbreak.util.AmbariClientExceptionUtil;
import com.sequenceiq.cloudbreak.util.JsonUtil;
import groovyx.net.http.HttpResponseException;
@Service
public class AmbariClusterConnector {
// Class-wide logger.
private static final Logger LOGGER = LoggerFactory.getLogger(AmbariClusterConnector.class);
// NOTE(review): REALM, DOMAIN, FQDN and KERBEROS_DB_PROPAGATION_PORT are not referenced in the part of the
// file reviewed here; presumably they are used further down - confirm before removing.
private static final String REALM = "NODE.DC1.CONSUL";
private static final String DOMAIN = "node.dc1.consul";
// Passed as the key type argument when a secure (Kerberos) cluster is created in buildAmbariCluster.
private static final String KEY_TYPE = "PERSISTED";
private static final String FQDN = "fqdn";
// Compared against the configured user name and password when the original Ambari credentials are replaced;
// also used as the old password when the "admin" user's password is changed.
private static final String ADMIN = "admin";
private static final int KERBEROS_DB_PROPAGATION_PORT = 6318;
// Collaborators injected by the container (@Inject / @Resource); none of them is created by this class.

// Persistence access for stacks, clusters and instance metadata.
@Inject
private StackRepository stackRepository;
@Inject
private ClusterRepository clusterRepository;
@Inject
private InstanceMetaDataRepository instanceMetadataRepository;
@Inject
private HostGroupService hostGroupService;
// Waits for Ambari requests (install, start, stop, upscale, smoke test) to start or finish.
@Inject
private AmbariOperationService ambariOperationService;
@Inject
private RdsConfigRepository rdsConfigRepository;
@Inject
private PollingService<AmbariHostsCheckerContext> hostsPollingService;
@Inject
private HadoopConfigurationService hadoopConfigurationService;
// Factory for the Ambari REST clients used by every operation in this class.
@Inject
private AmbariClientProvider ambariClientProvider;
// Used to publish progress events for a stack.
@Inject
private CloudbreakEventService eventService;
@Inject
private RecipeEngine recipeEngine;
@Inject
private AmbariHostsStatusCheckerTask ambariHostsStatusCheckerTask;
// Polling service used by waitForHostsToJoin together with ambariHostsJoinStatusCheckerTask.
@Inject
private PollingService<AmbariHostsCheckerContext> ambariHostJoin;
// Polling service used by waitForAmbariToStart together with ambariHealthCheckerTask.
@Inject
private PollingService<AmbariClientPollerObject> ambariHealthChecker;
// Polling service used by waitForAmbariServer together with ambariStartupListenerTask.
@Inject
private PollingService<AmbariStartupPollerObject> ambariStartupPollerObjectPollingService;
@Inject
private AmbariStartupListenerTask ambariStartupListenerTask;
@Inject
private AmbariHealthCheckerTask ambariHealthCheckerTask;
@Inject
private AmbariHostsJoinStatusCheckerTask ambariHostsJoinStatusCheckerTask;
@Inject
private HostMetadataRepository hostMetadataRepository;
// Resolves the event/exception messages identified by Msg codes.
@Inject
private CloudbreakMessagesService cloudbreakMessagesService;
// Looked up by FileSystemType in extendBlueprintWithFsConfig.
@Resource
private Map<FileSystemType, FileSystemConfigurator> fileSystemConfigurators;
// Blueprint text manipulation and the config providers applied in updateBlueprintConfiguration.
@Inject
private BlueprintProcessor blueprintProcessor;
// Builds the TLS client configuration used to reach the primary gateway.
@Inject
private TlsSecurityService tlsSecurityService;
@Inject
private SmartSenseConfigProvider smartSenseConfigProvider;
@Inject
private ZeppelinConfigProvider zeppelinConfigProvider;
@Inject
private DruidSupersetConfigProvider druidSupersetConfigProvider;
@Inject
private RDSConfigProvider rdsConfigProvider;
@Inject
private AutoRecoveryConfigProvider autoRecoveryConfigProvider;
@Inject
private ImageService imageService;
// Source of the HDP repository details of a cluster.
@Inject
private ClusterComponentConfigProvider clusterComponentConfigProvider;
@Inject
private AmbariViewProvider ambariViewProvider;
// Kerberos-related resolvers; only kerberosPrincipalResolver is used in the part of the file reviewed here.
@Inject
private KerberosHostResolver kerberosHostResolver;
@Inject
private KerberosPrincipalResolver kerberosPrincipalResolver;
@Inject
private KerberosLdapResolver kerberosLdapResolver;
@Inject
private KerberosContainerDnResolver kerberosContainerDnResolver;
@Inject
private KerberosTypeResolver kerberosTypeResolver;
@Inject
private KerberosDomainResolver kerberosDomainResolver;
@Inject
private KerberosRealmResolver kerberosRealmResolver;
// Supplies the user name and password of the Cloudbreak-managed Ambari user.
@Inject
private AmbariAuthenticationProvider ambariAuthenticationProvider;
// Tells whether the stack's orchestrator is a container orchestrator.
@Inject
private OrchestratorTypeResolver orchestratorTypeResolver;
/**
 * Polls the Ambari server of the given stack until it reports that it has started.
 * <p>
 * Two clients are handed to the poller: one built with the default credentials and one built
 * for the cluster of the stack.
 *
 * @param stack the stack whose Ambari server is awaited
 * @throws CloudbreakException if the polling finished with any result other than success or exit
 * @throws CancellationException if the polling was exited
 */
public void waitForAmbariServer(Stack stack) throws CloudbreakException {
    AmbariClient clientWithDefaultCredentials = getDefaultAmbariClient(stack);
    AmbariClient clientWithClusterCredentials = getAmbariClient(stack);
    List<AmbariClient> clients = Arrays.asList(clientWithDefaultCredentials, clientWithClusterCredentials);
    AmbariStartupPollerObject pollerObject = new AmbariStartupPollerObject(stack, stack.getAmbariIp(), clients);

    PollingResult startupResult = ambariStartupPollerObjectPollingService.pollWithTimeoutSingleFailure(
            ambariStartupListenerTask, pollerObject, AMBARI_POLLING_INTERVAL, MAX_ATTEMPTS_FOR_AMBARI_SERVER_STARTUP);

    if (isSuccess(startupResult)) {
        LOGGER.info("Ambari has successfully started! Polling result: {}", startupResult);
        return;
    }
    if (isExited(startupResult)) {
        throw new CancellationException("Polling of Ambari server start has been cancelled.");
    }
    LOGGER.info("Could not start Ambari. polling result: {}", startupResult);
    throw new CloudbreakException(String.format("Could not start Ambari. polling result: '%s'", startupResult));
}
/**
 * Installs the cluster of the given stack through Ambari.
 * <p>
 * The steps, in order: record the creation start time (once), run the pre-install recipes, assemble the
 * final blueprint text, register repositories and the blueprint in Ambari, wait for the hosts, submit the
 * cluster creation template (unless Ambari already reports a cluster name), wait for the install to start
 * and to finish, run the post-install recipes, trigger a SmartSense capture and finally mark the cluster,
 * its instances and its hosts as successfully created.
 *
 * @param stack the stack whose cluster has to be built
 * @return the persisted cluster after a successful installation
 * @throws CancellationException if any of the polling steps was exited
 * @throws AmbariOperationFailedException on every other failure; the original exception is kept as cause
 */
public Cluster buildAmbariCluster(Stack stack) {
    Cluster cluster = stack.getCluster();
    try {
        // The start time is only recorded the first time the build is attempted.
        if (cluster.getCreationStarted() == null) {
            cluster.setCreationStarted(new Date().getTime());
            cluster = clusterRepository.save(cluster);
        }
        Set<HostGroup> hostGroups = hostGroupService.getByCluster(cluster.getId());
        Map<String, List<Map<String, String>>> hostGroupMappings = buildHostGroupAssociations(hostGroups);
        recipeEngine.executePreInstall(stack, hostGroups);

        // Build the blueprint text that is sent to Ambari.
        String blueprintText = updateBlueprintWithInputs(cluster, cluster.getBlueprint());
        Set<RDSConfig> rdsConfigs = rdsConfigRepository.findByClusterId(stack.getOwner(), stack.getAccount(), cluster.getId());
        FileSystem fs = cluster.getFileSystem();
        blueprintText = updateBlueprintConfiguration(stack, blueprintText, rdsConfigs, fs);

        AmbariClient ambariClient = getAmbariClient(stack);
        setBaseRepoURL(stack, ambariClient);
        addBlueprint(stack, ambariClient, blueprintText);

        Set<HostMetadata> hostsInCluster = hostMetadataRepository.findHostsInCluster(cluster.getId());
        PollingResult waitForHostsResult = waitForHosts(stack, ambariClient, hostsInCluster);
        checkPollingResult(waitForHostsResult, cloudbreakMessagesService.getMessage(Msg.AMBARI_CLUSTER_HOST_JOIN_FAILED.code()));

        String clusterName = cluster.getName();
        String blueprintName = cluster.getBlueprint().getBlueprintName();
        String configStrategy = cluster.getConfigStrategy().name();
        Boolean hideQuickLinks = cluster.getGateway().getEnableGateway();
        String clusterTemplate;
        // A non-null cluster name means the cluster was already submitted to Ambari, so it is not created again.
        if (ambariClient.getClusterName() == null) {
            if (cluster.isSecure()) {
                KerberosConfig kerberosConfig = cluster.getKerberosConfig();
                String principal = kerberosPrincipalResolver.resolvePrincipalForKerberos(kerberosConfig);
                clusterTemplate = ambariClient.createSecureCluster(clusterName, blueprintName, hostGroupMappings, configStrategy,
                        cluster.getPassword(), principal, kerberosConfig.getKerberosPassword(), KEY_TYPE, hideQuickLinks);
            } else {
                clusterTemplate = ambariClient.createCluster(clusterName, blueprintName, hostGroupMappings, configStrategy,
                        ambariAuthenticationProvider.getAmbariPassword(cluster), hideQuickLinks);
            }
            LOGGER.info("Submitted cluster creation template: {}", JsonUtil.minify(clusterTemplate));
        } else {
            LOGGER.info("Ambari cluster already exists: {}", clusterName);
        }

        PollingResult pollingResult = ambariOperationService.waitForOperationsToStart(stack, ambariClient, singletonMap("INSTALL_START", 1),
                START_OPERATION_STATE);
        checkPollingResult(pollingResult, cloudbreakMessagesService.getMessage(Msg.AMBARI_CLUSTER_INSTALL_FAILED.code()));
        pollingResult = waitForClusterInstall(stack, ambariClient);
        checkPollingResult(pollingResult, cloudbreakMessagesService.getMessage(Msg.AMBARI_CLUSTER_INSTALL_FAILED.code()));

        recipeEngine.executePostInstall(stack);
        triggerSmartSenseCapture(ambariClient, blueprintText);
        cluster = ambariViewProvider.provideViewInformation(ambariClient, cluster);
        cluster = handleClusterCreationSuccess(stack, cluster);
        return cluster;
    } catch (CancellationException cancellationException) {
        // Cancellation must reach the caller untouched, it is not an Ambari failure.
        throw cancellationException;
    } catch (HttpResponseException hre) {
        String errorMessage = AmbariClientExceptionUtil.getErrorMessage(hre);
        throw new AmbariOperationFailedException("Ambari could not create the cluster: " + errorMessage, hre);
    } catch (Exception e) {
        // The exception is passed as the trailing argument without a placeholder of its own, so SLF4J logs
        // it together with its stack trace instead of leaving an unfilled "{}" in the message.
        LOGGER.error("Error while building the Ambari cluster. Message: {}", e.getMessage(), e);
        throw new AmbariOperationFailedException(e.getMessage(), e);
    }
}
/**
 * Applies the cluster specific configuration to the blueprint text and returns the result.
 * <p>
 * In order: file system configuration (only if a file system is given), SmartSense, Zeppelin and
 * Druid/Superset configuration, the HDP version (skipped for container orchestrators or when no version is
 * available), RDS configuration (which also removes the MYSQL_SERVER component) and auto recovery settings.
 *
 * @param stack         the stack the blueprint belongs to
 * @param blueprintText the blueprint text to extend
 * @param rdsConfigs    RDS configurations of the cluster, may be {@code null} or empty
 * @param fs            file system of the cluster, may be {@code null}
 * @return the extended blueprint text
 */
private String updateBlueprintConfiguration(Stack stack, String blueprintText, Set<RDSConfig> rdsConfigs, FileSystem fs)
        throws IOException, CloudbreakImageNotFoundException, CloudbreakException {
    String extended = fs == null ? blueprintText : extendBlueprintWithFsConfig(blueprintText, fs, stack);

    extended = smartSenseConfigProvider.addToBlueprint(stack, extended);
    extended = zeppelinConfigProvider.addToBlueprint(stack, extended);
    extended = druidSupersetConfigProvider.addToBlueprint(stack, extended);

    boolean containerOrchestrator = orchestratorTypeResolver.resolveType(stack.getOrchestrator()).containerOrchestrator();
    if (!containerOrchestrator) {
        HDPRepo repo = clusterComponentConfigProvider.getHDPRepo(stack.getCluster().getId());
        boolean versionKnown = repo != null && repo.getHdpVersion() != null;
        if (versionKnown) {
            extended = blueprintProcessor.modifyHdpVersion(extended, repo.getHdpVersion());
        }
    }

    boolean hasRdsConfigs = rdsConfigs != null && !rdsConfigs.isEmpty();
    if (hasRdsConfigs) {
        extended = blueprintProcessor.addConfigEntries(extended, rdsConfigProvider.getConfigs(rdsConfigs), true);
        extended = blueprintProcessor.removeComponentFromBlueprint("MYSQL_SERVER", extended);
    }

    return autoRecoveryConfigProvider.addToBlueprint(extended);
}
/**
 * Replaces the <code>{{ name }}</code> placeholders of the blueprint with the blueprint inputs of the cluster.
 * <p>
 * Both the placeholder and the value are treated as literal text. The previous regex based replacement
 * interpreted {@code $} and {@code \} in the value as group references / escapes (failing or corrupting
 * the output for such values) and interpreted regex metacharacters in the input name.
 *
 * @param cluster   the cluster providing the blueprint inputs; a missing input map leaves the text unchanged
 * @param blueprint the blueprint whose text is processed
 * @return the blueprint text with every known placeholder substituted
 * @throws IOException if the blueprint inputs cannot be read
 */
public String updateBlueprintWithInputs(Cluster cluster, Blueprint blueprint) throws CloudbreakSecuritySetupException, IOException {
    String blueprintText = blueprint.getBlueprintText();
    Map<String, String> blueprintInputs = cluster.getBlueprintInputs().get(Map.class);
    if (blueprintInputs != null) {
        for (Map.Entry<String, String> input : blueprintInputs.entrySet()) {
            String value = input.getValue();
            if (value == null) {
                // Nothing to substitute; the placeholder (if any) is left as it is.
                continue;
            }
            String placeholder = "{{ " + input.getKey() + " }}";
            blueprintText = blueprintText.replace(placeholder, value);
        }
    }
    return blueprintText;
}
/**
 * Runs the smoke test and reports a failed or timed out run as a stack event.
 *
 * @param stack        the stack the smoke test runs on
 * @param ambariClient client of the Ambari server of the stack
 * @throws CancellationException if the polling of the smoke test was exited
 */
private void executeSmokeTest(Stack stack, AmbariClient ambariClient) {
    PollingResult smokeTestResult = runSmokeTest(stack, ambariClient);
    if (isExited(smokeTestResult)) {
        throw new CancellationException("Stack or cluster in delete in progress phase.");
    }
    if (isFailure(smokeTestResult) || isTimeout(smokeTestResult)) {
        // A failed smoke test only produces an event, no exception is thrown.
        eventService.fireCloudbreakEvent(stack.getId(), Status.UPDATE_IN_PROGRESS.name(),
                cloudbreakMessagesService.getMessage(Msg.AMBARI_CLUSTER_MR_SMOKE_FAILED.code()));
    }
}
/**
 * Turns an unsuccessful polling result into an exception.
 *
 * @param pollingResult the result to evaluate
 * @param message       message of the {@link ClusterException} thrown on timeout or failure
 * @throws CancellationException if the polling was exited
 * @throws ClusterException if the polling timed out or failed
 */
private void checkPollingResult(PollingResult pollingResult, String message) throws ClusterException {
    if (isExited(pollingResult)) {
        throw new CancellationException("Stack or cluster in delete in progress phase.");
    }
    boolean unsuccessful = isTimeout(pollingResult) || isFailure(pollingResult);
    if (unsuccessful) {
        throw new ClusterException(message);
    }
}
/**
 * Waits for the hosts stored for the cluster of the stack; the polling result is not evaluated here.
 *
 * @param stack the stack whose cluster hosts are awaited
 */
public void waitForAmbariHosts(Stack stack) throws CloudbreakSecuritySetupException {
    AmbariClient client = getAmbariClient(stack);
    Long clusterId = stack.getCluster().getId();
    waitForHosts(stack, client, hostMetadataRepository.findHostsInCluster(clusterId));
}
/**
 * Installs the services of a host group on the hosts that are not yet part of the Ambari cluster.
 * Nothing happens when every given host is already listed by Ambari.
 *
 * @param stack        the stack being upscaled
 * @param hostGroup    host group whose services are installed
 * @param hostMetadata candidate hosts
 * @throws CloudbreakException if the upscale operation was exited, timed out or failed
 */
public void installServices(Stack stack, HostGroup hostGroup, Set<HostMetadata> hostMetadata)
        throws CloudbreakException {
    AmbariClient ambariClient = getAmbariClient(stack);
    List<String> hostsKnownToAmbari = ambariClient.getClusterHosts();

    List<String> upscaleHostNames = new ArrayList<>();
    for (String candidate : getHostNames(hostMetadata)) {
        if (!hostsKnownToAmbari.contains(candidate)) {
            upscaleHostNames.add(candidate);
        }
    }
    if (upscaleHostNames.isEmpty()) {
        return;
    }

    PollingResult upscaleResult = ambariOperationService.waitForOperations(stack, ambariClient,
            installServices(upscaleHostNames, stack, ambariClient, hostGroup.getName()), UPSCALE_AMBARI_PROGRESS_STATE);
    checkPollingResult(upscaleResult, cloudbreakMessagesService.getMessage(Msg.AMBARI_CLUSTER_UPSCALE_FAILED.code()));
}
/**
 * Creates the "default" Ambari client for the stack, connecting through the primary gateway with TLS.
 *
 * @param stack the stack whose Ambari server is addressed
 * @return the client returned by the provider for the TLS configuration and gateway port of the stack
 */
private AmbariClient getDefaultAmbariClient(Stack stack) throws CloudbreakSecuritySetupException {
    HttpClientConfig tlsConfig = tlsSecurityService.buildTLSClientConfigForPrimaryGateway(
            stack.getId(), stack.getCluster().getAmbariIp());
    return ambariClientProvider.getDefaultAmbariClient(tlsConfig, stack.getGatewayPort());
}
/**
 * Creates the Ambari client for the cluster of the stack, connecting through the primary gateway with TLS.
 *
 * @param stack the stack whose Ambari server is addressed
 * @return the client returned by the provider for the cluster of the stack
 */
private AmbariClient getAmbariClient(Stack stack) throws CloudbreakSecuritySetupException {
    Cluster clusterOfStack = stack.getCluster();
    HttpClientConfig tlsConfig = tlsSecurityService.buildTLSClientConfigForPrimaryGateway(
            stack.getId(), clusterOfStack.getAmbariIp());
    return ambariClientProvider.getAmbariClient(tlsConfig, stack.getGatewayPort(), clusterOfStack);
}
/**
 * Creates an Ambari client that authenticates with the given credentials, connecting through the
 * primary gateway with TLS.
 *
 * @param stack    the stack whose Ambari server is addressed
 * @param user     user name to authenticate with
 * @param password password to authenticate with
 * @return the client returned by the provider for the given credentials
 */
private AmbariClient getAmbariClient(Stack stack, String user, String password) throws CloudbreakSecuritySetupException {
    HttpClientConfig tlsConfig = tlsSecurityService.buildTLSClientConfigForPrimaryGateway(
            stack.getId(), stack.getCluster().getAmbariIp());
    return ambariClientProvider.getAmbariClient(tlsConfig, stack.getGatewayPort(), user, password);
}
/**
 * Replaces the Ambari user of the cluster: creates the new user, then deletes the user currently stored
 * for the cluster.
 *
 * @param stackId     id of the stack whose cluster is updated
 * @param newUserName name of the user to create
 * @param newPassword password of the user to create
 */
public void credentialReplaceAmbariCluster(Long stackId, String newUserName, String newPassword) throws CloudbreakSecuritySetupException {
    Stack stack = stackRepository.findOneWithLists(stackId);
    Cluster cluster = clusterRepository.findOneWithLists(stack.getCluster().getId());
    AmbariClient clientOfCurrentUser = getAmbariClient(stack, cluster.getUserName(), cluster.getPassword());
    // createAmbariUser may hand back a client that is authenticated as the new user.
    AmbariClient clientAfterCreation = createAmbariUser(newUserName, newPassword, stack, clientOfCurrentUser);
    clientAfterCreation.deleteUser(cluster.getUserName());
}
/**
 * Creates a user in Ambari (the third argument of the client call is {@code true}).
 * <p>
 * If the creation fails, a client authenticated as the new user is built and the server version is
 * requested with it; when that works the creation failure is ignored and that client is returned,
 * otherwise the original creation failure is rethrown.
 *
 * @param newUserName  name of the user to create
 * @param newPassword  password of the user to create
 * @param stack        the stack the Ambari server belongs to
 * @param ambariClient client used for the creation
 * @return the given client, or a client authenticated as the new user if the creation call failed
 */
private AmbariClient createAmbariUser(String newUserName, String newPassword, Stack stack, AmbariClient ambariClient) {
    AmbariClient clientToReturn = ambariClient;
    try {
        ambariClient.createUser(newUserName, newPassword, true);
    } catch (Exception creationFailure) {
        try {
            clientToReturn = getAmbariClient(stack, newUserName, newPassword);
            clientToReturn.ambariServerVersion();
        } catch (Exception ignored) {
            // The new credentials do not work either, so report the original failure.
            throw creationFailure;
        }
    }
    return clientToReturn;
}
/**
 * Changes the password of the Ambari user stored for the cluster of the given stack.
 *
 * @param stackId     id of the stack whose cluster is updated
 * @param newPassword the password to set
 */
public void credentialUpdateAmbariCluster(Long stackId, String newPassword) throws CloudbreakSecuritySetupException {
    Stack stack = stackRepository.findOneWithLists(stackId);
    Long clusterId = stack.getCluster().getId();
    Cluster cluster = clusterRepository.findOneWithLists(clusterId);
    AmbariClient clientOfCurrentUser = getAmbariClient(stack, cluster.getUserName(), cluster.getPassword());
    changeAmbariPassword(cluster.getUserName(), cluster.getPassword(), newPassword, stack, clientOfCurrentUser);
}
/**
 * Changes the password of an Ambari user (the fourth argument of the client call is {@code true}).
 * <p>
 * If the change fails, a client authenticated with the new password is built and the server version is
 * requested with it; when that works the failure is ignored and that client is returned, otherwise the
 * original failure is rethrown.
 *
 * @param userName     user whose password is changed
 * @param oldPassword  current password
 * @param newPassword  password to set
 * @param stack        the stack the Ambari server belongs to
 * @param ambariClient client used for the change
 * @return the given client, or a client authenticated with the new password if the change call failed
 */
private AmbariClient changeAmbariPassword(String userName, String oldPassword, String newPassword, Stack stack, AmbariClient ambariClient) {
    AmbariClient clientToReturn = ambariClient;
    try {
        ambariClient.changePassword(userName, oldPassword, newPassword, true);
    } catch (Exception changeFailure) {
        try {
            clientToReturn = getAmbariClient(stack, userName, newPassword);
            clientToReturn.ambariServerVersion();
        } catch (Exception ignored) {
            // The new password does not work either, so report the original failure.
            throw changeFailure;
        }
    }
    return clientToReturn;
}
/**
 * Sets up the Ambari users after the server came up with its default credentials.
 * <p>
 * First the Cloudbreak user is created. Then, if the configured user is "admin", its password is changed
 * (unless the configured password is "admin" as well); otherwise the configured user is created and the
 * "admin" user is deleted.
 *
 * @param stack the stack whose Ambari credentials are set up
 */
public void changeOriginalAmbariCredentialsAndCreateCloudbreakUser(Stack stack) throws CloudbreakSecuritySetupException {
    Cluster cluster = stack.getCluster();
    LOGGER.info("Changing ambari credentials for cluster: {}, ambari ip: {}", cluster.getName(), cluster.getAmbariIp());
    String configuredUser = cluster.getUserName();
    String configuredPassword = cluster.getPassword();
    AmbariClient defaultClient = getDefaultAmbariClient(stack);

    String cloudbreakUserName = ambariAuthenticationProvider.getAmbariUserName(cluster);
    String cloudbreakPassword = ambariAuthenticationProvider.getAmbariPassword(cluster);
    createAmbariUser(cloudbreakUserName, cloudbreakPassword, stack, defaultClient);

    if (!ADMIN.equals(configuredUser)) {
        AmbariClient clientAfterCreation = createAmbariUser(configuredUser, configuredPassword, stack, defaultClient);
        clientAfterCreation.deleteUser(ADMIN);
    } else if (!ADMIN.equals(configuredPassword)) {
        changeAmbariPassword(ADMIN, ADMIN, configuredPassword, stack, defaultClient);
    }
}
/**
 * Stops all services of the cluster unless every host component is already stopped.
 * A connection failure towards Ambari is only logged.
 *
 * @param stack the stack whose cluster is stopped
 * @throws CloudbreakException if stopping the services fails or times out
 */
public void stopCluster(Stack stack) throws CloudbreakException {
    AmbariClient ambariClient = getAmbariClient(stack);
    try {
        boolean everythingStopped = allServiceStopped(ambariClient.getHostComponentsStates());
        if (!everythingStopped) {
            stopAllServices(stack, ambariClient);
        }
        // Stopping the Ambari agents is disabled here; the disabled code was:
        // if (!"BYOS".equals(stack.cloudPlatform())) {
        //     stopAmbariAgents(stack, null);
        // }
    } catch (AmbariConnectionException ex) {
        LOGGER.debug("Ambari not running on the gateway machine, no need to stop it.");
    }
}
/**
 * Starts the cluster: waits for the Ambari server, waits for the agents to join (skipped on the BYOS
 * platform) and then requests the start of all services.
 *
 * @param stack the stack whose cluster is started
 * @return the id of the Ambari request that starts the services
 * @throws CloudbreakException if Ambari does not become available or the start request fails
 */
public int startCluster(Stack stack) throws CloudbreakException {
    AmbariClient ambariClient = getAmbariClient(stack);
    waitForAmbariToStart(stack);
    boolean agentsManagedByCloudbreak = !BYOS.equals(stack.cloudPlatform());
    if (agentsManagedByCloudbreak) {
        startAmbariAgents(stack);
    }
    return startAllServices(stack, ambariClient);
}
/**
 * Waits until the service start request with the given id has finished.
 *
 * @param stack     the stack whose services are starting
 * @param requestId id of the Ambari start request
 * @throws CloudbreakException if the wait times out
 */
public void waitForAllServices(Stack stack, int requestId) throws CloudbreakException {
    waitForAllServices(stack, getAmbariClient(stack), requestId);
}
/**
 * Tells whether the Ambari server of the stack passes the health check.
 *
 * @param stack the stack to check
 * @return {@code false} if the stack has no cluster or the health check fails or throws,
 *         otherwise the result of the health check
 * @throws CloudbreakException if the Ambari client cannot be created
 */
public boolean isAmbariAvailable(Stack stack) throws CloudbreakException {
    if (stack.getCluster() == null) {
        return false;
    }
    AmbariClient ambariClient = getAmbariClient(stack);
    AmbariClientPollerObject ambariClientPollerObject = new AmbariClientPollerObject(stack, ambariClient);
    try {
        return ambariHealthCheckerTask.checkStatus(ambariClientPollerObject);
    } catch (Exception ex) {
        // Best effort check: a failing health check means "not available", but the cause is kept in the log
        // instead of being dropped silently.
        LOGGER.debug("Ambari health check failed for stack {}, Ambari is considered unavailable.", stack.getId(), ex);
        return false;
    }
}
/**
 * Adds the configuration entries of the given file system to the blueprint.
 * <p>
 * The file system properties are converted (through JSON) into the configuration class of the file system
 * type, decorated with stack specific properties and handed to the configurator registered for the type.
 * The default file system entries are added as well when the file system is marked as default.
 *
 * @param blueprintText the blueprint text to extend
 * @param fs            the file system of the cluster
 * @param stack         the stack the cluster belongs to
 * @return the blueprint text with the file system entries added
 * @throws IOException if the properties cannot be converted
 */
private String extendBlueprintWithFsConfig(String blueprintText, FileSystem fs, Stack stack) throws IOException {
    FileSystemType fsType = FileSystemType.valueOf(fs.getType());
    FileSystemConfigurator configurator = fileSystemConfigurators.get(fsType);

    String propertiesAsJson = JsonUtil.writeValueAsString(fs.getProperties());
    FileSystemConfiguration configuration = (FileSystemConfiguration) JsonUtil.readValue(propertiesAsJson, fsType.getClazz());
    decorateFsConfigurationProperties(configuration, stack);

    Map<String, String> createdResources = configurator.createResources(configuration);
    List<BlueprintConfigurationEntry> entries = configurator.getFsProperties(configuration, createdResources);
    if (fs.isDefaultFs()) {
        entries.addAll(configurator.getDefaultFsProperties(configuration));
    }
    return blueprintProcessor.addConfigEntries(blueprintText, entries, true);
}
/**
 * Adds the stack dependent values to a file system configuration: the storage container name
 * ("cloudbreak" + stack id), the resource group name on the Azure platform variant and, for ADLS, the
 * credential secret read from the credential attributes of the stack.
 *
 * @param fsConfiguration the configuration to decorate (modified in place)
 * @param stack           the stack supplying the values
 */
private void decorateFsConfigurationProperties(FileSystemConfiguration fsConfiguration, Stack stack) {
    String containerName = "cloudbreak" + stack.getId();
    fsConfiguration.addProperty(FileSystemConfiguration.STORAGE_CONTAINER, containerName);

    if (CloudConstants.AZURE.equals(stack.getPlatformVariant())) {
        fsConfiguration.addProperty(FileSystemConfiguration.RESOURCE_GROUP_NAME,
                stack.getResourceByType(ResourceType.ARM_TEMPLATE).getResourceName());
    }

    // The secret key is not stored on the client side, so it has to be looked up from the credential.
    if (fsConfiguration instanceof AdlsFileSystemConfiguration) {
        AdlsFileSystemConfiguration adlsConfiguration = (AdlsFileSystemConfiguration) fsConfiguration;
        Object secret = stack.getCredential().getAttributes().getMap().get(AdlsFileSystemConfiguration.CREDENTIAL_SECRET_KEY);
        adlsConfiguration.setCredential(String.valueOf(secret));
    }
}
/**
 * Requests the stop of all services and waits for the request to finish, firing a stack event before and
 * after.
 *
 * @param stack        the stack whose services are stopped
 * @param ambariClient client of the Ambari server of the stack
 * @throws CancellationException if the wait was exited
 * @throws CloudbreakException if Ambari returned no request id (-1) or the wait timed out
 */
private void stopAllServices(Stack stack, AmbariClient ambariClient) throws CloudbreakException {
    LOGGER.info("Stop all Hadoop services");
    eventService.fireCloudbreakEvent(stack.getId(), Status.UPDATE_IN_PROGRESS.name(),
            cloudbreakMessagesService.getMessage(Msg.AMBARI_CLUSTER_SERVICES_STOPPING.code()));
    int requestId = ambariClient.stopAllServices();
    if (requestId == -1) {
        LOGGER.warn("Failed to stop Hadoop services.");
        throw new CloudbreakException("Failed to stop Hadoop services.");
    }
    LOGGER.info("Waiting for Hadoop services to stop on stack");
    PollingResult servicesStopResult = ambariOperationService.waitForOperations(stack, ambariClient, singletonMap("stop services", requestId),
            STOP_AMBARI_PROGRESS_STATE);
    if (isExited(servicesStopResult)) {
        // The message used to say "to start", copied from the start path.
        throw new CancellationException("Cluster was terminated while waiting for Hadoop services to stop");
    } else if (isTimeout(servicesStopResult)) {
        throw new CloudbreakException("Timeout while stopping Ambari services.");
    }
    // NOTE(review): a FAILURE polling result is not handled here and ends with the "stopped" event - confirm
    // whether waitForOperations already throws in that case.
    eventService.fireCloudbreakEvent(stack.getId(), Status.UPDATE_IN_PROGRESS.name(),
            cloudbreakMessagesService.getMessage(Msg.AMBARI_CLUSTER_SERVICES_STOPPED.code()));
}
/**
 * Requests the start of all services and waits until the request has finished.
 *
 * @param stack        the stack whose services are started
 * @param ambariClient client of the Ambari server of the stack
 * @throws CloudbreakException if the start request fails or the wait times out
 */
private void startAllServicesAndWait(Stack stack, AmbariClient ambariClient) throws CloudbreakException {
    // startAllServices never returns -1: it logs and throws a CloudbreakException in that case, so the
    // former "requestId == -1" branch here could not be reached and has been removed.
    int requestId = startAllServices(stack, ambariClient);
    waitForAllServices(stack, ambariClient, requestId);
}
/**
 * Fires the "services starting" event and asks Ambari to start all services.
 *
 * @param stack        the stack whose services are started
 * @param ambariClient client of the Ambari server of the stack
 * @return the id of the Ambari start request, never -1
 * @throws CloudbreakException if Ambari returned -1 as request id
 */
private int startAllServices(Stack stack, AmbariClient ambariClient) throws CloudbreakException {
    LOGGER.info("Start all Hadoop services");
    eventService.fireCloudbreakEvent(stack.getId(), Status.UPDATE_IN_PROGRESS.name(),
            cloudbreakMessagesService.getMessage(Msg.AMBARI_CLUSTER_SERVICES_STARTING.code()));
    int startRequestId = ambariClient.startAllServices();
    if (startRequestId != -1) {
        return startRequestId;
    }
    LOGGER.error("Failed to start Hadoop services.");
    throw new CloudbreakException("Failed to start Hadoop services.");
}
/**
 * Waits for the given service start request to finish and fires the "services started" event afterwards.
 *
 * @param stack        the stack whose services are starting
 * @param ambariClient client of the Ambari server of the stack
 * @param requestId    id of the Ambari start request
 * @throws CancellationException if the wait was exited
 * @throws CloudbreakException if the wait timed out
 */
private void waitForAllServices(Stack stack, AmbariClient ambariClient, int requestId) throws CloudbreakException {
    LOGGER.info("Waiting for Hadoop services to start on stack");
    PollingResult startResult = ambariOperationService.waitForOperations(
            stack, ambariClient, singletonMap("start services", requestId), START_AMBARI_PROGRESS_STATE);
    if (isExited(startResult)) {
        throw new CancellationException("Cluster was terminated while waiting for Hadoop services to start");
    }
    if (isTimeout(startResult)) {
        throw new CloudbreakException("Timeout while starting Ambari services.");
    }
    eventService.fireCloudbreakEvent(stack.getId(), Status.UPDATE_IN_PROGRESS.name(),
            cloudbreakMessagesService.getMessage(Msg.AMBARI_CLUSTER_SERVICES_STARTED.code()));
}
/**
 * Persists the outcome of a successful cluster creation: sets the finish and up-since timestamps of the
 * cluster, marks every non-terminated instance of the stack as REGISTERED and every host of the cluster
 * as HEALTHY.
 *
 * @param stack   the stack the cluster runs on
 * @param cluster the cluster that has been created
 * @return the saved cluster
 */
private Cluster handleClusterCreationSuccess(Stack stack, Cluster cluster) {
    LOGGER.info("Cluster created successfully. Cluster name: {}", cluster.getName());
    cluster.setCreationFinished(new Date().getTime());
    cluster.setUpSince(new Date().getTime());
    Cluster savedCluster = clusterRepository.save(cluster);

    List<InstanceMetaData> registeredInstances = new ArrayList<>();
    for (InstanceGroup group : stack.getInstanceGroups()) {
        group.getAllInstanceMetaData().stream()
                .filter(instance -> !instance.isTerminated())
                .forEach(instance -> {
                    instance.setInstanceStatus(InstanceStatus.REGISTERED);
                    registeredInstances.add(instance);
                });
    }
    instanceMetadataRepository.save(registeredInstances);

    List<HostMetadata> healthyHosts = new ArrayList<>(hostMetadataRepository.findHostsInCluster(savedCluster.getId()));
    healthyHosts.forEach(host -> host.setHostMetadataState(HostMetadataState.HEALTHY));
    hostMetadataRepository.save(healthyHosts);
    return savedCluster;
}
/**
 * Triggers a SmartSense capture when the provider reports SmartSense as configurable for the blueprint.
 * A failing capture is logged and otherwise ignored.
 *
 * @param ambariClient  client of the Ambari server
 * @param blueprintText the blueprint text that was used for the cluster
 */
private void triggerSmartSenseCapture(AmbariClient ambariClient, String blueprintText) {
    if (!smartSenseConfigProvider.smartSenseIsConfigurable(blueprintText)) {
        return;
    }
    try {
        LOGGER.info("Triggering SmartSense data capture.");
        ambariClient.smartSenseCapture(0);
    } catch (Exception captureFailure) {
        LOGGER.error("Triggering SmartSense capture is failed.", captureFailure);
    }
}
/**
 * Collects the host names of the given host metadata, in the iteration order of the set.
 *
 * @param hostMetadata the hosts
 * @return a new list with one host name per host
 */
private List<String> getHostNames(Set<HostMetadata> hostMetadata) {
    List<String> hostNames = new ArrayList<>(hostMetadata.size());
    for (HostMetadata host : hostMetadata) {
        hostNames.add(host.getHostName());
    }
    return hostNames;
}
/**
 * Starts the MR service check and waits for it to finish.
 *
 * @param stack        the stack the check runs on
 * @param ambariClient client of the Ambari server of the stack
 * @return the polling result of the service check request
 */
private PollingResult runSmokeTest(Stack stack, AmbariClient ambariClient) {
    int serviceCheckRequestId = ambariClient.runMRServiceCheck();
    Map<String, Integer> requests = singletonMap("MR_SMOKE_TEST", serviceCheckRequestId);
    return ambariOperationService.waitForOperations(stack, ambariClient, requests, SMOKE_TEST_AMBARI_PROGRESS_STATE);
}
/**
 * Polls the Ambari health check until the server is available.
 *
 * @param stack the stack whose Ambari server is checked
 * @throws CancellationException if the polling was exited
 * @throws CloudbreakException if the polling timed out
 */
private void waitForAmbariToStart(Stack stack) throws CloudbreakException {
    LOGGER.info("Checking if Ambari Server is available.");
    AmbariClientPollerObject pollerObject = new AmbariClientPollerObject(stack, getAmbariClient(stack));
    PollingResult healthCheckResult = ambariHealthChecker.pollWithTimeout(ambariHealthCheckerTask, pollerObject,
            AMBARI_POLLING_INTERVAL, MAX_ATTEMPTS_FOR_HOSTS, AmbariOperationService.MAX_FAILURE_COUNT);
    if (isExited(healthCheckResult)) {
        throw new CancellationException("Cluster was terminated while waiting for Ambari to start.");
    }
    if (isTimeout(healthCheckResult)) {
        throw new CloudbreakException("Ambari server was not restarted properly.");
    }
}
/**
 * Waits for the hosts to join Ambari; only an exited polling is treated as an error.
 *
 * @param stack the stack whose hosts are awaited
 * @throws CancellationException if the polling was exited
 */
private void startAmbariAgents(Stack stack) throws CloudbreakException {
    LOGGER.info("Starting Ambari agents on the hosts.");
    boolean cancelled = PollingResult.EXIT.equals(waitForHostsToJoin(stack));
    if (cancelled) {
        throw new CancellationException("Cluster was terminated while starting Ambari agents.");
    }
}
/**
 * Polls the join status of the hosts stored for the cluster of the stack.
 *
 * @param stack the stack whose hosts are awaited
 * @return the result of the polling
 */
private PollingResult waitForHostsToJoin(Stack stack) throws CloudbreakSecuritySetupException {
    Set<HostMetadata> expectedHosts = hostMetadataRepository.findHostsInCluster(stack.getCluster().getId());
    AmbariClient ambariClient = getAmbariClient(stack);
    AmbariHostsCheckerContext context = new AmbariHostsCheckerContext(stack, ambariClient, expectedHosts, stack.getFullNodeCount());
    return ambariHostJoin.pollWithTimeout(ambariHostsJoinStatusCheckerTask, context,
            AMBARI_POLLING_INTERVAL, MAX_ATTEMPTS_FOR_HOSTS, AmbariOperationService.MAX_FAILURE_COUNT);
}
/**
 * Tells whether every component state in the given map equals "INSTALLED".
 *
 * @param hostComponentsStates a map whose values are maps holding the component states
 * @return {@code true} if no state differs from "INSTALLED" (also when there are no states at all)
 */
private boolean allServiceStopped(Map<String, Map<String, String>> hostComponentsStates) {
    return hostComponentsStates.values().stream()
            .flatMap(componentStates -> componentStates.values().stream())
            .allMatch("INSTALLED"::equals);
}
/**
 * Registers the cluster's HDP stack and util repositories with Ambari when a
 * repository configuration is available.
 *
 * <p>The repository configuration is only looked up when the stack's orchestrator
 * does not resolve to a container orchestrator. Without a configuration the method
 * only logs and returns. The repo id entries are removed from the stack and util
 * maps before iterating, so every remaining entry is passed on as an
 * (os, repository URL) pair.
 *
 * @param stack        the stack whose cluster repository configuration is used
 * @param ambariClient the client used to register the repositories
 * @throws BadRequestException if Ambari answers a registration with an HTTP error
 */
private void setBaseRepoURL(Stack stack, AmbariClient ambariClient) throws IOException, CloudbreakImageNotFoundException, CloudbreakException {
    boolean containerOrchestrator = orchestratorTypeResolver.resolveType(stack.getOrchestrator()).containerOrchestrator();
    HDPRepo hdpRepo = containerOrchestrator ? null : clusterComponentConfigProvider.getHDPRepo(stack.getCluster().getId());
    if (hdpRepo == null) {
        LOGGER.info("Using latest HDP repository");
        return;
    }
    try {
        LOGGER.info("Use specific Ambari repository: {}", hdpRepo);
        Map<String, String> stackRepo = hdpRepo.getStack();
        Map<String, String> utilRepo = hdpRepo.getUtil();
        String stackRepoId = stackRepo.remove(HDPRepo.REPO_ID_TAG);
        String utilRepoId = utilRepo.remove(HDPRepo.REPO_ID_TAG);
        // The stack repo id is split on '-': first part is the stack type, second (if any) the version.
        String[] typeAndVersion = stackRepoId.split("-");
        String stackType = typeAndVersion[0];
        String version = typeAndVersion.length > 1 ? typeAndVersion[1] : "";
        for (Map.Entry<String, String> osRepo : stackRepo.entrySet()) {
            addRepository(ambariClient, stackType, version, osRepo.getKey(), stackRepoId, osRepo.getValue(), hdpRepo.isVerify());
        }
        for (Map.Entry<String, String> osRepo : utilRepo.entrySet()) {
            addRepository(ambariClient, stackType, version, osRepo.getKey(), utilRepoId, osRepo.getValue(), hdpRepo.isVerify());
        }
    } catch (HttpResponseException e) {
        String ambariError = AmbariClientExceptionUtil.getErrorMessage(e);
        throw new BadRequestException(
                String.format("Cannot use the specified Ambari stack: %s. Error: %s", hdpRepo.toString(), ambariError), e);
    }
}
/**
 * Registers a single repository with Ambari by forwarding all arguments,
 * unchanged and in the same order, to {@code client.addStackRepository}.
 *
 * @param client  the Ambari client the call is delegated to
 * @param stack   stack type passed through to the client
 * @param version stack version passed through to the client
 * @param os      operating system key passed through to the client
 * @param repoId  repository id passed through to the client
 * @param repoUrl repository URL passed through to the client
 * @param verify  verification flag passed through to the client
 *                (NOTE(review): presumably toggles base URL validation in Ambari; confirm against the client)
 * @throws HttpResponseException propagated from the client call
 */
private void addRepository(AmbariClient client, String stack, String version, String os,
String repoId, String repoUrl, boolean verify) throws HttpResponseException {
client.addStackRepository(stack, version, os, repoId, repoUrl, verify);
}
/**
 * Extends the blueprint text and submits it to Ambari.
 *
 * <p>The text is extended with the host group configuration and then the global
 * configuration of the stack's cluster. For a secure cluster it is additionally
 * extended with Kerberos settings and passed through {@code addHBaseClient}.
 *
 * <p>An {@link IOException} whose message is {@code "Conflict"} is only logged
 * (the blueprint is treated as already present); any other one is rethrown as
 * {@code CloudbreakServiceException}.
 *
 * @param stack         the stack the blueprint belongs to
 * @param ambariClient  the client used to extend and submit the blueprint
 * @param blueprintText the blueprint text to start from
 */
private void addBlueprint(Stack stack, AmbariClient ambariClient, String blueprintText) {
    try {
        Cluster cluster = stack.getCluster();
        String extended = ambariClient.extendBlueprintHostGroupConfiguration(blueprintText,
                hadoopConfigurationService.getHostGroupConfiguration(cluster));
        extended = ambariClient.extendBlueprintGlobalConfiguration(extended,
                hadoopConfigurationService.getGlobalConfiguration(cluster));
        if (cluster.isSecure()) {
            String ambariIp = cluster.getAmbariIp();
            boolean hasInstanceGroups = stack.getInstanceGroups() != null && !stack.getInstanceGroups().isEmpty();
            if (hasInstanceGroups) {
                // The propagation port is only passed when there is more than one gateway instance.
                Integer propagationPort = null;
                if (stack.getGatewayInstanceMetadata().size() > 1) {
                    propagationPort = KERBEROS_DB_PROPAGATION_PORT;
                }
                String gatewayFqdn = stack.getPrimaryGatewayInstance().getDiscoveryFQDN();
                // Everything after the first dot of the gateway FQDN is used as the domain.
                String domain = gatewayFqdn.substring(gatewayFqdn.indexOf(".") + 1);
                extended = ambariClient.extendBlueprintWithKerberos(extended,
                        kerberosTypeResolver.resolveTypeForKerberos(cluster.getKerberosConfig()),
                        kerberosHostResolver.resolveHostForKerberos(cluster, gatewayFqdn),
                        kerberosRealmResolver.getRealm(domain, cluster.getKerberosConfig()),
                        kerberosDomainResolver.getDomains(domain),
                        kerberosLdapResolver.resolveLdapUrlForKerberos(cluster.getKerberosConfig()),
                        kerberosContainerDnResolver.resolveContainerDnForKerberos(cluster.getKerberosConfig()),
                        !cluster.getKerberosConfig().getKerberosTcpAllowed(),
                        propagationPort);
            } else {
                // TODO this won't work on mesos, but it doesn't work anyway
                extended = ambariClient.extendBlueprintWithKerberos(extended,
                        kerberosTypeResolver.resolveTypeForKerberos(cluster.getKerberosConfig()),
                        ambariIp,
                        REALM,
                        DOMAIN,
                        kerberosLdapResolver.resolveLdapUrlForKerberos(cluster.getKerberosConfig()),
                        kerberosContainerDnResolver.resolveContainerDnForKerberos(cluster.getKerberosConfig()),
                        !cluster.getKerberosConfig().getKerberosTcpAllowed(),
                        null);
            }
            extended = addHBaseClient(extended);
        }
        LOGGER.info("Adding generated blueprint to Ambari: {}", JsonUtil.minify(extended));
        ambariClient.addBlueprint(extended, cluster.getTopologyValidation());
    } catch (IOException e) {
        if (!"Conflict".equals(e.getMessage())) {
            if (e instanceof HttpResponseException) {
                String ambariError = AmbariClientExceptionUtil.getErrorMessage((HttpResponseException) e);
                throw new CloudbreakServiceException("Ambari Blueprint could not be added: " + ambariError, e);
            }
            throw new CloudbreakServiceException(e);
        }
        LOGGER.info("Ambari blueprint already exists for stack: {}", stack.getId());
    }
}
/**
 * Adds an {@code HBASE_CLIENT} component to every host group of the blueprint
 * that lists {@code HBASE_MASTER} but no {@code HBASE_CLIENT}.
 *
 * <p>This is best effort: if the blueprint cannot be parsed, traversed or
 * serialized, a warning is logged and the input is returned unchanged.
 *
 * @param blueprint the blueprint JSON text
 * @return the re-serialized blueprint, or the original text when processing failed
 */
private String addHBaseClient(String blueprint) {
    try {
        JsonNode root = JsonUtil.readTree(blueprint);
        ArrayNode hostGroups = (ArrayNode) root.path("host_groups");
        for (JsonNode hostGroup : hostGroups) {
            ArrayNode components = (ArrayNode) hostGroup.path("components");
            boolean hasMaster = false;
            boolean hasClient = false;
            for (JsonNode component : components) {
                String componentName = component.path("name").textValue();
                if ("HBASE_MASTER".equals(componentName)) {
                    hasMaster = true;
                } else if ("HBASE_CLIENT".equals(componentName)) {
                    hasClient = true;
                }
            }
            // The array is only modified after its iteration has finished.
            if (hasMaster && !hasClient) {
                components.addObject().put("name", "HBASE_CLIENT");
            }
        }
        return JsonUtil.writeValueAsString(root);
    } catch (Exception e) {
        LOGGER.warn("Cannot extend blueprint with HBASE_CLIENT", e);
        return blueprint;
    }
}
/**
 * Extends the blueprint's global configuration with a {@code hive-site} entry
 * that sets the HiveServer2 Kerberos keytab path.
 *
 * @param ambariClient        the client performing the blueprint extension
 * @param processingBlueprint the blueprint text to extend
 * @return the blueprint text returned by the client
 */
private String extendHiveConfig(AmbariClient ambariClient, String processingBlueprint) {
    Map<String, String> hiveSiteProperties = new HashMap<>();
    hiveSiteProperties.put("hive.server2.authentication.kerberos.keytab", "/etc/security/keytabs/hive2.service.keytab");
    Map<String, Map<String, String>> globalConfig = new HashMap<>();
    globalConfig.put("hive-site", hiveSiteProperties);
    return ambariClient.extendBlueprintGlobalConfiguration(processingBlueprint, globalConfig);
}
/**
 * Polls with the hosts status checker task for the given set of hosts, using
 * the size of that set as the host count passed to the checker context.
 *
 * @param stack          the stack the hosts belong to
 * @param ambariClient   the client handed to the checker context
 * @param hostsInCluster the hosts handed to the checker context
 * @return the outcome reported by the polling service
 */
private PollingResult waitForHosts(Stack stack, AmbariClient ambariClient, Set<HostMetadata> hostsInCluster) {
    LOGGER.info("Waiting for hosts to connect.[Ambari server address: {}]", stack.getAmbariIp());
    AmbariHostsCheckerContext hostsContext =
            new AmbariHostsCheckerContext(stack, ambariClient, hostsInCluster, hostsInCluster.size());
    return hostsPollingService.pollWithTimeoutSingleFailure(ambariHostsStatusCheckerTask, hostsContext,
            AMBARI_POLLING_INTERVAL, MAX_ATTEMPTS_FOR_HOSTS);
}
/**
 * Builds, for every host group, the list of host descriptors keyed by the host group name.
 *
 * <p>When the host group's constraint has an instance group with alive instances,
 * each instance yields one descriptor with its discovery FQDN plus rack/hypervisor
 * data. In every other case the descriptors contain only the host names taken from
 * the host group's host metadata.
 *
 * @param hostGroups the host groups to describe
 * @return host group name mapped to the list of per-host descriptor maps
 */
private Map<String, List<Map<String, String>>> buildHostGroupAssociations(Set<HostGroup> hostGroups) {
    LOGGER.info("Computing host - hostGroup mappings based on hostGroup - instanceGroup associations");
    Map<String, List<Map<String, String>>> associations = new HashMap<>();
    for (HostGroup hostGroup : hostGroups) {
        List<Map<String, String>> hostInfos;
        if (hostGroup.getConstraint().getInstanceGroup() == null) {
            hostInfos = fqdnOnlyHostInfos(hostGroup);
        } else {
            Map<String, String> topologyMapping = getTopologyMapping(hostGroup);
            Long instanceGroupId = hostGroup.getConstraint().getInstanceGroup().getId();
            List<InstanceMetaData> aliveInstances = instanceMetadataRepository.findAliveInstancesInInstanceGroup(instanceGroupId);
            if (aliveInstances.isEmpty()) {
                hostInfos = fqdnOnlyHostInfos(hostGroup);
            } else {
                hostInfos = new ArrayList<>();
                for (InstanceMetaData instance : aliveInstances) {
                    hostInfos.add(hostInfoForInstance(instance, topologyMapping));
                }
            }
        }
        associations.put(hostGroup.getName(), hostInfos);
    }
    LOGGER.info("Computed host-hostGroup associations: {}", associations);
    return associations;
}

/** Creates one descriptor per host metadata entry of the host group, holding only the host name under the FQDN key. */
private List<Map<String, String>> fqdnOnlyHostInfos(HostGroup hostGroup) {
    List<Map<String, String>> hostInfos = new ArrayList<>();
    for (HostMetadata hostMetadata : hostGroup.getHostMetadata()) {
        Map<String, String> hostInfo = new HashMap<>();
        hostInfo.put(FQDN, hostMetadata.getHostName());
        hostInfos.add(hostInfo);
    }
    return hostInfos;
}

/** Creates the descriptor of one instance: its discovery FQDN plus rack/hypervisor data derived from its locality indicator. */
private Map<String, String> hostInfoForInstance(InstanceMetaData instance, Map<String, String> topologyMapping) {
    Map<String, String> hostInfo = new HashMap<>();
    hostInfo.put(FQDN, instance.getDiscoveryFQDN());
    String locality = instance.getLocalityIndicator();
    if (locality == null) {
        return hostInfo;
    }
    if (!topologyMapping.isEmpty()) {
        // With topology mapping: the locality indicator is the hypervisor, the rack comes from the mapping.
        hostInfo.put("hypervisor", locality);
        hostInfo.put("rack", topologyMapping.get(locality));
    } else if (locality.startsWith("/")) {
        // Azure: the indicator already starts with '/' and is used as the rack as is.
        hostInfo.put("rack", locality);
    } else {
        // Openstack: the indicator is prefixed with '/' to form the rack.
        hostInfo.put("rack", "/" + locality);
    }
    return hostInfo;
}
/**
 * Builds a hypervisor-to-rack map from the topology attached to the credential
 * of the host group's stack.
 *
 * @param hg the host group whose cluster, stack and credential are followed to the topology
 * @return a new mutable map with one entry per topology record; empty when the
 *         credential has no topology or the topology has no records
 */
private Map<String, String> getTopologyMapping(HostGroup hg) {
    // Diamond instead of the raw HashMap type: no unchecked conversion on assignment.
    Map<String, String> result = new HashMap<>();
    LOGGER.info("Computing hypervisor - rack mapping based on topology");
    Topology topology = hg.getCluster().getStack().getCredential().getTopology();
    if (topology == null) {
        return result;
    }
    List<TopologyRecord> records = topology.getRecords();
    if (records != null) {
        for (TopologyRecord t : records) {
            result.put(t.getHypervisor(), t.getRack());
        }
    }
    return result;
}
/**
 * Waits for the cluster install operation, tracked under the key
 * {@code "CLUSTER_INSTALL"} with request id {@code 1}, using the install progress state.
 *
 * @param stack        the stack whose cluster is being installed
 * @param ambariClient the client used to query the operation
 * @return the outcome reported by the Ambari operation service
 */
private PollingResult waitForClusterInstall(Stack stack, AmbariClient ambariClient) {
    // Single fixed entry: singletonMap matches how the other callers in this class pass their request map.
    Map<String, Integer> clusterInstallRequest = singletonMap("CLUSTER_INSTALL", 1);
    return ambariOperationService.waitForOperations(stack, ambariClient, clusterInstallRequest, INSTALL_AMBARI_PROGRESS_STATE);
}
/**
 * Asks Ambari to add the given hosts to a host group using the cluster's blueprint.
 *
 * @param hosts        the hosts to add
 * @param stack        the stack whose cluster blueprint name is used
 * @param ambariClient the client performing the request
 * @param hostGroup    the host group the hosts are added to
 * @return a single-entry map with the value returned by the client under {@code "UPSCALE_REQUEST"}
 * @throws BadRequestException if the client fails with an HTTP error whose message is {@code "Conflict"}
 * @throws CloudbreakServiceException if the client fails with any other HTTP error
 */
private Map<String, Integer> installServices(List<String> hosts, Stack stack, AmbariClient ambariClient, String hostGroup) {
    String blueprintName = stack.getCluster().getBlueprint().getBlueprintName();
    try {
        return singletonMap("UPSCALE_REQUEST", ambariClient.addHostsWithBlueprint(blueprintName, hostGroup, hosts));
    } catch (HttpResponseException e) {
        if (!"Conflict".equals(e.getMessage())) {
            String ambariError = AmbariClientExceptionUtil.getErrorMessage(e);
            throw new CloudbreakServiceException("Ambari could not install services. " + ambariError, e);
        }
        throw new BadRequestException("Host already exists.", e);
    }
}
/**
 * Codes of the messages this class refers to; each constant carries one dotted
 * string code, exposed through {@link #code()}.
 * NOTE(review): presumably the codes are resolved to message text elsewhere; confirm against the callers.
 */
private enum Msg {
    AMBARI_CLUSTER_RESETTING_AMBARI_DATABASE("ambari.cluster.resetting.ambari.database"),
    AMBARI_CLUSTER_AMBARI_DATABASE_RESET("ambari.cluster.ambari.database.reset"),
    AMBARI_CLUSTER_RESTARTING_AMBARI_SERVER("ambari.cluster.restarting.ambari.server"),
    AMBARI_CLUSTER_RESTARTING_AMBARI_AGENT("ambari.cluster.restarting.ambari.agent"),
    AMBARI_CLUSTER_AMBARI_AGENT_RESTARTED("ambari.cluster.ambari.agent.restarted"),
    AMBARI_CLUSTER_AMBARI_SERVER_RESTARTED("ambari.cluster.ambari.server.restarted"),
    AMBARI_CLUSTER_REMOVING_NODE_FROM_HOSTGROUP("ambari.cluster.removing.node.from.hostgroup"),
    AMBARI_CLUSTER_ADDING_NODE_TO_HOSTGROUP("ambari.cluster.adding.node.to.hostgroup"),
    AMBARI_CLUSTER_HOST_JOIN_FAILED("ambari.cluster.host.join.failed"),
    AMBARI_CLUSTER_INSTALL_FAILED("ambari.cluster.install.failed"),
    AMBARI_CLUSTER_UPSCALE_FAILED("ambari.cluster.upscale.failed"),
    AMBARI_CLUSTER_MR_SMOKE_FAILED("ambari.cluster.mr.smoke.failed"),
    AMBARI_CLUSTER_SERVICES_STARTING("ambari.cluster.services.starting"),
    AMBARI_CLUSTER_SERVICES_STARTED("ambari.cluster.services.started"),
    AMBARI_CLUSTER_SERVICES_STOPPING("ambari.cluster.services.stopping"),
    AMBARI_CLUSTER_SERVICES_STOPPED("ambari.cluster.services.stopped");

    // Assigned once in the constructor; final keeps the enum constants immutable.
    private final String code;

    Msg(String msgCode) {
        code = msgCode;
    }

    /**
     * @return the string code of this constant
     */
    public String code() {
        return code;
    }
}
}