package com.sequenceiq.cloudbreak.service.cluster.flow; import static com.sequenceiq.cloudbreak.orchestrator.container.DockerContainer.AMBARI_AGENT; import static com.sequenceiq.cloudbreak.service.PollingResult.SUCCESS; import static com.sequenceiq.cloudbreak.service.PollingResult.isExited; import static com.sequenceiq.cloudbreak.service.PollingResult.isSuccess; import static com.sequenceiq.cloudbreak.service.PollingResult.isTimeout; import static com.sequenceiq.cloudbreak.service.cluster.DataNodeUtils.sortByUsedSpace; import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationService.AMBARI_POLLING_INTERVAL; import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationService.MAX_ATTEMPTS_FOR_HOSTS; import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationType.DECOMMISSION_AMBARI_PROGRESS_STATE; import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationType.DECOMMISSION_SERVICES_AMBARI_PROGRESS_STATE; import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationType.START_SERVICES_AMBARI_PROGRESS_STATE; import static com.sequenceiq.cloudbreak.service.cluster.flow.AmbariOperationType.STOP_SERVICES_AMBARI_PROGRESS_STATE; import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; import javax.annotation.PostConstruct; import javax.inject.Inject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Component; import com.sequenceiq.ambari.client.AmbariClient; import com.sequenceiq.cloudbreak.client.HttpClientConfig; import com.sequenceiq.cloudbreak.controller.BadRequestException; import com.sequenceiq.cloudbreak.core.CloudbreakException; import com.sequenceiq.cloudbreak.core.CloudbreakSecuritySetupException; import com.sequenceiq.cloudbreak.core.bootstrap.service.OrchestratorType; import com.sequenceiq.cloudbreak.core.bootstrap.service.OrchestratorTypeResolver; import com.sequenceiq.cloudbreak.core.bootstrap.service.container.ContainerOrchestratorResolver; import com.sequenceiq.cloudbreak.core.bootstrap.service.host.HostOrchestratorResolver; import com.sequenceiq.cloudbreak.domain.Cluster; import com.sequenceiq.cloudbreak.domain.Container; import com.sequenceiq.cloudbreak.domain.HostGroup; import com.sequenceiq.cloudbreak.domain.HostMetadata; import com.sequenceiq.cloudbreak.domain.Orchestrator; import com.sequenceiq.cloudbreak.domain.Stack; import com.sequenceiq.cloudbreak.orchestrator.container.ContainerOrchestrator; import com.sequenceiq.cloudbreak.orchestrator.exception.CloudbreakOrchestratorException; import com.sequenceiq.cloudbreak.orchestrator.host.HostOrchestrator; import com.sequenceiq.cloudbreak.orchestrator.model.ContainerInfo; import com.sequenceiq.cloudbreak.orchestrator.model.GatewayConfig; import com.sequenceiq.cloudbreak.orchestrator.model.OrchestrationCredential; import com.sequenceiq.cloudbreak.repository.ContainerRepository; import com.sequenceiq.cloudbreak.repository.HostMetadataRepository; import com.sequenceiq.cloudbreak.service.CloudbreakServiceException; import com.sequenceiq.cloudbreak.service.GatewayConfigService; import com.sequenceiq.cloudbreak.service.PollingResult; import com.sequenceiq.cloudbreak.service.PollingService; import com.sequenceiq.cloudbreak.service.TlsSecurityService; import com.sequenceiq.cloudbreak.service.cluster.AmbariClientProvider; import com.sequenceiq.cloudbreak.service.cluster.AmbariConfigurationService; import com.sequenceiq.cloudbreak.service.cluster.AmbariOperationFailedException; import com.sequenceiq.cloudbreak.service.cluster.ConfigParam; import com.sequenceiq.cloudbreak.service.cluster.filter.HostFilterService; import com.sequenceiq.cloudbreak.service.hostgroup.HostGroupService; import com.sequenceiq.cloudbreak.util.AmbariClientExceptionUtil; import groovyx.net.http.HttpResponseException; @Component public class AmbariDecommissioner { private static final Logger LOGGER = LoggerFactory.getLogger(AmbariDecommissioner.class); private static final int MAX_ATTEMPTS_FOR_REGION_DECOM = 500; private static final String DATANODE = "DATANODE"; private static final double SAFETY_PERCENTAGE = 1.2; private static final int NO_REPLICATION = 0; private static final Map<String, String> COMPONENTS_NEED_TO_DECOMMISSION = new HashMap<>(); @Inject private HostGroupService hostGroupService; @Inject private HostMetadataRepository hostMetadataRepository; @Inject private AmbariClientProvider ambariClientProvider; @Inject private PollingService<AmbariHostsWithNames> rsPollerService; @Inject private PollingService<AmbariClientPollerObject> ambariClientPollingService; @Inject private DNDecommissionStatusCheckerTask dnDecommissionStatusCheckerTask; @Inject private RSDecommissionStatusCheckerTask rsDecommissionStatusCheckerTask; @Inject private AmbariHostsLeaveStatusCheckerTask hostsLeaveStatusCheckerTask; @Inject private PollingService<AmbariHostsWithNames> ambariHostLeave; @Inject private AmbariOperationService ambariOperationService; @Inject private AmbariConfigurationService configurationService; @Inject private HostFilterService hostFilterService; @Inject private ContainerOrchestratorResolver containerOrchestratorResolver; @Inject private ContainerRepository containerRepository; @Inject private TlsSecurityService tlsSecurityService; @Inject private GatewayConfigService gatewayConfigService; @Inject private OrchestratorTypeResolver orchestratorTypeResolver; @Inject private HostOrchestratorResolver hostOrchestratorResolver; @PostConstruct public void init() { COMPONENTS_NEED_TO_DECOMMISSION.put("DATANODE", "HDFS"); COMPONENTS_NEED_TO_DECOMMISSION.put("NODEMANAGER", "YARN"); //COMPONENTS_NEED_TO_DECOMMISSION.put("HBASE_REGIONSERVER", "HBASE"); } public Set<String> collectDownscaleCandidates(Stack stack, String hostGroupName, Integer scalingAdjustment) throws CloudbreakException { Cluster cluster = stack.getCluster(); int adjustment = Math.abs(scalingAdjustment); Set<String> hostsToRemove = selectHostsToRemove(collectDownscaleCandidates(stack, cluster, hostGroupName, adjustment), adjustment); if (hostsToRemove.size() != adjustment) { throw new CloudbreakException(String.format("Only %d hosts found to downscale but %d required.", hostsToRemove.size(), adjustment)); } return hostsToRemove; } public Set<String> decommissionAmbariNodes(Stack stack, String hostGroupName, Set<String> hostNames) throws CloudbreakException { Map<String, HostMetadata> hostsToRemove = collectHostMetadata(stack.getCluster(), hostGroupName, hostNames); if (hostsToRemove.size() != hostNames.size()) { throw new CloudbreakException(String.format("Not all the hosts found in the given hostgroup.")); } Cluster cluster = stack.getCluster(); HttpClientConfig clientConfig = tlsSecurityService.buildTLSClientConfigForPrimaryGateway(stack.getId(), cluster.getAmbariIp()); AmbariClient ambariClient = ambariClientProvider.getAmbariClient(clientConfig, stack.getGatewayPort(), cluster); List<String> runningHosts = ambariClient.getClusterHosts(); new HashSet(hostsToRemove.keySet()).forEach(hostName -> { if (!runningHosts.contains(hostName)) { hostsToRemove.remove(hostName); } }); if (hostsToRemove.isEmpty()) { return hostNames; } Map<String, HostMetadata> unhealthyHosts = new HashMap<>(); Map<String, HostMetadata> healthyHosts = new HashMap<>(); for (Map.Entry<String, HostMetadata> hostToRemove: hostsToRemove.entrySet()) { if ("UNKNOWN".equals(ambariClient.getHostState(hostToRemove.getKey()))) { unhealthyHosts.put(hostToRemove.getKey(), hostToRemove.getValue()); } else { healthyHosts.put(hostToRemove.getKey(), hostToRemove.getValue()); } } Set<String> deletedHosts = new HashSet<>(); Map<String, Map<String, String>> runningComponents = ambariClient.getHostComponentsStates(); for (Map.Entry<String, HostMetadata> host : unhealthyHosts.entrySet()) { deleteHostFromAmbari(host.getValue(), runningComponents, ambariClient); hostMetadataRepository.delete(host.getValue().getId()); deletedHosts.add(host.getKey()); } if (!healthyHosts.isEmpty()) { deletedHosts.addAll(decommissionAmbariNodes(stack, healthyHosts, runningComponents, ambariClient)); } return deletedHosts; } public boolean deleteHostFromAmbari(Stack stack, HostMetadata data) throws CloudbreakSecuritySetupException { HttpClientConfig clientConfig = tlsSecurityService.buildTLSClientConfigForPrimaryGateway(stack.getId(), stack.getCluster().getAmbariIp()); AmbariClient ambariClient = ambariClientProvider.getAmbariClient(clientConfig, stack.getGatewayPort(), stack.getCluster()); Map<String, Map<String, String>> runningComponents = ambariClient.getHostComponentsStates(); return deleteHostFromAmbari(data, runningComponents, ambariClient); } private boolean deleteHostFromAmbari(HostMetadata data, Map<String, Map<String, String>> runningComponents, AmbariClient ambariClient) throws CloudbreakSecuritySetupException { boolean hostDeleted = false; if (ambariClient.getClusterHosts().contains(data.getHostName())) { String hostState = ambariClient.getHostState(data.getHostName()); if ("UNKNOWN".equals(hostState)) { deleteHosts(singletonList(data.getHostName()), runningComponents, ambariClient); hostDeleted = true; } } else { LOGGER.debug("Host is already deleted."); hostDeleted = true; } return hostDeleted; } private Set<String> decommissionAmbariNodes(Stack stack, Map<String, HostMetadata> hostsToRemove, Map<String, Map<String, String>> runningComponents, AmbariClient ambariClient) throws CloudbreakException { Set<String> result = new HashSet<>(); PollingResult pollingResult = startServicesIfNeeded(stack, ambariClient, runningComponents); if (isSuccess(pollingResult)) { List<String> hostList = new ArrayList<>(hostsToRemove.keySet()); Map<String, Integer> decommissionRequests = decommissionComponents(ambariClient, hostList, runningComponents); if (!decommissionRequests.isEmpty()) { pollingResult = ambariOperationService.waitForOperations(stack, ambariClient, decommissionRequests, DECOMMISSION_AMBARI_PROGRESS_STATE); } if (isSuccess(pollingResult)) { pollingResult = waitForDataNodeDecommission(stack, ambariClient); if (isSuccess(pollingResult)) { pollingResult = waitForRegionServerDecommission(stack, ambariClient, hostList, runningComponents); if (isSuccess(pollingResult)) { pollingResult = stopHadoopComponents(stack, ambariClient, hostList, runningComponents); if (isSuccess(pollingResult)) { stopAndDeleteHosts(stack, ambariClient, hostList, runningComponents); result.addAll(hostsToRemove.keySet()); } } } } } return result; } private void deleteHosts(List<String> hosts, Map<String, Map<String, String>> runningComponents, AmbariClient ambariClient) throws CloudbreakSecuritySetupException { for (String hostName : hosts) { ambariClient.deleteHostComponents(hostName, new ArrayList<>(runningComponents.get(hostName).keySet())); ambariClient.deleteHost(hostName); } } private List<HostMetadata> collectDownscaleCandidates(Stack stack, Cluster cluster, String hostGroupName, Integer scalingAdjustment) throws CloudbreakSecuritySetupException { List<HostMetadata> downScaleCandidates; HttpClientConfig clientConfig = tlsSecurityService.buildTLSClientConfigForPrimaryGateway(stack.getId(), cluster.getAmbariIp()); HostGroup hostGroup = hostGroupService.getByClusterIdAndName(cluster.getId(), hostGroupName); Set<HostMetadata> hostsInHostGroup = hostGroup.getHostMetadata(); List<HostMetadata> filteredHostList = hostFilterService.filterHostsForDecommission(cluster, hostsInHostGroup, hostGroupName); int reservedInstances = hostsInHostGroup.size() - filteredHostList.size(); String blueprintName = cluster.getBlueprint().getBlueprintName(); AmbariClient ambariClient = ambariClientProvider.getAmbariClient(clientConfig, stack.getGatewayPort(), cluster); if (ambariClient.getBlueprintMap(blueprintName).get(hostGroupName).contains(DATANODE)) { int replication = getReplicationFactor(ambariClient, hostGroupName); verifyNodeCount(replication, scalingAdjustment, filteredHostList, reservedInstances); downScaleCandidates = checkAndSortByAvailableSpace(stack, ambariClient, replication, scalingAdjustment, filteredHostList); } else { verifyNodeCount(NO_REPLICATION, scalingAdjustment, filteredHostList, reservedInstances); downScaleCandidates = filteredHostList; } return downScaleCandidates; } private Map<String, HostMetadata> collectHostMetadata(Cluster cluster, String hostGroupName, Set<String> hostNames) { HostGroup hostGroup = hostGroupService.getByClusterIdAndName(cluster.getId(), hostGroupName); Set<HostMetadata> hostsInHostGroup = hostGroup.getHostMetadata(); Map<String, HostMetadata> hostMetadatas = hostsInHostGroup.stream().filter(hostMetadata -> hostNames.contains(hostMetadata.getHostName())).collect( Collectors.toMap(hostMetadata -> hostMetadata.getHostName(), hostMetadata -> hostMetadata)); return hostMetadatas; } private int getReplicationFactor(AmbariClient ambariClient, String hostGroup) { Map<String, String> configuration = configurationService.getConfiguration(ambariClient, hostGroup); return Integer.parseInt(configuration.get(ConfigParam.DFS_REPLICATION.key())); } private void verifyNodeCount(int replication, int scalingAdjustment, List<HostMetadata> filteredHostList, int reservedInstances) { int adjustment = Math.abs(scalingAdjustment); int hostSize = filteredHostList.size(); if (hostSize + reservedInstances - adjustment < replication || hostSize < adjustment) { LOGGER.info("Cannot downscale: replication: {}, adjustment: {}, filtered host size: {}", replication, scalingAdjustment, hostSize); throw new BadRequestException("There is not enough node to downscale. " + "Check the replication factor and the ApplicationMaster occupation."); } } private List<HostMetadata> checkAndSortByAvailableSpace(Stack stack, AmbariClient client, int replication, int adjustment, List<HostMetadata> filteredHostList) { int removeCount = Math.abs(adjustment); LOGGER.info("removeCount: {}, replication: {}, filteredHostList size: {}, filteredHostList: {}", removeCount, replication, filteredHostList.size(), filteredHostList); Map<String, Map<Long, Long>> dfsSpace = getDFSSpace(stack, client); Map<String, Long> sortedAscending = sortByUsedSpace(dfsSpace, false); LOGGER.info("sortedAscending: {}", sortedAscending); Map<String, Long> selectedNodes = selectNodes(sortedAscending, filteredHostList, removeCount); Map<String, Long> remainingNodes = removeSelected(sortedAscending, selectedNodes); LOGGER.info("Selected nodes for decommission: {}", selectedNodes); LOGGER.info("Remaining nodes after decommission: {}", remainingNodes); long usedSpace = getSelectedUsage(selectedNodes); long remainingSpace = getRemainingSpace(remainingNodes, dfsSpace); long safetyUsedSpace = ((Double) (usedSpace * replication * SAFETY_PERCENTAGE)).longValue(); LOGGER.info("Checking DFS space for decommission, usedSpace: {}, remainingSpace: {}", usedSpace, remainingSpace); LOGGER.info("Used space with replication: {} and safety space: {} is: {}", replication, SAFETY_PERCENTAGE, safetyUsedSpace); if (remainingSpace < safetyUsedSpace) { throw new BadRequestException( String.format("Trying to move '%s' bytes worth of data to nodes with '%s' bytes of capacity is not allowed", usedSpace, remainingSpace) ); } return convert(selectedNodes, filteredHostList); } private Map<String, Map<Long, Long>> getDFSSpace(Stack stack, AmbariClient client) { AmbariDFSSpaceRetrievalTask dfsSpaceTask = new AmbariDFSSpaceRetrievalTask(); PollingResult result = ambariClientPollingService.pollWithTimeoutSingleFailure(dfsSpaceTask, new AmbariClientPollerObject(stack, client), AmbariDFSSpaceRetrievalTask.AMBARI_RETRYING_INTERVAL, AmbariDFSSpaceRetrievalTask.AMBARI_RETRYING_COUNT); if (result == PollingResult.SUCCESS) { return dfsSpaceTask.getDfsSpace(); } else { throw new CloudbreakServiceException("Failed to get dfs space from ambari!"); } } private Map<String, Long> selectNodes(Map<String, Long> sortedAscending, List<HostMetadata> filteredHostList, int removeCount) { LOGGER.info("sortedAscending: {}, filteredHostList: {}", sortedAscending, filteredHostList); Map<String, Long> select = new HashMap<>(); int i = 0; for (String host : sortedAscending.keySet()) { if (i < removeCount) { for (HostMetadata hostMetadata : filteredHostList) { if (hostMetadata.getHostName().equalsIgnoreCase(host)) { select.put(host, sortedAscending.get(host)); i++; break; } } } else { break; } } return select; } private Map<String, Long> removeSelected(Map<String, Long> all, Map<String, Long> selected) { Map<String, Long> copy = new HashMap<>(all); for (String host : selected.keySet()) { Iterator<String> iterator = copy.keySet().iterator(); while (iterator.hasNext()) { if (iterator.next().equalsIgnoreCase(host)) { iterator.remove(); break; } } } return copy; } private long getSelectedUsage(Map<String, Long> selected) { long usage = 0; for (String host : selected.keySet()) { usage += selected.get(host); } return usage; } private long getRemainingSpace(Map<String, Long> remainingNodes, Map<String, Map<Long, Long>> dfsSpace) { long remaining = 0; for (String host : remainingNodes.keySet()) { Map<Long, Long> space = dfsSpace.get(host); remaining += space.keySet().iterator().next(); } return remaining; } private List<HostMetadata> convert(Map<String, Long> selectedNodes, List<HostMetadata> filteredHostList) { List<HostMetadata> result = new ArrayList<>(); for (String host : selectedNodes.keySet()) { for (HostMetadata hostMetadata : filteredHostList) { if (hostMetadata.getHostName().equalsIgnoreCase(host)) { result.add(hostMetadata); break; } } } return result; } private void stopAndDeleteHosts(Stack stack, AmbariClient ambariClient, final List<String> hostNames, Map<String, Map<String, String>> runningComponents) throws CloudbreakException { Orchestrator orchestrator = stack.getOrchestrator(); Map<String, Object> map = new HashMap<>(); map.putAll(orchestrator.getAttributes().getMap()); map.put("certificateDir", tlsSecurityService.prepareCertDir(stack.getId())); OrchestratorType orchestratorType = orchestratorTypeResolver.resolveType(orchestrator.getType()); try { if (orchestratorType.containerOrchestrator()) { OrchestrationCredential credential = new OrchestrationCredential(orchestrator.getApiEndpoint(), map); ContainerOrchestrator containerOrchestrator = containerOrchestratorResolver.get(orchestrator.getType()); Set<Container> containers = containerRepository.findContainersInCluster(stack.getCluster().getId()); List<ContainerInfo> containersToDelete = containers.stream() .filter(input -> hostNames.contains(input.getHost()) && input.getImage().contains(AMBARI_AGENT.getName())) .map(input -> new ContainerInfo(input.getContainerId(), input.getName(), input.getHost(), input.getImage())) .collect(Collectors.toList()); containerOrchestrator.deleteContainer(containersToDelete, credential); containerRepository.delete(containers); PollingResult pollingResult = waitForHostsToLeave(stack, ambariClient, hostNames); if (isTimeout(pollingResult)) { LOGGER.warn("Ambari agent stop timed out, delete the hosts anyway, hosts: {}", hostNames); } if (!isExited(pollingResult)) { deleteHosts(hostNames, runningComponents, ambariClient); } } else if (orchestratorType.hostOrchestrator()) { HostOrchestrator hostOrchestrator = hostOrchestratorResolver.get(stack.getOrchestrator().getType()); Map<String, String> privateIpsByFQDN = new HashMap<>(); stack.getInstanceMetaDataAsList().stream() .filter(instanceMetaData -> hostNames.stream().anyMatch(hn -> hn.contains(instanceMetaData.getDiscoveryFQDN().split("\\.")[0]))) .forEach(instanceMetaData -> privateIpsByFQDN.put(instanceMetaData.getDiscoveryFQDN(), instanceMetaData.getPrivateIp())); List<GatewayConfig> allGatewayConfigs = gatewayConfigService.getAllGatewayConfigs(stack); hostOrchestrator.tearDown(allGatewayConfigs, privateIpsByFQDN); deleteHosts(hostNames, runningComponents, ambariClient); } } catch (CloudbreakOrchestratorException e) { LOGGER.error("Failed to delete containers while decommissioning: ", e); throw new CloudbreakException("Failed to delete containers while decommissioning: ", e); } } private PollingResult waitForHostsToLeave(Stack stack, AmbariClient ambariClient, List<String> hostNames) { return ambariHostLeave.pollWithTimeout(hostsLeaveStatusCheckerTask, new AmbariHostsWithNames(stack, ambariClient, hostNames), AMBARI_POLLING_INTERVAL, MAX_ATTEMPTS_FOR_HOSTS, AmbariOperationService.MAX_FAILURE_COUNT); } private PollingResult waitForDataNodeDecommission(Stack stack, AmbariClient ambariClient) { LOGGER.info("Waiting for DataNodes to move the blocks to other nodes. stack id: {}", stack.getId()); return ambariOperationService.waitForOperations(stack, ambariClient, dnDecommissionStatusCheckerTask, Collections.emptyMap(), DECOMMISSION_SERVICES_AMBARI_PROGRESS_STATE); } private PollingResult waitForRegionServerDecommission(Stack stack, AmbariClient ambariClient, List<String> hosts, Map<String, Map<String, String>> runningComponents) { if (COMPONENTS_NEED_TO_DECOMMISSION.get("HBASE_REGIONSERVER") == null || !hosts.stream().anyMatch(hn -> runningComponents.get(hn).keySet().contains("HBASE_REGIONSERVER"))) { return SUCCESS; } LOGGER.info("Waiting for RegionServers to move the regions to other servers"); return rsPollerService.pollWithTimeoutSingleFailure(rsDecommissionStatusCheckerTask, new AmbariHostsWithNames(stack, ambariClient, hosts), AMBARI_POLLING_INTERVAL, MAX_ATTEMPTS_FOR_REGION_DECOM); } private Set<String> selectHostsToRemove(List<HostMetadata> decommissionCandidates, int adjustment) { Set<String> hostsToRemove = new HashSet<>(); int i = 0; for (HostMetadata hostMetadata : decommissionCandidates) { String hostName = hostMetadata.getHostName(); if (i < adjustment) { LOGGER.info("Host '{}' will be removed from Ambari cluster", hostName); hostsToRemove.add(hostName); } else { break; } i++; } return hostsToRemove; } private Map<String, Integer> decommissionComponents(AmbariClient ambariClient, List<String> hosts, Map<String, Map<String, String>> runningComponents) { Map<String, Integer> decommissionRequests = new HashMap<>(); COMPONENTS_NEED_TO_DECOMMISSION.keySet().forEach(component -> { List<String> hostsRunService = hosts.stream().filter(hn -> runningComponents.get(hn).keySet().contains(component)).collect(Collectors.toList()); Function<List<String>, Integer> action; if (component.equals("NODEMANAGER")) { action = l -> ambariClient.decommissionNodeManagers(l); } else if (component.equals("DATANODE")) { action = l -> ambariClient.decommissionDataNodes(l); } else if (component.equals("HBASE_REGIONSERVER")) { action = l -> { ambariClient.setHBaseRegionServersToMaintenance(l, true); return ambariClient.decommissionHBaseRegionServers(l); }; } else { throw new UnsupportedOperationException("Component decommission not allowed: " + component); } Integer requestId = decommissionComponent(ambariClient, hostsRunService, component, action); if (requestId != null) { decommissionRequests.put(component + "_DECOMMISSION", requestId); } }); return decommissionRequests; } private Integer decommissionComponent(AmbariClient ambariClient, List<String> hosts, String component, Function<List<String>, Integer> action) { List<String> hostsToDecommission = hosts.stream() .filter(h -> "INSERVICE".equals(ambariClient.getComponentStates(h, component).get("desired_admin_state"))).collect(Collectors.toList()); if (!hostsToDecommission.isEmpty()) { return action.apply(hostsToDecommission); } return null; } private PollingResult stopHadoopComponents(Stack stack, AmbariClient ambariClient, List<String> hosts, Map<String, Map<String, String>> runningComponents) { try { hosts = hosts.stream() .filter(hn -> runningComponents.get(hn).size() > 0).collect(Collectors.toList()); if (!hosts.isEmpty()) { int requestId = ambariClient.stopAllComponentsOnHosts(hosts); return ambariOperationService.waitForOperations(stack, ambariClient, singletonMap("Stopping components on the decommissioned hosts", requestId), STOP_SERVICES_AMBARI_PROGRESS_STATE); } return SUCCESS; } catch (HttpResponseException e) { String errorMessage = AmbariClientExceptionUtil.getErrorMessage(e); throw new AmbariOperationFailedException("Ambari could not stop components. " + errorMessage, e); } } private PollingResult startServicesIfNeeded(Stack stack, AmbariClient ambariClient, Map<String, Map<String, String>> runningComponents) { Map<String, Integer> requests = new HashMap<>(); try { for (String service : collectServicesToStart(ambariClient, runningComponents)) { int requestId = ambariClient.startService(service); requests.put(service + "_START", requestId); } } catch (Exception e) { LOGGER.error("Failed to start HDFS/YARN/HBASE services", e); throw new BadRequestException("Failed to start the HDFS, YARN and HBASE services, it's possible that some of the nodes are unavailable"); } if (!requests.isEmpty()) { return ambariOperationService.waitForOperations(stack, ambariClient, requests, START_SERVICES_AMBARI_PROGRESS_STATE); } else { return SUCCESS; } } private Set<String> collectServicesToStart(AmbariClient ambariClient, Map<String, Map<String, String>> runningComponents) { Set<String> services = new HashSet<>(); for (Map.Entry<String, Map<String, String>> hostComponentsEntry : runningComponents.entrySet()) { for (Map.Entry<String, String> componentStateEntry : hostComponentsEntry.getValue().entrySet()) { String component = componentStateEntry.getKey(); if (!"STARTED".equals(componentStateEntry.getValue()) && COMPONENTS_NEED_TO_DECOMMISSION.keySet().contains(component)) { Map<String, String> componentStates = ambariClient.getComponentStates(hostComponentsEntry.getKey(), component); if ("DECOMMISSIONED".equals(componentStates.get("desired_admin_state"))) { LOGGER.info("No need to start ambari service {} on host {}", component, hostComponentsEntry.getKey()); } else { services.add(COMPONENTS_NEED_TO_DECOMMISSION.get(component)); } } } } return services; } }