package com.sequenceiq.cloudbreak.service.stack.flow; import static com.sequenceiq.cloudbreak.api.model.Status.AVAILABLE; import static com.sequenceiq.cloudbreak.api.model.Status.CREATE_FAILED; import static com.sequenceiq.cloudbreak.api.model.Status.DELETE_FAILED; import static com.sequenceiq.cloudbreak.api.model.Status.STOPPED; import static com.sequenceiq.cloudbreak.api.model.Status.WAIT_FOR_SYNC; import java.util.Arrays; import java.util.Calendar; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import javax.inject.Inject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; import com.sequenceiq.cloudbreak.api.model.DetailedStackStatus; import com.sequenceiq.cloudbreak.api.model.InstanceStatus; import com.sequenceiq.cloudbreak.cloud.exception.CloudConnectorException; import com.sequenceiq.cloudbreak.cloud.model.CloudVmInstanceStatus; import com.sequenceiq.cloudbreak.common.type.HostMetadataState; import com.sequenceiq.cloudbreak.controller.NotFoundException; import com.sequenceiq.cloudbreak.domain.Cluster; import com.sequenceiq.cloudbreak.domain.HostMetadata; import com.sequenceiq.cloudbreak.domain.InstanceGroup; import com.sequenceiq.cloudbreak.domain.InstanceMetaData; import com.sequenceiq.cloudbreak.domain.Resource; import com.sequenceiq.cloudbreak.domain.Stack; import com.sequenceiq.cloudbreak.repository.HostMetadataRepository; import com.sequenceiq.cloudbreak.repository.InstanceGroupRepository; import com.sequenceiq.cloudbreak.repository.InstanceMetaDataRepository; import com.sequenceiq.cloudbreak.repository.ResourceRepository; import com.sequenceiq.cloudbreak.repository.StackUpdater; import com.sequenceiq.cloudbreak.service.cluster.flow.AmbariClusterConnector; import com.sequenceiq.cloudbreak.service.cluster.flow.AmbariDecommissioner; import com.sequenceiq.cloudbreak.service.events.CloudbreakEventService; 
import com.sequenceiq.cloudbreak.service.messages.CloudbreakMessagesService;
import com.sequenceiq.cloudbreak.service.stack.StackService;
import com.sequenceiq.cloudbreak.service.stack.connector.adapter.ServiceProviderMetadataAdapter;

/**
 * Reconciles the instance metadata stored for a stack with the instance states reported by the cloud provider.
 *
 * <p>For every examined instance the reported state is classified as deleted, running, stopped or "other", the stored
 * {@link InstanceMetaData} (and, for deleted instances, the cluster host metadata and provider resource record) is
 * adjusted accordingly, and a Cloudbreak event is fired. After all instances were processed, the collected per-state
 * counters decide which summary event is fired and - only when stack status updates are enabled - which stack status is set.
 */
@Service
public class StackSyncService {

    private static final Logger LOGGER = LoggerFactory.getLogger(StackSyncService.class);

    private static final String SYNC_STATUS_REASON = "Synced instance states with the cloud provider.";

    @Inject
    private StackService stackService;

    @Inject
    private StackUpdater stackUpdater;

    @Inject
    private CloudbreakEventService eventService;

    @Inject
    private InstanceMetaDataRepository instanceMetaDataRepository;

    @Inject
    private InstanceGroupRepository instanceGroupRepository;

    @Inject
    private HostMetadataRepository hostMetadataRepository;

    @Inject
    private ResourceRepository resourceRepository;

    @Inject
    private AmbariClusterConnector ambariClusterConnector;

    @Inject
    private AmbariDecommissioner ambariDecommissioner;

    @Inject
    private ServiceProviderMetadataAdapter metadata;

    @Inject
    private CloudbreakMessagesService cloudbreakMessagesService;

    /**
     * Synchronizes the given instances using statuses that were already collected from the cloud provider.
     *
     * <p>An instance with no matching entry in {@code instanceStatuses} (matched by instance id) is treated as deleted.
     *
     * @param stack                    the stack the instances belong to
     * @param instanceMetaDataList     the stored instances to reconcile
     * @param instanceStatuses         the statuses reported by the provider; {@code null} elements and elements without an
     *                                 instance id are ignored
     * @param stackStatusUpdateEnabled whether the stack status itself may be updated based on the result
     */
    public void updateInstances(Stack stack, List<InstanceMetaData> instanceMetaDataList, List<CloudVmInstanceStatus> instanceStatuses,
            boolean stackStatusUpdateEnabled) {
        Map<InstanceSyncState, Integer> counts = initInstanceStateCounts();
        for (final InstanceMetaData metaData : instanceMetaDataList) {
            Optional<CloudVmInstanceStatus> status = instanceStatuses.stream()
                    .filter(is -> is != null
                            && is.getCloudInstance().getInstanceId() != null
                            && is.getCloudInstance().getInstanceId().equals(metaData.getInstanceId()))
                    .findFirst();
            InstanceSyncState state = status.isPresent() ? transform(status.get().getStatus()) : InstanceSyncState.DELETED;
            syncInstanceStatusByState(stack, counts, metaData, state);
        }
        handleSyncResult(stack, counts, stackStatusUpdateEnabled);
    }

    /**
     * Loads the stack by id and synchronizes its instances, unless the stack is being deleted or modified, in which case
     * only a warning is logged.
     *
     * @param stackId                  id of the stack to synchronize
     * @param stackStatusUpdateEnabled whether the stack status itself may be updated based on the result
     */
    public void sync(Long stackId, boolean stackStatusUpdateEnabled) {
        Stack stack = stackService.getById(stackId);
        if (stack.isStackInDeletionPhase() || stack.isModificationInProgress()) {
            LOGGER.warn("Stack could not be synchronized in {} state!", stack.getStatus());
        } else {
            sync(stack, stackStatusUpdateEnabled);
        }
    }

    /**
     * Queries the state of every not-terminated instance of the stack and reconciles the stored metadata with it.
     * A {@link CloudConnectorException} for an instance is logged, reported as an event and counted as UNKNOWN; the
     * remaining instances are still processed.
     */
    private void sync(Stack stack, boolean stackStatusUpdateEnabled) {
        Long stackId = stack.getId();
        Set<InstanceMetaData> instances = instanceMetaDataRepository.findNotTerminatedForStack(stackId);
        Map<InstanceSyncState, Integer> instanceStateCounts = initInstanceStateCounts();
        for (InstanceMetaData instance : instances) {
            InstanceGroup instanceGroup = instance.getInstanceGroup();
            try {
                InstanceSyncState state = metadata.getState(stack, instanceGroup, instance.getInstanceId());
                syncInstanceStatusByState(stack, instanceStateCounts, instance, state);
            } catch (CloudConnectorException e) {
                LOGGER.warn(e.getMessage(), e);
                eventService.fireCloudbreakEvent(stackId, AVAILABLE.name(),
                        cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_INSTANCE_STATUS_RETRIEVAL_FAILED.code(),
                                Collections.singletonList(instance.getInstanceId())));
                increment(instanceStateCounts, InstanceSyncState.UNKNOWN);
            }
        }
        handleSyncResult(stack, instanceStateCounts, stackStatusUpdateEnabled);
    }

    /**
     * Dispatches to the handler of the reported state. Every state other than DELETED, RUNNING and STOPPED (including
     * UNKNOWN and {@code null}) is only counted as IN_PROGRESS and leaves the stored metadata untouched.
     */
    private void syncInstanceStatusByState(Stack stack, Map<InstanceSyncState, Integer> counts, InstanceMetaData metaData,
            InstanceSyncState state) {
        if (InstanceSyncState.DELETED.equals(state)) {
            syncDeletedInstance(stack, counts, metaData);
        } else if (InstanceSyncState.RUNNING.equals(state)) {
            syncRunningInstance(stack, counts, metaData);
        } else if (InstanceSyncState.STOPPED.equals(state)) {
            syncStoppedInstance(stack, counts, metaData);
        } else {
            increment(counts, InstanceSyncState.IN_PROGRESS);
        }
    }

    /**
     * Counts the instance as STOPPED and, unless the instance is terminated or the stack itself is stopped, stores the
     * STOPPED instance status and fires an event about it.
     */
    private void syncStoppedInstance(Stack stack, Map<InstanceSyncState, Integer> instanceStateCounts, InstanceMetaData instance) {
        increment(instanceStateCounts, InstanceSyncState.STOPPED);
        if (!instance.isTerminated() && !stack.isStopped()) {
            LOGGER.info("Instance '{}' is reported as stopped on the cloud provider, setting its state to STOPPED.", instance.getInstanceId());
            instance.setInstanceStatus(InstanceStatus.STOPPED);
            instanceMetaDataRepository.save(instance);
            eventService.fireCloudbreakEvent(stack.getId(), AVAILABLE.name(),
                    cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_INSTANCE_UPDATED.code(),
                            Arrays.asList(instance.getInstanceId(), "stopped")));
        }
    }

    /**
     * Counts the instance as RUNNING. When the stack waits for sync and the instance is still in created state, the
     * instance is marked FAILED; otherwise, if the stored status is none of running, decommissioned, created or failed,
     * the metadata is updated to reflect a running instance.
     */
    private void syncRunningInstance(Stack stack, Map<InstanceSyncState, Integer> instanceStateCounts, InstanceMetaData instance) {
        increment(instanceStateCounts, InstanceSyncState.RUNNING);
        if (stack.getStatus() == WAIT_FOR_SYNC && instance.isCreated()) {
            LOGGER.info("Instance '{}' is reported as created on the cloud provider but not member of the cluster, setting its state to FAILED.",
                    instance.getInstanceId());
            instance.setInstanceStatus(InstanceStatus.FAILED);
            instanceMetaDataRepository.save(instance);
            eventService.fireCloudbreakEvent(stack.getId(), CREATE_FAILED.name(),
                    cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_INSTANCE_FAILED.code(),
                            Collections.singletonList(instance.getDiscoveryFQDN())));
        } else if (!instance.isRunning() && !instance.isDecommissioned() && !instance.isCreated() && !instance.isFailed()) {
            LOGGER.info("Instance '{}' is reported as running on the cloud provider, updating metadata.", instance.getInstanceId());
            updateMetaDataToRunning(stack.getId(), stack.getCluster(), instance);
        }
    }

    /**
     * Tries to remove the host from the cluster, then - only if the instance is not already terminated - counts it as
     * DELETED, removes its resource record and marks the metadata as terminated.
     */
    private void syncDeletedInstance(Stack stack, Map<InstanceSyncState, Integer> instanceStateCounts, InstanceMetaData instance) {
        deleteHostFromCluster(stack, instance);
        if (!instance.isTerminated()) {
            increment(instanceStateCounts, InstanceSyncState.DELETED);
            LOGGER.info("Instance '{}' is reported as deleted on the cloud provider, setting its state to TERMINATED.", instance.getInstanceId());
            deleteResourceIfNeeded(stack, instance);
            updateMetaDataToTerminated(stack, instance);
        }
    }

    /**
     * Deletes the resource record looked up by the stack id and the instance id, if one exists.
     */
    private void deleteResourceIfNeeded(Stack stack, InstanceMetaData instance) {
        Resource resource = resourceRepository.findByStackIdAndResourceNameOrReference(stack.getId(), instance.getInstanceId());
        if (resource != null) {
            resourceRepository.delete(resource);
        }
    }

    /**
     * Fires the summary event of the synchronization and, where applicable and enabled, updates the stack status.
     *
     * <p>Precedence: any UNKNOWN, then any IN_PROGRESS, then a mix of RUNNING and STOPPED (all three only fire an event),
     * then any RUNNING (stack AVAILABLE), then STOPPED count equal to the number of not-terminated instances (stack
     * STOPPED), otherwise DELETE_FAILED.
     */
    private void handleSyncResult(Stack stack, Map<InstanceSyncState, Integer> instanceStateCounts, boolean stackStatusUpdateEnabled) {
        Set<InstanceMetaData> instances = instanceMetaDataRepository.findNotTerminatedForStack(stack.getId());
        if (instanceStateCounts.get(InstanceSyncState.UNKNOWN) > 0) {
            eventService.fireCloudbreakEvent(stack.getId(), AVAILABLE.name(),
                    cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_INSTANCE_STATUS_COULDNT_DETERMINE.code()));
        } else if (instanceStateCounts.get(InstanceSyncState.IN_PROGRESS) > 0) {
            eventService.fireCloudbreakEvent(stack.getId(), AVAILABLE.name(),
                    cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_INSTANCE_OPERATION_IN_PROGRESS.code()));
        } else if (instanceStateCounts.get(InstanceSyncState.RUNNING) > 0 && instanceStateCounts.get(InstanceSyncState.STOPPED) > 0) {
            eventService.fireCloudbreakEvent(stack.getId(), STOPPED.name(),
                    cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_INSTANCE_STOPPED_ON_PROVIDER.code()));
        } else if (instanceStateCounts.get(InstanceSyncState.RUNNING) > 0) {
            updateStackStatusIfEnabled(stack.getId(), DetailedStackStatus.AVAILABLE, SYNC_STATUS_REASON, stackStatusUpdateEnabled);
            eventService.fireCloudbreakEvent(stack.getId(), AVAILABLE.name(),
                    cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_INSTANCE_STATE_SYNCED.code()));
        } else if (instanceStateCounts.get(InstanceSyncState.STOPPED).equals(instances.size())) {
            updateStackStatusIfEnabled(stack.getId(), DetailedStackStatus.STOPPED, SYNC_STATUS_REASON, stackStatusUpdateEnabled);
            eventService.fireCloudbreakEvent(stack.getId(), STOPPED.name(),
                    cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_INSTANCE_STATE_SYNCED.code()));
        } else {
            updateStackStatusIfEnabled(stack.getId(), DetailedStackStatus.DELETE_FAILED, SYNC_STATUS_REASON, stackStatusUpdateEnabled);
            eventService.fireCloudbreakEvent(stack.getId(), DELETE_FAILED.name(),
                    cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_INSTANCE_STATE_SYNCED.code()));
        }
    }

    /**
     * Updates the stack status through the {@link StackUpdater} only when {@code stackStatusUpdateEnabled} is set.
     */
    private void updateStackStatusIfEnabled(Long stackId, DetailedStackStatus status, String statusReason, boolean stackStatusUpdateEnabled) {
        if (stackStatusUpdateEnabled) {
            stackUpdater.updateStackStatus(stackId, status, statusReason);
        }
    }

    /**
     * Maps a cloud-level instance status to the sync state used by this service: STARTED and CREATED count as RUNNING,
     * FAILED and TERMINATED as DELETED, anything not listed as UNKNOWN.
     */
    private InstanceSyncState transform(com.sequenceiq.cloudbreak.cloud.model.InstanceStatus instanceStatus) {
        switch (instanceStatus) {
            case IN_PROGRESS:
                return InstanceSyncState.IN_PROGRESS;
            case STARTED:
                return InstanceSyncState.RUNNING;
            case STOPPED:
                return InstanceSyncState.STOPPED;
            case CREATED:
                return InstanceSyncState.RUNNING;
            case FAILED:
                return InstanceSyncState.DELETED;
            case TERMINATED:
                return InstanceSyncState.DELETED;
            default:
                return InstanceSyncState.UNKNOWN;
        }
    }

    /**
     * Creates the counter map with a zero entry for each state that {@link #handleSyncResult} reads.
     */
    private Map<InstanceSyncState, Integer> initInstanceStateCounts() {
        Map<InstanceSyncState, Integer> instanceStates = new HashMap<>();
        instanceStates.put(InstanceSyncState.DELETED, 0);
        instanceStates.put(InstanceSyncState.STOPPED, 0);
        instanceStates.put(InstanceSyncState.RUNNING, 0);
        instanceStates.put(InstanceSyncState.IN_PROGRESS, 0);
        instanceStates.put(InstanceSyncState.UNKNOWN, 0);
        return instanceStates;
    }

    /**
     * Adds one to the counter of the given state.
     */
    private static void increment(Map<InstanceSyncState, Integer> counts, InstanceSyncState state) {
        counts.merge(state, 1, Integer::sum);
    }

    /**
     * Best-effort removal of the instance's host from the cluster; never throws.
     *
     * <p>Does nothing when the stack has no cluster. If Ambari is available the host is deleted from Ambari and, on
     * success, from the host metadata repository; if Ambari is not available the host metadata is marked UNHEALTHY.
     * Any exception - including the {@link NotFoundException} raised here when a not-terminated instance has no host
     * metadata - is logged and reported with the "instance terminated" event.
     */
    private void deleteHostFromCluster(Stack stack, InstanceMetaData instanceMetaData) {
        try {
            if (stack.getCluster() == null) {
                return;
            }
            HostMetadata hostMetadata = hostMetadataRepository.findHostInClusterByName(stack.getCluster().getId(),
                    instanceMetaData.getDiscoveryFQDN());
            if (hostMetadata == null) {
                if (instanceMetaData.getInstanceStatus() != InstanceStatus.TERMINATED) {
                    throw new NotFoundException(String.format("Host not found with id '%s'", instanceMetaData.getDiscoveryFQDN()));
                }
            } else if (ambariClusterConnector.isAmbariAvailable(stack)) {
                if (ambariDecommissioner.deleteHostFromAmbari(stack, hostMetadata)) {
                    hostMetadataRepository.delete(hostMetadata.getId());
                    eventService.fireCloudbreakEvent(stack.getId(), AVAILABLE.name(),
                            cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_HOST_DELETED.code(),
                                    Collections.singletonList(instanceMetaData.getDiscoveryFQDN())));
                } else {
                    eventService.fireCloudbreakEvent(stack.getId(), AVAILABLE.name(),
                            cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_INSTANCE_REMOVAL_FAILED.code(),
                                    Collections.singletonList(instanceMetaData.getDiscoveryFQDN())));
                }
            } else {
                hostMetadata.setHostMetadataState(HostMetadataState.UNHEALTHY);
                hostMetadataRepository.save(hostMetadata);
                eventService.fireCloudbreakEvent(stack.getId(), AVAILABLE.name(),
                        cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_HOST_UPDATED.code(),
                                Arrays.asList(instanceMetaData.getDiscoveryFQDN(), HostMetadataState.UNHEALTHY.name())));
            }
        } catch (Exception e) {
            LOGGER.error("Host cannot be deleted from cluster: ", e);
            eventService.fireCloudbreakEvent(stack.getId(), AVAILABLE.name(),
                    cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_INSTANCE_TERMINATED.code(),
                            Collections.singletonList(instanceMetaData.getDiscoveryFQDN())));
        }
    }

    /**
     * Marks the instance as TERMINATED with the current time as termination date, sets the node count of its instance
     * group to the number of alive instances minus one, persists both and fires an event naming the instance by its id
     * and, when known, its FQDN.
     */
    private void updateMetaDataToTerminated(Stack stack, InstanceMetaData instanceMetaData) {
        InstanceGroup instanceGroup = instanceMetaData.getInstanceGroup();
        List<InstanceMetaData> aliveInstancesInInstanceGroup = instanceMetaDataRepository.findAliveInstancesInInstanceGroup(instanceGroup.getId());
        instanceGroup.setNodeCount(aliveInstancesInInstanceGroup.size() - 1);
        long timeInMillis = Calendar.getInstance().getTimeInMillis();
        instanceMetaData.setTerminationDate(timeInMillis);
        instanceMetaData.setInstanceStatus(InstanceStatus.TERMINATED);
        instanceMetaDataRepository.save(instanceMetaData);
        instanceGroupRepository.save(instanceGroup);
        String name;
        if (instanceMetaData.getDiscoveryFQDN() == null) {
            name = instanceMetaData.getInstanceId();
        } else {
            name = String.format("%s (%s)", instanceMetaData.getInstanceId(), instanceMetaData.getDiscoveryFQDN());
        }
        eventService.fireCloudbreakEvent(stack.getId(), AVAILABLE.name(),
                cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_INSTANCE_DELETED_CBMETADATA.code(), Collections.singletonList(name)));
    }

    /**
     * Sets the instance to REGISTERED when its host is present in the cluster's host metadata and to UNREGISTERED
     * otherwise, re-adding it to the instance group's node count if it was stored as TERMINATED.
     *
     * @param stackId          id of the owning stack, used for the fired event
     * @param cluster          the stack's cluster; may be {@code null} when the stack has no cluster, in which case the
     *                         instance cannot be part of any cluster and becomes UNREGISTERED
     * @param instanceMetaData the instance reported as running
     */
    private void updateMetaDataToRunning(Long stackId, Cluster cluster, InstanceMetaData instanceMetaData) {
        InstanceGroup instanceGroup = instanceMetaData.getInstanceGroup();
        if (InstanceStatus.TERMINATED.equals(instanceMetaData.getInstanceStatus())) {
            instanceGroup.setNodeCount(instanceGroup.getNodeCount() + 1);
        }
        // A stack may have no cluster (deleteHostFromCluster guards for the same case); skip the lookup instead of
        // failing with a NullPointerException on cluster.getId().
        HostMetadata hostMetadata = cluster == null
                ? null
                : hostMetadataRepository.findHostInClusterByName(cluster.getId(), instanceMetaData.getDiscoveryFQDN());
        if (hostMetadata != null) {
            LOGGER.info("Instance '{}' was found in the cluster metadata, setting it's state to REGISTERED.", instanceMetaData.getInstanceId());
            instanceMetaData.setInstanceStatus(InstanceStatus.REGISTERED);
        } else {
            LOGGER.info("Instance '{}' was not found in the cluster metadata, setting it's state to UNREGISTERED.",
                    instanceMetaData.getInstanceId());
            instanceMetaData.setInstanceStatus(InstanceStatus.UNREGISTERED);
        }
        instanceMetaDataRepository.save(instanceMetaData);
        instanceGroupRepository.save(instanceGroup);
        eventService.fireCloudbreakEvent(stackId, AVAILABLE.name(),
                cloudbreakMessagesService.getMessage(Msg.STACK_SYNC_INSTANCE_UPDATED.code(),
                        Arrays.asList(instanceMetaData.getDiscoveryFQDN(), "running")));
    }

    /**
     * Message codes resolved through the {@link CloudbreakMessagesService} for the events fired by this service.
     */
    private enum Msg {
        STACK_SYNC_INSTANCE_STATUS_RETRIEVAL_FAILED("stack.sync.instance.status.retrieval.failed"),
        STACK_SYNC_INSTANCE_STATUS_COULDNT_DETERMINE("stack.sync.instance.status.couldnt.determine"),
        STACK_SYNC_INSTANCE_OPERATION_IN_PROGRESS("stack.sync.instance.operation.in.progress"),
        STACK_SYNC_INSTANCE_STOPPED_ON_PROVIDER("stack.sync.instance.stopped.on.provider"),
        STACK_SYNC_INSTANCE_STATE_SYNCED("stack.sync.instance.state.synced"),
        STACK_SYNC_HOST_DELETED("stack.sync.host.deleted"),
        STACK_SYNC_INSTANCE_REMOVAL_FAILED("stack.sync.instance.removal.failed"),
        STACK_SYNC_HOST_UPDATED("stack.sync.host.updated"),
        STACK_SYNC_INSTANCE_TERMINATED("stack.sync.instance.terminated"),
        STACK_SYNC_INSTANCE_DELETED_CBMETADATA("stack.sync.instance.deleted.cbmetadata"),
        STACK_SYNC_INSTANCE_UPDATED("stack.sync.instance.updated"),
        STACK_SYNC_INSTANCE_FAILED("stack.sync.instance.failed");

        private final String code;

        Msg(String msgCode) {
            code = msgCode;
        }

        public String code() {
            return code;
        }
    }
}