// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. package com.cloud.ha; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import javax.inject.Inject; import javax.naming.ConfigurationException; import org.apache.log4j.Logger; import org.apache.log4j.NDC; import org.apache.cloudstack.engine.orchestration.service.VolumeOrchestrationService; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; import org.apache.cloudstack.managed.context.ManagedContext; import org.apache.cloudstack.managed.context.ManagedContextRunnable; import com.cloud.agent.AgentManager; import com.cloud.alert.AlertManager; import com.cloud.cluster.ClusterManagerListener; import com.cloud.cluster.ManagementServerHost; import com.cloud.configuration.Config; import com.cloud.dc.ClusterDetailsDao; import com.cloud.dc.DataCenterVO; import com.cloud.dc.HostPodVO; import com.cloud.dc.dao.DataCenterDao; import com.cloud.dc.dao.HostPodDao; import com.cloud.deploy.DeploymentPlanner; import com.cloud.deploy.HAPlanner; import com.cloud.exception.AgentUnavailableException; import com.cloud.exception.ConcurrentOperationException; import com.cloud.exception.InsufficientCapacityException; import com.cloud.exception.InsufficientServerCapacityException; import com.cloud.exception.OperationTimedoutException; import com.cloud.exception.ResourceUnavailableException; import com.cloud.ha.Investigator.UnknownVM; import com.cloud.ha.dao.HighAvailabilityDao; import com.cloud.host.Host; import com.cloud.host.HostVO; import com.cloud.host.Status; import com.cloud.host.dao.HostDao; import com.cloud.hypervisor.Hypervisor.HypervisorType; import com.cloud.resource.ResourceManager; import com.cloud.server.ManagementServer; import com.cloud.service.ServiceOfferingVO; import com.cloud.service.dao.ServiceOfferingDao; import com.cloud.storage.StorageManager; import com.cloud.storage.dao.GuestOSCategoryDao; import com.cloud.storage.dao.GuestOSDao; import com.cloud.user.AccountManager; import com.cloud.utils.NumbersUtil; import com.cloud.utils.component.ManagerBase; import com.cloud.utils.concurrency.NamedThreadFactory; import com.cloud.utils.exception.CloudRuntimeException; import com.cloud.vm.VMInstanceVO; import com.cloud.vm.VirtualMachine; import com.cloud.vm.VirtualMachine.State; import com.cloud.vm.VirtualMachineManager; import com.cloud.vm.VirtualMachineProfile; import com.cloud.vm.dao.VMInstanceDao; /** * HighAvailabilityManagerImpl coordinates the HA process. VMs are registered with the HA Manager for HA. The request is stored * within a database backed work queue. HAManager has a number of workers that pick up these work items to perform HA on the * VMs. * * The HA process goes as follows: 1. Check with the list of Investigators to determine that the VM is no longer running. If a * Investigator finds the VM is still alive, the HA process is stopped and the state of the VM reverts back to its previous * state. If a Investigator finds the VM is dead, then HA process is started on the VM, skipping step 2. 2. If the list of * Investigators can not determine if the VM is dead or alive. The list of FenceBuilders is invoked to fence off the VM so that * it won't do any damage to the storage and network. 3. The VM is marked as stopped. 4. The VM is started again via the normal * process of starting VMs. Note that once the VM is marked as stopped, the user may have started the VM himself. 5. VMs that * have re-started more than the configured number of times are marked as in Error state and the user is not allowed to restart * the VM. * * @config {@table || Param Name | Description | Values | Default || || workers | number of worker threads to spin off to do the * processing | int | 1 || || time.to.sleep | Time to sleep if no work items are found | seconds | 60 || || max.retries * | number of times to retry start | int | 5 || || time.between.failure | Time elapsed between failures before we * consider it as another retry | seconds | 3600 || || time.between.cleanup | Time to wait before the cleanup thread * runs | seconds | 86400 || || force.ha | Force HA to happen even if the VM says no | boolean | false || || * ha.retry.wait | time to wait before retrying the work item | seconds | 120 || || stop.retry.wait | time to wait * before retrying the stop | seconds | 120 || * } **/ public class HighAvailabilityManagerImpl extends ManagerBase implements HighAvailabilityManager, ClusterManagerListener { protected static final Logger s_logger = Logger.getLogger(HighAvailabilityManagerImpl.class); WorkerThread[] _workers; boolean _stopped; long _timeToSleep; @Inject HighAvailabilityDao _haDao; @Inject VMInstanceDao _instanceDao; @Inject HostDao _hostDao; @Inject DataCenterDao _dcDao; @Inject HostPodDao _podDao; @Inject ClusterDetailsDao _clusterDetailsDao; @Inject ServiceOfferingDao _serviceOfferingDao; long _serverId; @Inject ManagedContext _managedContext; List<Investigator> investigators; public List<Investigator> getInvestigators() { return investigators; } public void setInvestigators(List<Investigator> investigators) { this.investigators = investigators; } List<FenceBuilder> fenceBuilders; public List<FenceBuilder> getFenceBuilders() { return fenceBuilders; } public void setFenceBuilders(List<FenceBuilder> fenceBuilders) { this.fenceBuilders = fenceBuilders; } List<HAPlanner> _haPlanners; public List<HAPlanner> getHaPlanners() { return _haPlanners; } public void setHaPlanners(List<HAPlanner> haPlanners) { _haPlanners = haPlanners; } @Inject AgentManager _agentMgr; @Inject AlertManager _alertMgr; @Inject StorageManager _storageMgr; @Inject GuestOSDao _guestOSDao; @Inject GuestOSCategoryDao _guestOSCategoryDao; @Inject VirtualMachineManager _itMgr; @Inject AccountManager _accountMgr; @Inject ResourceManager _resourceMgr; @Inject ManagementServer _msServer; @Inject ConfigurationDao _configDao; @Inject VolumeOrchestrationService volumeMgr; String _instance; ScheduledExecutorService _executor; int _stopRetryInterval; int _investigateRetryInterval; int _migrateRetryInterval; int _restartRetryInterval; int _maxRetries; long _timeBetweenFailures; long _timeBetweenCleanups; boolean _forceHA; String _haTag = null; protected HighAvailabilityManagerImpl() { } @Override public Status investigate(final long hostId) { final HostVO host = _hostDao.findById(hostId); if (host == null) { return Status.Alert; } Status hostState = null; for (Investigator investigator : investigators) { hostState = investigator.isAgentAlive(host); if (hostState != null) { if (s_logger.isDebugEnabled()) { s_logger.debug(investigator.getName() + " was able to determine host " + hostId + " is in " + hostState.toString()); } return hostState; } if (s_logger.isDebugEnabled()) { s_logger.debug(investigator.getName() + " unable to determine the state of the host. Moving on."); } } return hostState; } @Override public void scheduleRestartForVmsOnHost(final HostVO host, boolean investigate) { if (host.getType() != Host.Type.Routing) { return; } if (host.getHypervisorType() == HypervisorType.VMware || host.getHypervisorType() == HypervisorType.Hyperv) { s_logger.info("Don't restart VMs on host " + host.getId() + " as it is a " + host.getHypervisorType().toString() + " host"); return; } s_logger.warn("Scheduling restart for VMs on host " + host.getId() + "-" + host.getName()); final List<VMInstanceVO> vms = _instanceDao.listByHostId(host.getId()); final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); // send an email alert that the host is down StringBuilder sb = null; List<VMInstanceVO> reorderedVMList = new ArrayList<VMInstanceVO>(); if ((vms != null) && !vms.isEmpty()) { sb = new StringBuilder(); sb.append(" Starting HA on the following VMs:"); // collect list of vm names for the alert email for (int i = 0; i < vms.size(); i++) { VMInstanceVO vm = vms.get(i); if (vm.getType() == VirtualMachine.Type.User) { reorderedVMList.add(vm); } else { reorderedVMList.add(0, vm); } if (vm.isHaEnabled()) { sb.append(" " + vm.getHostName()); } } } // send an email alert that the host is down, include VMs HostPodVO podVO = _podDao.findById(host.getPodId()); String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host is down, " + hostDesc, "Host [" + hostDesc + "] is down." + ((sb != null) ? sb.toString() : "")); for (VMInstanceVO vm : reorderedVMList) { ServiceOfferingVO vmOffering = _serviceOfferingDao.findById(vm.getServiceOfferingId()); if (vmOffering.getUseLocalStorage()) { if (s_logger.isDebugEnabled()){ s_logger.debug("Skipping HA on vm " + vm + ", because it uses local storage. Its fate is tied to the host."); } continue; } if (s_logger.isDebugEnabled()) { s_logger.debug("Notifying HA Mgr of to restart vm " + vm.getId() + "-" + vm.getInstanceName()); } vm = _instanceDao.findByUuid(vm.getUuid()); Long hostId = vm.getHostId(); if (hostId != null && !hostId.equals(host.getId())) { s_logger.debug("VM " + vm.getInstanceName() + " is not on down host " + host.getId() + " it is on other host " + hostId + " VM HA is done"); continue; } scheduleRestart(vm, investigate); } } @Override public void scheduleStop(VMInstanceVO vm, long hostId, WorkType type) { assert (type == WorkType.CheckStop || type == WorkType.ForceStop || type == WorkType.Stop); if (_haDao.hasBeenScheduled(vm.getId(), type)) { s_logger.info("There's already a job scheduled to stop " + vm); return; } HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), type, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated()); _haDao.persist(work); if (s_logger.isDebugEnabled()) { s_logger.debug("Scheduled " + work); } wakeupWorkers(); } protected void wakeupWorkers() { for (WorkerThread worker : _workers) { worker.wakup(); } } @Override public boolean scheduleMigration(final VMInstanceVO vm) { if (vm.getHostId() != null) { final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.Migration, Step.Scheduled, vm.getHostId(), vm.getState(), 0, vm.getUpdated()); _haDao.persist(work); wakeupWorkers(); } return true; } @Override public void scheduleRestart(VMInstanceVO vm, boolean investigate) { Long hostId = vm.getHostId(); if (hostId == null) { try { s_logger.debug("Found a vm that is scheduled to be restarted but has no host id: " + vm); _itMgr.advanceStop(vm.getUuid(), true); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } } if (vm.getHypervisorType() == HypervisorType.VMware || vm.getHypervisorType() == HypervisorType.Hyperv) { s_logger.info("Skip HA for VMware VM or Hyperv VM" + vm.getInstanceName()); return; } if (!investigate) { if (s_logger.isDebugEnabled()) { s_logger.debug("VM does not require investigation so I'm marking it as Stopped: " + vm.toString()); } AlertManager.AlertType alertType = AlertManager.AlertType.ALERT_TYPE_USERVM; if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER; } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { alertType = AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY; } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) { alertType = AlertManager.AlertType.ALERT_TYPE_SSVM; } if (!(_forceHA || vm.isHaEnabled())) { String hostDesc = "id:" + vm.getHostId() + ", availability zone id:" + vm.getDataCenterId() + ", pod id:" + vm.getPodIdToDeployIn(); _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "VM (name: " + vm.getHostName() + ", id: " + vm.getId() + ") stopped unexpectedly on host " + hostDesc, "Virtual Machine " + vm.getHostName() + " (id: " + vm.getId() + ") running on host [" + vm.getHostId() + "] stopped unexpectedly."); if (s_logger.isDebugEnabled()) { s_logger.debug("VM is not HA enabled so we're done."); } } try { _itMgr.advanceStop(vm.getUuid(), true); vm = _instanceDao.findByUuid(vm.getUuid()); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } } if (vm.getHypervisorType() == HypervisorType.VMware) { s_logger.info("Skip HA for VMware VM " + vm.getInstanceName()); return; } List<HaWorkVO> items = _haDao.findPreviousHA(vm.getId()); int timesTried = 0; for (HaWorkVO item : items) { if (timesTried < item.getTimesTried() && !item.canScheduleNew(_timeBetweenFailures)) { timesTried = item.getTimesTried(); break; } } if (hostId == null) { hostId = vm.getLastHostId(); } HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.HA, investigate ? Step.Investigating : Step.Scheduled, hostId != null ? hostId : 0L, vm.getState(), timesTried, vm.getUpdated()); _haDao.persist(work); if (s_logger.isInfoEnabled()) { s_logger.info("Schedule vm for HA: " + vm); } wakeupWorkers(); } protected Long restart(final HaWorkVO work) { List<HaWorkVO> items = _haDao.listFutureHaWorkForVm(work.getInstanceId(), work.getId()); if (items.size() > 0) { StringBuilder str = new StringBuilder("Cancelling this work item because newer ones have been scheduled. Work Ids = ["); for (HaWorkVO item : items) { str.append(item.getId()).append(", "); } str.delete(str.length() - 2, str.length()).append("]"); s_logger.info(str.toString()); return null; } items = _haDao.listRunningHaWorkForVm(work.getInstanceId()); if (items.size() > 0) { StringBuilder str = new StringBuilder("Waiting because there's HA work being executed on an item currently. Work Ids =["); for (HaWorkVO item : items) { str.append(item.getId()).append(", "); } str.delete(str.length() - 2, str.length()).append("]"); s_logger.info(str.toString()); return (System.currentTimeMillis() >> 10) + _investigateRetryInterval; } long vmId = work.getInstanceId(); VirtualMachine vm = _itMgr.findById(work.getInstanceId()); if (vm == null) { s_logger.info("Unable to find vm: " + vmId); return null; } s_logger.info("HA on " + vm); if (vm.getState() != work.getPreviousState() || vm.getUpdated() != work.getUpdateTime()) { s_logger.info("VM " + vm + " has been changed. Current State = " + vm.getState() + " Previous State = " + work.getPreviousState() + " last updated = " + vm.getUpdated() + " previous updated = " + work.getUpdateTime()); return null; } AlertManager.AlertType alertType = AlertManager.AlertType.ALERT_TYPE_USERVM; if (VirtualMachine.Type.DomainRouter.equals(vm.getType())) { alertType = AlertManager.AlertType.ALERT_TYPE_DOMAIN_ROUTER; } else if (VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { alertType = AlertManager.AlertType.ALERT_TYPE_CONSOLE_PROXY; } else if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType())) { alertType = AlertManager.AlertType.ALERT_TYPE_SSVM; } HostVO host = _hostDao.findById(work.getHostId()); boolean isHostRemoved = false; if (host == null) { host = _hostDao.findByIdIncludingRemoved(work.getHostId()); if (host != null) { s_logger.debug("VM " + vm.toString() + " is now no longer on host " + work.getHostId() + " as the host is removed"); isHostRemoved = true; } } DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); HostPodVO podVO = _podDao.findById(host.getPodId()); String hostDesc = "name: " + host.getName() + "(id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); Boolean alive = null; if (work.getStep() == Step.Investigating) { if (!isHostRemoved) { if (vm.getHostId() == null || vm.getHostId() != work.getHostId()) { s_logger.info("VM " + vm.toString() + " is now no longer on host " + work.getHostId()); return null; } Investigator investigator = null; for (Investigator it : investigators) { investigator = it; try { alive = investigator.isVmAlive(vm, host); s_logger.info(investigator.getName() + " found " + vm + " to be alive? " + alive); break; } catch (UnknownVM e) { s_logger.info(investigator.getName() + " could not find " + vm); } } boolean fenced = false; if (alive == null) { s_logger.debug("Fencing off VM that we don't know the state of"); for (FenceBuilder fb : fenceBuilders) { Boolean result = fb.fenceOff(vm, host); s_logger.info("Fencer " + fb.getName() + " returned " + result); if (result != null && result) { fenced = true; break; } } } else if (!alive) { fenced = true; } else { s_logger.debug("VM " + vm.getInstanceName() + " is found to be alive by " + investigator.getName()); if (host.getStatus() == Status.Up) { s_logger.info(vm + " is alive and host is up. No need to restart it."); return null; } else { s_logger.debug("Rescheduling because the host is not up but the vm is alive"); return (System.currentTimeMillis() >> 10) + _investigateRetryInterval; } } if (!fenced) { s_logger.debug("We were unable to fence off the VM " + vm); _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "Insufficient capacity to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); return (System.currentTimeMillis() >> 10) + _restartRetryInterval; } try { _itMgr.advanceStop(vm.getUuid(), true); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } work.setStep(Step.Scheduled); _haDao.update(work.getId(), work); } else { s_logger.debug("How come that HA step is Investigating and the host is removed? Calling forced Stop on Vm anyways"); try { _itMgr.advanceStop(vm.getUuid(), true); } catch (ResourceUnavailableException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (OperationTimedoutException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } catch (ConcurrentOperationException e) { assert false : "How do we hit this when force is true?"; throw new CloudRuntimeException("Caught exception even though it should be handled.", e); } } } vm = _itMgr.findById(vm.getId()); if (!_forceHA && !vm.isHaEnabled()) { if (s_logger.isDebugEnabled()) { s_logger.debug("VM is not HA enabled so we're done."); } return null; // VM doesn't require HA } if ((host == null || host.getRemoved() != null || host.getState() != Status.Up) && !volumeMgr.canVmRestartOnAnotherServer(vm.getId())) { if (s_logger.isDebugEnabled()) { s_logger.debug("VM can not restart on another server."); } return null; } try { HashMap<VirtualMachineProfile.Param, Object> params = new HashMap<VirtualMachineProfile.Param, Object>(); if (_haTag != null) { params.put(VirtualMachineProfile.Param.HaTag, _haTag); } WorkType wt = work.getWorkType(); if (wt.equals(WorkType.HA)) { params.put(VirtualMachineProfile.Param.HaOperation, true); } try{ // First try starting the vm with its original planner, if it doesn't succeed send HAPlanner as its an emergency. _itMgr.advanceStart(vm.getUuid(), params, null); }catch (InsufficientCapacityException e){ s_logger.warn("Failed to deploy vm " + vmId + " with original planner, sending HAPlanner"); _itMgr.advanceStart(vm.getUuid(), params, _haPlanners.get(0)); } VMInstanceVO started = _instanceDao.findById(vm.getId()); if (started != null && started.getState() == VirtualMachine.State.Running) { s_logger.info("VM is now restarted: " + vmId + " on " + started.getHostId()); return null; } if (s_logger.isDebugEnabled()) { s_logger.debug("Rescheduling VM " + vm.toString() + " to try again in " + _restartRetryInterval); } } catch (final InsufficientCapacityException e) { s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "Insufficient capacity to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); } catch (final ResourceUnavailableException e) { s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); } catch (ConcurrentOperationException e) { s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); } catch (OperationTimedoutException e) { s_logger.warn("Unable to restart " + vm.toString() + " due to " + e.getMessage()); _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "Unable to restart " + vm.getHostName() + " which was running on host " + hostDesc, "The Storage is unavailable for trying to restart VM, name: " + vm.getHostName() + ", id: " + vmId + " which was running on host " + hostDesc); } vm = _itMgr.findById(vm.getId()); work.setUpdateTime(vm.getUpdated()); work.setPreviousState(vm.getState()); return (System.currentTimeMillis() >> 10) + _restartRetryInterval; } public Long migrate(final HaWorkVO work) { long vmId = work.getInstanceId(); long srcHostId = work.getHostId(); try { work.setStep(Step.Migrating); _haDao.update(work.getId(), work); VMInstanceVO vm = _instanceDao.findById(vmId); if (vm == null) { return null; } // First try starting the vm with its original planner, if it doesn't succeed send HAPlanner as its an emergency. _itMgr.migrateAway(vm.getUuid(), srcHostId); return null; } catch (InsufficientServerCapacityException e) { s_logger.warn("Insufficient capacity for migrating a VM."); _resourceMgr.maintenanceFailed(srcHostId); return (System.currentTimeMillis() >> 10) + _migrateRetryInterval; } } @Override public void scheduleDestroy(VMInstanceVO vm, long hostId) { final HaWorkVO work = new HaWorkVO(vm.getId(), vm.getType(), WorkType.Destroy, Step.Scheduled, hostId, vm.getState(), 0, vm.getUpdated()); _haDao.persist(work); if (s_logger.isDebugEnabled()) { s_logger.debug("Scheduled " + work.toString()); } wakeupWorkers(); } @Override public void cancelDestroy(VMInstanceVO vm, Long hostId) { _haDao.delete(vm.getId(), WorkType.Destroy); } protected Long destroyVM(final HaWorkVO work) { final VirtualMachine vm = _itMgr.findById(work.getInstanceId()); s_logger.info("Destroying " + vm.toString()); try { if (vm.getState() != State.Destroyed) { s_logger.info("VM is no longer in Destroyed state " + vm.toString()); return null; } if (vm.getHostId() != null) { _itMgr.destroy(vm.getUuid(), false); s_logger.info("Successfully destroy " + vm); return null; } else { if (s_logger.isDebugEnabled()) { s_logger.debug(vm + " has already been stopped"); } return null; } } catch (final AgentUnavailableException e) { s_logger.debug("Agnet is not available" + e.getMessage()); } catch (OperationTimedoutException e) { s_logger.debug("operation timed out: " + e.getMessage()); } catch (ConcurrentOperationException e) { s_logger.debug("concurrent operation: " + e.getMessage()); } return (System.currentTimeMillis() >> 10) + _stopRetryInterval; } protected Long stopVM(final HaWorkVO work) throws ConcurrentOperationException { VirtualMachine vm = _itMgr.findById(work.getInstanceId()); if (vm == null) { s_logger.info("No longer can find VM " + work.getInstanceId() + ". Throwing away " + work); work.setStep(Step.Done); return null; } s_logger.info("Stopping " + vm); try { if (work.getWorkType() == WorkType.Stop) { _itMgr.advanceStop(vm.getUuid(), false); s_logger.info("Successfully stopped " + vm); return null; } else if (work.getWorkType() == WorkType.CheckStop) { if ((vm.getState() != work.getPreviousState()) || vm.getUpdated() != work.getUpdateTime() || vm.getHostId() == null || vm.getHostId().longValue() != work.getHostId()) { s_logger.info(vm + " is different now. Scheduled Host: " + work.getHostId() + " Current Host: " + (vm.getHostId() != null ? vm.getHostId() : "none") + " State: " + vm.getState()); return null; } _itMgr.advanceStop(vm.getUuid(), false); s_logger.info("Stop for " + vm + " was successful"); return null; } else if (work.getWorkType() == WorkType.ForceStop) { if ((vm.getState() != work.getPreviousState()) || vm.getUpdated() != work.getUpdateTime() || vm.getHostId() == null || vm.getHostId().longValue() != work.getHostId()) { s_logger.info(vm + " is different now. Scheduled Host: " + work.getHostId() + " Current Host: " + (vm.getHostId() != null ? vm.getHostId() : "none") + " State: " + vm.getState()); return null; } _itMgr.advanceStop(vm.getUuid(), true); s_logger.info("Stop for " + vm + " was successful"); return null; } else { assert false : "Who decided there's other steps but didn't modify the guy who does the work?"; } } catch (final ResourceUnavailableException e) { s_logger.debug("Agnet is not available" + e.getMessage()); } catch (OperationTimedoutException e) { s_logger.debug("operation timed out: " + e.getMessage()); } return (System.currentTimeMillis() >> 10) + _stopRetryInterval; } @Override public void cancelScheduledMigrations(final HostVO host) { WorkType type = host.getType() == HostVO.Type.Storage ? WorkType.Stop : WorkType.Migration; _haDao.deleteMigrationWorkItems(host.getId(), type, _serverId); } @Override public List<VMInstanceVO> findTakenMigrationWork() { List<HaWorkVO> works = _haDao.findTakenWorkItems(WorkType.Migration); List<VMInstanceVO> vms = new ArrayList<VMInstanceVO>(works.size()); for (HaWorkVO work : works) { VMInstanceVO vm = _instanceDao.findById(work.getInstanceId()); if (vm != null) { vms.add(vm); } } return vms; } private void rescheduleWork(final HaWorkVO work, final long nextTime) { s_logger.info("Rescheduling work " + work + " to try again at " + new Date(nextTime << 10)); work.setTimeToTry(nextTime); work.setTimesTried(work.getTimesTried() + 1); work.setServerId(null); work.setDateTaken(null); } private long getRescheduleTime(WorkType workType) { switch (workType) { case Migration: return ((System.currentTimeMillis() >> 10) + _migrateRetryInterval); case HA: return ((System.currentTimeMillis() >> 10) + _restartRetryInterval); case Stop: case CheckStop: case ForceStop: return ((System.currentTimeMillis() >> 10) + _stopRetryInterval); case Destroy: return ((System.currentTimeMillis() >> 10) + _restartRetryInterval); } return 0; } private void processWork(final HaWorkVO work) { final WorkType wt = work.getWorkType(); try { Long nextTime = null; if (wt == WorkType.Migration) { nextTime = migrate(work); } else if (wt == WorkType.HA) { nextTime = restart(work); } else if (wt == WorkType.Stop || wt == WorkType.CheckStop || wt == WorkType.ForceStop) { nextTime = stopVM(work); } else if (wt == WorkType.Destroy) { nextTime = destroyVM(work); } else { assert false : "How did we get here with " + wt.toString(); return; } if (nextTime == null) { s_logger.info("Completed work " + work); work.setStep(Step.Done); } else { rescheduleWork(work, nextTime.longValue()); } } catch (Exception e) { s_logger.warn("Encountered unhandled exception during HA process, reschedule work", e); long nextTime = getRescheduleTime(wt); rescheduleWork(work, nextTime); // if restart failed in the middle due to exception, VM state may has been changed // recapture into the HA worker so that it can really continue in it next turn VMInstanceVO vm = _instanceDao.findById(work.getInstanceId()); work.setUpdateTime(vm.getUpdated()); work.setPreviousState(vm.getState()); } if (!Step.Done.equals(work.getStep()) && work.getTimesTried() >= _maxRetries) { s_logger.warn("Giving up, retried max. times for work: " + work); work.setStep(Step.Done); } _haDao.update(work.getId(), work); } @Override public boolean configure(final String name, final Map<String, Object> xmlParams) throws ConfigurationException { _serverId = _msServer.getId(); Map<String, String> params = new HashMap<String, String>(); params = _configDao.getConfiguration(Long.toHexString(_serverId), xmlParams); String value = params.get(Config.HAWorkers.key()); final int count = NumbersUtil.parseInt(value, 1); _workers = new WorkerThread[count]; for (int i = 0; i < _workers.length; i++) { _workers[i] = new WorkerThread("HA-Worker-" + i); } value = params.get("force.ha"); _forceHA = Boolean.parseBoolean(value); value = params.get("time.to.sleep"); _timeToSleep = (long)NumbersUtil.parseInt(value, 60) * 1000; value = params.get("max.retries"); _maxRetries = NumbersUtil.parseInt(value, 5); value = params.get("time.between.failures"); _timeBetweenFailures = NumbersUtil.parseLong(value, 3600) * 1000; value = params.get("time.between.cleanup"); _timeBetweenCleanups = NumbersUtil.parseLong(value, 3600 * 24); value = params.get("stop.retry.interval"); _stopRetryInterval = NumbersUtil.parseInt(value, 10 * 60); value = params.get("restart.retry.interval"); _restartRetryInterval = NumbersUtil.parseInt(value, 10 * 60); value = params.get("investigate.retry.interval"); _investigateRetryInterval = NumbersUtil.parseInt(value, 1 * 60); value = params.get("migrate.retry.interval"); _migrateRetryInterval = NumbersUtil.parseInt(value, 2 * 60); _instance = params.get("instance"); if (_instance == null) { _instance = "VMOPS"; } _haTag = params.get("ha.tag"); _haDao.releaseWorkItems(_serverId); _stopped = true; _executor = Executors.newScheduledThreadPool(count, new NamedThreadFactory("HA")); return true; } @Override public boolean start() { _stopped = false; for (final WorkerThread thread : _workers) { thread.start(); } _executor.scheduleAtFixedRate(new CleanupTask(), _timeBetweenCleanups, _timeBetweenCleanups, TimeUnit.SECONDS); return true; } @Override public boolean stop() { _stopped = true; wakeupWorkers(); _executor.shutdown(); return true; } protected class CleanupTask extends ManagedContextRunnable { @Override protected void runInContext() { s_logger.info("HA Cleanup Thread Running"); try { _haDao.cleanup(System.currentTimeMillis() - _timeBetweenFailures); } catch (Exception e) { s_logger.warn("Error while cleaning up", e); } } } protected class WorkerThread extends Thread { public WorkerThread(String name) { super(name); } @Override public void run() { s_logger.info("Starting work"); while (!_stopped) { _managedContext.runWithContext(new Runnable() { @Override public void run() { runWithContext(); } }); } s_logger.info("Time to go home!"); } private void runWithContext() { HaWorkVO work = null; try { s_logger.trace("Checking the database for work"); work = _haDao.take(_serverId); if (work == null) { try { synchronized (this) { wait(_timeToSleep); } return; } catch (final InterruptedException e) { s_logger.info("Interrupted"); return; } } NDC.push("work-" + work.getId()); s_logger.info("Processing work " + work); processWork(work); } catch (final Throwable th) { s_logger.error("Caught this throwable, ", th); } finally { if (work != null) { NDC.pop(); } } } public synchronized void wakup() { notifyAll(); } } @Override public void onManagementNodeJoined(List<? extends ManagementServerHost> nodeList, long selfNodeId) { } @Override public void onManagementNodeLeft(List<? extends ManagementServerHost> nodeList, long selfNodeId) { for (ManagementServerHost node : nodeList) { _haDao.releaseWorkItems(node.getMsid()); } } @Override public void onManagementNodeIsolated() { } @Override public String getHaTag() { return _haTag; } @Override public DeploymentPlanner getHAPlanner() { return _haPlanners.get(0); } @Override public boolean hasPendingHaWork(long vmId) { List<HaWorkVO> haWorks = _haDao.listPendingHaWorkForVm(vmId); return haWorks.size() > 0; } }