package org.ovirt.engine.core.bll; import java.util.List; import org.ovirt.engine.core.common.AuditLogType; import org.ovirt.engine.core.common.action.FenceVdsActionParameters; import org.ovirt.engine.core.common.action.RunVmParams; import org.ovirt.engine.core.common.action.VdcActionParametersBase; import org.ovirt.engine.core.common.action.VdcActionType; import org.ovirt.engine.core.common.businessentities.FenceActionType; import org.ovirt.engine.core.common.businessentities.FenceStatusReturnValue; import org.ovirt.engine.core.common.businessentities.VDSStatus; import org.ovirt.engine.core.common.businessentities.VM; import org.ovirt.engine.core.common.businessentities.VMStatus; import org.ovirt.engine.core.common.config.Config; import org.ovirt.engine.core.common.config.ConfigValues; import org.ovirt.engine.core.common.errors.VdcBLLException; import org.ovirt.engine.core.common.errors.VdcBllErrors; import org.ovirt.engine.core.common.vdscommands.DestroyVmVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.SetVdsStatusVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.SetVmStatusVDSCommandParameters; import org.ovirt.engine.core.common.vdscommands.VDSCommandType; import org.ovirt.engine.core.common.vdscommands.VDSReturnValue; import org.ovirt.engine.core.compat.Guid; import org.ovirt.engine.core.compat.LogCompat; import org.ovirt.engine.core.compat.LogFactoryCompat; import org.ovirt.engine.core.dal.VdcBllMessages; import org.ovirt.engine.core.dal.dbbroker.DbFacade; import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogDirector; import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogableBase; import org.ovirt.engine.core.utils.ThreadUtils; public abstract class FenceVdsBaseCommand<T extends FenceVdsActionParameters> extends VdsCommand<T> { private final int SLEEP_BEFORE_FIRST_ATTEMPT=5000; private static LogCompat log = LogFactoryCompat.getLog(FenceVdsBaseCommand.class); protected FencingExecutor _executor; protected List<VM> mVmList = null; private boolean privateFencingSucceeded; /** * Constructor for command creation when compensation is applied on startup * * @param commandId */ protected FenceVdsBaseCommand(Guid commandId) { super(commandId); } public FenceVdsBaseCommand(T parameters) { super(parameters); mVmList = DbFacade.getInstance().getVmDAO().getAllRunningForVds(getVdsId()); } /** * Gets the number of times to retry a get status PM operation after stop/start PM operation. * * @return */ protected abstract int getRerties(); /** * Gets the number of seconds to delay between each retry. * * @return */ protected abstract int getDelayInSeconds(); protected boolean getFencingSucceeded() { return privateFencingSucceeded; } protected void setFencingSucceeded(boolean value) { privateFencingSucceeded = value; } @Override protected boolean canDoAction() { boolean retValue = false; String event; String runningPmOp; if (getVds() == null) { addCanDoActionMessage(VdcBllMessages.ACTION_TYPE_FAILED_HOST_NOT_EXIST); return false; } // get the event to look for , if we requested to start Host then we should look when we stopped it and vice versa. if (getParameters().getAction() == FenceActionType.Start) { event = AuditLogType.USER_VDS_STOP.name(); runningPmOp=FenceActionType.Stop.name(); } else { event = AuditLogType.USER_VDS_START.name(); runningPmOp=FenceActionType.Start.name(); } if (getVds().getpm_enabled() && IsPowerManagementLegal(getVds().getStaticData(), getVdsGroup().getcompatibility_version().toString())) { // check if we are in the interval of X seconds from startup // if yes , system is still initializing , ignore fencing operations java.util.Date waitTo = Backend.getInstance() .getStartedAt() .AddSeconds((Integer) Config.GetValue(ConfigValues.DisableFenceAtStartupInSec)); java.util.Date now = new java.util.Date(); if (waitTo.before(now) || waitTo.equals(now)) { // Check Quiet time between PM operations, this is done only if parent command is not <Restart> int secondsLeftToNextPmOp = ((getParameters().getParentCommand() == VdcActionType.RestartVds)) ? 0 : DbFacade.getInstance().getAuditLogDAO().getTimeToWaitForNextPmOp(getVds().getvds_name(), event); if (secondsLeftToNextPmOp <= 0) { // try to get vds status _executor = createExecutorForProxyCheck(); if (_executor.FindVdsToFence()) { if (!(retValue = _executor.checkProxyHostConnectionToHost())) { addCanDoActionMessage(VdcBllMessages.VDS_FAILED_FENCE_VIA_PROXY_CONNECTION); } } else { addCanDoActionMessage(VdcBllMessages.VDS_NO_VDS_PROXY_FOUND); } } else { addCanDoActionMessage(VdcBllMessages.VDS_FENCE_DISABLED_AT_QUIET_TIME); addCanDoActionMessage(String.format("$operation %1$s", runningPmOp)); addCanDoActionMessage(String.format("$seconds %1$s", secondsLeftToNextPmOp)); } } else { addCanDoActionMessage(VdcBllMessages.VDS_FENCE_DISABLED_AT_SYSTEM_STARTUP_INTERVAL); } } if (!retValue) { HandleError(); } getReturnValue().setSucceeded(retValue); return retValue; } @Override protected void executeCommand() { VDSStatus lastStatus = getVds().getstatus(); VDSReturnValue vdsReturnValue = null; try { // Set status immediately to prevent a race (BZ 636950/656224) setStatus(); _executor = new FencingExecutor(getVds(), getParameters().getAction()); if (_executor.FindVdsToFence()) { vdsReturnValue = _executor.Fence(); setFencingSucceeded(vdsReturnValue.getSucceeded()); if (getFencingSucceeded()) { _executor = new FencingExecutor(getVds(), FenceActionType.Status); if (waitForStatus(getVds().getvds_name(), getParameters().getAction())) { handleSpecificCommandActions(); } else { // We reach this if we wait for on/off status // after start/stop as defined in configurable delay/retries and // did not reach the desired on/off status. // We assume that fencing operation didn't complete successfully // Setting this flag will cause the appropriate Alert to pop // and to restore host status to it's previous value as // appears in the finally block. setFencingSucceeded(false); } } else { if (!((FenceStatusReturnValue) (vdsReturnValue.getReturnValue())).getIsSkipped()) { // Since this is a non-transactive command , restore last status setSucceeded(false); log.errorFormat("Failed to {0} VDS", getParameters().getAction() .name() .toLowerCase()); throw new VdcBLLException(VdcBllErrors.VDS_FENCING_OPERATION_FAILED); } else { //Fencing operation was skipped because Host is already in the requested state. setStatus(lastStatus); } } } setSucceeded(getFencingSucceeded()); } finally { if (!getSucceeded()) { setStatus(lastStatus); AlertIfPowerManagementOperationFailed(); } } } /** * Create the executor used in the can do action check. The executor created does not do retries to find a proxy * host, so that clients calling the can do action will get a quick response, and don't risk timing out. * * @return An executor used to check the availability of a proxy host. */ protected FencingExecutor createExecutorForProxyCheck() { return new FencingExecutor(getVds(), FenceActionType.Status); } protected void DestroyVmOnDestination(VM vm) { if (vm.getstatus() == VMStatus.MigratingFrom) { try { if (vm.getmigrating_to_vds() != null) { Backend.getInstance() .getResourceManager() .RunVdsCommand( VDSCommandType.DestroyVm, new DestroyVmVDSCommandParameters(new Guid(vm.getmigrating_to_vds().toString()), vm .getvm_guid(), true, false, 0)); log.infoFormat("Stopped migrating vm: {0} on vds: {1}", vm.getvm_name(), vm.getmigrating_to_vds()); } } catch (RuntimeException ex) { log.infoFormat("Could not stop migrating vm: {0} on vds: {1}, Error: {2}", vm.getvm_name(), vm.getmigrating_to_vds(), ex.getMessage()); // intentionally ingnored } } } protected void RestartVdsVms() { java.util.ArrayList<VdcActionParametersBase> runVmParamsList = new java.util.ArrayList<VdcActionParametersBase>(); // restart all running vms of a failed vds. for (VM vm : mVmList) { DestroyVmOnDestination(vm); VDSReturnValue returnValue = Backend .getInstance() .getResourceManager() .RunVdsCommand(VDSCommandType.SetVmStatus, new SetVmStatusVDSCommandParameters(vm.getvm_guid(), VMStatus.Down)); // Write that this VM was shut down by host rebbot or manual fence if (returnValue != null && returnValue.getSucceeded()) { LogSettingVmToDown(getVds().getvds_id(), vm.getvm_guid()); } // ResourceManager.Instance.removeRunningVm(vm.vm_guid, VdsId); setVmId(vm.getvm_guid()); setVmName(vm.getvm_name()); setVm(vm); // EINAV: TODO: The next commented line of code is performing an // asynchronous task // (RestoreAllSnapshots) in case of a stateless VM. need to take // care of that case. // VmPoolHandler.ProcessVmPoolOnStopVm(VmId); //Handle highly available VMs if (vm.getauto_startup()) { runVmParamsList.add(new RunVmParams(vm.getvm_guid(), true)); } } if (runVmParamsList.size() > 0) { Backend.getInstance().runInternalMultipleActions(VdcActionType.RunVm, runVmParamsList); } setVm(null); setVmId(Guid.Empty); setVmName(null); } protected void setStatus() { Backend.getInstance() .getResourceManager() .RunVdsCommand(VDSCommandType.SetVdsStatus, new SetVdsStatusVDSCommandParameters(getVdsId(), VDSStatus.Reboot)); RunSleepOnReboot(); } protected void HandleError() { } protected boolean waitForStatus(String vdsName, FenceActionType actionType) { final String FENCE_CMD = (actionType == FenceActionType.Start) ? "on" : "off"; final String ACTION_NAME = actionType.name().toLowerCase(); int i = 1; boolean statusReached = false; log.infoFormat("Waiting for vds {0} to {1}", vdsName, ACTION_NAME); // Waiting before first attempt to check the host status. // This is done because if we will attempt to get host status immediately // in most cases it will not turn from on/off to off/on and we will need // to wait a full cycle for it. ThreadUtils.sleep(SLEEP_BEFORE_FIRST_ATTEMPT); while (!statusReached && i <= getRerties()) { log.infoFormat("Attempt {0} to get vds {1} status", i, vdsName); if (_executor.FindVdsToFence()) { VDSReturnValue returnValue = _executor.Fence(); if (returnValue != null && returnValue.getReturnValue() != null) { FenceStatusReturnValue value = (FenceStatusReturnValue) returnValue.getReturnValue(); if (FENCE_CMD.equalsIgnoreCase(value.getStatus())) { statusReached = true; log.infoFormat("vds {0} status is {1}", vdsName, FENCE_CMD); } else { i++; if (i <= getRerties()) ThreadUtils.sleep(getDelayInSeconds() * 1000); } } else { log.errorFormat("Failed to get host {0} status.", vdsName); break; } } else { break; } } if (!statusReached) { // Send an Alert String actionName = (getParameters().getParentCommand() == VdcActionType.RestartVds) ? FenceActionType.Restart.name() : ACTION_NAME; AuditLogableBase auditLogable = new AuditLogableBase(); auditLogable.AddCustomValue("Host", vdsName); auditLogable.AddCustomValue("Status", actionName); AuditLogDirector.log(auditLogable, AuditLogType.VDS_ALERT_FENCING_STATUS_VERIFICATION_FAILED); log.errorFormat("Failed to verify host {0} {1} status. Have retried {2} times with delay of {3} seconds between each retry.", vdsName, ACTION_NAME, getRerties(), getDelayInSeconds()); } return statusReached; } protected void setStatus(VDSStatus status) { if (getVds().getstatus() != status) { Backend.getInstance() .getResourceManager() .RunVdsCommand(VDSCommandType.SetVdsStatus, new SetVdsStatusVDSCommandParameters(getVds().getvds_id(), status)); } } protected abstract void handleSpecificCommandActions(); }