package org.ovirt.engine.core.vdsbroker.monitoring;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

import javax.inject.Inject;
import javax.inject.Singleton;

import org.ovirt.engine.core.common.businessentities.IVdsEventListener;
import org.ovirt.engine.core.common.businessentities.VmDynamic;
import org.ovirt.engine.core.common.businessentities.VmStatistics;
import org.ovirt.engine.core.common.utils.Pair;
import org.ovirt.engine.core.compat.Guid;
import org.ovirt.engine.core.compat.TransactionScopeOption;
import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogDirector;
import org.ovirt.engine.core.dao.DiskImageDynamicDao;
import org.ovirt.engine.core.dao.VdsDynamicDao;
import org.ovirt.engine.core.dao.VmDynamicDao;
import org.ovirt.engine.core.dao.VmGuestAgentInterfaceDao;
import org.ovirt.engine.core.dao.VmStatisticsDao;
import org.ovirt.engine.core.dao.network.VmNetworkInterfaceDao;
import org.ovirt.engine.core.dao.network.VmNetworkStatisticsDao;
import org.ovirt.engine.core.utils.transaction.TransactionSupport;
import org.ovirt.engine.core.vdsbroker.ResourceManager;
import org.ovirt.engine.core.vdsbroker.VdsManager;
import org.ovirt.engine.core.vdsbroker.VmManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Invokes all VM analyzers in hand, iterates over their reports and takes actions:
 * fires VDSM commands (destroy, run/rerun, migrate), reports completed actions,
 * hands over migration and saves the collected data to the database.
 */
@Singleton
public class VmsMonitoring {

    @Inject
    private AuditLogDirector auditLogDirector;
    @Inject
    private ResourceManager resourceManager;
    @Inject
    private BalloonMonitoring balloonMonitoring;
    @Inject
    private LunDisksMonitoring lunDisksMonitoring;
    @Inject
    private VmJobsMonitoring vmJobsMonitoring;
    @Inject
    private DiskImageDynamicDao diskImageDynamicDao;
    @Inject
    private VmDynamicDao vmDynamicDao;
    @Inject
    private VmNetworkStatisticsDao vmNetworkStatisticsDao;
    @Inject
    private VmStatisticsDao vmStatisticsDao;
    @Inject
    private VmGuestAgentInterfaceDao vmGuestAgentInterfaceDao;
    @Inject
    private VmNetworkInterfaceDao vmNetworkInterfaceDao;
    @Inject
    private VdsDynamicDao vdsDynamicDao;

    private static final Logger log = LoggerFactory.getLogger(VmsMonitoring.class);

    /**
     * Analyze and react upon changes on the monitoredVms. Relevant changes would
     * be persisted and state transitions and internal commands would
     * take place accordingly.
     *
     * <p>Nothing is done when {@code monitoredVms} is empty. A {@link RuntimeException} raised by any
     * step is logged and not rethrown. The VMs of the analyzers returned by the analysis step are
     * always unlocked at the end, whether the cycle succeeded or not.
     *
     * @param monitoredVms The VMs we want to monitor and analyze for changes.
     *                     The {@link VmDynamic} represents the persisted object (namely the one in db)
     *                     and the {@code VdsmVm} is the running one as reported from VDSM
     * @param fetchTime When the VMs were fetched
     * @param vdsManager The manager of the monitored host
     * @param updateStatistics Whether or not this monitoring should include VM statistics
     */
    public void perform(
            List<Pair<VmDynamic, VdsmVm>> monitoredVms,
            long fetchTime,
            VdsManager vdsManager,
            boolean updateStatistics) {
        if (monitoredVms.isEmpty()) {
            return;
        }

        List<VmAnalyzer> vmAnalyzers = Collections.emptyList();
        try {
            vmAnalyzers = analyzeVms(monitoredVms, fetchTime, vdsManager, updateStatistics);
            // It is important to add the unmanaged VMs before flushing the dynamic data into the database
            addUnmanagedVms(vmAnalyzers, vdsManager.getVdsId());
            flush(vmAnalyzers);
            postFlush(vmAnalyzers, vdsManager);
            vdsManager.vmsMonitoringInitFinished();
        } catch (RuntimeException ex) {
            // Pass the exception text (not the Throwable itself) for the placeholder: SLF4J treats a
            // trailing Throwable argument specially, so whether it fills the last '{}' is
            // version-dependent. The stack trace is logged by the statement that follows.
            log.error("Failed during vms monitoring on host {} error is: {}",
                    vdsManager.getVdsName(),
                    ex.toString());
            log.error("Exception:", ex);
        } finally {
            unlockVms(vmAnalyzers);
        }
    }

    /**
     * For each given analyzer, looks up the {@link VmManager} of its VM, calls
     * {@code updateVmDataChangedTime()} on it and then releases its lock.
     *
     * @param vmAnalyzers the analyzers whose VMs should be unlocked
     */
    private void unlockVms(List<VmAnalyzer> vmAnalyzers) {
        vmAnalyzers.stream().map(VmAnalyzer::getVmId).forEach(vmId -> {
            VmManager vmManager = getVmManager(vmId);
            vmManager.updateVmDataChangedTime();
            vmManager.unlock();
        });
    }

    /**
     * Analyze the VM data pairs.
     * Skips analysis on VMs which cannot be locked (see {@link #shouldAnalyzeVm}).
     * Note: metrics calculation like memCommited and vmsCoresCount should be calculated *before*
     * this filtering.
     *
     * <p>When creating or running an analyzer fails with a {@link RuntimeException}, the VM is
     * unlocked, the failure is logged and the VM is left out of the result.
     *
     * @return The analyzers which hold all the data per VM, sorted by VM id
     */
    private List<VmAnalyzer> analyzeVms(
            List<Pair<VmDynamic, VdsmVm>> monitoredVms,
            long fetchTime,
            VdsManager vdsManager,
            boolean updateStatistics) {
        VmAnalyzerFactory vmAnalyzerFactory = getVmAnalyzerFactory(vdsManager, updateStatistics);
        List<VmAnalyzer> vmAnalyzers = new ArrayList<>(monitoredVms.size());

        for (Pair<VmDynamic, VdsmVm> vm : monitoredVms) {
            // TODO filter out migratingTo VMs if no action is taken on them
            if (!shouldAnalyzeVm(vm, fetchTime, vdsManager.getVdsId())) {
                continue;
            }
            try {
                VmAnalyzer vmAnalyzer = vmAnalyzerFactory.getVmAnalyzer(vm);
                vmAnalyzer.analyze();
                vmAnalyzers.add(vmAnalyzer);
            } catch (RuntimeException ex) {
                // The lock was taken by shouldAnalyzeVm; this VM will not be part of the returned
                // list, so it has to be released here.
                Guid vmId = getVmId(vm.getFirst(), vm.getSecond());
                VmManager vmManager = getVmManager(vmId);
                vmManager.unlock();
                // See perform(): the exception text is passed explicitly so the placeholder is
                // always filled; the stack trace is logged separately below.
                log.error("Failed during monitoring vm: {} , error is: {}", vmId, ex.toString());
                log.error("Exception:", ex);
            }
        }

        vmAnalyzers.sort(Comparator.comparing(VmAnalyzer::getVmId));
        return vmAnalyzers;
    }

    /**
     * Creates the factory used to build one {@link VmAnalyzer} per monitored VM.
     *
     * @param vdsManager The manager of the monitored host
     * @param statistics Whether or not VM statistics should be included
     * @return a new {@link VmAnalyzerFactory} wired with this instance's collaborators
     */
    protected VmAnalyzerFactory getVmAnalyzerFactory(VdsManager vdsManager, boolean statistics) {
        return new VmAnalyzerFactory(
                vdsManager,
                statistics,
                auditLogDirector,
                resourceManager,
                vmDynamicDao,
                vmNetworkInterfaceDao,
                vdsDynamicDao);
    }

    /**
     * Decides whether the given VM should be analyzed in this cycle.
     *
     * <p>Tries to lock the VM through its {@link VmManager}. When {@code true} is returned the lock
     * is still held and must be released by the caller; on every {@code false} result the lock is
     * either not acquired or already released.
     *
     * @param pair the persisted / reported VM data
     * @param fetchTime When the VMs were fetched
     * @param vdsId the id of the monitored host
     * @return {@code true} if the VM was locked and its data is eligible for analysis
     */
    private boolean shouldAnalyzeVm(Pair<VmDynamic, VdsmVm> pair, long fetchTime, Guid vdsId) {
        Guid vmId = getVmId(pair.getFirst(), pair.getSecond());
        VmManager vmManager = getVmManager(vmId);

        if (!vmManager.trylock()) {
            log.debug("skipping VM '{}' from this monitoring cycle" +
                    " - the VM is locked by its VmManager ", vmId);
            return false;
        }

        if (!vmManager.isLatestData(pair.getSecond(), vdsId)) {
            log.warn("skipping VM '{}' from this monitoring cycle" +
                    " - newer VM data was already processed", vmId);
            vmManager.unlock();
            return false;
        }

        // The data was fetched no later than the last recorded change of the VM data
        if (vmManager.getVmDataChangedTime() != null && fetchTime - vmManager.getVmDataChangedTime() <= 0) {
            log.warn("skipping VM '{}' from this monitoring cycle" +
                    " - the VM data has changed since fetching the data", vmId);
            vmManager.unlock();
            return false;
        }

        return true;
    }

    /**
     * Acts upon the analyzers' reports after the data was flushed: triggers reruns, notifies the
     * event listener about the collected VM ids and hands the relevant data to the LUN disks,
     * VM jobs and balloon monitoring.
     *
     * @param vmAnalyzers the analyzers of this monitoring cycle
     * @param vdsManager The manager of the monitored host
     */
    private void postFlush(List<VmAnalyzer> vmAnalyzers, VdsManager vdsManager) {
        Collection<Guid> movedToDownVms = new ArrayList<>();
        List<Guid> succeededToRunVms = new ArrayList<>();
        List<Guid> autoVmsToRun = new ArrayList<>();
        List<Guid> coldRebootVmsToRun = new ArrayList<>();
        List<Guid> vmIdsWithBalloonDriverNotRequestedOrAvailable = new ArrayList<>();
        List<Guid> vmIdsWithBalloonDriverRequestedAndUnavailable = new ArrayList<>();
        List<Guid> vmIdsWithGuestAgentUpOrBalloonDeflated = new ArrayList<>();
        List<Guid> vmIdsWithGuestAgentDownAndBalloonInflated = new ArrayList<>();

        // now loop over the result and act
        for (VmAnalyzer vmAnalyzer : vmAnalyzers) {

            // rerun all vms from rerun list
            if (vmAnalyzer.isRerun()) {
                log.error("Rerun VM '{}'. Called from VDS '{}'", vmAnalyzer.getVmId(), vdsManager.getVdsName());
                resourceManager.rerunFailedCommand(vmAnalyzer.getVmId(), vdsManager.getVdsId());
            }

            if (vmAnalyzer.isSuccededToRun()) {
                vdsManager.succeededToRunVm(vmAnalyzer.getVmId());
                succeededToRunVms.add(vmAnalyzer.getVmId());
            }

            // Refrain from auto-start HA VM during its re-run attempts.
            if (vmAnalyzer.isAutoVmToRun() && !vmAnalyzer.isRerun()) {
                autoVmsToRun.add(vmAnalyzer.getVmId());
            }

            if (vmAnalyzer.isColdRebootVmToRun()) {
                coldRebootVmsToRun.add(vmAnalyzer.getVmId());
            }

            // process all vms that are powering up.
            if (vmAnalyzer.isPoweringUp()) {
                getVdsEventListener().processOnVmPoweringUp(vmAnalyzer.getVmId());
            }

            if (vmAnalyzer.isMovedToDown()) {
                movedToDownVms.add(vmAnalyzer.getVmId());
            }

            if (vmAnalyzer.isRemoveFromAsync()) {
                resourceManager.removeAsyncRunningVm(vmAnalyzer.getVmId());
            }

            if (vmAnalyzer.isVmBalloonDriverNotRequestedOrAvailable()) {
                vmIdsWithBalloonDriverNotRequestedOrAvailable.add(vmAnalyzer.getVmId());
            }

            if (vmAnalyzer.isVmBalloonDriverRequestedAndUnavailable()) {
                vmIdsWithBalloonDriverRequestedAndUnavailable.add(vmAnalyzer.getVmId());
            }

            if (vmAnalyzer.isGuestAgentUpOrBalloonDeflated()) {
                vmIdsWithGuestAgentUpOrBalloonDeflated.add(vmAnalyzer.getVmId());
            }

            if (vmAnalyzer.isGuestAgentDownAndBalloonInfalted()) {
                vmIdsWithGuestAgentDownAndBalloonInflated.add(vmAnalyzer.getVmId());
            }
        }

        getVdsEventListener().updateSlaPolicies(succeededToRunVms, vdsManager.getVdsId());

        // run all vms that crashed that marked with auto startup
        getVdsEventListener().runFailedAutoStartVMs(autoVmsToRun);

        // run all vms that went down as a part of cold reboot process
        getVdsEventListener().runColdRebootVms(coldRebootVmsToRun);

        // process all vms that went down
        getVdsEventListener().processOnVmStop(movedToDownVms, vdsManager.getVdsId());

        getVdsEventListener().refreshHostIfAnyVmHasHostDevices(succeededToRunVms, vdsManager.getVdsId());

        // Looping only over powering up VMs as LUN device size
        // is updated by VDSM only once when running a VM.
        lunDisksMonitoring.process(vmAnalyzers.stream()
                .filter(VmAnalyzer::isPoweringUp)
                .collect(Collectors.toMap(VmAnalyzer::getVmId, VmAnalyzer::getVmLunsMap)));

        // Only analyzers that actually carry job data are handed over
        vmJobsMonitoring.process(vmAnalyzers.stream()
                .filter(analyzer -> analyzer.getVmJobs() != null)
                .collect(Collectors.toMap(VmAnalyzer::getVmId, VmAnalyzer::getVmJobs)));

        balloonMonitoring.process(
                vmIdsWithBalloonDriverNotRequestedOrAvailable,
                vmIdsWithBalloonDriverRequestedAndUnavailable,
                vmIdsWithGuestAgentUpOrBalloonDeflated,
                vmIdsWithGuestAgentDownAndBalloonInflated);
    }

    /**
     * Persists the data gathered by the analyzers: dynamic data, statistics, interface statistics,
     * disk image statistics and guest agent network devices, in that order.
     *
     * @param vmAnalyzers the analyzers of this monitoring cycle
     */
    private void flush(List<VmAnalyzer> vmAnalyzers) {
        saveVmDynamic(vmAnalyzers);
        saveVmStatistics(vmAnalyzers);
        saveVmInterfaceStatistics(vmAnalyzers);
        saveVmDiskImageStatistics(vmAnalyzers);
        saveVmGuestAgentNetworkDevices(vmAnalyzers);
    }

    /**
     * Collects the disk image dynamic data of all analyzers into one list and passes it to
     * {@code diskImageDynamicDao.updateAllDiskImageDynamicWithDiskIdByVmId}.
     */
    private void saveVmDiskImageStatistics(List<VmAnalyzer> vmAnalyzers) {
        diskImageDynamicDao.updateAllDiskImageDynamicWithDiskIdByVmId(vmAnalyzers.stream()
                .map(VmAnalyzer::getVmDiskImageDynamicToSave)
                .flatMap(Collection::stream)
                .collect(Collectors.toList()));
    }

    /**
     * Batch-updates the dynamic data of every analyzer that has (non-null) dynamic data to save.
     */
    private void saveVmDynamic(List<VmAnalyzer> vmAnalyzers) {
        vmDynamicDao.updateAllInBatch(vmAnalyzers.stream()
                .map(VmAnalyzer::getVmDynamicToSave)
                .filter(Objects::nonNull)
                .collect(Collectors.toList()));
    }

    /**
     * Batch-updates the network statistics reported by all analyzers.
     */
    private void saveVmInterfaceStatistics(List<VmAnalyzer> vmAnalyzers) {
        vmNetworkStatisticsDao.updateAllInBatch(vmAnalyzers.stream()
                .map(VmAnalyzer::getVmNetworkStatistics)
                .flatMap(List::stream)
                .collect(Collectors.toList()));
    }

    /**
     * Batch-updates the (non-null) VM statistics and then sets each of them on the
     * {@link VmManager} of the corresponding VM.
     */
    private void saveVmStatistics(List<VmAnalyzer> vmAnalyzers) {
        List<VmStatistics> statistics = vmAnalyzers.stream()
                .map(VmAnalyzer::getVmStatisticsToSave)
                .filter(Objects::nonNull)
                .collect(Collectors.toList());
        vmStatisticsDao.updateAllInBatch(statistics);
        statistics.forEach(stats -> getVmManager(stats.getId()).setStatistics(stats));
    }

    /**
     * Reports the ids of the VMs flagged as unmanaged by their analyzers to the event listener.
     * The listener is invoked even when no unmanaged VM was found (with an empty list).
     *
     * @param vmAnalyzers the analyzers of this monitoring cycle
     * @param vdsId the id of the monitored host
     */
    protected void addUnmanagedVms(List<VmAnalyzer> vmAnalyzers, Guid vdsId) {
        List<Guid> unmanagedVmIds = vmAnalyzers.stream()
                .filter(VmAnalyzer::isUnmanagedVm)
                .map(VmAnalyzer::getVmId)
                .collect(Collectors.toList());
        getVdsEventListener().addUnmanagedVms(vdsId, unmanagedVmIds);
    }

    // ***** DB interaction *****

    /**
     * Replaces the stored guest agent interfaces of every VM whose analyzer reports a non-empty
     * list of guest agent NICs: within a single {@code Required} transaction scope the existing
     * entries of those VMs are removed and the reported ones are saved. Does nothing when no
     * analyzer reports guest agent NICs.
     */
    private void saveVmGuestAgentNetworkDevices(List<VmAnalyzer> vmAnalyzers) {
        List<VmAnalyzer> analyzersWithChangeGuestAgentNics = vmAnalyzers.stream()
                .filter(analyzer -> !analyzer.getVmGuestAgentNics().isEmpty())
                .collect(Collectors.toList());

        if (analyzersWithChangeGuestAgentNics.isEmpty()) {
            return;
        }

        TransactionSupport.executeInScope(TransactionScopeOption.Required, () -> {
            List<Guid> vmIds = analyzersWithChangeGuestAgentNics.stream()
                    .map(VmAnalyzer::getVmId)
                    .collect(Collectors.toList());
            vmGuestAgentInterfaceDao.removeAllForVms(vmIds);

            analyzersWithChangeGuestAgentNics.stream()
                    .map(VmAnalyzer::getVmGuestAgentNics)
                    .flatMap(List::stream)
                    .forEach(nic -> vmGuestAgentInterfaceDao.save(nic));
            return null;
        });
    }

    // ***** Helpers and sub-methods *****

    /**
     * Resolves the VM id from the persisted VM when it is present, otherwise from the VM reported
     * by VDSM.
     *
     * @param dbVm the persisted VM data, may be {@code null}
     * @param vdsmVm the reported VM data; dereferenced when {@code dbVm} is {@code null}
     * @return the id of the VM
     */
    static Guid getVmId(VmDynamic dbVm, VdsmVm vdsmVm) {
        return dbVm != null ? dbVm.getId() : vdsmVm.getVmDynamic().getId();
    }

    /**
     * @return the event listener provided by the {@link ResourceManager}
     */
    protected IVdsEventListener getVdsEventListener() {
        return resourceManager.getEventListener();
    }

    /**
     * @param vmId the id of the VM
     * @return the {@link VmManager} the {@link ResourceManager} provides for the given VM
     */
    protected VmManager getVmManager(Guid vmId) {
        return resourceManager.getVmManager(vmId);
    }
}