package org.ovirt.engine.core.bll.scheduling.policyunits;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.ovirt.engine.core.bll.scheduling.PolicyUnitImpl;
import org.ovirt.engine.core.bll.scheduling.SchedulingUnit;
import org.ovirt.engine.core.bll.scheduling.SlaValidator;
import org.ovirt.engine.core.bll.scheduling.pending.PendingMemory;
import org.ovirt.engine.core.bll.scheduling.pending.PendingResourceManager;
import org.ovirt.engine.core.common.businessentities.Cluster;
import org.ovirt.engine.core.common.businessentities.NumaTuneMode;
import org.ovirt.engine.core.common.businessentities.VDS;
import org.ovirt.engine.core.common.businessentities.VM;
import org.ovirt.engine.core.common.businessentities.VMStatus;
import org.ovirt.engine.core.common.businessentities.VdsNumaNode;
import org.ovirt.engine.core.common.businessentities.VmNumaNode;
import org.ovirt.engine.core.common.errors.EngineMessage;
import org.ovirt.engine.core.common.scheduling.PerHostMessages;
import org.ovirt.engine.core.common.scheduling.PolicyUnit;
import org.ovirt.engine.core.common.scheduling.PolicyUnitType;
import org.ovirt.engine.core.dal.dbbroker.DbFacade;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@SchedulingUnit(
        guid = "c9ddbb34-0e1d-4061-a8d7-b0893fa80932",
        name = "Memory",
        description = "Filters out hosts that have insufficient memory to run the VM",
        type = PolicyUnitType.FILTER
)
public class MemoryPolicyUnit extends PolicyUnitImpl {

    private static final Logger log = LoggerFactory.getLogger(MemoryPolicyUnit.class);

    public MemoryPolicyUnit(PolicyUnit policyUnit, PendingResourceManager pendingResourceManager) {
        super(policyUnit, pendingResourceManager);
    }

    @Override
    public List<VDS> filter(Cluster cluster, List<VDS> hosts, VM vm, Map<String, String> parameters, PerHostMessages messages) {
        // A paused VM already has its memory allocated, so no additional
        // memory is needed and all hosts pass the filter.
        if (vm.getStatus() == VMStatus.Paused) {
            return hosts;
        }

        List<VDS> list = new ArrayList<>();
        List<VmNumaNode> vmNumaNodes = DbFacade.getInstance().getVmNumaNodeDao().getAllVmNumaNodeByVmId(vm.getId());

        for (VDS vds : hosts) {
            // Check the physical memory needed to start / receive the VM.
            // This is probably not needed for all VMs, but QEMU might attempt
            // full allocation unprovoked and fail if there is not enough memory.
            int pendingRealMemory = PendingMemory.collectForHost(getPendingResourceManager(), vds.getId());

            if (!SlaValidator.getInstance().hasPhysMemoryToRunVM(vds, vm, pendingRealMemory)) {
                Long hostAvailableMem = vds.getMemFree() + vds.getSwapFree();
                log.debug("Host '{}' has insufficient memory to run the VM. Only {} MB of physical memory + swap are available.",
                        vds.getName(),
                        hostAvailableMem);

                messages.addMessage(vds.getId(), String.format("$availableMem %1$d", hostAvailableMem));
                messages.addMessage(vds.getId(), EngineMessage.VAR__DETAIL__NOT_ENOUGH_MEMORY.toString());
                continue;
            }

            // Check logical memory using the overcommit, pending and guaranteed memory rules
            if (!memoryChecker.evaluate(vds, vm)) {
                log.debug("Host '{}' is already too close to the memory overcommitment limit. It can only accept {} MB of additional memory load.",
                        vds.getName(),
                        vds.getMaxSchedulingMemory());

                messages.addMessage(vds.getId(), String.format("$availableMem %1$f", vds.getMaxSchedulingMemory()));
                messages.addMessage(vds.getId(), EngineMessage.VAR__DETAIL__NOT_ENOUGH_MEMORY.toString());
                continue;
            }

            // In case one of the VM's virtual NUMA nodes (vNode) is pinned to
            // physical NUMA nodes (pNode), the host is excluded (filtered out) when:
            // * the memory tune is strict (vNode memory cannot be spread across
            //   several pNodes' memory)
            // [and]
            // * the host does not support NUMA configuration, or
            // * there is not enough memory for a pinned vNode in its pNode
            if (vm.getNumaTuneMode() == NumaTuneMode.STRICT
                    && isVmNumaPinned(vmNumaNodes)
                    && (!vds.isNumaSupport() || !canVmNumaPinnedToVds(vm, vmNumaNodes, vds))) {
                log.debug("Host '{}' cannot accommodate memory of VM's pinned virtual NUMA nodes within host's physical NUMA nodes",
                        vds.getName());
                messages.addMessage(vds.getId(), EngineMessage.VAR__DETAIL__NOT_MEMORY_PINNED_NUMA.toString());
                continue;
            }

            list.add(vds);
        }
        return list;
    }

    private boolean canVmNumaPinnedToVds(VM vm, List<VmNumaNode> nodes, VDS vds) {
        List<VdsNumaNode> pNodes = DbFacade.getInstance().getVdsNumaNodeDao().getAllVdsNumaNodeByVdsId(vds.getId());
        if (pNodes == null || pNodes.isEmpty()) {
            return false;
        }
        Map<Integer, VdsNumaNode> indexMap = toMap(pNodes);
        for (VmNumaNode vNode : nodes) {
            for (Integer pinnedIndex : vNode.getVdsNumaNodeList()) {
                VdsNumaNode pNode = indexMap.get(pinnedIndex);
                // A pinned index that does not exist on this host, or a pNode
                // with less free memory than the vNode needs, rules the host out.
                if (pNode == null || vNode.getMemTotal() > pNode.getNumaNodeStatistics().getMemFree()) {
                    return false;
                }
            }
        }
        return true;
    }

    private Map<Integer, VdsNumaNode> toMap(List<VdsNumaNode> pNodes) {
        Map<Integer, VdsNumaNode> map = new HashMap<>();
        for (VdsNumaNode pNode : pNodes) {
            map.put(pNode.getIndex(), pNode);
        }
        return map;
    }

    private boolean isVmNumaPinned(List<VmNumaNode> nodes) {
        if (nodes == null || nodes.isEmpty()) {
            return false;
        }
        // Iterate through the nodes and see if there is at least one pinned node.
        for (VmNumaNode vmNumaNode : nodes) {
            if (!vmNumaNode.getVdsNumaNodeList().isEmpty()) {
                return true;
            }
        }
        return false;
    }
}