package org.ovirt.engine.core.bll.scheduling.arem;
import static java.util.Collections.min;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
import java.util.stream.Collectors;
import javax.inject.Inject;
import org.ovirt.engine.core.bll.scheduling.SchedulingManager;
import org.ovirt.engine.core.common.businessentities.Cluster;
import org.ovirt.engine.core.common.businessentities.VM;
import org.ovirt.engine.core.common.scheduling.AffinityGroup;
import org.ovirt.engine.core.compat.Guid;
import org.ovirt.engine.core.dao.VmDao;
import org.ovirt.engine.core.dao.scheduling.AffinityGroupDao;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Class to detect affinity group violations and select VMs for
* migration, to resolve the violations.
*/
public class AffinityRulesEnforcer {
private static final Logger log = LoggerFactory.getLogger(AffinityRulesEnforcer.class);
@Inject
private AffinityGroupDao affinityGroupDao;
@Inject
private VmDao vmDao;
@Inject
private SchedulingManager schedulingManager;
private final Random random = new Random();
/**
 * Controls how violation scanning reacts when a broken affinity group is found
 * (see checkForVMAffinityGroupViolations).
 */
protected enum FailMode {
IMMEDIATELY, // Fail when first violation is detected
GET_ALL // Collect all violations
}
/**
 * Choose a valid VM for migration by applying affinity rules in the following order:
 * <p>
 * 1. VM to Hosts affinity
 * 2. VM to VM affinity
 *
 * @param cluster current cluster
 * @return a VM that can be migrated to resolve a violation, null otherwise
 */
public VM chooseNextVmToMigrate(Cluster cluster) {
    List<AffinityGroup> allAffinityGroups = affinityGroupDao.getAllAffinityGroupsByClusterId(cluster.getId());
    // VM-to-host violations take precedence; fall back to VM-to-VM violations.
    return chooseNextVmToMigrateFromVMsToHostsAffinity(cluster, allAffinityGroups)
            .orElseGet(() -> chooseNextVmToMigrateFromVMsAffinity(cluster, allAffinityGroups));
}
/**
 * Choose a VM to migrate by applying VM to host affinity rules.
 * Candidate VMs are tried in the following order:
 * <p>
 * 1. Candidate VMs violating enforcing (hard) affinity to hosts.
 * 2. Candidate VMs violating non enforcing (soft) affinity to hosts.
 *
 * @param cluster current cluster
 * @param allAffinityGroups all affinity groups for the current cluster
 * @return a VM that can be migrated to fix a VM to host affinity violation, empty result otherwise
 */
private Optional<VM> chooseNextVmToMigrateFromVMsToHostsAffinity(Cluster cluster, List<AffinityGroup>
        allAffinityGroups) {
    List<AffinityGroup> allVmToHostsAffinityGroups = getAllAffinityGroupsForVMsToHostsAffinity(allAffinityGroups);
    if (allVmToHostsAffinityGroups.isEmpty()) {
        return Optional.empty();
    }
    Map<Guid, VM> vmsMap = getVMsMap(allVmToHostsAffinityGroups);

    // Hard (enforcing) affinity violations first.
    List<Guid> candidateVMs =
            getVmToHostsAffinityGroupCandidates(allVmToHostsAffinityGroups, vmsMap, true);
    if (candidateVMs.isEmpty()) {
        log.debug("No vm to hosts hard-affinity group violation detected");
    } else {
        // Warn about group combinations that are intrinsically unsatisfiable.
        logVmToHostConflicts(AffinityRulesUtils.checkForAffinityGroupHostsConflict(allVmToHostsAffinityGroups));
    }
    Optional<VM> vm = findFirstMigratableVm(cluster, candidateVMs, vmsMap);
    if (vm.isPresent()) {
        return vm;
    }

    // Fall back to soft (non enforcing) affinity violations.
    candidateVMs =
            getVmToHostsAffinityGroupCandidates(allVmToHostsAffinityGroups, vmsMap, false);
    if (candidateVMs.isEmpty()) {
        log.debug("No vm to hosts soft-affinity group violation detected");
    }
    return findFirstMigratableVm(cluster, candidateVMs, vmsMap);
}

/**
 * Log a warning for each detected conflict between VM to host affinity groups.
 *
 * @param conflicts conflicts reported by AffinityRulesUtils.checkForAffinityGroupHostsConflict
 */
private void logVmToHostConflicts(List<AffinityRulesUtils.AffinityGroupConflicts> conflicts) {
    for (AffinityRulesUtils.AffinityGroupConflicts conflict : conflicts) {
        if (conflict.isVmToVmAffinity()) {
            log.warn(conflict.getType().getMessage(),
                    conflict.getVms().stream()
                            .map(id -> id.toString())
                            .collect(Collectors.joining(",")),
                    AffinityRulesUtils.getAffinityGroupsNames(conflict.getAffinityGroups()),
                    conflict.getNegativeVms().stream()
                            .map(id -> id.toString())
                            .collect(Collectors.joining(","))
            );
        } else {
            log.warn(conflict.getType().getMessage(),
                    AffinityRulesUtils.getAffinityGroupsNames(conflict.getAffinityGroups()),
                    conflict.getHosts().stream()
                            .map(id -> id.toString())
                            .collect(Collectors.joining(",")),
                    conflict.getVms().stream()
                            .map(id -> id.toString())
                            .collect(Collectors.joining(",")));
        }
    }
}

/**
 * Return the first candidate VM (in list order) that the scheduler can actually migrate.
 *
 * @param cluster current cluster
 * @param candidateVMs candidate VM ids, ordered by preference
 * @param vmsMap map of VM id to VM object
 * @return the first migratable candidate, empty result when none qualifies
 */
private Optional<VM> findFirstMigratableVm(Cluster cluster, List<Guid> candidateVMs, Map<Guid, VM> vmsMap) {
    for (Guid id : candidateVMs) {
        VM candidateVM = vmsMap.get(id);
        if (isVmMigrationValid(cluster, candidateVM)) {
            return Optional.of(candidateVM);
        }
    }
    return Optional.empty();
}
/**
 * Create a VM id to VM object map from the affinity groups list input.
 * Each VM appears exactly once in the map.
 * <p>
 * Example: Given affinity group 1 containing VM ids {1,2,3} and affinity group 2 containing VM ids {3,4}
 * the resultant map would be {(1,Vm1),(2,Vm2),(3,Vm3),(4,Vm4)}.
 *
 * @param allVMtoHostsAffinityGroups All VM to hosts affinity groups for the current cluster
 * @return VMs map with key: id, value: associated vm object
 */
private Map<Guid, VM> getVMsMap(List<AffinityGroup> allVMtoHostsAffinityGroups) {
    List<Guid> distinctVmIds = allVMtoHostsAffinityGroups.stream()
            .map(AffinityGroup::getVmIds)
            .flatMap(List::stream)
            .distinct()
            .collect(Collectors.toList());
    Map<Guid, VM> vmsMap = new HashMap<>();
    for (VM vm : vmDao.getVmsByIds(distinctVmIds)) {
        vmsMap.put(vm.getId(), vm);
    }
    return vmsMap;
}
/**
 * Get a list of candidate VMs (by VM ids) from the VM to host affinity groups.
 * This list contains all VMs that violate the host affinity policies,
 * sorted according to the number of violations (descending), so the VMs whose
 * migration would resolve the most violations are tried first.
 *
 * @param allVMtoHostsAffinityGroups VM to Host affinity groups.
 * @param vmsMap VMs map with key: vm id , value: associated vm object.
 * @param isVdsAffinityEnforcing true - Hard affinity constraint, false - Soft affinity constraint.
 * @return list of candidate VMs for migration by VM to Host affinities.
 */
private List<Guid> getVmToHostsAffinityGroupCandidates(List<AffinityGroup> allVMtoHostsAffinityGroups,
        Map<Guid, VM> vmsMap, boolean isVdsAffinityEnforcing) {
    Map<Guid, Integer> vmToHostsAffinityMap = new HashMap<>();
    // Iterate over all affinity groups and check the currently running
    // VMs for compliance, record violations per VM
    allVMtoHostsAffinityGroups.stream()
            .filter(AffinityGroup::isVdsAffinityEnabled)
            .filter(ag -> ag.isVdsEnforcing() == isVdsAffinityEnforcing)
            .forEach(g -> {
                Set<Guid> affHosts = new HashSet<>(g.getVdsIds());
                g.getVmIds().forEach(vmId -> {
                    VM vm = vmsMap.get(vmId);
                    if (vm == null) {
                        return;
                    }
                    // Violation when a negative-affinity VM runs on one of the group's
                    // hosts, or a positive-affinity VM runs elsewhere.
                    boolean runsOnAffinityHost = affHosts.contains(vm.getRunOnVds());
                    if (runsOnAffinityHost != g.isVdsPositive()) {
                        vmToHostsAffinityMap.merge(vmId, 1, Integer::sum);
                    }
                });
            });
    // Sort according to the number of violations, descending.
    // (Plain comparingByValue would put the least-violating VMs first,
    // contradicting the documented and intended order.)
    return vmToHostsAffinityMap.entrySet().stream()
            .sorted(Collections.reverseOrder(Map.Entry.comparingByValue()))
            .map(Map.Entry::getKey)
            .collect(Collectors.toList());
}
/**
 * Choose a VM to migrate by applying VM to VM (hard) affinity rules.
 * Violated groups are processed from biggest to smallest; within a group,
 * random candidates are tried until one can actually be scheduled elsewhere.
 *
 * @param cluster current cluster
 * @param allAffinityGroups all affinity groups for the current cluster
 * @return a VM whose migration would resolve a VM to VM affinity violation, null when none exists
 */
private VM chooseNextVmToMigrateFromVMsAffinity(Cluster cluster, List<AffinityGroup> allAffinityGroups) {
    List<AffinityGroup> allHardAffinityGroups = getAllHardAffinityGroupsForVMsAffinity(allAffinityGroups);
    // Merge transitively-connected positive groups into unified groups
    Set<Set<Guid>> unifiedPositiveAffinityGroups = AffinityRulesUtils.getUnifiedPositiveAffinityGroups(
            allHardAffinityGroups);
    List<AffinityGroup> unifiedAffinityGroups = AffinityRulesUtils.setsToAffinityGroups(
            unifiedPositiveAffinityGroups);
    // Add negative affinity groups
    for (AffinityGroup ag : allHardAffinityGroups) {
        if (ag.isVmNegative()) {
            unifiedAffinityGroups.add(ag);
        }
    }
    // Create a set of all VMs in affinity groups
    Set<Guid> allVms = new HashSet<>();
    for (AffinityGroup group : unifiedAffinityGroups) {
        allVms.addAll(group.getVmIds());
    }
    Map<Guid, Guid> vmToHost = createMapOfVmToHost(allVms);
    // There is no need to migrate when no collision was detected
    Set<AffinityGroup> violatedAffinityGroups =
            checkForVMAffinityGroupViolations(unifiedAffinityGroups, vmToHost, FailMode.GET_ALL);
    if (violatedAffinityGroups.isEmpty()) {
        log.debug("No affinity group collision detected for cluster {}. Standing by.", cluster.getId());
        return null;
    }
    // Find a VM that is breaking the affinityGroup and can be theoretically migrated
    // - start with bigger Affinity Groups
    List<AffinityGroup> affGroupsBySize = new ArrayList<>(violatedAffinityGroups);
    Collections.sort(affGroupsBySize, Collections.reverseOrder(new AffinityGroupComparator()));
    for (AffinityGroup affinityGroup : affGroupsBySize) {
        // Defensive copy: the DAO may return an unmodifiable list, and the
        // candidate list is mutated (remove) below.
        final List<VM> candidateVms;
        if (affinityGroup.isVmPositive()) {
            candidateVms = new ArrayList<>(vmDao.getVmsByIds(findVmViolatingPositiveAg(affinityGroup, vmToHost)));
            log.info("Positive affinity group violation detected");
        } else if (affinityGroup.isVmNegative()) {
            candidateVms = new ArrayList<>(vmDao.getVmsByIds(findVmViolatingNegativeAg(affinityGroup, vmToHost)));
            log.info("Negative affinity group violation detected");
        } else {
            continue;
        }
        // Try random candidates until one passes the scheduling check
        while (!candidateVms.isEmpty()) {
            final int index = random.nextInt(candidateVms.size());
            final VM candidateVm = candidateVms.get(index);
            if (isVmMigrationValid(cluster, candidateVm)) {
                return candidateVm;
            }
            candidateVms.remove(index);
        }
    }
    // No possible migration..
    return null;
}
/**
 * Test whether any migration is possible using current
 * AffinityGroup settings to prevent any further breakage.
 *
 * @param cluster Current cluster.
 * @param candidateVm VM candidate for migration.
 * @return true - if the candidate VM is a viable candidate for solving the affinity group violation situation.
 * false - otherwise.
 */
private boolean isVmMigrationValid(Cluster cluster, VM candidateVm) {
    // Hosted engine VMs are never migrated by the enforcer
    if (candidateVm.isHostedEngine()) {
        log.debug("VM {} is NOT a viable candidate for solving the affinity group violation situation"
                        + " since its a hosted engine VM.",
                candidateVm.getId());
        return false;
    }
    // Exclude the current host so the scheduler must find a different one
    Guid runningHost = candidateVm.getRunOnVds();
    List<Guid> vdsBlackList = runningHost == null
            ? Collections.emptyList()
            : Arrays.asList(runningHost);
    if (schedulingManager.canSchedule(cluster, candidateVm, vdsBlackList, Collections.emptyList(), new ArrayList<>())) {
        log.debug("VM {} is a viable candidate for solving the affinity group violation situation.",
                candidateVm.getId());
        return true;
    }
    log.debug("VM {} is NOT a viable candidate for solving the affinity group violation situation.",
            candidateVm.getId());
    return false;
}
/**
 * Build a map from VM id to the id of the host it currently runs on.
 * VMs that are not running (no host assigned) are omitted.
 *
 * @param allVms ids of all VMs participating in affinity groups
 * @return map of vm id to its current host id
 */
private Map<Guid, Guid> createMapOfVmToHost(Set<Guid> allVms) {
    return vmDao.getVmsByIds(new ArrayList<>(allVms)).stream()
            .filter(vm -> vm.getRunOnVds() != null)
            .collect(Collectors.toMap(VM::getId, VM::getRunOnVds));
}
/**
 * Select VMs from the broken affinity group that are running on the same host.
 *
 * @param affinityGroup broken affinity rule
 * @param vmToHost vm to host assignments
 * @return a list of vms which are candidates for migration
 */
private List<Guid> findVmViolatingNegativeAg(AffinityGroup affinityGroup, Map<Guid, Guid> vmToHost) {
    Map<Guid, Guid> firstVmOnHost = new HashMap<>();
    Set<Guid> offenders = new HashSet<>();
    // When a VM runs on an already occupied host, report both
    // the vm and the previous occupant as candidates for migration
    for (Guid vmId : affinityGroup.getVmIds()) {
        Guid host = vmToHost.get(vmId);
        if (host == null) {
            // Ignore stopped VMs
            continue;
        }
        Guid previousOccupant = firstVmOnHost.putIfAbsent(host, vmId);
        if (previousOccupant != null) {
            offenders.add(vmId);
            offenders.add(previousOccupant);
        }
    }
    return new ArrayList<>(offenders);
}
/**
 * Select VMs from the broken affinity group that are running on the host with the minimal amount
 * of VMs from the broken affinity group.
 * <p>
 * Ex.: Host1: A, B, C, D Host2: E, F -> select E or F
 *
 * @param affinityGroup broken affinity group
 * @param vmToHost vm to host assignments
 * @return a list of vms which are candidates for migration
 */
private List<Guid> findVmViolatingPositiveAg(AffinityGroup affinityGroup, Map<Guid, Guid> vmToHost) {
    Map<Guid, List<Guid>> hostCount = new HashMap<>();
    // Group the affinity group's running VMs by their current host
    for (Guid vm : affinityGroup.getVmIds()) {
        Guid host = vmToHost.get(vm);
        if (host == null) {
            // Ignore stopped VMs
            continue;
        }
        hostCount.computeIfAbsent(host, unused -> new ArrayList<>()).add(vm);
    }
    // Select the host with the least amount of VMs
    Guid host = chooseCandidateHostForMigration(hostCount);
    if (host == null) {
        return Collections.emptyList();
    }
    return hostCount.get(host);
}
/**
 * Select a host to source a VM belonging to the Affinity Group. The assumption here is that
 * the host with the lowest amount of VMs from the affinity group is the best source,
 * because the number of needed migrations will be minimal when compared to other solutions.
 * On a tie, the first host encountered in iteration order wins.
 *
 * @param mapOfHostsToVms map of host id to the group's VMs running on that host
 * @return the host carrying the fewest VMs from the group, null when the map is empty
 */
protected Guid chooseCandidateHostForMigration(Map<Guid, ? extends Collection<Guid>> mapOfHostsToVms) {
    // Renamed from maxNumberOfVms: this variable tracks the running MINIMUM
    int minNumberOfVms = Integer.MAX_VALUE;
    Guid bestHost = null;
    for (Map.Entry<Guid, ? extends Collection<Guid>> entry : mapOfHostsToVms.entrySet()) {
        if (entry.getValue().size() < minNumberOfVms) {
            minNumberOfVms = entry.getValue().size();
            bestHost = entry.getKey();
        }
    }
    return bestHost;
}
/**
 * Detect whether the current VM to VDS assignment violates current Affinity Groups.
 *
 * @param affinityGroups Unified affinity groups
 * @param vmToHost Mapping of VM to currently assigned VDS; stopped VMs are absent from the map
 * @param mode IMMEDIATELY - return on the first violation found; GET_ALL - collect every violated group
 * @return broken AffinityGroups
 */
protected static Set<AffinityGroup> checkForVMAffinityGroupViolations(Iterable<AffinityGroup> affinityGroups,
Map<Guid, Guid> vmToHost, FailMode mode) {
Set<AffinityGroup> broken = new HashSet<>();
for (AffinityGroup affinity : affinityGroups) {
// Negative groups
if (affinity.isVmNegative()) {
// Record all hosts that are already occupied by VMs from this group
Map<Guid, Guid> usedHosts = new HashMap<>();
for (Guid vm : affinity.getVmIds()) {
Guid host = vmToHost.get(vm);
if (host == null) {
// Stopped VMs cannot violate placement rules
continue;
}
// Report a violation when any host has more than one VM from this group
if (usedHosts.containsKey(host)) {
log.debug("Negative affinity rule violated between VMs {} and {} on host {}",
vm, usedHosts.get(host), host);
broken.add(affinity);
if (mode.equals(FailMode.IMMEDIATELY)) {
return broken;
}
} else {
usedHosts.put(host, vm);
}
}
// Positive groups
} else if (affinity.isVmPositive()) {
// All VMs from this group have to be running on a single host
Guid targetHost = null;
for (Guid vm : affinity.getVmIds()) {
Guid host = vmToHost.get(vm);
if (host == null) {
// Stopped VMs cannot violate placement rules
continue;
}
// Report a violation when two VMs do not share a common host.
// targetHost is fixed to the first running VM's host encountered.
if (targetHost != null && !targetHost.equals(host)) {
log.debug("Positive affinity rule violated by VM {} running at {} when other VM(s) are at {}",
vm, host, targetHost);
broken.add(affinity);
if (mode.equals(FailMode.IMMEDIATELY)) {
return broken;
}
} else if (targetHost == null) {
targetHost = host;
}
}
}
}
return broken;
}
/**
 * Filter out the affinity groups that define enforcing (hard) VM to VM affinity.
 *
 * @param allAffinityGroups all affinity groups of the cluster
 * @return groups with VM affinity enabled and enforcing
 */
private List<AffinityGroup> getAllHardAffinityGroupsForVMsAffinity(List<AffinityGroup> allAffinityGroups) {
    List<AffinityGroup> hardVmGroups = new ArrayList<>();
    for (AffinityGroup group : allAffinityGroups) {
        if (group.isVmAffinityEnabled() && group.isVmEnforcing()) {
            hardVmGroups.add(group);
        }
    }
    return hardVmGroups;
}
/**
 * Filter out the affinity groups that relate VMs to hosts,
 * i.e. groups that reference at least one host and at least one VM.
 *
 * @param allAffinityGroups all affinity groups of the cluster
 * @return groups containing both hosts and VMs
 */
private List<AffinityGroup> getAllAffinityGroupsForVMsToHostsAffinity(List<AffinityGroup> allAffinityGroups) {
    List<AffinityGroup> vmToHostGroups = new ArrayList<>();
    for (AffinityGroup group : allAffinityGroups) {
        if (!group.getVdsIds().isEmpty() && !group.getVmIds().isEmpty()) {
            vmToHostGroups.add(group);
        }
    }
    return vmToHostGroups;
}
/**
 * Orders affinity groups by the number of contained VMs (ascending);
 * ties are broken by the smallest VM id, so the ordering is deterministic
 * even for merged groups that carry no group id of their own.
 */
private static class AffinityGroupComparator implements Comparator<AffinityGroup>, Serializable {
    @Override
    public int compare(AffinityGroup thisAffinityGroup, AffinityGroup thatAffinityGroup) {
        List<Guid> theseVmIds = thisAffinityGroup.getVmIds();
        List<Guid> thoseVmIds = thatAffinityGroup.getVmIds();
        // Avoid NoSuchElementExceptions from Collections.min()
        if (theseVmIds.isEmpty() && thoseVmIds.isEmpty()) {
            return 0;
        }
        int sizeOrder = Integer.compare(theseVmIds.size(), thoseVmIds.size());
        if (sizeOrder != 0) {
            return sizeOrder;
        }
        // Merged affinity groups do not have an ID, so use the VM with the tiniest ID instead
        return min(theseVmIds).compareTo(min(thoseVmIds));
    }
}
}