package org.ovirt.engine.core.bll.scheduling;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import javax.annotation.PostConstruct;
import javax.inject.Inject;
import javax.inject.Singleton;

import org.apache.commons.lang.math.NumberUtils;
import org.ovirt.engine.core.bll.hostdev.HostDeviceManager;
import org.ovirt.engine.core.bll.network.host.NetworkDeviceHelper;
import org.ovirt.engine.core.bll.network.host.VfScheduler;
import org.ovirt.engine.core.bll.scheduling.external.BalanceResult;
import org.ovirt.engine.core.bll.scheduling.external.ExternalSchedulerBroker;
import org.ovirt.engine.core.bll.scheduling.external.ExternalSchedulerDiscovery;
import org.ovirt.engine.core.bll.scheduling.external.WeightResultEntry;
import org.ovirt.engine.core.bll.scheduling.pending.PendingCpuCores;
import org.ovirt.engine.core.bll.scheduling.pending.PendingMemory;
import org.ovirt.engine.core.bll.scheduling.pending.PendingOvercommitMemory;
import org.ovirt.engine.core.bll.scheduling.pending.PendingResourceManager;
import org.ovirt.engine.core.bll.scheduling.pending.PendingVM;
import org.ovirt.engine.core.bll.scheduling.policyunits.RankSelectorPolicyUnit;
import org.ovirt.engine.core.bll.scheduling.selector.SelectorInstance;
import org.ovirt.engine.core.common.AuditLogType;
import org.ovirt.engine.core.common.BackendService;
import org.ovirt.engine.core.common.businessentities.Cluster;
import org.ovirt.engine.core.common.businessentities.VDS;
import org.ovirt.engine.core.common.businessentities.VDSStatus;
import org.ovirt.engine.core.common.businessentities.VM;
import org.ovirt.engine.core.common.businessentities.VmStatic;
import org.ovirt.engine.core.common.config.Config;
import org.ovirt.engine.core.common.config.ConfigValues;
import org.ovirt.engine.core.common.errors.EngineMessage;
import org.ovirt.engine.core.common.scheduling.ClusterPolicy;
import org.ovirt.engine.core.common.scheduling.OptimizationType;
import org.ovirt.engine.core.common.scheduling.PerHostMessages;
import org.ovirt.engine.core.common.scheduling.PolicyUnit;
import org.ovirt.engine.core.common.scheduling.PolicyUnitType;
import org.ovirt.engine.core.common.utils.Pair;
import org.ovirt.engine.core.compat.Guid;
import org.ovirt.engine.core.dal.dbbroker.DbFacade;
import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogDirector;
import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogable;
import org.ovirt.engine.core.dal.dbbroker.auditloghandling.AuditLogableImpl;
import org.ovirt.engine.core.dao.ClusterDao;
import org.ovirt.engine.core.dao.VdsDao;
import org.ovirt.engine.core.dao.scheduling.ClusterPolicyDao;
import org.ovirt.engine.core.dao.scheduling.PolicyUnitDao;
import org.ovirt.engine.core.di.Injector;
import org.ovirt.engine.core.utils.threadpool.ThreadPoolUtil;
import org.ovirt.engine.core.utils.timer.OnTimerMethodAnnotation;
import org.ovirt.engine.core.utils.timer.SchedulerUtilQuartzImpl;
import org.ovirt.engine.core.vdsbroker.ResourceManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@Singleton
public class SchedulingManager implements BackendService {

    private static final Logger log = LoggerFactory.getLogger(SchedulingManager.class);

    private static final String HIGH_UTILIZATION = "HighUtilization";
    private static final String LOW_UTILIZATION = "LowUtilization";

    @Inject
    private AuditLogDirector auditLogDirector;
    @Inject
    private ResourceManager resourceManager;
    @Inject
    private MigrationHandler migrationHandler;
    @Inject
    private ExternalSchedulerDiscovery exSchedulerDiscovery;
    @Inject
    private DbFacade dbFacade;
    @Inject
    private NetworkDeviceHelper networkDeviceHelper;
    @Inject
    private HostDeviceManager hostDeviceManager;
    @Inject
    private ExternalSchedulerBroker externalBroker;
    @Inject
    private VfScheduler vfScheduler;

    private PendingResourceManager pendingResourceManager;

    /**
     * [policy id, policy] map
     */
    private final ConcurrentHashMap<Guid, ClusterPolicy> policyMap;
    /**
     * [policy unit id, policy unit] map
     */
    private volatile ConcurrentHashMap<Guid, PolicyUnitImpl> policyUnits;

    private final Object policyUnitsLock = new Object();

    private final ConcurrentHashMap<Guid, Semaphore> clusterLockMap = new ConcurrentHashMap<>();

    private final VdsFreeMemoryChecker noWaitingMemoryChecker = new VdsFreeMemoryChecker(new NonWaitingDelayer());
    private final Map<Guid, Boolean> clusterId2isHaReservationSafe = new HashMap<>();

    private final Guid defaultSelectorGuid = InternalPolicyUnits.getGuid(RankSelectorPolicyUnit.class);

    private PendingResourceManager getPendingResourceManager() {
        return pendingResourceManager;
    }

    @Inject
    protected SchedulingManager() {
        policyMap = new ConcurrentHashMap<>();
        policyUnits = new ConcurrentHashMap<>();
    }

    @PostConstruct
    public void init() {
        log.info("Initializing Scheduling manager");
        initializePendingResourceManager();
        loadPolicyUnits();
        loadClusterPolicies();
        loadExternalScheduler();
        enableLoadBalancer();
        enableHaReservationCheck();
        log.info("Initialized Scheduling manager");
    }

    private void initializePendingResourceManager() {
        pendingResourceManager = new PendingResourceManager(resourceManager);
    }

    private void loadExternalScheduler() {
        if (Config.<Boolean>getValue(ConfigValues.ExternalSchedulerEnabled)) {
            log.info("Starting external scheduler discovery thread");

            /* Disable all external units, this is needed in case an external scheduler
               broker implementation is missing, because nobody would then disable units
               that were registered by the missing broker */
            exSchedulerDiscovery.markAllExternalPoliciesAsDisabled();

            ThreadPoolUtil.execute(() -> {
                if (exSchedulerDiscovery.discover()) {
                    reloadPolicyUnits();
                }
            });
        } else {
            exSchedulerDiscovery.markAllExternalPoliciesAsDisabled();
            log.info("External scheduler disabled, discovery skipped");
        }
    }

    private void reloadPolicyUnits() {
        synchronized (policyUnitsLock) {
            policyUnits = new ConcurrentHashMap<>();
            loadPolicyUnits();
        }
    }

    public List<ClusterPolicy> getClusterPolicies() {
        return new ArrayList<>(policyMap.values());
    }

    public ClusterPolicy getClusterPolicy(Guid clusterPolicyId) {
        return policyMap.get(clusterPolicyId);
    }

    public Optional<ClusterPolicy> getClusterPolicy(String name) {
        if (name == null || name.isEmpty()) {
            return Optional.empty();
        }
        for (ClusterPolicy clusterPolicy : policyMap.values()) {
            if (clusterPolicy.getName().toLowerCase().equals(name.toLowerCase())) {
                return Optional.of(clusterPolicy);
            }
        }
        return Optional.empty();
    }

    public ClusterPolicy getDefaultClusterPolicy() {
        for (ClusterPolicy clusterPolicy : policyMap.values()) {
            if (clusterPolicy.isDefaultPolicy()) {
                return clusterPolicy;
            }
        }

        // This should never happen, there must be at least one InternalClusterPolicy
        // that is marked as default. InternalClusterPoliciesTest.testDefaultPolicy()
        // makes sure exactly one is defined
        throw new RuntimeException("There is no system default cluster policy!");
    }

    public Map<Guid, PolicyUnitImpl> getPolicyUnitsMap() {
        synchronized (policyUnitsLock) {
            return policyUnits;
        }
    }

    private void loadClusterPolicies() {
        // Load internal cluster policies
        policyMap.putAll(InternalClusterPolicies.getClusterPolicies());

        Map<Guid, PolicyUnitType> internalTypes = new HashMap<>();
        for (PolicyUnitImpl unit : policyUnits.values()) {
            internalTypes.put(unit.getGuid(), unit.getType());
        }

        // Get all user provided cluster policies
        List<ClusterPolicy> allClusterPolicies = getClusterPolicyDao().getAll(
                Collections.unmodifiableMap(internalTypes));

        for (ClusterPolicy clusterPolicy : allClusterPolicies) {
            policyMap.put(clusterPolicy.getId(), clusterPolicy);
        }
    }

    private void loadPolicyUnits() {
        // Load internal policy units
        for (Class<? extends PolicyUnitImpl> unitType : InternalPolicyUnits.getList()) {
            try {
                PolicyUnitImpl unit = InternalPolicyUnits.instantiate(unitType, getPendingResourceManager());
                policyUnits.put(unit.getGuid(), Injector.injectMembers(unit));
            } catch (Exception e) {
                log.error("Could not instantiate a policy unit {}.", unitType.getName(), e);
            }
        }

        // Load all external policy units
        List<PolicyUnit> allPolicyUnits = getPolicyUnitDao().getAll();
        for (PolicyUnit policyUnit : allPolicyUnits) {
            policyUnits.put(policyUnit.getId(), new ExternalPolicyUnit(policyUnit, getPendingResourceManager()));
        }
    }

    private static class SchedulingResult {
        Map<Guid, Pair<EngineMessage, String>> filteredOutReasons;
        Map<Guid, String> hostNames;
        PerHostMessages details;

        public SchedulingResult() {
            filteredOutReasons = new HashMap<>();
            hostNames = new HashMap<>();
            details = new PerHostMessages();
        }

        public void addReason(Guid id, String hostName, EngineMessage filterType, String filterName) {
            filteredOutReasons.put(id, new Pair<>(filterType, filterName));
            hostNames.put(id, hostName);
        }

        public Collection<String> getReasonMessages() {
            List<String> lines = new ArrayList<>();

            for (Entry<Guid, Pair<EngineMessage, String>> line : filteredOutReasons.entrySet()) {
                lines.add(line.getValue().getFirst().name());
                lines.add(String.format("$%1$s %2$s", "hostName", hostNames.get(line.getKey())));
                lines.add(String.format("$%1$s %2$s", "filterName", line.getValue().getSecond()));

                final List<String> detailMessages = details.getMessages(line.getKey());
                if (detailMessages.isEmpty()) {
                    lines.add(EngineMessage.SCHEDULING_HOST_FILTERED_REASON.name());
                } else {
                    lines.addAll(detailMessages);
                    lines.add(EngineMessage.SCHEDULING_HOST_FILTERED_REASON_WITH_DETAIL.name());
                }
            }

            return lines;
        }

        private PerHostMessages getDetails() {
            return details;
        }
    }
    public Optional<Guid> schedule(Cluster cluster,
            VM vm,
            List<Guid> hostBlackList,
            List<Guid> hostWhiteList,
            List<Guid> destHostIdList,
            List<String> messages,
            VdsFreeMemoryChecker memoryChecker,
            String correlationId) {
        prepareClusterLock(cluster.getId());
        try {
            log.debug("Scheduling started, correlation Id: {}", correlationId);
            checkAllowOverbooking(cluster);
            lockCluster(cluster.getId());
            List<VDS> vdsList = getVdsDao()
                    .getAllForClusterWithStatus(cluster.getId(), VDSStatus.Up);
            vdsList = removeBlacklistedHosts(vdsList, hostBlackList);
            vdsList = keepOnlyWhitelistedHosts(vdsList, hostWhiteList);
            refreshCachedPendingValues(vdsList);
            ClusterPolicy policy = policyMap.get(cluster.getClusterPolicyId());
            Map<String, String> parameters = createClusterPolicyParameters(cluster);

            vdsList = runFilters(policy.getFilters(),
                    cluster,
                    vdsList,
                    vm,
                    parameters,
                    policy.getFilterPositionMap(),
                    messages,
                    memoryChecker,
                    true,
                    correlationId);

            if (vdsList.isEmpty()) {
                return Optional.empty();
            }

            Optional<Guid> bestHost = selectBestHost(cluster, vm, destHostIdList, vdsList, policy, parameters);

            if (bestHost.isPresent() && !bestHost.get().equals(vm.getRunOnVds())) {
                Guid bestHostId = bestHost.get();
                getPendingResourceManager().addPending(new PendingCpuCores(bestHostId, vm, vm.getNumOfCpus()));

                VDS bestHostEntity = vdsList.stream().filter(vds -> vds.getId().equals(bestHostId)).findFirst().get();
                getPendingResourceManager().addPending(new PendingMemory(bestHostId, vm, bestHostEntity.getGuestOverhead()));
                getPendingResourceManager().addPending(new PendingOvercommitMemory(bestHostId, vm, vm.getMemSizeMb()));
                getPendingResourceManager().addPending(new PendingVM(bestHostId, vm));
                getPendingResourceManager().notifyHostManagers(bestHostId);

                markVfsAsUsedByVm(vm, bestHostId);
            }

            return bestHost;
        } catch (InterruptedException e) {
            log.error("Scheduling interrupted, correlation Id: {}: {}", correlationId, e.getMessage());
            log.debug("Exception: ", e);
            return Optional.empty();
        } finally {
            releaseCluster(cluster.getId());
            log.debug("Scheduling ended, correlation Id: {}", correlationId);
        }
    }

    private void releaseCluster(Guid cluster) {
        // make sure the semaphore ends up with exactly one permit
        synchronized (clusterLockMap.get(cluster)) {
            clusterLockMap.get(cluster).drainPermits();
            clusterLockMap.get(cluster).release();
        }
    }

    private void lockCluster(Guid cluster) throws InterruptedException {
        clusterLockMap.get(cluster).acquire();
    }

    private void prepareClusterLock(Guid cluster) {
        clusterLockMap.putIfAbsent(cluster, new Semaphore(1));
    }
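    /*
     * The per-cluster Semaphore above serializes scheduling requests. A minimal
     * sketch of the lock lifecycle as used by schedule() and clearPendingVm()
     * (hypothetical caller, for illustration only):
     *
     *     prepareClusterLock(clusterId);  // create the semaphore on first use
     *     try {
     *         lockCluster(clusterId);     // blocks; may be released early by overbooking
     *         // ... work that must not race with other scheduling runs ...
     *     } finally {
     *         releaseCluster(clusterId);  // drain + release, so permits never exceed 1
     *     }
     */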
    private void markVfsAsUsedByVm(VM vm, Guid bestHostId) {
        Map<Guid, String> passthroughVnicToVfMap = vfScheduler.getVnicToVfMap(vm.getId(), bestHostId);
        if (passthroughVnicToVfMap == null || passthroughVnicToVfMap.isEmpty()) {
            return;
        }

        try {
            hostDeviceManager.acquireHostDevicesLock(bestHostId);
            Collection<String> virtualFunctions = passthroughVnicToVfMap.values();

            log.debug("Marking following VF as used by VM({}) on selected host({}): {}",
                    vm.getId(),
                    bestHostId,
                    virtualFunctions);

            networkDeviceHelper.setVmIdOnVfs(bestHostId, vm.getId(), new HashSet<>(virtualFunctions));
        } finally {
            hostDeviceManager.releaseHostDevicesLock(bestHostId);
        }
    }

    /**
     * Refresh the cached VDS pending fields with the current pending
     * values from the PendingResourceManager.
     *
     * @param vdsList list of candidate hosts
     */
    private void refreshCachedPendingValues(List<VDS> vdsList) {
        for (VDS vds : vdsList) {
            int pendingMemory = PendingOvercommitMemory.collectForHost(getPendingResourceManager(), vds.getId());
            int pendingCpuCount = PendingCpuCores.collectForHost(getPendingResourceManager(), vds.getId());

            vds.setPendingVcpusCount(pendingCpuCount);
            vds.setPendingVmemSize(pendingMemory);
        }
    }

    /**
     * @param destHostIdList used for RunAt preselection, overrides the ordering in vdsList
     * @param availableVdsList presorted list of available hosts (better hosts first)
     */
    private Optional<Guid> selectBestHost(Cluster cluster,
            VM vm,
            List<Guid> destHostIdList,
            List<VDS> availableVdsList,
            ClusterPolicy policy,
            Map<String, String> parameters) {
        // in case a default destination host was specified and it passed the
        // filters, return the first one found
        List<VDS> runnableHosts = new LinkedList<>();
        if (destHostIdList.size() > 0) {
            // there are dedicated hosts, intersect the dedicated host list with the available list
            for (VDS vds : availableVdsList) {
                for (Guid destHostId : destHostIdList) {
                    if (destHostId.equals(vds.getId())) {
                        runnableHosts.add(vds);
                    }
                }
            }
        }

        if (runnableHosts.isEmpty()) {
            // no dedicated hosts found
            runnableHosts = availableVdsList;
        }

        switch (runnableHosts.size()) {
        case 0:
            // no runnable hosts found
            return Optional.empty();
        case 1:
            // only a single host is available, return it
            return Optional.of(runnableHosts.get(0).getId());
        default:
            // select the best runnable host using the scoring functions (from the policy)
            List<Pair<Guid, Integer>> functions = policy.getFunctions();
            Guid selector = Optional.of(policy).map(ClusterPolicy::getSelector).orElse(defaultSelectorGuid);
            PolicyUnitImpl selectorUnit = policyUnits.get(selector);
            SelectorInstance selectorInstance = selectorUnit.selector(parameters);

            List<Guid> runnableGuids = runnableHosts.stream().map(VDS::getId).collect(Collectors.toList());
            selectorInstance.init(functions, runnableGuids);

            if (!functions.isEmpty() && shouldWeighClusterHosts(cluster, runnableHosts)) {
                Optional<Guid> bestHostByFunctions =
                        runFunctions(selectorInstance, functions, cluster, runnableHosts, vm, parameters);
                if (bestHostByFunctions.isPresent()) {
                    return bestHostByFunctions;
                }
            }
        }

        // failed to select the best runnable host using the scoring functions, return the first one
        return Optional.of(runnableHosts.get(0).getId());
    }
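    /*
     * Selection above falls through in a fixed order: dedicated ("RunAt") hosts
     * that survived filtering, then the single-remaining-host shortcut, then the
     * weight functions, and finally the first runnable host as a fallback.
     * A hypothetical trace with host names A, B, C (illustration only):
     *
     *     destHostIdList = [A],  available = [B, A, C]  ->  A is chosen
     *     destHostIdList = [],   available = [B]        ->  B is chosen
     *     destHostIdList = [],   available = [B, A, C]  ->  weighing decides
     */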
    /**
     * Checks whether the scheduler should schedule several requests in parallel.
     * Conditions:
     * * the config option SchedulerAllowOverBooking is enabled,
     * * the cluster optimization type flag allows over-booking,
     * * more than X (config.SchedulerOverBookingThreshold) requests are pending for scheduling.
     * In case all of the above conditions are met, we release all the pending scheduling
     * requests.
     */
    private void checkAllowOverbooking(Cluster cluster) {
        if (OptimizationType.ALLOW_OVERBOOKING == cluster.getOptimizationType()
                && Config.<Boolean>getValue(ConfigValues.SchedulerAllowOverBooking)
                && clusterLockMap.get(cluster.getId()).getQueueLength() >=
                        Config.<Integer>getValue(ConfigValues.SchedulerOverBookingThreshold)) {
            log.info("Scheduler: cluster '{}' lock is skipped (cluster is allowed to overbook)",
                    cluster.getName());
            // release the pending threads (requests) and the current one (+1)
            clusterLockMap.get(cluster.getId())
                    .release(Config.<Integer>getValue(ConfigValues.SchedulerOverBookingThreshold) + 1);
        }
    }

    /**
     * Checks whether the scheduler should weigh hosts or skip weighing:
     * * there is more than one host (it is trivial to weigh a single host),
     * * if optimizing for speed is enabled for the cluster, fewer than a configurable
     *   number of requests are pending (weighing is skipped in a loaded setup).
     */
    private boolean shouldWeighClusterHosts(Cluster cluster, List<VDS> vdsList) {
        Integer threshold = Config.<Integer>getValue(ConfigValues.SpeedOptimizationSchedulingThreshold);
        // the threshold is crossed only when the cluster is configured to optimize for speed
        boolean crossedThreshold = OptimizationType.OPTIMIZE_FOR_SPEED == cluster.getOptimizationType()
                && clusterLockMap.get(cluster.getId()).getQueueLength() > threshold;

        if (crossedThreshold) {
            log.info("Scheduler: skipping weighing hosts in cluster '{}', since there are more than '{}' parallel requests",
                    cluster.getName(),
                    threshold);
        }

        return vdsList.size() > 1 && !crossedThreshold;
    }

    public boolean canSchedule(Cluster cluster,
            VM vm,
            List<Guid> vdsBlackList,
            List<Guid> vdsWhiteList,
            List<String> messages) {
        List<VDS> vdsList = getVdsDao()
                .getAllForClusterWithStatus(cluster.getId(), VDSStatus.Up);
        vdsList = removeBlacklistedHosts(vdsList, vdsBlackList);
        vdsList = keepOnlyWhitelistedHosts(vdsList, vdsWhiteList);
        refreshCachedPendingValues(vdsList);
        ClusterPolicy policy = policyMap.get(cluster.getClusterPolicyId());
        Map<String, String> parameters = createClusterPolicyParameters(cluster);

        vdsList = runFilters(policy.getFilters(),
                cluster,
                vdsList,
                vm,
                parameters,
                policy.getFilterPositionMap(),
                messages,
                noWaitingMemoryChecker,
                false,
                null);

        return vdsList != null && !vdsList.isEmpty();
    }
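    /*
     * canSchedule() is the dry-run counterpart of schedule(): it runs only the
     * filtering phase (no weighing, no pending-resource bookkeeping, no cluster
     * lock) and reports whether any host survives. A minimal sketch of a
     * hypothetical caller (illustration only):
     *
     *     List<String> messages = new ArrayList<>();
     *     if (!schedulingManager.canSchedule(cluster, vm,
     *             Collections.emptyList(),  // no blacklist
     *             Collections.emptyList(),  // no whitelist
     *             messages)) {
     *         // messages now holds EngineMessage keys explaining each filtered host
     *     }
     */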
    private Map<String, String> createClusterPolicyParameters(Cluster cluster) {
        Map<String, String> parameters = new HashMap<>();
        if (cluster.getClusterPolicyProperties() != null) {
            parameters.putAll(cluster.getClusterPolicyProperties());
        }
        return parameters;
    }

    /**
     * Remove hosts from vdsList that are not present on the whitelist.
     *
     * An empty whitelist signals that nothing is to be done.
     *
     * @param vdsList List of hosts to filter
     * @param list Whitelist
     */
    private List<VDS> keepOnlyWhitelistedHosts(List<VDS> vdsList, List<Guid> list) {
        if (!list.isEmpty()) {
            Set<Guid> listSet = new HashSet<>(list);
            return vdsList.stream()
                    .filter(host -> listSet.contains(host.getId()))
                    .collect(Collectors.toList());
        } else {
            return vdsList;
        }
    }

    /**
     * Remove hosts from vdsList that are present on the blacklist.
     *
     * An empty blacklist signals that nothing is to be done.
     *
     * @param vdsList List of hosts to filter
     * @param list Blacklist
     */
    private List<VDS> removeBlacklistedHosts(List<VDS> vdsList, List<Guid> list) {
        if (!list.isEmpty()) {
            Set<Guid> listSet = new HashSet<>(list);
            return vdsList.stream()
                    .filter(host -> !listSet.contains(host.getId()))
                    .collect(Collectors.toList());
        } else {
            return vdsList;
        }
    }
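    /*
     * Both list helpers treat an empty list as "no restriction", so callers can
     * pass the lists through unchecked. Hypothetical values, for illustration only:
     *
     *     hosts = [A, B, C]
     *     keepOnlyWhitelistedHosts(hosts, [])      -> [A, B, C]
     *     keepOnlyWhitelistedHosts(hosts, [B, C])  -> [B, C]
     *     removeBlacklistedHosts(hosts, [B])       -> [A, C]
     */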
    private List<VDS> runFilters(ArrayList<Guid> filters,
            Cluster cluster,
            List<VDS> hostList,
            VM vm,
            Map<String, String> parameters,
            Map<Guid, Integer> filterPositionMap,
            List<String> messages,
            VdsFreeMemoryChecker memoryChecker,
            boolean shouldRunExternalFilters,
            String correlationId) {
        SchedulingResult result = new SchedulingResult();

        ArrayList<PolicyUnitImpl> internalFilters = new ArrayList<>();
        ArrayList<PolicyUnitImpl> externalFilters = new ArrayList<>();

        // Create a local copy so we can manipulate it
        filters = new ArrayList<>(filters);

        sortFilters(filters, filterPositionMap);
        for (Guid filter : filters) {
            PolicyUnitImpl filterPolicyUnit = policyUnits.get(filter);
            if (filterPolicyUnit.getPolicyUnit().isInternal()) {
                internalFilters.add(filterPolicyUnit);
            } else {
                if (filterPolicyUnit.getPolicyUnit().isEnabled()) {
                    externalFilters.add(filterPolicyUnit);
                }
            }
        }

        /* Short circuit the filters if there are no hosts at all */
        if (hostList.isEmpty()) {
            messages.add(EngineMessage.SCHEDULING_NO_HOSTS.name());
            messages.addAll(result.getReasonMessages());
            return hostList;
        }

        hostList = runInternalFilters(internalFilters, cluster, hostList, vm, parameters,
                filterPositionMap, memoryChecker, correlationId, result);

        if (shouldRunExternalFilters
                && Config.<Boolean>getValue(ConfigValues.ExternalSchedulerEnabled)
                && !externalFilters.isEmpty()
                && !hostList.isEmpty()) {
            hostList = runExternalFilters(externalFilters, hostList, vm, parameters, messages, correlationId, result);
        }

        if (hostList.isEmpty()) {
            messages.add(EngineMessage.SCHEDULING_ALL_HOSTS_FILTERED_OUT.name());
            messages.addAll(result.getReasonMessages());
        }

        return hostList;
    }

    private List<VDS> runInternalFilters(ArrayList<PolicyUnitImpl> filters,
            Cluster cluster,
            List<VDS> hostList,
            VM vm,
            Map<String, String> parameters,
            Map<Guid, Integer> filterPositionMap,
            VdsFreeMemoryChecker memoryChecker,
            String correlationId,
            SchedulingResult result) {
        for (PolicyUnitImpl filterPolicyUnit : filters) {
            if (hostList.isEmpty()) {
                break;
            }
            filterPolicyUnit.setMemoryChecker(memoryChecker);

            List<VDS> currentHostList = new ArrayList<>(hostList);
            hostList = filterPolicyUnit.filter(cluster, hostList, vm, parameters, result.getDetails());
            logFilterActions(currentHostList,
                    toIdSet(hostList),
                    EngineMessage.VAR__FILTERTYPE__INTERNAL,
                    filterPolicyUnit.getPolicyUnit().getName(),
                    result,
                    correlationId);
        }
        return hostList;
    }

    private Set<Guid> toIdSet(List<VDS> hostList) {
        return hostList.stream().map(VDS::getId).collect(Collectors.toSet());
    }

    private void logFilterActions(List<VDS> oldList,
            Set<Guid> newSet,
            EngineMessage actionName,
            String filterName,
            SchedulingResult result,
            String correlationId) {
        for (VDS host : oldList) {
            if (!newSet.contains(host.getId())) {
                result.addReason(host.getId(), host.getName(), actionName, filterName);
                log.info("Candidate host '{}' ('{}') was filtered out by '{}' filter '{}' (correlation id: {})",
                        host.getName(),
                        host.getId(),
                        actionName.name(),
                        filterName,
                        correlationId);
            }
        }
    }

    private List<VDS> runExternalFilters(ArrayList<PolicyUnitImpl> filters,
            List<VDS> hostList,
            VM vm,
            Map<String, String> parameters,
            List<String> messages,
            String correlationId,
            SchedulingResult result) {
        List<Guid> hostIDs = hostList.stream().map(VDS::getId).collect(Collectors.toList());

        List<String> filterNames = filters.stream()
                .filter(f -> !f.getPolicyUnit().isInternal())
                .map(f -> f.getPolicyUnit().getName())
                .collect(Collectors.toList());

        List<Guid> filteredIDs = externalBroker.runFilters(filterNames, hostIDs, vm.getId(), parameters);

        logFilterActions(hostList,
                new HashSet<>(filteredIDs),
                EngineMessage.VAR__FILTERTYPE__EXTERNAL,
                Arrays.toString(filterNames.toArray()),
                result,
                correlationId);

        hostList = intersectHosts(hostList, filteredIDs);
        return hostList;
    }

    private List<VDS> intersectHosts(List<VDS> hosts, List<Guid> IDs) {
        Set<Guid> idSet = new HashSet<>(IDs);
        return hosts.stream().filter(host -> idSet.contains(host.getId())).collect(Collectors.toList());
    }

    private void sortFilters(ArrayList<Guid> filters, final Map<Guid, Integer> filterPositionMap) {
        Collections.sort(filters, Comparator.comparingInt(f -> filterPositionMap.getOrDefault(f, 0)));
    }
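    /*
     * Filters run in ascending position order; a filter missing from the
     * position map defaults to position 0. With hypothetical filter names and
     * positions (illustration only):
     *
     *     filterPositionMap = {pinToHost: -1, memory: 2}
     *     filters = [memory, cpu, pinToHost]  // cpu has no entry -> 0
     *     sortFilters(filters, filterPositionMap)
     *     // filters is now [pinToHost, cpu, memory]
     */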
    private Optional<Guid> runFunctions(SelectorInstance selector,
            List<Pair<Guid, Integer>> functions,
            Cluster cluster,
            List<VDS> hostList,
            VM vm,
            Map<String, String> parameters) {
        List<Pair<PolicyUnitImpl, Integer>> internalScoreFunctions = new ArrayList<>();
        List<Pair<PolicyUnitImpl, Integer>> externalScoreFunctions = new ArrayList<>();

        for (Pair<Guid, Integer> pair : functions) {
            PolicyUnitImpl currentPolicy = policyUnits.get(pair.getFirst());
            if (currentPolicy.getPolicyUnit().isInternal()) {
                internalScoreFunctions.add(new Pair<>(currentPolicy, pair.getSecond()));
            } else {
                if (currentPolicy.getPolicyUnit().isEnabled()) {
                    externalScoreFunctions.add(new Pair<>(currentPolicy, pair.getSecond()));
                }
            }
        }

        runInternalFunctions(selector, internalScoreFunctions, cluster, hostList, vm, parameters);

        if (Config.<Boolean>getValue(ConfigValues.ExternalSchedulerEnabled) && !externalScoreFunctions.isEmpty()) {
            runExternalFunctions(selector, externalScoreFunctions, hostList, vm, parameters);
        }

        return selector.best();
    }

    private void runInternalFunctions(SelectorInstance selector,
            List<Pair<PolicyUnitImpl, Integer>> functions,
            Cluster cluster,
            List<VDS> hostList,
            VM vm,
            Map<String, String> parameters) {
        for (Pair<PolicyUnitImpl, Integer> pair : functions) {
            List<Pair<Guid, Integer>> scoreResult = pair.getFirst().score(cluster, hostList, vm, parameters);
            for (Pair<Guid, Integer> result : scoreResult) {
                selector.record(pair.getFirst().getGuid(), result.getFirst(), result.getSecond());
            }
        }
    }

    private void runExternalFunctions(SelectorInstance selector,
            List<Pair<PolicyUnitImpl, Integer>> functions,
            List<VDS> hostList,
            VM vm,
            Map<String, String> parameters) {
        List<Guid> hostIDs = hostList.stream().map(VDS::getId).collect(Collectors.toList());

        List<Pair<String, Integer>> scoreNameAndWeight = functions.stream()
                .filter(pair -> !pair.getFirst().getPolicyUnit().isInternal())
                .map(pair -> new Pair<>(pair.getFirst().getName(), pair.getSecond()))
                .collect(Collectors.toList());

        Map<String, Guid> nameToGuidMap = functions.stream()
                .filter(pair -> !pair.getFirst().getPolicyUnit().isInternal())
                .collect(Collectors.toMap(pair -> pair.getFirst().getPolicyUnit().getName(),
                        pair -> pair.getFirst().getPolicyUnit().getId()));

        List<WeightResultEntry> externalScores = externalBroker.runScores(scoreNameAndWeight,
                hostIDs,
                vm.getId(),
                parameters);

        sumScoreResults(selector, nameToGuidMap, externalScores);
    }

    private void sumScoreResults(SelectorInstance selector,
            Map<String, Guid> nametoGuidMap,
            List<WeightResultEntry> externalScores) {
        for (WeightResultEntry resultEntry : externalScores) {
            // The old external scheduler returns summed up data without policy unit
            // identification, treat it as a single policy unit with id null
            selector.record(nametoGuidMap.getOrDefault(resultEntry.getWeightUnit(), null),
                    resultEntry.getHost(),
                    resultEntry.getWeight());
        }
    }
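    /*
     * The SelectorInstance accumulates one (policy unit, host, weight) record per
     * score call and picks the winner in best(). A minimal sketch of the protocol,
     * with hypothetical values and assuming cost-style weights where a lower
     * aggregate weight ranks a host better (illustration only):
     *
     *     selectorInstance.init(functions, runnableGuids);
     *     selectorInstance.record(unitGuid, hostA, 20);
     *     selectorInstance.record(unitGuid, hostB, 50);
     *     Optional<Guid> best = selectorInstance.best();  // e.g. hostA
     */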
    public Map<String, String> getCustomPropertiesRegexMap(ClusterPolicy clusterPolicy) {
        Set<Guid> usedPolicyUnits = new HashSet<>();
        if (clusterPolicy.getFilters() != null) {
            usedPolicyUnits.addAll(clusterPolicy.getFilters());
        }
        if (clusterPolicy.getFunctions() != null) {
            for (Pair<Guid, Integer> pair : clusterPolicy.getFunctions()) {
                usedPolicyUnits.add(pair.getFirst());
            }
        }
        if (clusterPolicy.getBalance() != null) {
            usedPolicyUnits.add(clusterPolicy.getBalance());
        }

        Map<String, String> map = new LinkedHashMap<>();
        for (Guid policyUnitId : usedPolicyUnits) {
            map.putAll(policyUnits.get(policyUnitId).getPolicyUnit().getParameterRegExMap());
        }
        return map;
    }

    public void addClusterPolicy(ClusterPolicy clusterPolicy) {
        getClusterPolicyDao().save(clusterPolicy);
        policyMap.put(clusterPolicy.getId(), clusterPolicy);
    }

    public void editClusterPolicy(ClusterPolicy clusterPolicy) {
        getClusterPolicyDao().update(clusterPolicy);
        policyMap.put(clusterPolicy.getId(), clusterPolicy);
    }

    public void removeClusterPolicy(Guid clusterPolicyId) {
        getClusterPolicyDao().remove(clusterPolicyId);
        policyMap.remove(clusterPolicyId);
    }

    private VdsDao getVdsDao() {
        return dbFacade.getVdsDao();
    }

    private ClusterDao getClusterDao() {
        return dbFacade.getClusterDao();
    }

    private PolicyUnitDao getPolicyUnitDao() {
        return dbFacade.getPolicyUnitDao();
    }

    private ClusterPolicyDao getClusterPolicyDao() {
        return dbFacade.getClusterPolicyDao();
    }

    private void enableLoadBalancer() {
        if (Config.<Boolean>getValue(ConfigValues.EnableVdsLoadBalancing)) {
            log.info("Start scheduling to enable vds load balancer");
            Injector.get(SchedulerUtilQuartzImpl.class).scheduleAFixedDelayJob(
                    this,
                    "performLoadBalancing",
                    new Class[] {},
                    new Object[] {},
                    Config.<Integer>getValue(ConfigValues.VdsLoadBalancingIntervalInMinutes),
                    Config.<Integer>getValue(ConfigValues.VdsLoadBalancingIntervalInMinutes),
                    TimeUnit.MINUTES);
            log.info("Finished scheduling to enable vds load balancer");
        }
    }

    private void enableHaReservationCheck() {
        if (Config.<Boolean>getValue(ConfigValues.EnableVdsLoadBalancing)) {
            log.info("Start HA Reservation check");
            Integer interval = Config.<Integer>getValue(ConfigValues.VdsHaReservationIntervalInMinutes);
            Injector.get(SchedulerUtilQuartzImpl.class).scheduleAFixedDelayJob(
                    this,
                    "performHaResevationCheck",
                    new Class[] {},
                    new Object[] {},
                    interval,
                    interval,
                    TimeUnit.MINUTES);
            log.info("Finished HA Reservation check");
        }
    }

    @OnTimerMethodAnnotation("performHaResevationCheck")
    public void performHaResevationCheck() {
        log.debug("HA Reservation check timer entered.");
        List<Cluster> clusters = getClusterDao().getAll();
        if (clusters != null) {
            HaReservationHandling haReservationHandling = new HaReservationHandling(getPendingResourceManager());
            for (Cluster cluster : clusters) {
                if (cluster.supportsHaReservation()) {
                    List<VDS> returnedFailedHosts = new ArrayList<>();
                    boolean clusterHaStatus =
                            haReservationHandling.checkHaReservationStatusForCluster(cluster, returnedFailedHosts);
                    if (!clusterHaStatus) {
                        // create an alert using returnedFailedHosts
                        AuditLogable logable = createEventForCluster(cluster);
                        String failedHostsStr =
                                returnedFailedHosts.stream().map(VDS::getName).collect(Collectors.joining(", "));
                        logable.addCustomValue("Hosts", failedHostsStr);
                        auditLogDirector.log(logable, AuditLogType.CLUSTER_ALERT_HA_RESERVATION);
                        log.info("Cluster '{}' failed to pass the HA reservation check.", cluster.getName());
                    }

                    boolean clusterHaStatusFromPreviousCycle =
                            clusterId2isHaReservationSafe.containsKey(cluster.getId())
                                    ? clusterId2isHaReservationSafe.get(cluster.getId())
                                    : true;

                    // Update the status map with the new status
                    clusterId2isHaReservationSafe.put(cluster.getId(), clusterHaStatus);

                    // Create an alert if the status changed from false to true
                    if (!clusterHaStatusFromPreviousCycle && clusterHaStatus) {
                        AuditLogable logable = createEventForCluster(cluster);
                        auditLogDirector.log(logable, AuditLogType.CLUSTER_ALERT_HA_RESERVATION_DOWN);
                    }
                }
            }
        }
        log.debug("HA Reservation check timer finished.");
    }

    private AuditLogable createEventForCluster(Cluster cluster) {
        AuditLogable logable = new AuditLogableImpl();
        logable.setClusterName(cluster.getName());
        logable.setClusterId(cluster.getId());
        return logable;
    }

    @OnTimerMethodAnnotation("performLoadBalancing")
    public void performLoadBalancing() {
        log.debug("Load Balancer timer entered.");
        List<Cluster> clusters = getClusterDao().getAll();
        for (Cluster cluster : clusters) {
            ClusterPolicy policy = policyMap.get(cluster.getClusterPolicyId());
            PolicyUnitImpl policyUnit = policyUnits.get(policy.getBalance());
            Optional<BalanceResult> balanceResult = Optional.empty();
            if (policyUnit.getPolicyUnit().isEnabled()) {
                List<VDS> hosts = getVdsDao().getAllForClusterWithoutMigrating(cluster.getId());
                if (policyUnit.getPolicyUnit().isInternal()) {
                    balanceResult = internalRunBalance(policyUnit, cluster, hosts);
                } else if (Config.<Boolean>getValue(ConfigValues.ExternalSchedulerEnabled)) {
                    balanceResult = externalRunBalance(policyUnit, cluster, hosts);
                }
            }

            if (balanceResult.isPresent() && balanceResult.get().isValid()) {
                migrationHandler.migrateVM(balanceResult.get().getCandidateHosts(),
                        balanceResult.get().getVmToMigrate(),
                        AuditLogDirector.getMessage(AuditLogType.MIGRATION_REASON_LOAD_BALANCING));
            }
        }
    }

    private Optional<BalanceResult> internalRunBalance(PolicyUnitImpl policyUnit, Cluster cluster, List<VDS> hosts) {
        return policyUnit.balance(cluster, hosts, cluster.getClusterPolicyProperties(), new ArrayList<>());
    }

    private Optional<BalanceResult> externalRunBalance(PolicyUnitImpl policyUnit, Cluster cluster, List<VDS> hosts) {
        List<Guid> hostIDs = new ArrayList<>();
        for (VDS vds : hosts) {
            hostIDs.add(vds.getId());
        }

        Optional<BalanceResult> balanceResult = externalBroker.runBalance(policyUnit.getPolicyUnit().getName(),
                hostIDs, cluster.getClusterPolicyProperties());

        if (balanceResult.isPresent()) {
            return balanceResult;
        }

        log.warn("All external schedulers returned empty balancing result.");
        return Optional.empty();
    }
    /**
     * Returns the names of all cluster policies that use the given policy unit.
     *
     * @return list of the cluster policy names that use the referenced policyUnitId,
     *         or null if the policy unit is not available
     */
    public List<String> getClusterPoliciesNamesByPolicyUnitId(Guid policyUnitId) {
        List<String> list = new ArrayList<>();
        final PolicyUnitImpl policyUnitImpl = policyUnits.get(policyUnitId);
        if (policyUnitImpl == null) {
            log.warn("Trying to find usages of non-existing policy unit '{}'", policyUnitId);
            return null;
        }

        PolicyUnit policyUnit = policyUnitImpl.getPolicyUnit();
        if (policyUnit != null) {
            for (ClusterPolicy clusterPolicy : policyMap.values()) {
                switch (policyUnit.getPolicyUnitType()) {
                case FILTER:
                    Collection<Guid> filters = clusterPolicy.getFilters();
                    if (filters != null && filters.contains(policyUnitId)) {
                        list.add(clusterPolicy.getName());
                    }
                    break;
                case WEIGHT:
                    Collection<Pair<Guid, Integer>> functions = clusterPolicy.getFunctions();
                    if (functions == null) {
                        break;
                    }
                    for (Pair<Guid, Integer> pair : functions) {
                        if (pair.getFirst().equals(policyUnitId)) {
                            list.add(clusterPolicy.getName());
                            break;
                        }
                    }
                    break;
                case LOAD_BALANCING:
                    if (policyUnitId.equals(clusterPolicy.getBalance())) {
                        list.add(clusterPolicy.getName());
                    }
                    break;
                default:
                    break;
                }
            }
        }
        return list;
    }

    public void removeExternalPolicyUnit(Guid policyUnitId) {
        getPolicyUnitDao().remove(policyUnitId);
        policyUnits.remove(policyUnitId);
    }

    /**
     * Update host scheduling statistics:
     * * CPU load duration interval over/under the policy threshold
     */
    public void updateHostSchedulingStats(VDS vds) {
        if (vds.getUsageCpuPercent() != null) {
            Cluster cluster = getClusterDao().get(vds.getClusterId());
            if (vds.getUsageCpuPercent() >= NumberUtils.toInt(
                    cluster.getClusterPolicyProperties().get(HIGH_UTILIZATION),
                    Config.<Integer>getValue(ConfigValues.HighUtilizationForEvenlyDistribute))
                    || vds.getUsageCpuPercent() <= NumberUtils.toInt(
                            cluster.getClusterPolicyProperties().get(LOW_UTILIZATION),
                            Config.<Integer>getValue(ConfigValues.LowUtilizationForEvenlyDistribute))) {
                if (vds.getCpuOverCommitTimestamp() == null) {
                    vds.setCpuOverCommitTimestamp(new Date());
                }
            } else {
                vds.setCpuOverCommitTimestamp(null);
            }
        }
    }

    /**
     * Clear pending records for a VM.
     * This operation locks the cluster to make sure a possible scheduling operation is not under way.
     */
    public void clearPendingVm(VmStatic vm) {
        prepareClusterLock(vm.getClusterId());
        try {
            lockCluster(vm.getClusterId());
            getPendingResourceManager().clearVm(vm);
        } catch (InterruptedException e) {
            log.warn("Interrupted, pending counters can be out of sync");
        } finally {
            releaseCluster(vm.getClusterId());
        }
    }
}