package com.hubspot.singularity.mesos;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.inject.Singleton;
import org.apache.mesos.Protos;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Optional;
import com.google.common.collect.Lists;
import com.google.inject.Inject;
import com.google.inject.Provider;
import com.hubspot.mesos.MesosUtils;
import com.hubspot.mesos.Resources;
import com.hubspot.singularity.RequestType;
import com.hubspot.singularity.SingularityTask;
import com.hubspot.singularity.SingularityTaskRequest;
import com.hubspot.singularity.SlaveMatchState;
import com.hubspot.singularity.config.CustomExecutorConfiguration;
import com.hubspot.singularity.config.MesosConfiguration;
import com.hubspot.singularity.config.SingularityConfiguration;
import com.hubspot.singularity.data.DisasterManager;
import com.hubspot.singularity.data.TaskManager;
import com.hubspot.singularity.scheduler.SingularityScheduler;
import com.hubspot.singularity.scheduler.SingularitySchedulerStateCache;
@Singleton
public class SingularityMesosOfferScheduler {
private static final Logger LOG = LoggerFactory.getLogger(SingularityMesosOfferScheduler.class);
private final Resources defaultResources;
private final Resources defaultCustomExecutorResources;
private final TaskManager taskManager;
private final SingularityMesosTaskPrioritizer taskPrioritizer;
private final SingularityScheduler scheduler;
private final SingularityConfiguration configuration;
private final SingularityMesosTaskBuilder mesosTaskBuilder;
private final SingularitySlaveAndRackManager slaveAndRackManager;
private final SingularitySlaveAndRackHelper slaveAndRackHelper;
private final SingularityTaskSizeOptimizer taskSizeOptimizer;
private final DisasterManager disasterManager;
private final Provider<SingularitySchedulerStateCache> stateCacheProvider;
private final SchedulerDriverSupplier schedulerDriverSupplier;
@Inject
public SingularityMesosOfferScheduler(MesosConfiguration mesosConfiguration, CustomExecutorConfiguration customExecutorConfiguration, TaskManager taskManager, SingularityMesosTaskPrioritizer taskPrioritizer,
SingularityScheduler scheduler, SingularityConfiguration configuration, SingularityMesosTaskBuilder mesosTaskBuilder,
SingularitySlaveAndRackManager slaveAndRackManager, SingularityTaskSizeOptimizer taskSizeOptimizer, SingularitySlaveAndRackHelper slaveAndRackHelper,
Provider<SingularitySchedulerStateCache> stateCacheProvider, SchedulerDriverSupplier schedulerDriverSupplier, DisasterManager disasterManager) {
this.defaultResources = new Resources(mesosConfiguration.getDefaultCpus(), mesosConfiguration.getDefaultMemory(), 0, mesosConfiguration.getDefaultDisk());
this.defaultCustomExecutorResources = new Resources(customExecutorConfiguration.getNumCpus(), customExecutorConfiguration.getMemoryMb(), 0, customExecutorConfiguration.getDiskMb());
this.taskManager = taskManager;
this.scheduler = scheduler;
this.configuration = configuration;
this.mesosTaskBuilder = mesosTaskBuilder;
this.slaveAndRackManager = slaveAndRackManager;
this.taskSizeOptimizer = taskSizeOptimizer;
this.stateCacheProvider = stateCacheProvider;
this.slaveAndRackHelper = slaveAndRackHelper;
this.disasterManager = disasterManager;
this.schedulerDriverSupplier = schedulerDriverSupplier;
this.taskPrioritizer = taskPrioritizer;
}
private Map<String, SingularityTaskRequestHolder> getDueTaskRequestHolders() {
final List<SingularityTaskRequest> taskRequests = taskPrioritizer.getSortedDueTasks(scheduler.getDueTasks());
for (SingularityTaskRequest taskRequest : taskRequests) {
LOG.trace("Task {} is due", taskRequest.getPendingTask().getPendingTaskId());
}
taskPrioritizer.removeTasksAffectedByPriorityFreeze(taskRequests);
final Map<String, SingularityTaskRequestHolder> taskRequestHolders = new HashMap<>(taskRequests.size());
for (SingularityTaskRequest taskRequest : taskRequests) {
taskRequestHolders.put(taskRequest.getPendingTask().getPendingTaskId().getId(), new SingularityTaskRequestHolder(taskRequest, defaultResources, defaultCustomExecutorResources));
}
return taskRequestHolders;
}
public List<SingularityOfferHolder> checkOffers(final Collection<Protos.Offer> offers) {
boolean useTaskCredits = disasterManager.isTaskCreditEnabled();
int taskCredits = useTaskCredits ? disasterManager.getUpdatedCreditCount() : -1;
final SingularitySchedulerStateCache stateCache = stateCacheProvider.get();
scheduler.checkForDecomissions(stateCache);
scheduler.drainPendingQueue(stateCache);
final Map<String, SingularityTaskRequestHolder> pendingTaskIdToTaskRequest = getDueTaskRequestHolders();
final int numDueTasks = pendingTaskIdToTaskRequest.size();
if (offers.isEmpty()) {
LOG.debug("No offers to check");
return Collections.emptyList();
}
final List<SingularityOfferHolder> offerHolders = Lists.newArrayListWithCapacity(offers.size());
final Map<String, Map<String, Integer>> tasksPerOfferPerRequest = new HashMap<>();
for (Protos.Offer offer : offers) {
offerHolders.add(new SingularityOfferHolder(offer, numDueTasks, slaveAndRackHelper.getRackIdOrDefault(offer), slaveAndRackHelper.getTextAttributes(offer),
slaveAndRackHelper.getReservedSlaveAttributes(offer)));
}
boolean addedTaskInLastLoop = true;
int tasksScheduled = 0;
while (!pendingTaskIdToTaskRequest.isEmpty() && addedTaskInLastLoop && canScheduleAdditionalTasks(taskCredits)) {
addedTaskInLastLoop = false;
Collections.shuffle(offerHolders);
for (SingularityOfferHolder offerHolder : offerHolders) {
if (configuration.getMaxTasksPerOffer() > 0 && offerHolder.getAcceptedTasks().size() >= configuration.getMaxTasksPerOffer()) {
LOG.trace("Offer {} is full ({}) - skipping", offerHolder.getOffer(), offerHolder.getAcceptedTasks().size());
continue;
}
Optional<SingularityTask> accepted = match(pendingTaskIdToTaskRequest.values(), stateCache, offerHolder, tasksPerOfferPerRequest);
if (accepted.isPresent()) {
tasksScheduled++;
if (useTaskCredits) {
taskCredits--;
LOG.debug("Remaining task credits: {}", taskCredits);
}
offerHolder.addMatchedTask(accepted.get());
addedTaskInLastLoop = true;
pendingTaskIdToTaskRequest.remove(accepted.get().getTaskRequest().getPendingTask().getPendingTaskId().getId());
if (useTaskCredits && taskCredits == 0) {
LOG.info("Used all available task credits, not scheduling any more tasks");
break;
}
}
if (pendingTaskIdToTaskRequest.isEmpty()) {
break;
}
}
}
if (useTaskCredits) {
disasterManager.saveTaskCreditCount(taskCredits);
}
LOG.info("{} tasks scheduled, {} tasks remaining after examining {} offers", tasksScheduled, numDueTasks - tasksScheduled, offers.size());
return offerHolders;
}
private boolean canScheduleAdditionalTasks(int taskCredits) {
return taskCredits == -1 || taskCredits > 0;
}
private Optional<SingularityTask> match(Collection<SingularityTaskRequestHolder> taskRequests, SingularitySchedulerStateCache stateCache, SingularityOfferHolder offerHolder,
Map<String, Map<String, Integer>> tasksPerOfferPerRequest) {
final String offerId = offerHolder.getOffer().getId().getValue();
for (SingularityTaskRequestHolder taskRequestHolder : taskRequests) {
final SingularityTaskRequest taskRequest = taskRequestHolder.getTaskRequest();
if (offerHolder.hasRejectedPendingTaskAlready(taskRequest.getPendingTask().getPendingTaskId())) {
continue;
}
if (tooManyTasksPerOfferForRequest(tasksPerOfferPerRequest, offerId, taskRequestHolder.getTaskRequest())) {
LOG.debug("Skipping task request for request id {}, too many tasks already scheduled using offer {}", taskRequest.getRequest().getId(), offerId);
continue;
}
if (taskRequest.getRequest().getRequestType() == RequestType.ON_DEMAND) {
int maxActiveOnDemandTasks = taskRequest.getRequest().getInstances().or(configuration.getMaxActiveOnDemandTasksPerRequest());
if (maxActiveOnDemandTasks > 0) {
int activeTasksForRequest = stateCache.getActiveTaskIdsForRequest(taskRequest.getRequest().getId()).size();
if (activeTasksForRequest >= maxActiveOnDemandTasks) {
LOG.debug("Skipping pending task {}, already running {} instances for request {} (max is {})", taskRequest.getPendingTask().getPendingTaskId(), activeTasksForRequest);
continue;
}
}
}
if (LOG.isTraceEnabled()) {
LOG.trace("Attempting to match task {} resources {} with required role '{}' ({} for task + {} for executor) with remaining offer resources {}", taskRequest.getPendingTask().getPendingTaskId(),
taskRequestHolder.getTotalResources(), taskRequest.getRequest().getRequiredRole().or("*"), taskRequestHolder.getTaskResources(), taskRequestHolder.getExecutorResources(),
offerHolder.getCurrentResources());
}
final boolean matchesResources = MesosUtils.doesOfferMatchResources(taskRequest.getRequest().getRequiredRole(), taskRequestHolder.getTotalResources(), offerHolder.getCurrentResources(),
taskRequestHolder.getRequestedPorts());
final SlaveMatchState slaveMatchState = slaveAndRackManager.doesOfferMatch(offerHolder, taskRequest, stateCache);
if (matchesResources && slaveMatchState.isMatchAllowed()) {
final SingularityTask task = mesosTaskBuilder.buildTask(offerHolder.getOffer(), offerHolder.getCurrentResources(), taskRequest, taskRequestHolder.getTaskResources(), taskRequestHolder.getExecutorResources());
final SingularityTask zkTask = taskSizeOptimizer.getSizeOptimizedTask(task);
if (LOG.isTraceEnabled()) {
LOG.trace("Accepted and built task {}", zkTask);
}
LOG.info("Launching task {} slot on slave {} ({})", task.getTaskId(), offerHolder.getOffer().getSlaveId().getValue(), offerHolder.getOffer().getHostname());
taskManager.createTaskAndDeletePendingTask(zkTask);
stateCache.getActiveTaskIds().add(task.getTaskId());
stateCache.getActiveTaskIdsForRequest(task.getTaskRequest().getRequest().getId()).add(task.getTaskId());
addRequestToMapByOfferId(tasksPerOfferPerRequest, offerId, taskRequest.getRequest().getId());
stateCache.getScheduledTasks().remove(taskRequest.getPendingTask());
return Optional.of(task);
} else {
offerHolder.addRejectedTask(taskRequest.getPendingTask().getPendingTaskId());
if (LOG.isTraceEnabled()) {
LOG.trace("Ignoring offer {} with roles {} on {} for task {}; matched resources: {}, slave match state: {}", offerHolder.getOffer().getId().getValue(),
MesosUtils.getRoles(offerHolder.getOffer()), offerHolder.getOffer().getHostname(), taskRequest.getPendingTask().getPendingTaskId(), matchesResources, slaveMatchState);
}
}
}
return Optional.absent();
}
private void addRequestToMapByOfferId(Map<String, Map<String, Integer>> tasksPerOfferPerRequest, String offerId, String requestId) {
if (tasksPerOfferPerRequest.containsKey(offerId)) {
if (tasksPerOfferPerRequest.get(offerId).containsKey(requestId)) {
int count = tasksPerOfferPerRequest.get(offerId).get(requestId);
tasksPerOfferPerRequest.get(offerId).put(requestId, count + 1);
} else {
tasksPerOfferPerRequest.get(offerId).put(requestId, 0);
}
} else {
tasksPerOfferPerRequest.put(offerId, new HashMap<String, Integer>());
tasksPerOfferPerRequest.get(offerId).put(requestId, 1);
}
}
private boolean tooManyTasksPerOfferForRequest(Map<String, Map<String, Integer>> tasksPerOfferPerRequest, String offerId, SingularityTaskRequest taskRequest) {
if (!tasksPerOfferPerRequest.containsKey(offerId)) {
return false;
}
if (!tasksPerOfferPerRequest.get(offerId).containsKey(taskRequest.getRequest().getId())) {
return false;
}
int maxPerOfferPerRequest = taskRequest.getRequest().getMaxTasksPerOffer().or(configuration.getMaxTasksPerOfferPerRequest());
if (!(maxPerOfferPerRequest > 0)) {
return false;
}
return tasksPerOfferPerRequest.get(offerId).get(taskRequest.getRequest().getId()) > maxPerOfferPerRequest;
}
public boolean isConnected() {
return schedulerDriverSupplier.get().isPresent();
}
}