package com.hubspot.singularity.mesos; import java.util.ArrayList; import java.util.List; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import javax.inject.Singleton; import org.apache.mesos.Protos; import org.apache.mesos.Protos.Offer; import org.apache.mesos.Scheduler; import org.apache.mesos.SchedulerDriver; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.codahale.metrics.annotation.Timed; import com.google.common.collect.Sets; import com.google.inject.Inject; import com.google.inject.name.Named; import com.hubspot.mesos.JavaUtils; import com.hubspot.mesos.MesosUtils; import com.hubspot.singularity.SingularityAction; import com.hubspot.singularity.SingularityMainModule; import com.hubspot.singularity.config.SingularityConfiguration; import com.hubspot.singularity.data.DisasterManager; import com.hubspot.singularity.mesos.SingularitySlaveAndRackManager.CheckResult; @Singleton public class SingularityMesosScheduler implements Scheduler { private static final Logger LOG = LoggerFactory.getLogger(SingularityMesosScheduler.class); private final SingularityMesosFrameworkMessageHandler messageHandler; private final SingularitySlaveAndRackManager slaveAndRackManager; private final DisasterManager disasterManager; private final SchedulerDriverSupplier schedulerDriverSupplier; private final OfferCache offerCache; private final SingularityMesosOfferScheduler offerScheduler; private final SingularityMesosStatusUpdateHandler statusUpdateHandler; private final boolean offerCacheEnabled; private final boolean delayWhenStatusUpdateDeltaTooLarge; private final long delayWhenDeltaOverMs; private final AtomicLong statusUpdateDeltaAvg; @Inject public SingularityMesosScheduler(SingularityMesosFrameworkMessageHandler messageHandler, SingularitySlaveAndRackManager slaveAndRackManager, SchedulerDriverSupplier schedulerDriverSupplier, OfferCache offerCache, SingularityMesosOfferScheduler offerScheduler, SingularityMesosStatusUpdateHandler statusUpdateHandler, DisasterManager disasterManager, SingularityConfiguration configuration, @Named(SingularityMainModule.STATUS_UPDATE_DELTA_30S_AVERAGE) AtomicLong statusUpdateDeltaAvg) { this.messageHandler = messageHandler; this.slaveAndRackManager = slaveAndRackManager; this.schedulerDriverSupplier = schedulerDriverSupplier; this.disasterManager = disasterManager; this.offerCache = offerCache; this.offerScheduler = offerScheduler; this.statusUpdateHandler = statusUpdateHandler; this.offerCacheEnabled = configuration.isCacheOffers(); this.delayWhenStatusUpdateDeltaTooLarge = configuration.isDelayOfferProcessingForLargeStatusUpdateDelta(); this.delayWhenDeltaOverMs = configuration.getDelayPollersWhenDeltaOverMs(); this.statusUpdateDeltaAvg = statusUpdateDeltaAvg; } @Override public void registered(SchedulerDriver driver, Protos.FrameworkID frameworkId, Protos.MasterInfo masterInfo) { LOG.info("Registered driver {}, with frameworkId {} and master {}", driver, frameworkId, masterInfo); schedulerDriverSupplier.setSchedulerDriver(driver); } @Override public void reregistered(SchedulerDriver driver, Protos.MasterInfo masterInfo) { LOG.info("Reregistered driver {}, with master {}", driver, masterInfo); schedulerDriverSupplier.setSchedulerDriver(driver); } @Override @Timed public void resourceOffers(SchedulerDriver driver, List<Protos.Offer> offers) { final long start = System.currentTimeMillis(); LOG.info("Received {} offer(s)", offers.size()); boolean delclineImmediately = false; if (disasterManager.isDisabled(SingularityAction.PROCESS_OFFERS)) { LOG.info("Processing offers is currently disabled, declining {} offers", offers.size()); delclineImmediately = true; } if (delayWhenStatusUpdateDeltaTooLarge && statusUpdateDeltaAvg.get() > delayWhenDeltaOverMs) { LOG.info("Status update delta is too large ({}), declining offers while status updates catch up", statusUpdateDeltaAvg.get()); delclineImmediately = true; } if (delclineImmediately) { for (Protos.Offer offer : offers) { driver.declineOffer(offer.getId()); } return; } if (offerCacheEnabled) { if (disasterManager.isDisabled(SingularityAction.CACHE_OFFERS)) { offerCache.disableOfferCache(); } else { offerCache.enableOfferCache(); } } List<Protos.Offer> offersToCheck = new ArrayList<>(offers); for (Offer offer : offers) { String rolesInfo = MesosUtils.getRoles(offer).toString(); LOG.debug("Received offer ID {} with roles {} from {} ({}) for {} cpu(s), {} memory, {} ports, and {} disk", offer.getId().getValue(), rolesInfo, offer.getHostname(), offer.getSlaveId().getValue(), MesosUtils.getNumCpus(offer), MesosUtils.getMemory(offer), MesosUtils.getNumPorts(offer), MesosUtils.getDisk(offer)); CheckResult checkResult = slaveAndRackManager.checkOffer(offer); if (checkResult == CheckResult.NOT_ACCEPTING_TASKS) { driver.declineOffer(offer.getId()); offersToCheck.remove(offer); LOG.debug("Will decline offer {}, slave {} is not currently in a state to launch tasks", offer.getId().getValue(), offer.getHostname()); } } final Set<Protos.OfferID> acceptedOffers = Sets.newHashSetWithExpectedSize(offersToCheck.size()); try { List<SingularityOfferHolder> offerHolders = offerScheduler.checkOffers(offers); for (SingularityOfferHolder offerHolder : offerHolders) { if (!offerHolder.getAcceptedTasks().isEmpty()) { offerHolder.launchTasks(driver); acceptedOffers.add(offerHolder.getOffer().getId()); } else { offerCache.cacheOffer(driver, start, offerHolder.getOffer()); } } } catch (Throwable t) { LOG.error("Received fatal error while handling offers - will decline all available offers", t); for (Protos.Offer offer : offersToCheck) { if (acceptedOffers.contains(offer.getId())) { continue; } driver.declineOffer(offer.getId()); } throw t; } LOG.info("Finished handling {} new offer(s) ({}), {} accepted, {} declined/cached", offers.size(), JavaUtils.duration(start), acceptedOffers.size(), offers.size() - acceptedOffers.size()); } @Override public void offerRescinded(SchedulerDriver driver, Protos.OfferID offerId) { LOG.info("Offer {} rescinded", offerId); offerCache.rescindOffer(driver, offerId); } @Override public void statusUpdate(SchedulerDriver driver, Protos.TaskStatus status) { statusUpdateHandler.processStatusUpdate(status); } @Override public void frameworkMessage(SchedulerDriver driver, Protos.ExecutorID executorId, Protos.SlaveID slaveId, byte[] data) { LOG.info("Framework message from executor {} on slave {} with {} bytes of data", executorId, slaveId, data.length); messageHandler.handleMessage(executorId, slaveId, data); } @Override public void disconnected(SchedulerDriver driver) { schedulerDriverSupplier.setSchedulerDriver(null); LOG.warn("Scheduler/Driver disconnected"); } @Override public void slaveLost(SchedulerDriver driver, Protos.SlaveID slaveId) { LOG.warn("Lost a slave {}", slaveId); slaveAndRackManager.slaveLost(slaveId); } @Override public void executorLost(SchedulerDriver driver, Protos.ExecutorID executorId, Protos.SlaveID slaveId, int status) { LOG.warn("Lost an executor {} on slave {} with status {}", executorId, slaveId, status); } @Override public void error(SchedulerDriver driver, String message) { LOG.warn("Error from mesos: {}", message); } public boolean isConnected() { return schedulerDriverSupplier.get().isPresent(); } }