package com.hubspot.singularity.scheduler; import java.util.Collection; import java.util.Collections; import java.util.List; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import javax.inject.Singleton; import org.apache.mesos.Protos.SlaveID; import org.apache.mesos.Protos.TaskID; import org.apache.mesos.Protos.TaskState; import org.apache.mesos.Protos.TaskStatus; import org.apache.mesos.SchedulerDriver; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.codahale.metrics.Histogram; import com.codahale.metrics.Snapshot; import com.codahale.metrics.UniformReservoir; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Optional; import com.google.common.collect.Lists; import com.google.inject.Inject; import com.google.inject.name.Named; import com.hubspot.mesos.JavaUtils; import com.hubspot.singularity.SingularityAbort; import com.hubspot.singularity.SingularityAbort.AbortReason; import com.hubspot.singularity.SingularityMainModule; import com.hubspot.singularity.SingularityManagedScheduledExecutorServiceFactory; import com.hubspot.singularity.SingularityTaskId; import com.hubspot.singularity.SingularityTaskReconciliationStatistics; import com.hubspot.singularity.SingularityTaskStatusHolder; import com.hubspot.singularity.config.SingularityConfiguration; import com.hubspot.singularity.data.StateManager; import com.hubspot.singularity.data.TaskManager; import com.hubspot.singularity.mesos.SchedulerDriverSupplier; import com.hubspot.singularity.sentry.SingularityExceptionNotifier; @Singleton public class SingularityTaskReconciliation { private static final Logger LOG = LoggerFactory.getLogger(SingularityTaskReconciliation.class); private final TaskManager taskManager; private final String serverId; private final ScheduledExecutorService executorService; private final AtomicBoolean isRunningReconciliation; private final SingularityConfiguration configuration; private final SingularityAbort abort; private final SingularityExceptionNotifier exceptionNotifier; private final SchedulerDriverSupplier schedulerDriverSupplier; private final StateManager stateManager; @Inject public SingularityTaskReconciliation(SingularityManagedScheduledExecutorServiceFactory executorServiceFactory, SingularityExceptionNotifier exceptionNotifier, TaskManager taskManager, StateManager stateManager, SingularityConfiguration configuration, @Named(SingularityMainModule.SERVER_ID_PROPERTY) String serverId, SingularityAbort abort, SchedulerDriverSupplier schedulerDriverSupplier) { this.taskManager = taskManager; this.stateManager = stateManager; this.serverId = serverId; this.exceptionNotifier = exceptionNotifier; this.configuration = configuration; this.abort = abort; this.schedulerDriverSupplier = schedulerDriverSupplier; this.isRunningReconciliation = new AtomicBoolean(false); this.executorService = executorServiceFactory.get(getClass().getSimpleName()); } enum ReconciliationState { ALREADY_RUNNING, STARTED, NO_DRIVER; } @VisibleForTesting boolean isReconciliationRunning() { return isRunningReconciliation.get(); } public ReconciliationState startReconciliation() { final long taskReconciliationStartedAt = System.currentTimeMillis(); if (!isRunningReconciliation.compareAndSet(false, true)) { LOG.info("Reconciliation is already running, NOT starting a new reconciliation process"); return ReconciliationState.ALREADY_RUNNING; } Optional<SchedulerDriver> schedulerDriver = schedulerDriverSupplier.get(); if (!schedulerDriver.isPresent()) { LOG.trace("Not running reconciliation - no schedulerDriver present"); isRunningReconciliation.set(false); return ReconciliationState.NO_DRIVER; } final List<SingularityTaskId> activeTaskIds = taskManager.getActiveTaskIds(); LOG.info("Starting a reconciliation cycle - {} current active tasks", activeTaskIds.size()); SchedulerDriver driver = schedulerDriver.get(); driver.reconcileTasks(Collections.<TaskStatus> emptyList()); scheduleReconciliationCheck(driver, taskReconciliationStartedAt, activeTaskIds, 0, new Histogram(new UniformReservoir())); return ReconciliationState.STARTED; } private void scheduleReconciliationCheck(final SchedulerDriver driver, final long reconciliationStart, final Collection<SingularityTaskId> remainingTaskIds, final int numTimes, final Histogram histogram) { LOG.info("Scheduling reconciliation check #{} - {} tasks left - waiting {}", numTimes + 1, remainingTaskIds.size(), JavaUtils.durationFromMillis(configuration.getCheckReconcileWhenRunningEveryMillis())); executorService.schedule(new Runnable() { @Override public void run() { try { checkReconciliation(driver, reconciliationStart, remainingTaskIds, numTimes + 1, histogram); } catch (Throwable t) { LOG.error("While checking for reconciliation tasks", t); exceptionNotifier.notify(String.format("Error checking for reconciliation tasks (%s)", t.getMessage()), t); abort.abort(AbortReason.UNRECOVERABLE_ERROR, Optional.of(t)); } } }, configuration.getCheckReconcileWhenRunningEveryMillis(), TimeUnit.MILLISECONDS); } private void checkReconciliation(final SchedulerDriver driver, final long reconciliationStart, final Collection<SingularityTaskId> remainingTaskIds, final int numTimes, final Histogram histogram) { final List<SingularityTaskStatusHolder> taskStatusHolders = taskManager.getLastActiveTaskStatusesFor(remainingTaskIds); final List<TaskStatus> taskStatuses = Lists.newArrayListWithCapacity(taskStatusHolders.size()); for (SingularityTaskStatusHolder taskStatusHolder : taskStatusHolders) { if (taskStatusHolder.getServerId().equals(serverId) && taskStatusHolder.getServerTimestamp() > reconciliationStart) { histogram.update(taskStatusHolder.getServerTimestamp() - reconciliationStart); continue; } if (taskStatusHolder.getTaskStatus().isPresent()) { LOG.debug("Re-requesting task status for {}", taskStatusHolder.getTaskId()); taskStatuses.add(taskStatusHolder.getTaskStatus().get()); } else { TaskStatus.Builder fakeTaskStatusBuilder = TaskStatus.newBuilder() .setTaskId(TaskID.newBuilder().setValue(taskStatusHolder.getTaskId().getId())) .setState(TaskState.TASK_STARTING); if (taskStatusHolder.getSlaveId().isPresent()) { fakeTaskStatusBuilder.setSlaveId(SlaveID.newBuilder().setValue(taskStatusHolder.getSlaveId().get())); } LOG.info("Task {} didn't have a TaskStatus yet, submitting fake status", taskStatusHolder.getTaskId()); taskStatuses.add(fakeTaskStatusBuilder.build()); } } if (taskStatuses.isEmpty()) { LOG.info("Task reconciliation ended after {} checks and {}", numTimes, JavaUtils.duration(reconciliationStart)); final Snapshot snapshot = histogram.getSnapshot(); stateManager.saveTaskReconciliationStatistics(new SingularityTaskReconciliationStatistics(reconciliationStart, System.currentTimeMillis() - reconciliationStart, numTimes, histogram.getCount(), snapshot.getMax(), snapshot.getMean(), snapshot.getMin(), snapshot.getMedian(), snapshot.get75thPercentile(), snapshot.get95thPercentile(), snapshot.get98thPercentile(), snapshot.get99thPercentile(), snapshot.get999thPercentile(), snapshot.getStdDev())); isRunningReconciliation.set(false); return; } LOG.info("Requesting reconciliation of {} taskStatuses, task reconciliation has been running for {}", taskStatuses.size(), JavaUtils.duration(reconciliationStart)); driver.reconcileTasks(taskStatuses); scheduleReconciliationCheck(driver, reconciliationStart, remainingTaskIds, numTimes, histogram); } }