/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.aurora.scheduler.scheduling;
import java.lang.annotation.Retention;
import java.lang.annotation.Target;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicLong;
import javax.inject.Inject;
import javax.inject.Qualifier;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.eventbus.Subscribe;
import com.google.common.util.concurrent.RateLimiter;
import org.apache.aurora.common.quantity.Amount;
import org.apache.aurora.common.quantity.Time;
import org.apache.aurora.common.stats.SlidingStats;
import org.apache.aurora.common.stats.StatsProvider;
import org.apache.aurora.common.util.BackoffStrategy;
import org.apache.aurora.scheduler.BatchWorker;
import org.apache.aurora.scheduler.async.AsyncModule.AsyncExecutor;
import org.apache.aurora.scheduler.async.DelayExecutor;
import org.apache.aurora.scheduler.base.TaskGroupKey;
import org.apache.aurora.scheduler.base.Tasks;
import org.apache.aurora.scheduler.events.PubsubEvent.EventSubscriber;
import org.apache.aurora.scheduler.events.PubsubEvent.TaskStateChange;
import org.apache.aurora.scheduler.events.PubsubEvent.TasksDeleted;
import org.apache.aurora.scheduler.storage.Storage;
import org.apache.aurora.scheduler.storage.entities.IAssignedTask;
import org.apache.aurora.scheduler.storage.entities.IScheduledTask;
import static java.lang.annotation.ElementType.FIELD;
import static java.lang.annotation.ElementType.METHOD;
import static java.lang.annotation.ElementType.PARAMETER;
import static java.lang.annotation.RetentionPolicy.RUNTIME;
import static java.util.Objects.requireNonNull;
import static org.apache.aurora.gen.ScheduleStatus.PENDING;
/**
 * A collection of task groups, where a task group is a collection of tasks that are known to be
 * equal in the way they schedule. This is expected to be tasks associated with the same job key,
 * that also have {@code equals()}
 * {@link org.apache.aurora.scheduler.storage.entities.ITaskConfig} values.
 * <p>
 * This is used to prevent redundant work in trying to schedule tasks as well as to provide
 * nearly-equal responsiveness when scheduling across jobs. In other words, a 1000 instance job
 * cannot starve a 1 instance job.
 * <p>
 * Group creation, task insertion, task deletion and group removal all run while holding this
 * object's intrinsic lock, so a group cannot be removed while a task is being added to it.
 */
public class TaskGroups implements EventSubscriber {

  /** Name of the counter that is incremented each time the rate limiter delays an attempt. */
  @VisibleForTesting
  static final String SCHEDULE_ATTEMPTS_BLOCKS = "schedule_attempts_blocks";

  /** Live groups, keyed by the scheduling-equivalence key of their tasks. */
  private final ConcurrentMap<TaskGroupKey, TaskGroup> groups = Maps.newConcurrentMap();

  private final DelayExecutor executor;
  private final TaskGroupsSettings settings;
  private final TaskScheduler taskScheduler;
  private final RescheduleCalculator rescheduleCalculator;
  private final BatchWorker<Set<String>> batchWorker;

  // Track the penalties of tasks at the time they were scheduled. This is to provide data that
  // may influence the selection of a different backoff strategy.
  private final SlidingStats scheduledTaskPenalties =
      new SlidingStats("scheduled_task_penalty", "ms");

  private final AtomicLong scheduleAttemptsBlocks;

  /**
   * Annotation for the max scheduling batch size.
   */
  @VisibleForTesting
  @Qualifier
  @Target({ FIELD, PARAMETER, METHOD }) @Retention(RUNTIME)
  public @interface SchedulingMaxBatchSize { }

  /**
   * Batch worker used to run scheduling attempts; each work item yields the set of task IDs
   * that were scheduled.
   */
  @VisibleForTesting
  public static class TaskGroupBatchWorker extends BatchWorker<Set<String>> {
    @Inject
    TaskGroupBatchWorker(
        Storage storage,
        StatsProvider statsProvider,
        @SchedulingMaxBatchSize int maxBatchSize) {

      super(storage, statsProvider, maxBatchSize);
    }

    @Override
    protected String serviceName() {
      return "TaskGroupBatchWorker";
    }
  }

  /**
   * Tunables that control how task groups are evaluated.
   */
  public static class TaskGroupsSettings {
    /** Penalty applied to a new group, and to a group that made progress but has tasks left. */
    private final Amount<Long, Time> firstScheduleDelay;
    /** Computes the next penalty for a group whose attempt scheduled nothing. */
    private final BackoffStrategy taskGroupBackoff;
    /** Throttles scheduling attempts across all groups. */
    private final RateLimiter rateLimiter;
    /** Upper bound on the number of task IDs taken from a group per attempt. */
    private final int maxTasksPerSchedule;

    /**
     * Creates the settings, validating every argument.
     *
     * @param firstScheduleDelay Initial delay for a group; its value must be positive.
     * @param taskGroupBackoff Backoff strategy applied after an unsuccessful attempt.
     * @param rateLimiter Limiter acquired before every scheduling attempt.
     * @param maxTasksPerSchedule Maximum tasks per scheduling attempt; must be positive.
     * @throws NullPointerException if any reference argument is {@code null}.
     * @throws IllegalArgumentException if {@code firstScheduleDelay} or
     *     {@code maxTasksPerSchedule} is not positive.
     */
    public TaskGroupsSettings(
        Amount<Long, Time> firstScheduleDelay,
        BackoffStrategy taskGroupBackoff,
        RateLimiter rateLimiter,
        int maxTasksPerSchedule) {

      this.firstScheduleDelay = requireNonNull(firstScheduleDelay);
      Preconditions.checkArgument(firstScheduleDelay.getValue() > 0);
      this.taskGroupBackoff = requireNonNull(taskGroupBackoff);
      this.rateLimiter = requireNonNull(rateLimiter);
      this.maxTasksPerSchedule = maxTasksPerSchedule;
      Preconditions.checkArgument(maxTasksPerSchedule > 0);
    }
  }

  /**
   * Creates the task groups collection.
   *
   * @param executor Executor used to run group evaluations after a delay.
   * @param settings Scheduling tunables.
   * @param taskScheduler Scheduler invoked with the task IDs of each attempt.
   * @param rescheduleCalculator Supplies the initial delay for tasks that did not arrive via a
   *     state transition.
   * @param batchWorker Worker that executes the scheduling attempts.
   * @param statsProvider Provider used to create the {@link #SCHEDULE_ATTEMPTS_BLOCKS} counter.
   * @throws NullPointerException if any argument is {@code null}.
   */
  @VisibleForTesting
  @Inject
  public TaskGroups(
      @AsyncExecutor DelayExecutor executor,
      TaskGroupsSettings settings,
      TaskScheduler taskScheduler,
      RescheduleCalculator rescheduleCalculator,
      TaskGroupBatchWorker batchWorker,
      StatsProvider statsProvider) {

    this.executor = requireNonNull(executor);
    this.settings = requireNonNull(settings);
    this.taskScheduler = requireNonNull(taskScheduler);
    this.rescheduleCalculator = requireNonNull(rescheduleCalculator);
    this.batchWorker = requireNonNull(batchWorker);
    this.scheduleAttemptsBlocks =
        requireNonNull(statsProvider).makeCounter(SCHEDULE_ATTEMPTS_BLOCKS);
  }

  /**
   * Schedules another evaluation of {@code group} after its current penalty, or drops the group
   * from the collection when it has no more tasks.
   *
   * @param evaluate Work to run when the penalty elapses.
   * @param group Group to re-evaluate or remove.
   */
  private synchronized void evaluateGroupLater(Runnable evaluate, TaskGroup group) {
    // Avoid check-then-act by holding the intrinsic lock. If not done atomically, we could
    // remove a group while a task is being added to it.
    if (group.hasMore()) {
      executor.execute(evaluate, Amount.of(group.getPenaltyMs(), Time.MILLISECONDS));
    } else {
      groups.remove(group.getKey());
    }
  }

  /**
   * Blocks until the scheduling attempt completes and returns the IDs it scheduled.
   * <p>
   * The interrupt flag is restored only when this thread was actually interrupted. A failure of
   * the scheduling work itself must not mark the calling thread as interrupted, otherwise
   * unrelated blocking calls made later on that thread would fail spuriously.
   *
   * @param result Pending result of a scheduling attempt.
   * @return The set of task IDs produced by the attempt.
   * @throws RuntimeException wrapping the {@link InterruptedException} or
   *     {@link ExecutionException} raised while waiting.
   */
  private static Set<String> awaitScheduled(CompletableFuture<Set<String>> result) {
    try {
      return result.get();
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    } catch (ExecutionException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Starts the evaluation loop for {@code group}: each pass attempts to schedule a bounded batch
   * of the group's tasks, computes the next penalty and re-submits itself.
   *
   * @param group Newly created group to monitor.
   */
  private void startGroup(final TaskGroup group) {
    Runnable monitor = new Runnable() {
      @Override
      public void run() {
        final Set<String> taskIds = group.peek(settings.maxTasksPerSchedule);
        long penaltyMs = 0;
        if (!taskIds.isEmpty()) {
          // acquire() reports the time spent waiting for a permit; a positive value means this
          // attempt was throttled.
          if (settings.rateLimiter.acquire() > 0) {
            scheduleAttemptsBlocks.incrementAndGet();
          }

          CompletableFuture<Set<String>> result = batchWorker.execute(storeProvider ->
              taskScheduler.schedule(storeProvider, taskIds));
          // A failure here propagates out of run(), so the re-evaluation below is skipped.
          Set<String> scheduled = awaitScheduled(result);

          scheduledTaskPenalties.accumulate(group.getPenaltyMs());
          if (scheduled.isEmpty()) {
            // Nothing could be scheduled: back off based on the current penalty.
            penaltyMs = settings.taskGroupBackoff.calculateBackoffMs(group.getPenaltyMs());
          } else {
            group.remove(scheduled);
            if (group.hasMore()) {
              // Progress was made: reset the remaining tasks to the initial delay.
              penaltyMs = settings.firstScheduleDelay.as(Time.MILLISECONDS);
            }
          }
        }

        group.setPenaltyMs(penaltyMs);
        evaluateGroupLater(this, group);
      }
    };
    evaluateGroupLater(monitor, group);
  }

  /**
   * Informs the task groups of a task state change.
   * <p>
   * This is used to observe {@link org.apache.aurora.gen.ScheduleStatus#PENDING} tasks and begin
   * attempting to schedule them. The first task seen for a key creates and starts a new group;
   * later tasks with the same key are added to the existing group. A new group's initial penalty
   * is the configured first-schedule delay when the event is a transition, and otherwise the
   * startup delay supplied by the reschedule calculator.
   *
   * @param stateChange State change notification.
   */
  @Subscribe
  public synchronized void taskChangedState(TaskStateChange stateChange) {
    if (stateChange.getNewState() == PENDING) {
      IScheduledTask task = stateChange.getTask();
      TaskGroupKey key = TaskGroupKey.from(task.getAssignedTask().getTask());
      TaskGroup newGroup = new TaskGroup(key, Tasks.id(task));
      TaskGroup existing = groups.putIfAbsent(key, newGroup);
      if (existing == null) {
        long penaltyMs;
        if (stateChange.isTransition()) {
          penaltyMs = settings.firstScheduleDelay.as(Time.MILLISECONDS);
        } else {
          penaltyMs = rescheduleCalculator.getStartupScheduleDelayMs(task);
        }
        newGroup.setPenaltyMs(penaltyMs);
        startGroup(newGroup);
      } else {
        existing.offer(Tasks.id(task));
      }
    }
  }

  /**
   * Signals the scheduler that tasks have been deleted.
   * <p>
   * Each deleted task is removed from its group, if that group still exists.
   *
   * @param deleted Tasks deleted event.
   */
  @Subscribe
  public synchronized void tasksDeleted(TasksDeleted deleted) {
    for (IAssignedTask task
        : Iterables.transform(deleted.getTasks(), IScheduledTask::getAssignedTask)) {
      TaskGroup group = groups.get(TaskGroupKey.from(task.getTask()));
      if (group != null) {
        group.remove(ImmutableSet.of(task.getTaskId()));
      }
    }
  }

  /**
   * Returns a snapshot of the groups that currently exist.
   *
   * @return An immutable copy of the current groups; later changes are not reflected in it.
   */
  public Iterable<TaskGroup> getGroups() {
    return ImmutableSet.copyOf(groups.values());
  }
}