/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.aurora.scheduler.state;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import javax.inject.Inject;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import org.apache.aurora.common.util.Clock;
import org.apache.aurora.gen.AssignedTask;
import org.apache.aurora.gen.ScheduleStatus;
import org.apache.aurora.gen.ScheduledTask;
import org.apache.aurora.gen.TaskEvent;
import org.apache.aurora.scheduler.TaskIdGenerator;
import org.apache.aurora.scheduler.base.Query;
import org.apache.aurora.scheduler.base.Tasks;
import org.apache.aurora.scheduler.events.EventSink;
import org.apache.aurora.scheduler.events.PubsubEvent;
import org.apache.aurora.scheduler.events.PubsubEvent.TaskStateChange;
import org.apache.aurora.scheduler.mesos.Driver;
import org.apache.aurora.scheduler.scheduling.RescheduleCalculator;
import org.apache.aurora.scheduler.state.SideEffect.Action;
import org.apache.aurora.scheduler.storage.Storage.MutableStoreProvider;
import org.apache.aurora.scheduler.storage.TaskStore;
import org.apache.aurora.scheduler.storage.entities.IAssignedTask;
import org.apache.aurora.scheduler.storage.entities.IScheduledTask;
import org.apache.aurora.scheduler.storage.entities.ITaskConfig;
import org.apache.mesos.v1.Protos.AgentID;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static java.util.Objects.requireNonNull;
import static org.apache.aurora.common.base.MorePreconditions.checkNotBlank;
import static org.apache.aurora.gen.ScheduleStatus.ASSIGNED;
import static org.apache.aurora.gen.ScheduleStatus.INIT;
import static org.apache.aurora.gen.ScheduleStatus.PENDING;
import static org.apache.aurora.gen.ScheduleStatus.THROTTLED;
import static org.apache.aurora.scheduler.state.StateChangeResult.INVALID_CAS_STATE;
import static org.apache.aurora.scheduler.state.StateChangeResult.SUCCESS;
/**
* Manager of all persistence-related operations for the scheduler. Acts as a controller for
* persisted state machine transitions, and their side-effects.
*/
public class StateManagerImpl implements StateManager {
// Class-wide logger.
private static final Logger LOG = LoggerFactory.getLogger(StateManagerImpl.class);
// Supplies the timestamp stamped on the task event written when a state change is saved.
private final Clock clock;
// Used to request that a task be killed when a transition yields a KILL side-effect.
private final Driver driver;
// Generates the IDs of newly created tasks, including replacements made when rescheduling.
private final TaskIdGenerator taskIdGenerator;
// Receives the state-change and deletion events collected while side-effects are applied.
private final EventSink eventSink;
// Provides the flapping penalty consulted when a task is rescheduled.
private final RescheduleCalculator rescheduleCalculator;
/**
 * Creates a state manager wired to its collaborators.
 *
 * @param clock Source of the timestamps recorded on task events.
 * @param driver Used to kill tasks.
 * @param taskIdGenerator Generator for the IDs of newly created tasks.
 * @param eventSink Destination for the events produced by state changes.
 * @param rescheduleCalculator Source of the flapping penalty used when rescheduling.
 * @throws NullPointerException if any argument is {@code null}.
 */
@Inject
StateManagerImpl(
    Clock clock,
    Driver driver,
    TaskIdGenerator taskIdGenerator,
    EventSink eventSink,
    RescheduleCalculator rescheduleCalculator) {

  // Reject missing collaborators first, in declaration order.
  requireNonNull(clock);
  requireNonNull(driver);
  requireNonNull(taskIdGenerator);
  requireNonNull(eventSink);
  requireNonNull(rescheduleCalculator);

  this.clock = clock;
  this.driver = driver;
  this.taskIdGenerator = taskIdGenerator;
  this.eventSink = eventSink;
  this.rescheduleCalculator = rescheduleCalculator;
}
/**
 * Builds a new task in the {@code INIT} state for a single instance.
 *
 * @param instanceId Instance the new task stands for.
 * @param template Task configuration; a builder copy of it is stored on the new task.
 * @return The new task, wrapped via {@code IScheduledTask.build}, with an ID obtained from the
 *     task ID generator.
 */
private IScheduledTask createTask(int instanceId, ITaskConfig template) {
  AssignedTask assignment = new AssignedTask();
  assignment.setTaskId(taskIdGenerator.generate(template, instanceId));
  assignment.setInstanceId(instanceId);
  assignment.setTask(template.newBuilder());

  ScheduledTask scheduled = new ScheduledTask();
  scheduled.setStatus(INIT);
  scheduled.setAssignedTask(assignment);

  return IScheduledTask.build(scheduled);
}
/**
 * Creates one task per requested instance ID, saves them, and then drives each one towards
 * {@code PENDING}.
 *
 * @param storeProvider Storage handle used to look up active tasks and to persist the new ones.
 * @param task Configuration shared by every task that is created.
 * @param instanceIds Instance IDs to create tasks for; must not be blank.
 * @throws NullPointerException if {@code storeProvider} or {@code task} is {@code null}.
 * @throws IllegalArgumentException if a requested instance ID is already used by an active task
 *     of the same job.
 */
@Override
public void insertPendingTasks(
    MutableStoreProvider storeProvider,
    ITaskConfig task,
    Set<Integer> instanceIds) {

  requireNonNull(storeProvider);
  requireNonNull(task);
  checkNotBlank(instanceIds);

  // The new task objects are built before the store is consulted.
  Set<IScheduledTask> newTasks = FluentIterable.from(instanceIds)
      .transform(id -> createTask(id, task))
      .toSet();

  Iterable<IScheduledTask> activeTasks =
      storeProvider.getTaskStore().fetchTasks(Query.jobScoped(task.getJob()).active());
  Set<Integer> activeInstanceIds = FluentIterable.from(activeTasks)
      .transform(Tasks::getInstanceId)
      .toSet();

  // An instance may only be backed by one active task at a time.
  boolean collides = !Sets.intersection(activeInstanceIds, instanceIds).isEmpty();
  if (collides) {
    throw new IllegalArgumentException("Instance ID collision detected.");
  }

  storeProvider.getUnsafeTaskStore().saveTasks(newTasks);

  // Each saved task is then run through the state machine with PENDING as the target.
  newTasks.forEach(created -> updateTaskAndExternalState(
      storeProvider.getUnsafeTaskStore(),
      Tasks.id(created),
      Optional.of(created),
      Optional.of(PENDING),
      Optional.absent()));
}
/**
 * Attempts to move a task to a new state, optionally as a compare-and-set operation.
 *
 * @param storeProvider Storage handle whose task store is read and updated.
 * @param taskId ID of the task to transition.
 * @param casState When present, the status the stored task must currently have for the change
 *     to be attempted.
 * @param newState State to move the task to.
 * @param auditMessage Optional message to record with the transition.
 * @return The outcome of the attempted transition.
 */
@Override
public StateChangeResult changeState(
    MutableStoreProvider storeProvider,
    String taskId,
    Optional<ScheduleStatus> casState,
    ScheduleStatus newState,
    Optional<String> auditMessage) {

  TaskStore.Mutable taskStore = storeProvider.getUnsafeTaskStore();
  return updateTaskAndExternalState(taskStore, casState, taskId, newState, auditMessage);
}
/**
 * Records the agent and resources chosen for a task, then transitions the task to
 * {@code ASSIGNED}.
 *
 * @param storeProvider Storage handle whose task store is mutated.
 * @param taskId ID of the task to assign; must not be blank.
 * @param slaveHost Host name of the agent the task is assigned to; must not be blank.
 * @param slaveId ID of the agent the task is assigned to.
 * @param resourceAssigner Function applied to the stored assigned task; its result replaces the
 *     assigned task before the host and agent ID are set on it.
 * @return The assigned task as returned by the store mutation (captured before the
 *     {@code ASSIGNED} transition is applied).
 * @throws NullPointerException if {@code slaveId} or {@code resourceAssigner} is {@code null}.
 * @throws IllegalStateException if the store holds no task with the given ID, or if the
 *     transition to {@code ASSIGNED} does not succeed.
 */
@Override
public IAssignedTask assignTask(
    MutableStoreProvider storeProvider,
    String taskId,
    String slaveHost,
    AgentID slaveId,
    Function<IAssignedTask, IAssignedTask> resourceAssigner) {

  checkNotBlank(taskId);
  checkNotBlank(slaveHost);
  requireNonNull(slaveId);
  requireNonNull(resourceAssigner);

  Optional<IScheduledTask> mutated = storeProvider.getUnsafeTaskStore().mutateTask(taskId,
      task -> {
        ScheduledTask builder = task.newBuilder();
        builder.setAssignedTask(resourceAssigner.apply(task.getAssignedTask()).newBuilder());
        builder.getAssignedTask()
            .setSlaveHost(slaveHost)
            .setSlaveId(slaveId.getValue());
        return IScheduledTask.build(builder);
      });

  // Fail with an actionable message instead of the generic one produced by calling get() on an
  // absent Optional. The exception type (IllegalStateException) is unchanged.
  Preconditions.checkState(
      mutated.isPresent(),
      "Attempt to assign unknown task %s to %s failed",
      taskId,
      slaveHost);

  StateChangeResult changeResult = updateTaskAndExternalState(
      storeProvider.getUnsafeTaskStore(),
      Optional.absent(),
      taskId,
      ASSIGNED,
      Optional.absent());

  Preconditions.checkState(
      changeResult == SUCCESS,
      "Attempt to assign task %s to %s failed",
      taskId,
      slaveHost);

  return mutated.get().getAssignedTask();
}
/**
 * Memoizing supplier of the local machine's host name. The value is recorded as the scheduler
 * on each task event written when a state change is saved.
 *
 * <p>If the host name cannot be resolved, the failure is logged together with its cause and
 * rethrown wrapped in a {@link RuntimeException}.
 */
@VisibleForTesting
static final Supplier<String> LOCAL_HOST_SUPPLIER = Suppliers.memoize(
    () -> {
      try {
        return InetAddress.getLocalHost().getHostName();
      } catch (UnknownHostException e) {
        // Hand the exception to the logger so the cause appears in the error log entry itself.
        LOG.error("Failed to get self hostname.", e);
        throw new RuntimeException(e);
      }
    });
/**
 * Applies a state transition guarded by an optional compare-and-set check.
 *
 * @param taskStore Store the task is read from and written to.
 * @param casState When present, the status the stored task must currently have.
 * @param taskId ID of the task to transition.
 * @param targetState State to move the task to.
 * @param transitionMessage Optional message to record with the transition.
 * @return {@code INVALID_CAS_STATE} when {@code casState} is present and the task is either
 *     missing or in a different status; otherwise the result of the transition.
 */
private StateChangeResult updateTaskAndExternalState(
    TaskStore.Mutable taskStore,
    Optional<ScheduleStatus> casState,
    String taskId,
    ScheduleStatus targetState,
    Optional<String> transitionMessage) {

  Optional<IScheduledTask> stored = taskStore.fetchTask(taskId);

  // A compare-and-set only proceeds when the task exists and is in the expected status.
  if (casState.isPresent()) {
    boolean statusMatches = stored.isPresent() && casState.get() == stored.get().getStatus();
    if (!statusMatches) {
      return INVALID_CAS_STATE;
    }
  }

  return updateTaskAndExternalState(
      taskStore,
      taskId,
      stored,
      Optional.of(targetState),
      transitionMessage);
}
// The sequence in which the side-effects of a single transition are applied.
private static final List<Action> ACTIONS_IN_ORDER = ImmutableList.of(
    Action.INCREMENT_FAILURES,
    Action.SAVE_STATE,
    Action.RESCHEDULE,
    Action.KILL,
    Action.DELETE);

static {
  // Fail class initialization if the list above and the Action enum ever diverge.
  Set<Action> ordered = ImmutableSet.copyOf(ACTIONS_IN_ORDER);
  Set<Action> declared = ImmutableSet.copyOf(Action.values());
  Preconditions.checkState(
      ordered.equals(declared),
      "Not all actions are included in ordering.");
}

// The order is deliberate. Deleting a task before rescheduling it would lose the object to copy,
// and rescheduling before incrementing the failure count would fail to carry the increment
// forward.
private static final Ordering<SideEffect> ACTION_ORDER =
    Ordering.explicit(ACTIONS_IN_ORDER)
        .onResultOf((SideEffect sideEffect) -> sideEffect.getAction());
/**
 * Runs the task state machine towards {@code targetState} and applies the resulting
 * side-effects, in {@code ACTION_ORDER}, against the task store and the driver. Events produced
 * along the way are collected and posted to the event sink once every side-effect has been
 * applied.
 *
 * @param taskStore Store that is read and mutated while side-effects are applied.
 * @param taskId ID of the task being transitioned.
 * @param task The task as seen by the caller, if any; used only to seed the state machine.
 * @param targetState State handed to the state machine; may be absent.
 * @param transitionMessage Optional message recorded on the task event when state is saved.
 * @return The result reported by the state machine for this transition.
 * @throws IllegalArgumentException if {@code task} is present but its ID differs from
 *     {@code taskId}.
 * @throws IllegalStateException if a side-effect that requires the stored task finds none, or
 *     if the side-effect action is not recognized.
 */
private StateChangeResult updateTaskAndExternalState(
TaskStore.Mutable taskStore,
String taskId,
// Note: This argument should be used with caution.
// This is because using the captured value within the storage operation below is
// highly-risky, since it doesn't necessarily represent the value in storage.
// As a result, it would be easy to accidentally clobber mutations.
Optional<IScheduledTask> task,
Optional<ScheduleStatus> targetState,
Optional<String> transitionMessage) {
if (task.isPresent()) {
Preconditions.checkArgument(taskId.equals(task.get().getAssignedTask().getTaskId()));
}
// Events are buffered here and only posted after the side-effect loop has finished.
List<PubsubEvent> events = Lists.newArrayList();
// Seed the state machine with the supplied task when there is one, otherwise with just the ID.
TaskStateMachine stateMachine = task.isPresent()
? new TaskStateMachine(task.get())
: new TaskStateMachine(taskId);
TransitionResult result = stateMachine.updateState(targetState);
for (SideEffect sideEffect : ACTION_ORDER.sortedCopy(result.getSideEffects())) {
// Re-read the task on every iteration so each side-effect works from what is in the store
// (including mutations made by earlier side-effects) rather than from the 'task' argument.
Optional<IScheduledTask> upToDateTask = taskStore.fetchTask(taskId);
switch (sideEffect.getAction()) {
case INCREMENT_FAILURES:
// Bump the stored failure count by one.
taskStore.mutateTask(taskId, task1 -> IScheduledTask.build(
task1.newBuilder().setFailureCount(task1.getFailureCount() + 1)));
break;
case SAVE_STATE:
Preconditions.checkState(
upToDateTask.isPresent(),
"Operation expected task %s to be present.",
taskId);
// Store the new status and append a task event stamped with the current time and this
// scheduler's host name.
// NOTE(review): targetState.get() assumes the state machine only emits SAVE_STATE when a
// target state was supplied - confirm against TaskStateMachine.
Optional<IScheduledTask> mutated = taskStore.mutateTask(taskId, task1 -> {
ScheduledTask mutableTask = task1.newBuilder();
mutableTask.setStatus(targetState.get());
mutableTask.addToTaskEvents(new TaskEvent()
.setTimestamp(clock.nowMillis())
.setStatus(targetState.get())
.setMessage(transitionMessage.orNull())
.setScheduler(LOCAL_HOST_SUPPLIER.get()));
return IScheduledTask.build(mutableTask);
});
events.add(TaskStateChange.transition(mutated.get(), stateMachine.getPreviousState()));
break;
case RESCHEDULE:
Preconditions.checkState(
upToDateTask.isPresent(),
"Operation expected task %s to be present.",
taskId);
LOG.info("Task being rescheduled: " + taskId);
ScheduleStatus newState;
String auditMessage;
// A positive flapping penalty sends the replacement to THROTTLED; otherwise it goes to PENDING.
long flapPenaltyMs = rescheduleCalculator.getFlappingPenaltyMs(upToDateTask.get());
if (flapPenaltyMs > 0) {
newState = THROTTLED;
auditMessage =
String.format("Rescheduled, penalized for %s ms for flapping", flapPenaltyMs);
} else {
newState = PENDING;
auditMessage = "Rescheduled";
}
// The replacement is a fresh task for the same instance and configuration that carries over
// the failure count and records the current task as its ancestor.
IScheduledTask newTask = IScheduledTask.build(createTask(
upToDateTask.get().getAssignedTask().getInstanceId(),
upToDateTask.get().getAssignedTask().getTask())
.newBuilder()
.setFailureCount(upToDateTask.get().getFailureCount())
.setAncestorId(taskId));
taskStore.saveTasks(ImmutableSet.of(newTask));
// Recursive call: the replacement task goes through its own transition and side-effects.
updateTaskAndExternalState(
taskStore,
Tasks.id(newTask),
Optional.of(newTask),
Optional.of(newState),
Optional.of(auditMessage));
break;
case KILL:
// Ask the driver to kill the task.
driver.killTask(taskId);
break;
case DELETE:
Preconditions.checkState(
upToDateTask.isPresent(),
"Operation expected task %s to be present.",
taskId);
// Remove the task from the store and queue the resulting deletion event.
events.add(deleteTasks(taskStore, ImmutableSet.of(taskId)));
break;
default:
throw new IllegalStateException("Unrecognized side-effect " + sideEffect.getAction());
}
}
// Note (AURORA-138): Delaying events until after the write operation is somewhat futile, since
// the state may actually not be written to durable store
// (e.g. if this is a nested transaction). Ideally, Storage would add a facility to attach
// side-effects that are performed after the outer-most transaction completes (meaning state
// has been durably persisted).
for (PubsubEvent event : events) {
eventSink.post(event);
}
return result.getResult();
}
/**
 * Runs every listed task that currently exists in the store through the state machine with no
 * target state. Presumably an absent target state is how deletion is requested - confirm
 * against {@code TaskStateMachine}. IDs that match no stored task are skipped.
 *
 * @param storeProvider Storage handle used to look up and update the tasks.
 * @param taskIds IDs of the tasks to delete.
 */
@Override
public void deleteTasks(MutableStoreProvider storeProvider, Set<String> taskIds) {
  Iterable<IScheduledTask> found =
      storeProvider.getTaskStore().fetchTasks(Query.taskScoped(taskIds));
  Map<String, IScheduledTask> tasksById = Maps.uniqueIndex(found, Tasks::id);

  tasksById.forEach((id, storedTask) -> updateTaskAndExternalState(
      storeProvider.getUnsafeTaskStore(),
      id,
      Optional.of(storedTask),
      Optional.absent(),
      Optional.absent()));
}
/**
 * Deletes the given tasks from the store and builds the event announcing their removal.
 *
 * @param taskStore Store to delete the tasks from.
 * @param taskIds IDs of the tasks to delete.
 * @return A {@code TasksDeleted} event carrying the tasks as they were fetched before deletion.
 */
private static PubsubEvent deleteTasks(TaskStore.Mutable taskStore, Set<String> taskIds) {
  // Copy the fetched tasks BEFORE deleting them, so the event contents do not depend on whether
  // fetchTasks hands back a materialized collection or a live view of the store (which of the
  // two it is cannot be told from this file).
  ImmutableSet<IScheduledTask> deleted =
      ImmutableSet.copyOf(taskStore.fetchTasks(Query.taskScoped(taskIds)));
  taskStore.deleteTasks(taskIds);
  return new PubsubEvent.TasksDeleted(deleted);
}
}