/*- * -\-\- * Helios Services * -- * Copyright (C) 2016 Spotify AB * -- * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * -/-/- */ package com.spotify.helios.agent; import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Predicates.in; import static com.google.common.base.Predicates.not; import static com.spotify.helios.common.descriptors.Goal.START; import static com.spotify.helios.common.descriptors.Goal.UNDEPLOY; import static com.spotify.helios.servicescommon.Reactor.Callback; import static java.util.concurrent.TimeUnit.SECONDS; import com.google.common.base.Predicate; import com.google.common.base.Supplier; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.util.concurrent.AbstractIdleService; import com.spotify.helios.common.descriptors.Goal; import com.spotify.helios.common.descriptors.Job; import com.spotify.helios.common.descriptors.JobId; import com.spotify.helios.common.descriptors.Task; import com.spotify.helios.common.descriptors.TaskStatus; import com.spotify.helios.servicescommon.PersistentAtomicReference; import com.spotify.helios.servicescommon.Reactor; import com.spotify.helios.servicescommon.ReactorFactory; import java.util.Collections; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Runs jobs to implement the desired container deployment state. */ public class Agent extends AbstractIdleService { public static final Map<JobId, Execution> EMPTY_EXECUTIONS = Collections.emptyMap(); private static final Logger log = LoggerFactory.getLogger(Agent.class); private static final long UPDATE_INTERVAL = SECONDS.toMillis(30); private static final Predicate<Execution> PORT_ALLOCATION_PENDING = new Predicate<Execution>() { @Override public boolean apply(final Execution execution) { assert execution != null; return execution.getGoal() != UNDEPLOY && execution.getPorts() == null; } }; private static final Predicate<Execution> PORTS_ALLOCATED = new Predicate<Execution>() { @Override public boolean apply(final Execution execution) { assert execution != null; return execution.getPorts() != null; } }; private final AgentModel model; private final SupervisorFactory supervisorFactory; private final ModelListener modelListener = new ModelListener(); private final Supervisor.Listener supervisorListener = new SupervisorListener(); private final Map<JobId, Supervisor> supervisors = Maps.newHashMap(); private final Reactor reactor; private final PersistentAtomicReference<Map<JobId, Execution>> executions; private final PortAllocator portAllocator; private final Reaper reaper; /** * Create a new agent. * * @param model The model. * @param supervisorFactory The factory to use for creating supervisors. * @param reactorFactory The factory to use for creating reactors. * @param executions A persistent map of executions. * @param portAllocator Allocator for job ports. * @param reaper The reaper. */ public Agent(final AgentModel model, final SupervisorFactory supervisorFactory, final ReactorFactory reactorFactory, final PersistentAtomicReference<Map<JobId, Execution>> executions, final PortAllocator portAllocator, final Reaper reaper) { this.model = checkNotNull(model, "model"); this.supervisorFactory = checkNotNull(supervisorFactory, "supervisorFactory"); this.executions = checkNotNull(executions, "executions"); this.portAllocator = checkNotNull(portAllocator, "portAllocator"); this.reactor = checkNotNull(reactorFactory.create("agent", new Update(), UPDATE_INTERVAL), "reactor"); this.reaper = checkNotNull(reaper, "reaper"); } @Override protected void startUp() throws Exception { for (final Entry<JobId, Execution> entry : executions.get().entrySet()) { final Execution execution = entry.getValue(); final Job job = execution.getJob(); if (execution.getPorts() != null) { createSupervisor(job, execution.getPorts()); } } model.addListener(modelListener); reactor.startAsync().awaitRunning(); reactor.signal(); } @Override protected void shutDown() throws Exception { reactor.stopAsync().awaitTerminated(); for (final Supervisor supervisor : supervisors.values()) { supervisor.close(); supervisor.join(); } } /** * Create a job supervisor. * * @param job The job . */ private Supervisor createSupervisor(final Job job, final Map<String, Integer> portAllocation) { log.debug("creating job supervisor: {}", job); final TaskStatus taskStatus = model.getTaskStatus(job.getId()); final String containerId = (taskStatus == null) ? null : taskStatus.getContainerId(); final Supervisor supervisor = supervisorFactory.create(job, containerId, portAllocation, supervisorListener); supervisors.put(job.getId(), supervisor); return supervisor; } /** * Listens to model state updates and signals the reactor. */ private class ModelListener implements AgentModel.Listener { @Override public void tasksChanged(final AgentModel model) { reactor.signal(); } } /** * Listens to supervisor state updates and signals the reactor. */ private class SupervisorListener implements Supervisor.Listener { @Override public void stateChanged(final Supervisor supervisor) { reactor.signal(); } } /** * Starts and stops supervisors to reflect the desired state. Called by the reactor. */ private class Update implements Callback { @Override public void run(final boolean timeout) throws InterruptedException { // Note: when changing this code: // * Ensure that supervisors for the same container never run concurrently. // * A supervisor must not be released before its container is stopped. // * A new container must either reuse an existing supervisor or wait for the old supervisor // to die before spawning a new one. // * Book-keeping a supervisor of one job should not block processing of other jobs // Reap unwanted containers reaper.reap(new Supplier<Set<String>>() { @Override public Set<String> get() { final Set<String> active = Sets.newHashSet(); for (final Supervisor supervisor : supervisors.values()) { final String containerId = supervisor.containerId(); if (containerId != null) { active.add(containerId); } } return active; } }); final Map<JobId, Task> tasks = model.getTasks(); log.debug("tasks: {}", tasks); log.debug("executions: {}", executions.get()); log.debug("supervisors: {}", supervisors); // Create and update executions final Map<JobId, Execution> newExecutions = Maps.newHashMap(executions.get()); for (final Entry<JobId, Task> entry : tasks.entrySet()) { final JobId jobId = entry.getKey(); final Task task = entry.getValue(); final Execution existing = newExecutions.get(jobId); if (existing != null) { if (existing.getGoal() != task.getGoal()) { final Execution execution = existing.withGoal(task.getGoal()); newExecutions.put(jobId, execution); } } else { newExecutions.put(jobId, Execution.of(task.getJob()).withGoal(task.getGoal())); } } // Create undeploy goals for removed tasks for (final Entry<JobId, Execution> entry : newExecutions.entrySet()) { final JobId jobId = entry.getKey(); final Execution execution = entry.getValue(); if (!tasks.containsKey(jobId)) { log.debug("Setting UNDEPLOY goal for removed job: {}", execution.getJob()); entry.setValue(execution.withGoal(Goal.UNDEPLOY)); } } // Allocate ports final Map<JobId, Execution> pending = ImmutableMap.copyOf( Maps.filterValues(newExecutions, PORT_ALLOCATION_PENDING)); if (!pending.isEmpty()) { final ImmutableSet.Builder<Integer> usedPorts = ImmutableSet.builder(); final Map<JobId, Execution> allocated = Maps.filterValues(newExecutions, PORTS_ALLOCATED); for (final Entry<JobId, Execution> entry : allocated.entrySet()) { usedPorts.addAll(entry.getValue().getPorts().values()); } for (final Entry<JobId, Execution> entry : pending.entrySet()) { final JobId jobId = entry.getKey(); final Execution execution = entry.getValue(); final Job job = execution.getJob(); final Map<String, Integer> ports = portAllocator.allocate(job.getPorts(), usedPorts.build()); log.debug("Allocated ports for job {}: {}", jobId, ports); if (ports != null) { newExecutions.put(jobId, execution.withPorts(ports)); usedPorts.addAll(ports.values()); } else { log.warn("Unable to allocate ports for job: {}", job); } } } // Persist executions if (!newExecutions.equals(executions.get())) { executions.setUnchecked(ImmutableMap.copyOf(newExecutions)); } // Remove stopped supervisors. for (final Entry<JobId, Supervisor> entry : ImmutableSet.copyOf(supervisors.entrySet())) { final JobId jobId = entry.getKey(); final Supervisor supervisor = entry.getValue(); if (supervisor.isStopping() && supervisor.isDone()) { log.debug("releasing stopped supervisor: {}", jobId); supervisors.remove(jobId); supervisor.close(); reactor.signal(); } } // Create new supervisors for (final Entry<JobId, Execution> entry : executions.get().entrySet()) { final JobId jobId = entry.getKey(); final Execution execution = entry.getValue(); final Supervisor supervisor = supervisors.get(jobId); if (supervisor == null && execution.getGoal() == START && execution.getPorts() != null) { createSupervisor(execution.getJob(), execution.getPorts()); } } // Update supervisor goals for (final Map.Entry<JobId, Supervisor> entry : supervisors.entrySet()) { final JobId jobId = entry.getKey(); final Supervisor supervisor = entry.getValue(); final Execution execution = executions.get().get(jobId); supervisor.setGoal(execution.getGoal()); } // Reap dead executions final Set<JobId> reapedTasks = Sets.newHashSet(); for (final Entry<JobId, Execution> entry : executions.get().entrySet()) { final JobId jobId = entry.getKey(); final Execution execution = entry.getValue(); if (execution.getGoal() == UNDEPLOY) { final Supervisor supervisor = supervisors.get(jobId); if (supervisor == null) { reapedTasks.add(jobId); log.debug("Removing task: {}", jobId); model.removeTaskStatus(jobId); } } } // Persist executions if (!reapedTasks.isEmpty()) { final Map<JobId, Execution> survivors = Maps.filterKeys(executions.get(), not(in(reapedTasks))); executions.setUnchecked(ImmutableMap.copyOf(survivors)); } } } }