/*- * -\-\- * Helios Services * -- * Copyright (C) 2016 Spotify AB * -- * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * -/-/- */ package com.spotify.helios.agent; import static com.spotify.helios.common.descriptors.Goal.START; import static com.spotify.helios.common.descriptors.Goal.STOP; import static com.spotify.helios.common.descriptors.TaskStatus.State.CREATING; import static com.spotify.helios.common.descriptors.TaskStatus.State.FAILED; import static com.spotify.helios.common.descriptors.TaskStatus.State.PULLING_IMAGE; import static com.spotify.helios.common.descriptors.TaskStatus.State.RUNNING; import static com.spotify.helios.common.descriptors.TaskStatus.State.STARTING; import static com.spotify.helios.common.descriptors.TaskStatus.State.STOPPED; import static com.spotify.helios.common.descriptors.TaskStatus.State.STOPPING; import static java.util.Arrays.asList; import static java.util.concurrent.TimeUnit.MINUTES; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.startsWith; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertThat; import static org.mockito.Matchers.any; import static org.mockito.Matchers.argThat; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.timeout; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Maps; import com.google.common.util.concurrent.SettableFuture; import com.spotify.docker.client.DockerClient; import com.spotify.docker.client.exceptions.DockerException; import com.spotify.docker.client.messages.ContainerConfig; import com.spotify.docker.client.messages.ContainerCreation; import com.spotify.docker.client.messages.ContainerExit; import com.spotify.docker.client.messages.ContainerInfo; import com.spotify.docker.client.messages.ContainerState; import com.spotify.docker.client.messages.ImageInfo; import com.spotify.docker.client.messages.NetworkSettings; import com.spotify.helios.common.descriptors.Goal; import com.spotify.helios.common.descriptors.Job; import com.spotify.helios.common.descriptors.JobId; import com.spotify.helios.common.descriptors.PortMapping; import com.spotify.helios.common.descriptors.TaskStatus; import com.spotify.helios.common.descriptors.ThrottleState; import com.spotify.helios.serviceregistration.ServiceRegistrar; import com.spotify.helios.servicescommon.statistics.NoopSupervisorMetrics; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import org.hamcrest.FeatureMatcher; import org.hamcrest.Matcher; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; import org.mockito.ArgumentCaptor; import org.mockito.Captor; import org.mockito.Mock; import org.mockito.runners.MockitoJUnitRunner; import org.mockito.stubbing.Answer; @RunWith(MockitoJUnitRunner.class) public class SupervisorTest { private final ExecutorService executor = Executors.newCachedThreadPool(); private static final String NAMESPACE = "helios-deadbeef"; private static final String REPOSITORY = "spotify"; private static final String TAG = "17"; private static final String IMAGE = REPOSITORY + ":" + TAG; private static final String NAME = "foobar"; private static final List<String> COMMAND = asList("foo", "bar"); private static final String VERSION = "4711"; private static final Job JOB = Job.newBuilder() .setName(NAME) .setCommand(COMMAND) .setImage(IMAGE) .setVersion(VERSION) .build(); private static final Map<String, PortMapping> PORTS = Collections.emptyMap(); private static final Map<String, String> ENV = ImmutableMap.of("foo", "17", "bar", "4711"); private static final Set<String> EXPECTED_CONTAINER_ENV = ImmutableSet.of("foo=17", "bar=4711"); private final ContainerInfo runningResponse = mock(ContainerInfo.class); private final ContainerInfo stoppedResponse = mock(ContainerInfo.class); @Mock public AgentModel model; @Mock public DockerClient docker; @Mock public RestartPolicy retryPolicy; @Mock public ServiceRegistrar registrar; @Mock public Sleeper sleeper; @Captor public ArgumentCaptor<ContainerConfig> containerConfigCaptor; @Captor public ArgumentCaptor<String> containerNameCaptor; @Captor public ArgumentCaptor<TaskStatus> taskStatusCaptor; private Supervisor sut; @Before public void setup() throws Exception { final ContainerState runningState = mock(ContainerState.class); when(runningState.running()).thenReturn(true); when(runningResponse.state()).thenReturn(runningState); when(runningResponse.networkSettings()).thenReturn(mock(NetworkSettings.class)); final ContainerState stoppedState = mock(ContainerState.class); when(stoppedState.running()).thenReturn(false); when(stoppedResponse.state()).thenReturn(stoppedState); when(retryPolicy.delay(any(ThrottleState.class))).thenReturn(10L); sut = createSupervisor(JOB); mockTaskStatus(JOB.getId()); } @After public void teardown() throws Exception { if (sut != null) { sut.close(); sut.join(); } } private void mockTaskStatus(final JobId jobId) throws Exception { final ConcurrentMap<JobId, TaskStatus> statusMap = Maps.newConcurrentMap(); doAnswer(invocationOnMock -> { final TaskStatus status = (TaskStatus) invocationOnMock.getArguments()[1]; statusMap.put(jobId, status); return null; }).when(model).setTaskStatus(eq(jobId), taskStatusCaptor.capture()); when(model.getTaskStatus(eq(jobId))).thenReturn(statusMap.get(jobId)); } @Test public void verifySupervisorStartsAndStopsDockerContainer() throws Exception { final String containerId = "deadbeef"; when(docker.createContainer(any(ContainerConfig.class), any(String.class))) .thenReturn(ContainerCreation.builder().id(containerId).build()); final ImageInfo imageInfo = mock(ImageInfo.class); when(docker.inspectImage(IMAGE)).thenReturn(imageInfo); // Have waitContainer wait forever. final SettableFuture<ContainerExit> waitFuture = SettableFuture.create(); when(docker.waitContainer(containerId)).thenAnswer(futureAnswer(waitFuture)); // Start the job sut.setGoal(START); // Verify that the container is created verify(docker, timeout(30000)).createContainer(containerConfigCaptor.capture(), containerNameCaptor.capture()); verify(model, timeout(30000)).setTaskStatus(eq(JOB.getId()), eq(TaskStatus.newBuilder() .setJob(JOB) .setGoal(START) .setState(CREATING) .setContainerId(null) .setEnv(ENV) .build()) ); final ContainerConfig containerConfig = containerConfigCaptor.getValue(); assertEquals(IMAGE, containerConfig.image()); assertEquals(EXPECTED_CONTAINER_ENV, ImmutableSet.copyOf(containerConfig.env())); final String containerName = containerNameCaptor.getValue(); assertEquals(JOB.getId().toShortString(), shortJobIdFromContainerName(containerName)); // Verify that the container is started verify(docker, timeout(30000)).startContainer(eq(containerId)); verify(model, timeout(30000)).setTaskStatus(eq(JOB.getId()), eq(TaskStatus.newBuilder() .setJob(JOB) .setGoal(START) .setState(STARTING) .setContainerId(containerId) .setEnv(ENV) .build()) ); when(docker.inspectContainer(eq(containerId))).thenReturn(runningResponse); verify(docker, timeout(30000)).waitContainer(containerId); verify(model, timeout(30000)).setTaskStatus(eq(JOB.getId()), eq(TaskStatus.newBuilder() .setJob(JOB) .setGoal(START) .setState(RUNNING) .setContainerId(containerId) .setEnv(ENV) .build()) ); // Stop the job sut.setGoal(STOP); verify(docker, timeout(30000)).stopContainer( eq(containerId), eq(Supervisor.DEFAULT_SECONDS_TO_WAIT_BEFORE_KILL)); // Change docker container state to stopped now that it was killed when(docker.inspectContainer(eq(containerId))).thenReturn(stoppedResponse); // Verify that the pulling state is signalled verify(model, timeout(30000)).setTaskStatus(eq(JOB.getId()), eq(TaskStatus.newBuilder() .setJob(JOB) .setGoal(START) .setState(PULLING_IMAGE) .setContainerId(null) .setEnv(ENV) .build()) ); // Verify that the STOPPING and STOPPED states are signalled verify(model, timeout(30000)).setTaskStatus(eq(JOB.getId()), eq(TaskStatus.newBuilder() .setJob(JOB) .setGoal(STOP) .setState(STOPPING) .setContainerId(containerId) .setEnv(ENV) .build()) ); verify(model, timeout(30000)).setTaskStatus(eq(JOB.getId()), eq(TaskStatus.newBuilder() .setJob(JOB) .setGoal(STOP) .setState(STOPPED) .setContainerId(containerId) .setEnv(ENV) .build()) ); } @Test public void verifySupervisorStopsDockerContainerWithConfiguredKillTime() throws Exception { final String containerId = "deadbeef"; final Job longKillTimeJob = Job.newBuilder() .setName(NAME) .setCommand(COMMAND) .setImage(IMAGE) .setVersion(VERSION) .setSecondsToWaitBeforeKill(30) .build(); mockTaskStatus(longKillTimeJob.getId()); final Supervisor longKillTimeSupervisor = createSupervisor(longKillTimeJob); when(docker.createContainer(any(ContainerConfig.class), any(String.class))) .thenReturn(ContainerCreation.builder().id(containerId).build()); final ImageInfo imageInfo = mock(ImageInfo.class); when(docker.inspectImage(IMAGE)).thenReturn(imageInfo); // Have waitContainer wait forever. final SettableFuture<ContainerExit> waitFuture = SettableFuture.create(); when(docker.waitContainer(containerId)).thenAnswer(futureAnswer(waitFuture)); // Start the job (so that a runner exists) longKillTimeSupervisor.setGoal(START); when(docker.inspectContainer(eq(containerId))).thenReturn(runningResponse); // This is already verified above, but it works as a hack to wait for the model/docker state // to converge in such a way that a setGoal(STOP) will work. :| verify(docker, timeout(30000)).waitContainer(containerId); // Stop the job longKillTimeSupervisor.setGoal(STOP); verify(docker, timeout(30000)).stopContainer( eq(containerId), eq(longKillTimeJob.getSecondsToWaitBeforeKill())); // Change docker container state to stopped now that it was killed when(docker.inspectContainer(eq(containerId))).thenReturn(stoppedResponse); } private String shortJobIdFromContainerName(final String containerName) { assertThat(containerName, startsWith(NAMESPACE + "-")); final String name = containerName.substring(NAMESPACE.length() + 1); final int lastUnderscore = name.lastIndexOf('_'); return name.substring(0, lastUnderscore).replace('_', ':'); } @Test public void verifySupervisorRestartsExitedContainer() throws Exception { final String containerId1 = "deadbeef1"; final String containerId2 = "deadbeef2"; final ContainerCreation createResponse1 = ContainerCreation.builder().id(containerId1).build(); final ContainerCreation createResponse2 = ContainerCreation.builder().id(containerId2).build(); when(docker.createContainer(any(ContainerConfig.class), any(String.class))) .thenReturn(createResponse1); final ImageInfo imageInfo = mock(ImageInfo.class); when(docker.inspectImage(IMAGE)).thenReturn(imageInfo); when(docker.inspectContainer(eq(containerId1))).thenReturn(runningResponse); final SettableFuture<ContainerExit> waitFuture1 = SettableFuture.create(); final SettableFuture<ContainerExit> waitFuture2 = SettableFuture.create(); when(docker.waitContainer(containerId1)).thenAnswer(futureAnswer(waitFuture1)); when(docker.waitContainer(containerId2)).thenAnswer(futureAnswer(waitFuture2)); // Start the job sut.setGoal(START); verify(docker, timeout(30000)).createContainer(any(ContainerConfig.class), any(String.class)); verify(docker, timeout(30000)).startContainer(eq(containerId1)); verify(docker, timeout(30000)).waitContainer(containerId1); // Indicate that the container exited when(docker.inspectContainer(eq(containerId1))).thenReturn(stoppedResponse); when(docker.createContainer(any(ContainerConfig.class), any(String.class))) .thenReturn(createResponse2); when(docker.inspectContainer(eq(containerId2))).thenReturn(runningResponse); waitFuture1.set(ContainerExit.create(1)); // Verify that the container was restarted verify(docker, timeout(30000)).createContainer(any(ContainerConfig.class), any(String.class)); verify(docker, timeout(30000)).startContainer(eq(containerId2)); verify(docker, timeout(30000)).waitContainer(containerId2); } public void verifyExceptionSetsTaskStatusToFailed(final Exception exception) throws Exception { when(docker.inspectImage(IMAGE)).thenThrow(exception); when(retryPolicy.delay(any(ThrottleState.class))).thenReturn(MINUTES.toMillis(1)); // Start the job sut.setGoal(START); verify(retryPolicy, timeout(30000)).delay(any(ThrottleState.class)); assertEquals(taskStatusCaptor.getValue().getState(), FAILED); } @Test public void verifyDockerExceptionSetsTaskStatusToFailed() throws Exception { verifyExceptionSetsTaskStatusToFailed(new DockerException("FAIL")); } @Test public void verifyRuntimeExceptionSetsTaskStatusToFailed() throws Exception { verifyExceptionSetsTaskStatusToFailed(new RuntimeException("FAIL")); } private Answer<?> futureAnswer(final SettableFuture<?> future) { return (Answer<Object>) invocation -> future.get(); } /** * Verifies a fix for a NPE that is thrown when the Supervisor receives a goal of UNDEPLOY for a * job with gracePeriod that has never been STARTed. */ @Test public void verifySupervisorHandlesUndeployingOfNotRunningContainerWithGracePeriod() throws Exception { final int gracePeriod = 5; final Job job = JOB.toBuilder() .setGracePeriod(gracePeriod) .build(); final Supervisor sut = createSupervisor(job); sut.setGoal(Goal.UNDEPLOY); // when the NPE was thrown, the model was never updated verify(model, timeout(30000)).setTaskStatus(eq(job.getId()), argThat(is(taskStatusWithState(TaskStatus.State.STOPPING)))); verify(model, timeout(30000)).setTaskStatus(eq(job.getId()), argThat(is(taskStatusWithState(TaskStatus.State.STOPPED)))); verify(sleeper, never()).sleep(gracePeriod * 1000); } private Supervisor createSupervisor(final Job job) { final TaskConfig config = TaskConfig.builder() .namespace(NAMESPACE) .host("AGENT_NAME") .job(job) .envVars(ENV) .build(); final TaskStatus.Builder taskStatus = TaskStatus.newBuilder() .setJob(job) .setEnv(ENV) .setPorts(PORTS); final StatusUpdater statusUpdater = new DefaultStatusUpdater(model, taskStatus); final TaskMonitor monitor = new TaskMonitor( job.getId(), FlapController.create(), statusUpdater); final TaskRunnerFactory runnerFactory = TaskRunnerFactory.builder() .registrar(registrar) .config(config) .dockerClient(docker) .listener(monitor) .build(); return Supervisor.newBuilder() .setJob(job) .setStatusUpdater(statusUpdater) .setDockerClient(docker) .setRestartPolicy(retryPolicy) .setRunnerFactory(runnerFactory) .setMetrics(new NoopSupervisorMetrics()) .setMonitor(monitor) .build(); } private static Matcher<TaskStatus> taskStatusWithState(final TaskStatus.State state) { return new FeatureMatcher<TaskStatus, TaskStatus.State>(equalTo(state), "state", "state") { @Override protected TaskStatus.State featureValueOf(final TaskStatus actual) { return actual.getState(); } }; } }