package com.hubspot.singularity.scheduler;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.mesos.Protos.TaskState;
import org.junit.Assert;
import org.junit.Test;
import com.google.common.base.Optional;
import com.google.common.collect.ImmutableList;
import com.hubspot.deploy.HealthcheckOptions;
import com.hubspot.deploy.HealthcheckOptionsBuilder;
import com.hubspot.mesos.Resources;
import com.hubspot.singularity.DeployState;
import com.hubspot.singularity.SingularityDeploy;
import com.hubspot.singularity.SingularityDeployBuilder;
import com.hubspot.singularity.SingularityShellCommand;
import com.hubspot.singularity.SingularityTask;
import com.hubspot.singularity.SingularityTaskHealthcheckResult;
import com.hubspot.singularity.SingularityTaskId;
import com.hubspot.singularity.api.SingularityBounceRequest;
import com.hubspot.singularity.api.SingularitySkipHealthchecksRequest;
import com.hubspot.singularity.scheduler.SingularityNewTaskChecker.CheckTaskState;
public class SingularityHealthchecksTest extends SingularitySchedulerTestBase {
public SingularityHealthchecksTest() {
super(false);
}
@Test
public void testSkipHealthchecksEdgeCases() {
try {
setConfigurationForNoDelay();
configuration.setKillAfterTasksDoNotRunDefaultSeconds(100);
configuration.setCheckNewTasksEverySeconds(1);
initRequest();
initHCDeploy();
requestResource.skipHealthchecks(requestId, new SingularitySkipHealthchecksRequest(Optional.of(Boolean.TRUE), Optional.<Long> absent(), Optional.<String> absent(), Optional.<String> absent()));
SingularityTask firstTask = startTask(firstDeploy, 1);
Assert.assertTrue(!taskManager.getLastHealthcheck(firstTask.getTaskId()).isPresent());
finishHealthchecks();
finishNewTaskChecksAndCleanup();
Assert.assertEquals(1, taskManager.getNumActiveTasks());
requestResource.skipHealthchecks(requestId, new SingularitySkipHealthchecksRequest(Optional.of(Boolean.FALSE), Optional.<Long> absent(), Optional.<String> absent(), Optional.<String> absent()));
// run new task check ONLY.
newTaskChecker.enqueueNewTaskCheck(firstTask, requestManager.getRequest(requestId), healthchecker);
finishNewTaskChecks();
finishHealthchecks();
finishNewTaskChecksAndCleanup();
// healthcheck will fail
Assert.assertTrue(taskManager.getLastHealthcheck(firstTask.getTaskId()).isPresent());
Assert.assertEquals(0, taskManager.getNumActiveTasks());
} finally {
unsetConfigurationForNoDelay();
configuration.setCheckNewTasksEverySeconds(5);
}
}
@Test
public void testSkipHealthchecksDuringBounce() {
try {
initRequest();
initHCDeploy();
SingularityTask firstTask = startTask(firstDeploy, 1);
requestResource.bounce(requestId, Optional.of(new SingularityBounceRequest(Optional.<Boolean> absent(), Optional.of(true), Optional.<Long> absent(), Optional.<String> absent(), Optional.<String>absent(), Optional.<SingularityShellCommand>absent())));
setConfigurationForNoDelay();
cleaner.drainCleanupQueue();
resourceOffers();
List<SingularityTaskId> taskIds = taskManager.getAllTaskIds();
taskIds.remove(firstTask.getTaskId());
SingularityTaskId secondTaskId = taskIds.get(0);
SingularityTask secondTask = taskManager.getTask(secondTaskId).get();
statusUpdate(secondTask, TaskState.TASK_RUNNING);
Assert.assertTrue(healthchecker.cancelHealthcheck(firstTask.getTaskId().getId()));
newTaskChecker.cancelNewTaskCheck(firstTask.getTaskId().getId());
finishHealthchecks();
finishNewTaskChecks();
Assert.assertTrue(!taskManager.getLastHealthcheck(secondTask.getTaskId()).isPresent());
cleaner.drainCleanupQueue();
killKilledTasks();
Assert.assertEquals(0, taskManager.getNumCleanupTasks());
Assert.assertEquals(1, taskManager.getNumActiveTasks());
} finally {
unsetConfigurationForNoDelay();
}
}
@Test
public void testHealthchecksDuringBounce() {
initRequest();
initHCDeploy();
startTask(firstDeploy);
requestResource.bounce(requestId, Optional.absent());
cleaner.drainCleanupQueue();
SingularityTask secondTask = startTask(firstDeploy);
cleaner.drainCleanupQueue();
Assert.assertEquals(1, taskManager.getNumCleanupTasks());
Assert.assertEquals(2, taskManager.getNumActiveTasks());
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.of(503), Optional.of(1000L), 1, Optional.<String> absent(), Optional.<String> absent(), secondTask.getTaskId(), Optional.<Boolean>absent()));
cleaner.drainCleanupQueue();
Assert.assertEquals(1, taskManager.getNumCleanupTasks());
Assert.assertEquals(2, taskManager.getNumActiveTasks());
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.of(200), Optional.of(1000L), System.currentTimeMillis(), Optional.<String> absent(), Optional.<String> absent(), secondTask.getTaskId(), Optional.<Boolean>absent()));
cleaner.drainCleanupQueue();
killKilledTasks();
Assert.assertEquals(0, taskManager.getNumCleanupTasks());
Assert.assertEquals(1, taskManager.getNumActiveTasks());
}
@Test
public void testHealthchecksTimeout() {
initRequest();
final long hourAgo = System.currentTimeMillis() - TimeUnit.HOURS.toMillis(1);
final String deployId = "timeout_test";
HealthcheckOptions options = new HealthcheckOptionsBuilder("http://uri").setMaxRetries(Optional.of(2)).build();
SingularityDeployBuilder db = new SingularityDeployBuilder(requestId, deployId).setHealthcheck(Optional.of(options));
db.setDeployHealthTimeoutSeconds(Optional.of(TimeUnit.DAYS.toMillis(1)));
SingularityDeploy deploy = initDeploy(db, hourAgo);
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
SingularityTask task = launchTask(request, deploy, hourAgo, hourAgo + 1, 1, TaskState.TASK_RUNNING);
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.of(503), Optional.of(1000L), hourAgo + 1, Optional.<String> absent(), Optional.<String> absent(), task.getTaskId(), Optional.<Boolean>absent()));
deployChecker.checkDeploys();
Assert.assertEquals(DeployState.FAILED, deployManager.getDeployResult(requestId, deployId).get().getDeployState());
}
@Test
public void testMaxHealthcheckRetries() {
initRequest();
final String deployId = "retry_test";
HealthcheckOptions options = new HealthcheckOptionsBuilder("http://uri").setMaxRetries(Optional.of(2)).build();
SingularityDeployBuilder db = new SingularityDeployBuilder(requestId, deployId).setHealthcheck(Optional.of(options));
SingularityDeploy deploy = initDeploy(db, System.currentTimeMillis());
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
SingularityTask task = launchTask(request, deploy, System.currentTimeMillis(), 1, TaskState.TASK_RUNNING);
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.of(503), Optional.of(1000L), System.currentTimeMillis(), Optional.<String> absent(), Optional.<String> absent(), task.getTaskId(), Optional.<Boolean>absent()));
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.of(503), Optional.of(1000L), System.currentTimeMillis() + 1, Optional.<String> absent(), Optional.<String> absent(), task.getTaskId(), Optional.<Boolean>absent()));
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.of(503), Optional.of(1000L), System.currentTimeMillis() + 1, Optional.<String> absent(), Optional.<String> absent(), task.getTaskId(), Optional.<Boolean>absent()));
deployChecker.checkDeploys();
Assert.assertEquals(DeployState.FAILED, deployManager.getDeployResult(requestId, deployId).get().getDeployState());
}
@Test
public void testNewTaskCheckerRespectsDeployHealthcheckRetries() {
initRequest();
final String deployId = "new_task_healthcheck";
HealthcheckOptions options = new HealthcheckOptionsBuilder("http://uri").setMaxRetries(Optional.of(1)).build();
SingularityDeployBuilder db = new SingularityDeployBuilder(requestId, deployId).setHealthcheck(Optional.of(options));
SingularityDeploy deploy = initAndFinishDeploy(request, db);
SingularityTask task = launchTask(request, deploy, System.currentTimeMillis(), 1, TaskState.TASK_RUNNING);
Assert.assertEquals(CheckTaskState.CHECK_IF_OVERDUE, newTaskChecker.getTaskState(task, requestManager.getRequest(requestId), healthchecker));
Assert.assertTrue(taskManager.getCleanupTaskIds().isEmpty());
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.of(503), Optional.of(1000L), System.currentTimeMillis() + 1, Optional.<String> absent(), Optional.<String> absent(), task.getTaskId(), Optional.<Boolean>absent()));
Assert.assertEquals(CheckTaskState.CHECK_IF_OVERDUE, newTaskChecker.getTaskState(task, requestManager.getRequest(requestId), healthchecker));
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.of(503), Optional.of(1000L), System.currentTimeMillis() + 1, Optional.<String> absent(), Optional.<String> absent(), task.getTaskId(), Optional.<Boolean>absent()));
Assert.assertEquals(CheckTaskState.UNHEALTHY_KILL_TASK, newTaskChecker.getTaskState(task, requestManager.getRequest(requestId), healthchecker));
}
@Test
public void testHealthchecksSuccess() {
initRequest();
final String deployId = "hc_test";
HealthcheckOptions options = new HealthcheckOptionsBuilder("http://uri").setMaxRetries(Optional.of(2)).build();
SingularityDeployBuilder db = new SingularityDeployBuilder(requestId, deployId).setHealthcheck(Optional.of(options));
SingularityDeploy deploy = initDeploy(db, System.currentTimeMillis());
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
SingularityTask task = launchTask(request, deploy, System.currentTimeMillis(), 1, TaskState.TASK_RUNNING);
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.of(503), Optional.of(1000L), System.currentTimeMillis(), Optional.<String>absent(), Optional.<String>absent(), task.getTaskId(), Optional.<Boolean>absent()));
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.of(200), Optional.of(1000L), System.currentTimeMillis() + 1, Optional.<String>absent(), Optional.<String>absent(), task.getTaskId(), Optional.<Boolean>absent()));
deployChecker.checkDeploys();
Assert.assertEquals(DeployState.SUCCEEDED, deployManager.getDeployResult(requestId, deployId).get().getDeployState());
}
@Test
public void testFailingStatusCodes() {
initRequest();
final String deployId = "retry_test";
List<Integer> failureCodes = ImmutableList.of(404);
HealthcheckOptions options = new HealthcheckOptionsBuilder("http://uri").setMaxRetries(Optional.of(3)).setFailureStatusCodes(Optional.of(failureCodes)).build();
SingularityDeployBuilder db = new SingularityDeployBuilder(requestId, deployId).setHealthcheck(Optional.of(options));
SingularityDeploy deploy = initDeploy(db, System.currentTimeMillis());
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
SingularityTask task = launchTask(request, deploy, System.currentTimeMillis(), 1, TaskState.TASK_RUNNING);
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.of(503), Optional.of(1000L), System.currentTimeMillis(), Optional.<String> absent(), Optional.<String> absent(), task.getTaskId(), Optional.<Boolean>absent()));
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.of(404), Optional.of(1000L), System.currentTimeMillis() + 1, Optional.<String> absent(), Optional.<String> absent(), task.getTaskId(), Optional.<Boolean>absent()));
deployChecker.checkDeploys();
// Bad status code should cause instant failure even though retries remain
Assert.assertEquals(DeployState.FAILED, deployManager.getDeployResult(requestId, deployId).get().getDeployState());
}
@Test
public void testStartupTimeout() {
initRequest();
final long hourAgo = System.currentTimeMillis() - TimeUnit.HOURS.toMillis(1);
final String deployId = "startup_timeout_test";
HealthcheckOptions options = new HealthcheckOptionsBuilder("http://uri").setMaxRetries(Optional.of(2)).setStartupTimeoutSeconds(Optional.of((int) TimeUnit.MINUTES.toSeconds(30))).build();
SingularityDeployBuilder db = new SingularityDeployBuilder(requestId, deployId).setHealthcheck(Optional.of(options));
db.setDeployHealthTimeoutSeconds(Optional.of(TimeUnit.DAYS.toMillis(1)));
SingularityDeploy deploy = initDeploy(db, hourAgo);
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
SingularityTask task = launchTask(request, deploy, hourAgo, hourAgo + 1, 1, TaskState.TASK_RUNNING);
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.<Integer>absent(), Optional.of(1000L), hourAgo + 1, Optional.<String> absent(), Optional.of("ERROR"), task.getTaskId(), Optional.of(true)));
deployChecker.checkDeploys();
Assert.assertEquals(DeployState.FAILED, deployManager.getDeployResult(requestId, deployId).get().getDeployState());
}
@Test
public void testStartupDoesNotCountTowardsRetries() {
initRequest();
final String deployId = "retry_test";
HealthcheckOptions options = new HealthcheckOptionsBuilder("http://uri").setMaxRetries(Optional.of(1)).build();
SingularityDeployBuilder db = new SingularityDeployBuilder(requestId, deployId).setHealthcheck(Optional.of(options));
SingularityDeploy deploy = initDeploy(db, System.currentTimeMillis());
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
SingularityTask task = launchTask(request, deploy, System.currentTimeMillis(), 1, TaskState.TASK_RUNNING);
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.<Integer>absent(), Optional.of(1000L), System.currentTimeMillis(), Optional.<String> absent(), Optional.of("ConnectionRefused"), task.getTaskId(), Optional.of(true)));
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.<Integer>absent(), Optional.of(1000L), System.currentTimeMillis() + 1, Optional.<String> absent(), Optional.of("ConnectionRefused"), task.getTaskId(), Optional.of(true)));
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.of(503), Optional.of(1000L), System.currentTimeMillis() + 1, Optional.<String> absent(), Optional.<String> absent(), task.getTaskId(), Optional.<Boolean>absent()));
deployChecker.checkDeploys();
Assert.assertTrue(!deployManager.getDeployResult(requestId, deployId).isPresent());
taskManager.saveHealthcheckResult(new SingularityTaskHealthcheckResult(Optional.of(200), Optional.of(1000L), System.currentTimeMillis() + 1, Optional.<String> absent(), Optional.<String> absent(), task.getTaskId(), Optional.<Boolean>absent()));
deployChecker.checkDeploys();
Assert.assertEquals(DeployState.SUCCEEDED, deployManager.getDeployResult(requestId, deployId).get().getDeployState());
}
@Test
public void testPortIndices() {
try {
setConfigurationForNoDelay();
initRequest();
HealthcheckOptions options = new HealthcheckOptionsBuilder("http://uri").setPortIndex(Optional.of(1)).build();
firstDeploy = initAndFinishDeploy(request, new SingularityDeployBuilder(request.getId(), firstDeployId)
.setCommand(Optional.of("sleep 100")).setResources(Optional.of(new Resources(1, 64, 3, 0)))
.setHealthcheck(Optional.of(options)));
requestResource.postRequest(request.toBuilder().setInstances(Optional.of(2)).build());
scheduler.drainPendingQueue(stateCacheProvider.get());
String[] portRange = {"80:82"};
sms.resourceOffers(driver, Arrays.asList(createOffer(20, 20000, "slave1", "host1", Optional.<String> absent(), Collections.<String, String>emptyMap(), portRange)));
SingularityTaskId firstTaskId = taskManager.getActiveTaskIdsForRequest(requestId).get(0);
SingularityTask firstTask = taskManager.getTask(firstTaskId).get();
statusUpdate(firstTask, TaskState.TASK_RUNNING);
newTaskChecker.enqueueNewTaskCheck(firstTask, requestManager.getRequest(requestId), healthchecker);
finishNewTaskChecks();
finishHealthchecks();
finishNewTaskChecksAndCleanup();
Assert.assertTrue(taskManager.getLastHealthcheck(firstTask.getTaskId()).get().toString().contains("host1:81"));
} finally {
unsetConfigurationForNoDelay();
}
}
@Test
public void testPortNumber() {
try {
setConfigurationForNoDelay();
initRequest();
HealthcheckOptions options = new HealthcheckOptionsBuilder("http://uri").setPortNumber(Optional.of(81L)).build();
firstDeploy = initAndFinishDeploy(request, new SingularityDeployBuilder(request.getId(), firstDeployId)
.setCommand(Optional.of("sleep 100")).setResources(Optional.of(new Resources(1, 64, 3, 0)))
.setHealthcheck(Optional.of(options)));
requestResource.postRequest(request.toBuilder().setInstances(Optional.of(2)).build());
scheduler.drainPendingQueue(stateCacheProvider.get());
String[] portRange = {"80:82"};
sms.resourceOffers(driver, Arrays.asList(createOffer(20, 20000, "slave1", "host1", Optional.<String> absent(), Collections.<String, String> emptyMap(), portRange)));
SingularityTaskId firstTaskId = taskManager.getActiveTaskIdsForRequest(requestId).get(0);
SingularityTask firstTask = taskManager.getTask(firstTaskId).get();
statusUpdate(firstTask, TaskState.TASK_RUNNING);
newTaskChecker.enqueueNewTaskCheck(firstTask, requestManager.getRequest(requestId), healthchecker);
finishNewTaskChecks();
finishHealthchecks();
finishNewTaskChecksAndCleanup();
Assert.assertTrue(taskManager.getLastHealthcheck(firstTask.getTaskId()).get().toString().contains("host1:81"));
} finally {
unsetConfigurationForNoDelay();
}
}
private void setConfigurationForNoDelay() {
configuration.setNewTaskCheckerBaseDelaySeconds(0);
configuration.setHealthcheckIntervalSeconds(0);
configuration.setDeployHealthyBySeconds(0);
configuration.setKillAfterTasksDoNotRunDefaultSeconds(1);
configuration.setHealthcheckMaxRetries(Optional.of(0));
}
private void unsetConfigurationForNoDelay() {
configuration.setNewTaskCheckerBaseDelaySeconds(1);
configuration.setHealthcheckIntervalSeconds(5);
configuration.setDeployHealthyBySeconds(120);
configuration.setKillAfterTasksDoNotRunDefaultSeconds(600);
configuration.setHealthcheckMaxRetries(Optional.<Integer>absent());
}
}