/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.hydra.job; import java.util.Arrays; import com.addthis.basis.test.SlowTest; import com.addthis.codec.config.Configs; import com.addthis.hydra.job.mq.HostCapacity; import com.addthis.hydra.job.mq.HostState; import com.addthis.hydra.job.mq.JobKey; import com.addthis.hydra.job.spawn.Spawn; import com.addthis.hydra.util.ZkCodecStartUtil; import com.addthis.maljson.JSONException; import com.addthis.maljson.JSONObject; import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; @Category(SlowTest.class) public class HostFailWorkerTest extends ZkCodecStartUtil { private Spawn spawn; private HostFailWorker hostFailWorker; @Before public void setup() throws Exception { spawn = Configs.newDefault(Spawn.class); hostFailWorker = spawn.getHostFailWorker(); } @After public void cleanup() throws Exception { spawn.close(); } @Test public void minionDownTest() throws Exception { String liveHostId = "livehost"; String replicaHostId = "replicahost"; String emptyHostId = "emptyhost"; spawn.hostManager.updateHostState(makeHostState("otherhost", true)); // If a minion is down, HostFailWorker should complain spawn.hostManager.updateHostState(makeHostState(emptyHostId, true)); HostState host1 = makeHostState(liveHostId, true); Job job = makeSingleTaskJob(liveHostId, replicaHostId); JobKey[] jobKeys = new JobKey[]{new JobKey(job.getId(), 0)}; host1.setStopped(jobKeys); spawn.hostManager.updateHostState(host1); HostState host2 = makeHostState("downhost", false); host2.setReplicas(jobKeys); spawn.hostManager.updateHostState(host2); assertTrue("should not be able to fail host with task that is also on a down host", !hostFailWorker.checkHostStatesForFailure(liveHostId)); assertTrue("should be able to fail empty host", hostFailWorker.checkHostStatesForFailure(emptyHostId)); } private Job makeSingleTaskJob(String liveHost, String replicaHost) throws Exception { Job job = spawn.createJob("a", 0, Arrays.asList(liveHost, replicaHost), "default", "dummy", false); job.setReplicas(spawn.getJobDefaults().replicas); JobTask task = new JobTask(liveHost, 0, 0); task.setReplicas(Arrays.asList(new JobTaskReplica(replicaHost, job.getId(), 0, 0))); job.setTasks(Arrays.asList(task)); spawn.updateJob(job, false); return job; } @Test public void statePersistenceTest() { // Mark some hosts for failure, then spin up a new HostFailWorker and make sure it can load the state hostFailWorker.markHostsToFail("a,b", HostFailWorker.FailState.FAILING_FS_DEAD); hostFailWorker.markHostsToFail("c", HostFailWorker.FailState.FAILING_FS_OKAY); HostFailWorker hostFailWorker2 = new HostFailWorker(spawn, spawn.hostManager, null); assertEquals("should persist state", HostFailWorker.FailState.FAILING_FS_DEAD, hostFailWorker2.getFailureState("a")); assertEquals("should persist state", HostFailWorker.FailState.FAILING_FS_DEAD, hostFailWorker2.getFailureState("b")); assertEquals("should persist state", HostFailWorker.FailState.FAILING_FS_OKAY, hostFailWorker2.getFailureState("c")); assertEquals("should show alive state for non-failed host", HostFailWorker.FailState.ALIVE, hostFailWorker2.getFailureState("d")); } @Test public void failWarningTest() throws JSONException { // Check to make sure the UI warnings about host state are correct spawn.hostManager.updateHostState(makeHostState("a", true, 1000, 2000)); spawn.hostManager.updateHostState(makeHostState("b", true, 500, 2000)); spawn.hostManager.updateHostState(makeHostState("c", true, 1500, 2000)); JSONObject failBMessage = hostFailWorker.getInfoForHostFailure("b", true); assertTrue("should have ~.5 disk usage before fail", failBMessage.get("prefail") != null && areClose(failBMessage.getDouble("prefail"), .5)); assertTrue("should have ~.75 disk usage after fail", failBMessage.get("postfail") != null && areClose(failBMessage.getDouble("postfail"), .75)); spawn.hostManager.updateHostState(makeHostState("d", true, 10000, 11000)); JSONObject failDMessage = hostFailWorker.getInfoForHostFailure("d", true); assertTrue("should get fatal warning after failing too-big host", failDMessage.has("fatal")); } @Test public void fullDiskTest() throws Exception { String fullHostId = "full"; String emptyHostId = "empty"; spawn.hostManager.updateHostState(makeHostState(fullHostId, true, 999_000_000_000L, 1000_000_000_000L)); spawn.hostManager.updateHostState(makeHostState(emptyHostId, true, 100_000_000_000L, 1000_000_000_000L)); zkClient.create().creatingParentsIfNeeded().forPath("/minion/up/" + fullHostId, new byte[]{}); zkClient.create().creatingParentsIfNeeded().forPath("/minion/up/" + emptyHostId, new byte[]{}); Thread.sleep(2000); // need to let spawn detect the new minions hostFailWorker.updateFullMinions(); assertEquals("should correctly detect full host", HostFailWorker.FailState.DISK_FULL, hostFailWorker.getFailureState(fullHostId)); assertEquals("should correctly detect okay host", HostFailWorker.FailState.ALIVE, hostFailWorker.getFailureState(emptyHostId)); } private static boolean areClose(double x, double y) { return Math.abs(x - y) < .001; } private HostState makeHostState(String id, boolean up, long diskUsed, long diskMax) { HostState hostState = makeHostState(id, up); hostState.setUsed(new HostCapacity(0, 0, 0, diskUsed)); hostState.setMax(new HostCapacity(0, 0, 0, diskMax)); return hostState; } private HostState makeHostState(String id, boolean up) { HostState hostState = new HostState(id); hostState.setHost("pretend_host"); hostState.setUp(up); hostState.setReplicas(new JobKey[]{}); hostState.setStopped(new JobKey[]{}); return hostState; } }