/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.job;
import java.util.Arrays;

import com.addthis.basis.test.SlowTest;
import com.addthis.codec.config.Configs;
import com.addthis.hydra.job.mq.HostCapacity;
import com.addthis.hydra.job.mq.HostState;
import com.addthis.hydra.job.mq.JobKey;
import com.addthis.hydra.job.spawn.Spawn;
import com.addthis.hydra.util.ZkCodecStartUtil;
import com.addthis.maljson.JSONException;
import com.addthis.maljson.JSONObject;

import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
/**
 * Tests for {@link HostFailWorker}, run against a {@link Spawn} instance built from the
 * default configuration before each test and closed after it.
 *
 * <p>NOTE(review): {@code zkClient}, used by {@link #fullDiskTest()}, is not declared in this
 * class; presumably it is inherited from {@link ZkCodecStartUtil} -- confirm.
 */
@Category(SlowTest.class)
public class HostFailWorkerTest extends ZkCodecStartUtil {

    /** Tolerance used by {@link #areClose(double, double)} when comparing disk-usage ratios. */
    private static final double RATIO_TOLERANCE = .001;

    /** Disk capacity (in the unit expected by {@link HostCapacity}) given to hosts in {@link #fullDiskTest()}. */
    private static final long LARGE_DISK_MAX = 1_000_000_000_000L;

    /** Time to wait after creating the {@code /minion/up} nodes so spawn can notice them. */
    private static final long MINION_DETECTION_WAIT_MILLIS = 2000;

    private Spawn spawn;
    private HostFailWorker hostFailWorker;

    /** Builds a fresh default {@link Spawn} and fetches its {@link HostFailWorker}. */
    @Before
    public void setup() throws Exception {
        spawn = Configs.newDefault(Spawn.class);
        hostFailWorker = spawn.getHostFailWorker();
    }

    /**
     * Closes the {@link Spawn} created by {@link #setup()}.
     *
     * <p>JUnit still invokes {@code @After} methods when a {@code @Before} method throws, so
     * {@code spawn} may be {@code null} here; the guard avoids adding a secondary
     * {@link NullPointerException} on top of the real setup failure.
     */
    @After
    public void cleanup() throws Exception {
        if (spawn != null) {
            spawn.close();
        }
    }

    /**
     * Registers several up hosts plus one down host that lists the job's task among its
     * replicas, then checks that {@code checkHostStatesForFailure} refuses the host holding
     * that task but accepts a host with no tasks.
     */
    @Test
    public void minionDownTest() throws Exception {
        String liveHostId = "livehost";
        String replicaHostId = "replicahost";
        String emptyHostId = "emptyhost";
        spawn.hostManager.updateHostState(makeHostState("otherhost", true));
        // If a minion is down, HostFailWorker should complain
        spawn.hostManager.updateHostState(makeHostState(emptyHostId, true));
        HostState host1 = makeHostState(liveHostId, true);
        Job job = makeSingleTaskJob(liveHostId, replicaHostId);
        JobKey[] jobKeys = new JobKey[]{new JobKey(job.getId(), 0)};
        // The live host reports the task as stopped; the down host reports it as a replica.
        host1.setStopped(jobKeys);
        spawn.hostManager.updateHostState(host1);
        HostState host2 = makeHostState("downhost", false);
        host2.setReplicas(jobKeys);
        spawn.hostManager.updateHostState(host2);
        assertFalse("should not be able to fail host with task that is also on a down host",
                hostFailWorker.checkHostStatesForFailure(liveHostId));
        assertTrue("should be able to fail empty host",
                hostFailWorker.checkHostStatesForFailure(emptyHostId));
    }

    /**
     * Marks hosts for failure on the shared worker, then constructs a second
     * {@link HostFailWorker} on the same {@link Spawn} and checks that it reports the same
     * failure states, with {@code ALIVE} for a host that was never marked.
     */
    @Test
    public void statePersistenceTest() {
        // Mark some hosts for failure, then spin up a new HostFailWorker and make sure it can load the state
        hostFailWorker.markHostsToFail("a,b", HostFailWorker.FailState.FAILING_FS_DEAD);
        hostFailWorker.markHostsToFail("c", HostFailWorker.FailState.FAILING_FS_OKAY);
        HostFailWorker hostFailWorker2 = new HostFailWorker(spawn, spawn.hostManager, null);
        assertEquals("should persist state", HostFailWorker.FailState.FAILING_FS_DEAD, hostFailWorker2.getFailureState("a"));
        assertEquals("should persist state", HostFailWorker.FailState.FAILING_FS_DEAD, hostFailWorker2.getFailureState("b"));
        assertEquals("should persist state", HostFailWorker.FailState.FAILING_FS_OKAY, hostFailWorker2.getFailureState("c"));
        assertEquals("should show alive state for non-failed host", HostFailWorker.FailState.ALIVE, hostFailWorker2.getFailureState("d"));
    }

    /**
     * Checks the {@code prefail}/{@code postfail} values and the {@code fatal} key in the JSON
     * returned by {@code getInfoForHostFailure}.
     *
     * <p>With hosts a, b and c using 1000, 500 and 1500 of 2000 each, total usage is 3000 of
     * 6000 (.5); without b's capacity it is 3000 of 4000 (.75), which matches the expected
     * values below.
     */
    @Test
    public void failWarningTest() throws JSONException {
        // Check to make sure the UI warnings about host state are correct
        spawn.hostManager.updateHostState(makeHostState("a", true, 1000, 2000));
        spawn.hostManager.updateHostState(makeHostState("b", true, 500, 2000));
        spawn.hostManager.updateHostState(makeHostState("c", true, 1500, 2000));
        JSONObject failBMessage = hostFailWorker.getInfoForHostFailure("b", true);
        assertTrue("should have ~.5 disk usage before fail",
                failBMessage.get("prefail") != null && areClose(failBMessage.getDouble("prefail"), .5));
        assertTrue("should have ~.75 disk usage after fail",
                failBMessage.get("postfail") != null && areClose(failBMessage.getDouble("postfail"), .75));
        spawn.hostManager.updateHostState(makeHostState("d", true, 10000, 11000));
        JSONObject failDMessage = hostFailWorker.getInfoForHostFailure("d", true);
        assertTrue("should get fatal warning after failing too-big host", failDMessage.has("fatal"));
    }

    /**
     * Registers one nearly full host and one mostly empty host, creates their
     * {@code /minion/up} nodes, and checks that {@code updateFullMinions} leaves the first in
     * {@code DISK_FULL} and the second in {@code ALIVE}.
     */
    @Test
    public void fullDiskTest() throws Exception {
        String fullHostId = "full";
        String emptyHostId = "empty";
        spawn.hostManager.updateHostState(makeHostState(fullHostId, true, 999_000_000_000L, LARGE_DISK_MAX));
        spawn.hostManager.updateHostState(makeHostState(emptyHostId, true, 100_000_000_000L, LARGE_DISK_MAX));
        zkClient.create().creatingParentsIfNeeded().forPath("/minion/up/" + fullHostId, new byte[]{});
        zkClient.create().creatingParentsIfNeeded().forPath("/minion/up/" + emptyHostId, new byte[]{});
        Thread.sleep(MINION_DETECTION_WAIT_MILLIS); // need to let spawn detect the new minions
        hostFailWorker.updateFullMinions();
        assertEquals("should correctly detect full host", HostFailWorker.FailState.DISK_FULL, hostFailWorker.getFailureState(fullHostId));
        assertEquals("should correctly detect okay host", HostFailWorker.FailState.ALIVE, hostFailWorker.getFailureState(emptyHostId));
    }

    /**
     * Creates a job through {@code spawn.createJob}, gives it a single task on
     * {@code liveHost} with one replica on {@code replicaHost}, and saves it via
     * {@code spawn.updateJob}.
     *
     * @param liveHost    host id passed to the {@link JobTask} constructor
     * @param replicaHost host id passed to the {@link JobTaskReplica} constructor
     * @return the job after it has been passed to {@code spawn.updateJob}
     */
    private Job makeSingleTaskJob(String liveHost, String replicaHost) throws Exception {
        Job job = spawn.createJob("a", 0, Arrays.asList(liveHost, replicaHost), "default", "dummy", false);
        job.setReplicas(spawn.getJobDefaults().replicas);
        JobTask task = new JobTask(liveHost, 0, 0);
        task.setReplicas(Arrays.asList(new JobTaskReplica(replicaHost, job.getId(), 0, 0)));
        job.setTasks(Arrays.asList(task));
        spawn.updateJob(job, false);
        return job;
    }

    /** Returns {@code true} when {@code x} and {@code y} differ by less than {@link #RATIO_TOLERANCE}. */
    private static boolean areClose(double x, double y) {
        return Math.abs(x - y) < RATIO_TOLERANCE;
    }

    /**
     * Builds a host state as {@link #makeHostState(String, boolean)} does and additionally
     * sets its used and max capacity, passing {@code diskUsed}/{@code diskMax} as the fourth
     * {@link HostCapacity} argument and zero for the other three.
     */
    private HostState makeHostState(String id, boolean up, long diskUsed, long diskMax) {
        HostState hostState = makeHostState(id, up);
        hostState.setUsed(new HostCapacity(0, 0, 0, diskUsed));
        hostState.setMax(new HostCapacity(0, 0, 0, diskMax));
        return hostState;
    }

    /**
     * Builds a {@link HostState} with the given id and up flag, a placeholder host name, and
     * empty replica and stopped arrays.
     */
    private HostState makeHostState(String id, boolean up) {
        HostState hostState = new HostState(id);
        hostState.setHost("pretend_host");
        hostState.setUp(up);
        hostState.setReplicas(new JobKey[]{});
        hostState.setStopped(new JobKey[]{});
        return hostState;
    }
}