/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.hydra.job; import java.util.LinkedHashSet; import java.util.Map; import java.util.Set; import com.addthis.hydra.job.HostFailWorker.FailState; import com.addthis.hydra.job.mq.HostState; import com.addthis.hydra.job.spawn.Spawn; import com.addthis.hydra.job.store.SpawnDataStore; import com.addthis.hydra.job.store.SpawnDataStoreKeys; import com.addthis.maljson.JSONArray; import com.addthis.maljson.JSONException; import com.addthis.maljson.JSONObject; import com.google.common.collect.ImmutableMap; import com.gs.collections.impl.factory.Sets; import org.apache.commons.lang3.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * A class storing the internal state of the host failure queue. * * All changes are immediately saved to the SpawnDataStore. */ public class HostFailState { private static final Logger log = LoggerFactory.getLogger(HostFailState.class); private static final String filesystemDeadKey = "deadFs"; private static final String filesystemOkayKey = "okayFs"; private static final String filesystemFullKey = "fullFs"; private final Spawn spawn; private final Set<String> failFsDead; private final Set<String> failFsOkay; private final Set<String> fsFull; private final Map<FailState, Set<String>> hostsToFailByType; public HostFailState(Spawn spawn) { this.spawn = spawn; failFsDead = new LinkedHashSet<>(); failFsOkay = new LinkedHashSet<>(); fsFull = new LinkedHashSet<>(); hostsToFailByType = ImmutableMap.of(FailState.FAILING_FS_DEAD, failFsDead, FailState.DISK_FULL, fsFull, FailState.FAILING_FS_OKAY, failFsOkay); } /** * @return The set of all minion ids across all failure queues. */ public Set<String> queuedHosts() { synchronized (hostsToFailByType) { return Sets.unionAll(failFsDead, failFsOkay, fsFull); } } /** * Add a new host to the failure queue * * @param hostId The host id to add * @param failState The state of the host being failed */ public void putHost(String hostId, FailState failState) { synchronized (hostsToFailByType) { if (failFsDead.contains(hostId)) { log.info("Ignoring fs-okay failure of " + hostId + " because it is already being failed fs-dead"); return; } switch (failState) { case FAILING_FS_DEAD: fsFull.remove(hostId); failFsOkay.remove(hostId); failFsDead.add(hostId); break; case FAILING_FS_OKAY: fsFull.remove(hostId); failFsOkay.add(hostId); break; case DISK_FULL: fsFull.add(hostId); failFsOkay.remove(hostId); break; default: log.warn("Unexcepted failState: {}", failState); } saveState(); } } /** * Retrieve the next host to fail * * @return The uuid of the next host to fail, and whether the file system is dead. If the queue is empty, return null. */ public Pair<String, FailState> nextHostToFail() { synchronized (hostsToFailByType) { String hostUuid = findFirstHost(failFsDead, false); if (hostUuid != null) { return Pair.of(hostUuid, FailState.FAILING_FS_DEAD); } hostUuid = findFirstHost(fsFull, true); if (hostUuid != null) { return Pair.of(hostUuid, FailState.DISK_FULL); } hostUuid = findFirstHost(failFsOkay, true); if (hostUuid != null) { return Pair.of(hostUuid, FailState.FAILING_FS_OKAY); } return null; } } private String findFirstHost(Set<String> hosts, boolean requireUp) { for (String hostUuid : hosts) { if (requireUp) { HostState host = spawn.hostManager.getHostState(hostUuid); if (host != null && !host.isDead() && host.isUp()) { return hostUuid; } } else { return hostUuid; } } return null; } /** * Cancel the failure for a host * * @param hostId The uuid to cancel */ public void removeHost(String hostId) { synchronized (hostsToFailByType) { failFsDead.remove(hostId); failFsOkay.remove(hostId); fsFull.remove(hostId); saveState(); } } /** * Load the stored state from the SpawnDataStore * * @return True if at least one host was loaded */ public boolean loadState() { SpawnDataStore spawnDataStore = spawn.getSpawnDataStore(); if (spawnDataStore == null) { return false; } String raw = spawnDataStore.get(SpawnDataStoreKeys.SPAWN_HOST_FAIL_WORKER_PATH); if (raw == null) { return false; } synchronized (hostsToFailByType) { try { JSONObject decoded = new JSONObject(raw); loadHostsFromJSONArray(failFsOkay, decoded.optJSONArray(filesystemOkayKey)); log.info("Loaded hosts to fail fs-ok: {}", failFsOkay); loadHostsFromJSONArray(failFsDead, decoded.optJSONArray(filesystemDeadKey)); log.info("Loaded hosts to fail fs-dead: {}", failFsDead); loadHostsFromJSONArray(fsFull, decoded.optJSONArray(filesystemFullKey)); log.info("Loaded hosts to fail fs-full: {}", fsFull); return true; } catch (Exception e) { log.warn("Failed to load HostFailState: raw={}", raw, e); return false; } } } /** * Internal method to convert a JSONArray from the SpawnDataStore to a list of hosts */ private void loadHostsFromJSONArray(Set<String> modified, JSONArray arr) throws JSONException { if (arr == null) { return; } for (int i = 0; i < arr.length(); i++) { modified.add(arr.getString(i)); } } /** * Save the state to the SpawnDataStore */ public void saveState() { try { synchronized (hostsToFailByType) { JSONObject jsonObject = new JSONObject(); jsonObject.put(filesystemDeadKey, new JSONArray(failFsDead)); jsonObject.put(filesystemOkayKey, new JSONArray(failFsOkay)); jsonObject.put(filesystemFullKey, new JSONArray(fsFull)); spawn.getSpawnDataStore().put(SpawnDataStoreKeys.SPAWN_HOST_FAIL_WORKER_PATH, jsonObject.toString()); } } catch (Exception e) { log.warn("Failed to save HostFailState: " + e, e); } } /** * Get the state of failure for a host * * @param hostId The host to check * @return ALIVE if the host is not being failed; otherwise, a description of the type of failure */ public FailState getState(String hostId) { synchronized (hostsToFailByType) { if (failFsOkay.contains(hostId)) { return FailState.FAILING_FS_OKAY; } else if (failFsDead.contains(hostId)) { return FailState.FAILING_FS_DEAD; } else if (fsFull.contains(hostId)) { return FailState.DISK_FULL; } else { return FailState.ALIVE; } } } }