package com.breakersoft.plow.crond;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import com.breakersoft.plow.ExitStatus;
import com.breakersoft.plow.Node;
import com.breakersoft.plow.Proc;
import com.breakersoft.plow.Signal;
import com.breakersoft.plow.Task;
import com.breakersoft.plow.dispatcher.DispatchService;
import com.breakersoft.plow.service.JobService;
import com.breakersoft.plow.service.NodeService;
import com.breakersoft.plow.thrift.NodeState;
import com.breakersoft.plow.thrift.TaskFilterT;
import com.breakersoft.plow.thrift.TaskState;
/**
 * Crond task, registered as {@code CrondTask.DOWN_NODE_CHECK}, that asks the
 * {@link NodeService} for unresponsive nodes, flips each one to
 * {@link NodeState#DOWN}, and then stops every task the {@link JobService}
 * reports for that node, deallocating the proc of each task that was stopped.
 *
 * <p>Failures are isolated: an exception raised while handling one node or one
 * task is logged and the sweep continues with the next one, so a single bad
 * record cannot leave the remaining tasks or nodes unprocessed.
 */
public class DownNodeChecker extends AbstractCrondTask {

    private static final Logger logger = LoggerFactory.getLogger(DownNodeChecker.class);

    @Autowired
    private NodeService nodeService;

    @Autowired
    private JobService jobService;

    @Autowired
    private DispatchService dispatchService;

    /**
     * Creates the checker and registers it under {@code CrondTask.DOWN_NODE_CHECK}.
     */
    public DownNodeChecker() {
        super(CrondTask.DOWN_NODE_CHECK);
    }

    /**
     * Runs one sweep: fetches the unresponsive nodes and processes each of them
     * independently. Returns immediately when there are none.
     */
    @Override
    protected void run() {
        logger.info("Running check for down nodes.");

        final List<Node> nodes = nodeService.getUnresponsiveNodes();
        if (nodes.isEmpty()) {
            return;
        }

        logger.info("Found {} unresponsive nodes, flipping to down state.", nodes.size());

        for (Node node : nodes) {
            try {
                handleDownNode(node);
            } catch (RuntimeException e) {
                // Keep going: one failing node must not prevent the remaining
                // unresponsive nodes from being handled in this run.
                logger.error("Failed to process unresponsive node " + node, e);
            }
        }
    }

    /**
     * Marks a single node as down and stops the tasks reported for it.
     *
     * @param node the unresponsive node to process
     */
    private void handleDownNode(Node node) {
        logger.info("{} is down.", node);

        if (!nodeService.setNodeState(node, NodeState.DOWN)) {
            // The state change was not applied by this call; presumably
            // something else already set the node to down, so it is skipped.
            return;
        }

        final TaskFilterT filter = new TaskFilterT();
        filter.addToNodeIds(node.getNodeId().toString());

        // Now stop all the tasks running on this node.
        for (Task task : jobService.getTasks(filter)) {
            try {
                if (dispatchService.stopTask(
                        task, TaskState.WAITING, ExitStatus.FAIL, Signal.NODE_DOWN)) {
                    final Proc proc = nodeService.getProc(task);
                    dispatchService.deallocateProc(proc, "Node is unresponsive.");
                    // Might want to try to actually kill the process here just in case.
                }
            } catch (RuntimeException e) {
                // The node has already been flipped to DOWN at this point, so
                // continue with the remaining tasks instead of abandoning them.
                logger.error("Failed to stop task " + task + " on down node " + node, e);
            }
        }
    }
}