package com.workshare.msnos.core.cloud;
import com.workshare.msnos.core.Cloud;
import com.workshare.msnos.core.Message;
import com.workshare.msnos.core.MessageBuilder;
import com.workshare.msnos.core.RemoteEntity;
import com.workshare.msnos.soup.time.SystemTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.UUID;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
public class AgentWatchdog {
private static Logger log = LoggerFactory.getLogger(AgentWatchdog.class);
private final Cloud cloud;
private final ScheduledExecutorService scheduler;
private static final long AGENT_TIMEOUT = Long.getLong("msnos.core.agents.timeout.millis", 90000L);
private static final long AGENT_RETRIES = Long.getLong("msnos.core.agents.retries.num", 3);
public AgentWatchdog(Cloud cloud, ScheduledExecutorService executor) {
this.cloud = cloud;
this.scheduler = executor;
}
public void start() {
final long period = AGENT_TIMEOUT / 2;
log.debug("Probing agent every {} milliseconds", period);
scheduler.scheduleAtFixedRate(new Runnable() {
@Override
public void run() {
probeQuietAgents();
}
}, period, period, TimeUnit.MILLISECONDS);
}
private void probeQuietAgents() {
log.trace("Probing quite agents...");
for (RemoteEntity agent : cloud.getRemoteAgents()) {
final long currentTime = SystemTime.asMillis();
final long agentTime = agent.getAccessTime();
if (agentTime < currentTime - AGENT_TIMEOUT) {
log.debug("- sending ping to "+uuidOf(agent)+" - agentTime {}, currentTime {}", agentTime, currentTime);
try {
cloud.send(new MessageBuilder(Message.Type.PIN, cloud, agent).make());
} catch (IOException e) {
log.debug("Unexpected exception pinging agent " + agent, e);
}
}
if (agentTime < currentTime - (AGENT_TIMEOUT * AGENT_RETRIES)) {
log.debug("- remote agent {} removed due to inactivity: {}", uuidOf(agent), agent);
cloud.removeFaultyAgent(agent);
}
}
log.trace("Done!");
}
private UUID uuidOf(RemoteEntity agent) {
return agent.getIden().getUUID();
}
}