package io.cattle.platform.agent.server.ping.impl;
import static com.google.common.util.concurrent.Futures.*;
import io.cattle.platform.agent.AgentLocator;
import io.cattle.platform.agent.RemoteAgent;
import io.cattle.platform.agent.server.ping.PingMonitor;
import io.cattle.platform.agent.server.ping.dao.PingDao;
import io.cattle.platform.agent.server.resource.impl.AgentResourcesMonitor;
import io.cattle.platform.agent.server.util.AgentConnectionUtils;
import io.cattle.platform.agent.util.AgentUtils;
import io.cattle.platform.archaius.util.ArchaiusUtil;
import io.cattle.platform.core.constants.AgentConstants;
import io.cattle.platform.core.constants.CommonStatesConstants;
import io.cattle.platform.core.model.Agent;
import io.cattle.platform.engine.process.ExitReason;
import io.cattle.platform.engine.process.ProcessInstanceException;
import io.cattle.platform.engine.process.util.ProcessEngineUtils;
import io.cattle.platform.eventing.EventCallOptions;
import io.cattle.platform.framework.event.Ping;
import io.cattle.platform.ha.monitor.PingInstancesMonitor;
import io.cattle.platform.lock.LockDelegator;
import io.cattle.platform.lock.definition.LockDefinition;
import io.cattle.platform.object.ObjectManager;
import io.cattle.platform.object.process.ObjectProcessManager;
import io.cattle.platform.task.Task;
import io.cattle.platform.task.TaskOptions;
import java.util.concurrent.TimeUnit;
import javax.inject.Inject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.netflix.config.DynamicLongProperty;
/**
 * Scheduled task that pings agents and dispatches their replies.
 *
 * <p>On every {@link #run()} each agent returned by {@code PingDao.findAgentsToPing()} is pinged,
 * provided the agent's connection lock is already held or can be acquired through the
 * {@link LockDelegator}. Replies are forwarded to the {@link AgentResourcesMonitor} and the
 * {@link PingInstancesMonitor}; failures are counted per agent and, once the count reaches
 * {@code agent.ping.reconnect.after.failed.count}, a reconnect process is scheduled for agents
 * that are still in the active state.
 */
public class PingMonitorImpl implements PingMonitor, Task, TaskOptions {

    private static final DynamicLongProperty BAD_PINGS = ArchaiusUtil.getLong("agent.ping.reconnect.after.failed.count");
    private static final DynamicLongProperty PING_TIMEOUT = ArchaiusUtil.getLong("agent.ping.timeout.seconds");
    private static final DynamicLongProperty PING_STATS_EVERY = ArchaiusUtil.getLong("agent.ping.stats.every");
    private static final DynamicLongProperty PING_RESOURCES_EVERY = ArchaiusUtil.getLong("agent.ping.resources.every");
    private static final DynamicLongProperty PING_INSTANCES_EVERY = ArchaiusUtil.getLong("agent.ping.instances.every");
    private static final DynamicLongProperty PING_SCHEDULE = ArchaiusUtil.getLong("task.agent.ping.schedule");

    private static final Logger log = LoggerFactory.getLogger(PingMonitorImpl.class);

    @Inject
    AgentResourcesMonitor agentResourceManager;
    @Inject
    PingInstancesMonitor pingInstanceMonitor;
    @Inject
    ObjectProcessManager processManager;
    @Inject
    ObjectManager objectManager;

    /*
     * Number of completed run() invocations; drives isInterval(). The misspelled name is kept
     * because the field is package-visible and may be referenced from outside this class.
     */
    int interation = 0;

    @Inject
    PingDao pingDao;
    @Inject
    LockDelegator lockDelegator;
    @Inject
    AgentLocator agentLocator;
    @Inject
    ListeningExecutorService executorService;

    /*
     * Per-agent ping status keyed by agent id, created on first access. Entries expire after
     * three times the value of task.agent.ping.schedule (in seconds) without being accessed;
     * the property is read once, when this instance is constructed.
     */
    LoadingCache<Long, PingStatus> status = CacheBuilder.newBuilder()
            .expireAfterAccess(PING_SCHEDULE.get() * 3, TimeUnit.SECONDS)
            .build(new CacheLoader<Long, PingStatus>() {
                @Override
                public PingStatus load(Long key) throws Exception {
                    return new PingStatus(key);
                }
            });

    /**
     * Builds the ping for an agent whose connection this process owns and sends it.
     *
     * <p>The stats, resources and instances options are switched on only on iterations that
     * match their respective configured intervals.
     *
     * @param agent the agent to ping
     */
    protected void handleOwned(Agent agent) {
        Ping ping = AgentUtils.newPing(agent);

        if (isInterval(PING_STATS_EVERY.get())) {
            ping.setOption(Ping.STATS, true);
        }
        if (isInterval(PING_RESOURCES_EVERY.get())) {
            ping.setOption(Ping.RESOURCES, true);
        }
        if (isInterval(PING_INSTANCES_EVERY.get())) {
            ping.setOption(Ping.INSTANCES, true);
        }

        doPing(agent, ping);
    }

    /**
     * Tells whether the current iteration falls on the given interval.
     *
     * @param every interval, in iterations
     * @return {@code true} when the iteration counter is a multiple of {@code every};
     *         {@code false} when {@code every} is zero
     */
    protected boolean isInterval(long every) {
        // A zero interval would make the remainder below throw ArithmeticException and abort
        // the whole run; treat it as "never" so the plain pings still go out.
        if (every == 0) {
            return false;
        }
        return interation % every == 0;
    }

    /**
     * Pings the agent if its connection lock is already held or can be acquired now;
     * otherwise does nothing.
     *
     * @param agent the agent to ping
     */
    protected void ping(Agent agent) {
        LockDefinition lockDef = AgentConnectionUtils.getConnectionLock(agent);

        if (!lockDelegator.isLocked(lockDef) && !lockDelegator.tryLock(lockDef)) {
            return;
        }

        handleOwned(agent);
    }

    /**
     * Sends the ping asynchronously and routes the outcome to {@link #pingSuccess} or
     * {@link #pingFailure}.
     *
     * @param agent the agent being pinged
     * @param ping the ping event to send
     */
    protected void doPing(final Agent agent, Ping ping) {
        RemoteAgent remoteAgent = agentLocator.lookupAgent(agent);
        // Timeout property is in seconds and is scaled by 1000 here.
        // NOTE(review): presumably the arguments are (retries, timeoutMillis) - confirm
        // against EventCallOptions.
        EventCallOptions options = new EventCallOptions(0, PING_TIMEOUT.get() * 1000);

        // Run the callbacks on the injected executor rather than on whichever thread happens
        // to complete the future: both callbacks call into other services (reply processing,
        // object reload, process scheduling).
        addCallback(remoteAgent.call(ping, Ping.class, options), new FutureCallback<Ping>() {
            @Override
            public void onSuccess(Ping pong) {
                pingSuccess(agent, pong);
            }

            @Override
            public void onFailure(Throwable t) {
                // Keep the cause available for diagnosis; the counting and the user-facing
                // log lines are handled by pingFailure().
                log.debug("Ping call to agent [{}] failed", agent.getId(), t);
                pingFailure(agent);
            }
        }, executorService);
    }

    /**
     * Records a successful ping for the agent and forwards the reply to the resource and
     * instance monitors.
     *
     * @param agent the agent that replied
     * @param pong the reply received from the agent
     */
    protected void pingSuccess(Agent agent, Ping pong) {
        status.getUnchecked(agent.getId()).success();
        agentResourceManager.processPingReply(pong);
        pingInstanceMonitor.pingReply(pong);
    }

    /**
     * Records a failed ping for the agent, logs it, and schedules a reconnect once the failure
     * count reaches the configured threshold and the reloaded agent is still active.
     *
     * @param agent the agent whose ping failed
     * @throws ProcessInstanceException if scheduling the reconnect fails for any reason other
     *         than {@link ExitReason#CANCELED}
     */
    protected void pingFailure(Agent agent) {
        long count = status.getUnchecked(agent.getId()).failed();

        // The first couple of misses are only informational; from the third on they are errors.
        if (count < 3) {
            log.info("Missed ping from agent [{}] count [{}]", agent.getId(), count);
        } else {
            log.error("Failed to get ping from agent [{}] count [{}]", agent.getId(), count);
        }

        if (count < BAD_PINGS.get()) {
            return;
        }

        try {
            // Re-read the agent so the state check is made against its current state.
            agent = objectManager.reload(agent);
            if (CommonStatesConstants.ACTIVE.equals(agent.getState())) {
                log.error("Scheduling reconnect for [{}]", agent.getId());
                processManager.scheduleProcessInstance(AgentConstants.PROCESS_RECONNECT, agent, null);
            }
        } catch (ProcessInstanceException e) {
            // A cancelled process instance is tolerated; anything else is propagated.
            if (e.getExitReason() != ExitReason.CANCELED) {
                throw e;
            }
        }
    }

    /**
     * Pings every agent returned by the DAO and then advances the iteration counter.
     * Does nothing when the process engine is not enabled.
     */
    @Override
    public void run() {
        if (!ProcessEngineUtils.enabled()) {
            return;
        }

        for (Agent agent : pingDao.findAgentsToPing()) {
            ping(agent);
        }

        interation++;
    }

    /**
     * @return always {@code false}
     */
    @Override
    public boolean isShouldRecord() {
        return false;
    }

    /**
     * @return always {@code false}
     */
    @Override
    public boolean isShouldLock() {
        return false;
    }

    /**
     * @return the task name, {@code "agent.ping"}
     */
    @Override
    public String getName() {
        return "agent.ping";
    }
}