/*******************************************************************************
*
* Copyright (c) 2004-2009, Oracle Corporation
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
*
*
*
*
*******************************************************************************/
package hudson.slaves;
import hudson.model.AsyncPeriodicWork;
import hudson.model.TaskListener;
import hudson.model.Hudson;
import hudson.model.Computer;
import hudson.util.TimeUnit2;
import hudson.remoting.VirtualChannel;
import hudson.remoting.Channel;
import hudson.remoting.Callable;
import hudson.Extension;
import java.io.IOException;
import java.util.logging.Logger;
/**
 * Makes sure that connections to slaves are alive, and if they are not, cuts
 * them off.
 *
 * <p>If we only relied on the TCP retransmission timeout for this, the time it
 * takes to detect a bad connection would be in the order of tens of minutes,
 * so we take matters into our own hands.
 *
 * <p>The monitor does nothing unless the boolean system property
 * <tt>hudson.slaves.ConnectionActivityMonitor.enabled</tt> is set. Timing can
 * be tuned through the <tt>.timeToPing</tt>, <tt>.frequency</tt> and
 * <tt>.timeout</tt> system properties under the same prefix (all values in
 * milliseconds).
 *
 * @author Kohsuke Kawaguchi
 * @since 1.325
 */
@Extension
public class ConnectionActivityMonitor extends AsyncPeriodicWork {

    public ConnectionActivityMonitor() {
        super("Connection Activity monitoring to slaves");
    }

    /**
     * Checks every computer whose channel is a {@link Channel}: pings the ones
     * that have been silent for longer than {@link #TIME_TILL_PING} and
     * disconnects the ones that stayed silent for more than {@link #TIMEOUT}
     * after the first ping was sent.
     *
     * @param listener receives the output of this periodic task (not used here)
     * @throws IOException if sending a ping over a channel fails
     * @throws InterruptedException declared by the overridden method
     */
    protected void execute(TaskListener listener) throws IOException, InterruptedException {
        if (!enabled) {
            return;
        }

        long now = System.currentTimeMillis();
        for (Computer c : Hudson.getInstance().getComputers()) {
            VirtualChannel ch = c.getChannel();
            if (ch instanceof Channel) {
                Channel channel = (Channel) ch;
                if (now - channel.getLastHeard() > TIME_TILL_PING) {
                    // haven't heard from this slave for a while.
                    // The channel property holds the time of the first unanswered ping, if any.
                    Long lastPing = (Long) channel.getProperty(ConnectionActivityMonitor.class);
                    if (lastPing != null && now - lastPing > TIMEOUT) {
                        LOGGER.info("Repeated ping attempts failed on " + c.getName() + ". Disconnecting");
                        c.disconnect(OfflineCause.create(Messages._ConnectionActivityMonitor_OfflineCause()));
                    } else {
                        // send a ping. if we receive a reply, it will be reflected in the next getLastHeard() call.
                        channel.callAsync(PING_COMMAND);
                        if (lastPing == null) {
                            // remember only the first ping, so that the timeout is
                            // measured from the start of the silence handling
                            channel.setProperty(ConnectionActivityMonitor.class, now);
                        }
                    }
                } else {
                    // we are receiving data nicely, so forget any outstanding ping
                    channel.setProperty(ConnectionActivityMonitor.class, null);
                }
            }
        }
    }

    /**
     * Returns how often {@link #execute(TaskListener)} should run, in milliseconds.
     *
     * @return {@link #FREQUENCY} when the monitor is enabled, otherwise 30 days
     */
    public long getRecurrencePeriod() {
        return enabled ? FREQUENCY : TimeUnit2.DAYS.toMillis(30);
    }

    /**
     * Time till initial ping, in milliseconds. Defaults to 3 minutes;
     * overridable with the <tt>.timeToPing</tt> system property.
     */
    private static final long TIME_TILL_PING = Long.getLong(ConnectionActivityMonitor.class.getName() + ".timeToPing", TimeUnit2.MINUTES.toMillis(3));

    /**
     * Interval between two checks while the monitor is enabled, in milliseconds.
     * Defaults to 10 seconds; overridable with the <tt>.frequency</tt> system property.
     */
    private static final long FREQUENCY = Long.getLong(ConnectionActivityMonitor.class.getName() + ".frequency", TimeUnit2.SECONDS.toMillis(10));

    /**
     * When do we abandon the effort and cut off? Measured in milliseconds from
     * the first unanswered ping. Defaults to 4 minutes; overridable with the
     * <tt>.timeout</tt> system property.
     *
     * <p>This used to read the <tt>.timeToPing</tt> property, which made it
     * impossible to tune the timeout independently of {@link #TIME_TILL_PING}.
     */
    private static final long TIMEOUT = Long.getLong(ConnectionActivityMonitor.class.getName() + ".timeout", TimeUnit2.MINUTES.toMillis(4));

    // disabled by default until proven in the production
    public boolean enabled = Boolean.getBoolean(ConnectionActivityMonitor.class.getName() + ".enabled");

    private static final PingCommand PING_COMMAND = new PingCommand();

    /**
     * No-op command sent over the channel; only the fact that a reply comes
     * back matters, not its value.
     */
    private static final class PingCommand implements Callable<Void, RuntimeException> {
        public Void call() throws RuntimeException {
            return null;
        }

        private static final long serialVersionUID = 1L;
    }

    private static final Logger LOGGER = Logger.getLogger(ConnectionActivityMonitor.class.getName());
}