/*******************************************************************************
*
* Copyright (c) 2004-2009 Oracle Corporation.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
*
* Kohsuke Kawaguchi, Stephen Connolly
*
*
*******************************************************************************/
package hudson.slaves;
import hudson.model.*;
import hudson.model.Hudson.MasterComputer;
import hudson.remoting.Channel;
import hudson.remoting.VirtualChannel;
import hudson.remoting.Callable;
import hudson.util.StreamTaskListener;
import hudson.util.NullStream;
import hudson.util.RingBufferLogHandler;
import hudson.util.Futures;
import hudson.FilePath;
import hudson.lifecycle.WindowsSlaveInstaller;
import hudson.Util;
import hudson.AbortException;
import hudson.remoting.Launcher;
import static hudson.slaves.SlaveComputer.LogHolder.SLAVE_LOG_HANDLER;
import hudson.slaves.OfflineCause.ChannelTermination;
import java.io.File;
import java.io.OutputStream;
import java.io.InputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.logging.Level;
import java.util.logging.LogRecord;
import java.util.logging.Logger;
import java.util.logging.Handler;
import java.util.List;
import java.util.Collections;
import java.util.ArrayList;
import java.nio.charset.Charset;
import java.util.concurrent.Future;
import java.security.Security;
import hudson.util.io.ReopenableFileOutputStream;
import org.kohsuke.stapler.StaplerRequest;
import org.kohsuke.stapler.StaplerResponse;
import org.kohsuke.stapler.QueryParameter;
import org.kohsuke.stapler.HttpResponse;
import org.kohsuke.stapler.HttpRedirect;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletResponse;
import org.eclipse.hudson.security.HudsonSecurityManager;
/**
* {@link Computer} for {@link Slave}s.
*
* @author Kohsuke Kawaguchi
*/
public class SlaveComputer extends Computer {
/**
 * Channel to the slave agent, or {@code null} while no connection is
 * established. Assigned by {@code setChannel}, cleared by
 * {@code closeChannel} and by the close listener registered on the channel.
 */
private volatile Channel channel;
/**
 * Flag returned by {@link #isAcceptingTasks()} and updated through
 * {@link #setAcceptingTasks(boolean)}. Starts out {@code true}.
 */
private volatile transient boolean acceptingTasks = true;
/**
 * Default charset reported by the slave JVM; recorded by {@code setChannel}
 * when a connection is established.
 */
private Charset defaultCharset;
/**
 * Whether the connected slave is a Unix machine. Set by {@code setChannel}
 * and reset to {@code null} by {@code closeChannel}.
 */
private Boolean isUnix;
/**
 * Effective {@link ComputerLauncher} that hides the details of how we
 * launch a slave agent on this computer.
 *
 * <p> This is normally the same as {@link Slave#getLauncher()} but can be
 * different. See {@link #grabLauncher(Node)}.
 */
private ComputerLauncher launcher;
/**
 * Perpetually writable log file.
 */
private final ReopenableFileOutputStream log;
/**
 * {@link StreamTaskListener} that wraps {@link #log}, hence perpetually
 * writable.
 */
private final TaskListener taskListener;
/**
 * Number of failed attempts to reconnect to this node (so that if we keep
 * failing to reconnect, we can stop trying.) Incremented by
 * {@link #tryReconnect()} and reset to zero when a channel is established.
 */
private transient int numRetryAttempt;
/**
 * Tracks the status of the last launch operation, which is always
 * asynchronous. This can be used to wait for the completion, or cancel the
 * launch activity.
 */
private volatile Future<?> lastConnectActivity = null;
/**
 * Marker that is non-null once this class's field initializers have run.
 * {@code setNode} tests it to avoid launching before the object is fully
 * constructed.
 * NOTE(review): presumably the superclass constructor invokes
 * {@code setNode} before this initializer runs -- confirm against Computer.
 */
private Object constructed = new Object();
public SlaveComputer(Slave slave) {
super(slave);
this.log = new ReopenableFileOutputStream(getLogFile(), 5);
this.taskListener = new StreamTaskListener(log);
}
/**
 * Reports whether this slave computer is currently willing to take tasks.
 *
 * @return the value last stored through {@link #setAcceptingTasks(boolean)};
 * {@code true} until it is changed.
 */
@Override
public boolean isAcceptingTasks() {
    return this.acceptingTasks;
}
/**
 * Lets a {@linkplain hudson.slaves.ComputerLauncher launcher} or a
 * {@linkplain hudson.slaves.RetentionStrategy retention strategy} switch
 * task acceptance on this slave computer on or off.
 *
 * @param acceptingTasks {@code true} if the slave can accept tasks,
 * {@code false} to suspend acceptance.
 */
public void setAcceptingTasks(final boolean acceptingTasks) {
    this.acceptingTasks = acceptingTasks;
}
/**
 * Tells whether this computer is a Unix machine rather than a Windows one.
 *
 * @return {@code Boolean.TRUE} for Unix, {@code Boolean.FALSE} for Windows,
 * or {@code null} if the computer is disconnected and the platform is
 * therefore unknown.
 */
public Boolean isUnix() {
    return this.isUnix;
}
/**
 * Returns the node behind this computer, narrowed to {@link Slave}.
 *
 * @return whatever the superclass reports as the node, cast to {@link Slave}.
 */
@Override
public Slave getNode() {
    final Node node = super.getNode();
    return (Slave) node;
}
/**
 * Picks the icon for this computer: a flashing icon while the most recent
 * connect activity is still running, otherwise the superclass's icon.
 *
 * @return the icon file name.
 */
@Override
public String getIcon() {
    // Snapshot the volatile field so the null check and isDone() see the same future.
    final Future<?> pending = lastConnectActivity;
    final boolean launchInProgress = pending != null && !pending.isDone();
    return launchInProgress ? "computer-flash.gif" : super.getIcon();
}
/**
 * Tells whether the effective launcher is a {@link JNLPLauncher}.
 *
 * @return {@code true} if the launcher is a {@link JNLPLauncher}.
 * @deprecated since 2008-05-20.
 */
@Deprecated
@Override
public boolean isJnlpAgent() {
    final ComputerLauncher effective = launcher;
    return effective instanceof JNLPLauncher;
}
/**
 * Delegates to the effective launcher to decide whether a launch can be
 * initiated for this computer.
 *
 * @return the launcher's {@code isLaunchSupported()} answer.
 */
@Override
public boolean isLaunchSupported() {
    final ComputerLauncher effective = launcher;
    return effective.isLaunchSupported();
}
/**
 * Returns the effective launcher, i.e. the one obtained from
 * {@link #grabLauncher(Node)} the last time the node was set.
 *
 * @return the launcher currently held by this computer.
 */
public ComputerLauncher getLauncher() {
    return this.launcher;
}
/**
 * Starts an asynchronous launch of the slave agent, or hands back an
 * existing activity.
 *
 * <ul>
 * <li>If a channel is already set, returns {@code Futures.precomputed(null)}.</li>
 * <li>If a launch is in progress and {@code forceReconnect} is false,
 * returns the current {@link #lastConnectActivity}.</li>
 * <li>Otherwise calls {@link #closeChannel()} and submits a new launch task
 * to {@code Computer.threadPoolForRemoting}, remembering it as the last
 * connect activity.</li>
 * </ul>
 *
 * @param forceReconnect if true, a new launch is submitted even when one
 * is already in progress.
 * @return the future tracking the launch.
 */
protected Future<?> _connect(boolean forceReconnect) {
    if (channel != null) {
        return Futures.precomputed(null);
    }
    final boolean launchInProgress = isConnecting();
    if (launchInProgress) {
        if (!forceReconnect) {
            return lastConnectActivity;
        }
        logger.fine("Forcing a reconnect on " + getName());
    }
    closeChannel();
    // The launch runs on another thread so that the (typically lengthy)
    // operation does not block the UI thread.
    final java.util.concurrent.Callable<Object> launchTask = new java.util.concurrent.Callable<Object>() {
        public Object call() throws Exception {
            try {
                log.rewind();
                try {
                    launcher.launch(SlaveComputer.this, taskListener);
                    return null;
                } catch (AbortException aborted) {
                    taskListener.error(aborted.getMessage());
                    throw aborted;
                } catch (IOException ioFailure) {
                    Util.displayIOException(ioFailure, taskListener);
                    ioFailure.printStackTrace(taskListener.error(Messages.ComputerLauncher_unexpectedError()));
                    throw ioFailure;
                } catch (InterruptedException interrupted) {
                    interrupted.printStackTrace(taskListener.error(Messages.ComputerLauncher_abortedLaunch()));
                    throw interrupted;
                }
            } finally {
                // No channel after the attempt means the launch did not succeed.
                if (channel == null) {
                    offlineCause = new OfflineCause.LaunchFailed();
                }
            }
        }
    };
    final Future<?> activity = Computer.threadPoolForRemoting.submit(launchTask);
    lastConnectActivity = activity;
    return activity;
}
/**
 * Forwards the "task accepted" event to the superclass, then to the
 * launcher and to the retention strategy when either of them implements
 * {@link ExecutorListener}.
 *
 * @param executor the executor that accepted the task.
 * @param task the accepted task.
 */
@Override
public void taskAccepted(Executor executor, Queue.Task task) {
    super.taskAccepted(executor, task);
    if (launcher instanceof ExecutorListener) {
        ((ExecutorListener) launcher).taskAccepted(executor, task);
    }
    // Use getRetentionStrategy(), as taskCompleted and taskCompletedWithProblems
    // do, instead of dereferencing getNode() directly: it copes with
    // getNode() returning null and so avoids a NullPointerException here.
    RetentionStrategy r = getRetentionStrategy();
    if (r instanceof ExecutorListener) {
        ((ExecutorListener) r).taskAccepted(executor, task);
    }
}
/**
 * Forwards the "task completed" event to the superclass, then to the
 * launcher and to the retention strategy when either of them implements
 * {@link ExecutorListener}.
 *
 * @param executor the executor that ran the task.
 * @param task the completed task.
 * @param durationMS duration value passed through to the listeners.
 */
@Override
public void taskCompleted(Executor executor, Queue.Task task, long durationMS) {
    super.taskCompleted(executor, task, durationMS);
    final ComputerLauncher effectiveLauncher = launcher;
    if (effectiveLauncher instanceof ExecutorListener) {
        final ExecutorListener launcherListener = (ExecutorListener) effectiveLauncher;
        launcherListener.taskCompleted(executor, task, durationMS);
    }
    final RetentionStrategy strategy = getRetentionStrategy();
    if (strategy instanceof ExecutorListener) {
        final ExecutorListener strategyListener = (ExecutorListener) strategy;
        strategyListener.taskCompleted(executor, task, durationMS);
    }
}
/**
 * Forwards the "task completed with problems" event to the superclass,
 * then to the launcher and to the retention strategy when either of them
 * implements {@link ExecutorListener}.
 *
 * @param executor the executor that ran the task.
 * @param task the task that finished.
 * @param durationMS duration value passed through to the listeners.
 * @param problems the throwable describing what went wrong.
 */
@Override
public void taskCompletedWithProblems(Executor executor, Queue.Task task, long durationMS, Throwable problems) {
    super.taskCompletedWithProblems(executor, task, durationMS, problems);
    final ComputerLauncher effectiveLauncher = launcher;
    if (effectiveLauncher instanceof ExecutorListener) {
        final ExecutorListener launcherListener = (ExecutorListener) effectiveLauncher;
        launcherListener.taskCompletedWithProblems(executor, task, durationMS, problems);
    }
    final RetentionStrategy strategy = getRetentionStrategy();
    if (strategy instanceof ExecutorListener) {
        final ExecutorListener strategyListener = (ExecutorListener) strategy;
        strategyListener.taskCompletedWithProblems(executor, task, durationMS, problems);
    }
}
/**
 * Tells whether a launch is currently under way.
 *
 * @return {@code true} when the computer is offline and the last connect
 * activity exists and has not finished.
 */
@Override
public boolean isConnecting() {
    // Snapshot the volatile field once so both checks see the same future.
    final Future<?> pending = lastConnectActivity;
    if (!isOffline()) {
        return false;
    }
    if (pending == null) {
        return false;
    }
    return !pending.isDone();
}
/**
 * Rewinds the launch log and returns it for writing.
 *
 * @return the log stream, or a {@link NullStream} if rewinding failed
 * (the failure is logged at SEVERE level).
 */
public OutputStream openLogFile() {
    OutputStream result;
    try {
        log.rewind();
        result = log;
    } catch (IOException rewindFailure) {
        logger.log(Level.SEVERE, "Failed to create log file " + getLogFile(), rewindFailure);
        result = new NullStream();
    }
    return result;
}
/**
 * Monitor held by {@code setChannel} while it re-checks that no channel is
 * set yet and then stores the new channel together with the state that
 * describes it.
 */
private final Object channelLock = new Object();
/**
 * Convenience overload that uses the logger of the given listener as the
 * launch log and delegates to the {@link OutputStream}-based
 * {@code setChannel}.
 *
 * @param in stream connected to the remote peer.
 * @param out stream connected to the remote peer.
 * @param taskListener listener whose logger receives the launch log.
 * @param listener passed through unchanged; may be null.
 */
public void setChannel(InputStream in, OutputStream out, TaskListener taskListener, Channel.Listener listener) throws IOException, InterruptedException {
    final OutputStream launchLog = taskListener.getLogger();
    setChannel(in, out, launchLog, listener);
}
/**
 * Creates a {@link Channel} from the given streams and sets that to this
 * slave.
 *
 * <p> After the channel is created this method queries the slave for its
 * slave.jar version, platform and default charset, runs the slave-side
 * initialization, notifies every {@link ComputerListener} via
 * {@code preOnline}, publishes the channel, notifies the listeners via
 * {@code onOnline} and finally schedules queue maintenance.
 *
 * @param in Stream connected to the remote "slave.jar". It's the caller's
 * responsibility to do buffering on this stream, if that's necessary.
 * @param out Stream connected to the remote peer. It's the caller's
 * responsibility to do buffering on this stream, if that's necessary.
 * @param launchLog If non-null, receive the portion of data in {@code in}
 * before the data goes into the "binary mode". This is useful when the
 * established communication channel might include some data that might be
 * useful for debugging/trouble-shooting.
 * @param listener Gets a notification when the channel closes, to perform
 * clean up. Can be null. By the time this method is called, the cause of
 * the termination is reported to the user, so the implementation of the
 * listener doesn't need to do that again.
 * @throws IllegalStateException if a channel is already set, either on
 * entry or by the time the new channel is about to be published.
 * @throws IOException declared by the channel operations used here.
 * @throws InterruptedException declared by the remote calls used here.
 */
public void setChannel(InputStream in, OutputStream out, OutputStream launchLog, Channel.Listener listener) throws IOException, InterruptedException {
    if (this.channel != null) {
        throw new IllegalStateException("Already connected");
    }
    final TaskListener taskListener = new StreamTaskListener(launchLog);
    PrintStream log = taskListener.getLogger();
    Channel channel = new Channel(nodeName, threadPoolForRemoting, Channel.Mode.NEGOTIATE,
            in, out, launchLog);
    channel.addListener(new Channel.Listener() {
        @Override
        public void onClosed(Channel c, IOException cause) {
            SlaveComputer.this.channel = null;
            // Orderly shutdown will have null exception
            if (cause != null) {
                offlineCause = new ChannelTermination(cause);
                cause.printStackTrace(taskListener.error("Connection terminated"));
            } else {
                taskListener.getLogger().println("Connection terminated");
            }
            launcher.afterDisconnect(SlaveComputer.this, taskListener);
        }
    });
    if (listener != null) {
        channel.addListener(listener);
    }
    String slaveVersion = channel.call(new SlaveVersion());
    log.println("Slave.jar version: " + slaveVersion);
    boolean _isUnix = channel.call(new DetectOS());
    log.println(_isUnix ? hudson.model.Messages.Slave_UnixSlave() : hudson.model.Messages.Slave_WindowsSlave());
    String defaultCharsetName = channel.call(new DetectDefaultCharset());
    String remoteFs = getNode().getRemoteFS();
    if (_isUnix && !remoteFs.contains("/") && remoteFs.contains("\\")) {
        log.println("WARNING: " + remoteFs + " looks suspiciously like Windows path. Maybe you meant " + remoteFs.replace('\\', '/') + "?");
    }
    // Reuse the value fetched above instead of asking the node a second time.
    FilePath root = new FilePath(channel, remoteFs);
    channel.call(new SlaveInitializer());
    channel.call(new WindowsSlaveInstaller(remoteFs));
    for (ComputerListener cl : ComputerListener.all()) {
        cl.preOnline(this, channel, root, taskListener);
    }
    offlineCause = null;
    // update the data structure atomically to prevent others from seeing a channel that's not properly initialized yet
    synchronized (channelLock) {
        if (this.channel != null) {
            // check again. we used to have this entire method in a big synchronization block,
            // but Channel constructor blocks for an external process to do the connection
            // if CommandLauncher is used, and that cannot be interrupted because it blocks at InputStream.
            // so if the process hangs, it hangs the thread in a lock, and since Hudson will try to relaunch,
            // we'll end up queuing the lot of threads in a pseudo deadlock.
            // This implementation prevents that by avoiding a lock. HUDSON-1705 is likely a manifestation of this.
            channel.close();
            throw new IllegalStateException("Already connected");
        }
        // Resolve the charset first: Charset.forName can throw for a name this JVM
        // does not know, and in that case the channel must not have been published.
        defaultCharset = Charset.forName(defaultCharsetName);
        isUnix = _isUnix;
        numRetryAttempt = 0;
        // Write the volatile channel field last, so that a thread which observes a
        // non-null channel without holding the lock also observes the state set above.
        this.channel = channel;
    }
    for (ComputerListener cl : ComputerListener.all()) {
        cl.onOnline(this, taskListener);
    }
    log.println("Slave successfully connected and online");
    Hudson.getInstance().getQueue().scheduleMaintenance();
}
/**
 * Returns the channel to the slave agent.
 *
 * @return the current channel, or {@code null} when none is set.
 */
@Override
public Channel getChannel() {
    return this.channel;
}
/**
 * Returns the default charset that the slave JVM reported when the channel
 * was established.
 *
 * @return the recorded charset; {@code null} if none has been recorded yet.
 */
public Charset getDefaultCharset() {
    return this.defaultCharset;
}
/**
 * Fetches the log records buffered on the slave side.
 *
 * @return a copy of the slave's ring-buffer view, or an empty list when no
 * channel is set.
 * @throws IOException declared by the remote call.
 * @throws InterruptedException declared by the remote call.
 */
public List<LogRecord> getLogRecords() throws IOException, InterruptedException {
    // Read the volatile field exactly once: it can be nulled concurrently by a
    // disconnect, and re-reading it after the null check could throw a
    // NullPointerException.
    final Channel c = channel;
    if (c == null) {
        return Collections.emptyList();
    }
    return c.call(new Callable<List<LogRecord>, RuntimeException>() {
        public List<LogRecord> call() {
            return new ArrayList<LogRecord>(SLAVE_LOG_HANDLER.getView());
        }
    });
}
/**
 * Web-bound method that disconnects this computer and redirects back to
 * its page.
 *
 * <p> When no channel is set this does nothing but redirect; in particular
 * no permission check is made in that case. Otherwise the
 * {@code CONFIGURE} permission is checked and the computer is disconnected
 * with a cause naming the current user and the optional message.
 *
 * @param offlineMessage optional free-text reason; blank is treated as absent.
 * @return a redirect to ".".
 */
public HttpResponse doDoDisconnect(@QueryParameter String offlineMessage) throws IOException, ServletException {
    if (channel == null) {
        // already disconnected: nothing to do
        return new HttpRedirect(".");
    }
    checkPermission(CONFIGURE);
    final String reason = Util.fixEmptyAndTrim(offlineMessage);
    final String userName = HudsonSecurityManager.getAuthentication().getName();
    final String suffix = reason != null ? " : " + reason : "";
    disconnect(OfflineCause.create(Messages._SlaveComputer_DisconnectedBy(userName, suffix)));
    return new HttpRedirect(".");
}
/**
 * Disconnects this computer. After delegating to the superclass, the
 * actual teardown is submitted to {@code Computer.threadPoolForRemoting}:
 * the launcher's {@code beforeDisconnect}, then {@link #closeChannel()},
 * then the launcher's {@code afterDisconnect}.
 *
 * @param cause the reason handed to the superclass.
 * @return the future tracking the asynchronous teardown.
 */
@Override
public Future<?> disconnect(OfflineCause cause) {
    super.disconnect(cause);
    // Run on another thread so that a lengthy disconnect operation
    // (which could be typical) won't block the UI thread.
    final Runnable teardown = new Runnable() {
        public void run() {
            launcher.beforeDisconnect(SlaveComputer.this, taskListener);
            closeChannel();
            launcher.afterDisconnect(SlaveComputer.this, taskListener);
        }
    };
    return Computer.threadPoolForRemoting.submit(teardown);
}
/**
 * Web-bound method that forces a (re)launch of the slave agent and then
 * redirects to the "log" page. Answers with 404 when a channel is already
 * set.
 *
 * NOTE(review): unlike {@code doDoDisconnect}, no permission check is made
 * here -- confirm whether that is intended.
 *
 * @param req the request (unused).
 * @param rsp the response used for the error or the redirect.
 */
public void doLaunchSlaveAgent(StaplerRequest req, StaplerResponse rsp) throws IOException, ServletException {
    if (channel == null) {
        connect(true);
        // TODO: would be nice to redirect the user to "launching..." wait page,
        // then spend a few seconds there and poll for the completion periodically.
        rsp.sendRedirect("log");
    } else {
        rsp.sendError(HttpServletResponse.SC_NOT_FOUND);
    }
}
/**
 * Counts one more reconnect attempt and, depending on the count, forces a
 * reconnect: attempts 1 through 5 always reconnect, after that only every
 * 12th attempt does.
 */
public void tryReconnect() {
    final int attempt = ++numRetryAttempt;
    final boolean earlyAttempt = attempt < 6;
    final boolean periodicAttempt = (attempt % 12) == 0;
    if (!earlyAttempt && !periodicAttempt) {
        return;
    }
    // initially retry several times quickly, and after that, do it infrequently.
    logger.info("Attempting to reconnect " + nodeName);
    connect(true);
}
/**
 * Serves jar files for JNLP slave agents.
 *
 * @param fileName name of the requested jar file.
 * @return a new {@link Slave.JnlpJar} for that file name.
 * @deprecated since 2008-08-18. This URL binding is no longer used and
 * moved up directly under to {@link Hudson}, but it's left here for now
 * just in case some old JNLP slave agents request it.
 */
@Deprecated
public Slave.JnlpJar getJnlpJars(String fileName) {
    return new Slave.JnlpJar(fileName);
}
/**
 * Runs the superclass's kill logic and then closes the channel via
 * {@link #closeChannel()}.
 */
@Override
protected void kill() {
    super.kill();
    this.closeChannel();
}
/**
 * Returns the retention strategy that applies to this computer.
 *
 * @return the node's retention strategy, or
 * {@code RetentionStrategy.INSTANCE} when {@link #getNode()} is null.
 */
public RetentionStrategy getRetentionStrategy() {
    final Slave node = getNode();
    if (node == null) {
        return RetentionStrategy.INSTANCE;
    }
    return node.getRetentionStrategy();
}
/**
 * Drops the current channel, closing it if there was one, and then tells
 * every {@link ComputerListener} that this computer is offline. The
 * listeners are notified even when no channel was set.
 */
private void closeChannel() {
    // TODO: race condition between this and the setChannel method.
    final Channel previous = channel;
    channel = null;
    isUnix = null;
    if (previous != null) {
        try {
            previous.close();
        } catch (IOException closeFailure) {
            logger.log(Level.SEVERE, "Failed to terminate channel to " + getDisplayName(), closeFailure);
        }
    }
    for (ComputerListener listener : ComputerListener.all()) {
        listener.onOffline(this);
    }
}
/**
 * Updates the node behind this computer and refreshes the effective
 * launcher from it. Once this object is fully constructed, the change is
 * also treated as a possible reason to relaunch: for a {@link Slave} the
 * retention strategy is asked to check this computer, otherwise a
 * non-forced connect is started.
 *
 * @param node the new node.
 */
@Override
protected void setNode(Node node) {
    super.setNode(node);
    launcher = grabLauncher(node);
    // "constructed == null" is an ugly work around to avoid launching before
    // the object is fully constructed.
    if (constructed == null) {
        return;
    }
    // maybe the configuration was changed to relaunch the slave, so try to re-launch now.
    if (node instanceof Slave) {
        final Slave slave = (Slave) node;
        slave.getRetentionStrategy().check(this);
    } else {
        connect(false);
    }
}
/**
 * Takes the {@link ComputerLauncher} from the given {@link Node} so it can
 * be kept in this {@link Computer}. The result is stored in
 * {@link #launcher} and carries out the actual launch operation.
 *
 * <p> Subtypes that need to decorate the {@link ComputerLauncher} can
 * override this method. This is useful for {@link SlaveComputer}s for
 * clouds for example, where one normally needs an additional pre-launch
 * step (such as waiting for the provisioned node to become available)
 * before the user specified launch step (like SSH connection) kicks in.
 *
 * @param node the node to read the launcher from; it is cast to {@link Slave}.
 * @return the launcher configured on that slave.
 * @see ComputerLauncherFilter
 */
protected ComputerLauncher grabLauncher(Node node) {
    final Slave slave = (Slave) node;
    return slave.getLauncher();
}
/**
 * Logger for this class, named after the fully qualified class name.
 */
private static final Logger logger = Logger.getLogger(SlaveComputer.class.getName());
/**
 * Callable that reports {@code Launcher.VERSION}, falling back to
 * {@code "< 1.335"} when reading it fails.
 */
private static final class SlaveVersion implements Callable<String, IOException> {
    public String call() throws IOException {
        String version;
        try {
            version = Launcher.VERSION;
        } catch (Throwable ex) {
            // Older slave.jar won't have VERSION
            version = "< 1.335";
        }
        return version;
    }
}
/**
 * Callable that answers {@code true} when the path separator of the JVM
 * it runs in is {@code ':'}, which the caller treats as "Unix".
 */
private static final class DetectOS implements Callable<Boolean, IOException> {
    public Boolean call() throws IOException {
        final boolean colonSeparated = ':' == File.pathSeparatorChar;
        return Boolean.valueOf(colonSeparated);
    }
}
/**
 * Callable that reports the name of the default charset of the JVM it
 * runs in.
 */
private static final class DetectDefaultCharset implements Callable<String, IOException> {
    public String call() throws IOException {
        final Charset platformDefault = Charset.defaultCharset();
        return platformDefault.name();
    }
}
/**
 * Puts the {@link #SLAVE_LOG_HANDLER} into a separate class so that loading
 * this class in JVM doesn't end up loading tons of additional classes.
 */
static final class LogHolder {
    /**
     * This field is used on each slave node to record log records on the
     * slave. It is installed on the "hudson" logger by the slave-side
     * initializer and read back through {@code getLogRecords()}.
     */
static final RingBufferLogHandler SLAVE_LOG_HANDLER = new RingBufferLogHandler();
}
/**
 * Callable that prepares the JVM it runs in: it (re)installs the ring
 * buffer log handler on the "hudson" logger, removes the
 * "SunPKCS11-Solaris" security provider, and marks the current channel
 * with the "slave" property.
 */
private static class SlaveInitializer implements Callable<Void, RuntimeException> {
    private static final long serialVersionUID = 1L;

    public Void call() {
        // avoid double installation of the handler. JNLP slaves can reconnect to the master multiple times
        // and each connection gets a different RemoteClassLoader, so we need to evict them by class name,
        // not by their identity.
        final Logger hudsonLogger = Logger.getLogger("hudson");
        final String handlerClassName = SLAVE_LOG_HANDLER.getClass().getName();
        for (Handler installed : hudsonLogger.getHandlers()) {
            final String installedClassName = installed.getClass().getName();
            if (installedClassName.equals(handlerClassName)) {
                hudsonLogger.removeHandler(installed);
            }
        }
        hudsonLogger.addHandler(SLAVE_LOG_HANDLER);
        // remove Sun PKCS11 provider if present. See http://wiki.hudson-ci.org/display/HUDSON/Solaris+Issue+6276483
        try {
            Security.removeProvider("SunPKCS11-Solaris");
        } catch (SecurityException ignored) {
            // ignore this error.
        }
        // indicate that this side of the channel is the slave side.
        Channel.current().setProperty("slave", Boolean.TRUE);
        return null;
    }
}
/**
 * Obtains a {@link VirtualChannel} that allows some computation to be
 * performed on the master. This method can be called from any thread on the
 * master, or from slave (more precisely, it only works from the remoting
 * request-handling thread in slaves, which means if you've started separate
 * thread on slaves, that'll fail.)
 *
 * <p> On the master (where {@code Hudson.getInstance()} is non-null) the
 * master's local channel is returned. Otherwise the current channel is
 * returned, provided it carries the "slave" property set to true.
 *
 * @return null if the calling thread doesn't have any trace of where its
 * master is.
 * @since 1.362
 */
public static VirtualChannel getChannelToMaster() {
    if (Hudson.getInstance() != null) {
        return MasterComputer.localChannel;
    }
    // if this method is called from within the slave computation thread, this should work
    Channel c = Channel.current();
    // Compare by value rather than by reference: identity comparison of boxed
    // Booleans only holds for the canonical Boolean.TRUE instance.
    if (c != null && Boolean.TRUE.equals(c.getProperty("slave"))) {
        return c;
    }
    return null;
}
}