/*******************************************************************************
 *
 * Copyright (c) 2004-2009 Oracle Corporation.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *
 *    Kohsuke Kawaguchi, Stephen Connolly
 *
 *
 *******************************************************************************/
package hudson.slaves;

import hudson.model.*;
import hudson.model.Hudson.MasterComputer;
import hudson.remoting.Channel;
import hudson.remoting.VirtualChannel;
import hudson.remoting.Callable;
import hudson.util.StreamTaskListener;
import hudson.util.NullStream;
import hudson.util.RingBufferLogHandler;
import hudson.util.Futures;
import hudson.FilePath;
import hudson.lifecycle.WindowsSlaveInstaller;
import hudson.Util;
import hudson.AbortException;
import hudson.remoting.Launcher;
import static hudson.slaves.SlaveComputer.LogHolder.SLAVE_LOG_HANDLER;
import hudson.slaves.OfflineCause.ChannelTermination;

import java.io.File;
import java.io.OutputStream;
import java.io.InputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.logging.Level;
import java.util.logging.LogRecord;
import java.util.logging.Logger;
import java.util.logging.Handler;
import java.util.List;
import java.util.Collections;
import java.util.ArrayList;
import java.nio.charset.Charset;
import java.util.concurrent.Future;
import java.security.Security;

import hudson.util.io.ReopenableFileOutputStream;
import org.kohsuke.stapler.StaplerRequest;
import org.kohsuke.stapler.StaplerResponse;
import org.kohsuke.stapler.QueryParameter;
import org.kohsuke.stapler.HttpResponse;
import org.kohsuke.stapler.HttpRedirect;

import javax.servlet.ServletException;
import javax.servlet.http.HttpServletResponse;
import org.eclipse.hudson.security.HudsonSecurityManager;

/**
 * {@link Computer} for {@link Slave}s.
 *
 * @author Kohsuke Kawaguchi
 */
public class SlaveComputer extends Computer {

    /**
     * Channel to the slave agent, or null while disconnected. Written by
     * {@link #setChannel(InputStream, OutputStream, OutputStream, Channel.Listener)},
     * {@link #closeChannel()} and the channel's own close listener.
     */
    private volatile Channel channel;
    private volatile transient boolean acceptingTasks = true;
    /**
     * Default charset reported by the remote JVM at the last successful
     * connection.
     */
    private Charset defaultCharset;
    /**
     * Result of the last OS detection; reset to null by
     * {@link #closeChannel()}.
     */
    private Boolean isUnix;
    /**
     * Effective {@link ComputerLauncher} that hides the details of how we
     * launch a slave agent on this computer.
     *
     * <p> This is normally the same as {@link Slave#getLauncher()} but can be
     * different. See {@link #grabLauncher(Node)}.
     */
    private ComputerLauncher launcher;
    /**
     * Perpetually writable log file.
     */
    private final ReopenableFileOutputStream log;
    /**
     * {@link StreamTaskListener} that wraps {@link #log}, hence perpetually
     * writable.
     */
    private final TaskListener taskListener;
    /**
     * Number of failed attempts to reconnect to this node (so that if we keep
     * failing to reconnect, we can stop trying.)
     */
    private transient int numRetryAttempt;
    /**
     * Tracks the status of the last launch operation, which is always
     * asynchronous. This can be used to wait for the completion, or cancel the
     * launch activity.
     */
    private volatile Future<?> lastConnectActivity = null;
    /**
     * Marker used by {@link #setNode(Node)}: the initializer runs only after
     * the super constructor returns, so the field is still null while the
     * super constructor is executing.
     */
    private Object constructed = new Object();

    /**
     * Creates the computer for the given slave and opens its launch log.
     *
     * @param slave the node this computer represents.
     */
    public SlaveComputer(Slave slave) {
        super(slave);
        this.log = new ReopenableFileOutputStream(getLogFile(), 5);
        this.taskListener = new StreamTaskListener(log);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean isAcceptingTasks() {
        return acceptingTasks;
    }

    /**
     * Allows a {@linkplain hudson.slaves.ComputerLauncher} or a
     * {@linkplain hudson.slaves.RetentionStrategy} to suspend tasks being
     * accepted by the slave computer.
     *
     * @param acceptingTasks {@code true} if the slave can accept tasks.
     */
    public void setAcceptingTasks(boolean acceptingTasks) {
        this.acceptingTasks = acceptingTasks;
    }

    /**
     * True if this computer is a Unix machine (as opposed to Windows machine).
     *
     * @return null if the computer is disconnected and therefore we don't know
     * whether it is Unix or not.
     */
    public Boolean isUnix() {
        return isUnix;
    }

    /**
     * Returns the node of this computer, narrowed to {@link Slave}.
     */
    @Override
    public Slave getNode() {
        return (Slave) super.getNode();
    }

    /**
     * Returns the "flashing" icon while a launch activity is still running,
     * and the inherited icon otherwise.
     */
    @Override
    public String getIcon() {
        Future<?> l = lastConnectActivity;
        if (l != null && !l.isDone()) {
            return "computer-flash.gif";
        }
        return super.getIcon();
    }

    /**
     * @deprecated since 2008-05-20.
     */
    @Deprecated
    @Override
    public boolean isJnlpAgent() {
        return launcher instanceof JNLPLauncher;
    }

    /**
     * Delegates to the effective launcher's
     * {@link ComputerLauncher#isLaunchSupported()}.
     */
    @Override
    public boolean isLaunchSupported() {
        return launcher.isLaunchSupported();
    }

    /**
     * Returns the effective launcher obtained from {@link #grabLauncher(Node)}.
     */
    public ComputerLauncher getLauncher() {
        return launcher;
    }

    /**
     * Starts an asynchronous launch of the slave agent.
     *
     * <p> If a channel is already set, a precomputed future is returned and
     * nothing is launched. If a launch is in progress and
     * {@code forceReconnect} is false, the future of that launch is returned.
     * Otherwise the current channel (if any) is closed and a new launch is
     * submitted to {@link Computer#threadPoolForRemoting}.
     *
     * @param forceReconnect if true, start a new launch even when one is
     * already in progress.
     * @return future representing the launch activity.
     */
    protected Future<?> _connect(boolean forceReconnect) {
        if (channel != null) {
            return Futures.precomputed(null);
        }
        if (!forceReconnect && isConnecting()) {
            return lastConnectActivity;
        }
        if (forceReconnect && isConnecting()) {
            logger.fine("Forcing a reconnect on " + getName());
        }

        closeChannel();
        return lastConnectActivity = Computer.threadPoolForRemoting.submit(new java.util.concurrent.Callable<Object>() {
            public Object call() throws Exception {
                // do this on another thread so that the lengthy launch operation
                // (which is typical) won't block UI thread.
                try {
                    log.rewind();
                    try {
                        launcher.launch(SlaveComputer.this, taskListener);
                        return null;
                    } catch (AbortException e) {
                        taskListener.error(e.getMessage());
                        throw e;
                    } catch (IOException e) {
                        Util.displayIOException(e, taskListener);
                        e.printStackTrace(taskListener.error(Messages.ComputerLauncher_unexpectedError()));
                        throw e;
                    } catch (InterruptedException e) {
                        e.printStackTrace(taskListener.error(Messages.ComputerLauncher_abortedLaunch()));
                        throw e;
                    }
                } finally {
                    // no channel after the launch attempt means the launch did not succeed
                    if (channel == null) {
                        offlineCause = new OfflineCause.LaunchFailed();
                    }
                }
            }
        });
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void taskAccepted(Executor executor, Queue.Task task) {
        super.taskAccepted(executor, task);
        if (launcher instanceof ExecutorListener) {
            ((ExecutorListener) launcher).taskAccepted(executor, task);
        }
        // Go through getRetentionStrategy() like the other task callbacks do:
        // it tolerates a null node, whereas getNode().getRetentionStrategy() would throw.
        RetentionStrategy r = getRetentionStrategy();
        if (r instanceof ExecutorListener) {
            ((ExecutorListener) r).taskAccepted(executor, task);
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void taskCompleted(Executor executor, Queue.Task task, long durationMS) {
        super.taskCompleted(executor, task, durationMS);
        if (launcher instanceof ExecutorListener) {
            ((ExecutorListener) launcher).taskCompleted(executor, task, durationMS);
        }
        RetentionStrategy r = getRetentionStrategy();
        if (r instanceof ExecutorListener) {
            ((ExecutorListener) r).taskCompleted(executor, task, durationMS);
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void taskCompletedWithProblems(Executor executor, Queue.Task task, long durationMS, Throwable problems) {
        super.taskCompletedWithProblems(executor, task, durationMS, problems);
        if (launcher instanceof ExecutorListener) {
            ((ExecutorListener) launcher).taskCompletedWithProblems(executor, task, durationMS, problems);
        }
        RetentionStrategy r = getRetentionStrategy();
        if (r instanceof ExecutorListener) {
            ((ExecutorListener) r).taskCompletedWithProblems(executor, task, durationMS, problems);
        }
    }

    /**
     * True while this computer is offline and the last launch activity has
     * not finished yet.
     */
    @Override
    public boolean isConnecting() {
        Future<?> l = lastConnectActivity;
        return isOffline() && l != null && !l.isDone();
    }

    /**
     * Rewinds the launch log and returns it for writing.
     *
     * @return the log stream, or a {@link NullStream} if rewinding the log
     * failed (the failure is logged at SEVERE level).
     */
    public OutputStream openLogFile() {
        try {
            log.rewind();
            return log;
        } catch (IOException e) {
            logger.log(Level.SEVERE, "Failed to create log file " + getLogFile(), e);
            return new NullStream();
        }
    }

    /**
     * Guards the final publication of a new channel in
     * {@link #setChannel(InputStream, OutputStream, OutputStream, Channel.Listener)}.
     */
    private final Object channelLock = new Object();

    /**
     * Convenience overload that uses the logger of the given
     * {@link TaskListener} as the launch log.
     *
     * @see #setChannel(InputStream, OutputStream, OutputStream, Channel.Listener)
     */
    public void setChannel(InputStream in, OutputStream out, TaskListener taskListener, Channel.Listener listener) throws IOException, InterruptedException {
        setChannel(in, out, taskListener.getLogger(), listener);
    }

    /**
     * Creates a {@link Channel} from the given stream and sets that to this
     * slave.
     *
     * @param in Stream connected to the remote "slave.jar". It's the caller's
     * responsibility to do buffering on this stream, if that's necessary.
     * @param out Stream connected to the remote peer. It's the caller's
     * responsibility to do buffering on this stream, if that's necessary.
     * @param launchLog If non-null, receive the portion of data in <tt>in</tt>
     * before the data goes into the "binary mode". This is useful when the
     * established communication channel might include some data that might be
     * useful for debugging/trouble-shooting.
     * @param listener Gets a notification when the channel closes, to perform
     * clean up. Can be null. By the time this method is called, the cause of
     * the termination is reported to the user, so the implementation of the
     * listener doesn't need to do that again.
     * @throws IllegalStateException if this computer already has a channel,
     * either on entry or by the time the new channel is about to be published.
     */
    public void setChannel(InputStream in, OutputStream out, OutputStream launchLog, Channel.Listener listener) throws IOException, InterruptedException {
        if (this.channel != null) {
            throw new IllegalStateException("Already connected");
        }

        final TaskListener taskListener = new StreamTaskListener(launchLog);
        PrintStream log = taskListener.getLogger();

        Channel channel = new Channel(nodeName, threadPoolForRemoting, Channel.Mode.NEGOTIATE,
                in, out, launchLog);
        channel.addListener(new Channel.Listener() {
            @Override
            public void onClosed(Channel c, IOException cause) {
                SlaveComputer.this.channel = null;
                // Orderly shutdown will have null exception
                if (cause != null) {
                    offlineCause = new ChannelTermination(cause);
                    cause.printStackTrace(taskListener.error("Connection terminated"));
                } else {
                    taskListener.getLogger().println("Connection terminated");
                }
                launcher.afterDisconnect(SlaveComputer.this, taskListener);
            }
        });
        if (listener != null) {
            channel.addListener(listener);
        }

        String slaveVersion = channel.call(new SlaveVersion());
        log.println("Slave.jar version: " + slaveVersion);

        boolean _isUnix = channel.call(new DetectOS());
        log.println(_isUnix ? hudson.model.Messages.Slave_UnixSlave() : hudson.model.Messages.Slave_WindowsSlave());

        String defaultCharsetName = channel.call(new DetectDefaultCharset());

        String remoteFs = getNode().getRemoteFS();
        if (_isUnix && !remoteFs.contains("/") && remoteFs.contains("\\")) {
            log.println("WARNING: " + remoteFs + " looks suspiciously like Windows path. Maybe you meant " + remoteFs.replace('\\', '/') + "?");
        }
        FilePath root = new FilePath(channel, getNode().getRemoteFS());

        channel.call(new SlaveInitializer());
        channel.call(new WindowsSlaveInstaller(remoteFs));
        for (ComputerListener cl : ComputerListener.all()) {
            cl.preOnline(this, channel, root, taskListener);
        }

        offlineCause = null;

        // update the data structure atomically to prevent others from seeing a channel that's not properly initialized yet
        synchronized (channelLock) {
            if (this.channel != null) {
                // check again. we used to have this entire method in a big synchronization block,
                // but Channel constructor blocks for an external process to do the connection
                // if CommandLauncher is used, and that cannot be interrupted because it blocks at InputStream.
                // so if the process hangs, it hangs the thread in a lock, and since Hudson will try to relaunch,
                // we'll end up queuing the lot of threads in a pseudo deadlock.
                // This implementation prevents that by avoiding a lock. HUDSON-1705 is likely a manifestation of this.
                channel.close();
                throw new IllegalStateException("Already connected");
            }
            isUnix = _isUnix;
            numRetryAttempt = 0;
            this.channel = channel;
            defaultCharset = Charset.forName(defaultCharsetName);
        }
        for (ComputerListener cl : ComputerListener.all()) {
            cl.onOnline(this, taskListener);
        }
        log.println("Slave successfully connected and online");
        Hudson.getInstance().getQueue().scheduleMaintenance();
    }

    /**
     * Returns the current channel, or null if none is set.
     */
    @Override
    public Channel getChannel() {
        return channel;
    }

    /**
     * Returns the charset recorded by the last successful
     * {@link #setChannel(InputStream, OutputStream, OutputStream, Channel.Listener)};
     * null if no connection has been established yet.
     */
    public Charset getDefaultCharset() {
        return defaultCharset;
    }

    /**
     * Fetches a copy of the log records held by the slave-side
     * {@link LogHolder#SLAVE_LOG_HANDLER}.
     *
     * @return the records, or an empty list when there is no channel.
     */
    public List<LogRecord> getLogRecords() throws IOException, InterruptedException {
        // Snapshot the volatile field: it can be nulled concurrently (closeChannel(),
        // channel close listener) between the null check and the call.
        final Channel ch = channel;
        if (ch == null) {
            return Collections.emptyList();
        }
        return ch.call(new Callable<List<LogRecord>, RuntimeException>() {
            public List<LogRecord> call() {
                return new ArrayList<LogRecord>(SLAVE_LOG_HANDLER.getView());
            }
        });
    }

    /**
     * Disconnects this computer on behalf of the current user and redirects
     * back to the computer page.
     *
     * @param offlineMessage optional reason; blank values are treated as
     * absent.
     */
    public HttpResponse doDoDisconnect(@QueryParameter String offlineMessage) throws IOException, ServletException {
        if (channel != null) {
            //does nothing in case computer is already disconnected
            checkPermission(CONFIGURE);
            offlineMessage = Util.fixEmptyAndTrim(offlineMessage);
            disconnect(OfflineCause.create(Messages._SlaveComputer_DisconnectedBy(
                    HudsonSecurityManager.getAuthentication().getName(),
                    offlineMessage != null ? " : " + offlineMessage : "")));
        }
        return new HttpRedirect(".");
    }

    /**
     * Disconnects this computer asynchronously: after the superclass
     * bookkeeping, the launcher hooks and {@link #closeChannel()} run on
     * {@link Computer#threadPoolForRemoting}.
     *
     * @return future representing the disconnect activity.
     */
    @Override
    public Future<?> disconnect(OfflineCause cause) {
        super.disconnect(cause);
        return Computer.threadPoolForRemoting.submit(new Runnable() {
            public void run() {
                // do this on another thread so that any lengthy disconnect operation
                // (which could be typical) won't block UI thread.
                launcher.beforeDisconnect(SlaveComputer.this, taskListener);
                closeChannel();
                launcher.afterDisconnect(SlaveComputer.this, taskListener);
            }
        });
    }

    /**
     * Forces a (re)launch of the slave agent and redirects to the log page.
     * Responds with 404 if a channel is already set.
     */
    public void doLaunchSlaveAgent(StaplerRequest req, StaplerResponse rsp) throws IOException, ServletException {
        // NOTE(review): unlike doDoDisconnect, no permission check is performed here -- confirm this is intended.
        if (channel != null) {
            rsp.sendError(HttpServletResponse.SC_NOT_FOUND);
            return;
        }

        connect(true);

        // TODO: would be nice to redirect the user to "launching..." wait page,
        // then spend a few seconds there and poll for the completion periodically.
        rsp.sendRedirect("log");
    }

    /**
     * Counts a reconnection attempt and, for the first five attempts and then
     * every twelfth one, forces a reconnect.
     */
    public void tryReconnect() {
        numRetryAttempt++;
        if (numRetryAttempt < 6 || (numRetryAttempt % 12) == 0) {
            // initially retry several times quickly, and after that, do it infrequently.
            logger.info("Attempting to reconnect " + nodeName);
            connect(true);
        }
    }

    /**
     * Serves jar files for JNLP slave agents.
     *
     * @deprecated since 2008-08-18. This URL binding is no longer used and
     * moved up directly under to {@link Hudson}, but it's left here for now
     * just in case some old JNLP slave agents request it.
     */
    @Deprecated
    public Slave.JnlpJar getJnlpJars(String fileName) {
        return new Slave.JnlpJar(fileName);
    }

    /**
     * Runs the superclass kill logic and then closes the channel.
     */
    @Override
    protected void kill() {
        super.kill();
        closeChannel();
    }

    /**
     * Returns the retention strategy of the node, or
     * {@link RetentionStrategy#INSTANCE} when the node is null.
     */
    public RetentionStrategy getRetentionStrategy() {
        Slave n = getNode();
        return n == null ? RetentionStrategy.INSTANCE : n.getRetentionStrategy();
    }

    /**
     * If still connected, disconnect.
     *
     * <p> {@link ComputerListener#onOffline(Computer)} is fired for every
     * listener whether or not a channel was actually open.
     */
    private void closeChannel() {
        // TODO: race condition between this and the setChannel method.
        Channel c = channel;
        channel = null;
        isUnix = null;
        if (c != null) {
            try {
                c.close();
            } catch (IOException e) {
                logger.log(Level.SEVERE, "Failed to terminate channel to " + getDisplayName(), e);
            }
        }
        for (ComputerListener cl : ComputerListener.all()) {
            cl.onOffline(this);
        }
    }

    /**
     * Updates the node, refreshes the effective launcher and, once this
     * object is fully constructed, lets the retention strategy re-check the
     * computer (or reconnects directly when the node is not a {@link Slave}).
     */
    @Override
    protected void setNode(Node node) {
        super.setNode(node);
        launcher = grabLauncher(node);

        // maybe the configuration was changed to relaunch the slave, so try to re-launch now.
        // "constructed==null" test is an ugly work around to avoid launching before the object is fully
        // constructed.
        if (constructed != null) {
            if (node instanceof Slave) {
                ((Slave) node).getRetentionStrategy().check(this);
            } else {
                connect(false);
            }
        }
    }

    /**
     * Grabs a {@link ComputerLauncher} out of {@link Node} to keep it in this
     * {@link Computer}. The returned launcher will be set to {@link #launcher}
     * and used to carry out the actual launch operation.
     *
     * <p> Subtypes that needs to decorate {@link ComputerLauncher} can do so by
     * overriding this method. This is useful for {@link SlaveComputer}s for
     * clouds for example, where one normally needs additional pre-launch step
     * (such as waiting for the provisioned node to become available) before the
     * user specified launch step (like SSH connection) kicks in.
     *
     * @see ComputerLauncherFilter
     */
    protected ComputerLauncher grabLauncher(Node node) {
        return ((Slave) node).getLauncher();
    }

    private static final Logger logger = Logger.getLogger(SlaveComputer.class.getName());

    /**
     * Remote call that reports the version of the slave.jar on the other side.
     */
    private static final class SlaveVersion implements Callable<String, IOException> {

        public String call() throws IOException {
            try {
                return Launcher.VERSION;
            } catch (Throwable ex) {
                return "< 1.335"; // Older slave.jar won't have VERSION
            }
        }
    }

    /**
     * Remote call that reports whether the path separator on the other side
     * is ':'; the caller treats that as "Unix".
     */
    private static final class DetectOS implements Callable<Boolean, IOException> {

        public Boolean call() throws IOException {
            return File.pathSeparatorChar == ':';
        }
    }

    /**
     * Remote call that reports the name of the default charset on the other
     * side.
     */
    private static final class DetectDefaultCharset implements Callable<String, IOException> {

        public String call() throws IOException {
            return Charset.defaultCharset().name();
        }
    }

    /**
     * Puts the {@link #SLAVE_LOG_HANDLER} into a separate class so that loading
     * this class in JVM doesn't end up loading tons of additional classes.
     */
    static final class LogHolder {

        /**
         * This field is used on each slave node to record log records on the
         * slave.
         */
        static final RingBufferLogHandler SLAVE_LOG_HANDLER = new RingBufferLogHandler();
    }

    /**
     * Remote call that prepares the slave side: installs the log handler,
     * removes the Sun PKCS11 provider and marks the channel as the slave end.
     */
    private static class SlaveInitializer implements Callable<Void, RuntimeException> {

        public Void call() {
            // avoid double installation of the handler. JNLP slaves can reconnect to the master multiple times
            // and each connection gets a different RemoteClassLoader, so we need to evict them by class name,
            // not by their identity.
            Logger logger = Logger.getLogger("hudson");
            for (Handler h : logger.getHandlers()) {
                if (h.getClass().getName().equals(SLAVE_LOG_HANDLER.getClass().getName())) {
                    logger.removeHandler(h);
                }
            }
            logger.addHandler(SLAVE_LOG_HANDLER);

            // remove Sun PKCS11 provider if present. See http://wiki.hudson-ci.org/display/HUDSON/Solaris+Issue+6276483
            try {
                Security.removeProvider("SunPKCS11-Solaris");
            } catch (SecurityException e) {
                // ignore this error.
            }

            Channel.current().setProperty("slave", Boolean.TRUE); // indicate that this side of the channel is the slave side.

            return null;
        }
        private static final long serialVersionUID = 1L;
    }

    /**
     * Obtains a {@link VirtualChannel} that allows some computation to be
     * performed on the master. This method can be called from any thread on the
     * master, or from slave (more precisely, it only works from the remoting
     * request-handling thread in slaves, which means if you've started separate
     * thread on slaves, that'll fail.)
     *
     * @return null if the calling thread doesn't have any trace of where its
     * master is.
     * @since 1.362
     */
    public static VirtualChannel getChannelToMaster() {
        if (Hudson.getInstance() != null) {
            return MasterComputer.localChannel;
        }

        // if this method is called from within the slave computation thread, this should work
        Channel c = Channel.current();
        // equals() rather than ==: do not rely on the property being the canonical Boolean.TRUE instance
        if (c != null && Boolean.TRUE.equals(c.getProperty("slave"))) {
            return c;
        }

        return null;
    }
}