/* * The MIT License * * Copyright (c) 2004-2009, Sun Microsystems, Inc., Kohsuke Kawaguchi, Stephen Connolly * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package hudson.slaves; import hudson.model.*; import hudson.model.Hudson.MasterComputer; import hudson.remoting.Channel; import hudson.remoting.VirtualChannel; import hudson.remoting.Callable; import hudson.util.StreamTaskListener; import hudson.util.NullStream; import hudson.util.RingBufferLogHandler; import hudson.util.Futures; import hudson.FilePath; import hudson.lifecycle.WindowsSlaveInstaller; import hudson.Util; import hudson.AbortException; import hudson.remoting.Launcher; import static hudson.slaves.SlaveComputer.LogHolder.SLAVE_LOG_HANDLER; import hudson.slaves.OfflineCause.ChannelTermination; import java.io.File; import java.io.OutputStream; import java.io.InputStream; import java.io.IOException; import java.io.PrintStream; import java.util.logging.Level; import java.util.logging.LogRecord; import java.util.logging.Logger; import java.util.logging.Handler; import java.util.List; import java.util.Collections; import java.util.ArrayList; import java.nio.charset.Charset; import java.util.concurrent.Future; import java.security.Security; import hudson.util.io.ReopenableFileOutputStream; import org.kohsuke.stapler.StaplerRequest; import org.kohsuke.stapler.StaplerResponse; import org.kohsuke.stapler.QueryParameter; import org.kohsuke.stapler.HttpResponse; import org.kohsuke.stapler.HttpRedirect; import javax.servlet.ServletException; import javax.servlet.http.HttpServletResponse; /** * {@link Computer} for {@link Slave}s. * * @author Kohsuke Kawaguchi */ public class SlaveComputer extends Computer { private volatile Channel channel; private volatile transient boolean acceptingTasks = true; private Charset defaultCharset; private Boolean isUnix; /** * Effective {@link ComputerLauncher} that hides the details of * how we launch a slave agent on this computer. * * <p> * This is normally the same as {@link Slave#getLauncher()} but * can be different. See {@link #grabLauncher(Node)}. */ private ComputerLauncher launcher; /** * Perpetually writable log file. */ private final ReopenableFileOutputStream log; /** * {@link StreamTaskListener} that wraps {@link #log}, hence perpetually writable. */ private final TaskListener taskListener; /** * Number of failed attempts to reconnect to this node * (so that if we keep failing to reconnect, we can stop * trying.) */ private transient int numRetryAttempt; /** * Tracks the status of the last launch operation, which is always asynchronous. * This can be used to wait for the completion, or cancel the launch activity. */ private volatile Future<?> lastConnectActivity = null; private Object constructed = new Object(); public SlaveComputer(Slave slave) { super(slave); this.log = new ReopenableFileOutputStream(getLogFile()); this.taskListener = new StreamTaskListener(log); } /** * {@inheritDoc} */ @Override public boolean isAcceptingTasks() { return acceptingTasks; } /** * Allows a {@linkplain hudson.slaves.ComputerLauncher} or a {@linkplain hudson.slaves.RetentionStrategy} to * suspend tasks being accepted by the slave computer. * * @param acceptingTasks {@code true} if the slave can accept tasks. */ public void setAcceptingTasks(boolean acceptingTasks) { this.acceptingTasks = acceptingTasks; } /** * True if this computer is a Unix machine (as opposed to Windows machine). * * @return * null if the computer is disconnected and therefore we don't know whether it is Unix or not. */ public Boolean isUnix() { return isUnix; } @Override public Slave getNode() { return (Slave)super.getNode(); } @Override public String getIcon() { Future<?> l = lastConnectActivity; if(l!=null && !l.isDone()) return "computer-flash.gif"; return super.getIcon(); } /** * @deprecated since 2008-05-20. */ @Deprecated @Override public boolean isJnlpAgent() { return launcher instanceof JNLPLauncher; } @Override public boolean isLaunchSupported() { return launcher.isLaunchSupported(); } public ComputerLauncher getLauncher() { return launcher; } protected Future<?> _connect(boolean forceReconnect) { if(channel!=null) return Futures.precomputed(null); if(!forceReconnect && isConnecting()) return lastConnectActivity; if(forceReconnect && isConnecting()) logger.fine("Forcing a reconnect on "+getName()); closeChannel(); return lastConnectActivity = Computer.threadPoolForRemoting.submit(new java.util.concurrent.Callable<Object>() { public Object call() throws Exception { // do this on another thread so that the lengthy launch operation // (which is typical) won't block UI thread. try { log.rewind(); try { launcher.launch(SlaveComputer.this, taskListener); return null; } catch (AbortException e) { taskListener.error(e.getMessage()); throw e; } catch (IOException e) { Util.displayIOException(e,taskListener); e.printStackTrace(taskListener.error(Messages.ComputerLauncher_unexpectedError())); throw e; } catch (InterruptedException e) { e.printStackTrace(taskListener.error(Messages.ComputerLauncher_abortedLaunch())); throw e; } } finally { if (channel==null) offlineCause = new OfflineCause.LaunchFailed(); } } }); } /** * {@inheritDoc} */ @Override public void taskAccepted(Executor executor, Queue.Task task) { super.taskAccepted(executor, task); if (launcher instanceof ExecutorListener) { ((ExecutorListener)launcher).taskAccepted(executor, task); } if (getNode().getRetentionStrategy() instanceof ExecutorListener) { ((ExecutorListener)getNode().getRetentionStrategy()).taskAccepted(executor, task); } } /** * {@inheritDoc} */ @Override public void taskCompleted(Executor executor, Queue.Task task, long durationMS) { super.taskCompleted(executor, task, durationMS); if (launcher instanceof ExecutorListener) { ((ExecutorListener)launcher).taskCompleted(executor, task, durationMS); } RetentionStrategy r = getRetentionStrategy(); if (r instanceof ExecutorListener) { ((ExecutorListener) r).taskCompleted(executor, task, durationMS); } } /** * {@inheritDoc} */ @Override public void taskCompletedWithProblems(Executor executor, Queue.Task task, long durationMS, Throwable problems) { super.taskCompletedWithProblems(executor, task, durationMS, problems); if (launcher instanceof ExecutorListener) { ((ExecutorListener)launcher).taskCompletedWithProblems(executor, task, durationMS, problems); } RetentionStrategy r = getRetentionStrategy(); if (r instanceof ExecutorListener) { ((ExecutorListener) r).taskCompletedWithProblems(executor, task, durationMS, problems); } } @Override public boolean isConnecting() { Future<?> l = lastConnectActivity; return isOffline() && l!=null && !l.isDone(); } public OutputStream openLogFile() { try { log.rewind(); return log; } catch (IOException e) { logger.log(Level.SEVERE, "Failed to create log file "+getLogFile(),e); return new NullStream(); } } private final Object channelLock = new Object(); public void setChannel(InputStream in, OutputStream out, TaskListener taskListener, Channel.Listener listener) throws IOException, InterruptedException { setChannel(in,out,taskListener.getLogger(),listener); } /** * Creates a {@link Channel} from the given stream and sets that to this slave. * * @param in * Stream connected to the remote "slave.jar". It's the caller's responsibility to do * buffering on this stream, if that's necessary. * @param out * Stream connected to the remote peer. It's the caller's responsibility to do * buffering on this stream, if that's necessary. * @param launchLog * If non-null, receive the portion of data in <tt>is</tt> before * the data goes into the "binary mode". This is useful * when the established communication channel might include some data that might * be useful for debugging/trouble-shooting. * @param listener * Gets a notification when the channel closes, to perform clean up. Can be null. * By the time this method is called, the cause of the termination is reported to the user, * so the implementation of the listener doesn't need to do that again. */ public void setChannel(InputStream in, OutputStream out, OutputStream launchLog, Channel.Listener listener) throws IOException, InterruptedException { if(this.channel!=null) throw new IllegalStateException("Already connected"); final TaskListener taskListener = new StreamTaskListener(launchLog); PrintStream log = taskListener.getLogger(); Channel channel = new Channel(nodeName,threadPoolForRemoting, Channel.Mode.NEGOTIATE, in,out, launchLog); channel.addListener(new Channel.Listener() { @Override public void onClosed(Channel c, IOException cause) { SlaveComputer.this.channel = null; // Orderly shutdown will have null exception if (cause!=null) { offlineCause = new ChannelTermination(cause); cause.printStackTrace(taskListener.error("Connection terminated")); } else { taskListener.getLogger().println("Connection terminated"); } launcher.afterDisconnect(SlaveComputer.this, taskListener); } }); if(listener!=null) channel.addListener(listener); String slaveVersion = channel.call(new SlaveVersion()); log.println("Slave.jar version: " + slaveVersion); boolean _isUnix = channel.call(new DetectOS()); log.println(_isUnix? hudson.model.Messages.Slave_UnixSlave():hudson.model.Messages.Slave_WindowsSlave()); String defaultCharsetName = channel.call(new DetectDefaultCharset()); String remoteFs = getNode().getRemoteFS(); if(_isUnix && !remoteFs.contains("/") && remoteFs.contains("\\")) log.println("WARNING: "+remoteFs+" looks suspiciously like Windows path. Maybe you meant "+remoteFs.replace('\\','/')+"?"); FilePath root = new FilePath(channel,getNode().getRemoteFS()); channel.call(new SlaveInitializer()); channel.call(new WindowsSlaveInstaller(remoteFs)); for (ComputerListener cl : ComputerListener.all()) cl.preOnline(this,channel,root,taskListener); offlineCause = null; // update the data structure atomically to prevent others from seeing a channel that's not properly initialized yet synchronized(channelLock) { if(this.channel!=null) { // check again. we used to have this entire method in a big sycnhronization block, // but Channel constructor blocks for an external process to do the connection // if CommandLauncher is used, and that cannot be interrupted because it blocks at InputStream. // so if the process hangs, it hangs the thread in a lock, and since Hudson will try to relaunch, // we'll end up queuing the lot of threads in a pseudo deadlock. // This implementation prevents that by avoiding a lock. HUDSON-1705 is likely a manifestation of this. channel.close(); throw new IllegalStateException("Already connected"); } isUnix = _isUnix; numRetryAttempt = 0; this.channel = channel; defaultCharset = Charset.forName(defaultCharsetName); } for (ComputerListener cl : ComputerListener.all()) cl.onOnline(this,taskListener); log.println("Slave successfully connected and online"); Hudson.getInstance().getQueue().scheduleMaintenance(); } @Override public Channel getChannel() { return channel; } public Charset getDefaultCharset() { return defaultCharset; } public List<LogRecord> getLogRecords() throws IOException, InterruptedException { if(channel==null) return Collections.emptyList(); else return channel.call(new Callable<List<LogRecord>,RuntimeException>() { public List<LogRecord> call() { return new ArrayList<LogRecord>(SLAVE_LOG_HANDLER.getView()); } }); } public HttpResponse doDoDisconnect(@QueryParameter String offlineMessage) throws IOException, ServletException { if (channel!=null) { //does nothing in case computer is already disconnected checkPermission(Hudson.ADMINISTER); offlineMessage = Util.fixEmptyAndTrim(offlineMessage); disconnect(OfflineCause.create(Messages._SlaveComputer_DisconnectedBy( Hudson.getAuthentication().getName(), offlineMessage!=null ? " : " + offlineMessage : "") )); } return new HttpRedirect("."); } @Override public Future<?> disconnect(OfflineCause cause) { super.disconnect(cause); return Computer.threadPoolForRemoting.submit(new Runnable() { public void run() { // do this on another thread so that any lengthy disconnect operation // (which could be typical) won't block UI thread. launcher.beforeDisconnect(SlaveComputer.this, taskListener); closeChannel(); launcher.afterDisconnect(SlaveComputer.this, taskListener); } }); } public void doLaunchSlaveAgent(StaplerRequest req, StaplerResponse rsp) throws IOException, ServletException { if(channel!=null) { rsp.sendError(HttpServletResponse.SC_NOT_FOUND); return; } connect(true); // TODO: would be nice to redirect the user to "launching..." wait page, // then spend a few seconds there and poll for the completion periodically. rsp.sendRedirect("log"); } public void tryReconnect() { numRetryAttempt++; if(numRetryAttempt<6 || (numRetryAttempt%12)==0) { // initially retry several times quickly, and after that, do it infrequently. logger.info("Attempting to reconnect "+nodeName); connect(true); } } /** * Serves jar files for JNLP slave agents. * * @deprecated since 2008-08-18. * This URL binding is no longer used and moved up directly under to {@link Hudson}, * but it's left here for now just in case some old JNLP slave agents request it. */ public Slave.JnlpJar getJnlpJars(String fileName) { return new Slave.JnlpJar(fileName); } @Override protected void kill() { super.kill(); closeChannel(); } public RetentionStrategy getRetentionStrategy() { Slave n = getNode(); return n==null ? RetentionStrategy.INSTANCE : n.getRetentionStrategy(); } /** * If still connected, disconnect. */ private void closeChannel() { // TODO: race condition between this and the setChannel method. Channel c = channel; channel = null; isUnix = null; if (c != null) { try { c.close(); } catch (IOException e) { logger.log(Level.SEVERE, "Failed to terminate channel to " + getDisplayName(), e); } } for (ComputerListener cl : ComputerListener.all()) cl.onOffline(this); } @Override protected void setNode(Node node) { super.setNode(node); launcher = grabLauncher(node); // maybe the configuration was changed to relaunch the slave, so try to re-launch now. // "constructed==null" test is an ugly hack to avoid launching before the object is fully // constructed. if(constructed!=null) { if (node instanceof Slave) ((Slave)node).getRetentionStrategy().check(this); else connect(false); } } /** * Grabs a {@link ComputerLauncher} out of {@link Node} to keep it in this {@link Computer}. * The returned launcher will be set to {@link #launcher} and used to carry out the actual launch operation. * * <p> * Subtypes that needs to decorate {@link ComputerLauncher} can do so by overriding this method. * This is useful for {@link SlaveComputer}s for clouds for example, where one normally needs * additional pre-launch step (such as waiting for the provisioned node to become available) * before the user specified launch step (like SSH connection) kicks in. * * @see ComputerLauncherFilter */ protected ComputerLauncher grabLauncher(Node node) { return ((Slave)node).getLauncher(); } private static final Logger logger = Logger.getLogger(SlaveComputer.class.getName()); private static final class SlaveVersion implements Callable<String,IOException> { public String call() throws IOException { try { return Launcher.VERSION; } catch (Throwable ex) { return "< 1.335"; } // Older slave.jar won't have VERSION } } private static final class DetectOS implements Callable<Boolean,IOException> { public Boolean call() throws IOException { return File.pathSeparatorChar==':'; } } private static final class DetectDefaultCharset implements Callable<String,IOException> { public String call() throws IOException { return Charset.defaultCharset().name(); } } /** * Puts the {@link #SLAVE_LOG_HANDLER} into a separate class so that loading this class * in JVM doesn't end up loading tons of additional classes. */ static final class LogHolder { /** * This field is used on each slave node to record log records on the slave. */ static final RingBufferLogHandler SLAVE_LOG_HANDLER = new RingBufferLogHandler(); } private static class SlaveInitializer implements Callable<Void,RuntimeException> { public Void call() { // avoid double installation of the handler. JNLP slaves can reconnect to the master multiple times // and each connection gets a different RemoteClassLoader, so we need to evict them by class name, // not by their identity. Logger logger = Logger.getLogger("hudson"); for (Handler h : logger.getHandlers()) { if (h.getClass().getName().equals(SLAVE_LOG_HANDLER.getClass().getName())) logger.removeHandler(h); } logger.addHandler(SLAVE_LOG_HANDLER); // remove Sun PKCS11 provider if present. See http://wiki.hudson-ci.org/display/HUDSON/Solaris+Issue+6276483 try { Security.removeProvider("SunPKCS11-Solaris"); } catch (SecurityException e) { // ignore this error. } Channel.current().setProperty("slave",Boolean.TRUE); // indicate that this side of the channel is the slave side. return null; } private static final long serialVersionUID = 1L; } /** * Obtains a {@link VirtualChannel} that allows some computation to be performed on the master. * This method can be called from any thread on the master, or from slave (more precisely, * it only works from the remoting request-handling thread in slaves, which means if you've started * separate thread on slaves, that'll fail.) * * @return null if the calling thread doesn't have any trace of where its master is. * @since 1.362 */ public static VirtualChannel getChannelToMaster() { if (Hudson.getInstance()!=null) return MasterComputer.localChannel; // if this method is called from within the slave computation thread, this should work Channel c = Channel.current(); if (c!=null && c.getProperty("slave")==Boolean.TRUE) return c; return null; } }