/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.brooklyn.util.core.internal.ssh.sshj;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Throwables.getCausalChain;
import static com.google.common.collect.Iterables.any;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicReference;
import net.schmizz.sshj.connection.ConnectionException;
import net.schmizz.sshj.connection.channel.direct.PTYMode;
import net.schmizz.sshj.connection.channel.direct.Session;
import net.schmizz.sshj.connection.channel.direct.Session.Command;
import net.schmizz.sshj.connection.channel.direct.Session.Shell;
import net.schmizz.sshj.connection.channel.direct.SessionChannel;
import net.schmizz.sshj.sftp.FileAttributes;
import net.schmizz.sshj.sftp.SFTPClient;
import net.schmizz.sshj.transport.TransportException;
import net.schmizz.sshj.xfer.InMemorySourceFile;
import org.apache.brooklyn.core.BrooklynFeatureEnablement;
import org.apache.brooklyn.util.core.internal.ssh.BackoffLimitedRetryHandler;
import org.apache.brooklyn.util.core.internal.ssh.ShellTool;
import org.apache.brooklyn.util.core.internal.ssh.SshAbstractTool;
import org.apache.brooklyn.util.core.internal.ssh.SshTool;
import org.apache.brooklyn.util.exceptions.Exceptions;
import org.apache.brooklyn.util.exceptions.RuntimeTimeoutException;
import org.apache.brooklyn.util.io.FileUtil;
import org.apache.brooklyn.util.repeat.Repeater;
import org.apache.brooklyn.util.stream.KnownSizeInputStream;
import org.apache.brooklyn.util.stream.StreamGobbler;
import org.apache.brooklyn.util.stream.Streams;
import org.apache.brooklyn.util.text.Strings;
import org.apache.brooklyn.util.time.Duration;
import org.apache.brooklyn.util.time.Time;
import org.apache.commons.io.input.ProxyInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Predicate;
import com.google.common.base.Stopwatch;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.CountingOutputStream;
import com.google.common.net.HostAndPort;
import com.google.common.primitives.Ints;
/**
* For ssh and scp-style commands, using the sshj library.
*/
public class SshjTool extends SshAbstractTool implements SshTool {
/*
* TODO synchronization of connect/disconnect needs to be revisited!
* Saw SshjToolIntegrationTest.testExecBigConcurrentCommand fail with:
* Caused by: java.lang.AssertionError
* at net.schmizz.sshj.SSHClient.auth(SSHClient.java:204)
* i.e. another thread had called disconnect just before the failing thread
* did SSHClient.auth.
* Having multiple threads call connect/disconnect is going to be brittle. With
* our retries we can get away with it usually, but it's not good!
*
* TODO need to upgrade sshj version from 0.8.1 to 0.9, but jclouds 1.7.2 still
* relies on 0.8.1. In 0.9, it fixes the https://github.com/shikhar/sshj/issues/89
* so does not throw AssertionError.
*/
private static final Logger LOG = LoggerFactory.getLogger(SshjTool.class);
// Number of attempts used by the single-argument acquire(...) when running an ssh action.
protected final int sshTries;
// Overall time limit for those attempts, in milliseconds; a value of 0 is treated as "practically forever".
protected final long sshTriesTimeout;
// Imposes the delay between retry attempts (see backoffForAttempt).
protected final BackoffLimitedRetryHandler backoffLimitedRetryHandler;
/** Terminal type name for {@code allocatePTY} option. */
final static String TERM = "vt100"; // "dumb"
/**
 * Wraps a stream obtained over SFTP so that closing the stream also closes the
 * {@link SFTPClient} it was read through.
 */
private class CloseFtpChannelOnCloseInputStream extends ProxyInputStream {
    private final SFTPClient sftp;

    private CloseFtpChannelOnCloseInputStream(InputStream proxy, SFTPClient sftp) {
        super(proxy);
        this.sftp = sftp;
    }

    /**
     * Closes the wrapped stream and then the SFTP client.
     *
     * @throws IOException if closing the wrapped stream fails; the SFTP client is
     *         still closed in that case.
     */
    @Override
    public void close() throws IOException {
        try {
            super.close();
        } finally {
            // Always release the SFTP client, even if closing the stream threw;
            // otherwise the client would be leaked.
            closeWhispering(sftp, this);
        }
    }
}
// Connection action built in the constructor; connect() acquires it and disconnect() clears it.
private final SshjClientConnection sshClientConnection;
/**
 * Creates a new, empty builder for {@link SshjTool} instances.
 *
 * @return a fresh {@link SshjToolBuilder}
 */
public static SshjToolBuilder builder() {
return new SshjToolBuilder();
}
/**
 * Concrete builder that fixes the generic parameters of {@link Builder} to
 * {@link SshjTool}; it adds no members of its own.
 */
public static class SshjToolBuilder extends Builder<SshjTool, SshjToolBuilder> {
}
/**
 * Generic builder for {@link SshjTool} (and subclasses), collecting the retry and
 * timeout settings in addition to whatever the parent builder handles.
 *
 * @param <T> the type of tool built
 * @param <B> the concrete builder type, returned from the fluent setters
 */
public static class Builder<T extends SshjTool, B extends Builder<T,B>> extends AbstractSshToolBuilder<T,B> {
    protected long connectTimeout;
    protected long sessionTimeout;
    /** Allow 4 tries by default, much safer. */
    protected int sshTries = 4;
    /** Allow 2 minutes by default (so if too slow trying sshTries times, abort anyway). */
    protected long sshTriesTimeout = 2*60*1000;
    protected long sshRetryDelay = 50L;

    /** Populates this builder from the given properties, after letting the parent do the same. */
    @Override
    public B from(Map<String,?> props) {
        super.from(props);
        this.sshTries = getOptionalVal(props, PROP_SSH_TRIES);
        this.sshTriesTimeout = getOptionalVal(props, PROP_SSH_TRIES_TIMEOUT);
        this.sshRetryDelay = getOptionalVal(props, PROP_SSH_RETRY_DELAY);
        this.connectTimeout = getOptionalVal(props, PROP_CONNECT_TIMEOUT);
        this.sessionTimeout = getOptionalVal(props, PROP_SESSION_TIMEOUT);
        return self();
    }

    public B connectTimeout(int val) {
        this.connectTimeout = val;
        return self();
    }

    public B sessionTimeout(int val) {
        this.sessionTimeout = val;
        return self();
    }

    public B sshRetries(int val) {
        this.sshTries = val;
        return self();
    }

    public B sshRetriesTimeout(int val) {
        this.sshTriesTimeout = val;
        return self();
    }

    public B sshRetryDelay(long val) {
        this.sshRetryDelay = val;
        return self();
    }

    /** Builds the tool from the values collected so far. */
    @Override
    @SuppressWarnings("unchecked")
    public T build() {
        return (T) new SshjTool(this);
    }
}
/**
 * Creates a tool configured from the given properties, by way of {@link #builder()}.
 *
 * @param map the configuration properties, passed to {@link Builder#from(Map)}
 */
public SshjTool(Map<String,?> map) {
this(builder().from(map));
}
/**
 * Creates a tool from a populated builder: copies the retry settings, creates the
 * backoff handler, and builds (but does not open) the client connection.
 *
 * @param builder the builder holding the configuration
 */
protected SshjTool(Builder<?,?> builder) {
    super(builder);

    this.sshTries = builder.sshTries;
    this.sshTriesTimeout = builder.sshTriesTimeout;
    this.backoffLimitedRetryHandler = new BackoffLimitedRetryHandler(this.sshTries, builder.sshRetryDelay);

    this.sshClientConnection = SshjClientConnection.builder()
            .hostAndPort(HostAndPort.fromParts(host, port))
            .username(user)
            .password(password)
            .privateKeyPassphrase(privateKeyPassphrase)
            .privateKeyData(privateKeyData)
            .privateKeyFile(privateKeyFile)
            .strictHostKeyChecking(strictHostKeyChecking)
            .connectTimeout(builder.connectTimeout)
            .sessionTimeout(builder.sessionTimeout)
            .build();

    if (LOG.isTraceEnabled()) {
        LOG.trace("Created SshTool {} ({})", this, System.identityHashCode(this));
    }
}
/**
 * Opens the ssh connection by acquiring the client connection action (with the
 * usual retries); any failure is logged at debug and rethrown.
 */
@Override
public void connect() {
    try {
        if (LOG.isTraceEnabled()) {
            LOG.trace("Connecting SshjTool {} ({})", this, System.identityHashCode(this));
        }
        acquire(sshClientConnection);
    } catch (Exception failure) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(toString()+" failed to connect (rethrowing)", failure);
        }
        throw propagate(failure, "failed to connect");
    }
}
/**
 * Deprecated variant of {@link #connect()}. The {@code maxAttempts} argument is
 * ignored: this simply delegates to {@link #connect()}.
 *
 * @param maxAttempts not used by this implementation
 */
@Override
@Deprecated // see super
public void connect(int maxAttempts) {
connect(); // FIXME Should callers instead configure sshTries? But that would apply to all ssh attempts
}
/**
 * Clears the client connection, tracing how long that took. Any exception is
 * rethrown via {@link Exceptions#propagate(Throwable)}.
 */
@Override
public void disconnect() {
    if (LOG.isTraceEnabled()) {
        LOG.trace("Disconnecting SshjTool {} ({})", this, System.identityHashCode(this));
    }
    try {
        final Stopwatch elapsed = Stopwatch.createStarted();
        sshClientConnection.clear();
        if (LOG.isTraceEnabled()) {
            LOG.trace("SSH Performance: {} disconnect took {}", sshClientConnection.getHostAndPort(), Time.makeTimeStringRounded(elapsed));
        }
    } catch (Exception failure) {
        throw Exceptions.propagate(failure);
    }
}
/**
 * @return true only if the client connection reports itself as both connected
 *         and authenticated
 */
@Override
public boolean isConnected() {
    if (!sshClientConnection.isConnected()) {
        return false;
    }
    return sshClientConnection.isAuthenticated();
}
/**
 * Copies the given bytes to a file on the remote server.
 *
 * @return 0 (failures are reported by exception)
 */
@Override
public int copyToServer(java.util.Map<String,?> props, byte[] contents, String pathAndFileOnRemoteServer) {
    return copyToServer(
            props,
            newInputStreamSupplier(contents),
            contents.length,
            pathAndFileOnRemoteServer);
}
/**
 * Copies the contents of the given stream to a file on the remote server.
 *
 * @return 0 (failures are reported by exception)
 */
@Override
public int copyToServer(Map<String,?> props, InputStream contents, String pathAndFileOnRemoteServer) {
    /* sshj needs to:
     *   1) know the length of the InputStream in order to perform the copy; and
     *   2) re-read the input stream on retry if the first attempt fails.
     * For now, write it to a file, unless the caller supplies a KnownSizeInputStream.
     *
     * (We could have a switch where we hold it in memory if less than some max size,
     * but most of the routines should supply a string or byte array or similar,
     * so we probably don't come here too often.)
     */
    if (!(contents instanceof KnownSizeInputStream)) {
        // Unknown size: spool to a temporary file, upload that, and always remove it afterwards.
        final File spooled = writeTempFile(contents);
        try {
            return copyToServer(props, spooled, pathAndFileOnRemoteServer);
        } finally {
            spooled.delete();
        }
    }
    return copyToServer(
            props,
            Suppliers.ofInstance(contents),
            ((KnownSizeInputStream) contents).length(),
            pathAndFileOnRemoteServer);
}
/**
 * Copies the given local file to a file on the remote server.
 * <p>
 * The file length is passed on as a {@code long}, without narrowing it here, so
 * that the range check done when the upload action is constructed
 * ({@code Ints.checkedCast}) rejects files too large for an {@code int} length,
 * rather than a silently truncated (possibly negative) length being used.
 *
 * @return 0 (failures are reported by exception)
 */
@Override
public int copyToServer(Map<String,?> props, File localFile, String pathAndFileOnRemoteServer) {
    return copyToServer(props, newInputStreamSupplier(localFile), localFile.length(), pathAndFileOnRemoteServer);
}
/**
 * Shared implementation of the public {@code copyToServer} overloads: runs a
 * {@link PutFileAction} through {@link #acquire(SshAction)}.
 *
 * @return always 0
 */
private int copyToServer(Map<String,?> props, Supplier<InputStream> contentsSupplier, long length, String pathAndFileOnRemoteServer) {
    final PutFileAction upload = new PutFileAction(props, pathAndFileOnRemoteServer, contentsSupplier, length);
    acquire(upload);
    // TODO Can we assume put will have thrown exception if failed? Rather than exit code != 0?
    return 0;
}
/**
 * Downloads a remote file into the given local file.
 *
 * @return 0 (failures are reported by exception)
 */
@Override
public int copyFromServer(Map<String,?> props, String pathAndFileOnRemoteServer, File localFile) {
    final InputStream remoteStream = acquire(new GetFileAction(pathAndFileOnRemoteServer));
    try {
        FileUtil.copyTo(remoteStream, localFile);
    } finally {
        // Closing the stream also closes the SFTP client it was opened through.
        Streams.closeQuietly(remoteStream);
    }
    // TODO Can we assume get will have thrown exception if failed? Rather than exit code != 0?
    return 0;
}
/**
* This creates a script containing the user's commands, copies it to the remote server, and
* executes the script. The script is then deleted.
* <p>
* Executing commands directly is fraught with dangers! Here are other options, and their problems:
* <ul>
* <li>Use execCommands, rather than shell.
* The user's environment will not be setup normally (e.g. ~/.bash_profile will not have been sourced)
* so things like wget may not be on the PATH.
* <li>Send the stream of commands to the shell.
* But characters being sent can be lost.
* Try the following (e.g. in an OS X terminal):
* - sleep 5
* - <paste a command that is 1000s of characters long>
* Only the first 1024 characters appear. The rest are lost.
* If sending a stream of commands, you need to be careful not send the next (big) command while the
* previous one is still executing.
* <li>Send a stream to the shell, but spot when the previous command has completed.
* e.g. by looking for the prompt (but what if the commands being executed change the prompt?)
* e.g. by putting every second command as "echo <uid>", and waiting for the stdout.
* This gets fiddly...
* </ul>
*
* So on balance, the script-based approach seems most reliable, even if there is an overhead
* of separate message(s) for copying the file!
*
* Another consideration is long-running scripts. On some clouds when executing a script that takes
* several minutes, we have seen it fail with -1 (e.g. 1 in 20 times). This suggests the ssh connection
* is being dropped. To avoid this problem, we can execute the script asynchronously, writing to files
* the stdout/stderr/pid/exitStatus. We then periodically poll to retrieve the contents of these files.
* Use {@link #PROP_EXEC_ASYNC} to force this mode of execution.
*/
@Override
public int execScript(final Map<String,?> props, final List<String> commands, final Map<String,?> env) {
    final Boolean asyncSetting = getOptionalVal(props, PROP_EXEC_ASYNC);
    final boolean asyncRequested = Boolean.TRUE.equals(asyncSetting);

    // Async mode is only honoured when the feature is switched on.
    if (asyncRequested && BrooklynFeatureEnablement.isEnabled(BrooklynFeatureEnablement.FEATURE_SSH_ASYNC_EXEC)) {
        return execScriptAsyncAndPoll(props, commands, env);
    }
    if (asyncRequested && LOG.isDebugEnabled()) {
        LOG.debug("Ignoring ssh exec-async configuration, because feature is disabled");
    }

    // Synchronous mode: upload the script, then run it in a shell and return its exit status.
    return new ToolAbstractExecScript(props) {
        public int run() {
            final String script = toScript(props, commands, env);
            if (LOG.isTraceEnabled()) {
                LOG.trace("Running shell command at {} as script: {}", host, script);
            }
            copyToServer(ImmutableMap.of("permissions", "0700"), script.getBytes(), scriptPath);
            final Integer exitStatus = acquire(new ShellAction(buildRunScriptCommand(), out, err, execTimeout));
            return asInt(exitStatus, -1);
        }
    }.run();
}
/**
* Executes the script in the background (`nohup ... &`), and then executes other ssh commands to poll for the
* stdout, stderr and exit code of that original process (which will each have been written to separate files).
*
* The polling is a "long poll". That is, it executes a long-running ssh command to retrieve the stdout, etc.
* If that long-poll command fails, then we just execute another one to pick up from where it left off.
* This means we do not need to execute many ssh commands (which are expensive), but can still return promptly
* when the command completes.
*
* Much of this was motivated by https://issues.apache.org/jira/browse/BROOKLYN-106, which is no longer
* an issue. The retries (e.g. in the upload-script) are arguably overkill given that {@link #acquire(SshAction)}
* will already retry. However, leaving this in place as it could prove useful when working with flakey
* networks in the future.
*
* TODO There are (probably) issues with this method when using {@link ShellTool#PROP_RUN_AS_ROOT}.
* I (Aled) saw the .pid file having an owner of root:root, and a failure message in stderr of:
* -bash: line 3: /tmp/brooklyn-20150113-161203056-XMEo-move_install_dir_from_user_to_.pid: Permission denied
*/
protected int execScriptAsyncAndPoll(final Map<String,?> props, final List<String> commands, final Map<String,?> env) {
return new ToolAbstractAsyncExecScript(props) {
// Limit on the failure counter checked at the end of longPoll().
private int maxConsecutiveSshFailures = 3;
// Upper bound for the back-off used by both repeaters below.
private Duration maxDelayBetweenPolls = Duration.seconds(20);
// Maximum duration of a single long-poll; also used as the timeout for the final cleanup command.
private Duration pollTimeout = getOptionalVal(props, PROP_EXEC_ASYNC_POLLING_TIMEOUT, Duration.FIVE_MINUTES);
// Incremented by both the upload repeater and the long-poll repeater; used only in log messages.
private int iteration = 0;
// NOTE(review): in the code visible here this counter is only ever incremented, never reset.
private int consecutiveSshFailures = 0;
// Running totals of what has been written to out/err by the long-polls so far;
// passed to buildLongPollCommand on the next poll.
private int stdoutCount = 0;
private int stderrCount = 0;
// Started in run(); used in longPoll() to work out how much of execTimeout remains.
private Stopwatch timer;
public int run() {
timer = Stopwatch.createStarted();
final String scriptContents = toScript(props, commands, env);
if (LOG.isTraceEnabled()) LOG.trace("Running shell command at {} as async script: {}", host, scriptContents);
// Upload script; try repeatedly because have seen timeout intermittently on vcloud-director (BROOKLYN-106 related).
boolean uploadSuccess = Repeater.create("async script upload on "+SshjTool.this.toString()+" (for "+getSummary()+")")
.backoffTo(maxDelayBetweenPolls)
.limitIterationsTo(3)
.rethrowException()
.until(new Callable<Boolean>() {
@Override
public Boolean call() throws Exception {
iteration++;
// NOTE(review): the first-iteration message is logged at trace, but only inside this debug-enabled check.
if (LOG.isDebugEnabled()) {
String msg = "Uploading (iteration="+iteration+") for async script on "+SshjTool.this.toString()+" (for "+getSummary()+")";
if (iteration == 1) {
LOG.trace(msg);
} else {
LOG.debug(msg);
}
}
copyToServer(ImmutableMap.of("permissions", "0700"), scriptContents.getBytes(), scriptPath);
return true;
}})
.run();
if (!uploadSuccess) {
// Unexpected! Should have either returned true or have rethrown the exception; should never get false.
String msg = "Unexpected state: repeated failure for async script upload on "+SshjTool.this.toString()+" ("+getSummary()+")";
LOG.warn(msg+"; rethrowing");
throw new IllegalStateException(msg);
}
// Execute script asynchronously
int execResult = asInt(acquire(new ShellAction(buildRunScriptCommand(), out, err, execTimeout)), -1);
// A non-zero status from launching is returned immediately; this happens before the
// try/finally below, so the temporary-file cleanup is not run on this path (see TODO in finally).
if (execResult != 0) return execResult;
// Long polling to get the status
try {
final AtomicReference<Integer> result = new AtomicReference<Integer>();
boolean success = Repeater.create("async script long-poll on "+SshjTool.this.toString()+" (for "+getSummary()+")")
.backoffTo(maxDelayBetweenPolls)
.limitTimeTo(execTimeout)
.until(new Callable<Boolean>() {
@Override
public Boolean call() throws Exception {
iteration++;
if (LOG.isDebugEnabled()) LOG.debug("Doing long-poll (iteration="+iteration+") for async script to complete on "+SshjTool.this.toString()+" (for "+getSummary()+")");
// A null exit status means "not finished yet / try again"; non-null ends the repeater.
Integer exitstatus = longPoll();
result.set(exitstatus);
return exitstatus != null;
}})
.run();
if (!success) {
// Timed out
String msg = "Timeout for async script to complete on "+SshjTool.this.toString()+" ("+getSummary()+")";
LOG.warn(msg+"; rethrowing");
throw new TimeoutException(msg);
}
return result.get();
} catch (Exception e) {
LOG.debug("Problem polling for async script on "+SshjTool.this.toString()+" (for "+getSummary()+"); rethrowing after deleting temporary files", e);
throw Exceptions.propagate(e);
} finally {
// Delete the temporary files created (and the `tail -c` commands that might have been left behind by long-polls).
// Using pollTimeout so doesn't wait forever, but waits for a reasonable (configurable) length of time.
// TODO also execute this if the `buildRunScriptCommand` fails, as that might have left files behind?
try {
int execDeleteResult = asInt(acquire(new ShellAction(deleteTemporaryFilesCommand(), out, err, pollTimeout)), -1);
if (execDeleteResult != 0) {
LOG.debug("Problem deleting temporary files of async script on "+SshjTool.this.toString()+" (for "+getSummary()+"): exit status "+execDeleteResult);
}
} catch (Exception e) {
// Cleanup is best-effort: only fatal errors are propagated, anything else is logged and ignored.
Exceptions.propagateIfFatal(e);
LOG.debug("Problem deleting temporary files of async script on "+SshjTool.this.toString()+" (for "+getSummary()+"); continuing", e);
}
}
}
// Runs one long-poll. Returns the exit status to report, or null if the caller should poll again.
Integer longPoll() throws IOException {
// Long-polling to get stdout, stderr + exit status of async task.
// If our long-poll disconnects, we will just re-execute.
// We wrap the stdout/stderr so that we can get the size count.
// If we disconnect, we will pick up from that char of the stream.
// TODO Additional stdout/stderr written by buildLongPollCommand() could interfere,
// causing us to miss some characters.
// The poll lasts at most pollTimeout, and no longer than what remains of execTimeout.
// NOTE(review): the remaining time is not clamped, so it looks like it could be negative
// once execTimeout has elapsed - confirm how Duration.millis handles that.
Duration nextPollTimeout = Duration.min(pollTimeout, Duration.millis(execTimeout.toMilliseconds()-timer.elapsed(TimeUnit.MILLISECONDS)));
CountingOutputStream countingOut = (out == null) ? null : new CountingOutputStream(out);
CountingOutputStream countingErr = (err == null) ? null : new CountingOutputStream(err);
List<String> pollCommand = buildLongPollCommand(stdoutCount, stderrCount, nextPollTimeout);
// Give the ssh action ten seconds more than the poll command's own timeout.
Duration sshJoinTimeout = nextPollTimeout.add(Duration.TEN_SECONDS);
ShellAction action = new ShellAction(pollCommand, countingOut, countingErr, sshJoinTimeout);
int longPollResult;
try {
// Up to 3 attempts within nextPollTimeout, rather than the tool-wide retry settings.
longPollResult = asInt(acquire(action, 3, nextPollTimeout), -1);
} catch (RuntimeTimeoutException e) {
if (LOG.isDebugEnabled()) LOG.debug("Long-poll timed out on "+SshjTool.this.toString()+" (for "+getSummary()+"): "+e);
return null;
}
// Record how much output this poll forwarded, so the next poll can resume after it.
stdoutCount += (countingOut == null) ? 0 : countingOut.getCount();
stderrCount += (countingErr == null) ? 0 : countingErr.getCount();
if (longPollResult == 0) {
if (LOG.isDebugEnabled()) LOG.debug("Long-poll succeeded (exit status 0) on "+SshjTool.this.toString()+" (for "+getSummary()+")");
return longPollResult; // success
} else if (longPollResult == -1) {
// probably a connection failure; try again
if (LOG.isDebugEnabled()) LOG.debug("Long-poll received exit status -1; will retry on "+SshjTool.this.toString()+" (for "+getSummary()+")");
return null;
} else if (longPollResult == 125) {
// 125 is the special code for timeout in long-poll (see buildLongPollCommand).
// However, there is a tiny chance that the underlying command might have returned that exact exit code!
// Don't treat a timeout as a "consecutiveSshFailure".
if (LOG.isDebugEnabled()) LOG.debug("Long-poll received exit status "+longPollResult+"; most likely timeout; retrieving actual status on "+SshjTool.this.toString()+" (for "+getSummary()+")");
return retrieveStatusCommand();
} else {
// want to double-check whether this is the exit-code from the async process, or
// some unexpected failure in our long-poll command.
if (LOG.isDebugEnabled()) LOG.debug("Long-poll received exit status "+longPollResult+"; retrieving actual status on "+SshjTool.this.toString()+" (for "+getSummary()+")");
Integer result = retrieveStatusCommand();
if (result != null) {
return result;
}
}
// Only reached from the final else-branch above, when retrieveStatusCommand() returned null.
consecutiveSshFailures++;
if (consecutiveSshFailures > maxConsecutiveSshFailures) {
LOG.warn("Aborting on "+consecutiveSshFailures+" consecutive ssh connection errors (return -1) when polling for async script to complete on "+SshjTool.this.toString()+" ("+getSummary()+")");
return -1;
} else {
LOG.info("Retrying after ssh connection error when polling for async script to complete on "+SshjTool.this.toString()+" ("+getSummary()+")");
return null;
}
}
// Runs the retrieve-status command. Returns the parsed exit status of the async script,
// null if no status is available yet (or on exit status -1), or the command's own
// non-zero exit status if it failed.
Integer retrieveStatusCommand() throws IOException {
// want to double-check whether this is the exit-code from the async process, or
// some unexpected failure in our long-poll command.
ByteArrayOutputStream statusOut = new ByteArrayOutputStream();
ByteArrayOutputStream statusErr = new ByteArrayOutputStream();
int statusResult = asInt(acquire(new ShellAction(buildRetrieveStatusCommand(), statusOut, statusErr, execTimeout)), -1);
if (statusResult == 0) {
// The status we retrieved really is valid; return it.
// TODO How to ensure no additional output in stdout/stderr when parsing below?
String statusOutStr = new String(statusOut.toByteArray()).trim();
if (Strings.isEmpty(statusOutStr)) {
// suggests not yet completed; will retry with long-poll
if (LOG.isDebugEnabled()) LOG.debug("Long-poll retrieved status directly; command successful but no result available on "+SshjTool.this.toString()+" (for "+getSummary()+")");
return null;
} else {
if (LOG.isDebugEnabled()) LOG.debug("Long-poll retrieved status directly; returning '"+statusOutStr+"' on "+SshjTool.this.toString()+" (for "+getSummary()+")");
// Throws NumberFormatException if the output is not a plain integer (see TODO above).
int result = Integer.parseInt(statusOutStr);
return result;
}
} else if (statusResult == -1) {
// probably a connection failure; try again with long-poll
if (LOG.isDebugEnabled()) LOG.debug("Long-poll retrieving status directly received exit status -1; will retry on "+SshjTool.this.toString()+" (for "+getSummary()+")");
return null;
} else {
// The status command itself failed: copy its captured output to the caller's streams and report its exit code.
if (out != null) {
out.write(toUTF8ByteArray("retrieving status failed with exit code "+statusResult+" (stdout follow)"));
out.write(statusOut.toByteArray());
}
if (err != null) {
err.write(toUTF8ByteArray("retrieving status failed with exit code "+statusResult+" (stderr follow)"));
err.write(statusErr.toByteArray());
}
if (LOG.isDebugEnabled()) LOG.debug("Long-poll retrieving status failed; returning "+statusResult+" on "+SshjTool.this.toString()+" (for "+getSummary()+")");
return statusResult;
}
}
}.run();
}
/**
 * Runs the commands by feeding them straight to a shell: the configured header
 * first, then the commands (with the environment applied), then {@code exit $?}.
 *
 * @return the shell's exit status, or -1 if none was received
 */
public int execShellDirect(Map<String,?> props, List<String> commands, Map<String,?> env) {
    final OutputStream stdout = getOptionalVal(props, PROP_OUT_STREAM);
    final OutputStream stderr = getOptionalVal(props, PROP_ERR_STREAM);
    final Duration timeout = getOptionalVal(props, PROP_EXEC_TIMEOUT);
    final List<String> body = toCommandSequence(commands, env);

    final ImmutableList.Builder<String> script = ImmutableList.<String>builder();
    script.add(getOptionalVal(props, PROP_DIRECT_HEADER));
    script.addAll(body);
    script.add("exit $?");
    final List<String> fullSequence = script.build();

    if (LOG.isTraceEnabled()) {
        LOG.trace("Running shell command at {}: {}", host, fullSequence);
    }
    final Integer exitStatus = acquire(new ShellAction(fullSequence, stdout, stderr, timeout));
    if (LOG.isTraceEnabled()) {
        LOG.trace("Running shell command at {} completed: return status {}", host, exitStatus);
    }
    return asInt(exitStatus, -1);
}
/**
 * Runs the commands as a single exec command, joined with the configured separator.
 * If async execution is requested and enabled, this delegates to
 * {@link #execScriptAsyncAndPoll(Map, List, Map)} instead.
 *
 * @return the command's exit status, or -1 if none was received
 * @throws IllegalArgumentException if {@code props} asks for non-blocking execution
 */
@Override
public int execCommands(Map<String,?> props, List<String> commands, Map<String,?> env) {
    if (Boolean.FALSE.equals(props.get("blocks"))) {
        throw new IllegalArgumentException("Cannot exec non-blocking: command="+commands);
    }

    // If async is set, then do it as execScript
    final Boolean asyncSetting = getOptionalVal(props, PROP_EXEC_ASYNC);
    final boolean asyncRequested = Boolean.TRUE.equals(asyncSetting);
    if (asyncRequested && BrooklynFeatureEnablement.isEnabled(BrooklynFeatureEnablement.FEATURE_SSH_ASYNC_EXEC)) {
        return execScriptAsyncAndPoll(props, commands, env);
    }

    final OutputStream stdout = getOptionalVal(props, PROP_OUT_STREAM);
    final OutputStream stderr = getOptionalVal(props, PROP_ERR_STREAM);
    final String separator = getOptionalVal(props, PROP_SEPARATOR);
    final Duration timeout = getOptionalVal(props, PROP_EXEC_TIMEOUT);

    final List<String> sequence = toCommandSequence(commands, env);
    final String joined = Joiner.on(separator).join(sequence);

    if (Boolean.TRUE.equals(getOptionalVal(props, PROP_RUN_AS_ROOT))) {
        LOG.warn("Cannot run as root when executing as command; run as a script instead (will run as normal user): "+joined);
    }

    if (LOG.isTraceEnabled()) {
        LOG.trace("Running command at {}: {}", host, joined);
    }
    final Command finished = acquire(new ExecAction(joined, stdout, stderr, timeout));
    if (LOG.isTraceEnabled()) {
        LOG.trace("Running command at {} completed: exit code {}", host, finished.getExitStatus());
    }

    // can be null if no exit status is received (observed on kill `ps aux | grep thing-to-grep-for | awk {print $2}`
    if (finished.getExitStatus() == null) {
        LOG.warn("Null exit status running at {}: {}", host, joined);
    }
    return asInt(finished.getExitStatus(), -1);
}
/**
 * Guard used by actions that need an open connection.
 *
 * @throws IllegalStateException if {@link #isConnected()} is false
 */
protected void checkConnected() {
    if (isConnected()) {
        return;
    }
    throw new IllegalStateException(String.format("(%s) ssh not connected!", toString()));
}
/**
 * Delays before the next retry by delegating to the backoff handler.
 *
 * @param retryAttempt the number of the attempt that has just failed
 * @param message passed through to the backoff handler
 */
protected void backoffForAttempt(int retryAttempt, String message) {
backoffLimitedRetryHandler.imposeBackoffExponentialDelay(retryAttempt, message);
}
/**
 * Runs the action using this tool's configured number of tries and overall time
 * limit; a configured limit of 0 means practically no time limit.
 */
protected <T, C extends SshAction<T>> T acquire(C action) {
    final Duration overallLimit;
    if (sshTriesTimeout == 0) {
        overallLimit = Duration.PRACTICALLY_FOREVER;
    } else {
        overallLimit = Duration.millis(sshTriesTimeout);
    }
    return acquire(action, sshTries, overallLimit);
}
/**
 * Runs the given action, retrying on failure.
 * <p>
 * Each attempt first calls {@code action.clear()} and then {@code action.create()}.
 * If an attempt throws, the connection is disconnected and then either:
 * <ul>
 * <li>the exception is rethrown (via {@code propagate}) if that was the last permitted attempt;
 * <li>a {@link RuntimeTimeoutException} is thrown if {@code sshTriesTimeout} has been exceeded; or
 * <li>a backoff delay is imposed, the connection is re-established (unless the action
 *     is itself the connection action), and the action is tried again.
 * </ul>
 * If {@code sshTries} is not positive, no attempt is made and (with assertions
 * disabled) null is returned.
 *
 * @param action the action to run
 * @param sshTries the maximum number of attempts
 * @param sshTriesTimeout the overall time limit, checked after each failed attempt
 * @return the value returned by {@code action.create()}
 */
protected <T, C extends SshAction<T>> T acquire(C action, int sshTries, Duration sshTriesTimeout) {
    Stopwatch stopwatch = Stopwatch.createStarted();
    for (int i = 0; i < sshTries; i++) {
        try {
            action.clear();
            if (LOG.isTraceEnabled()) LOG.trace(">> ({}) acquiring {}", toString(), action);
            Stopwatch perfStopwatch = Stopwatch.createStarted();
            T returnVal;
            try {
                returnVal = action.create();
            } catch (AssertionError e) {
                /*
                 * TODO In net.schmizz.sshj.SSHClient.auth(SSHClient.java:204) throws AssertionError
                 * if not connected. This can happen if another thread has called disconnect
                 * concurrently. This is changed in sshj v0.9.0 to instead throw an IllegalStateException.
                 *
                 * For now, we'll retry. See "TODO" at top of class about synchronization.
                 */
                throw new IllegalStateException("Problem in "+toString()+" for "+action, e);
            }
            if (LOG.isTraceEnabled()) {
                LOG.trace("<< ({}) acquired {}", toString(), returnVal);
                // getSimpleName() never returns null; it returns the empty string for anonymous
                // classes, in which case the action's own toString() is logged instead.
                String actionName = action.getClass().getSimpleName();
                LOG.trace("SSH Performance: {} {} took {}", new Object[] {
                        sshClientConnection.getHostAndPort(),
                        actionName.isEmpty() ? action : actionName,
                        Time.makeTimeStringRounded(perfStopwatch)});
            }
            return returnVal;
        } catch (Exception e) {
            // uninformative net.schmizz.sshj.connection.ConnectionException:
            // Request failed (reason=UNKNOWN) may mean remote Subsystem is disabled (e.g. for FTP)
            // if key is missing, get a UserAuth error
            String errorMessage = String.format("(%s) error acquiring %s", toString(), action);
            String fullMessage = String.format("%s (attempt %s/%s, in time %s/%s)",
                    errorMessage, (i+1), sshTries, Time.makeTimeStringRounded(stopwatch.elapsed(TimeUnit.MILLISECONDS)),
                    (sshTriesTimeout.equals(Duration.PRACTICALLY_FOREVER) ? "unlimited" : Time.makeTimeStringRounded(sshTriesTimeout)));
            // Drop the connection before deciding what to do next; a failure to
            // disconnect is logged but must not mask the original error.
            try {
                disconnect();
            } catch (Exception e2) {
                LOG.debug("<< ("+toString()+") error closing connection: "+e+" / "+e2, e);
            }
            if (i + 1 == sshTries) {
                LOG.debug("<< {} (rethrowing, out of retries): {}", fullMessage, e.getMessage());
                throw propagate(e, fullMessage + "; out of retries");
            } else if (sshTriesTimeout.isShorterThan(stopwatch)) {
                LOG.debug("<< {} (rethrowing, out of time - max {}): {}", new Object[] { fullMessage, Time.makeTimeStringRounded(sshTriesTimeout), e.getMessage() });
                throw new RuntimeTimeoutException(fullMessage + "; out of time", e);
            } else {
                if (LOG.isDebugEnabled()) LOG.debug("<< {}: {}", fullMessage, e.getMessage());
                backoffForAttempt(i + 1, errorMessage + ": " + e.getMessage());
                // When the action is the connection itself, the next loop iteration reconnects.
                if (action != sshClientConnection)
                    connect();
                continue;
            }
        }
    }
    assert false : "should not reach here";
    return null;
}
/**
 * Action that opens a new {@link SFTPClient} on the current connection, keeping
 * hold of it so that {@code clear()} can close it.
 */
private final SshAction<SFTPClient> sftpConnection = new SshAction<SFTPClient>() {
    private SFTPClient sftp;

    @Override
    public void clear() {
        closeWhispering(this.sftp, this);
        this.sftp = null;
    }

    @Override
    public SFTPClient create() throws IOException {
        checkConnected();
        this.sftp = sshClientConnection.ssh.newSFTPClient();
        return this.sftp;
    }

    @Override
    public String toString() {
        return "SFTPClient()";
    }
};
/**
 * Action that opens a remote file for reading over SFTP. The stream returned by
 * {@code create()} closes the SFTP client when it is itself closed.
 */
private class GetFileAction implements SshAction<InputStream> {
    private final String path;
    private SFTPClient sftp;

    GetFileAction(String path) {
        this.path = checkNotNull(path, "path");
    }

    @Override
    public void clear() throws IOException {
        closeWhispering(this.sftp, this);
        this.sftp = null;
    }

    @Override
    public InputStream create() throws Exception {
        this.sftp = acquire(sftpConnection);
        final InputStream remoteContents = this.sftp.getSFTPEngine().open(path).getInputStream();
        return new CloseFtpChannelOnCloseInputStream(remoteContents, this.sftp);
    }

    @Override
    public String toString() {
        return "Payload(path=[" + path + "])";
    }
}
/**
 * Action that uploads content to a remote path over SFTP, then applies the
 * permissions and, where configured, the owner and access/modification times.
 */
private class PutFileAction implements SshAction<Void> {
    // TODO support backup as a property?
    private SFTPClient sftp;
    private final String path;
    private final int permissionsMask;
    private final long lastModificationDate;
    private final long lastAccessDate;
    private final int uid;
    private final Supplier<InputStream> contentsSupplier;
    private final Integer length;

    PutFileAction(Map<String,?> props, String path, Supplier<InputStream> contentsSupplier, long length) {
        String permissions = getOptionalVal(props, PROP_PERMISSIONS);
        long modified = getOptionalVal(props, PROP_LAST_MODIFICATION_DATE);
        long accessed = getOptionalVal(props, PROP_LAST_ACCESS_DATE);

        // If exactly one of the two timestamps is set, use it for both.
        final boolean accessUnset = accessed <= 0;
        final boolean modifiedUnset = modified <= 0;
        if (accessUnset != modifiedUnset) {
            final long theOneThatIsSet = Math.max(accessed, modified);
            accessed = theOneThatIsSet;
            modified = theOneThatIsSet;
        }

        this.permissionsMask = Integer.parseInt(permissions, 8); // permissions are given in octal
        this.lastAccessDate = accessed;
        this.lastModificationDate = modified;
        this.uid = getOptionalVal(props, PROP_OWNER_UID);
        this.path = checkNotNull(path, "path");
        this.contentsSupplier = checkNotNull(contentsSupplier, "contents");
        // Rejects lengths that do not fit in an int.
        this.length = Ints.checkedCast(length);
    }

    @Override
    public void clear() {
        closeWhispering(this.sftp, this);
        this.sftp = null;
    }

    @Override
    public Void create() throws Exception {
        // Remembers the stream handed to sshj so that it can be closed afterwards.
        final AtomicReference<InputStream> openedStream = new AtomicReference<InputStream>();
        this.sftp = acquire(sftpConnection);
        try {
            final InMemorySourceFile source = new InMemorySourceFile() {
                @Override
                public String getName() {
                    return path;
                }

                @Override
                public long getLength() {
                    return length;
                }

                @Override
                public InputStream getInputStream() throws IOException {
                    final InputStream fresh = contentsSupplier.get();
                    openedStream.set(fresh);
                    return fresh;
                }
            };
            this.sftp.put(source, path);
            this.sftp.chmod(path, permissionsMask);
            // -1 means "no owner configured".
            if (uid != -1) {
                this.sftp.chown(path, uid);
            }
            if (lastAccessDate > 0) {
                final FileAttributes times = new FileAttributes.Builder()
                        .withAtimeMtime(lastAccessDate, lastModificationDate)
                        .build();
                this.sftp.setattr(path, times);
            }
        } finally {
            closeWhispering(openedStream.get(), this);
        }
        return null;
    }

    @Override
    public String toString() {
        return "Put(path=[" + path + " "+length+"])";
    }
}
// TODO simpler not to use predicates
/**
 * Builds a predicate over strings that is true when any throwable in the causal chain of
 * {@code from} contains the string, either in its {@code toString()} or in its message.
 * The causal chain is computed each time the predicate is applied.
 */
@VisibleForTesting
Predicate<String> causalChainHasMessageContaining(final Exception from) {
    return new Predicate<String>() {
        @Override
        public boolean apply(final String input) {
            for (Throwable cause : getCausalChain(from)) {
                if (cause.toString().contains(input)) {
                    return true;
                }
                String message = cause.getMessage();
                if (message != null && message.contains(input)) {
                    return true;
                }
            }
            return false;
        }
    };
}
/**
 * Creates an action that opens a new session on the current ssh client, allocating a PTY
 * when {@code allocatePTY} is set. Clearing the action closes the session it opened.
 */
protected SshAction<Session> newSessionAction() {
    return new SshAction<Session>() {
        // Session opened by the most recent create(); null before that and after clear().
        private Session opened = null;

        @Override
        public String toString() {
            return "Session()";
        }

        @Override
        public Session create() throws Exception {
            checkConnected();
            opened = sshClientConnection.ssh.startSession();
            if (allocatePTY) {
                // 80x24 terminal, zero pixel dimensions, no terminal modes.
                Map<PTYMode, Integer> noModes = Collections.<PTYMode, Integer> emptyMap();
                opened.allocatePTY(TERM, 80, 24, 0, 0, noModes);
            }
            return opened;
        }

        @Override
        public void clear() throws TransportException, ConnectionException {
            closeWhispering(opened, this);
            opened = null;
        }
    };
}
/**
 * SSH action that runs a single command with {@code session.exec} on a new session and waits
 * for it to finish, up to the effective timeout. When {@code out} / {@code err} are non-null,
 * the command's input and error streams are handed to {@link StreamGobbler}s writing to them.
 * The session is cleared before {@link #create()} returns or throws.
 */
class ExecAction implements SshAction<Command> {
    private final String command;
    private final OutputStream out;
    private final OutputStream err;
    private final Duration timeout;
    private Session session;
    private Shell shell;
    private StreamGobbler outgobbler;
    private StreamGobbler errgobbler;

    /**
     * @param command command line to execute; must not be null
     * @param out     sink for the command's stdout, or null to ignore it
     * @param err     sink for the command's stderr, or null to ignore it
     * @param timeout requested limit, or null to use the session timeout; the effective
     *                limit is never longer than the session timeout
     */
    ExecAction(String command, OutputStream out, OutputStream err, Duration timeout) {
        this.command = checkNotNull(command, "command");
        this.out = out;
        this.err = err;

        // A session timeout of 0 means "no limit".
        final Duration sessionTimeout;
        if (sshClientConnection.getSessionTimeout() == 0) {
            sessionTimeout = Duration.PRACTICALLY_FOREVER;
        } else {
            sessionTimeout = Duration.millis(sshClientConnection.getSessionTimeout());
        }

        if (timeout == null) {
            this.timeout = sessionTimeout;
        } else {
            this.timeout = Duration.min(timeout, sessionTimeout);
        }
    }

    @Override
    public void clear() throws TransportException, ConnectionException {
        closeWhispering(session, this);
        closeWhispering(shell, this);
        closeWhispering(outgobbler, this);
        closeWhispering(errgobbler, this);
        session = null;
        shell = null;
    }

    @Override
    public Command create() throws Exception {
        try {
            session = acquire(newSessionAction());
            Command running = session.exec(checkNotNull(command, "command"));

            if (out != null) {
                outgobbler = new StreamGobbler(running.getInputStream(), out, (Logger)null);
                outgobbler.start();
            }
            if (err != null) {
                errgobbler = new StreamGobbler(running.getErrorStream(), err, (Logger)null);
                errgobbler.start();
            }

            try {
                // The join takes an int, so cap the wait at Integer.MAX_VALUE milliseconds.
                int waitMillis = (int)Math.min(timeout.toMilliseconds(), Integer.MAX_VALUE);
                running.join(waitMillis, TimeUnit.MILLISECONDS);
                return running;
            } finally {
                awaitGobblers();
            }
        } finally {
            clear();
        }
    }

    /** Waits (bounded) for all stdout/stderr to have been re-directed. */
    private void awaitGobblers() {
        try {
            // Don't use forever (i.e. 0) because BROOKLYN-106: ssh hangs
            long joinTimeout = 10*1000;
            if (outgobbler != null) {
                outgobbler.join(joinTimeout);
            }
            if (errgobbler != null) {
                errgobbler.join(joinTimeout);
            }
        } catch (InterruptedException e) {
            LOG.warn("Interrupted gobbling streams from ssh: "+command, e);
            Thread.currentThread().interrupt();
        }
    }

    @Override
    public String toString() {
        return "Exec(command=[" + command + "])";
    }
}
/**
 * SSH action that feeds a list of commands to an interactive shell started on a new session
 * and returns the exit status reported for that session.
 * <p>
 * Each command is written to the shell's output stream followed by a newline; EOF is then sent
 * and the action polls, in one-second joins, until the shell has closed and/or an exit status
 * is available, or the effective timeout has elapsed. When {@code out} / {@code err} are
 * non-null, the shell's input and error streams are handed to {@link StreamGobbler}s writing
 * to them.
 */
class ShellAction implements SshAction<Integer> {
// Commands to send, in order, one per line.
@VisibleForTesting
final List<String> commands;
// Sink for the shell's stdout; null means stdout is not gobbled.
@VisibleForTesting
final OutputStream out;
// Sink for the shell's stderr; null means stderr is not gobbled.
@VisibleForTesting
final OutputStream err;
private Session session;
private Shell shell;
private StreamGobbler outgobbler;
private StreamGobbler errgobbler;
// Effective limit: the smaller of the requested timeout and the session timeout.
private Duration timeout;
/**
 * @param commands commands to run; must not be null
 * @param out      sink for stdout, or null
 * @param err      sink for stderr, or null
 * @param timeout  requested limit, or null to use the session timeout
 */
ShellAction(List<String> commands, OutputStream out, OutputStream err, Duration timeout) {
this.commands = checkNotNull(commands, "commands");
this.out = out;
this.err = err;
// A session timeout of 0 is treated as "practically forever".
Duration sessionTimeout = (sshClientConnection.getSessionTimeout() == 0)
? Duration.PRACTICALLY_FOREVER
: Duration.millis(sshClientConnection.getSessionTimeout());
this.timeout = (timeout == null) ? sessionTimeout : Duration.min(timeout, sessionTimeout);
}
/** Passes the session, shell and both gobblers to closeWhispering, then forgets the session and shell. */
@Override
public void clear() throws TransportException, ConnectionException {
closeWhispering(session, this);
closeWhispering(shell, this);
closeWhispering(outgobbler, this);
closeWhispering(errgobbler, this);
session = null;
shell = null;
}
/**
 * Runs the commands and waits for the shell to finish.
 *
 * @return the session's exit status (may be null if the shell closed without reporting one)
 * @throws Exception the last ConnectionException seen while joining, or a TimeoutException,
 *         if the shell is still open with no exit status once polling stops
 */
@Override
public Integer create() throws Exception {
try {
session = acquire(newSessionAction());
shell = session.startShell();
if (out != null) {
InputStream outstream = shell.getInputStream();
outgobbler = new StreamGobbler(outstream, out, (Logger)null);
outgobbler.start();
}
if (err != null) {
InputStream errstream = shell.getErrorStream();
errgobbler = new StreamGobbler(errstream, err, (Logger)null);
errgobbler.start();
}
// The shell's "output stream" is what we write to, i.e. the remote stdin.
OutputStream output = shell.getOutputStream();
for (CharSequence cmd : commands) {
try {
output.write(toUTF8ByteArray(cmd+"\n"));
output.flush();
} catch (ConnectionException e) {
if (!shell.isOpen()) {
// shell is closed; presumably the user command did `exit`
if (LOG.isDebugEnabled()) LOG.debug("Shell closed to {} when executing {}", SshjTool.this.toString(), commands);
break;
} else {
throw e;
}
}
}
// workaround attempt for SSHJ deadlock - https://github.com/shikhar/sshj/issues/105
synchronized (shell.getOutputStream()) {
shell.sendEOF();
}
closeWhispering(output, this);
// Tracks whether we gave up waiting, so the gobbler joins below can use a shorter wait.
boolean timedOut = false;
try {
long timeoutMillis = Math.min(timeout.toMilliseconds(), Integer.MAX_VALUE);
long timeoutEnd = System.currentTimeMillis() + timeoutMillis;
// Most recent ConnectionException from shell.join, rethrown if we end up timing out.
Exception last = null;
do {
if (!shell.isOpen() && ((SessionChannel)session).getExitStatus()!=null)
// shell closed, and exit status returned
break;
boolean endBecauseReturned =
// if either condition is satisfied, then wait 1s in hopes the other does, then return
(!shell.isOpen() || ((SessionChannel)session).getExitStatus()!=null);
try {
shell.join(1000, TimeUnit.MILLISECONDS);
} catch (ConnectionException e) {
last = e;
}
if (endBecauseReturned) {
// either the shell has closed or we have a result code (but not both), e.g.
// the shell is still open because some process is running although the main shell is finished;
// we waited one second extra to allow any background process
// which is nohupped to really be in the background (#162)
// now let's bail out
break;
}
} while (System.currentTimeMillis() < timeoutEnd);
if (shell.isOpen() && ((SessionChannel)session).getExitStatus()==null) {
LOG.debug("Timeout ({}) in SSH shell to {}", timeout, this);
// we timed out, or other problem -- reproduce the error.
// The shell.join should always have thrown ConnectionException (looking at code of
// AbstractChannel), but javadoc of Channel doesn't explicitly say that so play it safe.
timedOut = true;
throw (last != null) ? last : new TimeoutException("Timeout after "+timeout+" executing "+this);
}
return ((SessionChannel)session).getExitStatus();
} finally {
// close the shell first, then wait for all stdout/stderr to have been re-directed
closeWhispering(shell, this);
shell = null;
try {
// Don't use forever (i.e. 0) because BROOKLYN-106: ssh hangs
// (only 1s per gobbler after a timeout, 10s otherwise)
long joinTimeout = (timedOut) ? 1000 : 10*1000;
if (outgobbler != null) {
outgobbler.join(joinTimeout);
outgobbler.close();
}
if (errgobbler != null) {
errgobbler.join(joinTimeout);
errgobbler.close();
}
} catch (InterruptedException e) {
LOG.warn("Interrupted gobbling streams from ssh: "+commands, e);
// restore the interrupt flag for callers further up the stack
Thread.currentThread().interrupt();
}
}
} finally {
// always release the session and any remaining resources, on success or failure
clear();
}
}
@Override
public String toString() {
return "Shell(command=[" + commands + "])";
}
}
/** Encodes the given string as UTF-8 bytes using BouncyCastle's {@code Strings} helper. */
private byte[] toUTF8ByteArray(String string) {
    final byte[] encoded = org.bouncycastle.util.Strings.toUTF8ByteArray(string);
    return encoded;
}
/** Returns a supplier that creates a new {@link ByteArrayInputStream} over {@code contents} on every call. */
private Supplier<InputStream> newInputStreamSupplier(final byte[] contents) {
    final Supplier<InputStream> inMemory = new Supplier<InputStream>() {
        @Override
        public InputStream get() {
            return new ByteArrayInputStream(contents);
        }
    };
    return inMemory;
}
/**
 * Returns a supplier that opens a new {@link FileInputStream} on {@code file} on every call.
 * A {@link FileNotFoundException} is rethrown through {@code Exceptions.propagate}.
 */
private Supplier<InputStream> newInputStreamSupplier(final File file) {
    final Supplier<InputStream> fromDisk = new Supplier<InputStream>() {
        @Override
        public InputStream get() {
            final InputStream opened;
            try {
                opened = new FileInputStream(file);
            } catch (FileNotFoundException e) {
                throw Exceptions.propagate(e);
            }
            return opened;
        }
    };
    return fromDisk;
}
}