/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.source.extractor.extract.sftp; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.List; import java.util.Vector; import lombok.extern.slf4j.Slf4j; import org.apache.commons.io.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.jcraft.jsch.Channel; import com.jcraft.jsch.ChannelExec; import com.jcraft.jsch.ChannelSftp; import com.jcraft.jsch.ChannelSftp.LsEntry; import com.jcraft.jsch.JSch; import com.jcraft.jsch.JSchException; import com.jcraft.jsch.ProxyHTTP; import com.jcraft.jsch.Session; import com.jcraft.jsch.SftpException; import com.jcraft.jsch.SftpProgressMonitor; import com.jcraft.jsch.UserInfo; import gobblin.configuration.ConfigurationKeys; import gobblin.configuration.State; import gobblin.password.PasswordManager; import gobblin.source.extractor.filebased.FileBasedHelperException; import gobblin.source.extractor.filebased.TimestampAwareFileBasedHelper; import gobblin.util.io.SeekableFSInputStream; /** * Connects to a source via SFTP and executes a given list of SFTP commands * @author stakiar */ @Slf4j public class SftpFsHelper implements TimestampAwareFileBasedHelper { private Session session; private State state; public SftpFsHelper(State state) { this.state = state; } /** * The method returns a new {@link ChannelSftp} without throwing an exception. Returns a null if any exception occurs * trying to get a new channel. The method exists for backward compatibility * * @deprecated use {@link #getSftpChannel()} instead. * * @return */ @Deprecated public ChannelSftp getSftpConnection() { try { return this.getSftpChannel(); } catch (SftpException e) { log.error("Failed to get new sftp channel", e); return null; } } /** * Create new channel every time a command needs to be executed. This is required to support execution of multiple * commands in parallel. All created channels are cleaned up when the session is closed. * * * @return a new {@link ChannelSftp} * @throws SftpException */ public ChannelSftp getSftpChannel() throws SftpException { try { ChannelSftp channelSftp = (ChannelSftp) this.session.openChannel("sftp"); channelSftp.connect(); return channelSftp; } catch (JSchException e) { throw new SftpException(0, "Cannot open a channel to SFTP server", e); } } /** * Create a new sftp channel to execute commands. * * @param command to execute on the remote machine * @return a new execution channel * @throws SftpException if a channel could not be opened */ public ChannelExec getExecChannel(String command) throws SftpException { ChannelExec channelExec; try { channelExec = (ChannelExec) this.session.openChannel("exec"); channelExec.setCommand(command); channelExec.connect(); return channelExec; } catch (JSchException e) { throw new SftpException(0, "Cannot open a channel to SFTP server", e); } } /** * Opens up a connection to specified host using the username. Connects to the source using a private key without * prompting for a password. This method does not support connecting to a source using a password, only by private * key * @throws gobblin.source.extractor.filebased.FileBasedHelperException */ @Override public void connect() throws FileBasedHelperException { String privateKey = PasswordManager.getInstance(this.state) .readPassword(this.state.getProp(ConfigurationKeys.SOURCE_CONN_PRIVATE_KEY)); String password = PasswordManager.getInstance(this.state) .readPassword(this.state.getProp(ConfigurationKeys.SOURCE_CONN_PASSWORD)); String knownHosts = this.state.getProp(ConfigurationKeys.SOURCE_CONN_KNOWN_HOSTS); String userName = this.state.getProp(ConfigurationKeys.SOURCE_CONN_USERNAME); String hostName = this.state.getProp(ConfigurationKeys.SOURCE_CONN_HOST_NAME); int port = this.state.getPropAsInt(ConfigurationKeys.SOURCE_CONN_PORT, ConfigurationKeys.SOURCE_CONN_DEFAULT_PORT); String proxyHost = this.state.getProp(ConfigurationKeys.SOURCE_CONN_USE_PROXY_URL); int proxyPort = this.state.getPropAsInt(ConfigurationKeys.SOURCE_CONN_USE_PROXY_PORT, -1); JSch.setLogger(new JSchLogger()); JSch jsch = new JSch(); log.info("Attempting to connect to source via SFTP with" + " privateKey: " + privateKey + " knownHosts: " + knownHosts + " userName: " + userName + " hostName: " + hostName + " port: " + port + " proxyHost: " + proxyHost + " proxyPort: " + proxyPort); try { if (!Strings.isNullOrEmpty(privateKey)) { List<IdentityStrategy> identityStrategies = ImmutableList.of(new LocalFileIdentityStrategy(), new DistributedCacheIdentityStrategy(), new HDFSIdentityStrategy()); for (IdentityStrategy identityStrategy : identityStrategies) { if (identityStrategy.setIdentity(privateKey, jsch)) { break; } } } this.session = jsch.getSession(userName, hostName, port); this.session.setConfig("PreferredAuthentications", "publickey,password"); if (Strings.isNullOrEmpty(knownHosts)) { log.info("Known hosts path is not set, StrictHostKeyChecking will be turned off"); this.session.setConfig("StrictHostKeyChecking", "no"); } else { jsch.setKnownHosts(knownHosts); } if (!Strings.isNullOrEmpty(password)) { this.session.setPassword(password); } if (proxyHost != null && proxyPort >= 0) { this.session.setProxy(new ProxyHTTP(proxyHost, proxyPort)); } UserInfo ui = new MyUserInfo(); this.session.setUserInfo(ui); this.session.setDaemonThread(true); this.session.connect(); log.info("Finished connecting to source"); } catch (JSchException e) { if (this.session != null) { this.session.disconnect(); } log.error(e.getMessage(), e); throw new FileBasedHelperException("Cannot connect to SFTP source", e); } } /** * Executes a get SftpCommand and returns an input stream to the file * @param cmd is the command to execute * @param sftp is the channel to execute the command on * @throws SftpException */ @Override public InputStream getFileStream(String file) throws FileBasedHelperException { SftpGetMonitor monitor = new SftpGetMonitor(); try { ChannelSftp channel = getSftpChannel(); return new SftpFsFileInputStream(channel.get(file, monitor), channel); } catch (SftpException e) { throw new FileBasedHelperException("Cannot download file " + file + " due to " + e.getMessage(), e); } } @Override public List<String> ls(String path) throws FileBasedHelperException { try { List<String> list = new ArrayList<>(); ChannelSftp channel = getSftpChannel(); Vector<LsEntry> vector = channel.ls(path); for (LsEntry entry : vector) { list.add(entry.getFilename()); } channel.disconnect(); return list; } catch (SftpException e) { throw new FileBasedHelperException("Cannot execute ls command on sftp connection", e); } } @Override public void close() { if (this.session != null) { this.session.disconnect(); } } @Override public long getFileSize(String filePath) throws FileBasedHelperException { try { ChannelSftp channelSftp = getSftpChannel(); long fileSize = channelSftp.lstat(filePath).getSize(); channelSftp.disconnect(); return fileSize; } catch (SftpException e) { throw new FileBasedHelperException( String.format("Failed to get size for file at path %s due to error %s", filePath, e.getMessage()), e); } } /** * Implementation of an SftpProgressMonitor to monitor the progress of file downloads using the ChannelSftp.GET * methods * @author stakiar */ public static class SftpGetMonitor implements SftpProgressMonitor { private int op; private String src; private String dest; private long totalCount; private long logFrequency; private long startime; @Override public void init(int op, String src, String dest, long max) { this.op = op; this.src = src; this.dest = dest; this.startime = System.currentTimeMillis(); this.logFrequency = 0L; log.info("Operation GET (" + op + ") has started with src: " + src + " dest: " + dest + " and file length: " + (max / 1000000L) + " mb"); } @Override public boolean count(long count) { this.totalCount += count; if (this.logFrequency == 0L) { this.logFrequency = 1000L; log.info( "Transfer is in progress for file: " + this.src + ". Finished transferring " + this.totalCount + " bytes "); long mb = this.totalCount / 1000000L; log.info("Transferd " + mb + " Mb. Speed " + getMbps() + " Mbps"); } this.logFrequency--; return true; } @Override public void end() { long secs = (System.currentTimeMillis() - this.startime) / 1000L; log.info("Transfer finished " + this.op + " src: " + this.src + " dest: " + this.dest + " in " + secs + " at " + getMbps()); } private String getMbps() { long mb = this.totalCount / 1000000L; long secs = (System.currentTimeMillis() - this.startime) / 1000L; double mbps = secs == 0L ? 0.0D : mb * 1.0D / secs; return String.format("%.2f", new Object[] { Double.valueOf(mbps) }); } } /** * Basic implementation of jsch.Logger that logs the output from the JSch commands to slf4j * @author stakiar */ public static class JSchLogger implements com.jcraft.jsch.Logger { @Override public boolean isEnabled(int level) { switch (level) { case DEBUG: return log.isDebugEnabled(); case INFO: return log.isInfoEnabled(); case WARN: return log.isWarnEnabled(); case ERROR: return log.isErrorEnabled(); case FATAL: return log.isErrorEnabled(); default: return false; } } @Override public void log(int level, String message) { switch (level) { case DEBUG: log.debug(message); break; case INFO: log.info(message); break; case WARN: log.warn(message); break; case ERROR: log.error(message); break; case FATAL: log.error(message); break; default: log.info(message); break; } } } /** * Implementation of UserInfo class for JSch which allows for password-less login via keys * @author stakiar */ public static class MyUserInfo implements UserInfo { // The passphrase used to access the private key @Override public String getPassphrase() { return null; } // The password to login to the client server @Override public String getPassword() { return null; } @Override public boolean promptPassword(String message) { return true; } @Override public boolean promptPassphrase(String message) { return true; } @Override public boolean promptYesNo(String message) { return true; } @Override public void showMessage(String message) { log.info(message); } } /** * Interface for multiple identity setter strategies */ private interface IdentityStrategy { public boolean setIdentity(String privateKey, JSch jsch); } /** * Sets identity using a file on HDFS */ private static class HDFSIdentityStrategy implements IdentityStrategy { @Override public boolean setIdentity(String privateKey, JSch jsch) { FileSystem fs; try { fs = FileSystem.get(new Configuration()); } catch (Exception e) { log.warn("Failed to set identity using HDFS file. Will attempt next strategy. " + e.getMessage()); return false; } Preconditions.checkNotNull(fs, "FileSystem cannot be null"); try (FSDataInputStream privateKeyStream = fs.open(new Path(privateKey))) { byte[] bytes = IOUtils.toByteArray(privateKeyStream); jsch.addIdentity("sftpIdentityKey", bytes, (byte[]) null, (byte[]) null); log.info("Successfully set identity using HDFS file"); return true; } catch (Exception e) { log.warn("Failed to set identity using HDFS file. Will attempt next strategy. " + e.getMessage()); return false; } } } /** * Sets identity using a local file */ private static class LocalFileIdentityStrategy implements IdentityStrategy { @Override public boolean setIdentity(String privateKey, JSch jsch) { try { jsch.addIdentity(privateKey); log.info("Successfully set identity using local file " + privateKey); return true; } catch (Exception e) { log.warn("Failed to set identity using local file. Will attempt next strategy. " + e.getMessage()); } return false; } } /** * Sets identity using a file on distributed cache */ private static class DistributedCacheIdentityStrategy extends LocalFileIdentityStrategy { @Override public boolean setIdentity(String privateKey, JSch jsch) { return super.setIdentity(new File(privateKey).getName(), jsch); } } @Override public long getFileMTime(String filePath) throws FileBasedHelperException { ChannelSftp channelSftp = null; try { channelSftp = getSftpChannel(); int modificationTime = channelSftp.lstat(filePath).getMTime(); return modificationTime; } catch (SftpException e) { throw new FileBasedHelperException( String.format("Failed to get modified timestamp for file at path %s due to error %s", filePath, e.getMessage()), e); } finally { if (channelSftp != null) { channelSftp.disconnect(); } } } /** * A {@link SeekableFSInputStream} that holds a handle on the Sftp {@link Channel} used to open the * {@link InputStream}. The {@link Channel} is disconnected when {@link InputStream#close()} is called. */ static class SftpFsFileInputStream extends SeekableFSInputStream { private final Channel channel; public SftpFsFileInputStream(InputStream in, Channel channel) { super(in); this.channel = channel; } @Override public void close() throws IOException { super.close(); this.channel.disconnect(); } } }