/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.fs.sftp; import com.jcraft.jsch.ChannelSftp; import com.jcraft.jsch.ChannelSftp.LsEntry; import com.jcraft.jsch.SftpATTRS; import com.jcraft.jsch.SftpException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.util.Progressable; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.URI; import java.net.URLDecoder; import java.util.ArrayList; import java.util.Vector; /** SFTP FileSystem. * This class and its dependency classes were copied from hadoop-2.8 to support SFTP FileSystem so that FTP batch source * can fetch data from SFTP server. Once CDAP-5826 is addressed these classes can be removed. * */ public class SFTPFileSystem extends FileSystem { public static final Log LOG = LogFactory.getLog(SFTPFileSystem.class); private SFTPConnectionPool connectionPool; private URI uri; private static final int DEFAULT_SFTP_PORT = 22; private static final int DEFAULT_MAX_CONNECTION = 5; public static final int DEFAULT_BUFFER_SIZE = 1024 * 1024; public static final int DEFAULT_BLOCK_SIZE = 4 * 1024; public static final String FS_SFTP_USER_PREFIX = "fs.sftp.user."; public static final String FS_SFTP_PASSWORD_PREFIX = "fs.sftp.password."; public static final String FS_SFTP_HOST = "fs.sftp.host"; public static final String FS_SFTP_HOST_PORT = "fs.sftp.host.port"; public static final String FS_SFTP_KEYFILE = "fs.sftp.keyfile"; public static final String FS_SFTP_CONNECTION_MAX = "fs.sftp.connection.max"; public static final String E_SAME_DIRECTORY_ONLY = "only same directory renames are supported"; public static final String E_HOST_NULL = "Invalid host specified"; public static final String E_USER_NULL = "No user specified for sftp connection. Expand URI or credential file."; public static final String E_PATH_DIR = "Path %s is a directory."; public static final String E_FILE_STATUS = "Failed to get file status"; public static final String E_FILE_NOTFOUND = "File %s does not exist."; public static final String E_FILE_EXIST = "File already exists: %s"; public static final String E_CREATE_DIR = "create(): Mkdirs failed to create: %s"; public static final String E_DIR_CREATE_FROMFILE = "Can't make directory for path %s since it is a file."; public static final String E_MAKE_DIR_FORPATH = "Can't make directory for path \"%s\" under \"%s\"."; public static final String E_DIR_NOTEMPTY = "Directory: %s is not empty."; public static final String E_FILE_CHECK_FAILED = "File check failed"; public static final String E_NOT_SUPPORTED = "Not supported"; public static final String E_SPATH_NOTEXIST = "Source path %s does not exist"; public static final String E_DPATH_EXIST = "Destination path %s already exist, cannot rename!"; public static final String E_FAILED_GETHOME = "Failed to get home directory"; public static final String E_FAILED_DISCONNECT = "Failed to disconnect"; /** * Set configuration from UI. * * @param uri * @param conf * @throws IOException */ private void setConfigurationFromURI(URI uriInfo, Configuration conf) throws IOException { // get host information from URI String host = uriInfo.getHost(); host = (host == null) ? conf.get(FS_SFTP_HOST, null) : host; if (host == null) { throw new IOException(E_HOST_NULL); } conf.set(FS_SFTP_HOST, host); int port = uriInfo.getPort(); port = (port == -1) ? conf.getInt(FS_SFTP_HOST_PORT, DEFAULT_SFTP_PORT) : port; conf.setInt(FS_SFTP_HOST_PORT, port); // get user/password information from URI String userAndPwdFromUri = uriInfo.getUserInfo(); if (userAndPwdFromUri != null) { String[] userPasswdInfo = userAndPwdFromUri.split(":"); String user = userPasswdInfo[0]; user = URLDecoder.decode(user, "UTF-8"); conf.set(FS_SFTP_USER_PREFIX + host, user); if (userPasswdInfo.length > 1) { conf.set(FS_SFTP_PASSWORD_PREFIX + host + "." + user, userPasswdInfo[1]); } } String user = conf.get(FS_SFTP_USER_PREFIX + host); if (user == null || user.equals("")) { throw new IllegalStateException(E_USER_NULL); } int connectionMax = conf.getInt(FS_SFTP_CONNECTION_MAX, DEFAULT_MAX_CONNECTION); connectionPool = new SFTPConnectionPool(connectionMax); } /** * Connecting by using configuration parameters. * * @return An FTPClient instance * @throws IOException */ private ChannelSftp connect() throws IOException { Configuration conf = getConf(); String host = conf.get(FS_SFTP_HOST, null); int port = conf.getInt(FS_SFTP_HOST_PORT, DEFAULT_SFTP_PORT); String user = conf.get(FS_SFTP_USER_PREFIX + host, null); String pwd = conf.get(FS_SFTP_PASSWORD_PREFIX + host + "." + user, null); String keyFile = conf.get(FS_SFTP_KEYFILE, null); ChannelSftp channel = connectionPool.connect(host, port, user, pwd, keyFile); return channel; } /** * Logout and disconnect the given channel. * * @param client * @throws IOException */ private void disconnect(ChannelSftp channel) throws IOException { connectionPool.disconnect(channel); } /** * Resolve against given working directory. * * @param workDir * @param path * @return absolute path */ private Path makeAbsolute(Path workDir, Path path) { if (path.isAbsolute()) { return path; } return new Path(workDir, path); } /** * Convenience method, so that we don't open a new connection when using this * method from within another method. Otherwise every API invocation incurs * the overhead of opening/closing a TCP connection. * @throws IOException */ private boolean exists(ChannelSftp channel, Path file) throws IOException { try { getFileStatus(channel, file); return true; } catch (FileNotFoundException fnfe) { return false; } catch (IOException ioe) { throw new IOException(E_FILE_STATUS, ioe); } } /** * Convenience method, so that we don't open a new connection when using this * method from within another method. Otherwise every API invocation incurs * the overhead of opening/closing a TCP connection. */ @SuppressWarnings("unchecked") private FileStatus getFileStatus(ChannelSftp client, Path file) throws IOException { FileStatus fileStat = null; Path workDir; try { workDir = new Path(client.pwd()); } catch (SftpException e) { throw new IOException(e); } Path absolute = makeAbsolute(workDir, file); Path parentPath = absolute.getParent(); if (parentPath == null) { // root directory long length = -1; // Length of root directory on server not known boolean isDir = true; int blockReplication = 1; long blockSize = DEFAULT_BLOCK_SIZE; // Block Size not known. long modTime = -1; // Modification time of root directory not known. Path root = new Path("/"); return new FileStatus(length, isDir, blockReplication, blockSize, modTime, root.makeQualified(this.getUri(), this.getWorkingDirectory())); } String pathName = parentPath.toUri().getPath(); Vector<LsEntry> sftpFiles; try { sftpFiles = (Vector<LsEntry>) client.ls(pathName); } catch (SftpException e) { throw new FileNotFoundException(String.format(E_FILE_NOTFOUND, file)); } if (sftpFiles != null) { for (LsEntry sftpFile : sftpFiles) { if (sftpFile.getFilename().equals(file.getName())) { // file found in directory fileStat = getFileStatus(client, sftpFile, parentPath); break; } } if (fileStat == null) { throw new FileNotFoundException(String.format(E_FILE_NOTFOUND, file)); } } else { throw new FileNotFoundException(String.format(E_FILE_NOTFOUND, file)); } return fileStat; } /** * Convert the file information in LsEntry to a {@link FileStatus} object. * * * @param sftpFile * @param parentPath * @return file status * @throws IOException */ private FileStatus getFileStatus(ChannelSftp channel, LsEntry sftpFile, Path parentPath) throws IOException { SftpATTRS attr = sftpFile.getAttrs(); long length = attr.getSize(); boolean isDir = attr.isDir(); boolean isLink = attr.isLink(); if (isLink) { String link = parentPath.toUri().getPath() + "/" + sftpFile.getFilename(); try { link = channel.realpath(link); Path linkParent = new Path("/", link); FileStatus fstat = getFileStatus(channel, linkParent); isDir = fstat.isDirectory(); length = fstat.getLen(); } catch (Exception e) { throw new IOException(e); } } int blockReplication = 1; // Using default block size since there is no way in SFTP channel to know of // block sizes on server. The assumption could be less than ideal. long blockSize = DEFAULT_BLOCK_SIZE; long modTime = attr.getMTime() * 1000; // convert to milliseconds long accessTime = 0; FsPermission permission = getPermissions(sftpFile); // not be able to get the real user group name, just use the user and group // id String user = Integer.toString(attr.getUId()); String group = Integer.toString(attr.getGId()); Path filePath = new Path(parentPath, sftpFile.getFilename()); return new FileStatus(length, isDir, blockReplication, blockSize, modTime, accessTime, permission, user, group, filePath.makeQualified( this.getUri(), this.getWorkingDirectory())); } /** * Return file permission. * * @param sftpFile * @return file permission */ private FsPermission getPermissions(LsEntry sftpFile) { return new FsPermission((short) sftpFile.getAttrs().getPermissions()); } /** * Convenience method, so that we don't open a new connection when using this * method from within another method. Otherwise every API invocation incurs * the overhead of opening/closing a TCP connection. */ private boolean mkdirs(ChannelSftp client, Path file, FsPermission permission) throws IOException { boolean created = true; Path workDir; try { workDir = new Path(client.pwd()); } catch (SftpException e) { throw new IOException(e); } Path absolute = makeAbsolute(workDir, file); String pathName = absolute.getName(); if (!exists(client, absolute)) { Path parent = absolute.getParent(); created = (parent == null || mkdirs(client, parent, FsPermission.getDefault())); if (created) { String parentDir = parent.toUri().getPath(); boolean succeeded = true; try { client.cd(parentDir); client.mkdir(pathName); } catch (SftpException e) { throw new IOException(String.format(E_MAKE_DIR_FORPATH, pathName, parentDir)); } created = created & succeeded; } } else if (isFile(client, absolute)) { throw new IOException(String.format(E_DIR_CREATE_FROMFILE, absolute)); } return created; } /** * Convenience method, so that we don't open a new connection when using this * method from within another method. Otherwise every API invocation incurs * the overhead of opening/closing a TCP connection. * @throws IOException */ private boolean isFile(ChannelSftp channel, Path file) throws IOException { try { return !getFileStatus(channel, file).isDirectory(); } catch (FileNotFoundException e) { return false; // file does not exist } catch (IOException ioe) { throw new IOException(E_FILE_CHECK_FAILED, ioe); } } /** * Convenience method, so that we don't open a new connection when using this * method from within another method. Otherwise every API invocation incurs * the overhead of opening/closing a TCP connection. */ private boolean delete(ChannelSftp channel, Path file, boolean recursive) throws IOException { Path workDir; try { workDir = new Path(channel.pwd()); } catch (SftpException e) { throw new IOException(e); } Path absolute = makeAbsolute(workDir, file); String pathName = absolute.toUri().getPath(); FileStatus fileStat = null; try { fileStat = getFileStatus(channel, absolute); } catch (FileNotFoundException e) { // file not found, no need to delete, return true return false; } if (!fileStat.isDirectory()) { boolean status = true; try { channel.rm(pathName); } catch (SftpException e) { status = false; } return status; } else { boolean status = true; FileStatus[] dirEntries = listStatus(channel, absolute); if (dirEntries != null && dirEntries.length > 0) { if (!recursive) { throw new IOException(String.format(E_DIR_NOTEMPTY, file)); } for (int i = 0; i < dirEntries.length; ++i) { delete(channel, new Path(absolute, dirEntries[i].getPath()), recursive); } } try { channel.rmdir(pathName); } catch (SftpException e) { status = false; } return status; } } /** * Convenience method, so that we don't open a new connection when using this * method from within another method. Otherwise every API invocation incurs * the overhead of opening/closing a TCP connection. */ @SuppressWarnings("unchecked") private FileStatus[] listStatus(ChannelSftp client, Path file) throws IOException { Path workDir; try { workDir = new Path(client.pwd()); } catch (SftpException e) { throw new IOException(e); } Path absolute = makeAbsolute(workDir, file); FileStatus fileStat = getFileStatus(client, absolute); if (!fileStat.isDirectory()) { return new FileStatus[] {fileStat}; } Vector<LsEntry> sftpFiles; try { sftpFiles = (Vector<LsEntry>) client.ls(absolute.toUri().getPath()); } catch (SftpException e) { throw new IOException(e); } ArrayList<FileStatus> fileStats = new ArrayList<FileStatus>(); for (int i = 0; i < sftpFiles.size(); i++) { LsEntry entry = sftpFiles.get(i); String fname = entry.getFilename(); // skip current and parent directory, ie. "." and ".." if (!".".equalsIgnoreCase(fname) && !"..".equalsIgnoreCase(fname)) { fileStats.add(getFileStatus(client, entry, absolute)); } } return fileStats.toArray(new FileStatus[fileStats.size()]); } /** * Convenience method, so that we don't open a new connection when using this * method from within another method. Otherwise every API invocation incurs * the overhead of opening/closing a TCP connection. * * @param channel * @param src * @param dst * @return rename successful? * @throws IOException */ private boolean rename(ChannelSftp channel, Path src, Path dst) throws IOException { Path workDir; try { workDir = new Path(channel.pwd()); } catch (SftpException e) { throw new IOException(e); } Path absoluteSrc = makeAbsolute(workDir, src); Path absoluteDst = makeAbsolute(workDir, dst); if (!exists(channel, absoluteSrc)) { throw new IOException(String.format(E_SPATH_NOTEXIST, src)); } if (exists(channel, absoluteDst)) { throw new IOException(String.format(E_DPATH_EXIST, dst)); } boolean renamed = true; try { channel.cd("/"); channel.rename(src.toUri().getPath(), dst.toUri().getPath()); } catch (SftpException e) { renamed = false; } return renamed; } @Override public void initialize(URI uriInfo, Configuration conf) throws IOException { super.initialize(uriInfo, conf); setConfigurationFromURI(uriInfo, conf); setConf(conf); this.uri = uriInfo; } @Override public String getScheme() { return "sftp"; } @Override public URI getUri() { return uri; } @Override public FSDataInputStream open(Path f, int bufferSize) throws IOException { ChannelSftp channel = connect(); Path workDir; try { workDir = new Path(channel.pwd()); } catch (SftpException e) { throw new IOException(e); } Path absolute = makeAbsolute(workDir, f); FileStatus fileStat = getFileStatus(channel, absolute); if (fileStat.isDirectory()) { disconnect(channel); throw new IOException(String.format(E_PATH_DIR, f)); } InputStream is; try { // the path could be a symbolic link, so get the real path absolute = new Path("/", channel.realpath(absolute.toUri().getPath())); is = channel.get(absolute.toUri().getPath()); } catch (SftpException e) { throw new IOException(e); } FSDataInputStream fis = new FSDataInputStream(new SFTPInputStream(is, channel, statistics)); return fis; } /** * A stream obtained via this call must be closed before using other APIs of * this class or else the invocation will block. */ @Override public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { final ChannelSftp client = connect(); Path workDir; try { workDir = new Path(client.pwd()); } catch (SftpException e) { throw new IOException(e); } Path absolute = makeAbsolute(workDir, f); if (exists(client, f)) { if (overwrite) { delete(client, f, false); } else { disconnect(client); throw new IOException(String.format(E_FILE_EXIST, f)); } } Path parent = absolute.getParent(); if (parent == null || !mkdirs(client, parent, FsPermission.getDefault())) { parent = (parent == null) ? new Path("/") : parent; disconnect(client); throw new IOException(String.format(E_CREATE_DIR, parent)); } OutputStream os; try { client.cd(parent.toUri().getPath()); os = client.put(f.getName()); } catch (SftpException e) { throw new IOException(e); } FSDataOutputStream fos = new FSDataOutputStream(os, statistics) { @Override public void close() throws IOException { super.close(); disconnect(client); } }; return fos; } @Override public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException { throw new IOException(E_NOT_SUPPORTED); } /* * The parent of source and destination can be different. It is suppose to * work like 'move' */ @Override public boolean rename(Path src, Path dst) throws IOException { ChannelSftp channel = connect(); try { boolean success = rename(channel, src, dst); return success; } finally { disconnect(channel); } } @Override public boolean delete(Path f, boolean recursive) throws IOException { ChannelSftp channel = connect(); try { boolean success = delete(channel, f, recursive); return success; } finally { disconnect(channel); } } @Override public FileStatus[] listStatus(Path f) throws IOException { ChannelSftp client = connect(); try { FileStatus[] stats = listStatus(client, f); return stats; } finally { disconnect(client); } } @Override public void setWorkingDirectory(Path newDir) { // we do not maintain the working directory state } @Override public Path getWorkingDirectory() { // Return home directory always since we do not maintain state. return getHomeDirectory(); } @Override public Path getHomeDirectory() { ChannelSftp channel = null; try { channel = connect(); Path homeDir = new Path(channel.pwd()); return homeDir; } catch (Exception ioe) { return null; } finally { try { disconnect(channel); } catch (IOException ioe) { return null; } } } @Override public boolean mkdirs(Path f, FsPermission permission) throws IOException { ChannelSftp client = connect(); try { boolean success = mkdirs(client, f, permission); return success; } finally { disconnect(client); } } @Override public FileStatus getFileStatus(Path f) throws IOException { ChannelSftp channel = connect(); try { FileStatus status = getFileStatus(channel, f); return status; } finally { disconnect(channel); } } }