/* * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 * (the "License"). You may not use this work except in compliance with the License, which is * available at www.apache.org/licenses/LICENSE-2.0 * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied, as more fully set forth in the License. * * See the NOTICE file distributed with this work for information regarding copyright ownership. */ package alluxio.hadoop; import alluxio.AlluxioURI; import alluxio.Configuration; import alluxio.PropertyKey; import alluxio.client.file.FileOutStream; import alluxio.client.file.FileSystem; import alluxio.client.file.FileSystemContext; import alluxio.client.file.FileSystemMasterClient; import alluxio.client.file.URIStatus; import alluxio.client.file.options.CreateDirectoryOptions; import alluxio.client.file.options.CreateFileOptions; import alluxio.client.file.options.DeleteOptions; import alluxio.client.file.options.SetAttributeOptions; import alluxio.client.lineage.LineageContext; import alluxio.exception.AlluxioException; import alluxio.exception.ExceptionMessage; import alluxio.exception.FileDoesNotExistException; import alluxio.exception.InvalidPathException; import alluxio.exception.PreconditionMessage; import alluxio.security.User; import alluxio.security.authorization.Mode; import alluxio.util.CommonUtils; import alluxio.wire.FileBlockInfo; import com.google.common.base.Preconditions; import com.google.common.net.HostAndPort; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Progressable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.FileNotFoundException; import java.io.IOException; import java.net.InetSocketAddress; import java.net.URI; import java.security.Principal; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import javax.annotation.concurrent.GuardedBy; import javax.annotation.concurrent.NotThreadSafe; import javax.security.auth.Subject; /** * Base class for Apache Hadoop based Alluxio {@link org.apache.hadoop.fs.FileSystem}. This class * really just delegates to {@link alluxio.client.file.FileSystem} for most operations. * * All implementing classes must define {@link #isZookeeperMode()} which states if fault tolerant is * used and {@link #getScheme()} for Hadoop's {@link java.util.ServiceLoader} support. */ @NotThreadSafe abstract class AbstractFileSystem extends org.apache.hadoop.fs.FileSystem { private static final Logger LOG = LoggerFactory.getLogger(AbstractFileSystem.class); public static final String FIRST_COM_PATH = "alluxio_dep/"; // Always tell Hadoop that we have 3x replication. private static final int BLOCK_REPLICATION_CONSTANT = 3; /** Lock for initializing the contexts, currently only one set of contexts is supported. */ private static final Object INIT_LOCK = new Object(); /** Flag for if the contexts have been initialized. */ @GuardedBy("INIT_LOCK") private static volatile boolean sInitialized = false; private FileSystemContext mContext = null; private FileSystem mFileSystem = null; private URI mUri = null; private Path mWorkingDir = new Path(AlluxioURI.SEPARATOR); private Statistics mStatistics = null; private String mAlluxioHeader = null; /** * Constructs a new {@link AbstractFileSystem} instance with specified a {@link FileSystem} * handler for tests. * * @param fileSystem handler to file system */ @SuppressFBWarnings("ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD") AbstractFileSystem(FileSystem fileSystem) { mFileSystem = fileSystem; sInitialized = true; } /** * Constructs a new {@link AbstractFileSystem} instance. */ AbstractFileSystem() {} @Override public FSDataOutputStream append(Path path, int bufferSize, Progressable progress) throws IOException { LOG.debug("append({}, {}, {})", path, bufferSize, progress); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); try { if (mFileSystem.exists(uri)) { throw new IOException(ExceptionMessage.FILE_ALREADY_EXISTS.getMessage(uri)); } return new FSDataOutputStream(mFileSystem.createFile(uri), mStatistics); } catch (AlluxioException e) { throw new IOException(e); } } @Override public void close() throws IOException { if (mContext != null && mContext != FileSystemContext.INSTANCE) { mContext.close(); } super.close(); } /** * Attempts to create a file. Overwrite will not succeed if the path exists and is a folder. * * @param path path to create * @param permission permissions of the created file/folder * @param overwrite overwrite if file exists * @param bufferSize the size in bytes of the buffer to be used * @param replication under filesystem replication factor * @param blockSize block size in bytes * @param progress queryable progress * @return an {@link FSDataOutputStream} created at the indicated path of a file */ @Override public FSDataOutputStream create(Path path, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { LOG.debug("create({}, {}, {}, {}, {}, {}, {})", path, permission, overwrite, bufferSize, replication, blockSize, progress); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); CreateFileOptions options = CreateFileOptions.defaults().setBlockSizeBytes(blockSize) .setMode(new Mode(permission.toShort())); FileOutStream outStream; try { outStream = mFileSystem.createFile(uri, options); } catch (AlluxioException e) { //now we should consider the override parameter try { if (mFileSystem.exists(uri)) { if (!overwrite) { throw new IOException(ExceptionMessage.FILE_ALREADY_EXISTS.getMessage(uri)); } if (mFileSystem.getStatus(uri).isFolder()) { throw new IOException( ExceptionMessage.FILE_CREATE_IS_DIRECTORY.getMessage(uri)); } mFileSystem.delete(uri); } outStream = mFileSystem.createFile(uri, options); } catch (AlluxioException e2) { throw new IOException(e2); } } return new FSDataOutputStream(outStream, mStatistics); } /** * Opens an {@link FSDataOutputStream} at the indicated Path with write-progress reporting. * Same as {@link #create(Path, boolean, int, short, long, Progressable)}, except fails if parent * directory doesn't already exist. * * TODO(hy): We need to refactor this method after having a new internal API support (ALLUXIO-46). * * @param path the file name to open * @param overwrite if a file with this name already exists, then if true, the file will be * overwritten, and if false an error will be thrown. * @param bufferSize the size of the buffer to be used * @param replication required block replication for the file * @param blockSize the size in bytes of the buffer to be used * @param progress queryable progress * @see #setPermission(Path, FsPermission) * @deprecated API only for 0.20-append */ @Override @Deprecated public FSDataOutputStream createNonRecursive(Path path, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { AlluxioURI parentUri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path.getParent())); ensureExists(parentUri); return create(path, permission, overwrite, bufferSize, replication, blockSize, progress); } /** * Attempts to delete the file or directory with the specified path. * * @param path path to delete * @return true if one or more files/directories were deleted; false otherwise * @deprecated Use {@link #delete(Path, boolean)} instead. */ @Override @Deprecated public boolean delete(Path path) throws IOException { return delete(path, true); } /** * Attempts to delete the file or directory with the specified path. * * @param path path to delete * @param recursive if true, will attempt to delete all children of the path * @return true if one or more files/directories were deleted; false otherwise */ @Override public boolean delete(Path path, boolean recursive) throws IOException { LOG.debug("delete({}, {})", path, recursive); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); DeleteOptions options = DeleteOptions.defaults().setRecursive(recursive); try { mFileSystem.delete(uri, options); return true; } catch (InvalidPathException | FileDoesNotExistException e) { LOG.warn("delete failed: {}", e.getMessage()); return false; } catch (AlluxioException e) { throw new IOException(e); } } @Override public long getDefaultBlockSize() { return Configuration.getBytes(PropertyKey.USER_BLOCK_SIZE_BYTES_DEFAULT); } @Override public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException { if (file == null) { return null; } if (mStatistics != null) { mStatistics.incrementReadOps(1); } AlluxioURI path = new AlluxioURI(HadoopUtils.getPathWithoutScheme(file.getPath())); List<FileBlockInfo> blocks = getFileBlocks(path); List<BlockLocation> blockLocations = new ArrayList<>(); for (FileBlockInfo fileBlockInfo : blocks) { long offset = fileBlockInfo.getOffset(); long end = offset + fileBlockInfo.getBlockInfo().getLength(); // Check if there is any overlapping between [start, start+len] and [offset, end] if (end >= start && offset <= start + len) { ArrayList<String> names = new ArrayList<>(); ArrayList<String> hosts = new ArrayList<>(); // add the existing in-memory block locations for (alluxio.wire.BlockLocation location : fileBlockInfo.getBlockInfo().getLocations()) { HostAndPort address = HostAndPort.fromParts(location.getWorkerAddress().getHost(), location.getWorkerAddress().getDataPort()); names.add(address.toString()); hosts.add(address.getHostText()); } // add under file system locations for (String location : fileBlockInfo.getUfsLocations()) { names.add(location); hosts.add(HostAndPort.fromString(location).getHostText()); } blockLocations.add(new BlockLocation(CommonUtils.toStringArray(names), CommonUtils.toStringArray(hosts), offset, fileBlockInfo.getBlockInfo().getLength())); } } BlockLocation[] ret = new BlockLocation[blockLocations.size()]; blockLocations.toArray(ret); return ret; } /** * {@inheritDoc} * * If the file does not exist in Alluxio, query it from HDFS. */ @Override public FileStatus getFileStatus(Path path) throws IOException { LOG.debug("getFileStatus({})", path); if (mStatistics != null) { mStatistics.incrementReadOps(1); } AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); URIStatus fileStatus; try { fileStatus = mFileSystem.getStatus(uri); } catch (FileDoesNotExistException e) { throw new FileNotFoundException(e.getMessage()); } catch (AlluxioException e) { throw new IOException(e); } return new FileStatus(fileStatus.getLength(), fileStatus.isFolder(), BLOCK_REPLICATION_CONSTANT, fileStatus.getBlockSizeBytes(), fileStatus.getLastModificationTimeMs(), fileStatus.getCreationTimeMs(), new FsPermission((short) fileStatus.getMode()), fileStatus.getOwner(), fileStatus.getGroup(), new Path(mAlluxioHeader + uri)); } /** * Changes owner or group of a path (i.e. a file or a directory). If username is null, the * original username remains unchanged. Same as groupname. If username and groupname are non-null, * both of them will be changed. * * @param path path to set owner or group * @param username username to be set * @param groupname groupname to be set */ @Override public void setOwner(Path path, final String username, final String groupname) throws IOException { LOG.debug("setOwner({},{},{})", path, username, groupname); AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); SetAttributeOptions options = SetAttributeOptions.defaults(); boolean ownerOrGroupChanged = false; if (username != null && !username.isEmpty()) { options.setOwner(username).setRecursive(false); ownerOrGroupChanged = true; } if (groupname != null && !groupname.isEmpty()) { options.setGroup(groupname).setRecursive(false); ownerOrGroupChanged = true; } if (ownerOrGroupChanged) { try { mFileSystem.setAttribute(uri, options); } catch (AlluxioException e) { throw new IOException(e); } } } /** * Changes permission of a path. * * @param path path to set permission * @param permission permission set to path */ @Override public void setPermission(Path path, FsPermission permission) throws IOException { LOG.debug("setMode({},{})", path, permission.toString()); AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); SetAttributeOptions options = SetAttributeOptions.defaults().setMode(new Mode(permission.toShort())).setRecursive(false); try { mFileSystem.setAttribute(uri, options); } catch (AlluxioException e) { throw new IOException(e); } } /** * Gets the URI scheme that maps to the {@link org.apache.hadoop.fs.FileSystem}. This was * introduced in Hadoop 2.x as a means to make loading new {@link org.apache.hadoop.fs.FileSystem} * s simpler. This doesn't exist in Hadoop 1.x, so cannot put {@literal @Override}. * * @return scheme hadoop should map to * * @see org.apache.hadoop.fs.FileSystem#createFileSystem(java.net.URI, * org.apache.hadoop.conf.Configuration) */ public abstract String getScheme(); @Override public URI getUri() { return mUri; } @Override public Path getWorkingDirectory() { LOG.debug("getWorkingDirectory: {}", mWorkingDir); return mWorkingDir; } /** * {@inheritDoc} * * Sets up a lazy connection to Alluxio through mFileSystem. This method will override and * invalidate the current contexts. This must be called before client operations in order to * guarantee the integrity of the contexts, meaning users should not alternate between using the * Hadoop compatible API and native Alluxio API in the same process. * * If hadoop file system cache is enabled, this method should only be called when switching user. */ @SuppressFBWarnings("ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD") @Override public void initialize(URI uri, org.apache.hadoop.conf.Configuration conf) throws IOException { // When using zookeeper we get the leader master address from the alluxio.zookeeper.address // configuration property, so the user doesn't need to specify the authority. if (!Configuration.getBoolean(PropertyKey.ZOOKEEPER_ENABLED)) { Preconditions.checkNotNull(uri.getHost(), PreconditionMessage.URI_HOST_NULL); Preconditions.checkNotNull(uri.getPort(), PreconditionMessage.URI_PORT_NULL); } super.initialize(uri, conf); LOG.debug("initialize({}, {}). Connecting to Alluxio", uri, conf); HadoopUtils.addS3Credentials(conf); HadoopUtils.addSwiftCredentials(conf); setConf(conf); // HDFS doesn't allow the authority to be empty; it must be "/" instead. String authority = uri.getAuthority() == null ? "/" : uri.getAuthority(); mAlluxioHeader = getScheme() + "://" + authority; // Set the statistics member. Use mStatistics instead of the parent class's variable. mStatistics = statistics; mUri = URI.create(mAlluxioHeader); boolean masterAddIsSameAsDefault = checkMasterAddress(); if (sInitialized && masterAddIsSameAsDefault) { updateFileSystemAndContext(); return; } synchronized (INIT_LOCK) { // If someone has initialized the object since the last check, return if (sInitialized) { if (masterAddIsSameAsDefault) { updateFileSystemAndContext(); return; } else { LOG.warn(ExceptionMessage.DIFFERENT_MASTER_ADDRESS .getMessage(mUri.getHost() + ":" + mUri.getPort(), FileSystemContext.INSTANCE.getMasterAddress())); sInitialized = false; } } initializeInternal(uri, conf); sInitialized = true; } updateFileSystemAndContext(); } /** * Initializes the default contexts if the master address specified in the URI is different * from the default one. * * @param uri the uri * @param conf the hadoop conf */ void initializeInternal(URI uri, org.apache.hadoop.conf.Configuration conf) throws IOException { // Load Alluxio configuration if any and merge to the one in Alluxio file system. These // modifications to ClientContext are global, affecting all Alluxio clients in this JVM. // We assume here that all clients use the same configuration. HadoopConfigurationUtils.mergeHadoopConfiguration(conf); Configuration.set(PropertyKey.ZOOKEEPER_ENABLED, isZookeeperMode()); if (!Configuration.getBoolean(PropertyKey.ZOOKEEPER_ENABLED)) { Configuration.set(PropertyKey.MASTER_HOSTNAME, uri.getHost()); Configuration.set(PropertyKey.MASTER_RPC_PORT, uri.getPort()); } // These must be reset to pick up the change to the master address. // TODO(andrew): We should reset key value system in this situation - see ALLUXIO-1706. LineageContext.INSTANCE.reset(); FileSystemContext.INSTANCE.reset(); // Try to connect to master, if it fails, the provided uri is invalid. FileSystemMasterClient client = FileSystemContext.INSTANCE.acquireMasterClient(); try { client.connect(); // Connected, initialize. } finally { FileSystemContext.INSTANCE.releaseMasterClient(client); } } /** * Sets the file system and context. */ private void updateFileSystemAndContext() { Subject subject = getHadoopSubject(); if (subject != null) { mContext = FileSystemContext.create(subject); mFileSystem = FileSystem.Factory.get(mContext); } else { mContext = FileSystemContext.INSTANCE; mFileSystem = FileSystem.Factory.get(); } } /** * @return true if the master address in mUri is the same as the one in the default file * system context. */ private boolean checkMasterAddress() { InetSocketAddress masterAddress = FileSystemContext.INSTANCE.getMasterAddress(); boolean sameHost = masterAddress.getHostString().equals(mUri.getHost()); boolean samePort = masterAddress.getPort() == mUri.getPort(); if (sameHost && samePort) { return true; } return false; } /** * @return the hadoop subject if exists, null if not exist */ private Subject getHadoopSubject() { try { UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); String username = ugi.getShortUserName(); if (username != null && !username.isEmpty()) { User user = new User(ugi.getShortUserName()); HashSet<Principal> principals = new HashSet<>(); principals.add(user); return new Subject(false, principals, new HashSet<>(), new HashSet<>()); } return null; } catch (IOException e) { return null; } } /** * Determines if zookeeper should be used for the {@link org.apache.hadoop.fs.FileSystem}. This * method should only be used for * {@link #initialize(java.net.URI, org.apache.hadoop.conf.Configuration)}. * * @return true if zookeeper should be used */ protected abstract boolean isZookeeperMode(); @Override public FileStatus[] listStatus(Path path) throws IOException { LOG.debug("listStatus({})", path); if (mStatistics != null) { mStatistics.incrementReadOps(1); } AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); List<URIStatus> statuses; try { statuses = mFileSystem.listStatus(uri); } catch (FileDoesNotExistException e) { throw new FileNotFoundException(HadoopUtils.getPathWithoutScheme(path)); } catch (AlluxioException e) { throw new IOException(e); } FileStatus[] ret = new FileStatus[statuses.size()]; for (int k = 0; k < statuses.size(); k++) { URIStatus status = statuses.get(k); ret[k] = new FileStatus(status.getLength(), status.isFolder(), BLOCK_REPLICATION_CONSTANT, status.getBlockSizeBytes(), status.getLastModificationTimeMs(), status.getCreationTimeMs(), new FsPermission((short) status.getMode()), status.getOwner(), status.getGroup(), new Path(mAlluxioHeader + status.getPath())); } return ret; } /** * Attempts to create a folder with the specified path. Parent directories will be created. * * @param path path to create * @param permission permissions to grant the created folder * @return true if the indicated folder is created successfully or already exists */ @Override public boolean mkdirs(Path path, FsPermission permission) throws IOException { LOG.debug("mkdirs({}, {})", path, permission); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); CreateDirectoryOptions options = CreateDirectoryOptions.defaults().setRecursive(true).setAllowExists(true) .setMode(new Mode(permission.toShort())); try { mFileSystem.createDirectory(uri, options); return true; } catch (AlluxioException e) { throw new IOException(e); } } /** * Attempts to open the specified file for reading. * * @param path the file name to open * @param bufferSize stream buffer size in bytes, currently unused * @return an {@link FSDataInputStream} at the indicated path of a file */ // TODO(calvin): Consider respecting the buffer size option @Override public FSDataInputStream open(Path path, int bufferSize) throws IOException { LOG.debug("open({}, {})", path, bufferSize); if (mStatistics != null) { mStatistics.incrementReadOps(1); } AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); return new FSDataInputStream(new HdfsFileInputStream(mContext, uri, mStatistics)); } @Override public boolean rename(Path src, Path dst) throws IOException { LOG.debug("rename({}, {})", src, dst); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } AlluxioURI srcPath = new AlluxioURI(HadoopUtils.getPathWithoutScheme(src)); AlluxioURI dstPath = new AlluxioURI(HadoopUtils.getPathWithoutScheme(dst)); try { mFileSystem.rename(srcPath, dstPath); } catch (FileDoesNotExistException e) { LOG.warn("rename failed: {}", e.getMessage()); return false; } catch (AlluxioException e) { ensureExists(srcPath); URIStatus dstStatus; try { dstStatus = mFileSystem.getStatus(dstPath); } catch (IOException | AlluxioException e2) { LOG.warn("rename failed: {}", e.getMessage()); return false; } // If the destination is an existing folder, try to move the src into the folder if (dstStatus != null && dstStatus.isFolder()) { dstPath = dstPath.join(srcPath.getName()); } else { LOG.warn("rename failed: {}", e.getMessage()); return false; } try { mFileSystem.rename(srcPath, dstPath); } catch (IOException | AlluxioException e2) { LOG.error("Failed to rename {} to {}", src, dst, e2); return false; } } catch (IOException e) { LOG.error("Failed to rename {} to {}", src, dst, e); return false; } return true; } @Override public void setWorkingDirectory(Path path) { LOG.debug("setWorkingDirectory({})", path); if (path.isAbsolute()) { mWorkingDir = path; } else { mWorkingDir = new Path(mWorkingDir, path); } } /** * Convenience method which ensures the given path exists, wrapping any {@link AlluxioException} * in {@link IOException}. * * @param path the path to look up */ private void ensureExists(AlluxioURI path) throws IOException { try { mFileSystem.getStatus(path); } catch (AlluxioException e) { throw new IOException(e); } } private List<FileBlockInfo> getFileBlocks(AlluxioURI path) throws IOException { try { return mFileSystem.getStatus(path).getFileBlockInfos(); } catch (AlluxioException e) { throw new IOException(e); } } }