/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.master.file.meta;
import alluxio.AlluxioURI;
import alluxio.Constants;
import alluxio.collections.ConcurrentHashSet;
import alluxio.collections.FieldIndex;
import alluxio.collections.IndexDefinition;
import alluxio.collections.UniqueFieldIndex;
import alluxio.exception.AccessControlException;
import alluxio.exception.BlockInfoException;
import alluxio.exception.ExceptionMessage;
import alluxio.exception.FileAlreadyExistsException;
import alluxio.exception.FileDoesNotExistException;
import alluxio.exception.InvalidPathException;
import alluxio.exception.PreconditionMessage;
import alluxio.master.block.ContainerIdGenerable;
import alluxio.master.file.options.CreateDirectoryOptions;
import alluxio.master.file.options.CreateFileOptions;
import alluxio.master.file.options.CreatePathOptions;
import alluxio.master.file.options.DeleteOptions;
import alluxio.master.journal.JournalContext;
import alluxio.master.journal.JournalEntryIterable;
import alluxio.master.journal.NoopJournalContext;
import alluxio.proto.journal.File;
import alluxio.proto.journal.File.InodeDirectoryEntry;
import alluxio.proto.journal.File.InodeFileEntry;
import alluxio.proto.journal.Journal;
import alluxio.retry.ExponentialBackoffRetry;
import alluxio.retry.RetryPolicy;
import alluxio.security.authorization.Mode;
import alluxio.underfs.UnderFileSystem;
import alluxio.underfs.options.MkdirsOptions;
import alluxio.util.SecurityUtils;
import alluxio.util.io.PathUtils;
import alluxio.wire.TtlAction;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.Set;
import javax.annotation.concurrent.NotThreadSafe;
/**
* Represents the tree of Inode's.
*/
@NotThreadSafe
// TODO(jiri): Make this class thread-safe.
public class InodeTree implements JournalEntryIterable {
private static final Logger LOG = LoggerFactory.getLogger(InodeTree.class);
/** The base amount (exponential backoff) to sleep before retrying persisting an inode. */
private static final int PERSIST_WAIT_BASE_SLEEP_MS = 2;
/** Maximum amount (exponential backoff) to sleep before retrying persisting an inode. */
private static final int PERSIST_WAIT_MAX_SLEEP_MS = 1000;
/** The maximum retries for persisting an inode. */
private static final int PERSIST_WAIT_MAX_RETRIES = 50;
/** Value to be used for an inode with no parent. */
public static final long NO_PARENT = -1;
private static final IndexDefinition<Inode<?>> ID_INDEX = new IndexDefinition<Inode<?>>(true) {
@Override
public Object getFieldValue(Inode<?> o) {
return o.getId();
}
};
/**
* The type of lock to lock inode paths with.
*/
public enum LockMode {
/** Read lock the entire path. */
READ,
/** Read lock the entire path, but write lock the target inode. */
WRITE,
/** Read lock the entire path, but write lock the target inode and the parent of the target. */
WRITE_PARENT,
}
/** Only the root inode should have the empty string as its name. */
private static final String ROOT_INODE_NAME = "";
/** Number of retries when trying to lock a path, from a given id. */
private static final int PATH_TRAVERSAL_RETRIES = 1000;
/** The root of the entire file system. */
private InodeDirectory mRoot = null;
/** Mount table manages the file system mount points. */
private final MountTable mMountTable;
/** Use UniqueFieldIndex directly for ID index rather than using IndexedSet. */
private final FieldIndex<Inode<?>> mInodes = new UniqueFieldIndex<>(ID_INDEX);
/** A set of inode ids representing pinned inode files. */
private final Set<Long> mPinnedInodeFileIds = new ConcurrentHashSet<>(64, 0.90f, 64);
/**
* Inode id management. Inode ids are essentially block ids.
*
* inode files: Each file id will be composed of a unique block container id, with the maximum
* sequence number.
*
* inode directories: Each directory id will be a unique block id, in order to avoid any collision
* with file ids.
*/
private final ContainerIdGenerable mContainerIdGenerator;
private final InodeDirectoryIdGenerator mDirectoryIdGenerator;
/**
* This is only used for adding inodes from the journal, to prevent repeated lookups of the same
* inode.
*/
private InodeDirectory mCachedInode;
/**
* @param containerIdGenerator the container id generator to use to get new container ids
* @param directoryIdGenerator the directory id generator to use to get new directory ids
* @param mountTable the mount table to manage the file system mount points
*/
public InodeTree(ContainerIdGenerable containerIdGenerator,
InodeDirectoryIdGenerator directoryIdGenerator, MountTable mountTable) {
mContainerIdGenerator = containerIdGenerator;
mDirectoryIdGenerator = directoryIdGenerator;
mMountTable = mountTable;
}
/**
* Initializes the root of the inode tree.
*
* @param owner the root owner
* @param group the root group
* @param mode the root mode
*/
public void initializeRoot(String owner, String group, Mode mode) {
if (mRoot == null) {
mRoot = InodeDirectory
.create(mDirectoryIdGenerator.getNewDirectoryId(), NO_PARENT, ROOT_INODE_NAME,
CreateDirectoryOptions.defaults().setOwner(owner).setGroup(group).setMode(mode));
mRoot.setPersistenceState(PersistenceState.PERSISTED);
mInodes.add(mRoot);
mCachedInode = mRoot;
}
}
/**
* @return username of root of inode tree, null if the inode tree is not initialized
*/
public String getRootUserName() {
if (mRoot == null) {
return null;
}
return mRoot.getOwner();
}
/**
* @return the number of total inodes
*/
public int getSize() {
return mInodes.size();
}
/**
* @return the number of pinned inodes
*/
public int getPinnedSize() {
return mPinnedInodeFileIds.size();
}
/**
* @param id the id to get the inode for
* @return whether the inode exists
*/
public boolean inodeIdExists(long id) {
return mInodes.containsField(id);
}
/**
* @param uri the {@link AlluxioURI} to check for existence
* @return whether the inode exists
*/
public boolean inodePathExists(AlluxioURI uri) {
try {
TraversalResult traversalResult =
traverseToInode(PathUtils.getPathComponents(uri.getPath()), LockMode.READ, null);
traversalResult.getInodeLockList().close();
return traversalResult.isFound();
} catch (InvalidPathException e) {
return false;
}
}
/**
* Locks existing inodes on the specified path, in the specified {@link LockMode}. The target
* inode is not required to exist.
*
* @param path the path to lock
* @param lockMode the {@link LockMode} to lock the inodes with
* @return the {@link LockedInodePath} representing the locked path of inodes
* @throws InvalidPathException if the path is invalid
*/
public LockedInodePath lockInodePath(AlluxioURI path, LockMode lockMode)
throws InvalidPathException {
TraversalResult traversalResult =
traverseToInode(PathUtils.getPathComponents(path.getPath()), lockMode, null);
return new MutableLockedInodePath(path, traversalResult.getInodes(),
traversalResult.getInodeLockList(), lockMode);
}
/**
* Locks existing inodes on the two specified paths. The two paths will be locked in the
* correct order. The target inodes are not required to exist.
*
* @param path1 the first path to lock
* @param lockMode1 the {@link LockMode} of the first path
* @param path2 the second path to lock
* @param lockMode2 the {@link LockMode} of the second path
* @return a {@link InodePathPair} representing the two locked paths
* @throws InvalidPathException if a path is invalid
*/
public InodePathPair lockInodePathPair(AlluxioURI path1, LockMode lockMode1, AlluxioURI path2,
LockMode lockMode2) throws InvalidPathException {
String[] pathComponents1 = PathUtils.getPathComponents(path1.getPath());
String[] pathComponents2 = PathUtils.getPathComponents(path2.getPath());
List<LockMode> lockHints = new ArrayList<>();
int minLength = Math.min(pathComponents1.length, pathComponents2.length);
for (int i = 0; i < minLength; i++) {
if (pathComponents1[i].equals(pathComponents2[i])) {
// The two paths share a common path prefix.
LockMode mode1 = getLockModeForComponent(i, pathComponents1.length, lockMode1, null);
LockMode mode2 = getLockModeForComponent(i, pathComponents2.length, lockMode2, null);
// If either of the modes are WRITE, lock both components as WRITE to prevent deadlock.
// TODO(gpang): consider a combine helper method
if (mode1 == LockMode.READ && mode2 == LockMode.READ) {
lockHints.add(LockMode.READ);
} else {
lockHints.add(LockMode.WRITE);
}
} else {
// The two paths no longer share a common prefix.
break;
}
}
TraversalResult traversalResult1 = null;
TraversalResult traversalResult2 = null;
boolean valid = false;
try {
// Lock paths in a deterministic order.
if (path1.compareTo(path2) > 0) {
traversalResult2 = traverseToInode(pathComponents2, lockMode2, lockHints);
traversalResult1 = traverseToInode(pathComponents1, lockMode1, lockHints);
} else {
traversalResult1 = traverseToInode(pathComponents1, lockMode1, lockHints);
traversalResult2 = traverseToInode(pathComponents2, lockMode2, lockHints);
}
LockedInodePath inodePath1 = new MutableLockedInodePath(path1, traversalResult1.getInodes(),
traversalResult1.getInodeLockList(), lockMode1);
LockedInodePath inodePath2 = new MutableLockedInodePath(path2, traversalResult2.getInodes(),
traversalResult2.getInodeLockList(), lockMode2);
valid = true;
return new InodePathPair(inodePath1, inodePath2);
} finally {
if (!valid) {
if (traversalResult1 != null) {
traversalResult1.getInodeLockList().close();
}
if (traversalResult2 != null) {
traversalResult2.getInodeLockList().close();
}
}
}
}
/**
* Returns the lock mode for a particular index into the path components.
*
* @param index the index into the path components
* @param length the length of path components
* @param lockMode the specified {@link LockMode}
* @param lockHints the list of lock hints for each index; this can be null, or incomplete
* @return the {@link LockMode} to lock this particular inode at this index with
*/
private LockMode getLockModeForComponent(int index, int length, LockMode lockMode,
List<LockMode> lockHints) {
if (lockHints != null && index < lockHints.size()) {
// Use the lock hint if it exists.
return lockHints.get(index);
}
if (lockMode == LockMode.READ) {
return LockMode.READ;
}
boolean isTarget = (index == length - 1);
boolean isTargetOrParent = (index >= length - 2);
if (isTargetOrParent && lockMode == LockMode.WRITE_PARENT
|| isTarget && lockMode == LockMode.WRITE) {
return LockMode.WRITE;
}
return LockMode.READ;
}
/**
* Locks existing inodes on the specified path, in the specified {@link LockMode}. The target
* inode must exist.
*
* @param path the {@link AlluxioURI} path to lock
* @param lockMode the {@link LockMode} to lock the inodes with
* @return the {@link LockedInodePath} representing the locked path of inodes
* @throws InvalidPathException if the path is invalid
* @throws FileDoesNotExistException if the target inode does not exist
*/
public LockedInodePath lockFullInodePath(AlluxioURI path, LockMode lockMode)
throws InvalidPathException, FileDoesNotExistException {
TraversalResult traversalResult =
traverseToInode(PathUtils.getPathComponents(path.getPath()), lockMode, null);
if (!traversalResult.isFound()) {
traversalResult.getInodeLockList().close();
throw new FileDoesNotExistException(ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(path));
}
return new MutableLockedInodePath(path, traversalResult.getInodes(),
traversalResult.getInodeLockList(), lockMode);
}
/**
* Locks existing inodes on the path to the inode specified by an id, in the specified
* {@link LockMode}. The target inode must exist. This may require multiple traversals of the
* tree, so may be inefficient.
*
* @param id the inode id
* @param lockMode the {@link LockMode} to lock the inodes with
* @return the {@link LockedInodePath} representing the locked path of inodes
* @throws FileDoesNotExistException if the target inode does not exist
*/
public LockedInodePath lockFullInodePath(long id, LockMode lockMode)
throws FileDoesNotExistException {
int count = 0;
while (true) {
Inode<?> inode = mInodes.getFirst(id);
if (inode == null) {
throw new FileDoesNotExistException(ExceptionMessage.INODE_DOES_NOT_EXIST.getMessage(id));
}
// Compute the path given the target inode.
StringBuilder builder = new StringBuilder();
computePathForInode(inode, builder);
AlluxioURI uri = new AlluxioURI(builder.toString());
boolean valid = false;
LockedInodePath inodePath = null;
try {
inodePath = lockFullInodePath(uri, lockMode);
if (inodePath.getInode().getId() == id) {
// Set to true, so the path is not unlocked before returning.
valid = true;
return inodePath;
}
// The path does not end up at the target inode id. Repeat the traversal.
} catch (InvalidPathException e) {
// ignore and repeat the loop
LOG.warn("Inode lookup id {} computed path {} mismatch id. Repeating.", id, uri);
} finally {
if (!valid && inodePath != null) {
inodePath.close();
}
}
count++;
if (count > PATH_TRAVERSAL_RETRIES) {
throw new FileDoesNotExistException(
ExceptionMessage.INODE_DOES_NOT_EXIST_RETRIES.getMessage(id));
}
}
}
/**
* Attempts to extend an existing {@link LockedInodePath} to reach the target inode (the last
* inode for the full path). If the target inode does not exist, an exception will be thrown.
*
* @param inodePath the {@link LockedInodePath} to extend to the target inode
* @param lockMode the {@link LockMode} to lock the inodes with
* @throws InvalidPathException if the path is invalid
* @throws FileDoesNotExistException if the target inode does not exist
*/
public void ensureFullInodePath(LockedInodePath inodePath, LockMode lockMode)
throws InvalidPathException, FileDoesNotExistException {
if (inodePath.fullPathExists()) {
return;
}
TraversalResult traversalResult = traverseToInode(inodePath, lockMode);
if (!traversalResult.isFound()) {
throw new FileDoesNotExistException(
ExceptionMessage.PATH_DOES_NOT_EXIST.getMessage(inodePath.getUri()));
}
}
/**
* Appends components of the path from a given inode.
*
* @param inode the {@link Inode} to compute the path for
* @param builder a {@link StringBuilder} that is updated with the path components
* @throws FileDoesNotExistException if an inode in the path does not exist
*/
private void computePathForInode(Inode<?> inode, StringBuilder builder)
throws FileDoesNotExistException {
inode.lockRead();
long id = inode.getId();
long parentId = inode.getParentId();
String name = inode.getName();
inode.unlockRead();
if (isRootId(id)) {
builder.append(AlluxioURI.SEPARATOR);
} else if (isRootId(parentId)) {
builder.append(AlluxioURI.SEPARATOR);
builder.append(name);
} else {
Inode<?> parentInode = mInodes.getFirst(parentId);
if (parentInode == null) {
throw new FileDoesNotExistException(
ExceptionMessage.INODE_DOES_NOT_EXIST.getMessage(parentId));
}
computePathForInode(parentInode, builder);
builder.append(AlluxioURI.SEPARATOR);
builder.append(name);
}
}
/**
* Returns the path for a particular inode. The inode and the path to the inode must already be
* locked.
*
* @param inode the {@link Inode} to get the path for
* @return the {@link AlluxioURI} for the path of the inode
* @throws FileDoesNotExistException if the path does not exist
*/
public AlluxioURI getPath(Inode<?> inode) throws FileDoesNotExistException {
Preconditions.checkState(inode.isWriteLocked() || inode.isReadLocked());
StringBuilder builder = new StringBuilder();
computePathForInode(inode, builder);
return new AlluxioURI(builder.toString());
}
/**
* @return the root inode
*/
public InodeDirectory getRoot() {
return mRoot;
}
/**
* Creates a file or directory at path.
*
* @param inodePath the path
* @param options method options
* @param journalContext the journal context
* @return a {@link CreatePathResult} representing the modified inodes and created inodes during
* path creation
* @throws FileAlreadyExistsException when there is already a file at path if we want to create a
* directory there
* @throws BlockInfoException when blockSizeBytes is invalid
* @throws InvalidPathException when path is invalid, for example, (1) when there is nonexistent
* necessary parent directories and recursive is false, (2) when one of the necessary
* parent directories is actually a file
* @throws FileDoesNotExistException if the parent of the path does not exist and the recursive
* option is false
*/
public CreatePathResult createPath(LockedInodePath inodePath, CreatePathOptions<?> options,
JournalContext journalContext)
throws FileAlreadyExistsException, BlockInfoException, InvalidPathException, IOException,
FileDoesNotExistException {
// TODO(gpang): consider splitting this into createFilePath and createDirectoryPath, with a
// helper method for the shared logic.
AlluxioURI path = inodePath.getUri();
if (path.isRoot()) {
String errorMessage = ExceptionMessage.FILE_ALREADY_EXISTS.getMessage(path);
LOG.error(errorMessage);
throw new FileAlreadyExistsException(errorMessage);
}
if (options instanceof CreateFileOptions) {
CreateFileOptions fileOptions = (CreateFileOptions) options;
if (fileOptions.getBlockSizeBytes() < 1) {
throw new BlockInfoException("Invalid block size " + fileOptions.getBlockSizeBytes());
}
}
if (!(inodePath instanceof MutableLockedInodePath)) {
throw new InvalidPathException(
ExceptionMessage.NOT_MUTABLE_INODE_PATH.getMessage(inodePath.getUri()));
}
LOG.debug("createPath {}", path);
TraversalResult traversalResult = traverseToInode(inodePath, inodePath.getLockMode());
InodeLockList lockList = traversalResult.getInodeLockList();
MutableLockedInodePath extensibleInodePath = (MutableLockedInodePath) inodePath;
String[] pathComponents = extensibleInodePath.getPathComponents();
String name = path.getName();
// pathIndex is the index into pathComponents where we start filling in the path from the inode.
int pathIndex = extensibleInodePath.getInodes().size();
if (pathIndex < pathComponents.length - 1) {
// The immediate parent was not found. If it's not recursive, we throw an exception here.
// Otherwise we add the remaining path components to the list of components to create.
if (!options.isRecursive()) {
final String msg = new StringBuilder().append("File ").append(path)
.append(" creation failed. Component ")
.append(pathIndex).append("(")
.append(pathComponents[pathIndex])
.append(") does not exist").toString();
LOG.error("FileDoesNotExistException: {}", msg);
throw new FileDoesNotExistException(msg);
}
}
// The ancestor inode (parent or ancestor) of the target path.
Inode<?> ancestorInode = extensibleInodePath.getAncestorInode();
if (!ancestorInode.isDirectory()) {
throw new InvalidPathException("Could not traverse to parent directory of path " + path
+ ". Component " + pathComponents[pathIndex - 1] + " is not a directory.");
}
InodeDirectory currentInodeDirectory = (InodeDirectory) ancestorInode;
List<Inode<?>> createdInodes = new ArrayList<>();
List<Inode<?>> modifiedInodes = new ArrayList<>();
if (options.isPersisted()) {
// Synchronously persist directories. These inodes are already READ locked.
for (Inode inode : traversalResult.getNonPersisted()) {
syncPersistDirectory((InodeDirectory) inode, journalContext);
}
}
if (pathIndex < (pathComponents.length - 1) || currentInodeDirectory.getChild(name) == null) {
// (1) There are components in parent paths that need to be created. Or
// (2) The last component of the path needs to be created.
// In these two cases, the last traversed Inode will be modified.
currentInodeDirectory.setLastModificationTimeMs(options.getOperationTimeMs());
modifiedInodes.add(currentInodeDirectory);
File.InodeLastModificationTimeEntry inodeLastModificationTime =
File.InodeLastModificationTimeEntry.newBuilder().setId(currentInodeDirectory.getId())
.setLastModificationTimeMs(options.getOperationTimeMs()).build();
journalContext.append(
Journal.JournalEntry.newBuilder().setInodeLastModificationTime(inodeLastModificationTime)
.build());
}
// Fill in the ancestor directories that were missing.
// NOTE, we set the mode of missing ancestor directories to be the default value, rather
// than inheriting the option of the final file to create, because it may not have
// "execute" permission.
CreateDirectoryOptions missingDirOptions = CreateDirectoryOptions.defaults()
.setMountPoint(false)
.setPersisted(options.isPersisted())
.setOperationTimeMs(options.getOperationTimeMs())
.setOwner(options.getOwner())
.setGroup(options.getGroup());
for (int k = pathIndex; k < (pathComponents.length - 1); k++) {
InodeDirectory dir = null;
while (dir == null) {
dir = InodeDirectory.create(mDirectoryIdGenerator.getNewDirectoryId(journalContext),
currentInodeDirectory.getId(), pathComponents[k], missingDirOptions);
// Lock the newly created inode before subsequent operations, and add it to the lock group.
lockList.lockWriteAndCheckNameAndParent(dir, currentInodeDirectory, pathComponents[k]);
if (!currentInodeDirectory.addChild(dir)) {
// The child directory inode already exists. Get the existing child inode.
lockList.unlockLast();
dir =
(InodeDirectory) currentInodeDirectory.getChildReadLock(pathComponents[k], lockList);
if (dir == null) {
// Could not get the child inode. Continue and try again.
continue;
}
} else {
// Successfully added the child, while holding the write lock.
dir.setPinned(currentInodeDirectory.isPinned());
if (options.isPersisted()) {
// Do not journal the persist entry, since a creation entry will be journaled instead.
syncPersistDirectory(dir, NoopJournalContext.INSTANCE);
}
// Journal the new inode.
journalContext.append(dir.toJournalEntry());
mInodes.add(dir);
// After creation and journaling, downgrade to a read lock.
lockList.downgradeLast();
}
}
createdInodes.add(dir);
extensibleInodePath.getInodes().add(dir);
currentInodeDirectory = dir;
}
// Create the final path component. First we need to make sure that there isn't already a file
// here with that name. If there is an existing file that is a directory and we're creating a
// directory, update persistence property of the directories if needed, otherwise, throw
// FileAlreadyExistsException unless options.allowExists is true.
Inode<?> lastInode = null;
while (lastInode == null) {
// Try to lock the last inode with the lock mode of the path.
switch (extensibleInodePath.getLockMode()) {
case READ:
lastInode = currentInodeDirectory.getChildReadLock(name, lockList);
break;
case WRITE_PARENT:
case WRITE:
lastInode = currentInodeDirectory.getChildWriteLock(name, lockList);
break;
default:
// This should not be reachable.
LOG.warn("Unexpected lock mode encountered: {}", extensibleInodePath.getLockMode());
}
if (lastInode != null) {
// inode to create already exists
if (lastInode.isDirectory() && options instanceof CreateDirectoryOptions && !lastInode
.isPersisted() && options.isPersisted()) {
// The final path component already exists and is not persisted, so it should be added
// to the non-persisted Inodes of traversalResult.
syncPersistDirectory((InodeDirectory) lastInode, journalContext);
} else if (!lastInode.isDirectory() || !(options instanceof CreateDirectoryOptions
&& ((CreateDirectoryOptions) options).isAllowExists())) {
String errorMessage = ExceptionMessage.FILE_ALREADY_EXISTS.getMessage(path);
LOG.error(errorMessage);
throw new FileAlreadyExistsException(errorMessage);
}
} else {
// create the new inode, with a write lock
if (options instanceof CreateDirectoryOptions) {
CreateDirectoryOptions directoryOptions = (CreateDirectoryOptions) options;
lastInode = InodeDirectory.create(mDirectoryIdGenerator.getNewDirectoryId(journalContext),
currentInodeDirectory.getId(), name, directoryOptions);
// Lock the created inode before subsequent operations, and add it to the lock group.
lockList.lockWriteAndCheckNameAndParent(lastInode, currentInodeDirectory, name);
if (directoryOptions.isPersisted()) {
// Do not journal the persist entry, since a creation entry will be journaled instead.
syncPersistDirectory((InodeDirectory) lastInode, NoopJournalContext.INSTANCE);
}
} else if (options instanceof CreateFileOptions) {
CreateFileOptions fileOptions = (CreateFileOptions) options;
lastInode = InodeFile.create(mContainerIdGenerator.getNewContainerId(),
currentInodeDirectory.getId(), name, System.currentTimeMillis(), fileOptions);
// Lock the created inode before subsequent operations, and add it to the lock group.
lockList.lockWriteAndCheckNameAndParent(lastInode, currentInodeDirectory, name);
if (fileOptions.isCacheable()) {
((InodeFile) lastInode).setCacheable(true);
}
}
lastInode.setPinned(currentInodeDirectory.isPinned());
if (!currentInodeDirectory.addChild(lastInode)) {
// Could not add the child inode to the parent. Continue and try again.
// Cleanup is not necessary, since other state is updated later, after a successful add.
lockList.unlockLast();
lastInode = null;
continue;
}
if (lastInode instanceof InodeFile) {
if (currentInodeDirectory.isPinned()) {
// Update set of pinned file ids.
mPinnedInodeFileIds.add(lastInode.getId());
}
}
// Journal the new inode.
journalContext.append(lastInode.toJournalEntry());
// Update state while holding the write lock.
mInodes.add(lastInode);
if (extensibleInodePath.getLockMode() == LockMode.READ) {
// After creating the inode, downgrade to a read lock
lockList.downgradeLast();
}
createdInodes.add(lastInode);
extensibleInodePath.getInodes().add(lastInode);
}
}
LOG.debug("createFile: File Created: {} parent: {}", lastInode, currentInodeDirectory);
return new CreatePathResult(modifiedInodes, createdInodes);
}
/**
* Reinitializes the block size and TTL of an existing open file.
*
* @param inodePath the path to the file
* @param blockSizeBytes the new block size
* @param ttl the ttl
* @param ttlAction action to perform after TTL expiry
* @return the file id
* @throws InvalidPathException if the path is invalid
* @throws FileDoesNotExistException if the path does not exist
*/
public long reinitializeFile(LockedInodePath inodePath, long blockSizeBytes, long ttl,
TtlAction ttlAction)
throws InvalidPathException, FileDoesNotExistException {
InodeFile file = inodePath.getInodeFile();
file.setBlockSizeBytes(blockSizeBytes);
file.setTtl(ttl);
file.setTtlAction(ttlAction);
return file.getId();
}
/**
* Locks all descendants of a particular {@link LockedInodePath}. Any directory inode
* precedes its descendants in the list.
*
* @param inodePath the root {@link LockedInodePath} to retrieve all descendants from
* @param lockMode the lock type to use
* @return an {@link InodeLockList} representing the list of all descendants
* @throws FileDoesNotExistException if inode does not exist
*/
public InodeLockList lockDescendants(LockedInodePath inodePath, LockMode lockMode)
throws FileDoesNotExistException {
Inode<?> inode = inodePath.getInode();
InodeLockList inodeGroup = new InodeLockList();
if (!inode.isDirectory()) {
return inodeGroup;
}
return lockDescendantsInternal((InodeDirectory) inode, lockMode, inodeGroup);
}
private InodeLockList lockDescendantsInternal(InodeDirectory inodeDirectory,
LockMode lockMode, InodeLockList inodeGroup) {
for (Inode<?> child : inodeDirectory.getChildren()) {
if (lockMode == LockMode.READ) {
try {
inodeGroup.lockReadAndCheckParent(child, inodeDirectory);
} catch (InvalidPathException e) {
// Inode is no longer a child, continue.
continue;
}
} else {
try {
inodeGroup.lockWriteAndCheckParent(child, inodeDirectory);
} catch (InvalidPathException e) {
// Inode is no longer a child, continue.
continue;
}
}
if (child.isDirectory()) {
lockDescendantsInternal((InodeDirectory) child, lockMode, inodeGroup);
}
}
return inodeGroup;
}
/**
* Deletes a single inode from the inode tree by removing it from the parent inode.
*
* @param inodePath The {@link LockedInodePath} to delete
* @param opTimeMs The operation time
* @param deleteOptions the delete options
* @param journalContext the journal context
* @throws FileDoesNotExistException if the Inode cannot be retrieved
*/
public void deleteInode(LockedInodePath inodePath, long opTimeMs, DeleteOptions deleteOptions,
JournalContext journalContext)
throws FileDoesNotExistException {
Inode<?> inode = inodePath.getInode();
InodeDirectory parent = (InodeDirectory) mInodes.getFirst(inode.getParentId());
if (parent == null) {
LOG.warn("Parent id not found: {} deleting inode: {}", inode.getParentId(), inode);
throw new FileDoesNotExistException(
ExceptionMessage.INODE_DOES_NOT_EXIST.getMessage(inode.getParentId()));
}
// Journal before removing the inode from the parent, since the parent is read locked.
File.DeleteFileEntry deleteFile = File.DeleteFileEntry.newBuilder().setId(inode.getId())
.setAlluxioOnly(deleteOptions.isAlluxioOnly())
.setRecursive(deleteOptions.isRecursive())
.setOpTimeMs(opTimeMs).build();
journalContext.append(Journal.JournalEntry.newBuilder().setDeleteFile(deleteFile).build());
parent.removeChild(inode);
parent.setLastModificationTimeMs(opTimeMs);
mInodes.remove(inode);
mPinnedInodeFileIds.remove(inode.getId());
inode.setDeleted(true);
}
/**
* Sets the pinned state of an inode. If the inode is a directory, the pinned state will be set
* recursively.
*
* @param inodePath the {@link LockedInodePath} to set the pinned state for
* @param pinned the pinned state to set for the inode (and possible descendants)
* @param opTimeMs the operation time
* @throws FileDoesNotExistException if inode does not exist
*/
public void setPinned(LockedInodePath inodePath, boolean pinned, long opTimeMs)
throws FileDoesNotExistException {
Inode<?> inode = inodePath.getInode();
inode.setPinned(pinned);
inode.setLastModificationTimeMs(opTimeMs);
if (inode.isFile()) {
InodeFile inodeFile = (InodeFile) inode;
if (inodeFile.isPinned()) {
mPinnedInodeFileIds.add(inodeFile.getId());
} else {
mPinnedInodeFileIds.remove(inodeFile.getId());
}
} else {
assert inode instanceof InodeDirectory;
// inode is a directory. Set the pinned state for all children.
TempInodePathForDescendant tempInodePath = new TempInodePathForDescendant(inodePath);
for (Inode<?> child : ((InodeDirectory) inode).getChildren()) {
try {
child.lockWriteAndCheckParent(inode);
} catch (InvalidPathException e) {
// Inode is no longer a child of the directory, continue.
continue;
}
try {
tempInodePath.setDescendant(child, getPath(child));
setPinned(tempInodePath, pinned, opTimeMs);
} finally {
child.unlockWrite();
}
}
}
}
/**
* Sets the pinned state of an inode. If the inode is a directory, the pinned state will be set
* recursively.
*
* @param inodePath the {@link LockedInodePath} to set the pinned state for
* @param pinned the pinned state to set for the inode (and possible descendants)
* @throws FileDoesNotExistException if inode does not exist
*/
public void setPinned(LockedInodePath inodePath, boolean pinned)
throws FileDoesNotExistException {
setPinned(inodePath, pinned, System.currentTimeMillis());
}
/**
* @return the set of file ids which are pinned
*/
public Set<Long> getPinIdSet() {
return new HashSet<>(mPinnedInodeFileIds);
}
/**
* @param fileId the file id to check
* @return true if the given file id is the root id
*/
public boolean isRootId(long fileId) {
Preconditions.checkNotNull(mRoot, PreconditionMessage.INODE_TREE_UNINITIALIZED_IS_ROOT_ID);
return fileId == mRoot.getId();
}
@Override
public Iterator<Journal.JournalEntry> getJournalEntryIterator() {
// Write tree via breadth-first traversal, so that during deserialization, it may be more
// efficient than depth-first during deserialization due to parent directory's locality.
final Queue<Inode<?>> inodes = new LinkedList<>();
inodes.add(mRoot);
return new Iterator<Journal.JournalEntry>() {
@Override
public boolean hasNext() {
return !inodes.isEmpty();
}
@Override
public Journal.JournalEntry next() {
if (!hasNext()) {
throw new NoSuchElementException();
}
Inode<?> inode = inodes.poll();
if (inode.isDirectory()) {
inodes.addAll(((InodeDirectory) inode).getChildren());
}
return inode.toJournalEntry();
}
@Override
public void remove() {
throw new UnsupportedOperationException("InodeTree#Iterator#remove is not supported");
}
};
}
/**
* Adds the file represented by the entry parameter into the inode tree.
*
* @param entry the journal entry representing an inode
*/
public void addInodeFileFromJournal(InodeFileEntry entry) {
InodeFile file = InodeFile.fromJournalEntry(entry);
addInodeFromJournalInternal(file);
}
/**
* Adds the directory represented by the entry parameter into the inode tree. If the inode entry
* represents the root inode, the tree is "reset", and all state is cleared.
*
* @param entry the journal entry representing an inode
* @throws AccessControlException when owner of mRoot is not the owner of root journal entry
*/
public void addInodeDirectoryFromJournal(InodeDirectoryEntry entry)
throws AccessControlException {
InodeDirectory directory = InodeDirectory.fromJournalEntry(entry);
if (directory.getName().equals(ROOT_INODE_NAME)) {
// This is the root inode. Clear all the state, and set the root.
// For backwards-compatibility:
// Empty owner in journal entry indicates that previous journal has no security. In this
// case, the journal is allowed to be applied to the new inode with security turned on.
if (SecurityUtils.isSecurityEnabled() && mRoot != null && !directory.getOwner().isEmpty()
&& !mRoot.getOwner().equals(directory.getOwner())) {
// user is not the owner of journal root entry
throw new AccessControlException(
ExceptionMessage.PERMISSION_DENIED.getMessage("Unauthorized user on root"));
}
mInodes.clear();
mPinnedInodeFileIds.clear();
mRoot = directory;
// If journal entry has no security enabled, change the replayed inode permission to be 0777
// for backwards-compatibility.
if (SecurityUtils.isSecurityEnabled() && mRoot != null && mRoot.getOwner().isEmpty() && mRoot
.getGroup().isEmpty()) {
mRoot.setMode(Constants.DEFAULT_FILE_SYSTEM_MODE);
}
mCachedInode = mRoot;
mInodes.add(mRoot);
} else {
addInodeFromJournalInternal(directory);
}
}
/**
* Adds a given inode into the inode tree, by adding the inode to its parent. Also updates the
* appropriate inode indexes.
*
* @param inode the inode to add to the inode tree
*/
private void addInodeFromJournalInternal(Inode<?> inode) {
InodeDirectory parentDirectory = mCachedInode;
if (inode.getParentId() != mCachedInode.getId()) {
parentDirectory = (InodeDirectory) mInodes.getFirst(inode.getParentId());
mCachedInode = parentDirectory;
}
parentDirectory.addChild(inode);
mInodes.add(inode);
// If journal entry has no security enabled, change the replayed inode permission to be 0777
// for backwards-compatibility.
if (SecurityUtils.isSecurityEnabled() && inode != null && inode.getOwner().isEmpty()
&& inode.getGroup().isEmpty()) {
inode.setMode(Constants.DEFAULT_FILE_SYSTEM_MODE);
}
// Update indexes.
if (inode.isFile() && inode.isPinned()) {
mPinnedInodeFileIds.add(inode.getId());
}
}
/**
* Synchronously persists an {@link InodeDirectory} to the UFS. If concurrent calls are made, only
* one thread will persist to UFS, and the others will wait until it is persisted.
*
* @param dir the {@link InodeDirectory} to persist
* @param journalContext the journal context
* @throws InvalidPathException if the path for the inode is invalid
* @throws FileDoesNotExistException if the path for the inode is invalid
*/
public void syncPersistDirectory(InodeDirectory dir, JournalContext journalContext)
throws IOException, InvalidPathException, FileDoesNotExistException {
RetryPolicy retry =
new ExponentialBackoffRetry(PERSIST_WAIT_BASE_SLEEP_MS, PERSIST_WAIT_MAX_SLEEP_MS,
PERSIST_WAIT_MAX_RETRIES);
while (dir.getPersistenceState() != PersistenceState.PERSISTED) {
if (dir.compareAndSwap(PersistenceState.NOT_PERSISTED,
PersistenceState.TO_BE_PERSISTED)) {
boolean success = false;
try {
AlluxioURI uri = getPath(dir);
MountTable.Resolution resolution = mMountTable.resolve(uri);
String ufsUri = resolution.getUri().toString();
UnderFileSystem ufs = resolution.getUfs();
MkdirsOptions mkdirsOptions =
MkdirsOptions.defaults().setCreateParent(false).setOwner(dir.getOwner())
.setGroup(dir.getGroup()).setMode(new Mode(dir.getMode()));
ufs.mkdirs(ufsUri, mkdirsOptions);
dir.setPersistenceState(PersistenceState.PERSISTED);
// Append the persist entry to the journal.
File.PersistDirectoryEntry persistDirectory =
File.PersistDirectoryEntry.newBuilder().setId(dir.getId()).build();
journalContext.append(
Journal.JournalEntry.newBuilder().setPersistDirectory(persistDirectory).build());
success = true;
} finally {
if (!success) {
// Failed to persist the inode, so set the state back to NOT_PERSISTED.
dir.setPersistenceState(PersistenceState.NOT_PERSISTED);
}
}
} else {
if (!retry.attemptRetry()) {
throw new IOException(ExceptionMessage.FAILED_UFS_CREATE.getMessage(dir.getName()));
}
}
}
}
@Override
public int hashCode() {
return Objects.hashCode(mRoot, mInodes, mPinnedInodeFileIds, mContainerIdGenerator,
mDirectoryIdGenerator, mCachedInode);
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof InodeTree)) {
return false;
}
InodeTree that = (InodeTree) o;
return Objects.equal(mRoot, that.mRoot)
&& Objects.equal(mInodes, that.mInodes)
&& Objects.equal(mPinnedInodeFileIds, that.mPinnedInodeFileIds)
&& Objects.equal(mContainerIdGenerator, that.mContainerIdGenerator)
&& Objects.equal(mDirectoryIdGenerator, that.mDirectoryIdGenerator)
&& Objects.equal(mCachedInode, that.mCachedInode);
}
/**
* Traverses the tree to find the given path components. Hints for the lock mode at each path
* component can be specified.
*
* @param pathComponents the components of the path to traverse
* @param lockMode the {@link LockMode} for the path
* @param lockHints optional {@link List} to specify the lock type for each path component; this
* can be shorter than pathComponents
* @return the {@link TraversalResult} for this traversal
* @throws InvalidPathException if the path is invalid
*/
private TraversalResult traverseToInode(String[] pathComponents, LockMode lockMode,
List<LockMode> lockHints)
throws InvalidPathException {
List<Inode<?>> nonPersistedInodes = new ArrayList<>();
List<Inode<?>> inodes = new ArrayList<>();
InodeLockList lockList = new InodeLockList();
// This must be set to true before returning a valid value, otherwise all the inodes will be
// unlocked.
boolean valid = false;
try {
if (pathComponents == null) {
throw new InvalidPathException(ExceptionMessage.PATH_COMPONENTS_INVALID.getMessage("null"));
} else if (pathComponents.length == 0) {
throw new InvalidPathException(
ExceptionMessage.PATH_COMPONENTS_INVALID.getMessage("empty"));
} else if (pathComponents.length == 1) {
if (pathComponents[0].equals("")) {
if (getLockModeForComponent(0, pathComponents.length, lockMode, lockHints)
== LockMode.READ) {
lockList.lockRead(mRoot);
} else {
lockList.lockWrite(mRoot);
}
inodes.add(mRoot);
valid = true;
return TraversalResult.createFoundResult(nonPersistedInodes, inodes, lockList);
} else {
throw new InvalidPathException(
ExceptionMessage.PATH_COMPONENTS_INVALID_START.getMessage(pathComponents[0]));
}
}
if (getLockModeForComponent(0, pathComponents.length, lockMode, lockHints) == LockMode.READ) {
lockList.lockRead(mRoot);
} else {
lockList.lockWrite(mRoot);
}
inodes.add(mRoot);
TraversalResult result =
traverseToInodeInternal(pathComponents, inodes, nonPersistedInodes, lockList, lockMode,
lockHints);
valid = true;
return result;
} finally {
if (!valid) {
lockList.close();
}
}
}
/**
* Traverses the tree to find the rest of the given {@link LockedInodePath}. Hints for the lock
* mode at each path.
*
* @param inodePath the {@link LockedInodePath} to start the traversal from
* @param lockMode the {@link LockMode} for the path
* @return the {@link TraversalResult} for this traversal
* @throws InvalidPathException if the path is invalid
*/
private TraversalResult traverseToInode(LockedInodePath inodePath, LockMode lockMode)
throws InvalidPathException {
// the inodePath is guaranteed to already include at least the root inode.
if (!(inodePath instanceof MutableLockedInodePath)) {
throw new InvalidPathException(
ExceptionMessage.NOT_MUTABLE_INODE_PATH.getMessage(inodePath.getUri()));
}
MutableLockedInodePath extensibleInodePath = (MutableLockedInodePath) inodePath;
List<Inode<?>> inodes = extensibleInodePath.getInodes();
InodeLockList lockList = extensibleInodePath.getLockList();
List<Inode<?>> nonPersistedInodes = new ArrayList<>();
for (Inode<?> inode : inodes) {
if (!inode.isPersisted()) {
nonPersistedInodes.add(inode);
}
}
return traverseToInodeInternal(extensibleInodePath.getPathComponents(), inodes,
nonPersistedInodes, lockList, lockMode, null);
}
private TraversalResult traverseToInodeInternal(String[] pathComponents, List<Inode<?>> inodes,
List<Inode<?>> nonPersistedInodes, InodeLockList lockList, LockMode lockMode,
List<LockMode> lockHints)
throws InvalidPathException {
Inode<?> current = inodes.get(inodes.size() - 1);
for (int i = inodes.size(); i < pathComponents.length; i++) {
Inode<?> next = ((InodeDirectory) current).getChild(pathComponents[i]);
if (next == null) {
// The user might want to create the nonexistent directories, so return the traversal
// result current inode with the last Inode taken, and the index of the first path
// component that couldn't be found.
return TraversalResult.createNotFoundResult(i, nonPersistedInodes, inodes, lockList);
}
// Lock the existing next inode before proceeding.
if (getLockModeForComponent(i, pathComponents.length, lockMode, lockHints)
== LockMode.READ) {
lockList.lockReadAndCheckNameAndParent(next, current, pathComponents[i]);
} else {
lockList.lockWriteAndCheckNameAndParent(next, current, pathComponents[i]);
}
if (next.isFile()) {
// The inode can't have any children. If this is the last path component, we're good.
// Otherwise, we can't traverse further, so we clean up and throw an exception.
if (i == pathComponents.length - 1) {
inodes.add(next);
return TraversalResult.createFoundResult(nonPersistedInodes, inodes, lockList);
} else {
throw new InvalidPathException(
"Traversal failed. Component " + i + "(" + next.getName() + ") is a file");
}
} else {
inodes.add(next);
if (!next.isPersisted()) {
// next is a directory and not persisted
nonPersistedInodes.add(next);
}
current = next;
}
}
return TraversalResult.createFoundResult(nonPersistedInodes, inodes, lockList);
}
private static final class TraversalResult {
/** True if the traversal found the target inode, false otherwise. */
private final boolean mFound;
/** The list of non-persisted inodes encountered during the traversal. */
private final List<Inode<?>> mNonPersisted;
/** The list of all inodes encountered during the traversal. */
private final List<Inode<?>> mInodes;
/** The {@link InodeLockList} managing the locks for the inodes. */
private final InodeLockList mLockList;
// TODO(gpang): consider a builder paradigm to iteratively build the traversal result.
static TraversalResult createFoundResult(List<Inode<?>> nonPersisted, List<Inode<?>> inodes,
InodeLockList lockList) {
return new TraversalResult(true, nonPersisted, inodes, lockList);
}
static TraversalResult createNotFoundResult(int index, List<Inode<?>> nonPersisted,
List<Inode<?>> inodes, InodeLockList lockList) {
return new TraversalResult(false, nonPersisted, inodes, lockList);
}
private TraversalResult(boolean found, List<Inode<?>> nonPersisted,
List<Inode<?>> inodes, InodeLockList lockList) {
mFound = found;
mNonPersisted = nonPersisted;
mInodes = inodes;
mLockList = lockList;
}
/**
* @return true if target inode was found, false otherwise
*/
boolean isFound() {
return mFound;
}
/**
* @return the list of non-persisted inodes encountered during the traversal
*/
List<Inode<?>> getNonPersisted() {
return mNonPersisted;
}
/**
* @return the list of all inodes encountered during the traversal
*/
List<Inode<?>> getInodes() {
return mInodes;
}
/**
* @return the {@link InodeLockList} managing the locks for all the inodes
*/
InodeLockList getInodeLockList() {
return mLockList;
}
}
/**
* Represents the results of creating a path in the inode tree. This keeps track of inodes which
* were modified, and inodes which were newly created during the path creation.
*
* In particular, a {@link CreatePathResult} consists of an ordered list of modified inodes and an
* ordered list of created inodes. Appending the latter to the former produces a list of inodes
* starting with the root inode and ending in the inode corresponding to the created path.
*/
public static final class CreatePathResult {
private final List<Inode<?>> mModified;
private final List<Inode<?>> mCreated;
/**
* Constructs the results of modified and created inodes when creating a path.
*
* @param modified a list of modified inodes
* @param created a list of created inodes
*/
CreatePathResult(List<Inode<?>> modified, List<Inode<?>> created) {
mModified = Preconditions.checkNotNull(modified);
mCreated = Preconditions.checkNotNull(created);
}
/**
* @return the list of inodes modified during path creation
*/
public List<Inode<?>> getModified() {
return mModified;
}
/**
* @return the list of inodes created during path creation
*/
public List<Inode<?>> getCreated() {
return mCreated;
}
}
}