/* * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 * (the "License"). You may not use this work except in compliance with the License, which is * available at www.apache.org/licenses/LICENSE-2.0 * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied, as more fully set forth in the License. * * See the NOTICE file distributed with this work for information regarding copyright ownership. */ package alluxio.worker.block; import alluxio.Configuration; import alluxio.PropertyKey; import alluxio.StorageTierAssoc; import alluxio.WorkerStorageTierAssoc; import alluxio.collections.Pair; import alluxio.exception.BlockAlreadyExistsException; import alluxio.exception.BlockDoesNotExistException; import alluxio.exception.ExceptionMessage; import alluxio.exception.InvalidWorkerStateException; import alluxio.exception.WorkerOutOfSpaceException; import alluxio.resource.LockResource; import alluxio.util.io.FileUtils; import alluxio.worker.block.allocator.Allocator; import alluxio.worker.block.evictor.BlockTransferInfo; import alluxio.worker.block.evictor.EvictionPlan; import alluxio.worker.block.evictor.Evictor; import alluxio.worker.block.io.BlockReader; import alluxio.worker.block.io.BlockWriter; import alluxio.worker.block.io.LocalFileBlockReader; import alluxio.worker.block.io.LocalFileBlockWriter; import alluxio.worker.block.meta.BlockMeta; import alluxio.worker.block.meta.StorageDirView; import alluxio.worker.block.meta.TempBlockMeta; import com.google.common.base.Preconditions; import com.google.common.base.Throwables; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantReadWriteLock; import javax.annotation.concurrent.NotThreadSafe; /** * This class represents an object store that manages all the blocks in the local tiered storage. * This store exposes simple public APIs to operate blocks. Inside this store, it creates an * Allocator to decide where to put a new block, an Evictor to decide where to evict a stale block, * a BlockMetadataManager to maintain the status of the tiered storage, and a LockManager to * coordinate read/write on the same block. * <p> * This class is thread-safe, using the following lock hierarchy to ensure thread-safety: * <ul> * <li>Any block-level operation (e.g., read, move or remove) on an existing block must acquire a * block lock for this block via {@link TieredBlockStore#mLockManager}. This block lock is a * read/write lock, guarding both the metadata operations and the following I/O on this block. It * coordinates different threads (clients) when accessing the same block concurrently.</li> * <li>Any metadata operation (read or write) must go through {@link TieredBlockStore#mMetaManager} * and guarded by {@link TieredBlockStore#mMetadataLock}. This is also a read/write lock and * coordinates different threads (clients) when accessing the shared data structure for metadata. * </li> * <li>Method {@link #createBlock} does not acquire the block lock, because it only creates a * temp block which is only visible to its writer before committed (thus no concurrent access).</li> * <li>Method {@link #abortBlock(long, long)} does not acquire the block lock, because only * temporary blocks can be aborted, and they are only visible to their writers (thus no concurrent * access). * <li>Eviction is done in {@link #freeSpaceInternal} and it is on the basis of best effort. For * operations that may trigger this eviction (e.g., move, create, requestSpace), retry is used</li> * </ul> */ @NotThreadSafe // TODO(jiri): make thread-safe (c.f. ALLUXIO-1624) public final class TieredBlockStore implements BlockStore { private static final Logger LOG = LoggerFactory.getLogger(TieredBlockStore.class); private static final int MAX_RETRIES = Configuration.getInt(PropertyKey.WORKER_TIERED_STORE_RETRY); private final BlockMetadataManager mMetaManager; private final BlockLockManager mLockManager; private final Allocator mAllocator; private final Evictor mEvictor; private final List<BlockStoreEventListener> mBlockStoreEventListeners = new ArrayList<>(); /** A set of pinned inodes fetched from the master. */ private final Set<Long> mPinnedInodes = new HashSet<>(); /** Lock to guard metadata operations. */ private final ReentrantReadWriteLock mMetadataLock = new ReentrantReadWriteLock(); /** ReadLock provided by {@link #mMetadataLock} to guard metadata read operations. */ private final Lock mMetadataReadLock = mMetadataLock.readLock(); /** WriteLock provided by {@link #mMetadataLock} to guard metadata write operations. */ private final Lock mMetadataWriteLock = mMetadataLock.writeLock(); /** Association between storage tier aliases and ordinals. */ private final StorageTierAssoc mStorageTierAssoc; /** * Creates a new instance of {@link TieredBlockStore}. */ public TieredBlockStore() { mMetaManager = BlockMetadataManager.createBlockMetadataManager(); mLockManager = new BlockLockManager(); BlockMetadataManagerView initManagerView = new BlockMetadataManagerView(mMetaManager, Collections.<Long>emptySet(), Collections.<Long>emptySet()); mAllocator = Allocator.Factory.create(initManagerView); if (mAllocator instanceof BlockStoreEventListener) { registerBlockStoreEventListener((BlockStoreEventListener) mAllocator); } initManagerView = new BlockMetadataManagerView(mMetaManager, Collections.<Long>emptySet(), Collections.<Long>emptySet()); mEvictor = Evictor.Factory.create(initManagerView, mAllocator); if (mEvictor instanceof BlockStoreEventListener) { registerBlockStoreEventListener((BlockStoreEventListener) mEvictor); } mStorageTierAssoc = new WorkerStorageTierAssoc(); } @Override public long lockBlock(long sessionId, long blockId) throws BlockDoesNotExistException { long lockId = mLockManager.lockBlock(sessionId, blockId, BlockLockType.READ); boolean hasBlock; try (LockResource r = new LockResource(mMetadataReadLock)) { hasBlock = mMetaManager.hasBlockMeta(blockId); } if (hasBlock) { return lockId; } mLockManager.unlockBlock(lockId); throw new BlockDoesNotExistException(ExceptionMessage.NO_BLOCK_ID_FOUND, blockId); } @Override public long lockBlockNoException(long sessionId, long blockId) { long lockId = mLockManager.lockBlock(sessionId, blockId, BlockLockType.READ); boolean hasBlock; try (LockResource r = new LockResource(mMetadataReadLock)) { hasBlock = mMetaManager.hasBlockMeta(blockId); } if (hasBlock) { return lockId; } mLockManager.unlockBlockNoException(lockId); return BlockLockManager.INVALID_LOCK_ID; } @Override public void unlockBlock(long lockId) throws BlockDoesNotExistException { mLockManager.unlockBlock(lockId); } @Override public boolean unlockBlock(long sessionId, long blockId) { return mLockManager.unlockBlock(sessionId, blockId); } @Override public BlockWriter getBlockWriter(long sessionId, long blockId) throws BlockDoesNotExistException, BlockAlreadyExistsException, InvalidWorkerStateException, IOException { // NOTE: a temp block is supposed to only be visible by its own writer, unnecessary to acquire // block lock here since no sharing // TODO(bin): Handle the case where multiple writers compete for the same block. try (LockResource r = new LockResource(mMetadataReadLock)) { checkTempBlockOwnedBySession(sessionId, blockId); TempBlockMeta tempBlockMeta = mMetaManager.getTempBlockMeta(blockId); return new LocalFileBlockWriter(tempBlockMeta.getPath()); } } @Override public BlockReader getBlockReader(long sessionId, long blockId, long lockId) throws BlockDoesNotExistException, InvalidWorkerStateException, IOException { mLockManager.validateLock(sessionId, blockId, lockId); try (LockResource r = new LockResource(mMetadataReadLock)) { BlockMeta blockMeta = mMetaManager.getBlockMeta(blockId); return new LocalFileBlockReader(blockMeta.getPath()); } } @Override public TempBlockMeta createBlock(long sessionId, long blockId, BlockStoreLocation location, long initialBlockSize) throws BlockAlreadyExistsException, WorkerOutOfSpaceException, IOException { for (int i = 0; i < MAX_RETRIES + 1; i++) { TempBlockMeta tempBlockMeta = createBlockMetaInternal(sessionId, blockId, location, initialBlockSize, true); if (tempBlockMeta != null) { createBlockFile(tempBlockMeta.getPath()); return tempBlockMeta; } if (i < MAX_RETRIES) { // Failed to create a temp block, so trigger Evictor to make some space. // NOTE: a successful {@link freeSpaceInternal} here does not ensure the subsequent // allocation also successful, because these two operations are not atomic. freeSpaceInternal(sessionId, initialBlockSize, location); } } // TODO(bin): We are probably seeing a rare transient failure, maybe define and throw some // other types of exception to indicate this case. throw new WorkerOutOfSpaceException(ExceptionMessage.NO_SPACE_FOR_BLOCK_ALLOCATION, initialBlockSize, MAX_RETRIES, blockId); } // TODO(bin): Make this method to return a snapshot. @Override public BlockMeta getVolatileBlockMeta(long blockId) throws BlockDoesNotExistException { try (LockResource r = new LockResource(mMetadataReadLock)) { return mMetaManager.getBlockMeta(blockId); } } @Override public BlockMeta getBlockMeta(long sessionId, long blockId, long lockId) throws BlockDoesNotExistException, InvalidWorkerStateException { mLockManager.validateLock(sessionId, blockId, lockId); try (LockResource r = new LockResource(mMetadataReadLock)) { return mMetaManager.getBlockMeta(blockId); } } @Override public TempBlockMeta getTempBlockMeta(long sessionId, long blockId) { try (LockResource r = new LockResource(mMetadataReadLock)) { return mMetaManager.getTempBlockMetaOrNull(blockId); } } @Override public void commitBlock(long sessionId, long blockId) throws BlockAlreadyExistsException, InvalidWorkerStateException, BlockDoesNotExistException, IOException { BlockStoreLocation loc = commitBlockInternal(sessionId, blockId); synchronized (mBlockStoreEventListeners) { for (BlockStoreEventListener listener : mBlockStoreEventListeners) { listener.onCommitBlock(sessionId, blockId, loc); } } } @Override public void abortBlock(long sessionId, long blockId) throws BlockAlreadyExistsException, BlockDoesNotExistException, InvalidWorkerStateException, IOException { abortBlockInternal(sessionId, blockId); synchronized (mBlockStoreEventListeners) { for (BlockStoreEventListener listener : mBlockStoreEventListeners) { listener.onAbortBlock(sessionId, blockId); } } } @Override public void requestSpace(long sessionId, long blockId, long additionalBytes) throws BlockDoesNotExistException, WorkerOutOfSpaceException, IOException { for (int i = 0; i < MAX_RETRIES + 1; i++) { Pair<Boolean, BlockStoreLocation> requestResult = requestSpaceInternal(blockId, additionalBytes); if (requestResult.getFirst()) { return; } if (i < MAX_RETRIES) { freeSpaceInternal(sessionId, additionalBytes, requestResult.getSecond()); } } throw new WorkerOutOfSpaceException(ExceptionMessage.NO_SPACE_FOR_BLOCK_ALLOCATION, additionalBytes, MAX_RETRIES, blockId); } @Override public void moveBlock(long sessionId, long blockId, BlockStoreLocation newLocation) throws BlockDoesNotExistException, BlockAlreadyExistsException, InvalidWorkerStateException, WorkerOutOfSpaceException, IOException { moveBlock(sessionId, blockId, BlockStoreLocation.anyTier(), newLocation); } @Override public void moveBlock(long sessionId, long blockId, BlockStoreLocation oldLocation, BlockStoreLocation newLocation) throws BlockDoesNotExistException, BlockAlreadyExistsException, InvalidWorkerStateException, WorkerOutOfSpaceException, IOException { for (int i = 0; i < MAX_RETRIES + 1; i++) { MoveBlockResult moveResult = moveBlockInternal(sessionId, blockId, oldLocation, newLocation); if (moveResult.getSuccess()) { synchronized (mBlockStoreEventListeners) { for (BlockStoreEventListener listener : mBlockStoreEventListeners) { listener.onMoveBlockByClient(sessionId, blockId, moveResult.getSrcLocation(), moveResult.getDstLocation()); } } return; } if (i < MAX_RETRIES) { freeSpaceInternal(sessionId, moveResult.getBlockSize(), newLocation); } } throw new WorkerOutOfSpaceException(ExceptionMessage.NO_SPACE_FOR_BLOCK_MOVE, newLocation, blockId, MAX_RETRIES); } @Override public void removeBlock(long sessionId, long blockId) throws InvalidWorkerStateException, BlockDoesNotExistException, IOException { removeBlock(sessionId, blockId, BlockStoreLocation.anyTier()); } @Override public void removeBlock(long sessionId, long blockId, BlockStoreLocation location) throws InvalidWorkerStateException, BlockDoesNotExistException, IOException { removeBlockInternal(sessionId, blockId, location); synchronized (mBlockStoreEventListeners) { for (BlockStoreEventListener listener : mBlockStoreEventListeners) { listener.onRemoveBlockByClient(sessionId, blockId); } } } @Override public void accessBlock(long sessionId, long blockId) throws BlockDoesNotExistException { boolean hasBlock; try (LockResource r = new LockResource(mMetadataReadLock)) { hasBlock = mMetaManager.hasBlockMeta(blockId); } if (!hasBlock) { throw new BlockDoesNotExistException(ExceptionMessage.NO_BLOCK_ID_FOUND, blockId); } synchronized (mBlockStoreEventListeners) { for (BlockStoreEventListener listener : mBlockStoreEventListeners) { listener.onAccessBlock(sessionId, blockId); } } } @Override public void freeSpace(long sessionId, long availableBytes, BlockStoreLocation location) throws BlockDoesNotExistException, WorkerOutOfSpaceException, IOException { // TODO(bin): Consider whether to retry here. freeSpaceInternal(sessionId, availableBytes, location); } @Override public void cleanupSession(long sessionId) { // Release all locks the session is holding. mLockManager.cleanupSession(sessionId); // Collect a list of temp blocks the given session owns and abort all of them with best effort List<TempBlockMeta> tempBlocksToRemove; try (LockResource r = new LockResource(mMetadataReadLock)) { tempBlocksToRemove = mMetaManager.getSessionTempBlocks(sessionId); } for (TempBlockMeta tempBlockMeta : tempBlocksToRemove) { try { LOG.warn("Clean up expired temporary block {} from session {}.", tempBlockMeta.getBlockId(), sessionId); abortBlockInternal(sessionId, tempBlockMeta.getBlockId()); } catch (Exception e) { LOG.error("Failed to cleanup tempBlock {} due to {}", tempBlockMeta.getBlockId(), e.getMessage()); } } } @Override public boolean hasBlockMeta(long blockId) { try (LockResource r = new LockResource(mMetadataReadLock)) { return mMetaManager.hasBlockMeta(blockId); } } @Override public BlockStoreMeta getBlockStoreMeta() { BlockStoreMeta storeMeta; try (LockResource r = new LockResource(mMetadataReadLock)) { storeMeta = mMetaManager.getBlockStoreMeta(); } return storeMeta; } @Override public BlockStoreMeta getBlockStoreMetaFull() { BlockStoreMeta storeMeta; try (LockResource r = new LockResource(mMetadataReadLock)) { storeMeta = mMetaManager.getBlockStoreMetaFull(); } return storeMeta; } @Override public void registerBlockStoreEventListener(BlockStoreEventListener listener) { synchronized (mBlockStoreEventListeners) { mBlockStoreEventListeners.add(listener); } } /** * Checks if a block id is available for a new temp block. This method must be enclosed by * {@link #mMetadataLock}. * * @param blockId the id of block * @throws BlockAlreadyExistsException if block id already exists */ private void checkTempBlockIdAvailable(long blockId) throws BlockAlreadyExistsException { if (mMetaManager.hasTempBlockMeta(blockId)) { throw new BlockAlreadyExistsException(ExceptionMessage.TEMP_BLOCK_ID_EXISTS, blockId); } if (mMetaManager.hasBlockMeta(blockId)) { throw new BlockAlreadyExistsException(ExceptionMessage.TEMP_BLOCK_ID_COMMITTED, blockId); } } /** * Checks if block id is a temporary block and owned by session id. This method must be enclosed * by {@link #mMetadataLock}. * * @param sessionId the id of session * @param blockId the id of block * @throws BlockDoesNotExistException if block id can not be found in temporary blocks * @throws BlockAlreadyExistsException if block id already exists in committed blocks * @throws InvalidWorkerStateException if block id is not owned by session id */ private void checkTempBlockOwnedBySession(long sessionId, long blockId) throws BlockDoesNotExistException, BlockAlreadyExistsException, InvalidWorkerStateException { if (mMetaManager.hasBlockMeta(blockId)) { throw new BlockAlreadyExistsException(ExceptionMessage.TEMP_BLOCK_ID_COMMITTED, blockId); } TempBlockMeta tempBlockMeta = mMetaManager.getTempBlockMeta(blockId); long ownerSessionId = tempBlockMeta.getSessionId(); if (ownerSessionId != sessionId) { throw new InvalidWorkerStateException(ExceptionMessage.BLOCK_ID_FOR_DIFFERENT_SESSION, blockId, ownerSessionId, sessionId); } } /** * Aborts a temp block. * * @param sessionId the id of session * @param blockId the id of block * @throws BlockDoesNotExistException if block id can not be found in temporary blocks * @throws BlockAlreadyExistsException if block id already exists in committed blocks * @throws InvalidWorkerStateException if block id is not owned by session id */ private void abortBlockInternal(long sessionId, long blockId) throws BlockDoesNotExistException, BlockAlreadyExistsException, InvalidWorkerStateException, IOException { String path; TempBlockMeta tempBlockMeta; try (LockResource r = new LockResource(mMetadataReadLock)) { checkTempBlockOwnedBySession(sessionId, blockId); tempBlockMeta = mMetaManager.getTempBlockMeta(blockId); path = tempBlockMeta.getPath(); } // The metadata lock is released during heavy IO. The temp block is private to one session, so // we do not lock it. Files.delete(Paths.get(path)); try (LockResource r = new LockResource(mMetadataWriteLock)) { mMetaManager.abortTempBlockMeta(tempBlockMeta); } catch (BlockDoesNotExistException e) { throw Throwables.propagate(e); // We shall never reach here } } /** * Commits a temp block. * * @param sessionId the id of session * @param blockId the id of block * @return destination location to move the block * @throws BlockDoesNotExistException if block id can not be found in temporary blocks * @throws BlockAlreadyExistsException if block id already exists in committed blocks * @throws InvalidWorkerStateException if block id is not owned by session id */ private BlockStoreLocation commitBlockInternal(long sessionId, long blockId) throws BlockAlreadyExistsException, InvalidWorkerStateException, BlockDoesNotExistException, IOException { long lockId = mLockManager.lockBlock(sessionId, blockId, BlockLockType.WRITE); try { // When committing TempBlockMeta, the final BlockMeta calculates the block size according to // the actual file size of this TempBlockMeta. Therefore, commitTempBlockMeta must happen // after moving actual block file to its committed path. BlockStoreLocation loc; String srcPath; String dstPath; TempBlockMeta tempBlockMeta; try (LockResource r = new LockResource(mMetadataReadLock)) { checkTempBlockOwnedBySession(sessionId, blockId); tempBlockMeta = mMetaManager.getTempBlockMeta(blockId); srcPath = tempBlockMeta.getPath(); dstPath = tempBlockMeta.getCommitPath(); loc = tempBlockMeta.getBlockLocation(); } // Heavy IO is guarded by block lock but not metadata lock. This may throw IOException. FileUtils.move(srcPath, dstPath); try (LockResource r = new LockResource(mMetadataWriteLock)) { mMetaManager.commitTempBlockMeta(tempBlockMeta); } catch (BlockAlreadyExistsException | BlockDoesNotExistException | WorkerOutOfSpaceException e) { throw Throwables.propagate(e); // we shall never reach here } return loc; } finally { mLockManager.unlockBlock(lockId); } } /** * Creates a temp block meta only if allocator finds available space. This method will not trigger * any eviction. * * @param sessionId session Id * @param blockId block Id * @param location location to create the block * @param initialBlockSize initial block size in bytes * @param newBlock true if this temp block is created for a new block * @return a temp block created if successful, or null if allocation failed (instead of throwing * {@link WorkerOutOfSpaceException} because allocation failure could be an expected case) * @throws BlockAlreadyExistsException if there is already a block with the same block id */ private TempBlockMeta createBlockMetaInternal(long sessionId, long blockId, BlockStoreLocation location, long initialBlockSize, boolean newBlock) throws BlockAlreadyExistsException { // NOTE: a temp block is supposed to be visible for its own writer, unnecessary to acquire // block lock here since no sharing try (LockResource r = new LockResource(mMetadataWriteLock)) { if (newBlock) { checkTempBlockIdAvailable(blockId); } StorageDirView dirView = mAllocator.allocateBlockWithView(sessionId, initialBlockSize, location, getUpdatedView()); if (dirView == null) { // Allocator fails to find a proper place for this new block. return null; } // TODO(carson): Add tempBlock to corresponding storageDir and remove the use of // StorageDirView.createTempBlockMeta. TempBlockMeta tempBlock = dirView.createTempBlockMeta(sessionId, blockId, initialBlockSize); try { // Add allocated temp block to metadata manager. This should never fail if allocator // correctly assigns a StorageDir. mMetaManager.addTempBlockMeta(tempBlock); } catch (WorkerOutOfSpaceException | BlockAlreadyExistsException e) { // If we reach here, allocator is not working properly LOG.error("Unexpected failure: {} bytes allocated at {} by allocator, " + "but addTempBlockMeta failed", initialBlockSize, location); throw Throwables.propagate(e); } return tempBlock; } } /** * Increases the temp block size only if this temp block's parent dir has enough available space. * * @param blockId block Id * @param additionalBytes additional bytes to request for this block * @return a pair of boolean and {@link BlockStoreLocation}. The boolean indicates if the * operation succeeds and the {@link BlockStoreLocation} denotes where to free more space * if it fails. * @throws BlockDoesNotExistException if this block is not found */ private Pair<Boolean, BlockStoreLocation> requestSpaceInternal(long blockId, long additionalBytes) throws BlockDoesNotExistException { // NOTE: a temp block is supposed to be visible for its own writer, unnecessary to acquire // block lock here since no sharing try (LockResource r = new LockResource(mMetadataWriteLock)) { TempBlockMeta tempBlockMeta = mMetaManager.getTempBlockMeta(blockId); if (tempBlockMeta.getParentDir().getAvailableBytes() < additionalBytes) { return new Pair<>(false, tempBlockMeta.getBlockLocation()); } // Increase the size of this temp block try { mMetaManager.resizeTempBlockMeta(tempBlockMeta, tempBlockMeta.getBlockSize() + additionalBytes); } catch (InvalidWorkerStateException e) { throw Throwables.propagate(e); // we shall never reach here } return new Pair<>(true, null); } } /** * Tries to get an eviction plan to free a certain amount of space in the given location, and * carries out this plan with the best effort. * * @param sessionId the session Id * @param availableBytes amount of space in bytes to free * @param location location of space * @throws WorkerOutOfSpaceException if it is impossible to achieve the free requirement */ private void freeSpaceInternal(long sessionId, long availableBytes, BlockStoreLocation location) throws WorkerOutOfSpaceException, IOException { EvictionPlan plan; try (LockResource r = new LockResource(mMetadataReadLock)) { plan = mEvictor.freeSpaceWithView(availableBytes, location, getUpdatedView()); // Absent plan means failed to evict enough space. if (plan == null) { throw new WorkerOutOfSpaceException(ExceptionMessage.NO_EVICTION_PLAN_TO_FREE_SPACE); } } // 1. remove blocks to make room. for (Pair<Long, BlockStoreLocation> blockInfo : plan.toEvict()) { try { removeBlockInternal(sessionId, blockInfo.getFirst(), blockInfo.getSecond()); } catch (InvalidWorkerStateException e) { // Evictor is not working properly LOG.error("Failed to evict blockId {}, this is temp block", blockInfo.getFirst()); continue; } catch (BlockDoesNotExistException e) { LOG.info("Failed to evict blockId {}, it could be already deleted", blockInfo.getFirst()); continue; } synchronized (mBlockStoreEventListeners) { for (BlockStoreEventListener listener : mBlockStoreEventListeners) { listener.onRemoveBlockByWorker(sessionId, blockInfo.getFirst()); } } } // 2. transfer blocks among tiers. // 2.1. group blocks move plan by the destination tier. Map<String, Set<BlockTransferInfo>> blocksGroupedByDestTier = new HashMap<>(); for (BlockTransferInfo entry : plan.toMove()) { String alias = entry.getDstLocation().tierAlias(); if (!blocksGroupedByDestTier.containsKey(alias)) { blocksGroupedByDestTier.put(alias, new HashSet<BlockTransferInfo>()); } blocksGroupedByDestTier.get(alias).add(entry); } // 2.2. move blocks in the order of their dst tiers, from bottom to top for (int tierOrdinal = mStorageTierAssoc.size() - 1; tierOrdinal >= 0; --tierOrdinal) { Set<BlockTransferInfo> toMove = blocksGroupedByDestTier.get(mStorageTierAssoc.getAlias(tierOrdinal)); if (toMove == null) { toMove = new HashSet<>(); } for (BlockTransferInfo entry : toMove) { long blockId = entry.getBlockId(); BlockStoreLocation oldLocation = entry.getSrcLocation(); BlockStoreLocation newLocation = entry.getDstLocation(); MoveBlockResult moveResult; try { moveResult = moveBlockInternal(sessionId, blockId, oldLocation, newLocation); } catch (InvalidWorkerStateException e) { // Evictor is not working properly LOG.error("Failed to evict blockId {}, this is temp block", blockId); continue; } catch (BlockAlreadyExistsException e) { continue; } catch (BlockDoesNotExistException e) { LOG.info("Failed to move blockId {}, it could be already deleted", blockId); continue; } if (moveResult.getSuccess()) { synchronized (mBlockStoreEventListeners) { for (BlockStoreEventListener listener : mBlockStoreEventListeners) { listener.onMoveBlockByWorker(sessionId, blockId, moveResult.getSrcLocation(), newLocation); } } } } } } /** * Gets the most updated view with most recent information on pinned inodes, and currently locked * blocks. * * @return {@link BlockMetadataManagerView}, an updated view with most recent information */ private BlockMetadataManagerView getUpdatedView() { // TODO(calvin): Update the view object instead of creating new one every time. synchronized (mPinnedInodes) { return new BlockMetadataManagerView(mMetaManager, mPinnedInodes, mLockManager.getLockedBlocks()); } } /** * Moves a block to new location only if allocator finds available space in newLocation. This * method will not trigger any eviction. Returns {@link MoveBlockResult}. * * @param sessionId session Id * @param blockId block Id * @param oldLocation the source location of the block * @param newLocation new location to move this block * @return the resulting information about the move operation * @throws BlockDoesNotExistException if block is not found * @throws BlockAlreadyExistsException if a block with same Id already exists in new location * @throws InvalidWorkerStateException if the block to move is a temp block */ private MoveBlockResult moveBlockInternal(long sessionId, long blockId, BlockStoreLocation oldLocation, BlockStoreLocation newLocation) throws BlockDoesNotExistException, BlockAlreadyExistsException, InvalidWorkerStateException, IOException { long lockId = mLockManager.lockBlock(sessionId, blockId, BlockLockType.WRITE); try { long blockSize; String srcFilePath; String dstFilePath; BlockMeta srcBlockMeta; BlockStoreLocation srcLocation; BlockStoreLocation dstLocation; try (LockResource r = new LockResource(mMetadataReadLock)) { if (mMetaManager.hasTempBlockMeta(blockId)) { throw new InvalidWorkerStateException(ExceptionMessage.MOVE_UNCOMMITTED_BLOCK, blockId); } srcBlockMeta = mMetaManager.getBlockMeta(blockId); srcLocation = srcBlockMeta.getBlockLocation(); srcFilePath = srcBlockMeta.getPath(); blockSize = srcBlockMeta.getBlockSize(); } if (!srcLocation.belongsTo(oldLocation)) { throw new BlockDoesNotExistException(ExceptionMessage.BLOCK_NOT_FOUND_AT_LOCATION, blockId, oldLocation); } TempBlockMeta dstTempBlock = createBlockMetaInternal(sessionId, blockId, newLocation, blockSize, false); if (dstTempBlock == null) { return new MoveBlockResult(false, blockSize, null, null); } // When `newLocation` is some specific location, the `newLocation` and the `dstLocation` are // just the same; while for `newLocation` with a wildcard significance, the `dstLocation` // is a specific one with specific tier and dir which belongs to newLocation. dstLocation = dstTempBlock.getBlockLocation(); // When the dstLocation belongs to srcLocation, simply abort the tempBlockMeta just created // internally from the newLocation and return success with specific block location. if (dstLocation.belongsTo(srcLocation)) { mMetaManager.abortTempBlockMeta(dstTempBlock); return new MoveBlockResult(true, blockSize, srcLocation, dstLocation); } dstFilePath = dstTempBlock.getCommitPath(); // Heavy IO is guarded by block lock but not metadata lock. This may throw IOException. FileUtils.move(srcFilePath, dstFilePath); try (LockResource r = new LockResource(mMetadataWriteLock)) { // If this metadata update fails, we panic for now. // TODO(bin): Implement rollback scheme to recover from IO failures. mMetaManager.moveBlockMeta(srcBlockMeta, dstTempBlock); } catch (BlockAlreadyExistsException | BlockDoesNotExistException | WorkerOutOfSpaceException e) { // WorkerOutOfSpaceException is only possible if session id gets cleaned between // createBlockMetaInternal and moveBlockMeta. throw Throwables.propagate(e); // we shall never reach here } return new MoveBlockResult(true, blockSize, srcLocation, dstLocation); } finally { mLockManager.unlockBlock(lockId); } } /** * Removes a block. * * @param sessionId session Id * @param blockId block Id * @param location the source location of the block * @throws InvalidWorkerStateException if the block to remove is a temp block * @throws BlockDoesNotExistException if this block can not be found */ private void removeBlockInternal(long sessionId, long blockId, BlockStoreLocation location) throws InvalidWorkerStateException, BlockDoesNotExistException, IOException { long lockId = mLockManager.lockBlock(sessionId, blockId, BlockLockType.WRITE); try { String filePath; BlockMeta blockMeta; try (LockResource r = new LockResource(mMetadataReadLock)) { if (mMetaManager.hasTempBlockMeta(blockId)) { throw new InvalidWorkerStateException(ExceptionMessage.REMOVE_UNCOMMITTED_BLOCK, blockId); } blockMeta = mMetaManager.getBlockMeta(blockId); filePath = blockMeta.getPath(); } if (!blockMeta.getBlockLocation().belongsTo(location)) { throw new BlockDoesNotExistException(ExceptionMessage.BLOCK_NOT_FOUND_AT_LOCATION, blockId, location); } // Heavy IO is guarded by block lock but not metadata lock. This may throw IOException. Files.delete(Paths.get(filePath)); try (LockResource r = new LockResource(mMetadataWriteLock)) { mMetaManager.removeBlockMeta(blockMeta); } catch (BlockDoesNotExistException e) { throw Throwables.propagate(e); // we shall never reach here } } finally { mLockManager.unlockBlock(lockId); } } /** * Creates a file to represent a block denoted by the given block path. This file will be owned * by the Alluxio worker but have 777 permissions so processes under users different from the * user that launched the Alluxio worker can read and write to the file. The tiered storage * directory has the sticky bit so only the worker user can delete or rename files it creates. * * @param blockPath the block path to create */ // TODO(peis): Consider using domain socket to avoid setting the permission to 777. private static void createBlockFile(String blockPath) throws IOException { FileUtils.createBlockPath(blockPath); FileUtils.createFile(blockPath); FileUtils.changeLocalFileToFullPermission(blockPath); LOG.debug("Created new file block, block path: {}", blockPath); } /** * Updates the pinned blocks. * * @param inodes a set of ids inodes that are pinned */ @Override public void updatePinnedInodes(Set<Long> inodes) { synchronized (mPinnedInodes) { mPinnedInodes.clear(); mPinnedInodes.addAll(Preconditions.checkNotNull(inodes)); } } /** * A wrapper on necessary info after a move block operation. */ private static class MoveBlockResult { /** Whether this move operation succeeds. */ private final boolean mSuccess; /** Size of this block in bytes. */ private final long mBlockSize; /** Source location of this block to move. */ private final BlockStoreLocation mSrcLocation; /** Destination location of this block to move. */ private final BlockStoreLocation mDstLocation; /** * Creates a new instance of {@link MoveBlockResult}. * * @param success success indication * @param blockSize block size * @param srcLocation source location * @param dstLocation destination location */ MoveBlockResult(boolean success, long blockSize, BlockStoreLocation srcLocation, BlockStoreLocation dstLocation) { mSuccess = success; mBlockSize = blockSize; mSrcLocation = srcLocation; mDstLocation = dstLocation; } /** * @return the success indicator */ boolean getSuccess() { return mSuccess; } /** * @return the block size */ long getBlockSize() { return mBlockSize; } /** * @return the source location */ BlockStoreLocation getSrcLocation() { return mSrcLocation; } /** * @return the destination location */ BlockStoreLocation getDstLocation() { return mDstLocation; } } }