/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.worker.block;
import alluxio.Configuration;
import alluxio.Constants;
import alluxio.PropertyKey;
import alluxio.RuntimeConstants;
import alluxio.Server;
import alluxio.Sessions;
import alluxio.exception.BlockAlreadyExistsException;
import alluxio.exception.BlockDoesNotExistException;
import alluxio.exception.ExceptionMessage;
import alluxio.exception.InvalidWorkerStateException;
import alluxio.exception.WorkerOutOfSpaceException;
import alluxio.heartbeat.HeartbeatContext;
import alluxio.heartbeat.HeartbeatThread;
import alluxio.metrics.MetricsSystem;
import alluxio.thrift.BlockWorkerClientService;
import alluxio.underfs.UfsManager;
import alluxio.util.CommonUtils;
import alluxio.util.ThreadFactoryUtils;
import alluxio.util.network.NetworkAddressUtils;
import alluxio.util.network.NetworkAddressUtils.ServiceType;
import alluxio.wire.FileInfo;
import alluxio.wire.WorkerNetAddress;
import alluxio.worker.AbstractWorker;
import alluxio.worker.SessionCleaner;
import alluxio.worker.block.io.BlockReader;
import alluxio.worker.block.io.BlockWriter;
import alluxio.worker.block.meta.BlockMeta;
import alluxio.worker.block.meta.TempBlockMeta;
import alluxio.worker.block.options.OpenUfsBlockOptions;
import alluxio.worker.file.FileSystemMasterClient;
import com.codahale.metrics.Gauge;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import org.apache.thrift.TProcessor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;
import javax.annotation.concurrent.NotThreadSafe;
import javax.annotation.concurrent.ThreadSafe;
/**
* The class is responsible for managing all top level components of the Block Worker.
*
* This includes:
*
* Servers: {@link BlockWorkerClientServiceHandler} (RPC Server)
*
* Periodic Threads: {@link BlockMasterSync} (Worker to Master continuous communication)
*
* Logic: {@link DefaultBlockWorker} (Logic for all block related storage operations)
*/
@NotThreadSafe // TODO(jiri): make thread-safe (c.f. ALLUXIO-1624)
public final class DefaultBlockWorker extends AbstractWorker implements BlockWorker {
private static final Logger LOG = LoggerFactory.getLogger(DefaultBlockWorker.class);

/** Runnable responsible for heartbeating and registration with master; created in start. */
private BlockMasterSync mBlockMasterSync;

/** Runnable responsible for fetching pinlist from master; created in start. */
private PinListSync mPinListSync;

/** Runnable responsible for clean up potential zombie sessions; created in start. */
private SessionCleaner mSessionCleaner;

/** Client for all block master communication. */
private final BlockMasterClient mBlockMasterClient;

/** Client for all file system master communication. */
private final FileSystemMasterClient mFileSystemMasterClient;

/** Block store delta reporter for master heartbeat. */
private final BlockHeartbeatReporter mHeartbeatReporter;

/** Metrics reporter that listens on block events and increases metrics counters. */
private final BlockMetricsReporter mMetricsReporter;

/** Session metadata, used to keep track of session heartbeats. */
private final Sessions mSessions;

/** Block Store manager. */
private final BlockStore mBlockStore;

/** The net address of this worker; assigned in {@link #start(WorkerNetAddress)}. */
private WorkerNetAddress mAddress;

/** The under file system block store. */
private final UnderFileSystemBlockStore mUnderFileSystemBlockStore;

/**
 * The worker ID for this worker. The reference is created in the constructor holding -1; the
 * actual ID is stored in {@link #start(WorkerNetAddress)} and may be updated by the block sync
 * thread if the master requests re-registration.
 */
private final AtomicReference<Long> mWorkerId;
/**
 * Creates a block worker using default collaborators: a block master client and a file system
 * master client that both connect to the master RPC address, a new {@link Sessions} instance and
 * a new {@link TieredBlockStore}.
 *
 * @param ufsManager ufs manager
 */
DefaultBlockWorker(UfsManager ufsManager) {
  this(
      new BlockMasterClient(NetworkAddressUtils.getConnectAddress(ServiceType.MASTER_RPC)),
      new FileSystemMasterClient(NetworkAddressUtils.getConnectAddress(ServiceType.MASTER_RPC)),
      new Sessions(),
      new TieredBlockStore(),
      ufsManager);
}
/**
 * Creates a block worker from explicitly supplied collaborators.
 *
 * The constructor builds a four-thread executor for the worker's background tasks, registers the
 * heartbeat and metrics reporters as listeners on the block store, wraps the block store in an
 * {@link UnderFileSystemBlockStore} and registers the worker metric gauges.
 *
 * @param blockMasterClient a client for talking to the block master
 * @param fileSystemMasterClient a client for talking to the file system master
 * @param sessions an object for tracking and cleaning up client sessions
 * @param blockStore an Alluxio block store
 * @param ufsManager ufs manager
 */
DefaultBlockWorker(BlockMasterClient blockMasterClient,
    FileSystemMasterClient fileSystemMasterClient, Sessions sessions, BlockStore blockStore,
    UfsManager ufsManager) {
  super(Executors.newFixedThreadPool(4,
      ThreadFactoryUtils.build("block-worker-heartbeat-%d", true)));

  // Collaborators handed in by the caller.
  mBlockMasterClient = blockMasterClient;
  mFileSystemMasterClient = fileSystemMasterClient;
  mSessions = sessions;
  mBlockStore = blockStore;

  // State owned by this worker; -1 is the placeholder until an ID is obtained.
  mWorkerId = new AtomicReference<>(-1L);
  mHeartbeatReporter = new BlockHeartbeatReporter();
  mMetricsReporter = new BlockMetricsReporter();

  // Listener registration order is kept: heartbeat reporter first, then metrics reporter.
  mBlockStore.registerBlockStoreEventListener(mHeartbeatReporter);
  mBlockStore.registerBlockStoreEventListener(mMetricsReporter);

  mUnderFileSystemBlockStore = new UnderFileSystemBlockStore(mBlockStore, ufsManager);
  Metrics.registerGauges(this);
}
/**
 * @return a new, empty set: the block worker declares no server dependencies
 */
@Override
public Set<Class<? extends Server>> getDependencies() {
  Set<Class<? extends Server>> noDependencies = new HashSet<>();
  return noDependencies;
}
/**
 * @return the block worker name constant, {@code Constants.BLOCK_WORKER_NAME}
 */
@Override
public String getName() {
  final String workerName = Constants.BLOCK_WORKER_NAME;
  return workerName;
}
/**
 * @return the block store instance this worker was constructed with
 */
@Override
public BlockStore getBlockStore() {
  return this.mBlockStore;
}
/**
 * @return a newly created service handler bound to this worker (a new instance on every call)
 */
@Override
public BlockWorkerClientServiceHandler getWorkerServiceHandler() {
  BlockWorkerClientServiceHandler handler = new BlockWorkerClientServiceHandler(this);
  return handler;
}
/**
 * @return a new map with a single entry: the block worker client service name mapped to a
 *         Thrift processor wrapping a fresh service handler
 */
@Override
public Map<String, TProcessor> getServices() {
  BlockWorkerClientServiceHandler handler = getWorkerServiceHandler();
  Map<String, TProcessor> serviceMap = new HashMap<>();
  serviceMap.put(Constants.BLOCK_WORKER_CLIENT_SERVICE_NAME,
      new BlockWorkerClientService.Processor<>(handler));
  return serviceMap;
}
/**
 * @return the shared reference holding this worker's ID (the reference itself, not a copy)
 */
@Override
public AtomicReference<Long> getWorkerId() {
  return this.mWorkerId;
}
/**
 * Starts the block worker. This method must be called after all services (e.g., web,
 * dataserver) have started.
 *
 * It obtains a worker ID from the block master, creates the master sync, pin list sync and
 * session cleaner runnables, and submits them (plus the space reserver, when enabled by
 * configuration) to the worker's executor service.
 *
 * @param address the net address of this worker, sent to the block master to obtain a worker ID
 * @throws IOException declared for callers; a failure to obtain the worker ID surfaces as a
 *         {@link RuntimeException} carrying the original exception as its cause
 */
@Override
public void start(WorkerNetAddress address) throws IOException {
  // Validate before the address is sent to the master rather than after it has been used.
  mAddress = Preconditions.checkNotNull(address, "address");
  try {
    mWorkerId.set(mBlockMasterClient.getId(address));
  } catch (Exception e) {
    // Keep the original exception as the cause so the stack trace is not lost.
    throw new RuntimeException(
        "Failed to get a worker id from block master: " + e.getMessage(), e);
  }

  // Setup BlockMasterSync
  mBlockMasterSync = new BlockMasterSync(this, mWorkerId, mAddress, mBlockMasterClient);

  // Setup PinListSyncer
  mPinListSync = new PinListSync(this, mFileSystemMasterClient);

  // Setup session cleaner
  mSessionCleaner = new SessionCleaner(mSessions, mBlockStore, mUnderFileSystemBlockStore);

  // Setup space reserver
  if (Configuration.getBoolean(PropertyKey.WORKER_TIERED_STORE_RESERVER_ENABLED)) {
    getExecutorService().submit(
        new HeartbeatThread(HeartbeatContext.WORKER_SPACE_RESERVER, new SpaceReserver(this),
            Configuration.getInt(PropertyKey.WORKER_TIERED_STORE_RESERVER_INTERVAL_MS)));
  }

  // Start the block master sync to perform the periodical heartbeat
  getExecutorService()
      .submit(new HeartbeatThread(HeartbeatContext.WORKER_BLOCK_SYNC, mBlockMasterSync,
          Configuration.getInt(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS)));

  // Start the pinlist syncer to perform the periodical fetching
  getExecutorService()
      .submit(new HeartbeatThread(HeartbeatContext.WORKER_PIN_LIST_SYNC, mPinListSync,
          Configuration.getInt(PropertyKey.WORKER_BLOCK_HEARTBEAT_INTERVAL_MS)));

  // Start the session cleanup checker to perform the periodical checking
  getExecutorService().submit(mSessionCleaner);
}
/**
 * Stops the block worker. This method should only be called to terminate the worker.
 *
 * Steps to shutdown:
 * 1. Gracefully shut down the runnables running in the executors.
 * 2. Shutdown the executors.
 * 3. Shutdown the clients. This needs to happen after the executors are shut down because
 *    runnables running in the executors might be using the clients.
 */
@Override
public void stop() {
  // The session cleaner is only created in start(); guard so that stopping a worker that was
  // never started does not fail with a NullPointerException.
  if (mSessionCleaner != null) {
    mSessionCleaner.stop();
  }
  // The executor shutdown needs to be done in a loop with retry because the interrupt
  // signal can sometimes be ignored.
  CommonUtils.waitFor("block worker executor shutdown", new Function<Void, Boolean>() {
    @Override
    public Boolean apply(Void input) {
      getExecutorService().shutdownNow();
      try {
        return getExecutorService().awaitTermination(100, TimeUnit.MILLISECONDS);
      } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can observe it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
      }
    }
  });
  mBlockMasterClient.close();
  mFileSystemMasterClient.close();
}
/**
 * Aborts the given block for the given session by delegating to the block store's
 * {@code abortBlock}.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the block ID passed through to the block store
 */
@Override
public void abortBlock(long sessionId, long blockId)
    throws BlockAlreadyExistsException, BlockDoesNotExistException,
    InvalidWorkerStateException, IOException {
  this.mBlockStore.abortBlock(sessionId, blockId);
}
/**
 * Forwards a block access notification to the block store's {@code accessBlock}.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the block ID passed through to the block store
 */
@Override
public void accessBlock(long sessionId, long blockId)
    throws BlockDoesNotExistException {
  this.mBlockStore.accessBlock(sessionId, blockId);
}
/**
 * Commits a block in the local block store and then reports the committed block to the block
 * master.
 *
 * A {@link BlockAlreadyExistsException} from the local commit is logged at debug level and
 * otherwise ignored, because the call may be a client-side retry. Any exception raised while
 * reading the block metadata or notifying the master is wrapped in an {@link IOException}.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the ID of the block to commit
 * @throws IOException if the block metadata cannot be read or the master cannot be updated
 */
@Override
public void commitBlock(long sessionId, long blockId)
    throws BlockAlreadyExistsException, BlockDoesNotExistException, InvalidWorkerStateException,
    IOException, WorkerOutOfSpaceException {
  // NOTE: this may be invoked multiple times due to retry on client side.
  // TODO(binfan): find a better way to handle retry logic
  try {
    mBlockStore.commitBlock(sessionId, blockId);
  } catch (BlockAlreadyExistsException e) {
    LOG.debug("Block {} has been in block store, this could be a retry due to master-side RPC "
        + "failure, therefore ignore the exception", blockId, e);
  }

  // TODO(calvin): Reconsider how to do this without heavy locking.
  // Block successfully committed, update master with new block metadata
  // Primitives are used for the numeric locals; boxing them served no purpose.
  long lockId = mBlockStore.lockBlock(sessionId, blockId);
  try {
    BlockMeta meta = mBlockStore.getBlockMeta(sessionId, blockId, lockId);
    BlockStoreLocation loc = meta.getBlockLocation();
    long length = meta.getBlockSize();
    BlockStoreMeta storeMeta = mBlockStore.getBlockStoreMeta();
    // A missing tier entry fails here on unboxing and is wrapped by the catch below, exactly
    // as it previously failed when the boxed value was passed to the client.
    long bytesUsedOnTier = storeMeta.getUsedBytesOnTiers().get(loc.tierAlias());
    mBlockMasterClient.commitBlock(mWorkerId.get(), bytesUsedOnTier, loc.tierAlias(), blockId,
        length);
  } catch (Exception e) {
    throw new IOException(ExceptionMessage.FAILED_COMMIT_BLOCK_TO_MASTER.getMessage(blockId), e);
  } finally {
    mBlockStore.unlockBlock(lockId);
  }
}
/**
 * Creates a temporary block in any directory of the requested tier and returns its path.
 *
 * When the block store reports it is out of space, the exception is rethrown as a new
 * {@link WorkerOutOfSpaceException} whose message includes the debug docs URL, this worker's
 * host and RPC port, and the block ID; the original exception is kept as the cause.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the ID of the block to create
 * @param tierAlias the alias of the tier to place the block in
 * @param initialBytes the initial number of bytes passed through to the block store
 * @return the path of the created temporary block
 */
@Override
public String createBlock(long sessionId, long blockId, String tierAlias, long initialBytes)
    throws BlockAlreadyExistsException, WorkerOutOfSpaceException, IOException {
  final BlockStoreLocation targetLocation = BlockStoreLocation.anyDirInTier(tierAlias);
  final TempBlockMeta tempBlock;
  try {
    tempBlock = mBlockStore.createBlock(sessionId, blockId, targetLocation, initialBytes);
  } catch (WorkerOutOfSpaceException e) {
    throw new WorkerOutOfSpaceException(
        ExceptionMessage.CANNOT_REQUEST_SPACE.getMessageWithUrl(
            RuntimeConstants.ALLUXIO_DEBUG_DOCS_URL,
            InetSocketAddress.createUnresolved(mAddress.getHost(), mAddress.getRpcPort()),
            blockId),
        e);
  }
  return tempBlock.getPath();
}
/**
 * Creates a temporary block in any directory of the requested tier; the block store's result
 * is not returned to the caller.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the ID of the block to create
 * @param tierAlias the alias of the tier to place the block in
 * @param initialBytes the initial number of bytes passed through to the block store
 */
@Override
public void createBlockRemote(long sessionId, long blockId, String tierAlias, long initialBytes)
    throws BlockAlreadyExistsException, WorkerOutOfSpaceException, IOException {
  final BlockStoreLocation targetLocation = BlockStoreLocation.anyDirInTier(tierAlias);
  mBlockStore.createBlock(sessionId, blockId, targetLocation, initialBytes);
}
/**
 * Asks the block store to free space in any directory of the given tier.
 *
 * @param sessionId the session ID passed through to the block store
 * @param availableBytes the byte amount passed through to the block store's {@code freeSpace}
 * @param tierAlias the alias of the tier to free space in
 */
@Override
public void freeSpace(long sessionId, long availableBytes, String tierAlias)
    throws WorkerOutOfSpaceException, BlockDoesNotExistException, IOException,
    BlockAlreadyExistsException, InvalidWorkerStateException {
  mBlockStore.freeSpace(sessionId, availableBytes, BlockStoreLocation.anyDirInTier(tierAlias));
}
/**
 * Obtains a writer for the given block from the block store.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the block ID passed through to the block store
 * @return the writer returned by the block store's {@code getBlockWriter}
 */
@Override
public BlockWriter getTempBlockWriterRemote(long sessionId, long blockId)
    throws BlockDoesNotExistException, BlockAlreadyExistsException, InvalidWorkerStateException,
    IOException {
  final BlockWriter writer = mBlockStore.getBlockWriter(sessionId, blockId);
  return writer;
}
/**
 * @return the report produced by the heartbeat reporter's {@code generateReport}
 */
@Override
public BlockHeartbeatReport getReport() {
  final BlockHeartbeatReport report = mHeartbeatReporter.generateReport();
  return report;
}
/**
 * @return the result of the block store's {@code getBlockStoreMeta}
 */
@Override
public BlockStoreMeta getStoreMeta() {
  final BlockStoreMeta storeMeta = mBlockStore.getBlockStoreMeta();
  return storeMeta;
}
/**
 * @return the result of the block store's {@code getBlockStoreMetaFull}
 */
@Override
public BlockStoreMeta getStoreMetaFull() {
  final BlockStoreMeta fullStoreMeta = mBlockStore.getBlockStoreMetaFull();
  return fullStoreMeta;
}
/**
 * Looks up block metadata through the block store's {@code getVolatileBlockMeta}.
 *
 * @param blockId the block ID passed through to the block store
 * @return the metadata returned by the block store
 */
@Override
public BlockMeta getVolatileBlockMeta(long blockId) throws BlockDoesNotExistException {
  final BlockMeta volatileMeta = mBlockStore.getVolatileBlockMeta(blockId);
  return volatileMeta;
}
/**
 * Looks up block metadata through the block store's {@code getBlockMeta}.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the block ID passed through to the block store
 * @param lockId the lock ID passed through to the block store
 * @return the metadata returned by the block store
 */
@Override
public BlockMeta getBlockMeta(long sessionId, long blockId, long lockId)
    throws BlockDoesNotExistException, InvalidWorkerStateException {
  final BlockMeta blockMeta = mBlockStore.getBlockMeta(sessionId, blockId, lockId);
  return blockMeta;
}
/**
 * @param blockId the block ID passed through to the block store
 * @return the result of the block store's {@code hasBlockMeta} for the given block ID
 */
@Override
public boolean hasBlockMeta(long blockId) {
  final boolean present = mBlockStore.hasBlockMeta(blockId);
  return present;
}
/**
 * Locks a block through the block store's {@code lockBlock}.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the block ID passed through to the block store
 * @return the lock ID returned by the block store
 */
@Override
public long lockBlock(long sessionId, long blockId) throws BlockDoesNotExistException {
  final long lockId = mBlockStore.lockBlock(sessionId, blockId);
  return lockId;
}
/**
 * Locks a block through the block store's {@code lockBlockNoException}.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the block ID passed through to the block store
 * @return the value returned by the block store
 */
@Override
public long lockBlockNoException(long sessionId, long blockId) {
  final long lockId = mBlockStore.lockBlockNoException(sessionId, blockId);
  return lockId;
}
/**
 * Moves a block to any directory of the given tier, unless its current location already
 * belongs to that tier.
 *
 * The block is locked only while its current location is inspected; the lock is released
 * before the move itself is requested from the block store.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the ID of the block to move
 * @param tierAlias the alias of the destination tier
 */
@Override
public void moveBlock(long sessionId, long blockId, String tierAlias)
    throws BlockDoesNotExistException, BlockAlreadyExistsException, InvalidWorkerStateException,
    WorkerOutOfSpaceException, IOException {
  // TODO(calvin): Move this logic into BlockStore#moveBlockInternal if possible
  final BlockStoreLocation destination = BlockStoreLocation.anyDirInTier(tierAlias);

  // The move is expensive, so find out first whether the block is already where it should be.
  final boolean alreadyInDestination;
  final long lockId = mBlockStore.lockBlock(sessionId, blockId);
  try {
    BlockStoreLocation current =
        mBlockStore.getBlockMeta(sessionId, blockId, lockId).getBlockLocation();
    alreadyInDestination = current.belongsTo(destination);
  } finally {
    mBlockStore.unlockBlock(lockId);
  }

  if (!alreadyInDestination) {
    mBlockStore.moveBlock(sessionId, blockId, destination);
  }
}
/**
 * Returns the path recorded in the block's metadata.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the block ID passed through to the block store
 * @param lockId the lock ID passed through to the block store
 * @return the path reported by the block metadata
 */
@Override
public String readBlock(long sessionId, long blockId, long lockId)
    throws BlockDoesNotExistException, InvalidWorkerStateException {
  return mBlockStore.getBlockMeta(sessionId, blockId, lockId).getPath();
}
/**
 * Obtains a reader for the given block from the block store.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the block ID passed through to the block store
 * @param lockId the lock ID passed through to the block store
 * @return the reader returned by the block store's {@code getBlockReader}
 */
@Override
public BlockReader readBlockRemote(long sessionId, long blockId, long lockId)
    throws BlockDoesNotExistException, InvalidWorkerStateException, IOException {
  final BlockReader reader = mBlockStore.getBlockReader(sessionId, blockId, lockId);
  return reader;
}
/**
 * Obtains a reader for the given block from the under file system block store.
 *
 * @param sessionId the session ID passed through to the UFS block store
 * @param blockId the block ID passed through to the UFS block store
 * @param offset the offset passed through to the UFS block store
 * @param noCache the no-cache flag passed through to the UFS block store
 * @return the reader returned by the UFS block store's {@code getBlockReader}
 */
@Override
public BlockReader readUfsBlock(long sessionId, long blockId, long offset, boolean noCache)
    throws BlockDoesNotExistException, IOException {
  final BlockReader ufsReader =
      mUnderFileSystemBlockStore.getBlockReader(sessionId, blockId, offset, noCache);
  return ufsReader;
}
/**
 * Removes a block by delegating to the block store's {@code removeBlock}.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the block ID passed through to the block store
 */
@Override
public void removeBlock(long sessionId, long blockId)
    throws InvalidWorkerStateException, BlockDoesNotExistException, IOException {
  this.mBlockStore.removeBlock(sessionId, blockId);
}
/**
 * Requests additional space for a block by delegating to the block store's
 * {@code requestSpace}.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the block ID passed through to the block store
 * @param additionalBytes the byte amount passed through to the block store
 */
@Override
public void requestSpace(long sessionId, long blockId, long additionalBytes)
    throws BlockDoesNotExistException, WorkerOutOfSpaceException, IOException {
  this.mBlockStore.requestSpace(sessionId, blockId, additionalBytes);
}
/**
 * Releases a lock by delegating to the block store's {@code unlockBlock(lockId)}.
 *
 * @param lockId the lock ID passed through to the block store
 */
@Override
public void unlockBlock(long lockId) throws BlockDoesNotExistException {
  this.mBlockStore.unlockBlock(lockId);
}
/**
 * Releases a lock identified by session and block by delegating to the block store's
 * {@code unlockBlock(sessionId, blockId)}.
 *
 * @param sessionId the session ID passed through to the block store
 * @param blockId the block ID passed through to the block store
 * @return the boolean returned by the block store
 */
@Override
// TODO(calvin): Remove when lock and reads are separate operations.
public boolean unlockBlock(long sessionId, long blockId) {
  final boolean result = mBlockStore.unlockBlock(sessionId, blockId);
  return result;
}
/**
 * Records a heartbeat for a session by delegating to {@code Sessions#sessionHeartbeat}.
 *
 * @param sessionId the ID of the session sending the heartbeat
 */
@Override
public void sessionHeartbeat(long sessionId) {
  this.mSessions.sessionHeartbeat(sessionId);
}
/**
 * Passes the given set of pinned inode IDs to the block store's {@code updatePinnedInodes}.
 *
 * @param pinnedInodes the set of pinned inode IDs
 */
@Override
public void updatePinList(Set<Long> pinnedInodes) {
  this.mBlockStore.updatePinnedInodes(pinnedInodes);
}
/**
 * Fetches file information through the file system master client.
 *
 * @param fileId the file ID passed through to the client
 * @return the file info returned by the file system master client
 */
@Override
public FileInfo getFileInfo(long fileId) throws IOException {
  final FileInfo info = mFileSystemMasterClient.getFileInfo(fileId);
  return info;
}
/**
 * Acquires access to a UFS block through the under file system block store.
 *
 * @param sessionId the session ID passed through to the UFS block store
 * @param blockId the block ID passed through to the UFS block store
 * @param options the open options passed through to the UFS block store
 * @return the boolean returned by the UFS block store's {@code acquireAccess}
 */
@Override
public boolean openUfsBlock(long sessionId, long blockId, OpenUfsBlockOptions options)
    throws BlockAlreadyExistsException {
  final boolean acquired = mUnderFileSystemBlockStore.acquireAccess(sessionId, blockId, options);
  return acquired;
}
/**
 * Closes the reader or writer of a UFS block, commits the block if a temp block with this ID
 * exists in the block store, and finally releases the access to the UFS block.
 *
 * The access is released in a {@code finally} block so that it is not leaked when closing or
 * committing throws.
 *
 * @param sessionId the session ID
 * @param blockId the block ID
 * @throws BlockAlreadyExistsException propagated from the commit
 * @throws IOException if closing the reader/writer or committing the block fails
 * @throws WorkerOutOfSpaceException propagated from the commit
 */
@Override
public void closeUfsBlock(long sessionId, long blockId)
    throws BlockAlreadyExistsException, IOException, WorkerOutOfSpaceException {
  try {
    mUnderFileSystemBlockStore.closeReaderOrWriter(sessionId, blockId);
    if (mBlockStore.getTempBlockMeta(sessionId, blockId) != null) {
      try {
        commitBlock(sessionId, blockId);
      } catch (BlockDoesNotExistException e) {
        // This can only happen if the session is expired. Ignore this exception if that happens.
        LOG.warn("Block {} does not exist while being committed.", blockId);
      } catch (InvalidWorkerStateException e) {
        // This can happen if there are multiple sessions writing to the same block.
        // BlockStore#getTempBlockMeta does not check whether the temp block belongs to
        // the sessionId.
        LOG.debug("Invalid worker state while committing block.", e);
      }
    }
  } finally {
    // Always release the access, even when the close or the commit above failed.
    mUnderFileSystemBlockStore.releaseAccess(sessionId, blockId);
  }
}
/**
 * This class contains some metrics related to the block worker.
 * This class is public because the metric names are referenced in
 * {@link alluxio.web.WebInterfaceWorkerMetricsServlet}.
 */
@ThreadSafe
public static final class Metrics {
  public static final String CAPACITY_TOTAL = "CapacityTotal";
  public static final String CAPACITY_USED = "CapacityUsed";
  public static final String CAPACITY_FREE = "CapacityFree";
  public static final String BLOCKS_CACHED = "BlocksCached";

  /**
   * Registers the capacity (total, used, free) and cached-block-count gauges under their worker
   * metric names, unless gauges with those names are already registered. Each gauge reads its
   * value from the given block worker every time it is queried.
   *
   * @param blockWorker the block worker handle
   */
  public static void registerGauges(final BlockWorker blockWorker) {
    MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getWorkerMetricName(CAPACITY_TOTAL),
        new Gauge<Long>() {
          @Override
          public Long getValue() {
            return blockWorker.getStoreMeta().getCapacityBytes();
          }
        });

    MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getWorkerMetricName(CAPACITY_USED),
        new Gauge<Long>() {
          @Override
          public Long getValue() {
            return blockWorker.getStoreMeta().getUsedBytes();
          }
        });

    MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getWorkerMetricName(CAPACITY_FREE),
        new Gauge<Long>() {
          @Override
          public Long getValue() {
            // Read the store meta once so capacity and used bytes come from the same object;
            // two separate reads could disagree if the store changes in between.
            BlockStoreMeta storeMeta = blockWorker.getStoreMeta();
            return storeMeta.getCapacityBytes() - storeMeta.getUsedBytes();
          }
        });

    MetricsSystem.registerGaugeIfAbsent(MetricsSystem.getWorkerMetricName(BLOCKS_CACHED),
        new Gauge<Integer>() {
          @Override
          public Integer getValue() {
            return blockWorker.getStoreMetaFull().getNumberOfBlocks();
          }
        });
  }

  private Metrics() {} // prevent instantiation
}
}