package org.corfudb.infrastructure;

import java.lang.invoke.MethodHandles;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.atomic.AtomicBoolean;

import com.codahale.metrics.MetricRegistry;
import com.github.benmanes.caffeine.cache.Caffeine;
import com.github.benmanes.caffeine.cache.LoadingCache;
import com.github.benmanes.caffeine.cache.RemovalCause;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import io.netty.channel.ChannelHandlerContext;
import lombok.Getter;
import lombok.extern.slf4j.Slf4j;
import org.corfudb.infrastructure.log.InMemoryStreamLog;
import org.corfudb.infrastructure.log.LogAddress;
import org.corfudb.infrastructure.log.StreamLog;
import org.corfudb.infrastructure.log.StreamLogFiles;
import org.corfudb.protocols.wireprotocol.*;
import org.corfudb.runtime.exceptions.DataCorruptionException;
import org.corfudb.runtime.exceptions.DataOutrankedException;
import org.corfudb.runtime.exceptions.OverwriteException;
import org.corfudb.runtime.exceptions.ValueAdoptedException;
import org.corfudb.util.MetricsUtils;
import org.corfudb.util.Utils;

/**
 * Created by mwei on 12/10/15.
 * <p>
 * A Log Unit Server, which is responsible for providing the persistent storage for the Corfu
 * Distributed Shared Log.
 * <p>
 * All reads and writes go through a cache. For persistence, every 10,000 log entries are written
 * to individual files (logs), which are represented as FileHandles. Each FileHandle contains a
 * pointer to the tail of the file, a memory-mapped file channel, and a set of addresses known to
 * be in the file. To append an entry, the pointer to the tail is first extended to the length of
 * the entry, and the entry is added to the set of known addresses. A header is written, which
 * consists of the ASCII characters LE, followed by a set of flags, the log unit address, the size
 * of the entry, then the metadata size, the metadata, and finally the entry itself. When the
 * entry is complete, a written flag is set in the flags field.
 */
@Slf4j
public class LogUnitServer extends AbstractServer {

    private final ServerContext serverContext;

    /**
     * A scheduler, which is used to schedule periodic tasks like garbage collection.
     */
    private final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(
            1,
            new ThreadFactoryBuilder()
                    .setDaemon(true)
                    .setNameFormat("LogUnit-Maintenance-%d")
                    .build());

    /**
     * GC parameters.
     * TODO: entire GC handling needs updating, currently not being activated
     */
    private final Thread gcThread = null;
    private Long gcRetryInterval;
    private AtomicBoolean running = new AtomicBoolean(true);

    /**
     * The options map.
     */
    private final Map<String, Object> opts;

    /**
     * Handler for the base server.
     */
    @Getter
    private CorfuMsgHandler handler = new CorfuMsgHandler()
            .generateHandlers(MethodHandles.lookup(), this);

    /**
     * This cache services requests for data at various addresses. In a memory implementation,
     * it is not backed by anything, but in a disk implementation it is backed by persistent
     * storage.
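     * <p>
     * A rough sketch of the two paths through the cache (illustrative only; the exact
     * {@code CacheWriter} semantics are defined by {@link BatchWriter}):
     * <pre>{@code
     * // Read path: a miss invokes handleRetrieval(), which reads from streamLog.
     * ILogData data = dataCache.get(new LogAddress(42L, null));
     *
     * // Write path: put() is intercepted by batchWriter (a CacheWriter), which
     * // persists the entry to streamLog before it becomes visible in the cache.
     * dataCache.put(new LogAddress(42L, null), entry); // entry: some ILogData
     * }</pre>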
     */
    private final LoadingCache<LogAddress, ILogData> dataCache;
    private final long maxCacheSize;

    private final StreamLog streamLog;
    private final BatchWriter<LogAddress, ILogData> batchWriter;

    private static final String metricsPrefix = "corfu.server.logunit.";

    public LogUnitServer(ServerContext serverContext) {
        this.opts = serverContext.getServerConfig();
        this.serverContext = serverContext;

        // The cache is bounded by a configurable fraction of the maximum heap size.
        double cacheSizeHeapRatio = Double.parseDouble((String) opts.get("--cache-heap-ratio"));
        maxCacheSize = (long) (Runtime.getRuntime().maxMemory() * cacheSizeHeapRatio);

        if ((Boolean) opts.get("--memory")) {
            log.warn("Log unit opened in-memory mode (Maximum size={}). "
                            + "This should be run for testing purposes only. "
                            + "If you exceed the maximum size of the unit, old entries will be "
                            + "AUTOMATICALLY trimmed. "
                            + "The unit WILL LOSE ALL DATA if it exits.",
                    Utils.convertToByteStringRepresentation(maxCacheSize));
            streamLog = new InMemoryStreamLog();
        } else {
            streamLog = new StreamLogFiles(serverContext, (Boolean) opts.get("--no-verify"));
        }

        batchWriter = new BatchWriter<>(streamLog);

        // Entries are weighed by payload size, so maximumWeight bounds the cache in bytes.
        dataCache = Caffeine.<LogAddress, ILogData>newBuilder()
                .<LogAddress, ILogData>weigher((k, v) ->
                        ((LogData) v).getData() == null ? 1 : ((LogData) v).getData().length)
                .maximumWeight(maxCacheSize)
                .removalListener(this::handleEviction)
                .writer(batchWriter)
                .recordStats()
                .build(this::handleRetrieval);

        MetricRegistry metrics = serverContext.getMetrics();
        MetricsUtils.addCacheGauges(metrics, metricsPrefix + "cache.", dataCache);
    }

    /**
     * Service an incoming request for the maximum global address the log unit server has
     * written. This value is not persisted and is only maintained in memory.
     */
    @ServerHandler(type = CorfuMsgType.TAIL_REQUEST, opTimer = metricsPrefix + "tailReq")
    public void handleTailRequest(CorfuMsg msg, ChannelHandlerContext ctx, IServerRouter r,
                                  boolean isMetricsEnabled) {
        r.sendResponse(ctx, msg, CorfuMsgType.TAIL_RESPONSE.payloadMsg(streamLog.getGlobalTail()));
    }

    /**
     * Service an incoming write request.
     */
    @ServerHandler(type = CorfuMsgType.WRITE, opTimer = metricsPrefix + "write")
    public void write(CorfuPayloadMsg<WriteRequest> msg, ChannelHandlerContext ctx,
                      IServerRouter r, boolean isMetricsEnabled) {
        log.debug("log write: global: {}, streams: {}, backpointers: {}",
                msg.getPayload().getGlobalAddress(),
                msg.getPayload().getStreamAddresses(),
                msg.getPayload().getData().getBackpointerMap());
        // Clear any commit record (or set it initially to false).
        msg.getPayload().clearCommit();
        try {
            if (msg.getPayload().getWriteMode() != WriteMode.REPLEX_STREAM) {
                // A global-log write targets a single global address.
                dataCache.put(new LogAddress(msg.getPayload().getGlobalAddress(), null),
                        msg.getPayload().getData());
                r.sendResponse(ctx, msg, CorfuMsgType.WRITE_OK.msg());
            } else {
                // A Replex stream write targets a per-stream address for every stream.
                for (UUID streamID : msg.getPayload().getStreamAddresses().keySet()) {
                    dataCache.put(new LogAddress(
                                    msg.getPayload().getStreamAddresses().get(streamID), streamID),
                            msg.getPayload().getData());
                }
                r.sendResponse(ctx, msg, CorfuMsgType.WRITE_OK.msg());
            }
        } catch (OverwriteException ex) {
            if (msg.getPayload().getWriteMode() != WriteMode.REPLEX_STREAM) {
                r.sendResponse(ctx, msg, CorfuMsgType.ERROR_OVERWRITE.msg());
            } else {
                r.sendResponse(ctx, msg, CorfuMsgType.ERROR_REPLEX_OVERWRITE.msg());
            }
        } catch (DataOutrankedException e) {
            r.sendResponse(ctx, msg, CorfuMsgType.ERROR_DATA_OUTRANKED.msg());
        } catch (ValueAdoptedException e) {
            r.sendResponse(ctx, msg,
                    CorfuMsgType.ERROR_VALUE_ADOPTED.payloadMsg(e.getReadResponse()));
        }
    }

    /**
     * Service an incoming commit request.
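     * <p>
     * A commit request sets the commit bit on entries that were written earlier. If the
     * request carries no stream map, the bit applies to a single global-log address;
     * otherwise it is applied to every (stream, address) pair in the map. A sketch of the
     * per-entry update performed below (illustrative only):
     * <pre>{@code
     * ILogData entry = dataCache.get(new LogAddress(address, streamID));
     * entry.getMetadataMap().put(IMetadata.LogUnitMetadataType.COMMIT, commit);
     * }</pre>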
     */
    @ServerHandler(type = CorfuMsgType.COMMIT, opTimer = metricsPrefix + "commit")
    public void commit(CorfuPayloadMsg<CommitRequest> msg, ChannelHandlerContext ctx,
                       IServerRouter r, boolean isMetricsEnabled) {
        Map<UUID, Long> streamAddresses = msg.getPayload().getStreams();
        if (streamAddresses == null) {
            // Then this is a commit bit for the global log.
            ILogData entry = dataCache.get(new LogAddress(msg.getPayload().getAddress(), null));
            if (entry == null) {
                r.sendResponse(ctx, msg, CorfuMsgType.ERROR_NOENTRY.msg());
                return;
            } else {
                entry.getMetadataMap().put(IMetadata.LogUnitMetadataType.COMMIT,
                        msg.getPayload().getCommit());
            }
        } else {
            for (UUID streamID : streamAddresses.keySet()) {
                ILogData entry = dataCache.get(
                        new LogAddress(streamAddresses.get(streamID), streamID));
                if (entry == null) {
                    r.sendResponse(ctx, msg, CorfuMsgType.ERROR_NOENTRY.msg());
                    // TODO: do we have to go back and undo the commit bits already set?
                    return;
                } else {
                    entry.getMetadataMap().put(IMetadata.LogUnitMetadataType.COMMIT,
                            msg.getPayload().getCommit());
                }
            }
        }
        r.sendResponse(ctx, msg, CorfuMsgType.ACK.msg());
    }

    /**
     * Service an incoming read request, resolving every address in the requested range
     * through the cache (and, on a miss, through the backing stream log).
     */
    @ServerHandler(type = CorfuMsgType.READ_REQUEST, opTimer = metricsPrefix + "read")
    private void read(CorfuPayloadMsg<ReadRequest> msg, ChannelHandlerContext ctx,
                      IServerRouter r, boolean isMetricsEnabled) {
        log.trace("log read: {} {}",
                msg.getPayload().getStreamID() == null ? "global" : msg.getPayload().getStreamID(),
                msg.getPayload().getRange());
        ReadResponse rr = new ReadResponse();
        try {
            for (long l = msg.getPayload().getRange().lowerEndpoint();
                    l <= msg.getPayload().getRange().upperEndpoint(); l++) {
                LogAddress logAddress = new LogAddress(l, msg.getPayload().getStreamID());
                ILogData e = dataCache.get(logAddress);
                if (e == null) {
                    rr.put(l, LogData.EMPTY);
                } else if (e.getType() == DataType.HOLE) {
                    rr.put(l, LogData.HOLE);
                } else {
                    rr.put(l, (LogData) e);
                }
            }
            r.sendResponse(ctx, msg, CorfuMsgType.READ_RESPONSE.payloadMsg(rr));
        } catch (DataCorruptionException e) {
            r.sendResponse(ctx, msg, CorfuMsgType.ERROR_DATA_CORRUPTION.msg());
        }
    }

    @ServerHandler(type = CorfuMsgType.GC_INTERVAL, opTimer = metricsPrefix + "gc-interval")
    private void setGcInterval(CorfuPayloadMsg<Long> msg, ChannelHandlerContext ctx,
                               IServerRouter r, boolean isMetricsEnabled) {
        gcRetryInterval = msg.getPayload();
        r.sendResponse(ctx, msg, CorfuMsgType.ACK.msg());
    }

    @ServerHandler(type = CorfuMsgType.FORCE_GC, opTimer = metricsPrefix + "force-gc")
    private void forceGc(CorfuMsg msg, ChannelHandlerContext ctx, IServerRouter r,
                         boolean isMetricsEnabled) {
        // GC is currently not activated (see the TODO on the GC fields above); guard
        // against the null gcThread instead of throwing a NullPointerException.
        if (gcThread != null) {
            gcThread.interrupt();
        }
        r.sendResponse(ctx, msg, CorfuMsgType.ACK.msg());
    }

    @ServerHandler(type = CorfuMsgType.FILL_HOLE, opTimer = metricsPrefix + "fill-hole")
    private void fillHole(CorfuPayloadMsg<TrimRequest> msg, ChannelHandlerContext ctx,
                          IServerRouter r, boolean isMetricsEnabled) {
        LogAddress l = new LogAddress(msg.getPayload().getPrefix(), msg.getPayload().getStream());
        try {
            dataCache.put(l, LogData.HOLE);
            r.sendResponse(ctx, msg, CorfuMsgType.WRITE_OK.msg());
        } catch (OverwriteException e) {
            r.sendResponse(ctx, msg, CorfuMsgType.ERROR_OVERWRITE.msg());
        } catch (DataOutrankedException e) {
            r.sendResponse(ctx, msg, CorfuMsgType.ERROR_DATA_OUTRANKED.msg());
        } catch (ValueAdoptedException e) {
            r.sendResponse(ctx, msg,
                    CorfuMsgType.ERROR_VALUE_ADOPTED.payloadMsg(e.getReadResponse()));
        }
    }

    @ServerHandler(type = CorfuMsgType.TRIM)
    private void trim(CorfuPayloadMsg<TrimRequest> msg, ChannelHandlerContext ctx,
                      IServerRouter r) {
        batchWriter.trim(new LogAddress(msg.getPayload().getPrefix(),
                msg.getPayload().getStream()));
        // TODO(Maithem): should we return an error if the write fails?
        r.sendResponse(ctx, msg, CorfuMsgType.ACK.msg());
    }
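    /*
     * The two callbacks below are wired into the Caffeine cache built in the constructor:
     * handleRetrieval is the loader invoked on a cache miss, and handleEviction is the
     * removal listener invoked when an entry is evicted. A minimal sketch of that wiring
     * in isolation (illustrative only; weightOf() is a hypothetical stand-in for the
     * LogData-based weigher used above):
     *
     *     LoadingCache<LogAddress, ILogData> cache = Caffeine.newBuilder()
     *             .maximumWeight(maxCacheSize)
     *             .weigher((LogAddress k, ILogData v) -> weightOf(v))
     *             .removalListener(this::handleEviction)
     *             .build(this::handleRetrieval);
     */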
    /**
     * Retrieve the ILogData from disk, given an address. This function should not care
     * about trimmed addresses, as that is handled in read() and append(). Any address
     * that cannot be retrieved should be returned as unwritten (null).
     *
     * @param logAddress The address to retrieve the entry from.
     * @return The log unit entry to retrieve into the cache.
     */
    public synchronized ILogData handleRetrieval(LogAddress logAddress) {
        LogData entry = streamLog.read(logAddress);
        log.trace("Retrieved[{} : {}]", logAddress, entry);
        return entry;
    }

    public synchronized void handleEviction(LogAddress logAddress, ILogData entry,
                                            RemovalCause cause) {
        log.trace("Eviction[{}]: {}", logAddress, cause);
        streamLog.release(logAddress, (LogData) entry);
    }

    /**
     * Shutdown the server.
     */
    @Override
    public void shutdown() {
        scheduler.shutdownNow();
        batchWriter.close();
    }

    @VisibleForTesting
    LoadingCache<LogAddress, ILogData> getDataCache() {
        return dataCache;
    }

    @VisibleForTesting
    long getMaxCacheSize() {
        return maxCacheSize;
    }
}
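/*
 * Illustrative only: a LogUnitServer is constructed from a ServerContext whose options
 * map supplies the flags read in the constructor above. The values below are
 * hypothetical, and the ServerContext construction is elided since its signature is not
 * shown in this file:
 *
 *     Map<String, Object> opts = serverContext.getServerConfig();
 *     // expected keys: "--cache-heap-ratio" (String, e.g. "0.5"),
 *     //                "--memory" (Boolean), "--no-verify" (Boolean)
 *     LogUnitServer unit = new LogUnitServer(serverContext);
 */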