/** * Copyright 2016 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ package com.github.ambry.router; import com.github.ambry.clustermap.ClusterMap; import com.github.ambry.clustermap.ReplicaId; import com.github.ambry.commons.ByteBufferAsyncWritableChannel; import com.github.ambry.commons.ResponseHandler; import com.github.ambry.config.RouterConfig; import com.github.ambry.messageformat.BlobProperties; import com.github.ambry.network.NetworkClientErrorCode; import com.github.ambry.network.RequestInfo; import com.github.ambry.network.ResponseInfo; import com.github.ambry.notification.NotificationSystem; import com.github.ambry.protocol.PutRequest; import com.github.ambry.protocol.PutResponse; import com.github.ambry.protocol.RequestOrResponse; import com.github.ambry.utils.ByteBufferInputStream; import com.github.ambry.utils.Time; import com.github.ambry.utils.Utils; import java.io.DataInputStream; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * PutManager class is responsible for handling putBlob operations. PutManager creates a {@link PutOperation} for each * operation submitted to it, and tracks them. */ class PutManager { private static final Logger logger = LoggerFactory.getLogger(PutManager.class); private final Set<PutOperation> putOperations; private final NotificationSystem notificationSystem; private final Time time; private final Thread chunkFillerThread; private final Object chunkFillerSynchronizer = new Object(); private volatile boolean isChunkFillerThreadAsleep = false; private volatile boolean chunkFillerThreadMaySleep = false; // This helps the PutManager quickly find the appropriate PutOperation to hand over the response to. // Requests are added before they are sent out and get cleaned up as and when responses come in. // Because there is a guaranteed response from the NetworkClient for every request sent out, entries // get cleaned up periodically. private final Map<Integer, PutOperation> correlationIdToPutOperation; private final AtomicBoolean isOpen = new AtomicBoolean(true); private final RouterCallback routerCallback; private final ByteBufferAsyncWritableChannel.ChannelEventListener chunkArrivalListener; // shared by all PutOperations private final ClusterMap clusterMap; private final RouterConfig routerConfig; private final ResponseHandler responseHandler; private final NonBlockingRouterMetrics routerMetrics; private class PutRequestRegistrationCallbackImpl implements RequestRegistrationCallback<PutOperation> { private List<RequestInfo> requestListToFill; @Override public void registerRequestToSend(PutOperation putOperation, RequestInfo requestInfo) { requestListToFill.add(requestInfo); correlationIdToPutOperation.put(((RequestOrResponse) requestInfo.getRequest()).getCorrelationId(), putOperation); } } // A single callback as this will never get called concurrently. The list of request to fill will be set as // appropriate before the callback is passed on to the PutOperations, every time. private final PutRequestRegistrationCallbackImpl requestRegistrationCallback = new PutRequestRegistrationCallbackImpl(); /** * Create a PutManager * @param clusterMap The {@link ClusterMap} of the cluster. * @param responseHandler The {@link ResponseHandler} used to notify failures for failure detection. * @param notificationSystem The {@link NotificationSystem} used for notifying blob creations. * @param routerConfig The {@link RouterConfig} containing the configs for the PutManager. * @param routerMetrics The {@link NonBlockingRouterMetrics} to be used for reporting metrics. * @param routerCallback The {@link RouterCallback} to use for callbacks to the router. * @param suffix the suffix to associate with the names of the threads created by this PutManager * @param time The {@link Time} instance to use. */ PutManager(ClusterMap clusterMap, ResponseHandler responseHandler, NotificationSystem notificationSystem, RouterConfig routerConfig, NonBlockingRouterMetrics routerMetrics, RouterCallback routerCallback, String suffix, Time time) { this.clusterMap = clusterMap; this.responseHandler = responseHandler; this.notificationSystem = notificationSystem; this.routerConfig = routerConfig; this.routerMetrics = routerMetrics; this.routerCallback = routerCallback; this.chunkArrivalListener = new ByteBufferAsyncWritableChannel.ChannelEventListener() { @Override public void onEvent(ByteBufferAsyncWritableChannel.EventType e) { synchronized (chunkFillerSynchronizer) { // At this point, the chunk for which this notification came in (if any) could already have been consumed by // the chunk filler, and this might unnecessarily wake it up from its sleep, which should be okay. chunkFillerThreadMaySleep = false; if (isChunkFillerThreadAsleep) { chunkFillerSynchronizer.notify(); } } } }; this.time = time; putOperations = Collections.newSetFromMap(new ConcurrentHashMap<PutOperation, Boolean>()); correlationIdToPutOperation = new HashMap<Integer, PutOperation>(); chunkFillerThread = Utils.newThread("ChunkFillerThread-" + suffix, new ChunkFiller(), true); chunkFillerThread.start(); routerMetrics.initializePutManagerMetrics(chunkFillerThread); } /** * Submit a put blob operation to be processed asynchronously. * @param blobProperties the blobProperties associated with the blob being put. * @param userMetaData the userMetaData associated with the blob being put. * @param channel the {@link ReadableStreamChannel} containing the blob content. * @param futureResult the {@link FutureResult} that contains the pending result of the operation. * @param callback the {@link Callback} object to be called on completion of the operation. */ void submitPutBlobOperation(BlobProperties blobProperties, byte[] userMetaData, ReadableStreamChannel channel, FutureResult<String> futureResult, Callback<String> callback) { try { PutOperation putOperation = new PutOperation(routerConfig, routerMetrics, clusterMap, responseHandler, notificationSystem, userMetaData, channel, futureResult, callback, routerCallback, chunkArrivalListener, time, blobProperties); putOperations.add(putOperation); putOperation.startReadingFromChannel(); } catch (RouterException e) { routerMetrics.operationDequeuingRate.mark(); routerMetrics.onPutBlobError(e); NonBlockingRouter.completeOperation(futureResult, callback, null, e); } } /** * Creates and returns requests in the form of {@link RequestInfo} to be sent to data nodes in order to * complete put operations. Since this is the only method guaranteed to be called periodically by the * RequestResponseHandler thread in the {@link NonBlockingRouter} ({@link #handleResponse} gets called only if a * response is received for a put operation), any error handling or operation completion and cleanup also usually * gets done in the context of this method. * @param requestListToFill list to be filled with the requests created */ void poll(List<RequestInfo> requestListToFill) { long startTime = time.milliseconds(); requestRegistrationCallback.requestListToFill = requestListToFill; for (PutOperation op : putOperations) { try { op.poll(requestRegistrationCallback); } catch (Exception e) { op.setOperationExceptionAndComplete( new RouterException("Put poll encountered unexpected error", e, RouterErrorCode.UnexpectedInternalError)); } if (op.isOperationComplete() && putOperations.remove(op)) { // In order to ensure that an operation is completed only once, call onComplete() only at the place where the // operation actually gets removed from the set of operations. See comment within closePendingOperations(). onComplete(op); } } routerMetrics.putManagerPollTimeMs.update(time.milliseconds() - startTime); } /** * Hands over the response to the associated PutOperation that issued the request. * @param responseInfo the {@link ResponseInfo} containing the response. */ void handleResponse(ResponseInfo responseInfo) { long startTime = time.milliseconds(); PutResponse putResponse = extractPutResponseAndNotifyResponseHandler(responseInfo); RouterRequestInfo routerRequestInfo = (RouterRequestInfo) responseInfo.getRequestInfo(); int correlationId = ((PutRequest) routerRequestInfo.getRequest()).getCorrelationId(); // Get the PutOperation that generated the request. PutOperation putOperation = correlationIdToPutOperation.remove(correlationId); // If it is still an active operation, hand over the response. Otherwise, ignore. if (putOperations.contains(putOperation)) { try { putOperation.handleResponse(responseInfo, putResponse); } catch (Exception e) { putOperation.setOperationExceptionAndComplete( new RouterException("Put handleResponse encountered unexpected error", e, RouterErrorCode.UnexpectedInternalError)); } if (putOperation.isOperationComplete() && putOperations.remove(putOperation)) { onComplete(putOperation); } routerMetrics.putManagerHandleResponseTimeMs.update(time.milliseconds() - startTime); } else { routerMetrics.ignoredResponseCount.inc(); } } /** * Extract the {@link PutResponse} from the given {@link ResponseInfo} * @param responseInfo the {@link ResponseInfo} from which the {@link PutResponse} is to be extracted. * @return the extracted {@link PutResponse} if there is one; null otherwise. */ private PutResponse extractPutResponseAndNotifyResponseHandler(ResponseInfo responseInfo) { PutResponse putResponse = null; ReplicaId replicaId = ((RouterRequestInfo) responseInfo.getRequestInfo()).getReplicaId(); NetworkClientErrorCode networkClientErrorCode = responseInfo.getError(); if (networkClientErrorCode == null) { try { putResponse = PutResponse.readFrom(new DataInputStream(new ByteBufferInputStream(responseInfo.getResponse()))); responseHandler.onEvent(replicaId, putResponse.getError()); } catch (Exception e) { // Ignore. There is no value in notifying the response handler. logger.error("Response deserialization received unexpected error", e); routerMetrics.responseDeserializationErrorCount.inc(); } } else { responseHandler.onEvent(replicaId, networkClientErrorCode); } return putResponse; } /** * Called for a {@link PutOperation} when the operation is complete. Any cleanup that the PutManager needs to do * with respect to this operation will have to be done here. The PutManager also finishes the operation by * performing the callback and notification. * @param op the {@link PutOperation} that has completed. */ void onComplete(PutOperation op) { Exception e = op.getOperationException(); String blobId = op.getBlobIdString(); op.maybeNotifyForBlobCreation(); if (blobId == null && e == null) { e = new RouterException("Operation failed, but exception was not set", RouterErrorCode.UnexpectedInternalError); routerMetrics.operationFailureWithUnsetExceptionCount.inc(); } if (e != null) { blobId = null; routerMetrics.onPutBlobError(e); routerCallback.scheduleDeletes(op.getSuccessfullyPutChunkIdsIfComposite(), op.getServiceId()); } else { updateChunkingAndSizeMetricsOnSuccessfulPut(op); } routerMetrics.operationDequeuingRate.mark(); routerMetrics.putBlobOperationLatencyMs.update(time.milliseconds() - op.getSubmissionTimeMs()); NonBlockingRouter.completeOperation(op.getFuture(), op.getCallback(), blobId, e); } /** * Update chunking and size related metrics - blob size, chunk count, and whether the blob is simple or composite. * @param op the {@link PutOperation} that completed successfully. */ private void updateChunkingAndSizeMetricsOnSuccessfulPut(PutOperation op) { routerMetrics.putBlobSizeBytes.update(op.getBlobProperties().getBlobSize()); routerMetrics.putBlobChunkCount.update(op.getNumDataChunks()); if (op.getNumDataChunks() == 1) { routerMetrics.simpleBlobPutCount.inc(); } else { routerMetrics.compositeBlobPutCount.inc(); } } /** * Check if the PutManager is open. */ boolean isOpen() { return isOpen.get(); } /** * Close the PutManager. * First notify the chunkFillerThread about closing and wait for it to exit. Then, complete all existing operations. */ void close() { if (isOpen.compareAndSet(true, false)) { synchronized (chunkFillerSynchronizer) { if (isChunkFillerThreadAsleep) { chunkFillerThreadMaySleep = false; chunkFillerSynchronizer.notify(); } } try { chunkFillerThread.join(NonBlockingRouter.SHUTDOWN_WAIT_MS); } catch (InterruptedException e) { logger.error("Caught interrupted exception while waiting for chunkFillerThread to finish"); Thread.currentThread().interrupt(); } completePendingOperations(); } } /** * Complete all existing operations. * This can get called two ways: * 1. As part of {@link #close()} when it is called in the context of the router. This is the normal case. * 2. By the {@link ChunkFiller} thread when it exits abnormally. */ void completePendingOperations() { for (PutOperation op : putOperations) { // There is a rare scenario where the operation gets removed from this set and gets completed concurrently by // the RequestResponseHandler thread when it is in poll() or handleResponse(). In order to avoid the completion // from happening twice, complete it here only if the remove was successful. if (putOperations.remove(op)) { Exception e = new RouterException("Aborted operation because Router is closed.", RouterErrorCode.RouterClosed); routerMetrics.operationDequeuingRate.mark(); routerMetrics.operationAbortCount.inc(); routerMetrics.onPutBlobError(e); NonBlockingRouter.completeOperation(op.getFuture(), op.getCallback(), null, e); } } } /** * the ChunkFiller thread continuously iterates over all the putOperations submitted, reads from the {@link * AsyncWritableChannel} associated with the operation, and fills in chunks. The channel will be populated * by the {@link ReadableStreamChannel} associated with the operation. */ private class ChunkFiller implements Runnable { public void run() { try { while (isOpen.get()) { chunkFillerThreadMaySleep = true; for (PutOperation op : putOperations) { op.fillChunks(); if (!op.isChunkFillingDone()) { chunkFillerThreadMaySleep = false; } } if (chunkFillerThreadMaySleep) { synchronized (chunkFillerSynchronizer) { while (chunkFillerThreadMaySleep && isOpen.get()) { isChunkFillerThreadAsleep = true; chunkFillerSynchronizer.wait(); } isChunkFillerThreadAsleep = false; } } } } catch (Throwable e) { logger.error("Aborting, chunkFillerThread received an unexpected error:", e); routerMetrics.chunkFillerUnexpectedErrorCount.inc(); if (isOpen.compareAndSet(true, false)) { completePendingOperations(); } } } } }