/**
* Copyright 2016 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
package com.github.ambry.router;
import com.github.ambry.clustermap.ClusterMap;
import com.github.ambry.clustermap.ReplicaId;
import com.github.ambry.commons.BlobId;
import com.github.ambry.commons.BlobIdFactory;
import com.github.ambry.commons.ResponseHandler;
import com.github.ambry.commons.ServerErrorCode;
import com.github.ambry.config.RouterConfig;
import com.github.ambry.messageformat.BlobAll;
import com.github.ambry.messageformat.BlobData;
import com.github.ambry.messageformat.BlobInfo;
import com.github.ambry.messageformat.BlobType;
import com.github.ambry.messageformat.CompositeBlobInfo;
import com.github.ambry.messageformat.MessageFormatException;
import com.github.ambry.messageformat.MessageFormatFlags;
import com.github.ambry.messageformat.MessageFormatRecord;
import com.github.ambry.messageformat.MetadataContentSerDe;
import com.github.ambry.network.Port;
import com.github.ambry.network.RequestInfo;
import com.github.ambry.network.ResponseInfo;
import com.github.ambry.protocol.GetOption;
import com.github.ambry.protocol.GetRequest;
import com.github.ambry.protocol.GetResponse;
import com.github.ambry.protocol.RequestOrResponse;
import com.github.ambry.store.StoreKey;
import com.github.ambry.utils.Time;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicBoolean;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* GetBlobOperation class is responsible for maintaining the state associated with a GetBlob operation, and completing
* it. It would help to look at this class in conjunction with {@link PutOperation}.
*
* Blobs that need to be fetched are either simple (those that consist of only a single chunk) or composite (those
* that consist of one metadata chunk and two or more data chunks). Whether a blob is simple or composite can only be
* determined after the first chunk is fetched, based on whether the first chunk turns out to be a metadata chunk or
* a data chunk.
*
* As soon as the first data chunk is fetched (which could be the first chunk in the case of simple blobs and the
* second chunk fetched in the case of composite blobs), the operation callback is invoked (and the future is marked
* as done) so that the caller can start reading in data. The rest of the chunks are asynchronously fetched and
* buffered up to the maximum that can be buffered. When fetched chunks are consumed by the caller, subsequent chunks
* become eligible to be fetched.
*/
class GetBlobOperation extends GetOperation {
// the callback to use to complete the operation.
private final RouterCallback routerCallback;
// whether the operationCallback has been called already.
private final AtomicBoolean operationCallbackInvoked = new AtomicBoolean(false);
// The first chunk may be a metadata chunk if the blob is composite, or the only data chunk if the blob is simple.
private final FirstGetChunk firstChunk;
// Associated with all data chunks in the case of composite blobs. Only a fixed number of these are initialized.
// Each of these is initialized with the information required to fetch a data chunk and is responsible for
// retrieving and adding it to the list of chunk buffers. Once complete, they are reused to fetch subsequent data
// chunks.
private GetChunk[] dataChunks;
// the factory to use to deserialize keys in a metadata chunk.
private final BlobIdFactory blobIdFactory;
// the total number of data chunks associated with this blob.
private int numChunksTotal;
// the total number of data chunks retrieved so far (and may or may not have been written out yet).
private int numChunksRetrieved;
// the maximum size of a data chunk in bytes
private long chunkSize;
// the total size of the object being fetched in this operation
private long totalSize;
// a byte range with defined start/end offsets that has been verified to be within the total blob size
private ByteRange resolvedByteRange;
// a list iterator to the chunk ids that need to be fetched for this operation, if this is a composite blob.
private ListIterator<StoreKey> chunkIdIterator;
// chunk index to retrieved chunk buffer mapping.
private Map<Integer, ByteBuffer> chunkIndexToBuffer;
// Maps the correlation id of each in-flight request to the GetChunk that issued it, so that a response can be
// handed over to the right chunk quickly.
private final Map<Integer, GetChunk> correlationIdToGetChunk = new HashMap<>();
// the blob info that is populated on OperationType.BlobInfo or OperationType.All
private BlobInfo blobInfo;
// the ReadableStreamChannel that is populated on OperationType.Blob or OperationType.All requests.
private BlobDataReadableStreamChannel blobDataChannel;
// the CompositeBlobInfo that will be set if (and when) this blob turns out to be a composite blob.
private CompositeBlobInfo compositeBlobInfo;
private static final Logger logger = LoggerFactory.getLogger(GetBlobOperation.class);
/**
* Construct a GetBlobOperation
* @param routerConfig the {@link RouterConfig} containing the configs for get operations.
* @param routerMetrics The {@link NonBlockingRouterMetrics} to be used for reporting metrics.
* @param clusterMap the {@link ClusterMap} of the cluster
* @param responseHandler the {@link ResponseHandler} responsible for failure detection.
* @param blobIdStr the blob id associated with the operation in string form.
* @param options the {@link GetBlobOptionsInternal} associated with the operation.
* @param callback the callback that is to be called when the operation completes.
* @param routerCallback the {@link RouterCallback} to use to complete operations.
* @param blobIdFactory the factory to use to deserialize keys in a metadata chunk.
* @param time the Time instance to use.
* @throws RouterException if there is an error with any of the parameters, such as an invalid blob id.
*/
GetBlobOperation(RouterConfig routerConfig, NonBlockingRouterMetrics routerMetrics, ClusterMap clusterMap,
    ResponseHandler responseHandler, String blobIdStr, GetBlobOptionsInternal options,
    Callback<GetBlobResultInternal> callback, RouterCallback routerCallback, BlobIdFactory blobIdFactory, Time time)
    throws RouterException {
  // The superclass parses the blob id string (throwing RouterException on an invalid id) and wires up the
  // get-blob specific latency/past-due metrics.
  super(routerConfig, routerMetrics, clusterMap, responseHandler, blobIdStr, options, callback,
      routerMetrics.getBlobLocalColoLatencyMs, routerMetrics.getBlobCrossColoLatencyMs,
      routerMetrics.getBlobPastDueCount, time);
  this.routerCallback = routerCallback;
  this.blobIdFactory = blobIdFactory;
  // Create the first chunk last so that all other fields of this operation are initialized before the
  // FirstGetChunk constructor runs (FirstGetChunk is an inner class and may read outer state — defined below).
  firstChunk = new FirstGetChunk();
}
/**
* {@inheritDoc}
* <br>
* If the callback for the operation was already called, then abort the reads from the channel and the caller
* will be notified as part of the read callback eventually. If not, the notification will happen as part of the
* operation completion callback.
*/
/**
 * {@inheritDoc}
 * <br>
 * If the operation callback has not yet fired, complete the operation right here with the abort cause. If it has
 * already fired, record the cause as the operation exception so it surfaces through the channel read callback
 * (completing any in-progress read immediately). Either way the operation is marked complete.
 */
@Override
void abort(Exception abortCause) {
  boolean callbackNotYetInvoked = operationCallbackInvoked.compareAndSet(false, true);
  if (!callbackNotYetInvoked) {
    // the caller already has the result channel; propagate the failure via the read path instead.
    setOperationException(abortCause);
    if (blobDataChannel != null && blobDataChannel.isReadCalled()) {
      blobDataChannel.completeRead();
    }
  } else {
    NonBlockingRouter.completeOperation(null, getOperationCallback, null, abortCause);
  }
  operationCompleted = true;
}
/**
* Do all that needs to be done (cleanup, notification, etc.) on chunk completion and mark the state of the chunk
* appropriately.
* @param chunk the chunk that has completed.
*/
/**
 * Do all that needs to be done (cleanup, notification, etc.) on chunk completion and mark the state of the chunk
 * appropriately.
 * Completion of the first chunk is special: it is the point at which the caller-facing operation callback fires
 * (exactly once, guarded by the CAS on {@code operationCallbackInvoked}).
 * @param chunk the chunk that has completed.
 */
private void onChunkOperationComplete(GetChunk chunk) {
  if (chunk.getChunkException() != null) {
    // if operation callback was already called, then this exception will have to be notified as part of the
    // read callback.
    setOperationException(chunk.getChunkException());
  }
  if (chunk == firstChunk) {
    if (operationCallbackInvoked.compareAndSet(false, true)) {
      Exception e = getOperationException();
      if (options.getChunkIdsOnly) {
        // If this is an operation just to get the chunk ids, then these ids will be returned as part of the
        // result callback and no more chunks will be fetched, so mark the operation as complete to let the
        // GetManager remove this operation.
        operationCompleted = true;
        // compositeBlobInfo is null for simple blobs, in which case there are no chunk ids to report.
        List<StoreKey> chunkIds = e == null && compositeBlobInfo != null ? compositeBlobInfo.getKeys() : null;
        operationResult = new GetBlobResultInternal(null, chunkIds);
      } else {
        // Complete the operation from the caller's perspective, so that the caller can start reading from the
        // channel if there is no exception. The operation will not be marked as complete internally as subsequent
        // chunk retrievals and channel writes will need to happen and for that, this operation needs the GetManager to
        // poll it periodically. If any exception is encountered while processing subsequent chunks, those will be
        // notified during the channel read.
        long timeElapsed = time.milliseconds() - submissionTimeMs;
        routerMetrics.getBlobOperationLatencyMs.update(timeElapsed);
        if (e == null) {
          blobDataChannel = new BlobDataReadableStreamChannel();
          operationResult = new GetBlobResultInternal(new GetBlobResult(blobInfo, blobDataChannel), null);
        } else {
          blobDataChannel = null;
          operationResult = null;
          routerMetrics.onGetBlobError(e, options);
        }
      }
      NonBlockingRouter.completeOperation(null, getOperationCallback, operationResult, e);
    }
  }
  // free the chunk so it can be reused for a subsequent data chunk of this blob.
  chunk.postCompletionCleanup();
  if (blobDataChannel != null) {
    // the chunk that just completed may be the next one eligible to be written out to the caller's channel.
    blobDataChannel.maybeWriteToChannel();
  }
}
/**
* Handle the given {@link ResponseInfo} by handing it over to the appropriate chunk that issued the request.
* @param responseInfo the {@link ResponseInfo} to be handled.
* @param getResponse the {@link GetResponse} associated with this response.
*/
/**
 * Handle the given {@link ResponseInfo} by routing it to the chunk that issued the matching request
 * (looked up and removed from the correlation id map), and complete that chunk if it is now done.
 * @param responseInfo the {@link ResponseInfo} to be handled.
 * @param getResponse the {@link GetResponse} associated with this response.
 */
@Override
void handleResponse(ResponseInfo responseInfo, GetResponse getResponse) {
  int correlationId = ((RequestOrResponse) responseInfo.getRequestInfo().getRequest()).getCorrelationId();
  GetChunk chunk = correlationIdToGetChunk.remove(correlationId);
  chunk.handleResponse(responseInfo, getResponse);
  if (chunk.isComplete()) {
    onChunkOperationComplete(chunk);
  }
}
/**
* As part of the poll, GetBlobOperation fetches new requests to be issued and updates the state based on
* whether previously issued requests have timed out and so on. Additionally, any writes of previously retrieved
* chunk buffers into the {@link AsyncWritableChannel} passed in by the caller also happens within this method.
* @param requestRegistrationCallback the {@link RequestRegistrationCallback} to call for every request that gets
* created as part of this poll operation.
*/
/**
 * As part of the poll, GetBlobOperation fetches new requests to be issued and updates the state based on
 * whether previously issued requests have timed out and so on. Additionally, any writes of previously retrieved
 * chunk buffers into the {@link AsyncWritableChannel} passed in by the caller also happens within this method.
 * @param requestRegistrationCallback the {@link RequestRegistrationCallback} to call for every request that gets
 *                                    created as part of this poll operation.
 */
@Override
void poll(RequestRegistrationCallback<GetOperation> requestRegistrationCallback) {
  if (operationCompleted) {
    return;
  }
  if (operationException.get() == null) {
    if (firstChunk.isReady() || firstChunk.isInProgress()) {
      firstChunk.poll(requestRegistrationCallback);
    }
    if (firstChunk.isComplete()) {
      // Although an attempt is made to write to the channel as soon as a chunk is successfully retrieved,
      // the caller might not have called readInto() and passed in a channel at the time. So an attempt is always
      // made from within poll.
      if (blobDataChannel != null) {
        blobDataChannel.maybeWriteToChannel();
      }
      // If this is a composite blob, poll for requests for subsequent chunks.
      // (dataChunks is only non-null once the first chunk turned out to be a metadata chunk.)
      if (dataChunks != null) {
        for (GetChunk dataChunk : dataChunks) {
          // assign the next pending chunk id of the blob to any GetChunk slot that is free.
          if (dataChunk.isFree() && chunkIdIterator.hasNext()) {
            dataChunk.initialize(chunkIdIterator.nextIndex(), (BlobId) chunkIdIterator.next());
          }
          // Only start fetching a Ready chunk if the number of retrieved-but-unwritten buffers is below the
          // in-memory cap; chunks already InProgress are always polled so their requests can time out/complete.
          if (dataChunk.isInProgress() || (dataChunk.isReady()
              && numChunksRetrieved - blobDataChannel.getNumChunksWrittenOut()
              < NonBlockingRouter.MAX_IN_MEM_CHUNKS)) {
            dataChunk.poll(requestRegistrationCallback);
            if (dataChunk.isComplete()) {
              onChunkOperationComplete(dataChunk);
              if (operationCompleted) {
                break;
              }
            }
          }
        }
      }
    }
  }
  // an exception may have been set at any point above (or by a response handler); abort the whole operation.
  if (operationException.get() != null) {
    abort(operationException.get());
  }
}
// ReadableStreamChannel implementation:
/**
 * A class that implements the result of this GetBlobOperation. This is instantiated if/when the first data chunk of
 * the blob arrives, when the operation callback is invoked. It hands retrieved chunk buffers, in order, to the
 * {@link AsyncWritableChannel} supplied by the caller via {@link #readInto(AsyncWritableChannel, Callback)}.
 */
private class BlobDataReadableStreamChannel implements ReadableStreamChannel {
  // whether this ReadableStreamChannel is open.
  private AtomicBoolean isOpen = new AtomicBoolean(true);
  // whether readInto() has been called yet by the caller on this ReadableStreamChannel.
  private volatile boolean readCalled = false;
  // The channel to write chunks of the blob into. This will be initialized when the caller calls the readInto().
  private AsyncWritableChannel asyncWritableChannel;
  // the callback to call when all the chunks are successfully written out into the asyncWritableChannel.
  private Callback<Long> readIntoCallback;
  // the future to mark as done when all the chunks are successfully written out into the asyncWritableChannel.
  private FutureResult<Long> readIntoFuture;
  // the number of bytes written out to the asyncWritableChannel. This would be the size of the blob eventually.
  private Long bytesWritten = 0L;
  // the number of chunks that have been written out to the asyncWritableChannel.
  private volatile int numChunksWrittenOut = 0;
  // the index of the next chunk that is to be written out to the asyncWritableChannel.
  private int indexOfNextChunkToWriteOut = 0;
  // whether this object has called the readIntoCallback yet.
  private final AtomicBoolean readIntoCallbackCalled = new AtomicBoolean(false);
  // the callback that is passed into the asyncWritableChannel write() operation.
  private final Callback<Long> chunkAsyncWriteCallback = new Callback<Long>() {
    @Override
    public void onCompletion(Long result, Exception exception) {
      if (exception != null) {
        // On failure, result may be null - do not unbox it (the previous unconditional
        // "bytesWritten += result" could NPE and mask the real write failure). Record the failure instead.
        setOperationException(exception);
      } else {
        bytesWritten += result;
      }
      numChunksWrittenOut++;
      // wake the poll loop so completion / further writes can be processed promptly.
      routerCallback.onPollReady();
    }
  };
  /**
   * The bytes that will be read from this channel is not known until the read is complete.
   * @return -1
   */
  @Override
  public long getSize() {
    return -1;
  }
  /**
   * Start writing the blob data into the given channel. May only be called once per operation result.
   * @param asyncWritableChannel the channel to write retrieved chunk buffers into.
   * @param callback the callback to invoke when all chunks have been written out (or on failure).
   * @return a future that is marked done when the read completes.
   * @throws IllegalStateException if this channel is closed or readInto() was already called.
   */
  @Override
  public Future<Long> readInto(AsyncWritableChannel asyncWritableChannel, Callback<Long> callback) {
    if (!isOpen()) {
      throw new IllegalStateException("This ReadableStreamChannel has been closed");
    }
    if (readCalled) {
      throw new IllegalStateException("Cannot read the result of a GetBlob operation more than once");
    }
    this.asyncWritableChannel = asyncWritableChannel;
    readIntoCallback = callback;
    readIntoFuture = new FutureResult<>();
    readCalled = true;
    // if the operation already failed, complete the read right away with that failure.
    if (operationException.get() != null) {
      completeRead();
    }
    routerCallback.onPollReady();
    return readIntoFuture;
  }
  @Override
  public boolean isOpen() {
    return isOpen.get();
  }
  /**
   * Close this channel. Closing before all chunks were written out is treated as a caller-initiated abort and
   * sets a {@link RouterErrorCode#ChannelClosed} operation exception.
   */
  @Override
  public void close() throws IOException {
    if (isOpen.compareAndSet(true, false)) {
      if (numChunksWrittenOut != numChunksTotal) {
        setOperationException(new RouterException(
            "The ReadableStreamChannel for blob data has been closed by the user before all chunks were written out.",
            RouterErrorCode.ChannelClosed));
      }
    }
  }
  /**
   * @return whether readInto() has been called yet.
   */
  boolean isReadCalled() {
    return readCalled;
  }
  /**
   * @return the number of chunks that have been written out to the {@link AsyncWritableChannel}
   */
  int getNumChunksWrittenOut() {
    return numChunksWrittenOut;
  }
  /**
   * Attempt to write the data associated with the blob to the channel passed in by the caller (if the caller has
   * done so). Chunks are written strictly in index order; a gap in chunkIndexToBuffer stops the loop until the
   * missing chunk arrives.
   */
  private void maybeWriteToChannel() {
    // if there are chunks available to be written out, do now.
    if (firstChunk.isComplete() && readCalled) {
      while (operationException.get() == null && chunkIndexToBuffer.containsKey(indexOfNextChunkToWriteOut)) {
        ByteBuffer chunkBuf = chunkIndexToBuffer.remove(indexOfNextChunkToWriteOut);
        asyncWritableChannel.write(chunkBuf, chunkAsyncWriteCallback);
        indexOfNextChunkToWriteOut++;
      }
      if (operationException.get() != null || numChunksWrittenOut == numChunksTotal) {
        completeRead();
      }
    }
  }
  /**
   * Complete the read from this {@link ReadableStreamChannel} by invoking the callback and marking the future.
   * Idempotent with respect to the callback/future (guarded by readIntoCallbackCalled); always marks the
   * overall operation as complete.
   */
  void completeRead() {
    if (readIntoCallbackCalled.compareAndSet(false, true)) {
      Exception e = operationException.get();
      readIntoFuture.done(bytesWritten, e);
      if (readIntoCallback != null) {
        readIntoCallback.onCompletion(bytesWritten, e);
      }
      if (e == null) {
        updateChunkingAndSizeMetricsOnSuccessfulGet();
      } else {
        routerMetrics.onGetBlobError(e, options);
      }
      long totalTime = time.milliseconds() - submissionTimeMs;
      routerMetrics.getBlobOperationTotalTimeMs.update(totalTime);
    }
    operationCompleted = true;
  }
  /**
   * Update chunking and size related metrics - blob size, chunk count, and whether the blob is simple or composite.
   */
  private void updateChunkingAndSizeMetricsOnSuccessfulGet() {
    routerMetrics.getBlobSizeBytes.update(bytesWritten);
    routerMetrics.getBlobChunkCount.update(numChunksTotal);
    if (options != null && options.getBlobOptions.getRange() != null) {
      routerMetrics.getBlobWithRangeSizeBytes.update(bytesWritten);
      routerMetrics.getBlobWithRangeTotalBlobSizeBytes.update(totalSize);
    }
    if (numChunksTotal == 1) {
      routerMetrics.simpleBlobGetCount.inc();
    } else {
      routerMetrics.compositeBlobGetCount.inc();
    }
  }
}
/**
* GetChunk is used to retrieve and hold a data chunk of a composite blob. An object of this class is initialized
* to retrieve one data chunk at a time. Once the associated chunk is successfully retrieved, this object can be
* reinitialized and used to retrieve a subsequent chunk.
*/
private class GetChunk {
// the operation tracker used to track the operation on the current chunk.
private OperationTracker chunkOperationTracker;
// the blob id of the current chunk.
private BlobId chunkBlobId;
// whether the operation on the current chunk has completed.
private boolean chunkCompleted;
// In general, when the operation tracker returns success, any previously saved exceptions are cleared. This flag
// indicates that the set chunk exception should not be overwritten even when the operation tracker reports success.
protected boolean retainChunkExceptionOnSuccess;
// the index of the current chunk in the overall blob.
protected int chunkIndex;
// the most relevant exception encountered for the current chunk.
protected RouterException chunkException;
// For a GetChunk, responses may be handled multiple times. Regardless of the successTarget,
// the actual body of the response is deserialized only once.
protected boolean successfullyDeserialized;
// map of correlation id to the request metadata for every request issued for this operation.
protected final Map<Integer, GetRequestInfo> correlationIdToGetRequestInfo = new TreeMap<>();
// the state of the chunk.
protected volatile ChunkState state;
/**
* Construct a GetChunk
* @param index the index (in the overall blob) of the initial data chunk that this GetChunk has to fetch.
* @param id the {@link BlobId} of the initial data chunk that this GetChunk has to fetch.
*/
GetChunk(int index, BlobId id) {
reset();
initialize(index, id);
}
/**
* Do what needs to be done after the GetBlobOperation is done with the current chunk that has completed. When
* this is called, the state of the chunk should be {@link ChunkState#Complete}
*/
void postCompletionCleanup() {
reset();
}
/**
* @return the {@link GetOption} to associate with the {@link GetRequest}s that will be issued by this GetChunk.
*/
GetOption getGetOption() {
// Anything other than the first GetChunk should ignore Delete and Expired flags. This is to avoid errors due
// to the blob getting expired or deleted in the middle of a retrieval - after the metadata chunk was
// successfully retrieved.
return GetOption.Include_All;
}
/**
* Return the {@link MessageFormatFlags} to associate with a getBlob chunk operation.
* @return {@link MessageFormatFlags#Blob}
*/
MessageFormatFlags getOperationFlag() {
return MessageFormatFlags.Blob;
}
/**
* Reset the state of this GetChunk.
*/
void reset() {
chunkOperationTracker = null;
chunkCompleted = false;
retainChunkExceptionOnSuccess = false;
chunkBlobId = null;
chunkIndex = -1;
chunkException = null;
successfullyDeserialized = false;
correlationIdToGetRequestInfo.clear();
state = ChunkState.Free;
}
/**
* Assign a chunk of the overall blob to this GetChunk.
* @param index the index of the chunk of the overall blob that needs to be fetched through this GetChunk.
* @param id the id of the chunk of the overall blob that needs to be fetched through this GetChunk.
*/
void initialize(int index, BlobId id) {
chunkIndex = index;
chunkBlobId = id;
chunkOperationTracker = getOperationTracker(chunkBlobId.getPartition());
state = ChunkState.Ready;
}
/**
* return the {@link RouterException} associated with the operation on this chunk, if any.
* @return the {@link RouterException} associated with the operation on this chunk, if any.
*/
RouterException getChunkException() {
return chunkException;
}
/**
* This is one of two main entry points to this class, the other being
* {@link #handleResponse(ResponseInfo, GetResponse)}.
* Apart from fetching requests to send out, this also checks for timeouts of issued requests,
* status of the operation and anything else that needs to be done within this GetChunk. The callers guarantee
* that this method is called on the GetChunks of an operation until either the operation, or the chunk operation
* is completed.
* @param requestRegistrationCallback the {@link RequestRegistrationCallback} to call for every request that gets
* created as part of this poll operation.
*/
void poll(RequestRegistrationCallback<GetOperation> requestRegistrationCallback) {
//First, check if any of the existing requests have timed out.
cleanupExpiredInFlightRequests();
checkAndMaybeComplete();
if (!isComplete()) {
fetchRequests(requestRegistrationCallback);
}
}
/**
* Clean up requests sent out by this operation that have now timed out.
*/
private void cleanupExpiredInFlightRequests() {
//First, check if any of the existing requests have timed out.
Iterator<Map.Entry<Integer, GetRequestInfo>> inFlightRequestsIterator =
correlationIdToGetRequestInfo.entrySet().iterator();
while (inFlightRequestsIterator.hasNext()) {
Map.Entry<Integer, GetRequestInfo> entry = inFlightRequestsIterator.next();
if (time.milliseconds() - entry.getValue().startTimeMs > routerConfig.routerRequestTimeoutMs) {
onErrorResponse(entry.getValue().replicaId);
// Do not notify this as a failure to the response handler, as this timeout could simply be due to
// connection unavailability. If there is indeed a network error, the NetworkClient will provide an error
// response and the response handler will be notified accordingly.
chunkException = new RouterException("Timed out waiting for a response", RouterErrorCode.OperationTimedOut);
inFlightRequestsIterator.remove();
} else {
// the entries are ordered by correlation id and time. Break on the first request that has not timed out.
break;
}
}
}
/**
* Fetch {@link GetRequest}s to send for the current data chunk.
*/
private void fetchRequests(RequestRegistrationCallback<GetOperation> requestRegistrationCallback) {
Iterator<ReplicaId> replicaIterator = chunkOperationTracker.getReplicaIterator();
while (replicaIterator.hasNext()) {
ReplicaId replicaId = replicaIterator.next();
replicaIterator.remove();
String hostname = replicaId.getDataNodeId().getHostname();
Port port = replicaId.getDataNodeId().getPortToConnectTo();
GetRequest getRequest = createGetRequest(chunkBlobId, getOperationFlag(), getGetOption());
RouterRequestInfo request = new RouterRequestInfo(hostname, port, getRequest, replicaId);
int correlationId = getRequest.getCorrelationId();
correlationIdToGetRequestInfo.put(correlationId, new GetRequestInfo(replicaId, time.milliseconds()));
correlationIdToGetChunk.put(correlationId, this);
requestRegistrationCallback.registerRequestToSend(GetBlobOperation.this, request);
if (RouterUtils.isRemoteReplica(routerConfig, replicaId)) {
logger.trace("Making request to a remote replica in", replicaId.getDataNodeId().getDatacenterName());
routerMetrics.crossColoRequestCount.inc();
}
routerMetrics.getDataNodeBasedMetrics(replicaId.getDataNodeId()).getRequestRate.mark();
state = ChunkState.InProgress;
}
}
/**
* Check if the operation on the chunk is eligible for completion, if so complete it.
*/
void checkAndMaybeComplete() {
if (chunkOperationTracker.isDone()) {
if (!retainChunkExceptionOnSuccess && chunkOperationTracker.hasSucceeded()) {
// override any previously set exceptions
chunkException = null;
}
chunkCompleted = true;
}
if (chunkCompleted) {
setOperationException(chunkException);
state = ChunkState.Complete;
}
}
/**
* Handle the body of the response: Deserialize and add to the list of chunk buffers.
* @param payload the body of the response.
* @throws IOException if there is an IOException while deserializing the body.
* @throws MessageFormatException if there is a MessageFormatException while deserializing the body.
*/
void handleBody(InputStream payload) throws IOException, MessageFormatException {
if (!successfullyDeserialized) {
BlobData blobData = MessageFormatRecord.deserializeBlob(payload);
chunkIndexToBuffer.put(chunkIndex, filterChunkToRange(blobData));
numChunksRetrieved++;
successfullyDeserialized = true;
} else {
// If successTarget > 1, then content reconciliation may have to be done. For now, ignore subsequent responses.
}
}
/**
* This method is the entry point for handling responses received for requests sent out on behalf of this chunk.
* For gets, processing involves determining whether the request was successful, notifying the operation
* tracker so it can track the status of the operation, and notifying the response handler for failure detection.
* Finally, a check is done to determine whether the operation on the chunk is eligible for completion,
* if so the chunk operation is completed right away.
* @param responseInfo the response received for a request sent out on behalf of this chunk.
* @param getResponse the {@link GetResponse} associated with this response.
*/
void handleResponse(ResponseInfo responseInfo, GetResponse getResponse) {
int correlationId = ((GetRequest) responseInfo.getRequestInfo().getRequest()).getCorrelationId();
// Get the GetOperation that generated the request.
GetRequestInfo getRequestInfo = correlationIdToGetRequestInfo.remove(correlationId);
if (getRequestInfo == null) {
// Ignore right away. This associated operation has completed.
return;
}
long requestLatencyMs = time.milliseconds() - getRequestInfo.startTimeMs;
routerMetrics.routerRequestLatencyMs.update(requestLatencyMs);
routerMetrics.getDataNodeBasedMetrics(getRequestInfo.replicaId.getDataNodeId()).getRequestLatencyMs.update(
requestLatencyMs);
if (responseInfo.getError() != null) {
chunkException = new RouterException("Operation timed out", RouterErrorCode.OperationTimedOut);
onErrorResponse(getRequestInfo.replicaId);
} else {
if (getResponse == null) {
chunkException = new RouterException("Response deserialization received an unexpected error",
RouterErrorCode.UnexpectedInternalError);
onErrorResponse(getRequestInfo.replicaId);
} else {
if (getResponse.getCorrelationId() != correlationId) {
// The NetworkClient associates a response with a request based on the fact that only one request is sent
// out over a connection id, and the response received on a connection id must be for the latest request
// sent over it. The check here ensures that is indeed the case. If not, log an error and fail this request.
// There is no other way to handle it.
routerMetrics.unknownReplicaResponseError.inc();
chunkException = new RouterException(
"The correlation id in the GetResponse " + getResponse.getCorrelationId()
+ " is not the same as the correlation id in the associated GetRequest: " + correlationId,
RouterErrorCode.UnexpectedInternalError);
onErrorResponse(getRequestInfo.replicaId);
// we do not notify the ResponseHandler responsible for failure detection as this is an unexpected error.
} else {
try {
processGetBlobResponse(getRequestInfo, getResponse);
} catch (IOException | MessageFormatException e) {
// This should really not happen. Again, we do not notify the ResponseHandler responsible for failure
// detection.
routerMetrics.responseDeserializationErrorCount.inc();
chunkException = new RouterException("Response deserialization received an unexpected error", e,
RouterErrorCode.UnexpectedInternalError);
onErrorResponse(getRequestInfo.replicaId);
}
}
}
}
checkAndMaybeComplete();
}
/**
* Process the GetResponse extracted from a {@link ResponseInfo}
* @param getRequestInfo the associated {@link RequestInfo} for which this response was received.
* @param getResponse the {@link GetResponse} extracted from the {@link ResponseInfo}
* @throws IOException if there is an error during deserialization of the GetResponse.
* @throws MessageFormatException if there is an error during deserialization of the GetResponse.
*/
private void processGetBlobResponse(GetRequestInfo getRequestInfo, GetResponse getResponse)
throws IOException, MessageFormatException {
ServerErrorCode getError = getResponse.getError();
if (getError == ServerErrorCode.No_Error) {
int partitionsInResponse = getResponse.getPartitionResponseInfoList().size();
// Each get request issued by the router is for a single blob.
if (partitionsInResponse != 1) {
chunkException = new RouterException(
"Unexpected number of partition responses, expected: 1, " + "received: " + partitionsInResponse,
RouterErrorCode.UnexpectedInternalError);
} else {
getError = getResponse.getPartitionResponseInfoList().get(0).getErrorCode();
if (getError == ServerErrorCode.No_Error) {
handleBody(getResponse.getInputStream());
chunkOperationTracker.onResponse(getRequestInfo.replicaId, true);
if (RouterUtils.isRemoteReplica(routerConfig, getRequestInfo.replicaId)) {
logger.trace("Cross colo request successful for remote replica in ",
getRequestInfo.replicaId.getDataNodeId().getDatacenterName());
routerMetrics.crossColoSuccessCount.inc();
}
} else {
// process and set the most relevant exception.
processServerError(getError);
if (getError == ServerErrorCode.Blob_Deleted || getError == ServerErrorCode.Blob_Expired) {
// this is a successful response and one that completes the operation regardless of whether the
// success target has been reached or not.
chunkCompleted = true;
} else {
onErrorResponse(getRequestInfo.replicaId);
}
}
}
} else {
onErrorResponse(getRequestInfo.replicaId);
}
}
/**
* Perform the necessary actions when a request to a replica fails.
* @param replicaId the {@link ReplicaId} associated with the failed response.
*/
void onErrorResponse(ReplicaId replicaId) {
chunkOperationTracker.onResponse(replicaId, false);
routerMetrics.routerRequestErrorCount.inc();
routerMetrics.getDataNodeBasedMetrics(replicaId.getDataNodeId()).getRequestErrorCount.inc();
}
/**
* Process the given {@link ServerErrorCode} and set operation status accordingly.
* Receiving a {@link ServerErrorCode#Blob_Deleted}, {@link ServerErrorCode#Blob_Expired} or
* {@link ServerErrorCode#Blob_Not_Found} is unexpected for all chunks except for the first.
* @param errorCode the {@link ServerErrorCode} to process.
*/
void processServerError(ServerErrorCode errorCode) {
logger.trace("Server returned an error: ", errorCode);
setChunkException(new RouterException("Server returned: " + errorCode, RouterErrorCode.UnexpectedInternalError));
}
/**
 * Set the exception associated with this chunk operation.
 * A {@link ServerErrorCode#Blob_Deleted} or {@link ServerErrorCode#Blob_Expired} error overrides any other
 * previously received exception; otherwise the first exception received wins.
 * @param exception the {@link RouterException} to possibly set.
 */
void setChunkException(RouterException exception) {
  RouterErrorCode errorCode = exception.getErrorCode();
  // Deleted/Expired are authoritative outcomes and must replace whatever was recorded before.
  boolean overridesExisting =
      errorCode == RouterErrorCode.BlobDeleted || errorCode == RouterErrorCode.BlobExpired;
  if (overridesExisting || chunkException == null) {
    chunkException = exception;
  }
}
/**
 * Slice this chunk's data to only include the bytes within the operation's specified byte range.
 * If no range was requested, the chunk's buffer is returned untouched; otherwise position/limit are
 * adjusted to the portion of this chunk that falls inside the resolved range and a slice is returned.
 * @param blobData the {@link BlobData} for this chunk.
 * @return A {@link ByteBuffer} that only includes bytes within the operation's specified byte range.
 */
protected ByteBuffer filterChunkToRange(BlobData blobData) {
  ByteBuffer chunkBuf = blobData.getStream().getByteBuffer();
  if (options == null || options.getBlobOptions.getRange() == null) {
    // No range requested: the whole chunk is in scope.
    return chunkBuf;
  }
  if (resolvedByteRange.getRangeSize() == 0) {
    // Empty range: produce a zero-length slice.
    chunkBuf.position(0);
    chunkBuf.limit(0);
  } else {
    boolean isFirstChunk = chunkIndex == 0;
    boolean isLastChunk = chunkIndex == numChunksTotal - 1;
    // Only the first chunk in range can start mid-chunk; only the last can end mid-chunk.
    long startInChunk = isFirstChunk ? resolvedByteRange.getStartOffset() % chunkSize : 0;
    long endInChunk = isLastChunk ? resolvedByteRange.getEndOffset() % chunkSize + 1 : chunkSize;
    chunkBuf.position((int) startInChunk);
    chunkBuf.limit((int) endInChunk);
  }
  return chunkBuf.slice();
}
/**
 * @return true if this GetChunk is free so a chunk of the overall blob can be assigned to it.
 *         (State machine: Free -> Ready -> InProgress -> Complete.)
 */
boolean isFree() {
  return state == ChunkState.Free;
}
/**
 * @return true if this GetChunk is assigned with a chunk of the overall blob and is ready to issue requests.
 */
boolean isReady() {
  return state == ChunkState.Ready;
}
/**
 * @return true if the operation on this chunk is in progress (requests issued, responses pending).
 */
boolean isInProgress() {
  return state == ChunkState.InProgress;
}
/**
 * @return true if the operation on the current chunk is complete.
 */
boolean isComplete() {
  return state == ChunkState.Complete;
}
}
/**
* Special GetChunk used to retrieve and hold the first chunk of a blob. The first chunk is special because it
* could either be a metadata chunk of a composite blob, or the single chunk of a simple blob,
* and whether a chunk is composite or simple can only be determined after the first chunk is fetched.
*/
private class FirstGetChunk extends GetChunk {
/**
* Construct a FirstGetChunk and initialize it with the {@link BlobId} of the overall operation.
*/
FirstGetChunk() {
super(-1, blobId);
}
/**
* {@inheritDoc}
* <br>
* Post completion cleanup for the FirstGetChunk only involves clearing the correlation id map (so that
* subsequent responses are ignored). The rest of the state of the FirstGetChunk is never cleared.
*/
@Override
void postCompletionCleanup() {
correlationIdToGetRequestInfo.clear();
}
@Override
GetOption getGetOption() {
return options.getBlobOptions.getGetOption();
}
/**
* Return the {@link MessageFormatFlags} to associate with the first getBlob chunk operation.
* @return {@link MessageFormatFlags#Blob} for {@link GetBlobOptions.OperationType#Data}, or {@link MessageFormatFlags#All} by
* default.
*/
@Override
MessageFormatFlags getOperationFlag() {
return options.getBlobOptions.getOperationType() == GetBlobOptions.OperationType.Data ? MessageFormatFlags.Blob
: MessageFormatFlags.All;
}
/**
* {@inheritDoc}
* <br>
* It would help to keep in mind while going through this method that the first chunk is either a metadata chunk
* or the only chunk of the blob.
*/
@Override
void handleBody(InputStream payload) throws IOException, MessageFormatException {
if (!successfullyDeserialized) {
BlobData blobData;
if (getOperationFlag() == MessageFormatFlags.Blob) {
blobData = MessageFormatRecord.deserializeBlob(payload);
} else {
BlobAll blobAll = MessageFormatRecord.deserializeBlobAll(payload, blobIdFactory);
blobInfo = blobAll.getBlobInfo();
blobData = blobAll.getBlobData();
}
BlobType blobType = blobData.getBlobType();
chunkIndexToBuffer = new TreeMap<>();
if (blobType == BlobType.MetadataBlob) {
handleMetadataBlob(blobData);
} else {
handleSimpleBlob(blobData);
}
successfullyDeserialized = true;
state = ChunkState.Complete;
} else {
// Currently, regardless of the successTarget, only the first successful response is honored. Subsequent ones
// are ignored. If ever in the future, we need some kind of reconciliation, this is the place
// to do that. (Only after the reconciliation will the state be marked as complete).
}
}
/**
* {@inheritDoc}
* <br>
* Receiving a {@link ServerErrorCode#Blob_Deleted}, {@link ServerErrorCode#Blob_Expired} or
* {@link ServerErrorCode#Blob_Not_Found} is not unexpected for the first chunk, unlike for subsequent chunks.
*/
@Override
void processServerError(ServerErrorCode errorCode) {
logger.trace("Server returned an error: ", errorCode);
switch (errorCode) {
case Blob_Deleted:
setChunkException(new RouterException("Server returned: " + errorCode, RouterErrorCode.BlobDeleted));
break;
case Blob_Expired:
setChunkException(new RouterException("Server returned: " + errorCode, RouterErrorCode.BlobExpired));
break;
case Blob_Not_Found:
setChunkException(new RouterException("Server returned: " + errorCode, RouterErrorCode.BlobDoesNotExist));
break;
default:
setChunkException(
new RouterException("Server returned: " + errorCode, RouterErrorCode.UnexpectedInternalError));
break;
}
}
/**
* Process a metadata blob to find the data chunks that need to be fetched.
* @param blobData the metadata blob's data.
* @throws IOException
* @throws MessageFormatException
*/
private void handleMetadataBlob(BlobData blobData) throws IOException, MessageFormatException {
ByteBuffer serializedMetadataContent = blobData.getStream().getByteBuffer();
compositeBlobInfo =
MetadataContentSerDe.deserializeMetadataContentRecord(serializedMetadataContent, blobIdFactory);
chunkSize = compositeBlobInfo.getChunkSize();
totalSize = compositeBlobInfo.getTotalSize();
List<StoreKey> keys = compositeBlobInfo.getKeys();
boolean rangeResolutionFailure = false;
try {
if (options != null && options.getBlobOptions.getRange() != null) {
resolvedByteRange = options.getBlobOptions.getRange().toResolvedByteRange(totalSize);
// Get only the chunks within the range.
int firstChunkIndexInRange = (int) (resolvedByteRange.getStartOffset() / chunkSize);
int lastChunkIndexInRange = (int) (resolvedByteRange.getEndOffset() / chunkSize);
keys = keys.subList(firstChunkIndexInRange, lastChunkIndexInRange + 1);
}
} catch (IllegalArgumentException e) {
onInvalidRange(e);
rangeResolutionFailure = true;
}
if (!rangeResolutionFailure) {
if (options.getChunkIdsOnly) {
chunkIdIterator = null;
numChunksTotal = 0;
dataChunks = null;
} else {
chunkIdIterator = keys.listIterator();
numChunksTotal = keys.size();
dataChunks = new GetChunk[Math.min(keys.size(), NonBlockingRouter.MAX_IN_MEM_CHUNKS)];
for (int i = 0; i < dataChunks.length; i++) {
dataChunks[i] = new GetChunk(chunkIdIterator.nextIndex(), (BlobId) chunkIdIterator.next());
}
}
}
}
/**
* Process a simple blob and extract the requested data from the blob.
* @param blobData the simple blob's data
*/
private void handleSimpleBlob(BlobData blobData) {
totalSize = blobData.getSize();
chunkSize = totalSize;
boolean rangeResolutionFailure = false;
try {
if (options != null && options.getBlobOptions.getRange() != null) {
resolvedByteRange = options.getBlobOptions.getRange().toResolvedByteRange(totalSize);
}
} catch (IllegalArgumentException e) {
onInvalidRange(e);
rangeResolutionFailure = true;
}
if (!rangeResolutionFailure) {
chunkIdIterator = null;
dataChunks = null;
chunkIndex = 0;
numChunksTotal = 1;
chunkIndexToBuffer.put(0, filterChunkToRange(blobData));
numChunksRetrieved = 1;
}
}
/**
* On an invalid range, set a {@link RouterErrorCode#RangeNotSatisfiable} exception for this chunk, mark the chunk
* as unconditionally completed, and set the chunk counters such that the operation will be completed.
* @param exception the reason that the range was invalid.
*/
private void onInvalidRange(Exception exception) {
setChunkException(
new RouterException("Range provided was not satisfiable.", exception, RouterErrorCode.RangeNotSatisfiable));
retainChunkExceptionOnSuccess = true;
chunkIdIterator = null;
dataChunks = null;
numChunksTotal = 0;
numChunksRetrieved = 0;
}
}
/**
 * Different states of a GetChunk. A chunk progresses Free -> Ready -> InProgress -> Complete,
 * and may be recycled back to Free to hold another chunk of the overall blob.
 */
enum ChunkState {
  /**
   * The GetChunk is free and can be assigned to hold a chunk of the overall blob.
   */
  Free,
  /**
   * The GetChunk has been assigned to get and hold a chunk of the overall blob.
   */
  Ready,
  /**
   * The GetChunk has issued requests and the operation on the chunk it holds is in progress.
   */
  InProgress,
  /**
   * The GetChunk is complete.
   */
  Complete,
}
}