/** * Copyright 2016 LinkedIn Corp. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ package com.github.ambry.router; import com.github.ambry.clustermap.ClusterMap; import com.github.ambry.clustermap.ReplicaId; import com.github.ambry.commons.ResponseHandler; import com.github.ambry.commons.ServerErrorCode; import com.github.ambry.config.RouterConfig; import com.github.ambry.messageformat.BlobInfo; import com.github.ambry.messageformat.MessageFormatException; import com.github.ambry.messageformat.MessageFormatFlags; import com.github.ambry.messageformat.MessageFormatRecord; import com.github.ambry.network.Port; import com.github.ambry.network.RequestInfo; import com.github.ambry.network.ResponseInfo; import com.github.ambry.protocol.GetRequest; import com.github.ambry.protocol.GetResponse; import com.github.ambry.utils.Time; import java.io.IOException; import java.io.InputStream; import java.util.Iterator; import java.util.Map; import java.util.TreeMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * GetBlobInfoOperation class is responsible for maintaining the state associated with a getBlobInfo operation, * and completing it. A GetBlobInfo operation only needs to make requests for a single chunk to get the BlobInfo - * which is either the only chunk in the case of a simple blob, or the metadata chunk in the case of composite blobs. */ class GetBlobInfoOperation extends GetOperation { private final OperationTracker operationTracker; // map of correlation id to the request metadata for every request issued for this operation. private final Map<Integer, GetRequestInfo> correlationIdToGetRequestInfo = new TreeMap<Integer, GetRequestInfo>(); private static final Logger logger = LoggerFactory.getLogger(GetBlobInfoOperation.class); /** * Construct a GetBlobInfoOperation * @param routerConfig the {@link RouterConfig} containing the configs for get operations. * @param routerMetrics The {@link NonBlockingRouterMetrics} to be used for reporting metrics. * @param clusterMap the {@link ClusterMap} of the cluster * @param responseHandler the {@link ResponseHandler} responsible for failure detection. * @param blobIdStr the blob id associated with the operation in string form. * @param options the {@link GetBlobOptionsInternal} containing the options associated with this operation. * @param callback the callback that is to be called when the operation completes. * @param time the Time instance to use. * @throws RouterException if there is an error with any of the parameters, such as an invalid blob id. */ GetBlobInfoOperation(RouterConfig routerConfig, NonBlockingRouterMetrics routerMetrics, ClusterMap clusterMap, ResponseHandler responseHandler, String blobIdStr, GetBlobOptionsInternal options, Callback<GetBlobResultInternal> callback, Time time) throws RouterException { super(routerConfig, routerMetrics, clusterMap, responseHandler, blobIdStr, options, callback, routerMetrics.getBlobInfoLocalColoLatencyMs, routerMetrics.getBlobInfoCrossColoLatencyMs, routerMetrics.getBlobInfoPastDueCount, time); operationTracker = getOperationTracker(blobId.getPartition()); } @Override void abort(Exception abortCause) { NonBlockingRouter.completeOperation(null, getOperationCallback, null, abortCause); operationCompleted = true; } /** * Return the {@link MessageFormatFlags} to associate with a getBlobInfo operation. * @return {@link MessageFormatFlags#BlobInfo} */ MessageFormatFlags getOperationFlag() { return MessageFormatFlags.BlobInfo; } /** * For this operation, create and populate get requests (in the form of {@link RequestInfo}) to send out. * @param requestRegistrationCallback the {@link RequestRegistrationCallback} to call for every request that gets * created as part of this poll operation. */ @Override void poll(RequestRegistrationCallback<GetOperation> requestRegistrationCallback) { //First, check if any of the existing requests have timed out. cleanupExpiredInFlightRequests(); checkAndMaybeComplete(); if (!isOperationComplete()) { fetchRequests(requestRegistrationCallback); } } /** * Clean up requests sent out by this operation that have now timed out. */ private void cleanupExpiredInFlightRequests() { Iterator<Map.Entry<Integer, GetRequestInfo>> inFlightRequestsIterator = correlationIdToGetRequestInfo.entrySet().iterator(); while (inFlightRequestsIterator.hasNext()) { Map.Entry<Integer, GetRequestInfo> entry = inFlightRequestsIterator.next(); if (time.milliseconds() - entry.getValue().startTimeMs > routerConfig.routerRequestTimeoutMs) { onErrorResponse(entry.getValue().replicaId); // Do not notify this as a failure to the response handler, as this timeout could simply be due to // connection unavailability. If there is indeed a network error, the NetworkClient will provide an error // response and the response handler will be notified accordingly. setOperationException( new RouterException("Timed out waiting for a response", RouterErrorCode.OperationTimedOut)); inFlightRequestsIterator.remove(); } else { // the entries are ordered by correlation id and time. Break on the first request that has not timed out. break; } } } /** * Fetch {@link GetRequest}s to send for the operation. */ private void fetchRequests(RequestRegistrationCallback<GetOperation> requestRegistrationCallback) { Iterator<ReplicaId> replicaIterator = operationTracker.getReplicaIterator(); while (replicaIterator.hasNext()) { ReplicaId replicaId = replicaIterator.next(); String hostname = replicaId.getDataNodeId().getHostname(); Port port = replicaId.getDataNodeId().getPortToConnectTo(); GetRequest getRequest = createGetRequest(blobId, getOperationFlag(), options.getBlobOptions.getGetOption()); RouterRequestInfo request = new RouterRequestInfo(hostname, port, getRequest, replicaId); int correlationId = getRequest.getCorrelationId(); correlationIdToGetRequestInfo.put(correlationId, new GetRequestInfo(replicaId, time.milliseconds())); requestRegistrationCallback.registerRequestToSend(this, request); replicaIterator.remove(); if (RouterUtils.isRemoteReplica(routerConfig, replicaId)) { logger.trace("Making request to a remote replica in", replicaId.getDataNodeId().getDatacenterName()); routerMetrics.crossColoRequestCount.inc(); } routerMetrics.getDataNodeBasedMetrics(replicaId.getDataNodeId()).getBlobInfoRequestRate.mark(); } } /** * Handle the given {@link ResponseInfo} and update the status of the operation. * Based on the status of the response, this involves handling the body, notifying the operation tracker, * notifying the response handler and possibly setting the operationException. * @param responseInfo the {@link ResponseInfo} to be handled. * @param getResponse The {@link GetResponse} associated with this response. */ @Override void handleResponse(ResponseInfo responseInfo, GetResponse getResponse) { if (isOperationComplete()) { return; } int correlationId = ((GetRequest) responseInfo.getRequestInfo().getRequest()).getCorrelationId(); // Get the GetOperation that generated the request. GetRequestInfo getRequestInfo = correlationIdToGetRequestInfo.remove(correlationId); if (getRequestInfo == null) { // Ignore. The request must have timed out. return; } long requestLatencyMs = time.milliseconds() - getRequestInfo.startTimeMs; routerMetrics.routerRequestLatencyMs.update(requestLatencyMs); routerMetrics.getDataNodeBasedMetrics(getRequestInfo.replicaId.getDataNodeId()).getBlobInfoRequestLatencyMs.update( requestLatencyMs); if (responseInfo.getError() != null) { setOperationException(new RouterException("Operation timed out", RouterErrorCode.OperationTimedOut)); onErrorResponse(getRequestInfo.replicaId); } else { if (getResponse == null) { setOperationException(new RouterException("Response deserialization received an unexpected error", RouterErrorCode.UnexpectedInternalError)); onErrorResponse(getRequestInfo.replicaId); } else { if (getResponse.getCorrelationId() != correlationId) { // The NetworkClient associates a response with a request based on the fact that only one request is sent // out over a connection id, and the response received on a connection id must be for the latest request // sent over it. The check here ensures that is indeed the case. If not, log an error and fail this request. // There is no other way to handle it. routerMetrics.unknownReplicaResponseError.inc(); setOperationException(new RouterException( "The correlation id in the GetResponse " + getResponse.getCorrelationId() + "is not the same as the correlation id in the associated GetRequest: " + correlationId, RouterErrorCode.UnexpectedInternalError)); onErrorResponse(getRequestInfo.replicaId); // we do not notify the ResponseHandler responsible for failure detection as this is an unexpected error. } else { try { processGetBlobInfoResponse(getRequestInfo, getResponse); } catch (IOException | MessageFormatException e) { // This should really not happen. Again, we do not notify the ResponseHandler responsible for failure // detection. routerMetrics.responseDeserializationErrorCount.inc(); setOperationException(new RouterException("Response deserialization received an unexpected error", e, RouterErrorCode.UnexpectedInternalError)); onErrorResponse(getRequestInfo.replicaId); } } } } checkAndMaybeComplete(); } /** * Process the {@link GetResponse} extracted from a {@link ResponseInfo} * @param getRequestInfo the associated {@link GetRequestInfo} for which this response was received. * @param getResponse the {@link GetResponse} extracted from the {@link ResponseInfo} * @throws IOException if there is an error during deserialization of the GetResponse. * @throws MessageFormatException if there is an error during deserialization of the GetResponse. */ private void processGetBlobInfoResponse(GetRequestInfo getRequestInfo, GetResponse getResponse) throws IOException, MessageFormatException { ServerErrorCode getError = getResponse.getError(); if (getError == ServerErrorCode.No_Error) { int partitionsInResponse = getResponse.getPartitionResponseInfoList().size(); // Each get request issued by the router is for a single blob. if (partitionsInResponse != 1) { setOperationException(new RouterException( "Unexpected number of partition responses, expected: 1, " + "received: " + partitionsInResponse, RouterErrorCode.UnexpectedInternalError)); onErrorResponse(getRequestInfo.replicaId); // Again, no need to notify the responseHandler. } else { getError = getResponse.getPartitionResponseInfoList().get(0).getErrorCode(); if (getError == ServerErrorCode.No_Error) { handleBody(getResponse.getInputStream()); operationTracker.onResponse(getRequestInfo.replicaId, true); if (RouterUtils.isRemoteReplica(routerConfig, getRequestInfo.replicaId)) { logger.trace("Cross colo request successful for remote replica in ", getRequestInfo.replicaId.getDataNodeId().getDatacenterName()); routerMetrics.crossColoSuccessCount.inc(); } } else { // process and set the most relevant exception. processServerError(getError); if (getError == ServerErrorCode.Blob_Deleted || getError == ServerErrorCode.Blob_Expired) { // this is a successful response and one that completes the operation regardless of whether the // success target has been reached or not. operationCompleted = true; } else { onErrorResponse(getRequestInfo.replicaId); } } } } else { onErrorResponse(getRequestInfo.replicaId); } } /** * Perform the necessary actions when a request to a replica fails. * @param replicaId the {@link ReplicaId} associated with the failed response. */ void onErrorResponse(ReplicaId replicaId) { operationTracker.onResponse(replicaId, false); routerMetrics.routerRequestErrorCount.inc(); routerMetrics.getDataNodeBasedMetrics(replicaId.getDataNodeId()).getBlobInfoRequestErrorCount.inc(); } /** * Handle the body of the response: Deserialize and set the {@link BlobInfo} to return. * @param payload the body of the response. * @throws IOException if there is an IOException while deserializing the body. * @throws MessageFormatException if there is a MessageFormatException while deserializing the body. */ private void handleBody(InputStream payload) throws IOException, MessageFormatException { if (operationResult == null) { operationResult = new GetBlobResultInternal(new GetBlobResult( new BlobInfo(MessageFormatRecord.deserializeBlobProperties(payload), MessageFormatRecord.deserializeUserMetadata(payload).array()), null), null); } else { // If the successTarget is 1, this case will never get executed. // If it is more than 1, then, different responses will have to be reconciled in some way. Here is where that // would be done. Since the store is immutable, currently we handle this by ignoring subsequent responses. } } /** * Process the given {@link ServerErrorCode} and set operation status accordingly. * @param errorCode the {@link ServerErrorCode} to process. */ private void processServerError(ServerErrorCode errorCode) { logger.trace("Server returned an error: ", errorCode); switch (errorCode) { case Blob_Deleted: logger.trace("Requested blob was deleted"); setOperationException(new RouterException("Server returned: " + errorCode, RouterErrorCode.BlobDeleted)); break; case Blob_Expired: logger.trace("Requested blob has expired"); setOperationException(new RouterException("Server returned: " + errorCode, RouterErrorCode.BlobExpired)); break; case Blob_Not_Found: logger.trace("Requested blob was not found on this server"); setOperationException(new RouterException("Server returned: " + errorCode, RouterErrorCode.BlobDoesNotExist)); break; default: setOperationException( new RouterException("Server returned: " + errorCode, RouterErrorCode.UnexpectedInternalError)); break; } } /** * Check whether the operation can be completed, if so complete it. */ private void checkAndMaybeComplete() { if (operationTracker.isDone()) { if (operationTracker.hasSucceeded()) { operationException.set(null); } operationCompleted = true; } if (operationCompleted) { Exception e = operationException.get(); if (operationResult == null && e == null) { e = new RouterException("Operation failed, but exception was not set", RouterErrorCode.UnexpectedInternalError); routerMetrics.operationFailureWithUnsetExceptionCount.inc(); } if (e != null) { operationResult = null; routerMetrics.onGetBlobError(e, options); } routerMetrics.getBlobInfoOperationLatencyMs.update(time.milliseconds() - submissionTimeMs); NonBlockingRouter.completeOperation(null, getOperationCallback, operationResult, e); } } }