/* * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 * (the "License"). You may not use this work except in compliance with the License, which is * available at www.apache.org/licenses/LICENSE-2.0 * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied, as more fully set forth in the License. * * See the NOTICE file distributed with this work for information regarding copyright ownership. */ package alluxio.network.connection; import alluxio.Configuration; import alluxio.PropertyKey; import alluxio.RuntimeConstants; import alluxio.exception.ExceptionMessage; import alluxio.exception.status.UnavailableException; import alluxio.exception.status.UnimplementedException; import alluxio.resource.DynamicResourcePool; import alluxio.retry.ExponentialBackoffRetry; import alluxio.retry.RetryPolicy; import alluxio.security.authentication.TransportProvider; import alluxio.thrift.AlluxioService; import alluxio.util.ThreadFactoryUtils; import com.google.common.base.Preconditions; import org.apache.thrift.TException; import org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.protocol.TMultiplexedProtocol; import org.apache.thrift.protocol.TProtocol; import org.apache.thrift.transport.TTransport; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.net.InetSocketAddress; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.regex.Pattern; import javax.annotation.concurrent.GuardedBy; import javax.annotation.concurrent.ThreadSafe; import javax.security.auth.Subject; /** * A pool to manage Alluxio thrift clients. * 1. It is recommended to keep one ThriftClientPool instance per <serverAddress, serviceType> pair. * 2. Make sure to release every client acquired from the pool even when the client is disconnected * An example usage: * ClientType client = pool.acquire(); * try { * client.doRpc(); * } catch (TTransportException e) { * client.getOutputProtocol().getTransport().close(); * } finally { * pool.release(client) * } * * @param <T> the Alluxio thrift service type */ // TODO(peis): Add unittest. @ThreadSafe public abstract class ThriftClientPool<T extends AlluxioService.Client> extends DynamicResourcePool<T> { private static final Logger LOG = LoggerFactory.getLogger(ThriftClientPool.class); private final TransportProvider mTransportProvider; private final String mServiceName; private final long mServiceVersion; private final InetSocketAddress mAddress; private final long mGcThresholdMs; private final Subject mParentSubject; private static final int THRIFT_CLIENT_POOL_GC_THREADPOOL_SIZE = 5; private static final ScheduledExecutorService GC_EXECUTOR = new ScheduledThreadPoolExecutor(THRIFT_CLIENT_POOL_GC_THREADPOOL_SIZE, ThreadFactoryUtils.build("ThriftClientPoolGcThreads-%d", true)); @GuardedBy("this") private Long mServerVersionFound = null; private static final int BASE_SLEEP_MS = Configuration.getInt(PropertyKey.USER_RPC_RETRY_BASE_SLEEP_MS); private static final int MAX_SLEEP_MS = Configuration.getInt(PropertyKey.USER_RPC_RETRY_MAX_SLEEP_MS); private static final int RPC_MAX_NUM_RETRY = Configuration.getInt(PropertyKey.USER_RPC_RETRY_MAX_NUM_RETRY); /** * The patterns of exception message when client and server transport frame sizes do not match * or corrupted data (e.g. due to incorrect port configured). */ private static final Pattern FRAME_SIZE_TOO_LARGE_EXCEPTION_PATTERN = Pattern.compile("Frame size \\((\\d+)\\) larger than max length"); private static final Pattern FRAME_SIZE_NEGATIVE_EXCEPTION_PATTERN = Pattern.compile("Read a negative frame size"); /** * Creates a thrift client pool instance with a minimum capacity of 1. * * @param subject the parent subject, set to null if not present * @param serviceName the service name (e.g. BlockWorkerClient) * @param serviceVersion the service version * @param address the server address * @param maxCapacity the maximum capacity of the pool * @param gcThresholdMs when a channel is older than this threshold and the pool's capacity * is above the minimum capacity (1), it is closed and removed from the pool. */ public ThriftClientPool(Subject subject, String serviceName, long serviceVersion, InetSocketAddress address, int maxCapacity, long gcThresholdMs) { super(Options.defaultOptions().setMaxCapacity(maxCapacity).setGcExecutor(GC_EXECUTOR)); mTransportProvider = TransportProvider.Factory.create(); mServiceName = serviceName; mServiceVersion = serviceVersion; mAddress = address; mGcThresholdMs = gcThresholdMs; mParentSubject = subject; } /** * A helper function to close thrift clients. * * @param client the thrift client to close * @param <C> the thrift client type */ public static <C extends AlluxioService.Client> void closeThriftClient(C client) { // Note that the input and output protocol is the same in Alluxio. TTransport transport = client.getOutputProtocol().getTransport(); if (transport.isOpen()) { transport.close(); } } @Override protected void closeResource(T client) { closeThriftClient(client); } @Override protected void closeResourceSync(T client) { closeResource(client); } /** * Creates a thrift client instance. * * @return the thrift client created */ @Override protected T createNewResource() throws IOException { TTransport transport = mTransportProvider.getClientTransport(mParentSubject, mAddress); TProtocol binaryProtocol = new TBinaryProtocol(transport); T client = createThriftClient(new TMultiplexedProtocol(binaryProtocol, mServiceName)); TException exception; RetryPolicy retryPolicy = new ExponentialBackoffRetry(BASE_SLEEP_MS, MAX_SLEEP_MS, RPC_MAX_NUM_RETRY); do { LOG.info("Alluxio client (version {}) is trying to connect with {} @ {}", RuntimeConstants.VERSION, mServiceName, mAddress); try { if (!transport.isOpen()) { transport.open(); } if (transport.isOpen()) { checkVersion(client); } LOG.info("Client registered with {} @ {}", mServiceName, mAddress); return client; } catch (TTransportException e) { if (e.getCause() instanceof java.net.SocketTimeoutException) { // Do not retry if socket timeout. String message = "Thrift transport open times out. Please check whether the " + "authentication types match between client and server. Note that NOSASL client " + "is not able to connect to servers with SIMPLE security mode."; throw new UnavailableException(message, e); } LOG.warn("Failed to connect ({}) to {} @ {}: {}", retryPolicy.getRetryCount(), mServiceName, mAddress, e.getMessage()); exception = e; } } while (retryPolicy.attemptRetry()); LOG.error("Failed after " + retryPolicy.getRetryCount() + " retries."); Preconditions.checkNotNull(exception); throw new UnavailableException(exception); } /** * Checks whether a client is healthy. * * @param client the thrift client to check * @return true if the client is open (i.e. connected) */ @Override protected boolean isHealthy(T client) { return client.getOutputProtocol().getTransport().isOpen(); } @Override protected boolean shouldGc(ResourceInternal<T> clientResourceInternal) { return System.currentTimeMillis() - clientResourceInternal.getLastAccessTimeMs() > mGcThresholdMs; } /** * Check the service version to see whether it matches the expected version. * * @param client the client */ private void checkVersion(T client) throws IOException { synchronized (this) { if (mServerVersionFound != null) { if (mServerVersionFound != mServiceVersion) { throw new UnimplementedException(ExceptionMessage.INCOMPATIBLE_VERSION .getMessage(mServiceName, mServiceVersion, mServerVersionFound)); } return; } } try { long serviceVersionFound = client.getServiceVersion(); synchronized (this) { mServerVersionFound = serviceVersionFound; if (mServerVersionFound != mServiceVersion) { throw new UnimplementedException(ExceptionMessage.INCOMPATIBLE_VERSION .getMessage(mServiceName, mServiceVersion, mServerVersionFound)); } } } catch (TTransportException e) { closeResource(client); // The master branch of Apache Thrift provides a dedicated exception type for this // (CORRUPTED_DATA). if (e.getMessage() != null && ( FRAME_SIZE_NEGATIVE_EXCEPTION_PATTERN.matcher(e.getMessage()).find() || FRAME_SIZE_TOO_LARGE_EXCEPTION_PATTERN.matcher(e.getMessage()).find())) { // See an error like "Frame size (67108864) larger than max length (16777216)!", // pointing to the helper page. String message = String.format("Failed to connect to %s @ %s: %s. " + "This exception " + "may be caused by incorrect network configuration. " + "Please consult %s for common solutions to address this problem.", getServiceNameForLogging(), mAddress, e.getMessage(), RuntimeConstants.ALLUXIO_DEBUG_DOCS_URL); throw new UnimplementedException(message, e); } throw new UnavailableException(e); } catch (TException e) { closeResource(client); throw new UnavailableException(e); } } /** * Creates a thrift client from a thrift protocol. * * @param protocol the thrift protocol * @return the created thrift client */ protected abstract T createThriftClient(TProtocol protocol); /** * Sometimes mServiceName passed from the constructor can be misleading for showing messages * to the user. The implementation can optionally override this to display a nice name. * This function should only be used for logging related functionality. * * @return the service name for logging */ protected String getServiceNameForLogging() { return mServiceName; } }