// =================================================================================================
// Copyright 2011 Twitter, Inc.
// -------------------------------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this work except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file, or at:
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =================================================================================================

package com.twitter.common.thrift.callers;

import java.lang.reflect.Method;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Logger;

import javax.annotation.Nullable;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Predicate;
import com.google.common.base.Throwables;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

import org.apache.thrift.async.AsyncMethodCallback;

import com.twitter.common.quantity.Amount;
import com.twitter.common.quantity.Time;
import com.twitter.common.stats.StatsProvider;
import com.twitter.common.thrift.TResourceExhaustedException;

/**
 * A caller that will retry calls to the wrapped caller.
 *
 * <p>Synchronous calls are retried by a loop inside {@link #call}; asynchronous calls are retried
 * from within the {@code ResultCapture} that is handed to the decorated invocation.
 *
 * @author William Farner
 */
public class RetryingCaller extends CallerDecorator {
  private static final Logger LOG = Logger.getLogger(RetryingCaller.class.getName());

  /**
   * Connect timeout override passed along with asynchronous retries so that a retry issued from
   * within a callback does not block waiting for a connection.
   */
  @VisibleForTesting
  public static final Amount<Long, Time> NONBLOCKING_TIMEOUT = Amount.of(-1L, Time.MILLISECONDS);

  private final StatsProvider statsProvider;
  private final String serviceName;
  private final int retries;
  private final ImmutableSet<Class<? extends Exception>> retryableExceptions;
  private final boolean debug;

  /**
   * Creates a new retrying caller.  The retrying caller will attempt to call invoked methods on the
   * underlying caller at most {@code retries} times.  A retry will be performed only when one of
   * the {@code retryableExceptions} is caught.
   *
   * @param decoratedCall The caller to decorate with retries.
   * @param async Whether the caller is asynchronous.
   * @param statsProvider The stat provider to export retry statistics through.
   * @param serviceName The service name that calls are being invoked on.
   * @param retries The maximum number of retries to perform.
   * @param retryableExceptions The exceptions that can be retried.
   * @param debug Whether to include debugging information when retries are being performed.
   */
  public RetryingCaller(Caller decoratedCall, boolean async, StatsProvider statsProvider,
      String serviceName, int retries,
      ImmutableSet<Class<? extends Exception>> retryableExceptions, boolean debug) {
    super(decoratedCall, async);
    this.statsProvider = statsProvider;
    this.serviceName = serviceName;
    this.retries = retries;
    this.retryableExceptions = retryableExceptions;
    this.debug = debug;
  }

  /**
   * Per-method retry counters, created on first use and named
   * {@code <serviceName>_<methodName>_retries}.
   */
  private final LoadingCache<Method, AtomicLong> stats =
      CacheBuilder.newBuilder().build(new CacheLoader<Method, AtomicLong>() {
        @Override public AtomicLong load(Method method) {
          // Thrift does not support overloads - so just the name disambiguates all calls.
          return statsProvider.makeCounter(serviceName + "_" + method.getName() + "_retries");
        }
      });

  /**
   * Invokes {@code method} on the decorated caller, retrying while a retryable exception is
   * encountered and the retry budget has not been used up.
   *
   * @param method The method being invoked.
   * @param args The arguments for the invocation.
   * @param callback The callback to notify for asynchronous calls; dereferenced on the
   *     asynchronous failure path, so it is expected to be non-null when this caller is async.
   * @param connectTimeoutOverride Optional connect timeout override for the first attempt;
   *     asynchronous retries always use {@link #NONBLOCKING_TIMEOUT}.
   * @return The result of the decorated invocation, or {@code null} when an asynchronous call
   *     ended in a failure.
   * @throws Throwable For synchronous calls, the un-retryable failure (or the preceding remote
   *     failure when the un-retryable failure is a {@link TResourceExhaustedException}), or the
   *     last retried failure once retries are exhausted.
   */
  @Override
  public Object call(final Method method, final Object[] args,
      @Nullable final AsyncMethodCallback callback,
      @Nullable final Amount<Long, Time> connectTimeoutOverride) throws Throwable {
    final AtomicLong retryCounter = stats.get(method);
    final AtomicInteger attempts = new AtomicInteger();
    final List<Throwable> exceptions = Lists.newArrayList();

    // NOTE(review): the capture is handed to invoke(); presumably the decorator base class calls
    // fail() when the decorated call errors, and a true return means the failure is final -
    // confirm against CallerDecorator / ResultCapture.
    final ResultCapture capture = new ResultCapture() {
      @Override public void success() {
        // No-op.
      }

      @Override public boolean fail(Throwable t) {
        if (!isRetryable(t)) {
          if (debug) {
            LOG.warning(String.format(
                "Call failed with un-retryable exception of [%s]: %s, previous exceptions: %s",
                t.getClass().getName(), t.getMessage(), combineStackTraces(exceptions)));
          }

          return true;
        } else if (attempts.get() >= retries) {
          exceptions.add(t);

          if (debug) {
            LOG.warning(String.format("Retried %d times, last error: %s, exceptions: %s",
                attempts.get(), t, combineStackTraces(exceptions)));
          }

          return true;
        } else {
          exceptions.add(t);

          if (isAsync() && attempts.incrementAndGet() <= retries) {
            try {
              retryCounter.incrementAndGet();
              // override connect timeout in ThriftCaller to prevent blocking for a connection
              // for async retries (since this is within the callback in the selector thread)
              invoke(method, args, callback, this, NONBLOCKING_TIMEOUT);
            } catch (Throwable throwable) {
              return fail(throwable);
            }
          }

          return false;
        }
      }
    };

    // The most recent throwable caught by the loop below; used as a fallback when no failure was
    // recorded in (or remains in) the exceptions list.
    Throwable lastCaught = null;

    boolean continueLoop;
    do {
      try {
        // If this is an async call, the looping will be handled within the capture.
        return invoke(method, args, callback, capture, connectTimeoutOverride);
      } catch (Throwable t) {
        lastCaught = t;

        if (!isRetryable(t)) {
          Throwable propagated = t;

          if (!exceptions.isEmpty() && (t instanceof TResourceExhaustedException)) {
            // If we've been trucking along through retries that have had remote call failures
            // and we suddenly can't immediately get a connection on the next retry, throw the
            // previous remote call failure - the idea here is that the remote call failure is
            // more interesting than a transient inability to get an immediate connection.
            propagated = exceptions.remove(exceptions.size() - 1);
          }

          if (isAsync()) {
            callback.onError(propagated);
            // The failure has been delivered to the callback; stop here rather than falling
            // through to the retries-exhausted handling below.
            return null;
          } else {
            throw propagated;
          }
        }
      }

      continueLoop = !isAsync() && attempts.incrementAndGet() <= retries;
      if (continueLoop) {
        retryCounter.incrementAndGet();
      }
    } while (continueLoop);

    // Iterables.getLast(Iterable) throws NoSuchElementException on an empty list, so fall back to
    // the throwable that was actually caught when nothing was recorded.
    Throwable lastRetriedException =
        exceptions.isEmpty() ? lastCaught : Iterables.getLast(exceptions);
    if (debug) {
      if (!exceptions.isEmpty()) {
        LOG.warning(
            String.format("Retried %d times, last error: %s, previous exceptions: %s",
                attempts.get(), lastRetriedException, combineStackTraces(exceptions)));
      } else {
        LOG.warning(
            String.format("Retried 1 time, last error: %s", lastRetriedException));
      }
    }

    if (!isAsync()) {
      throw lastRetriedException;
    }
    return null;
  }

  /**
   * Checks whether the given throwable may be retried, using the memoized per-class decision.
   *
   * @param throwable The failure to classify.
   * @return {@code true} if the throwable's class is retryable.
   */
  private boolean isRetryable(Throwable throwable) {
    return isRetryable.getUnchecked(throwable.getClass());
  }

  /** Memoizes the retryable decision for each throwable class that has been encountered. */
  private final LoadingCache<Class<? extends Throwable>, Boolean> isRetryable =
      CacheBuilder.newBuilder().build(new CacheLoader<Class<? extends Throwable>, Boolean>() {
        @Override public Boolean load(Class<? extends Throwable> exceptionClass) {
          return isRetryable(exceptionClass);
        }
      });

  /**
   * Determines whether an exception class is one of the configured retryable exceptions, or a
   * subclass of one of them.
   *
   * @param exceptionClass The class to test.
   * @return {@code true} if the class is, or is assignable to, a retryable exception class.
   */
  private boolean isRetryable(final Class<? extends Throwable> exceptionClass) {
    // Cheap exact-match check first; fall back to a subtype scan.
    if (retryableExceptions.contains(exceptionClass)) {
      return true;
    }
    return Iterables.any(retryableExceptions, new Predicate<Class<? extends Exception>>() {
      @Override public boolean apply(Class<? extends Exception> retryableExceptionClass) {
        return retryableExceptionClass.isAssignableFrom(exceptionClass);
      }
    });
  }

  private static final Joiner STACK_TRACE_JOINER = Joiner.on('\n');

  /**
   * Renders the stack traces of the given exceptions as a single newline-joined string, each
   * prefixed with its 1-based position in the list.
   *
   * @param exceptions The exceptions to render.
   * @return The combined stack traces, or {@code "none"} if the list is empty.
   */
  private static String combineStackTraces(List<Throwable> exceptions) {
    if (exceptions.isEmpty()) {
      return "none";
    } else {
      return STACK_TRACE_JOINER.join(Iterables.transform(exceptions,
          new Function<Throwable, String>() {
            // Running position of the exception being rendered; the function is used for a
            // single pass only.
            private int index = 1;
            @Override public String apply(Throwable exception) {
              return String.format("[%d] %s", index++, Throwables.getStackTraceAsString(exception));
            }
          }));
    }
  }
}