/* * Copyright 2015-2017 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.glowroot.agent.central; import java.net.SocketAddress; import java.net.URI; import java.util.Collections; import java.util.List; import java.util.Random; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.concurrent.GuardedBy; import com.google.common.base.Stopwatch; import com.google.common.collect.Lists; import io.grpc.Attributes; import io.grpc.ManagedChannel; import io.grpc.NameResolver; import io.grpc.ResolvedServerInfo; import io.grpc.netty.NegotiationType; import io.grpc.netty.NettyChannelBuilder; import io.grpc.stub.StreamObserver; import io.grpc.util.RoundRobinLoadBalancerFactory; import io.netty.channel.EventLoopGroup; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.glowroot.agent.util.RateLimitedLogger; import org.glowroot.agent.util.ThreadFactories; import org.glowroot.common.util.OnlyUsedByTests; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.SECONDS; class CentralConnection { private static final Logger logger = LoggerFactory.getLogger(CentralConnection.class); // log startup messages using logger name "org.glowroot" private static final Logger startupLogger = LoggerFactory.getLogger("org.glowroot"); // back pressure on connection to the central collector private static final int PENDING_LIMIT = 100; @SuppressWarnings("nullness:type.argument.type.incompatible") private final ThreadLocal<Boolean> suppressLogCollector = new ThreadLocal<Boolean>() { @Override protected Boolean initialValue() { return false; } }; private final EventLoopGroup eventLoopGroup; private final ExecutorService channelExecutor; private final ManagedChannel channel; private final ScheduledExecutorService retryExecutor; private final AtomicBoolean inConnectionFailure; private final Random random = new Random(); private final RateLimitedLogger backPressureLogger = new RateLimitedLogger(CentralConnection.class); // count does not include init call @GuardedBy("backPressureLogger") private int pendingRequestCount; private final RateLimitedLogger connectionErrorLogger = new RateLimitedLogger(CentralConnection.class); private volatile boolean initCallSucceeded; private volatile boolean closed; CentralConnection(List<SocketAddress> collectorAddresses, AtomicBoolean inConnectionFailure) { eventLoopGroup = EventLoopGroups.create("Glowroot-GRPC-Worker-ELG"); channelExecutor = Executors.newSingleThreadExecutor(ThreadFactories.create("Glowroot-GRPC-Executor")); channel = NettyChannelBuilder .forTarget("dummy") .nameResolverFactory(new SimpleNameResolverFactory(collectorAddresses)) .loadBalancerFactory(RoundRobinLoadBalancerFactory.getInstance()) .eventLoopGroup(eventLoopGroup) .executor(channelExecutor) .negotiationType(NegotiationType.PLAINTEXT) .build(); retryExecutor = Executors.newSingleThreadScheduledExecutor( ThreadFactories.create("Glowroot-Collector-Retry")); this.inConnectionFailure = inConnectionFailure; } boolean suppressLogCollector() { return suppressLogCollector.get(); } ManagedChannel getChannel() { return channel; } <T extends /*@NonNull*/ Object> void callOnce(GrpcCall<T> call) { callWithAFewRetries(0, -1, call); } // important that these calls are idempotent <T extends /*@NonNull*/ Object> void callWithAFewRetries(GrpcCall<T> call) { callWithAFewRetries(0, call); } // important that these calls are idempotent <T extends /*@NonNull*/ Object> void callWithAFewRetries(int initialDelayMillis, GrpcCall<T> call) { callWithAFewRetries(initialDelayMillis, 60, call); } // important that these calls are idempotent private <T extends /*@NonNull*/ Object> void callWithAFewRetries(int initialDelayMillis, final int maxTotalInSeconds, final GrpcCall<T> call) { if (closed) { return; } if (inConnectionFailure.get()) { return; } synchronized (backPressureLogger) { if (pendingRequestCount >= PENDING_LIMIT) { backPressureLogger.warn("not sending data to the central collector because of an" + " excessive backlog of {} requests in progress", PENDING_LIMIT); return; } pendingRequestCount++; } // TODO revisit retry/backoff after next grpc version // 60 seconds should be enough time to restart central collector instance without losing // data (though better to use central collector cluster) // // this cannot retry over too long a period since it retains memory of rpc message for // that duration if (initialDelayMillis > 0) { retryExecutor.schedule(new Runnable() { @Override public void run() { try { call.call(new RetryingStreamObserver<T>(call, maxTotalInSeconds, maxTotalInSeconds, false)); } catch (Throwable t) { logger.error(t.getMessage(), t); } } }, initialDelayMillis, MILLISECONDS); } else { call.call(new RetryingStreamObserver<T>(call, maxTotalInSeconds, maxTotalInSeconds, false)); } } // important that these calls are idempotent <T extends /*@NonNull*/ Object> void callInit(GrpcCall<T> call) { if (closed) { return; } // important here not to check inConnectionFailure, since need this to succeed if/when // connection is re-established call.call(new RetryingStreamObserver<T>(call, 15, -1, true)); } void suppressLogCollector(Runnable runnable) { boolean priorValue = suppressLogCollector.get(); suppressLogCollector.set(true); try { runnable.run(); } finally { suppressLogCollector.set(priorValue); } } @OnlyUsedByTests void close() { closed = true; retryExecutor.shutdown(); channel.shutdown(); } @OnlyUsedByTests void awaitClose() throws InterruptedException { if (!retryExecutor.awaitTermination(10, SECONDS)) { throw new IllegalStateException("Could not terminate executor"); } if (!channel.awaitTermination(10, SECONDS)) { throw new IllegalStateException("Could not terminate channel"); } channelExecutor.shutdown(); if (!channelExecutor.awaitTermination(10, SECONDS)) { throw new IllegalStateException("Could not terminate executor"); } if (!eventLoopGroup.shutdownGracefully(0, 0, SECONDS).await(10, SECONDS)) { throw new IllegalStateException("Could not terminate event loop group"); } } static abstract class GrpcCall<T extends /*@NonNull*/ Object> { abstract void call(StreamObserver<T> responseObserver); void doWithResponse(@SuppressWarnings("unused") T response) {} } private class RetryingStreamObserver<T extends /*@NonNull*/ Object> implements StreamObserver<T> { private final GrpcCall<T> grpcCall; private final int maxSingleDelayInSeconds; private final int maxTotalInSeconds; private final boolean init; private final Stopwatch stopwatch = Stopwatch.createStarted(); private volatile boolean initErrorLogged; private volatile long nextDelayInSeconds = 4; private RetryingStreamObserver(GrpcCall<T> grpcCall, int maxSingleDelayInSeconds, int maxTotalInSeconds, boolean init) { this.grpcCall = grpcCall; this.maxSingleDelayInSeconds = maxSingleDelayInSeconds; this.maxTotalInSeconds = maxTotalInSeconds; this.init = init; } @Override public void onNext(T value) { grpcCall.doWithResponse(value); } @Override public void onError(final Throwable t) { if (closed) { decrementPendingRequestCount(); return; } if (init && !initErrorLogged) { startupLogger.warn("unable to establish connection with the central collector" + " (will keep trying): {}", t.getMessage()); logger.debug(t.getMessage(), t); initErrorLogged = true; } if (inConnectionFailure.get()) { decrementPendingRequestCount(); return; } suppressLogCollector(new Runnable() { @Override public void run() { logger.debug(t.getMessage(), t); } }); if (!init && stopwatch.elapsed(SECONDS) > maxTotalInSeconds) { if (initCallSucceeded) { connectionErrorLogger.warn("error sending data to the central collector: {}", t.getMessage(), t); } decrementPendingRequestCount(); return; } // retry delay doubles on average each time, randomized +/- 50% double randomizedDoubling = 0.5 + random.nextDouble(); long currDelay = (long) (nextDelayInSeconds * randomizedDoubling); nextDelayInSeconds = Math.min(nextDelayInSeconds * 2, maxSingleDelayInSeconds); // TODO revisit retry/backoff after next grpc version retryExecutor.schedule(new Runnable() { @Override public void run() { try { grpcCall.call(RetryingStreamObserver.this); } catch (final Throwable t) { // intentionally capturing InterruptedException here as well to ensure // reconnect is attempted no matter what suppressLogCollector(new Runnable() { @Override public void run() { logger.error(t.getMessage(), t); } }); } } }, currDelay, SECONDS); } @Override public void onCompleted() { if (init) { initCallSucceeded = true; } decrementPendingRequestCount(); } private void decrementPendingRequestCount() { if (!init) { synchronized (backPressureLogger) { pendingRequestCount--; } } } } private static class SimpleNameResolverFactory extends NameResolver.Factory { private final List<SocketAddress> collectorAddresses; private SimpleNameResolverFactory(List<SocketAddress> collectorAddresses) { this.collectorAddresses = collectorAddresses; } @Override public NameResolver newNameResolver(URI targetUri, Attributes params) { return new SimpleNameResolver(collectorAddresses); } @Override public String getDefaultScheme() { return "dummy-scheme"; } } private static class SimpleNameResolver extends NameResolver { private final List<SocketAddress> collectorAddresses; private SimpleNameResolver(List<SocketAddress> collectorAddresses) { this.collectorAddresses = collectorAddresses; } @Override public String getServiceAuthority() { return "dummy-service-authority"; } @Override public void start(Listener listener) { List<ResolvedServerInfo> resolvedServerInfos = Lists.newArrayList(); for (SocketAddress collectorAddress : collectorAddresses) { resolvedServerInfos.add(new ResolvedServerInfo(collectorAddress, Attributes.EMPTY)); } Collections.shuffle(resolvedServerInfos); listener.onUpdate(Collections.singletonList(resolvedServerInfos), Attributes.EMPTY); } @Override public void shutdown() {} } }