/*
 * Copyright 2016-2017 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.glowroot.central;

import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import javax.annotation.Nullable;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.glowroot.agent.api.Glowroot;
import org.glowroot.agent.api.Instrumentation;
import org.glowroot.central.repo.AgentDao;
import org.glowroot.central.repo.AggregateDao;
import org.glowroot.central.repo.ConfigRepositoryImpl;
import org.glowroot.central.repo.GaugeValueDao;
import org.glowroot.central.repo.HeartbeatDao;
import org.glowroot.central.repo.SyntheticResultDao;
import org.glowroot.common.repo.AgentRepository.AgentRollup;
import org.glowroot.common.repo.util.AlertingService;
import org.glowroot.common.util.Clock;
import org.glowroot.wire.api.model.AgentConfigOuterClass.AgentConfig.AlertConfig;
import org.glowroot.wire.api.model.AgentConfigOuterClass.AgentConfig.AlertConfig.AlertKind;

import static java.util.concurrent.TimeUnit.MINUTES;
import static java.util.concurrent.TimeUnit.SECONDS;

/**
 * Background loop that wakes up once a minute (10 seconds past the minute boundary, see
 * {@link #millisUntilNextRollup(long)}) and, for every top-level agent rollup, rolls up
 * aggregates, gauge values and synthetic results, then checks alerts and pushes pending agent
 * config updates.
 *
 * <p>The loop is started from the constructor on a dedicated single-thread executor and runs
 * until {@link #close()} is called.
 */
class RollupService implements Runnable {

    private static final Logger logger = LoggerFactory.getLogger(RollupService.class);

    // the loop fires every ROLLUP_INTERVAL_MILLIS, shifted by ROLLUP_OFFSET_MILLIS
    private static final long ROLLUP_INTERVAL_MILLIS = 60000;
    private static final long ROLLUP_OFFSET_MILLIS = 10000;

    private final AgentDao agentDao;
    private final AggregateDao aggregateDao;
    private final GaugeValueDao gaugeValueDao;
    private final SyntheticResultDao syntheticResultDao;
    private final HeartbeatDao heartbeatDao;
    private final ConfigRepositoryImpl configRepository;
    private final AlertingService alertingService;
    private final DownstreamServiceImpl downstreamService;
    private final Clock clock;

    private final ExecutorService executor;

    // measures time since this service was created, used to delay heartbeat alert checks
    private final Stopwatch stopwatch = Stopwatch.createStarted();

    private volatile boolean closed;

    /**
     * Stores the collaborators and immediately starts the rollup loop on a new single-thread
     * executor.
     */
    RollupService(AgentDao agentDao, AggregateDao aggregateDao, GaugeValueDao gaugeValueDao,
            SyntheticResultDao syntheticResultDao, HeartbeatDao heartbeatDao,
            ConfigRepositoryImpl configRepository, AlertingService alertingService,
            DownstreamServiceImpl downstreamService, Clock clock) {
        this.agentDao = agentDao;
        this.aggregateDao = aggregateDao;
        this.gaugeValueDao = gaugeValueDao;
        this.syntheticResultDao = syntheticResultDao;
        this.heartbeatDao = heartbeatDao;
        this.configRepository = configRepository;
        this.alertingService = alertingService;
        this.downstreamService = downstreamService;
        this.clock = clock;
        executor = Executors.newSingleThreadExecutor();
        executor.execute(castInitialized(this));
    }

    /**
     * Sleeps until the next rollup time, runs one rollup pass, and repeats until
     * {@link #close()} sets the closed flag. Any failure of a pass is logged and the loop keeps
     * going.
     */
    @Override
    public void run() {
        while (!closed) {
            try {
                Thread.sleep(millisUntilNextRollup(clock.currentTimeMillis()));
                runInternal();
            } catch (InterruptedException e) {
                // interrupt is sent by close(); loop condition decides whether to exit
                continue;
            } catch (Throwable t) {
                logger.error(t.getMessage(), t);
            }
        }
    }

    /**
     * Stops the rollup loop and waits up to 10 seconds for its thread to finish.
     *
     * @throws InterruptedException if the calling thread is interrupted while waiting
     * @throws IllegalStateException if the executor does not terminate within 10 seconds
     */
    void close() throws InterruptedException {
        closed = true;
        // shutdownNow() is needed here to send interrupt to RollupService thread
        executor.shutdownNow();
        if (!executor.awaitTermination(10, SECONDS)) {
            throw new IllegalStateException("Could not terminate executor");
        }
    }

    /**
     * Runs a single rollup pass over every agent rollup returned by
     * {@code agentDao.readAgentRollups()}.
     */
    @Instrumentation.Transaction(transactionType = "Background",
            transactionName = "Outer rollup loop", traceHeadline = "Outer rollup loop",
            timer = "outer rollup loop")
    private void runInternal() throws Exception {
        Glowroot.setTransactionOuter();
        for (AgentRollup agentRollup : agentDao.readAgentRollups()) {
            rollupAggregates(agentRollup, null);
            rollupGauges(agentRollup, null);
            rollupSyntheticMonitors(agentRollup);
            // checking for deleted alerts doesn't depend on rollup
            consumeAgentRollups(agentRollup, this::checkForDeletedAlerts);
            // checking transaction and gauge alerts after rollup since their calculation can
            // depend on rollups depending on time period length
            //
            // these alerts are also checked right after receiving the respective data
            // (transaction/gauge/heartbeat) from agent, but need to also check once a minute in
            // case no data has been received from agent recently
            consumeAgentRollups(agentRollup, this::checkTransactionAlerts);
            consumeAgentRollups(agentRollup, this::checkGaugeAlerts);
            // checking heartbeat alerts doesn't depend on rollups, just here for convenience
            if (stopwatch.elapsed(MINUTES) >= 4) {
                // give agents plenty of time to re-connect after central start-up
                // needs to be at least enough time for grpc max reconnect backoff
                // which is 2 minutes +/- 20% jitter (see
                // io.grpc.internal.ExponentialBackoffPolicy)
                // but better to give a bit extra (4 minutes above) to avoid false heartbeat
                // alert
                consumeAgentRollups(agentRollup, this::checkHeartbeatAlerts);
            }
            // updating agent configs doesn't depend on rollups, just here for convenience
            consumeAgentRollups(agentRollup, this::updateAgentConfigIfConnectedAndNeeded);
        }
    }

    /**
     * Rolls up aggregates for the children first (recursively), then for the given agent
     * rollup. A failure is logged and does not stop the pass; only interruption propagates.
     */
    private void rollupAggregates(AgentRollup agentRollup, @Nullable String parentAgentRollupId)
            throws InterruptedException {
        for (AgentRollup childAgentRollup : agentRollup.children()) {
            rollupAggregates(childAgentRollup, agentRollup.id());
        }
        try {
            aggregateDao.rollup(agentRollup.id(), parentAgentRollupId,
                    agentRollup.children().isEmpty());
        } catch (InterruptedException e) {
            // shutdown requested
            throw e;
        } catch (Exception e) {
            logger.error("{} - {}", agentRollup.id(), e.getMessage(), e);
        }
    }

    // returns true on success, false on failure
    private boolean rollupGauges(AgentRollup agentRollup, @Nullable String parentAgentRollupId)
            throws InterruptedException {
        // important to roll up children first, since gauge values initial roll up from children
        // is done on the 1-min aggregates of the children
        boolean success = true;
        for (AgentRollup childAgentRollup : agentRollup.children()) {
            // every child is attempted even after an earlier sibling has failed
            boolean childSuccess = rollupGauges(childAgentRollup, agentRollup.id());
            success = success && childSuccess;
        }
        if (!success) {
            // also important to not roll up parent if exception occurs while rolling up a
            // child, since gauge values initial roll up from children is done on the 1-min
            // aggregates of the children
            return false;
        }
        try {
            gaugeValueDao.rollup(agentRollup.id(), parentAgentRollupId,
                    agentRollup.children().isEmpty());
            return true;
        } catch (InterruptedException e) {
            // shutdown requested
            throw e;
        } catch (Exception e) {
            logger.error("{} - {}", agentRollup.id(), e.getMessage(), e);
            return false;
        }
    }

    /**
     * Rolls up synthetic results for the children first (recursively), then for the given
     * agent rollup. A failure is logged and does not stop the pass; only interruption
     * propagates.
     */
    private void rollupSyntheticMonitors(AgentRollup agentRollup) throws Exception {
        for (AgentRollup childAgentRollup : agentRollup.children()) {
            rollupSyntheticMonitors(childAgentRollup);
        }
        try {
            syntheticResultDao.rollup(agentRollup.id());
        } catch (InterruptedException e) {
            // shutdown requested
            throw e;
        } catch (Exception e) {
            logger.error("{} - {}", agentRollup.id(), e.getMessage(), e);
        }
    }

    /**
     * Applies the consumer to every descendant of the given agent rollup and then to the agent
     * rollup itself (children before parent).
     */
    private void consumeAgentRollups(AgentRollup agentRollup,
            AgentRollupConsumer agentRollupConsumer) throws Exception {
        for (AgentRollup childAgentRollup : agentRollup.children()) {
            consumeAgentRollups(childAgentRollup, agentRollupConsumer);
        }
        agentRollupConsumer.accept(agentRollup);
    }

    /**
     * Asks the alerting service to check for deleted alerts of the given agent rollup.
     *
     * <p>A failure is logged and swallowed, like every other per-agent step in this class, so
     * that one failing agent rollup does not abort the rest of the pass for all the others.
     * Interruption still propagates.
     */
    private void checkForDeletedAlerts(AgentRollup agentRollup) throws Exception {
        try {
            alertingService.checkForDeletedAlerts(agentRollup.id());
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // shutdown requested
                throw e;
            }
            logger.error("{} - {}", agentRollup.id(), e.getMessage(), e);
        }
    }

    /** Checks every transaction alert configured for the given agent rollup. */
    private void checkTransactionAlerts(AgentRollup agentRollup) throws Exception {
        checkAlerts(agentRollup, AlertKind.TRANSACTION,
                alertConfig -> checkTransactionAlert(agentRollup, alertConfig,
                        clock.currentTimeMillis()));
    }

    /** Checks every gauge alert configured for the given agent rollup. */
    private void checkGaugeAlerts(AgentRollup agentRollup) throws Exception {
        checkAlerts(agentRollup, AlertKind.GAUGE,
                alertConfig -> checkGaugeAlert(agentRollup, alertConfig,
                        clock.currentTimeMillis()));
    }

    /** Checks every heartbeat alert configured for the given agent rollup. */
    private void checkHeartbeatAlerts(AgentRollup agentRollup) throws Exception {
        checkAlerts(agentRollup, AlertKind.HEARTBEAT,
                alertConfig -> checkHeartbeatAlert(agentRollup, alertConfig,
                        clock.currentTimeMillis()));
    }

    /**
     * Delegates to the downstream service for agent rollups that have no children; agent
     * rollups with children are skipped. A failure is logged; only interruption propagates.
     */
    private void updateAgentConfigIfConnectedAndNeeded(AgentRollup agentRollup)
            throws InterruptedException {
        if (!agentRollup.children().isEmpty()) {
            return;
        }
        try {
            downstreamService.updateAgentConfigIfConnectedAndNeeded(agentRollup.id());
        } catch (InterruptedException e) {
            // shutdown requested
            throw e;
        } catch (Exception e) {
            logger.error("{} - {}", agentRollup.id(), e.getMessage(), e);
        }
    }

    /**
     * Loads the alert configs of the given kind for the agent rollup and runs {@code check} on
     * each of them. A failure to load the configs, or a failure of an individual check, is
     * logged; a failing check does not prevent the remaining configs from being checked.
     */
    private void checkAlerts(AgentRollup agentRollup, AlertKind alertKind,
            AlertConfigConsumer check) throws InterruptedException {
        List<AlertConfig> alertConfigs;
        try {
            alertConfigs = configRepository.getAlertConfigs(agentRollup.id(), alertKind);
        } catch (Exception e) {
            logger.error("{} - {}", agentRollup.display(), e.getMessage(), e);
            return;
        }
        if (alertConfigs.isEmpty()) {
            return;
        }
        for (AlertConfig alertConfig : alertConfigs) {
            try {
                check.accept(alertConfig);
            } catch (InterruptedException e) {
                // shutdown requested
                throw e;
            } catch (Exception e) {
                logger.error("{} - {}", agentRollup.display(), e.getMessage(), e);
            }
        }
    }

    @Instrumentation.Transaction(transactionType = "Background",
            transactionName = "Check transaction alert",
            traceHeadline = "Check transaction alert: {{0.id}}",
            timer = "check transaction alert")
    private void checkTransactionAlert(AgentRollup agentRollup, AlertConfig alertConfig,
            long endTime) throws Exception {
        alertingService.checkTransactionAlert(agentRollup.id(), agentRollup.display(),
                alertConfig, endTime);
    }

    @Instrumentation.Transaction(transactionType = "Background",
            transactionName = "Check gauge alert",
            traceHeadline = "Check gauge alert: {{0.id}}", timer = "check gauge alert")
    private void checkGaugeAlert(AgentRollup agentRollup, AlertConfig alertConfig,
            long endTime) throws Exception {
        alertingService.checkGaugeAlert(agentRollup.id(), agentRollup.display(), alertConfig,
                endTime);
    }

    @Instrumentation.Transaction(transactionType = "Background",
            transactionName = "Check heartbeat alert",
            traceHeadline = "Check heartbeat alert: {{0.id}}", timer = "check heartbeat alert")
    private void checkHeartbeatAlert(AgentRollup agentRollup, AlertConfig alertConfig,
            long endTime) throws Exception {
        long startTime = endTime - SECONDS.toMillis(alertConfig.getTimePeriodSeconds());
        // the alert is considered triggered when no heartbeat exists in the time period
        boolean currentlyTriggered = !heartbeatDao.exists(agentRollup.id(), startTime, endTime);
        alertingService.sendHeartbeatAlertIfNeeded(agentRollup.id(), agentRollup.display(),
                alertConfig, currentlyTriggered);
    }

    /**
     * Returns the number of milliseconds from {@code currentTimeMillis} until the next point in
     * time that is 10 seconds past a whole minute. The result is always in the range
     * 1..60000; when called exactly on such a point it returns a full 60000.
     *
     * <p>{@link Math#floorMod(long, long)} is used instead of {@code %} so that inputs below
     * 10000 (where the shifted value is negative) still produce a result within that range.
     */
    @VisibleForTesting
    static long millisUntilNextRollup(long currentTimeMillis) {
        return ROLLUP_INTERVAL_MILLIS
                - Math.floorMod(currentTimeMillis - ROLLUP_OFFSET_MILLIS, ROLLUP_INTERVAL_MILLIS);
    }

    // identity cast used to pass "this" to the executor from inside the constructor
    @SuppressWarnings("return.type.incompatible")
    private static <T> /*@Initialized*/ T castInitialized(/*@UnderInitialization*/ T obj) {
        return obj;
    }

    /** Callback applied to an agent rollup; may throw any exception. */
    @FunctionalInterface
    interface AgentRollupConsumer {
        void accept(AgentRollup agentRollup) throws Exception;
    }

    /** Callback applied to an alert config; may throw any exception. */
    @FunctionalInterface
    interface AlertConfigConsumer {
        void accept(AlertConfig alertConfig) throws Exception;
    }
}