package com.linkedin.thirdeye.anomaly.alert.v2; import com.linkedin.thirdeye.anomaly.ThirdEyeAnomalyConfiguration; import com.linkedin.thirdeye.anomaly.alert.AlertTaskInfo; import com.linkedin.thirdeye.anomaly.alert.AlertTaskRunner; import com.linkedin.thirdeye.anomaly.alert.grouping.AlertGrouper; import com.linkedin.thirdeye.anomaly.alert.grouping.AlertGrouperFactory; import com.linkedin.thirdeye.anomaly.alert.grouping.DummyAlertGrouper; import com.linkedin.thirdeye.anomaly.alert.grouping.filter.AlertGroupFilter; import com.linkedin.thirdeye.anomaly.alert.grouping.filter.AlertGroupFilterFactory; import com.linkedin.thirdeye.anomaly.alert.grouping.SimpleGroupedAnomalyMerger; import com.linkedin.thirdeye.anomaly.alert.template.pojo.MetricDimensionReport; import com.linkedin.thirdeye.anomaly.alert.util.AlertFilterHelper; import com.linkedin.thirdeye.anomaly.alert.util.AnomalyReportGenerator; import com.linkedin.thirdeye.anomaly.alert.util.DataReportHelper; import com.linkedin.thirdeye.anomaly.alert.util.EmailHelper; import com.linkedin.thirdeye.anomaly.task.TaskContext; import com.linkedin.thirdeye.anomaly.task.TaskInfo; import com.linkedin.thirdeye.anomaly.task.TaskResult; import com.linkedin.thirdeye.anomaly.task.TaskRunner; import com.linkedin.thirdeye.anomaly.utils.ThirdeyeMetricsUtil; import com.linkedin.thirdeye.api.DimensionMap; import com.linkedin.thirdeye.client.DAORegistry; import com.linkedin.thirdeye.dashboard.views.contributor.ContributorViewResponse; import com.linkedin.thirdeye.datalayer.bao.AlertConfigManager; import com.linkedin.thirdeye.datalayer.bao.GroupedAnomalyResultsManager; import com.linkedin.thirdeye.datalayer.bao.MergedAnomalyResultManager; import com.linkedin.thirdeye.datalayer.bao.MetricConfigManager; import com.linkedin.thirdeye.datalayer.dto.AlertConfigDTO; import com.linkedin.thirdeye.datalayer.dto.GroupedAnomalyResultsDTO; import com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO; import com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO; import com.linkedin.thirdeye.datalayer.pojo.AlertConfigBean; import com.linkedin.thirdeye.detector.email.filter.AlertFilterFactory; import freemarker.template.Configuration; import freemarker.template.Template; import freemarker.template.TemplateExceptionHandler; import java.io.ByteArrayOutputStream; import java.io.OutputStreamWriter; import java.io.Writer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.TimeZone; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.collections.MapUtils; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.commons.mail.EmailException; import org.apache.commons.mail.HtmlEmail; import org.joda.time.DateTimeZone; import org.quartz.JobExecutionException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class AlertTaskRunnerV2 implements TaskRunner { private static final Logger LOG = LoggerFactory.getLogger(AlertTaskRunner.class); public static final TimeZone DEFAULT_TIME_ZONE = TimeZone.getTimeZone("America/Los_Angeles"); public static final String CHARSET = "UTF-8"; private final MergedAnomalyResultManager anomalyMergedResultDAO; private final AlertConfigManager alertConfigDAO; private final MetricConfigManager metricConfigManager; private final GroupedAnomalyResultsManager groupedAnomalyResultsDAO; private AlertConfigDTO alertConfig; private ThirdEyeAnomalyConfiguration thirdeyeConfig; private AlertFilterFactory alertFilterFactory; private final String MAX_ALLOWED_MERGE_GAP_KEY = "maxAllowedMergeGap"; private final long DEFAULT_MAX_ALLOWED_MERGE_GAP = 14400000L; public AlertTaskRunnerV2() { anomalyMergedResultDAO = DAORegistry.getInstance().getMergedAnomalyResultDAO(); alertConfigDAO = DAORegistry.getInstance().getAlertConfigDAO(); metricConfigManager = DAORegistry.getInstance().getMetricConfigDAO(); groupedAnomalyResultsDAO = DAORegistry.getInstance().getGroupedAnomalyResultsDAO(); } @Override public List<TaskResult> execute(TaskInfo taskInfo, TaskContext taskContext) throws Exception { List<TaskResult> taskResult = new ArrayList<>(); AlertTaskInfo alertTaskInfo = (AlertTaskInfo) taskInfo; alertConfig = alertTaskInfo.getAlertConfigDTO(); thirdeyeConfig = taskContext.getThirdEyeAnomalyConfiguration(); alertFilterFactory = new AlertFilterFactory(thirdeyeConfig.getAlertFilterConfigPath()); try { LOG.info("Begin executing task {}", taskInfo); runTask(); } catch (Exception t) { LOG.error("Task failed with exception:", t); sendFailureEmail(t); // Let task driver mark this task failed throw t; } return taskResult; } // TODO : separate code path for new vs old alert config ! private void runTask() throws Exception { LOG.info("Starting email report for id : {}, name : {} ", alertConfig.getId(), alertConfig.getName()); sendAnomalyReport(); sendScheduledDataReport(); ThirdeyeMetricsUtil.alertTaskSuccessCounter.inc(); } private void sendAnomalyReport() throws Exception { AlertConfigBean.EmailConfig emailConfig = alertConfig.getEmailConfig(); if (emailConfig != null && emailConfig.getFunctionIds() != null) { List<Long> functionIds = alertConfig.getEmailConfig().getFunctionIds(); List<MergedAnomalyResultDTO> mergedAnomaliesAllResults = new ArrayList<>(); long lastNotifiedAnomaly = emailConfig.getAnomalyWatermark(); for (Long functionId : functionIds) { List<MergedAnomalyResultDTO> resultsForFunction = anomalyMergedResultDAO .findUnNotifiedByFunctionIdAndIdGreaterThan(functionId, lastNotifiedAnomaly); if (CollectionUtils.isNotEmpty(resultsForFunction)) { mergedAnomaliesAllResults.addAll(resultsForFunction); } // fetch anomalies having id lesser than the watermark for the same function with notified = false & endTime > last one day // these anomalies are the ones that did not qualify filtration rule and got modified. // We should add these again so that these could be included in email if qualified through filtration rule List<MergedAnomalyResultDTO> filteredAnomalies = anomalyMergedResultDAO .findUnNotifiedByFunctionIdAndIdLesserThanAndEndTimeGreaterThanLastOneDay(functionId, lastNotifiedAnomaly); if (CollectionUtils.isNotEmpty(filteredAnomalies)) { mergedAnomaliesAllResults.addAll(filteredAnomalies); } } // apply filtration rule List<MergedAnomalyResultDTO> results = AlertFilterHelper.applyFiltrationRule(mergedAnomaliesAllResults, alertFilterFactory); if (results.isEmpty()) { LOG.info("Zero anomalies found, skipping sending email"); } else { // TODO: Add dimensional alert grouping before the stage of task runner? // There are two approaches to solve the problem of alert grouping: // 1. Anomaly results from detection --> Grouped anomalies from grouper --> Alerter sends emails on grouped anomalies // 2. Anomaly results from detection --> Alerter performs simple grouping and sends alerts in one go // Current implementation uses the second approach for experimental purpose. We might need to move to // approach 1 in order to consider multi-metric grouping. // Input: a list of anomalies. // Output: lists of anomalies; each list contains the anomalies of the same group. AlertGrouper alertGrouper = AlertGrouperFactory.fromSpec(alertConfig.getGroupByConfig()); Map<DimensionMap, GroupedAnomalyResultsDTO> groupedAnomalyResultsMap = alertGrouper.group(results); Map<DimensionMap, GroupedAnomalyResultsDTO> filteredGroupedAnomalyResultsMap; // DummyAlertGroupFilter does not generate any GroupedAnomaly. Thus, we don't apply any additional processes. if (alertGrouper instanceof DummyAlertGrouper) { filteredGroupedAnomalyResultsMap = groupedAnomalyResultsMap; } else { filteredGroupedAnomalyResultsMap = timeBasedMergeAndFilterGroupedAnomalies(groupedAnomalyResultsMap); } for (Map.Entry<DimensionMap, GroupedAnomalyResultsDTO> entry : filteredGroupedAnomalyResultsMap.entrySet()) { // Anomaly results for this group DimensionMap dimensions = entry.getKey(); GroupedAnomalyResultsDTO groupedAnomalyDTO = entry.getValue(); List<MergedAnomalyResultDTO> resultsForThisGroup = groupedAnomalyDTO.getAnomalyResults(); // Append auxiliary recipients for this group String recipientsForThisGroup = alertConfig.getRecipients(); // TODO: Replace with AuxiliaryRecipient provider String auxiliaryRecipients = alertGrouper.groupEmailRecipients(dimensions); if (StringUtils.isNotBlank(auxiliaryRecipients)) { recipientsForThisGroup = recipientsForThisGroup + EmailHelper.EMAIL_ADDRESS_SEPARATOR + auxiliaryRecipients; } // Append group name after config name if dimensions of this group is not empty String emailSubjectName = alertConfig.getName(); if (dimensions.size() != 0) { String groupName = dimensions.toJavaString(); emailSubjectName = emailSubjectName + " " + groupName; } // Generate and send out an anomaly report for this group AnomalyReportGenerator.getInstance() .buildReport(resultsForThisGroup, thirdeyeConfig, recipientsForThisGroup, alertConfig.getFromAddress(), emailSubjectName); // Update notified flag if (alertGrouper instanceof DummyAlertGrouper) { // DummyAlertGroupFilter does not generate real GroupedAnomaly, so the flag has to be set on merged anomalies. updateNotifiedStatus(resultsForThisGroup); } else { // For other alert groupers, the notified flag is set on the grouped anomalies. groupedAnomalyDTO.setNotified(true); groupedAnomalyResultsDAO.update(groupedAnomalyDTO); } } // update anomaly watermark in alertConfig long lastNotifiedAlertId = emailConfig.getAnomalyWatermark(); for (MergedAnomalyResultDTO anomalyResult : results) { if (anomalyResult.getId() > lastNotifiedAlertId) { lastNotifiedAlertId = anomalyResult.getId(); } } if (lastNotifiedAlertId != emailConfig.getAnomalyWatermark()) { alertConfig.getEmailConfig().setAnomalyWatermark(lastNotifiedAlertId); alertConfigDAO.update(alertConfig); } } } } private void sendScheduledDataReport() throws Exception { AlertConfigBean.ReportConfigCollection reportConfigCollection = alertConfig.getReportConfigCollection(); if (reportConfigCollection != null && reportConfigCollection.isEnabled()) { if (reportConfigCollection.getReportMetricConfigs() != null && reportConfigCollection.getReportMetricConfigs().size() > 0) { List<MetricDimensionReport> metricDimensionValueReports; // Used later to provide collection for a metric to help build the url link in report Map<String, MetricConfigDTO> metricMap = new HashMap<>(); List<ContributorViewResponse> reports = new ArrayList<>(); for (int i = 0; i < reportConfigCollection.getReportMetricConfigs().size(); i++) { AlertConfigBean.ReportMetricConfig reportMetricConfig = reportConfigCollection.getReportMetricConfigs().get(i); MetricConfigDTO metricConfig = metricConfigManager.findById(reportMetricConfig.getMetricId()); List<String> dimensions = reportMetricConfig.getDimensions(); if (dimensions != null && dimensions.size() > 0) { for (String dimension : dimensions) { ContributorViewResponse report = EmailHelper .getContributorDataForDataReport(metricConfig.getDataset(), metricConfig.getName(), Arrays.asList(dimension), reportMetricConfig.getCompareMode(), alertConfig.getReportConfigCollection().getDelayOffsetMillis(), alertConfig.getReportConfigCollection().isIntraDay()); if (report != null) { metricMap.put(metricConfig.getName(), metricConfig); reports.add(report); } } } } if (reports.size() == 0) { LOG.warn("Could not fetch report data for " + alertConfig.getName()); return; } long reportStartTs = reports.get(0).getTimeBuckets().get(0).getCurrentStart(); metricDimensionValueReports = DataReportHelper.getInstance().getDimensionReportList(reports); for (int i = 0; i < metricDimensionValueReports.size(); i++) { MetricDimensionReport report = metricDimensionValueReports.get(i); report.setDataset(metricMap.get(report.getMetricName()).getDataset()); long metricId = metricMap.get(report.getMetricName()).getId(); report.setMetricId(metricId); for (AlertConfigBean.ReportMetricConfig reportMetricConfig : reportConfigCollection .getReportMetricConfigs()) { if (reportMetricConfig.getMetricId() == metricId) { metricDimensionValueReports.get(i) .setCompareMode(reportMetricConfig.getCompareMode().name()); } } } Configuration freemarkerConfig = new Configuration(Configuration.VERSION_2_3_21); freemarkerConfig.setClassForTemplateLoading(getClass(), "/com/linkedin/thirdeye/detector/"); freemarkerConfig.setDefaultEncoding(CHARSET); freemarkerConfig.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER); Map<String, Object> templateData = new HashMap<>(); DateTimeZone timeZone = DateTimeZone.forTimeZone(DEFAULT_TIME_ZONE); DataReportHelper.DateFormatMethod dateFormatMethod = new DataReportHelper.DateFormatMethod(timeZone); templateData.put("timeZone", timeZone); templateData.put("dateFormat", dateFormatMethod); templateData.put("dashboardHost", thirdeyeConfig.getDashboardHost()); templateData.put("fromEmail", alertConfig.getFromAddress()); templateData.put("contactEmail", alertConfig.getReportConfigCollection().getContactEmail()); templateData.put("reportStartDateTime", reportStartTs); templateData.put("metricDimensionValueReports", metricDimensionValueReports); ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (Writer out = new OutputStreamWriter(baos, CHARSET)) { Template template = freemarkerConfig.getTemplate("data-report-by-metric-dimension.ftl"); template.process(templateData, out); // Send email HtmlEmail email = new HtmlEmail(); String alertEmailSubject = String.format("Thirdeye data report : %s", alertConfig.getName()); String alertEmailHtml = new String(baos.toByteArray(), CHARSET); EmailHelper .sendEmailWithHtml(email, thirdeyeConfig.getSmtpConfiguration(), alertEmailSubject, alertEmailHtml, alertConfig.getFromAddress(), alertConfig.getRecipients()); } catch (Exception e) { throw new JobExecutionException(e); } } } } private void updateNotifiedStatus(List<MergedAnomalyResultDTO> mergedResults) { for (MergedAnomalyResultDTO mergedResult : mergedResults) { mergedResult.setNotified(true); anomalyMergedResultDAO.update(mergedResult); } } private void sendFailureEmail(Throwable t) throws JobExecutionException { HtmlEmail email = new HtmlEmail(); String subject = String .format("[ThirdEye Anomaly Detector] FAILED ALERT ID=%d for config %s", alertConfig.getId(), alertConfig.getName()); String textBody = String .format("%s%n%nException:%s", alertConfig.toString(), ExceptionUtils.getStackTrace(t)); try { EmailHelper .sendEmailWithTextBody(email, thirdeyeConfig.getSmtpConfiguration(), subject, textBody, thirdeyeConfig.getFailureFromAddress(), thirdeyeConfig.getFailureToAddress()); } catch (EmailException e) { throw new JobExecutionException(e); } } /** * Given a map, which maps from a dimension map to a grouped anomaly, of new GroupedAnomalies, this method performs * a time based merged with existing grouped anomalies, which are stored in a DB. Afterwards, if a merged grouped * anomaly passes through the filter, it is returned in a map. * * @param groupedAnomalyResultsMap a map of new GroupedAnomaly. * * @return a map of merged GroupedAnomaly that pass through the filter. */ private Map<DimensionMap,GroupedAnomalyResultsDTO> timeBasedMergeAndFilterGroupedAnomalies( Map<DimensionMap, GroupedAnomalyResultsDTO> groupedAnomalyResultsMap) { // Populate basic fields of the new grouped anomaly for (Map.Entry<DimensionMap, GroupedAnomalyResultsDTO> entry : groupedAnomalyResultsMap.entrySet()) { GroupedAnomalyResultsDTO groupedAnomaly = entry.getValue(); DimensionMap dimensions = entry.getKey(); groupedAnomaly.setAlertConfigId(alertConfig.getId()); groupedAnomaly.setDimensions(dimensions); } Map<DimensionMap, GroupedAnomalyResultsDTO> mergedGroupedAnomalyResultsMap = this.timeBasedMergeGroupedAnomalyResults(groupedAnomalyResultsMap); // Read and update grouped anomalies in DB for (Map.Entry<DimensionMap, GroupedAnomalyResultsDTO> entry : mergedGroupedAnomalyResultsMap.entrySet()) { GroupedAnomalyResultsDTO groupedAnomaly = entry.getValue(); groupedAnomalyResultsDAO.save(groupedAnomaly); for (MergedAnomalyResultDTO mergedAnomalyResultDTO : groupedAnomaly.getAnomalyResults()) { if (!mergedAnomalyResultDTO.isNotified()) { mergedAnomalyResultDTO.setNotified(true); anomalyMergedResultDAO.update(mergedAnomalyResultDTO); } } } // Filter out the grouped anomalies that trigger an alert return this.filterMergedGroupedAnomalyResults(mergedGroupedAnomalyResultsMap); } /** * Given a map, which maps from a dimension map to a grouped anomaly, of new GroupedAnomalies, this method performs * a time based merged with existing grouped anomalies, which are stored in a DB. * * @param newGroupedAnomalies a map of new GroupedAnomaly. * * @return a map of merged GroupedAnomaly in time dimensions. */ private Map<DimensionMap, GroupedAnomalyResultsDTO> timeBasedMergeGroupedAnomalyResults( Map<DimensionMap, GroupedAnomalyResultsDTO> newGroupedAnomalies) { // Parse max allowed merge gap from config long maxAllowedMergeGap = DEFAULT_MAX_ALLOWED_MERGE_GAP; Map<String, String> mergeConfig = alertConfig.getGroupTimeBasedMergeConfig(); if (MapUtils.isNotEmpty(mergeConfig)) { if (mergeConfig.containsKey(MAX_ALLOWED_MERGE_GAP_KEY)) { try { Long value = Long.parseLong(mergeConfig.get(MAX_ALLOWED_MERGE_GAP_KEY)); maxAllowedMergeGap = value; } catch (Exception e) { LOG.warn("Failed to parse {} as 'MAX_ALLOWED_MERGE_GAP_KEY'; Use default value {}", mergeConfig.get(MAX_ALLOWED_MERGE_GAP_KEY), DEFAULT_MAX_ALLOWED_MERGE_GAP); } } } // Retrieve the most recent grouped anomalies from DB // TODO: Get update time from merged anomaly after the field "updateTime" is updated in DB correctly Map<DimensionMap, GroupedAnomalyResultsDTO> recentGroupedAnomalies = new HashMap<>(); for (Map.Entry<DimensionMap, GroupedAnomalyResultsDTO> groupedAnomalyEntry : newGroupedAnomalies.entrySet()) { DimensionMap dimensions = groupedAnomalyEntry.getKey(); GroupedAnomalyResultsDTO newGroupedAnomaly = groupedAnomalyEntry.getValue(); long approximateUpdateTime = newGroupedAnomaly.getEndTime(); GroupedAnomalyResultsDTO recentGroupedAnomaly = groupedAnomalyResultsDAO .findMostRecentInTimeWindow(alertConfig.getId(), dimensions.toString(), approximateUpdateTime - maxAllowedMergeGap, approximateUpdateTime); recentGroupedAnomalies.put(dimensions, recentGroupedAnomaly); } // Merge grouped anomalies return SimpleGroupedAnomalyMerger.timeBasedMergeGroupedAnomalyResults(recentGroupedAnomalies, newGroupedAnomalies); } /** * Given a map, which maps from a dimension map to a grouped anomaly, of new GroupedAnomalies, this method returns * the GroupedAnomalies that pass through the filter. * * @param mergedGroupedAnomalies a map of GroupedAnomaly. * * @return a map of GroupedAnomaly that pass through the filter. */ private Map<DimensionMap, GroupedAnomalyResultsDTO> filterMergedGroupedAnomalyResults( Map<DimensionMap, GroupedAnomalyResultsDTO> mergedGroupedAnomalies) { Map<DimensionMap, GroupedAnomalyResultsDTO> filteredGroupedAnomalies = new HashMap<>(); AlertGroupFilter filter = AlertGroupFilterFactory.fromSpec(alertConfig.getGroupFilterConfig()); for (Map.Entry<DimensionMap, GroupedAnomalyResultsDTO> groupedAnomalyEntry : mergedGroupedAnomalies.entrySet()) { GroupedAnomalyResultsDTO groupedAnomaly = groupedAnomalyEntry.getValue(); if (!groupedAnomaly.isNotified()) { assert(groupedAnomalyEntry.getKey().equals(groupedAnomaly.getDimensions())); if (filter.isQualified(groupedAnomaly)) { filteredGroupedAnomalies.put(groupedAnomalyEntry.getKey(), groupedAnomaly); } } } return filteredGroupedAnomalies; } }