package com.linkedin.thirdeye.anomaly.alert.v2;
import com.linkedin.thirdeye.anomaly.ThirdEyeAnomalyConfiguration;
import com.linkedin.thirdeye.anomaly.alert.AlertTaskInfo;
import com.linkedin.thirdeye.anomaly.alert.AlertTaskRunner;
import com.linkedin.thirdeye.anomaly.alert.grouping.AlertGrouper;
import com.linkedin.thirdeye.anomaly.alert.grouping.AlertGrouperFactory;
import com.linkedin.thirdeye.anomaly.alert.grouping.DummyAlertGrouper;
import com.linkedin.thirdeye.anomaly.alert.grouping.filter.AlertGroupFilter;
import com.linkedin.thirdeye.anomaly.alert.grouping.filter.AlertGroupFilterFactory;
import com.linkedin.thirdeye.anomaly.alert.grouping.SimpleGroupedAnomalyMerger;
import com.linkedin.thirdeye.anomaly.alert.template.pojo.MetricDimensionReport;
import com.linkedin.thirdeye.anomaly.alert.util.AlertFilterHelper;
import com.linkedin.thirdeye.anomaly.alert.util.AnomalyReportGenerator;
import com.linkedin.thirdeye.anomaly.alert.util.DataReportHelper;
import com.linkedin.thirdeye.anomaly.alert.util.EmailHelper;
import com.linkedin.thirdeye.anomaly.task.TaskContext;
import com.linkedin.thirdeye.anomaly.task.TaskInfo;
import com.linkedin.thirdeye.anomaly.task.TaskResult;
import com.linkedin.thirdeye.anomaly.task.TaskRunner;
import com.linkedin.thirdeye.anomaly.utils.ThirdeyeMetricsUtil;
import com.linkedin.thirdeye.api.DimensionMap;
import com.linkedin.thirdeye.client.DAORegistry;
import com.linkedin.thirdeye.dashboard.views.contributor.ContributorViewResponse;
import com.linkedin.thirdeye.datalayer.bao.AlertConfigManager;
import com.linkedin.thirdeye.datalayer.bao.GroupedAnomalyResultsManager;
import com.linkedin.thirdeye.datalayer.bao.MergedAnomalyResultManager;
import com.linkedin.thirdeye.datalayer.bao.MetricConfigManager;
import com.linkedin.thirdeye.datalayer.dto.AlertConfigDTO;
import com.linkedin.thirdeye.datalayer.dto.GroupedAnomalyResultsDTO;
import com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO;
import com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO;
import com.linkedin.thirdeye.datalayer.pojo.AlertConfigBean;
import com.linkedin.thirdeye.detector.email.filter.AlertFilterFactory;
import freemarker.template.Configuration;
import freemarker.template.Template;
import freemarker.template.TemplateExceptionHandler;
import java.io.ByteArrayOutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.commons.mail.EmailException;
import org.apache.commons.mail.HtmlEmail;
import org.joda.time.DateTimeZone;
import org.quartz.JobExecutionException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class AlertTaskRunnerV2 implements TaskRunner {
private static final Logger LOG = LoggerFactory.getLogger(AlertTaskRunner.class);
public static final TimeZone DEFAULT_TIME_ZONE = TimeZone.getTimeZone("America/Los_Angeles");
public static final String CHARSET = "UTF-8";
private final MergedAnomalyResultManager anomalyMergedResultDAO;
private final AlertConfigManager alertConfigDAO;
private final MetricConfigManager metricConfigManager;
private final GroupedAnomalyResultsManager groupedAnomalyResultsDAO;
private AlertConfigDTO alertConfig;
private ThirdEyeAnomalyConfiguration thirdeyeConfig;
private AlertFilterFactory alertFilterFactory;
private final String MAX_ALLOWED_MERGE_GAP_KEY = "maxAllowedMergeGap";
private final long DEFAULT_MAX_ALLOWED_MERGE_GAP = 14400000L;
public AlertTaskRunnerV2() {
anomalyMergedResultDAO = DAORegistry.getInstance().getMergedAnomalyResultDAO();
alertConfigDAO = DAORegistry.getInstance().getAlertConfigDAO();
metricConfigManager = DAORegistry.getInstance().getMetricConfigDAO();
groupedAnomalyResultsDAO = DAORegistry.getInstance().getGroupedAnomalyResultsDAO();
}
@Override
public List<TaskResult> execute(TaskInfo taskInfo, TaskContext taskContext)
throws Exception {
List<TaskResult> taskResult = new ArrayList<>();
AlertTaskInfo alertTaskInfo = (AlertTaskInfo) taskInfo;
alertConfig = alertTaskInfo.getAlertConfigDTO();
thirdeyeConfig = taskContext.getThirdEyeAnomalyConfiguration();
alertFilterFactory = new AlertFilterFactory(thirdeyeConfig.getAlertFilterConfigPath());
try {
LOG.info("Begin executing task {}", taskInfo);
runTask();
} catch (Exception t) {
LOG.error("Task failed with exception:", t);
sendFailureEmail(t);
// Let task driver mark this task failed
throw t;
}
return taskResult;
}
// TODO : separate code path for new vs old alert config !
private void runTask() throws Exception {
LOG.info("Starting email report for id : {}, name : {} ", alertConfig.getId(),
alertConfig.getName());
sendAnomalyReport();
sendScheduledDataReport();
ThirdeyeMetricsUtil.alertTaskSuccessCounter.inc();
}
private void sendAnomalyReport() throws Exception {
AlertConfigBean.EmailConfig emailConfig = alertConfig.getEmailConfig();
if (emailConfig != null && emailConfig.getFunctionIds() != null) {
List<Long> functionIds = alertConfig.getEmailConfig().getFunctionIds();
List<MergedAnomalyResultDTO> mergedAnomaliesAllResults = new ArrayList<>();
long lastNotifiedAnomaly = emailConfig.getAnomalyWatermark();
for (Long functionId : functionIds) {
List<MergedAnomalyResultDTO> resultsForFunction = anomalyMergedResultDAO
.findUnNotifiedByFunctionIdAndIdGreaterThan(functionId, lastNotifiedAnomaly);
if (CollectionUtils.isNotEmpty(resultsForFunction)) {
mergedAnomaliesAllResults.addAll(resultsForFunction);
}
// fetch anomalies having id lesser than the watermark for the same function with notified = false & endTime > last one day
// these anomalies are the ones that did not qualify filtration rule and got modified.
// We should add these again so that these could be included in email if qualified through filtration rule
List<MergedAnomalyResultDTO> filteredAnomalies = anomalyMergedResultDAO
.findUnNotifiedByFunctionIdAndIdLesserThanAndEndTimeGreaterThanLastOneDay(functionId,
lastNotifiedAnomaly);
if (CollectionUtils.isNotEmpty(filteredAnomalies)) {
mergedAnomaliesAllResults.addAll(filteredAnomalies);
}
}
// apply filtration rule
List<MergedAnomalyResultDTO> results =
AlertFilterHelper.applyFiltrationRule(mergedAnomaliesAllResults, alertFilterFactory);
if (results.isEmpty()) {
LOG.info("Zero anomalies found, skipping sending email");
} else {
// TODO: Add dimensional alert grouping before the stage of task runner?
// There are two approaches to solve the problem of alert grouping:
// 1. Anomaly results from detection --> Grouped anomalies from grouper --> Alerter sends emails on grouped anomalies
// 2. Anomaly results from detection --> Alerter performs simple grouping and sends alerts in one go
// Current implementation uses the second approach for experimental purpose. We might need to move to
// approach 1 in order to consider multi-metric grouping.
// Input: a list of anomalies.
// Output: lists of anomalies; each list contains the anomalies of the same group.
AlertGrouper alertGrouper = AlertGrouperFactory.fromSpec(alertConfig.getGroupByConfig());
Map<DimensionMap, GroupedAnomalyResultsDTO> groupedAnomalyResultsMap = alertGrouper.group(results);
Map<DimensionMap, GroupedAnomalyResultsDTO> filteredGroupedAnomalyResultsMap;
// DummyAlertGroupFilter does not generate any GroupedAnomaly. Thus, we don't apply any additional processes.
if (alertGrouper instanceof DummyAlertGrouper) {
filteredGroupedAnomalyResultsMap = groupedAnomalyResultsMap;
} else {
filteredGroupedAnomalyResultsMap = timeBasedMergeAndFilterGroupedAnomalies(groupedAnomalyResultsMap);
}
for (Map.Entry<DimensionMap, GroupedAnomalyResultsDTO> entry : filteredGroupedAnomalyResultsMap.entrySet()) {
// Anomaly results for this group
DimensionMap dimensions = entry.getKey();
GroupedAnomalyResultsDTO groupedAnomalyDTO = entry.getValue();
List<MergedAnomalyResultDTO> resultsForThisGroup = groupedAnomalyDTO.getAnomalyResults();
// Append auxiliary recipients for this group
String recipientsForThisGroup = alertConfig.getRecipients();
// TODO: Replace with AuxiliaryRecipient provider
String auxiliaryRecipients = alertGrouper.groupEmailRecipients(dimensions);
if (StringUtils.isNotBlank(auxiliaryRecipients)) {
recipientsForThisGroup = recipientsForThisGroup + EmailHelper.EMAIL_ADDRESS_SEPARATOR + auxiliaryRecipients;
}
// Append group name after config name if dimensions of this group is not empty
String emailSubjectName = alertConfig.getName();
if (dimensions.size() != 0) {
String groupName = dimensions.toJavaString();
emailSubjectName = emailSubjectName + " " + groupName;
}
// Generate and send out an anomaly report for this group
AnomalyReportGenerator.getInstance()
.buildReport(resultsForThisGroup, thirdeyeConfig, recipientsForThisGroup, alertConfig.getFromAddress(),
emailSubjectName);
// Update notified flag
if (alertGrouper instanceof DummyAlertGrouper) {
// DummyAlertGroupFilter does not generate real GroupedAnomaly, so the flag has to be set on merged anomalies.
updateNotifiedStatus(resultsForThisGroup);
} else {
// For other alert groupers, the notified flag is set on the grouped anomalies.
groupedAnomalyDTO.setNotified(true);
groupedAnomalyResultsDAO.update(groupedAnomalyDTO);
}
}
// update anomaly watermark in alertConfig
long lastNotifiedAlertId = emailConfig.getAnomalyWatermark();
for (MergedAnomalyResultDTO anomalyResult : results) {
if (anomalyResult.getId() > lastNotifiedAlertId) {
lastNotifiedAlertId = anomalyResult.getId();
}
}
if (lastNotifiedAlertId != emailConfig.getAnomalyWatermark()) {
alertConfig.getEmailConfig().setAnomalyWatermark(lastNotifiedAlertId);
alertConfigDAO.update(alertConfig);
}
}
}
}
private void sendScheduledDataReport() throws Exception {
AlertConfigBean.ReportConfigCollection reportConfigCollection =
alertConfig.getReportConfigCollection();
if (reportConfigCollection != null && reportConfigCollection.isEnabled()) {
if (reportConfigCollection.getReportMetricConfigs() != null
&& reportConfigCollection.getReportMetricConfigs().size() > 0) {
List<MetricDimensionReport> metricDimensionValueReports;
// Used later to provide collection for a metric to help build the url link in report
Map<String, MetricConfigDTO> metricMap = new HashMap<>();
List<ContributorViewResponse> reports = new ArrayList<>();
for (int i = 0; i < reportConfigCollection.getReportMetricConfigs().size(); i++) {
AlertConfigBean.ReportMetricConfig reportMetricConfig =
reportConfigCollection.getReportMetricConfigs().get(i);
MetricConfigDTO metricConfig =
metricConfigManager.findById(reportMetricConfig.getMetricId());
List<String> dimensions = reportMetricConfig.getDimensions();
if (dimensions != null && dimensions.size() > 0) {
for (String dimension : dimensions) {
ContributorViewResponse report = EmailHelper
.getContributorDataForDataReport(metricConfig.getDataset(),
metricConfig.getName(), Arrays.asList(dimension),
reportMetricConfig.getCompareMode(),
alertConfig.getReportConfigCollection().getDelayOffsetMillis(),
alertConfig.getReportConfigCollection().isIntraDay());
if (report != null) {
metricMap.put(metricConfig.getName(), metricConfig);
reports.add(report);
}
}
}
}
if (reports.size() == 0) {
LOG.warn("Could not fetch report data for " + alertConfig.getName());
return;
}
long reportStartTs = reports.get(0).getTimeBuckets().get(0).getCurrentStart();
metricDimensionValueReports =
DataReportHelper.getInstance().getDimensionReportList(reports);
for (int i = 0; i < metricDimensionValueReports.size(); i++) {
MetricDimensionReport report = metricDimensionValueReports.get(i);
report.setDataset(metricMap.get(report.getMetricName()).getDataset());
long metricId = metricMap.get(report.getMetricName()).getId();
report.setMetricId(metricId);
for (AlertConfigBean.ReportMetricConfig reportMetricConfig : reportConfigCollection
.getReportMetricConfigs()) {
if (reportMetricConfig.getMetricId() == metricId) {
metricDimensionValueReports.get(i)
.setCompareMode(reportMetricConfig.getCompareMode().name());
}
}
}
Configuration freemarkerConfig = new Configuration(Configuration.VERSION_2_3_21);
freemarkerConfig.setClassForTemplateLoading(getClass(), "/com/linkedin/thirdeye/detector/");
freemarkerConfig.setDefaultEncoding(CHARSET);
freemarkerConfig.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER);
Map<String, Object> templateData = new HashMap<>();
DateTimeZone timeZone = DateTimeZone.forTimeZone(DEFAULT_TIME_ZONE);
DataReportHelper.DateFormatMethod dateFormatMethod =
new DataReportHelper.DateFormatMethod(timeZone);
templateData.put("timeZone", timeZone);
templateData.put("dateFormat", dateFormatMethod);
templateData.put("dashboardHost", thirdeyeConfig.getDashboardHost());
templateData.put("fromEmail", alertConfig.getFromAddress());
templateData.put("contactEmail", alertConfig.getReportConfigCollection().getContactEmail());
templateData.put("reportStartDateTime", reportStartTs);
templateData.put("metricDimensionValueReports", metricDimensionValueReports);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try (Writer out = new OutputStreamWriter(baos, CHARSET)) {
Template template = freemarkerConfig.getTemplate("data-report-by-metric-dimension.ftl");
template.process(templateData, out);
// Send email
HtmlEmail email = new HtmlEmail();
String alertEmailSubject =
String.format("Thirdeye data report : %s", alertConfig.getName());
String alertEmailHtml = new String(baos.toByteArray(), CHARSET);
EmailHelper
.sendEmailWithHtml(email, thirdeyeConfig.getSmtpConfiguration(), alertEmailSubject,
alertEmailHtml, alertConfig.getFromAddress(), alertConfig.getRecipients());
} catch (Exception e) {
throw new JobExecutionException(e);
}
}
}
}
private void updateNotifiedStatus(List<MergedAnomalyResultDTO> mergedResults) {
for (MergedAnomalyResultDTO mergedResult : mergedResults) {
mergedResult.setNotified(true);
anomalyMergedResultDAO.update(mergedResult);
}
}
private void sendFailureEmail(Throwable t) throws JobExecutionException {
HtmlEmail email = new HtmlEmail();
String subject = String
.format("[ThirdEye Anomaly Detector] FAILED ALERT ID=%d for config %s", alertConfig.getId(),
alertConfig.getName());
String textBody = String
.format("%s%n%nException:%s", alertConfig.toString(), ExceptionUtils.getStackTrace(t));
try {
EmailHelper
.sendEmailWithTextBody(email, thirdeyeConfig.getSmtpConfiguration(), subject, textBody,
thirdeyeConfig.getFailureFromAddress(), thirdeyeConfig.getFailureToAddress());
} catch (EmailException e) {
throw new JobExecutionException(e);
}
}
/**
* Given a map, which maps from a dimension map to a grouped anomaly, of new GroupedAnomalies, this method performs
* a time based merged with existing grouped anomalies, which are stored in a DB. Afterwards, if a merged grouped
* anomaly passes through the filter, it is returned in a map.
*
* @param groupedAnomalyResultsMap a map of new GroupedAnomaly.
*
* @return a map of merged GroupedAnomaly that pass through the filter.
*/
private Map<DimensionMap,GroupedAnomalyResultsDTO> timeBasedMergeAndFilterGroupedAnomalies(
Map<DimensionMap, GroupedAnomalyResultsDTO> groupedAnomalyResultsMap) {
// Populate basic fields of the new grouped anomaly
for (Map.Entry<DimensionMap, GroupedAnomalyResultsDTO> entry : groupedAnomalyResultsMap.entrySet()) {
GroupedAnomalyResultsDTO groupedAnomaly = entry.getValue();
DimensionMap dimensions = entry.getKey();
groupedAnomaly.setAlertConfigId(alertConfig.getId());
groupedAnomaly.setDimensions(dimensions);
}
Map<DimensionMap, GroupedAnomalyResultsDTO> mergedGroupedAnomalyResultsMap =
this.timeBasedMergeGroupedAnomalyResults(groupedAnomalyResultsMap);
// Read and update grouped anomalies in DB
for (Map.Entry<DimensionMap, GroupedAnomalyResultsDTO> entry : mergedGroupedAnomalyResultsMap.entrySet()) {
GroupedAnomalyResultsDTO groupedAnomaly = entry.getValue();
groupedAnomalyResultsDAO.save(groupedAnomaly);
for (MergedAnomalyResultDTO mergedAnomalyResultDTO : groupedAnomaly.getAnomalyResults()) {
if (!mergedAnomalyResultDTO.isNotified()) {
mergedAnomalyResultDTO.setNotified(true);
anomalyMergedResultDAO.update(mergedAnomalyResultDTO);
}
}
}
// Filter out the grouped anomalies that trigger an alert
return this.filterMergedGroupedAnomalyResults(mergedGroupedAnomalyResultsMap);
}
/**
* Given a map, which maps from a dimension map to a grouped anomaly, of new GroupedAnomalies, this method performs
* a time based merged with existing grouped anomalies, which are stored in a DB.
*
* @param newGroupedAnomalies a map of new GroupedAnomaly.
*
* @return a map of merged GroupedAnomaly in time dimensions.
*/
private Map<DimensionMap, GroupedAnomalyResultsDTO> timeBasedMergeGroupedAnomalyResults(
Map<DimensionMap, GroupedAnomalyResultsDTO> newGroupedAnomalies) {
// Parse max allowed merge gap from config
long maxAllowedMergeGap = DEFAULT_MAX_ALLOWED_MERGE_GAP;
Map<String, String> mergeConfig = alertConfig.getGroupTimeBasedMergeConfig();
if (MapUtils.isNotEmpty(mergeConfig)) {
if (mergeConfig.containsKey(MAX_ALLOWED_MERGE_GAP_KEY)) {
try {
Long value = Long.parseLong(mergeConfig.get(MAX_ALLOWED_MERGE_GAP_KEY));
maxAllowedMergeGap = value;
} catch (Exception e) {
LOG.warn("Failed to parse {} as 'MAX_ALLOWED_MERGE_GAP_KEY'; Use default value {}",
mergeConfig.get(MAX_ALLOWED_MERGE_GAP_KEY), DEFAULT_MAX_ALLOWED_MERGE_GAP);
}
}
}
// Retrieve the most recent grouped anomalies from DB
// TODO: Get update time from merged anomaly after the field "updateTime" is updated in DB correctly
Map<DimensionMap, GroupedAnomalyResultsDTO> recentGroupedAnomalies = new HashMap<>();
for (Map.Entry<DimensionMap, GroupedAnomalyResultsDTO> groupedAnomalyEntry : newGroupedAnomalies.entrySet()) {
DimensionMap dimensions = groupedAnomalyEntry.getKey();
GroupedAnomalyResultsDTO newGroupedAnomaly = groupedAnomalyEntry.getValue();
long approximateUpdateTime = newGroupedAnomaly.getEndTime();
GroupedAnomalyResultsDTO recentGroupedAnomaly = groupedAnomalyResultsDAO
.findMostRecentInTimeWindow(alertConfig.getId(), dimensions.toString(),
approximateUpdateTime - maxAllowedMergeGap, approximateUpdateTime);
recentGroupedAnomalies.put(dimensions, recentGroupedAnomaly);
}
// Merge grouped anomalies
return SimpleGroupedAnomalyMerger.timeBasedMergeGroupedAnomalyResults(recentGroupedAnomalies, newGroupedAnomalies);
}
/**
* Given a map, which maps from a dimension map to a grouped anomaly, of new GroupedAnomalies, this method returns
* the GroupedAnomalies that pass through the filter.
*
* @param mergedGroupedAnomalies a map of GroupedAnomaly.
*
* @return a map of GroupedAnomaly that pass through the filter.
*/
private Map<DimensionMap, GroupedAnomalyResultsDTO> filterMergedGroupedAnomalyResults(
Map<DimensionMap, GroupedAnomalyResultsDTO> mergedGroupedAnomalies) {
Map<DimensionMap, GroupedAnomalyResultsDTO> filteredGroupedAnomalies = new HashMap<>();
AlertGroupFilter filter = AlertGroupFilterFactory.fromSpec(alertConfig.getGroupFilterConfig());
for (Map.Entry<DimensionMap, GroupedAnomalyResultsDTO> groupedAnomalyEntry : mergedGroupedAnomalies.entrySet()) {
GroupedAnomalyResultsDTO groupedAnomaly = groupedAnomalyEntry.getValue();
if (!groupedAnomaly.isNotified()) {
assert(groupedAnomalyEntry.getKey().equals(groupedAnomaly.getDimensions()));
if (filter.isQualified(groupedAnomaly)) {
filteredGroupedAnomalies.put(groupedAnomalyEntry.getKey(), groupedAnomaly);
}
}
}
return filteredGroupedAnomalies;
}
}