package com.linkedin.thirdeye.anomaly.detection;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ListMultimap;
import com.linkedin.thirdeye.anomaly.detection.DetectionJobContext.DetectionJobType;
import com.linkedin.thirdeye.anomaly.merge.TimeBasedAnomalyMerger;
import com.linkedin.thirdeye.anomaly.task.TaskContext;
import com.linkedin.thirdeye.anomaly.task.TaskInfo;
import com.linkedin.thirdeye.anomaly.task.TaskResult;
import com.linkedin.thirdeye.anomaly.task.TaskRunner;
import com.linkedin.thirdeye.anomaly.utils.AnomalyUtils;
import com.linkedin.thirdeye.anomalydetection.datafilter.DataFilter;
import com.linkedin.thirdeye.anomalydetection.datafilter.DataFilterFactory;
import com.linkedin.thirdeye.api.DimensionMap;
import com.linkedin.thirdeye.api.MetricTimeSeries;
import com.linkedin.thirdeye.client.DAORegistry;
import com.linkedin.thirdeye.datalayer.bao.MergedAnomalyResultManager;
import com.linkedin.thirdeye.datalayer.bao.RawAnomalyResultManager;
import com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO;
import com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO;
import com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO;
import com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO;
import com.linkedin.thirdeye.detector.function.AnomalyFunctionFactory;
import com.linkedin.thirdeye.detector.function.BaseAnomalyFunction;
import com.linkedin.thirdeye.detector.metric.transfer.MetricTransfer;
import com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.NullArgumentException;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static com.linkedin.thirdeye.anomaly.utils.ThirdeyeMetricsUtil.*;

/**
 * Runs a single anomaly detection task: for each (windowStart, windowEnd) pair in the task info,
 * it fetches the input data, runs the anomaly function per dimension, merges the resulting raw
 * anomalies, and persists both the raw and the merged anomalies.
 */
public class DetectionTaskRunner implements TaskRunner {

  private static final Logger LOG = LoggerFactory.getLogger(DetectionTaskRunner.class);
  private static final DAORegistry DAO_REGISTRY = DAORegistry.getInstance();

  public static final String BACKFILL_PREFIX = "adhoc_";

  private List<DateTime> windowStarts;
  private List<DateTime> windowEnds;
  private AnomalyFunctionDTO anomalyFunctionSpec;
  private long jobExecutionId;
  private DetectionJobType detectionJobType;

  private List<String> collectionDimensions;
  private AnomalyFunctionFactory anomalyFunctionFactory;
  private BaseAnomalyFunction anomalyFunction;

  public List<TaskResult> execute(TaskInfo taskInfo, TaskContext taskContext) throws Exception {
    detectionTaskCounter.inc();
    // Detection tasks do not produce task results; this list is returned empty.
    List<TaskResult> taskResult = new ArrayList<>();

    LOG.info("Setting up task {}", taskInfo);
    setupTask(taskInfo, taskContext);

    // Run detection for each pair of window start and window end
    for (int i = 0; i < windowStarts.size(); i++) {
      runTask(windowStarts.get(i), windowEnds.get(i));
    }

    return taskResult;
  }

  private void setupTask(TaskInfo taskInfo, TaskContext taskContext) throws Exception {
    DetectionTaskInfo detectionTaskInfo = (DetectionTaskInfo) taskInfo;
    windowStarts = detectionTaskInfo.getWindowStartTime();
    windowEnds = detectionTaskInfo.getWindowEndTime();
    anomalyFunctionSpec = detectionTaskInfo.getAnomalyFunctionSpec();
    jobExecutionId = detectionTaskInfo.getJobExecutionId();
    anomalyFunctionFactory = taskContext.getAnomalyFunctionFactory();
    anomalyFunction = anomalyFunctionFactory.fromSpec(anomalyFunctionSpec);
    detectionJobType = detectionTaskInfo.getDetectionJobType();

    String dataset = anomalyFunctionSpec.getCollection();
    DatasetConfigDTO datasetConfig = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(dataset);
    if (datasetConfig == null) {
      LOG.error("Dataset [" + dataset + "] is not found");
      throw new NullArgumentException(
          "Dataset [" + dataset + "] is not found with function : " + anomalyFunctionSpec.toString());
    }
    collectionDimensions = datasetConfig.getDimensions();

    LOG.info("Running anomaly detection job with metricFunction: [{}], topic metric [{}], collection: [{}]",
        anomalyFunctionSpec.getFunctionName(), anomalyFunctionSpec.getTopicMetric(),
        anomalyFunctionSpec.getCollection());
  }
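  /*
   * Usage sketch (illustrative only): this runner is normally driven by the ThirdEye task
   * framework rather than invoked directly. Assuming a populated DetectionTaskInfo (the
   * setter names below are hypothetical, mirroring the getters read in setupTask), an
   * invocation would look roughly like:
   *
   *   DetectionTaskInfo info = new DetectionTaskInfo();
   *   info.setWindowStartTime(windowStarts);      // List<DateTime>, hypothetical setter
   *   info.setWindowEndTime(windowEnds);          // List<DateTime>, hypothetical setter
   *   info.setAnomalyFunctionSpec(functionSpec);  // hypothetical setter
   *   List<TaskResult> results = new DetectionTaskRunner().execute(info, taskContext);
   */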
  private void runTask(DateTime windowStart, DateTime windowEnd) throws Exception {
    LOG.info("Running anomaly detection for time range {} to {}", windowStart, windowEnd);

    // Fetch all input data needed by the anomaly function.
    // NOTE: the misspelled fetch* names below follow the spelling of the
    // AnomalyDetectionInputContextBuilder API as invoked here.
    AnomalyDetectionInputContextBuilder anomalyDetectionInputContextBuilder =
        new AnomalyDetectionInputContextBuilder(anomalyFunctionFactory);
    anomalyDetectionInputContextBuilder.init(anomalyFunctionSpec);
    // TODO: Change to DataFetchers/DataSources
    anomalyDetectionInputContextBuilder = anomalyDetectionInputContextBuilder
        .fetchTimeSeriesData(windowStart, windowEnd)
        .fetchExixtingRawAnomalies(windowStart, windowEnd)
        .fetchExixtingMergedAnomalies(windowStart, windowEnd)
        .fetchSaclingFactors(windowStart, windowEnd);
    if (anomalyFunctionSpec.isToCalculateGlobalMetric()) {
      anomalyDetectionInputContextBuilder.fetchTimeSeriesGlobalMetric(windowStart, windowEnd);
    }
    AnomalyDetectionInputContext adContext = anomalyDetectionInputContextBuilder.build();

    // Run the anomaly function on each dimension's time series
    ListMultimap<DimensionMap, RawAnomalyResultDTO> resultRawAnomalies =
        dimensionalShuffleAndUnifyAnalyze(windowStart, windowEnd, adContext);
    detectionTaskSuccessCounter.inc();

    boolean isBackfill = false;
    // If the current job is a backfill (adhoc) detection job, set the notified flag to true so the
    // merged anomalies do not trigger alerts and emails.
    if (detectionJobType != null && (detectionJobType.equals(DetectionJobType.BACKFILL)
        || detectionJobType.equals(DetectionJobType.OFFLINE))) {
      LOG.info("BACKFILL is triggered for Detection Job {}. Notified flag is set to be true", jobExecutionId);
      isBackfill = true;
    }

    // Update merged anomalies
    TimeBasedAnomalyMerger timeBasedAnomalyMerger = new TimeBasedAnomalyMerger(anomalyFunctionFactory);
    ListMultimap<DimensionMap, MergedAnomalyResultDTO> resultMergedAnomalies =
        timeBasedAnomalyMerger.mergeAnomalies(anomalyFunctionSpec, resultRawAnomalies, isBackfill);
    detectionTaskSuccessCounter.inc();

    // TODO: Change to DataSink
    AnomalyDetectionOutputContext adOutputContext = new AnomalyDetectionOutputContext();
    adOutputContext.setRawAnomalies(resultRawAnomalies);
    adOutputContext.setMergedAnomalies(resultMergedAnomalies);
    storeData(adOutputContext);
  }
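  /*
   * Pipeline sketch for a single detection window (a summary of runTask above):
   *
   *   input  = time series + existing raw/merged anomalies + scaling factors
   *   raw    = per-dimension detection over the input   -> dimensionalShuffleAndUnifyAnalyze
   *   merged = time-based merge of the raw anomalies    -> TimeBasedAnomalyMerger
   *   stored = raw anomalies saved, merged updated      -> storeData
   */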
  private void storeData(AnomalyDetectionOutputContext anomalyDetectionOutputContext) {
    RawAnomalyResultManager rawAnomalyDAO = DAO_REGISTRY.getRawAnomalyResultDAO();
    MergedAnomalyResultManager mergedAnomalyDAO = DAO_REGISTRY.getMergedAnomalyResultDAO();

    for (RawAnomalyResultDTO rawAnomalyResultDTO : anomalyDetectionOutputContext.getRawAnomalies().values()) {
      rawAnomalyDAO.save(rawAnomalyResultDTO);
    }

    for (MergedAnomalyResultDTO mergedAnomalyResultDTO : anomalyDetectionOutputContext.getMergedAnomalies().values()) {
      mergedAnomalyDAO.update(mergedAnomalyResultDTO);
    }
  }

  private ListMultimap<DimensionMap, RawAnomalyResultDTO> dimensionalShuffleAndUnifyAnalyze(DateTime windowStart,
      DateTime windowEnd, AnomalyDetectionInputContext anomalyDetectionInputContext) {
    int anomalyCounter = 0;
    ListMultimap<DimensionMap, RawAnomalyResultDTO> resultRawAnomalies = ArrayListMultimap.create();

    DataFilter dataFilter = DataFilterFactory.fromSpec(anomalyFunctionSpec.getDataFilter());
    for (DimensionMap dimensionMap : anomalyDetectionInputContext.getDimensionKeyMetricTimeSeriesMap().keySet()) {
      // Skip anomaly detection if the current time series does not pass the data filter, which may
      // check whether the traffic or total count of the data has enough volume to produce anomaly
      // results with sufficient confidence
      MetricTimeSeries metricTimeSeries =
          anomalyDetectionInputContext.getDimensionKeyMetricTimeSeriesMap().get(dimensionMap);
      if (!dataFilter.isQualified(metricTimeSeries, dimensionMap, windowStart.getMillis(), windowEnd.getMillis())) {
        continue;
      }

      List<RawAnomalyResultDTO> resultsOfAnEntry =
          runAnalyze(windowStart, windowEnd, anomalyDetectionInputContext, dimensionMap);

      // Set raw anomalies' properties
      handleResults(resultsOfAnEntry);

      LOG.info("Dimension {} has {} anomalies in window {} to {}", dimensionMap, resultsOfAnEntry.size(),
          windowStart, windowEnd);
      anomalyCounter += resultsOfAnEntry.size();
      resultRawAnomalies.putAll(dimensionMap, resultsOfAnEntry);
    }

    LOG.info("{} anomalies found in total", anomalyCounter);
    return resultRawAnomalies;
  }
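  /*
   * Example (illustrative): if the input context holds time series for the dimension maps
   * {country=US} and {country=IN}, each series is filtered and analyzed independently. A
   * possible result multimap:
   *
   *   {country=US} -> [rawAnomaly1, rawAnomaly2]
   *
   * with no entries for {country=IN} if it was skipped by the data filter or produced no
   * anomalies.
   */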
  private List<RawAnomalyResultDTO> runAnalyze(DateTime windowStart, DateTime windowEnd,
      AnomalyDetectionInputContext anomalyDetectionInputContext, DimensionMap dimensionMap) {
    List<RawAnomalyResultDTO> resultsOfAnEntry = Collections.emptyList();

    String metricName = anomalyFunction.getSpec().getTopicMetric();
    MetricTimeSeries metricTimeSeries =
        anomalyDetectionInputContext.getDimensionKeyMetricTimeSeriesMap().get(dimensionMap);

    // Check if the current task is running offline analysis
    boolean isOffline = false;
    if (detectionJobType != null && detectionJobType.equals(DetectionJobType.OFFLINE)) {
      LOG.info("Detection Job {} is running under OFFLINE mode", jobExecutionId);
      isOffline = true;
    }

    // Get the current entry's knownMergedAnomalies, which should have the same explored dimensions
    List<MergedAnomalyResultDTO> knownMergedAnomaliesOfAnEntry =
        anomalyDetectionInputContext.getKnownMergedAnomalies().get(dimensionMap);
    List<MergedAnomalyResultDTO> historyMergedAnomalies;
    if (anomalyFunction.useHistoryAnomaly()) {
      historyMergedAnomalies = retainHistoryMergedAnomalies(windowStart.getMillis(), knownMergedAnomaliesOfAnEntry);
    } else {
      historyMergedAnomalies = Collections.emptyList();
    }

    LOG.info("Analyzing anomaly function with explored dimensions: {}, windowStart: {}, windowEnd: {}",
        dimensionMap, windowStart, windowEnd);
    AnomalyUtils.logAnomaliesOverlapWithWindow(windowStart, windowEnd, historyMergedAnomalies);

    try {
      // Run the algorithm.
      // First rescale the time series according to the scaling factors, if any.
      List<ScalingFactor> scalingFactors = anomalyDetectionInputContext.getScalingFactors();
      if (CollectionUtils.isNotEmpty(scalingFactors)) {
        Properties properties = anomalyFunction.getProperties();
        MetricTransfer.rescaleMetric(metricTimeSeries, windowStart.getMillis(), scalingFactors, metricName,
            properties);
      }

      if (isOffline) {
        resultsOfAnEntry = anomalyFunction
            .offlineAnalyze(dimensionMap, metricTimeSeries, windowStart, windowEnd, historyMergedAnomalies);
      } else {
        resultsOfAnEntry =
            anomalyFunction.analyze(dimensionMap, metricTimeSeries, windowStart, windowEnd, historyMergedAnomalies);
      }
    } catch (Exception e) {
      LOG.error("Could not compute for {}", dimensionMap, e);
    }

    // Remove detected anomalies that already exist in the database
    if (CollectionUtils.isNotEmpty(resultsOfAnEntry)) {
      List<RawAnomalyResultDTO> existingRawAnomaliesOfAnEntry =
          anomalyDetectionInputContext.getExistingRawAnomalies().get(dimensionMap);
      resultsOfAnEntry = removeFromExistingRawAnomalies(resultsOfAnEntry, existingRawAnomaliesOfAnEntry);
    }
    if (CollectionUtils.isNotEmpty(resultsOfAnEntry)) {
      List<MergedAnomalyResultDTO> existingMergedAnomalies =
          retainExistingMergedAnomalies(windowStart.getMillis(), windowEnd.getMillis(), knownMergedAnomaliesOfAnEntry);
      resultsOfAnEntry = removeFromExistingMergedAnomalies(resultsOfAnEntry, existingMergedAnomalies);
    }

    return resultsOfAnEntry;
  }
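  /*
   * Deduplication example (illustrative): for a window [100, 200), a newly detected raw
   * anomaly spanning [120, 150) is dropped when an existing raw anomaly or an overlapping
   * merged anomaly already covers [110, 160), because the new finding is fully contained
   * in a known one; only genuinely new anomalies flow into the merger.
   */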
  /**
   * Returns history anomalies of the monitoring window from the given known anomalies.
   *
   * Definition of history anomaly: an anomaly that starts before the monitoring window starts.
   *
   * @param monitoringWindowStart the start of the monitoring window
   * @param knownAnomalies the list of known anomalies
   *
   * @return all history anomalies of the monitoring window
   */
  private List<MergedAnomalyResultDTO> retainHistoryMergedAnomalies(long monitoringWindowStart,
      List<MergedAnomalyResultDTO> knownAnomalies) {
    List<MergedAnomalyResultDTO> historyAnomalies = new ArrayList<>();
    for (MergedAnomalyResultDTO knownAnomaly : knownAnomalies) {
      if (knownAnomaly.getStartTime() < monitoringWindowStart) {
        historyAnomalies.add(knownAnomaly);
      }
    }
    return historyAnomalies;
  }

  /**
   * Returns the anomalies that overlap with the monitoring window from the given known anomalies.
   *
   * Definition of existing anomaly: an anomaly that happens in the monitoring window.
   *
   * @param monitoringWindowStart the start of the monitoring window
   * @param monitoringWindowEnd the end of the monitoring window
   * @param knownAnomalies the list of known anomalies
   *
   * @return the anomalies that happen in the monitoring window from the given known anomalies
   */
  private List<MergedAnomalyResultDTO> retainExistingMergedAnomalies(long monitoringWindowStart,
      long monitoringWindowEnd, List<MergedAnomalyResultDTO> knownAnomalies) {
    List<MergedAnomalyResultDTO> existingAnomalies = new ArrayList<>();
    for (MergedAnomalyResultDTO knownAnomaly : knownAnomalies) {
      if (knownAnomaly.getStartTime() <= monitoringWindowEnd && knownAnomaly.getEndTime() >= monitoringWindowStart) {
        existingAnomalies.add(knownAnomaly);
      }
    }
    return existingAnomalies;
  }

  /**
   * Given a list of raw anomalies, returns the raw anomalies that are not contained in any
   * existing merged anomaly.
   *
   * @param rawAnomalies the newly detected raw anomalies
   * @param existingAnomalies the merged anomalies that already exist in the database
   * @return the raw anomalies that are not contained in any existing merged anomaly
   */
  private List<RawAnomalyResultDTO> removeFromExistingMergedAnomalies(List<RawAnomalyResultDTO> rawAnomalies,
      List<MergedAnomalyResultDTO> existingAnomalies) {
    if (CollectionUtils.isEmpty(rawAnomalies) || CollectionUtils.isEmpty(existingAnomalies)) {
      return rawAnomalies;
    }
    List<RawAnomalyResultDTO> newRawAnomalies = new ArrayList<>();

    for (RawAnomalyResultDTO rawAnomaly : rawAnomalies) {
      boolean isContained = false;
      for (MergedAnomalyResultDTO existingAnomaly : existingAnomalies) {
        if (Long.compare(existingAnomaly.getStartTime(), rawAnomaly.getStartTime()) <= 0
            && rawAnomaly.getEndTime().compareTo(existingAnomaly.getEndTime()) <= 0) {
          isContained = true;
          break;
        }
      }
      if (!isContained) {
        newRawAnomalies.add(rawAnomaly);
      }
    }

    return newRawAnomalies;
  }
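  /*
   * Window-overlap example (illustrative): for a monitoring window [100, 200], a known
   * anomaly spanning [50, 90] is a history anomaly (it starts before the window), one
   * spanning [150, 250] is an existing anomaly (it overlaps the window), and one spanning
   * [80, 120] counts as both.
   */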
  /**
   * Given a list of raw anomalies, returns the raw anomalies that are not contained in any
   * existing raw anomaly.
   *
   * @param rawAnomalies the newly detected raw anomalies
   * @param existingRawAnomalies the raw anomalies that already exist in the database
   * @return the raw anomalies that are not contained in any existing raw anomaly
   */
  private List<RawAnomalyResultDTO> removeFromExistingRawAnomalies(List<RawAnomalyResultDTO> rawAnomalies,
      List<RawAnomalyResultDTO> existingRawAnomalies) {
    List<RawAnomalyResultDTO> newRawAnomalies = new ArrayList<>();

    for (RawAnomalyResultDTO rawAnomaly : rawAnomalies) {
      boolean matched = false;
      for (RawAnomalyResultDTO existingAnomaly : existingRawAnomalies) {
        if (existingAnomaly.getStartTime().compareTo(rawAnomaly.getStartTime()) <= 0
            && rawAnomaly.getEndTime().compareTo(existingAnomaly.getEndTime()) <= 0) {
          matched = true;
          break;
        }
      }
      if (!matched) {
        newRawAnomalies.add(rawAnomaly);
      }
    }

    return newRawAnomalies;
  }

  private void handleResults(List<RawAnomalyResultDTO> results) {
    for (RawAnomalyResultDTO result : results) {
      try {
        // Properties that always come from the function spec
        AnomalyFunctionDTO spec = anomalyFunction.getSpec();
        // Make sure score and weight are valid numbers
        result.setScore(normalize(result.getScore()));
        result.setWeight(normalize(result.getWeight()));
        result.setFunction(spec);
      } catch (Exception e) {
        LOG.error("Exception in saving anomaly result : " + result.toString(), e);
      }
    }
  }

  /**
   * Handles any infinite or NaN value by replacing it with +/- Double.MAX_VALUE or 0, respectively.
   * For example, normalize(Double.POSITIVE_INFINITY) returns Double.MAX_VALUE and
   * normalize(Double.NaN) returns 0.0.
   */
  private double normalize(double value) {
    if (Double.isInfinite(value)) {
      return (value > 0.0 ? 1 : -1) * Double.MAX_VALUE;
    } else if (Double.isNaN(value)) {
      return 0.0; // default
    } else {
      return value;
    }
  }
}