package com.linkedin.thirdeye.anomaly.merge;

import com.linkedin.thirdeye.anomaly.detection.AnomalyDetectionInputContext;
import com.linkedin.thirdeye.anomaly.detection.AnomalyDetectionInputContextBuilder;
import com.linkedin.thirdeye.anomaly.utils.AnomalyUtils;
import com.linkedin.thirdeye.api.DimensionMap;
import com.linkedin.thirdeye.api.MetricTimeSeries;
import com.linkedin.thirdeye.client.DAORegistry;
import com.linkedin.thirdeye.datalayer.bao.AnomalyFunctionManager;
import com.linkedin.thirdeye.datalayer.bao.MergedAnomalyResultManager;
import com.linkedin.thirdeye.datalayer.bao.OverrideConfigManager;
import com.linkedin.thirdeye.datalayer.bao.RawAnomalyResultManager;
import com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO;
import com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO;
import com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO;
import com.linkedin.thirdeye.detector.function.AnomalyFunctionFactory;
import com.linkedin.thirdeye.detector.function.BaseAnomalyFunction;
import com.linkedin.thirdeye.detector.metric.transfer.MetricTransfer;
import com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import org.apache.commons.collections.CollectionUtils;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Finds raw anomalies grouped by a strategy and merges them with an existing (in the same group) or
 * new {@link com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO}.
 */
@Deprecated
public class AnomalyMergeExecutor implements Runnable {
  private final MergedAnomalyResultManager mergedResultDAO;
  private final RawAnomalyResultManager anomalyResultDAO;
  private final AnomalyFunctionManager anomalyFunctionDAO;
  private final OverrideConfigManager overrideConfigDAO;
  private final ScheduledExecutorService executorService;
  private final AnomalyFunctionFactory anomalyFunctionFactory;

  private static final DAORegistry DAO_REGISTRY = DAORegistry.getInstance();

  private final static Logger LOG = LoggerFactory.getLogger(AnomalyMergeExecutor.class);

  private final static AnomalyMergeConfig DEFAULT_MERGE_CONFIG;
  static {
    DEFAULT_MERGE_CONFIG = new AnomalyMergeConfig();
    DEFAULT_MERGE_CONFIG.setSequentialAllowedGap(TimeUnit.HOURS.toMillis(2)); // merge anomalies that are at most 2 hours apart
    DEFAULT_MERGE_CONFIG.setMaxMergeDurationLength(TimeUnit.DAYS.toMillis(7) - 3600_000); // break up merged anomalies longer than 6 days 23 hours
    DEFAULT_MERGE_CONFIG.setMergeStrategy(AnomalyMergeStrategy.FUNCTION_DIMENSIONS);
  }

  private final static AnomalyMergeConfig DEFAULT_SYNCHRONIZED_MERGE_CONFIG;
  static {
    DEFAULT_SYNCHRONIZED_MERGE_CONFIG = new AnomalyMergeConfig();
    DEFAULT_SYNCHRONIZED_MERGE_CONFIG.setSequentialAllowedGap(DEFAULT_MERGE_CONFIG.getSequentialAllowedGap());
    DEFAULT_SYNCHRONIZED_MERGE_CONFIG.setMaxMergeDurationLength(DEFAULT_MERGE_CONFIG.getMaxMergeDurationLength());
    // Synchronized merge always uses the FUNCTION_DIMENSIONS merge strategy
    DEFAULT_SYNCHRONIZED_MERGE_CONFIG.setMergeStrategy(AnomalyMergeStrategy.FUNCTION_DIMENSIONS);
  }

  public AnomalyMergeExecutor(ScheduledExecutorService executorService,
      AnomalyFunctionFactory anomalyFunctionFactory) {
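    // All DAOs are resolved from the DAORegistry singleton rather than injected, so this
    // executor is expected to be constructed after the registry has been initialized.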
    this.mergedResultDAO = DAO_REGISTRY.getMergedAnomalyResultDAO();
    this.anomalyResultDAO = DAO_REGISTRY.getRawAnomalyResultDAO();
    this.anomalyFunctionDAO = DAO_REGISTRY.getAnomalyFunctionDAO();
    this.overrideConfigDAO = DAO_REGISTRY.getOverrideConfigDAO();
    this.executorService = executorService;
    this.anomalyFunctionFactory = anomalyFunctionFactory;
  }

  public void start() {
    // runs every 15 minutes
    executorService.scheduleWithFixedDelay(this, 0, 15, TimeUnit.MINUTES);
  }

  public void stop() {
    executorService.shutdown();
  }

  /**
   * Performs asynchronous merges based on function id and dimensions.
   */
  public void run() {
    ExecutorService taskExecutorService = Executors.newFixedThreadPool(5);
    List<Future<Integer>> taskCallbacks = new ArrayList<>();
    List<AnomalyFunctionDTO> activeFunctions = anomalyFunctionDAO.findAllActiveFunctions();

    // for each anomaly function, find raw unmerged results and perform the merge
    for (final AnomalyFunctionDTO function : activeFunctions) {
      Callable<Integer> task = new Callable<Integer>() {
        @Override
        public Integer call() throws Exception {
          final boolean isBackfill = false;
          // TODO : move merge config within the AnomalyFunction; every function should have its own merge config.
          AnomalyMergeConfig anomalyMergeConfig = function.getAnomalyMergeConfig();
          if (anomalyMergeConfig == null) {
            anomalyMergeConfig = DEFAULT_MERGE_CONFIG;
          }
          return AnomalyMergeExecutor.this.mergeAnomalies(function, anomalyMergeConfig, isBackfill);
        }
      };
      Future<Integer> taskFuture = taskExecutorService.submit(task);
      taskCallbacks.add(taskFuture);
    }

    // wait until all the tasks complete
    try {
      for (Future<Integer> future : taskCallbacks) {
        future.get();
      }
    } catch (Exception e) {
      LOG.error("Error in merge execution", e);
    } finally {
      // shut down the per-run pool so worker threads do not leak across scheduled runs
      taskExecutorService.shutdown();
    }
  }

  /**
   * Performs a time-based merge, which merges anomalies that have the same function id and dimensions.
   * This method is intended to be invoked by anomaly detectors right after their anomaly detection.
   *
   * @param functionSpec the spec of the function that detects anomalies
   * @param isBackfill set to true to disable the alert of the merged anomalies
   *
   * @return the number of merged anomalies after merging
   */
  public int synchronousMergeBasedOnFunctionIdAndDimension(AnomalyFunctionDTO functionSpec, boolean isBackfill) {
    if (functionSpec.getIsActive()) {
      AnomalyMergeConfig anomalyMergeConfig = functionSpec.getAnomalyMergeConfig();
      if (anomalyMergeConfig == null) {
        anomalyMergeConfig = DEFAULT_SYNCHRONIZED_MERGE_CONFIG;
      }
      return mergeAnomalies(functionSpec, anomalyMergeConfig, isBackfill);
    } else {
      return 0;
    }
  }

  /**
   * Merges raw anomalies according to the given merge configuration.
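   * <p>
   * Illustrative example (assuming the default sequential allowed gap of 2 hours): raw anomalies
   * on the same dimension spanning [10:00, 11:00] and [12:30, 13:00] are 1.5 hours apart, so they
   * are merged into a single anomaly spanning [10:00, 13:00]; a third raw anomaly at
   * [16:00, 17:00] exceeds the allowed gap and starts a new merged anomaly.
   * <p>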
   * The merge logic works as follows:
   * <ul>
   *   <li>Step 1: for the given function, find all groups of raw (unprocessed) anomalies based on
   *       the merge strategy (function id and/or dimensions)</li>
   *   <li>Step 2: for each such group, find the base merged anomaly</li>
   *   <li>Step 3: perform the time-based merge</li>
   *   <li>Step 4: recompute the anomaly score / weight</li>
   *   <li>Step 5: persist the merged anomalies</li>
   * </ul>
   *
   * @param functionSpec the spec of the function that has unmerged anomalies
   * @param mergeConfig the merge configuration
   * @param isBackfill set to true to disable the alert of the merged anomalies
   *
   * @return the number of merged anomalies
   */
  private int mergeAnomalies(AnomalyFunctionDTO functionSpec, AnomalyMergeConfig mergeConfig,
      boolean isBackfill) {
    List<RawAnomalyResultDTO> unmergedResults = anomalyResultDAO.findUnmergedByFunctionId(functionSpec.getId());

    LOG.info("Running merge for function id : [{}], found [{}] raw anomalies", functionSpec.getId(),
        unmergedResults.size());

    if (unmergedResults.size() > 0) {
      List<MergedAnomalyResultDTO> output = new ArrayList<>();
      switch (mergeConfig.getMergeStrategy()) {
        case FUNCTION:
          performMergeBasedOnFunctionId(functionSpec, mergeConfig, unmergedResults, output);
          break;
        case FUNCTION_DIMENSIONS:
          performMergeBasedOnFunctionIdAndDimensions(functionSpec, mergeConfig, unmergedResults, output);
          break;
        default:
          throw new IllegalArgumentException("Merge strategy " + mergeConfig.getMergeStrategy() + " not supported");
      }
      for (MergedAnomalyResultDTO mergedAnomalyResultDTO : output) {
        if (isBackfill) {
          mergedAnomalyResultDTO.setNotified(isBackfill);
        } // else the notified flag is left as is
        updateMergedScoreAndPersist(mergedAnomalyResultDTO, mergeConfig);
      }
      return output.size();
    } else {
      return 0;
    }
  }

  private void updateMergedScoreAndPersist(MergedAnomalyResultDTO mergedResult, AnomalyMergeConfig mergeConfig) {
    // Calculate a default score and weight as a fallback in case updating them from Pinot data fails
    double weightedScoreSum = 0.0;
    double weightedWeightSum = 0.0;
    double totalBucketSize = 0.0;
    double normalizationFactor = 1000; // to prevent double overflow
    String anomalyMessage = "";

    for (RawAnomalyResultDTO anomalyResult : mergedResult.getAnomalyResults()) {
      anomalyResult.setMerged(true);
      double bucketSizeSeconds = (anomalyResult.getEndTime() - anomalyResult.getStartTime()) / 1000;
      weightedScoreSum += (anomalyResult.getScore() / normalizationFactor) * bucketSizeSeconds;
      weightedWeightSum += (anomalyResult.getWeight() / normalizationFactor) * bucketSizeSeconds;
      totalBucketSize += bucketSizeSeconds;
      anomalyMessage = anomalyResult.getMessage();
    }
    if (totalBucketSize != 0) {
      mergedResult.setScore((weightedScoreSum / totalBucketSize) * normalizationFactor);
      mergedResult.setWeight((weightedWeightSum / totalBucketSize) * normalizationFactor);
    }

    mergedResult.setMessage(anomalyMessage);

    if (mergedResult.getAnomalyResults().size() > 1) {
      // recompute the weight using the anomaly-function-specific method
      try {
        updateMergedAnomalyWeight(mergedResult, mergeConfig);
      } catch (Exception e) {
        AnomalyFunctionDTO function = mergedResult.getFunction();
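        // Fall back to the duration-weighted average score/weight computed above
        // when the function-specific recomputation fails.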
        LOG.warn(
            "Unable to compute merged weight; the average weight of raw anomalies is used. Dataset: {}, Topic Metric: {}, Function: {}, Time:{} - {}, Exception: {}",
            function.getCollection(), function.getTopicMetric(), function.getFunctionName(),
            new DateTime(mergedResult.getStartTime()), new DateTime(mergedResult.getEndTime()), e);
      }
    }
    try {
      // persist the merged result
      mergedResultDAO.update(mergedResult);
      for (RawAnomalyResultDTO rawAnomalyResultDTO : mergedResult.getAnomalyResults()) {
        anomalyResultDAO.update(rawAnomalyResultDTO);
      }
    } catch (Exception e) {
      LOG.error("Could not persist merged result : [" + mergedResult.toString() + "]", e);
    }
  }

  /**
   * Uses a function-specific method to recompute the weight of a merged anomaly.
   *
   * @param anomalyMergedResult the merged anomaly to be updated
   * @param mergeConfig the merge configuration that was applied when merging the anomaly
   * @throws Exception if an error occurs while retrieving the time series for calculating the weight
   */
  private void updateMergedAnomalyWeight(MergedAnomalyResultDTO anomalyMergedResult,
      AnomalyMergeConfig mergeConfig) throws Exception {
    AnomalyFunctionDTO anomalyFunctionSpec = anomalyMergedResult.getFunction();
    BaseAnomalyFunction anomalyFunction = anomalyFunctionFactory.fromSpec(anomalyFunctionSpec);

    DateTime anomalyResultStart = new DateTime(anomalyMergedResult.getStartTime());
    DateTime anomalyResultEnd = new DateTime(anomalyMergedResult.getEndTime());
    DimensionMap dimensions = anomalyMergedResult.getDimensions();
    AnomalyDetectionInputContextBuilder anomalyDetectionInputContextBuilder =
        new AnomalyDetectionInputContextBuilder(anomalyFunctionFactory);
    anomalyDetectionInputContextBuilder.init(anomalyFunctionSpec);
    anomalyDetectionInputContextBuilder
        .fetchTimeSeriesDataByDimension(anomalyResultStart, anomalyResultEnd, dimensions, false)
        .fetchExistingMergedAnomaliesByDimension(anomalyResultStart, anomalyResultEnd, dimensions)
        .fetchSaclingFactors(anomalyResultStart, anomalyResultEnd); // (sic) method name as defined by the builder
    AnomalyDetectionInputContext anomalyDetectionInputContext = anomalyDetectionInputContextBuilder.build();

    MetricTimeSeries metricTimeSeries =
        anomalyDetectionInputContext.getDimensionKeyMetricTimeSeriesMap().get(dimensions);

    if (metricTimeSeries != null) {
      DateTime windowStart = new DateTime(anomalyMergedResult.getStartTime());
      DateTime windowEnd = new DateTime(anomalyMergedResult.getEndTime());
      List<MergedAnomalyResultDTO> knownAnomalies = Collections.emptyList();

      // Retrieve historical merged anomalies
      if (anomalyFunction.useHistoryAnomaly()) {
        switch (mergeConfig.getMergeStrategy()) {
          case FUNCTION:
          case FUNCTION_DIMENSIONS:
            // both strategies look up known anomalies by the merged anomaly's dimensions
            knownAnomalies = anomalyDetectionInputContext.getKnownMergedAnomalies().get(dimensions);
            break;
          default:
            throw new IllegalArgumentException("Merge strategy " + mergeConfig.getMergeStrategy() + " not supported");
        }
        if (knownAnomalies.size() > 0) {
          LOG.info("Found {} history anomalies for computing the weight of the current merged anomaly.",
              knownAnomalies.size());
          LOG.info("Checking if any known anomalies overlap with the monitoring window of anomaly detection, "
              + "which could result in unwanted holes in current values.");
          AnomalyUtils.logAnomaliesOverlapWithWindow(windowStart, windowEnd, knownAnomalies);
        }
      }

      // Transform the time series
      List<ScalingFactor> scalingFactors = anomalyDetectionInputContext.getScalingFactors();
      if (CollectionUtils.isNotEmpty(scalingFactors)) {
        Properties properties = anomalyFunction.getProperties();
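        // Rescale the observed values before recomputing the weight so that known external
        // effects (captured as override scaling factors, e.g. for holidays) do not skew it.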
        MetricTransfer.rescaleMetric(metricTimeSeries, windowStart.getMillis(), scalingFactors,
            anomalyFunctionSpec.getTopicMetric(), properties);
      }

      anomalyFunction.updateMergedAnomalyInfo(anomalyMergedResult, metricTimeSeries, windowStart,
          windowEnd, knownAnomalies);
    }
  }

  @Deprecated
  private void performMergeBasedOnFunctionId(AnomalyFunctionDTO function,
      AnomalyMergeConfig mergeConfig, List<RawAnomalyResultDTO> unmergedResults,
      List<MergedAnomalyResultDTO> output) {
    // Now find the last MergedAnomalyResult in the same category
    MergedAnomalyResultDTO latestMergedResult =
        mergedResultDAO.findLatestByFunctionIdOnly(function.getId());
    // TODO : get mergeConfig from the function
    List<MergedAnomalyResultDTO> mergedResults = AnomalyTimeBasedSummarizer
        .mergeAnomalies(latestMergedResult, unmergedResults, mergeConfig.getMaxMergeDurationLength(),
            mergeConfig.getSequentialAllowedGap());
    for (MergedAnomalyResultDTO mergedResult : mergedResults) {
      mergedResult.setFunction(function);
    }
    LOG.info("Merging [{}] raw anomalies into [{}] merged anomalies for function id : [{}]",
        unmergedResults.size(), mergedResults.size(), function.getId());
    output.addAll(mergedResults);
  }

  private void performMergeBasedOnFunctionIdAndDimensions(AnomalyFunctionDTO function,
      AnomalyMergeConfig mergeConfig, List<RawAnomalyResultDTO> unmergedResults,
      List<MergedAnomalyResultDTO> output) {
    // group the raw anomalies by their explored dimensions
    Map<DimensionMap, List<RawAnomalyResultDTO>> dimensionsResultMap = new HashMap<>();
    for (RawAnomalyResultDTO anomalyResult : unmergedResults) {
      DimensionMap exploredDimensions = anomalyResult.getDimensions();
      if (!dimensionsResultMap.containsKey(exploredDimensions)) {
        dimensionsResultMap.put(exploredDimensions, new ArrayList<RawAnomalyResultDTO>());
      }
      dimensionsResultMap.get(exploredDimensions).add(anomalyResult);
    }

    for (DimensionMap exploredDimensions : dimensionsResultMap.keySet()) {
      List<RawAnomalyResultDTO> unmergedResultsByDimensions = dimensionsResultMap.get(exploredDimensions);
      long anomalyWindowStart = Long.MAX_VALUE;
      long anomalyWindowEnd = Long.MIN_VALUE;
      for (RawAnomalyResultDTO unmergedResultByDimension : unmergedResultsByDimensions) {
        anomalyWindowStart = Math.min(anomalyWindowStart, unmergedResultByDimension.getStartTime());
        anomalyWindowEnd = Math.max(anomalyWindowEnd, unmergedResultByDimension.getEndTime());
      }

      // NOTE: We get the "latest overlapped (conflict)" merged anomaly instead of the "latest" merged anomaly in
      // order to prevent the merge results of current (online) detection from interfering with the merge results
      // of back-fill (offline) detection.
      // Moreover, the window start is moved back by mergeConfig.getSequentialAllowedGap() in order to allow a gap
      // between anomalies to be merged.
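      // Illustrative example (assuming the default 2-hour sequential allowed gap): for a raw-anomaly
      // window of [08:00, 09:00], the conflict lookup below scans from 06:00 to 09:00, so a merged
      // anomaly that ended at 06:30 is still found and extended instead of a new one being created.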
      MergedAnomalyResultDTO latestOverlappedMergedResult =
          mergedResultDAO.findLatestConflictByFunctionIdDimensions(function.getId(), exploredDimensions.toString(),
              anomalyWindowStart - mergeConfig.getSequentialAllowedGap(), anomalyWindowEnd);

      List<MergedAnomalyResultDTO> mergedResults = AnomalyTimeBasedSummarizer
          .mergeAnomalies(latestOverlappedMergedResult, unmergedResultsByDimensions,
              mergeConfig.getMaxMergeDurationLength(), mergeConfig.getSequentialAllowedGap());
      for (MergedAnomalyResultDTO mergedResult : mergedResults) {
        mergedResult.setFunction(function);
        mergedResult.setDimensions(exploredDimensions);
      }
      LOG.info(
          "Merging [{}] raw anomalies into [{}] merged anomalies for function id : [{}] and dimensions : [{}]",
          unmergedResultsByDimensions.size(), mergedResults.size(), function.getId(), exploredDimensions);
      output.addAll(mergedResults);
    }
  }

  private String createMessage(double severity, Double currentVal, Double baseLineVal) {
    return String.format("change : %.2f %%, currentVal : %.2f, baseLineVal : %.2f", severity * 100,
        currentVal, baseLineVal);
  }
}