package com.linkedin.thirdeye.anomaly.detection;

import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import com.linkedin.pinot.pql.parsers.utils.Pair;
import com.linkedin.thirdeye.anomaly.override.OverrideConfigHelper;
import com.linkedin.thirdeye.api.DimensionKey;
import com.linkedin.thirdeye.api.DimensionMap;
import com.linkedin.thirdeye.api.MetricTimeSeries;
import com.linkedin.thirdeye.api.TimeGranularity;
import com.linkedin.thirdeye.client.DAORegistry;
import com.linkedin.thirdeye.client.MetricExpression;
import com.linkedin.thirdeye.client.ResponseParserUtils;
import com.linkedin.thirdeye.client.ThirdEyeCacheRegistry;
import com.linkedin.thirdeye.client.timeseries.TimeSeriesHandler;
import com.linkedin.thirdeye.client.timeseries.TimeSeriesRequest;
import com.linkedin.thirdeye.client.timeseries.TimeSeriesResponse;
import com.linkedin.thirdeye.client.timeseries.TimeSeriesResponseConverter;
import com.linkedin.thirdeye.client.timeseries.TimeSeriesRow;
import com.linkedin.thirdeye.dashboard.Utils;
import com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO;
import com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO;
import com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO;
import com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO;
import com.linkedin.thirdeye.detector.function.AnomalyFunctionFactory;
import com.linkedin.thirdeye.detector.function.BaseAnomalyFunction;
import com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor;
import com.linkedin.thirdeye.util.ThirdEyeUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.lang.NullArgumentException;
import org.apache.commons.lang3.StringUtils;
import org.joda.time.DateTime;
import org.quartz.JobExecutionException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class AnomalyDetectionInputContextBuilder {
  private static final Logger LOG = LoggerFactory.getLogger(AnomalyDetectionInputContextBuilder.class);
  private static final DAORegistry DAO_REGISTRY = DAORegistry.getInstance();

  private AnomalyDetectionInputContext anomalyDetectionInputContext;
  private AnomalyFunctionDTO anomalyFunctionSpec;
  private BaseAnomalyFunction anomalyFunction;
  private AnomalyFunctionFactory anomalyFunctionFactory;
  private List<String> collectionDimensions;
  private String dataset;

  public AnomalyDetectionInputContextBuilder(AnomalyFunctionFactory anomalyFunctionFactory) {
    this.anomalyFunctionFactory = anomalyFunctionFactory;
  }

  public AnomalyDetectionInputContextBuilder init(AnomalyFunctionDTO anomalyFunctionSpec) throws Exception {
    return init(anomalyFunctionSpec, new AnomalyDetectionInputContext());
  }

  public AnomalyDetectionInputContextBuilder init(AnomalyFunctionDTO anomalyFunctionSpec,
      AnomalyDetectionInputContext anomalyDetectionInputContext) throws Exception {
    this.anomalyFunctionSpec = anomalyFunctionSpec;
    this.anomalyFunction = anomalyFunctionFactory.fromSpec(anomalyFunctionSpec);
    this.anomalyDetectionInputContext = anomalyDetectionInputContext;
    this.dataset = this.anomalyFunctionSpec.getCollection();
    DatasetConfigDTO datasetConfig = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(dataset);
    if (datasetConfig == null) {
      LOG.error("Dataset [" + dataset + "] is not found");
      throw new NullArgumentException(
          "Dataset [" + dataset + "] is not found with function : " + anomalyFunctionSpec.toString());
    }
    this.collectionDimensions = datasetConfig.getDimensions();
    return this;
  }
  public AnomalyDetectionInputContext build() {
    return this.anomalyDetectionInputContext;
  }

  /**
   * Fetch time series data from Pinot in the given time ranges
   * @param startEndTimeRanges
   * the time ranges over which the time series is actually fetched
   * @param endTimeInclusive
   * true if the end time is included in the data fetching process
   * @return
   * the builder of the AnomalyDetectionInputContext
   * @throws JobExecutionException
   * @throws ExecutionException
   */
  public AnomalyDetectionInputContextBuilder fetchTimeSeriesData(List<Pair<Long, Long>> startEndTimeRanges,
      boolean endTimeInclusive) throws JobExecutionException, ExecutionException {
    Map<DimensionKey, MetricTimeSeries> dimensionKeyMetricTimeSeriesMap =
        getTimeSeriesForAnomalyDetection(anomalyFunctionSpec, startEndTimeRanges, endTimeInclusive);

    Map<DimensionMap, MetricTimeSeries> dimensionMapMetricTimeSeriesMap = new HashMap<>();
    for (Map.Entry<DimensionKey, MetricTimeSeries> entry : dimensionKeyMetricTimeSeriesMap.entrySet()) {
      DimensionKey dimensionKey = entry.getKey();

      // If the current time series belongs to the OTHER dimension, i.e., a time series whose sum of values
      // is below 1% of the sum of all time series values, then anomalies on it are meaningless and hence
      // we do not want to detect anomalies on it.
      String[] dimensionValues = dimensionKey.getDimensionValues();
      boolean isOTHERDimension = false;
      for (String dimensionValue : dimensionValues) {
        if (dimensionValue.equalsIgnoreCase(ResponseParserUtils.OTHER)
            || dimensionValue.equalsIgnoreCase(ResponseParserUtils.UNKNOWN)) {
          isOTHERDimension = true;
          break;
        }
      }
      if (isOTHERDimension) {
        continue;
      }

      DimensionMap dimensionMap = DimensionMap.fromDimensionKey(dimensionKey, collectionDimensions);
      dimensionMapMetricTimeSeriesMap.put(dimensionMap, entry.getValue());

      if (entry.getValue().getTimeWindowSet().isEmpty()) {
        LOG.warn("Insufficient data for {} to run anomaly detection function", dimensionMap);
      }
    }
    this.anomalyDetectionInputContext.setDimensionKeyMetricTimeSeriesMap(dimensionMapMetricTimeSeriesMap);

    return this;
  }
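  // A minimal usage sketch (illustrative only): the builder is initialized from an anomaly function spec
  // and chained fetch calls populate the AnomalyDetectionInputContext. The spec lookup, the factory
  // instance, and the monitoring window below are hypothetical; checked exceptions propagate to the caller.
  //
  //   AnomalyFunctionDTO spec = DAORegistry.getInstance().getAnomalyFunctionDAO().findById(42L);
  //   AnomalyDetectionInputContext context =
  //       new AnomalyDetectionInputContextBuilder(anomalyFunctionFactory)
  //           .init(spec)
  //           .fetchTimeSeriesData(new DateTime(2017, 1, 1, 0, 0), new DateTime(2017, 1, 2, 0, 0))
  //           .build();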
  /**
   * Fetch time series data from Pinot for a given monitoring window start and end time
   * The data range is calculated by anomalyFunction.getDataRangeIntervals
   * If endTimeInclusive is set to true, the windowEnd is included in the time series; otherwise, it is not.
   * @param windowStart
   * The start time of the monitoring window
   * @param windowEnd
   * The end time of the monitoring window
   * @param endTimeInclusive
   * true if the end time is included in the data fetching process
   * @return
   * the builder of the AnomalyDetectionInputContext
   */
  public AnomalyDetectionInputContextBuilder fetchTimeSeriesData(DateTime windowStart, DateTime windowEnd,
      boolean endTimeInclusive) throws JobExecutionException, ExecutionException {
    List<Pair<Long, Long>> startEndTimeRanges =
        anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis());
    return fetchTimeSeriesData(startEndTimeRanges, endTimeInclusive);
  }

  /**
   * Fetch time series data from Pinot for a given monitoring window start and end time
   * The data range is calculated by anomalyFunction.getDataRangeIntervals
   * endTimeInclusive defaults to false
   * @param windowStart
   * The start time of the monitoring window
   * @param windowEnd
   * The end time of the monitoring window
   * @return
   * the builder of the AnomalyDetectionInputContext
   */
  public AnomalyDetectionInputContextBuilder fetchTimeSeriesData(DateTime windowStart, DateTime windowEnd)
      throws JobExecutionException, ExecutionException {
    List<Pair<Long, Long>> startEndTimeRanges =
        anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis());
    return fetchTimeSeriesData(startEndTimeRanges, false);
  }

  /**
   * Fetch time series data from Pinot for the given time ranges
   * endTimeInclusive defaults to false
   *
   * @param startEndTimeRanges
   * The time ranges in which data should be fetched
   * @return
   * the builder of the AnomalyDetectionInputContext
   */
  public AnomalyDetectionInputContextBuilder fetchTimeSeriesData(List<Pair<Long, Long>> startEndTimeRanges)
      throws JobExecutionException, ExecutionException {
    return fetchTimeSeriesData(startEndTimeRanges, false);
  }

  /**
   * Fetch time series, known merged anomalies, and scaling factor for the specified dimension. Note that scaling
   * factors carry no dimension information, so all scaling factors in the specified time range will be retrieved.
   *
   * @param windowStart the start time for retrieving the data
   * @param windowEnd the end time for retrieving the data
   * @param dimensions the dimension of the data
   * @param endTimeInclusive set to true if the end time should be inclusive; mainly used by the queries from UI
   * @return
   * the builder of the AnomalyDetectionInputContext
   * @throws Exception if it fails to retrieve time series from DB.
   */
  public AnomalyDetectionInputContextBuilder fetchTimeSeriesDataByDimension(DateTime windowStart, DateTime windowEnd,
      DimensionMap dimensions, boolean endTimeInclusive) throws Exception {
    List<Pair<Long, Long>> startEndTimeRanges =
        anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis());
    return fetchTimeSeriesDataByDimension(startEndTimeRanges, dimensions, endTimeInclusive);
  }
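  // For intuition (a sketch, not part of this class's contract): getDataRangeIntervals expands the
  // monitoring window into the ranges the function actually needs; a week-over-week function, for
  // example, would add baseline windows shifted back by whole weeks. The shape of the result can be
  // inspected as below; the concrete ranges depend entirely on the anomaly function implementation.
  //
  //   List<Pair<Long, Long>> ranges =
  //       anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis());
  //   for (Pair<Long, Long> range : ranges) {
  //     LOG.debug("data range: [{}, {})", new DateTime(range.getFirst()), new DateTime(range.getSecond()));
  //   }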
  /**
   * Fetch time series, known merged anomalies, and scaling factor for the specified dimension. Note that scaling
   * factors carry no dimension information, so all scaling factors in the specified time range will be retrieved.
   *
   * @param startEndTimeRanges the start and end time ranges for retrieving the data
   * @param dimensions the dimension of the data
   * @param endTimeInclusive set to true if the end time should be inclusive; mainly used by the queries from UI
   * @return
   * the builder of the AnomalyDetectionInputContext
   * @throws Exception if it fails to retrieve time series from DB.
   */
  public AnomalyDetectionInputContextBuilder fetchTimeSeriesDataByDimension(List<Pair<Long, Long>> startEndTimeRanges,
      DimensionMap dimensions, boolean endTimeInclusive) throws Exception {
    TimeGranularity timeGranularity =
        new TimeGranularity(anomalyFunctionSpec.getBucketSize(), anomalyFunctionSpec.getBucketUnit());

    // Retrieve Time Series
    MetricTimeSeries metricTimeSeries =
        getTimeSeriesByDimension(anomalyFunctionSpec, startEndTimeRanges, dimensions, timeGranularity, endTimeInclusive);
    Map<DimensionMap, MetricTimeSeries> metricTimeSeriesMap = new HashMap<>();
    metricTimeSeriesMap.put(dimensions, metricTimeSeries);
    this.anomalyDetectionInputContext.setDimensionKeyMetricTimeSeriesMap(metricTimeSeriesMap);

    return this;
  }

  /**
   * Fetch the global metric without dimension or filter for a given monitoring window start and end time;
   * the actual data fetching time range is calculated by anomalyFunction.getDataRangeIntervals
   * @param windowStart
   * the start time of the monitoring window
   * @param windowEnd
   * the end time of the monitoring window
   * @return
   * the builder of the AnomalyDetectionInputContext
   * @throws JobExecutionException
   * @throws ExecutionException
   */
  public AnomalyDetectionInputContextBuilder fetchTimeSeriesGlobalMetric(DateTime windowStart, DateTime windowEnd)
      throws JobExecutionException, ExecutionException {
    List<Pair<Long, Long>> startEndTimeRanges =
        anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis());
    return fetchTimeSeriesGlobalMetric(startEndTimeRanges);
  }

  /**
   * Fetch the global metric without dimension or filter in the given time ranges
   * @param startEndTimeRanges
   * the time ranges in which the time series data should be fetched
   * @return
   * the builder of the AnomalyDetectionInputContext
   * @throws JobExecutionException
   * @throws ExecutionException
   */
  public AnomalyDetectionInputContextBuilder fetchTimeSeriesGlobalMetric(List<Pair<Long, Long>> startEndTimeRanges)
      throws JobExecutionException, ExecutionException {
    MetricTimeSeries metricSumTimeSeries = getGlobalMetric(anomalyFunctionSpec, startEndTimeRanges);
    this.anomalyDetectionInputContext.setGlobalMetric(metricSumTimeSeries);

    return this;
  }

  /**
   * Fetch existing RawAnomalyResults in the monitoring window
   * @param windowStart
   * the start time of the monitoring window
   * @param windowEnd
   * the end time of the monitoring window
   * @return
   * the builder of the AnomalyDetectionInputContext
   */
  public AnomalyDetectionInputContextBuilder fetchExixtingRawAnomalies(DateTime windowStart, DateTime windowEnd) {
    // Always look up existing raw anomalies so that duplicate raw anomalies are not generated
    List<RawAnomalyResultDTO> existingRawAnomalies =
        getExistingRawAnomalies(anomalyFunctionSpec.getId(), windowStart.getMillis(), windowEnd.getMillis());
    ArrayListMultimap<DimensionMap, RawAnomalyResultDTO> dimensionNamesToKnownRawAnomalies = ArrayListMultimap.create();
    for (RawAnomalyResultDTO existingRawAnomaly : existingRawAnomalies) {
      dimensionNamesToKnownRawAnomalies.put(existingRawAnomaly.getDimensions(), existingRawAnomaly);
    }
    this.anomalyDetectionInputContext.setExistingRawAnomalies(dimensionNamesToKnownRawAnomalies);

    return this;
  }
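  // The fetch methods above and below key anomalies by their DimensionMap via a Guava Multimap, which
  // keeps every anomaly that shares a dimension combination under a single key. A minimal standalone
  // sketch of the pattern (the string values are made up for illustration):
  //
  //   ArrayListMultimap<String, String> byDimension = ArrayListMultimap.create();
  //   byDimension.put("{country=US}", "anomaly-1");
  //   byDimension.put("{country=US}", "anomaly-2");
  //   byDimension.put("{country=CA}", "anomaly-3");
  //   byDimension.get("{country=US}");  // -> [anomaly-1, anomaly-2]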
  /**
   * Fetch existing MergedAnomalyResults in the training window
   * @param windowStart
   * the start time of the monitoring window
   * @param windowEnd
   * the end time of the monitoring window
   * @return
   * the builder of the AnomalyDetectionInputContext
   */
  public AnomalyDetectionInputContextBuilder fetchExixtingMergedAnomalies(DateTime windowStart, DateTime windowEnd) {
    // Get existing anomalies for this time range and this function id for all combinations of dimensions
    if (anomalyFunction.useHistoryAnomaly()) {
      // if this anomaly function uses history data, then get all time ranges
      return fetchExixtingMergedAnomalies(
          anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis()));
    } else {
      // otherwise, only get the merged anomalies for the current window in order to remove duplicate raw anomalies
      List<Pair<Long, Long>> currentTimeRange = new ArrayList<>();
      currentTimeRange.add(new Pair<>(windowStart.getMillis(), windowEnd.getMillis()));
      return fetchExixtingMergedAnomalies(currentTimeRange);
    }
  }

  public AnomalyDetectionInputContextBuilder fetchExixtingMergedAnomalies(List<Pair<Long, Long>> startEndTimeRanges) {
    // Get existing anomalies for this time range and this function id for all combinations of dimensions
    List<MergedAnomalyResultDTO> knownMergedAnomalies =
        getKnownMergedAnomalies(anomalyFunctionSpec.getId(), startEndTimeRanges);

    // Sort the known merged and raw anomalies by their dimension names
    ArrayListMultimap<DimensionMap, MergedAnomalyResultDTO> dimensionMapToKnownMergedAnomalies = ArrayListMultimap.create();
    for (MergedAnomalyResultDTO knownMergedAnomaly : knownMergedAnomalies) {
      dimensionMapToKnownMergedAnomalies.put(knownMergedAnomaly.getDimensions(), knownMergedAnomaly);
    }
    this.anomalyDetectionInputContext.setKnownMergedAnomalies(dimensionMapToKnownMergedAnomalies);

    return this;
  }

  public AnomalyDetectionInputContextBuilder fetchExistingMergedAnomaliesByDimension(DateTime windowStart,
      DateTime windowEnd, DimensionMap dimensions) {
    return fetchExistingMergedAnomaliesByDimension(
        anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis()), dimensions);
  }

  public AnomalyDetectionInputContextBuilder fetchExistingMergedAnomaliesByDimension(
      List<Pair<Long, Long>> startEndTimeRanges, DimensionMap dimensions) {
    ArrayListMultimap<DimensionMap, MergedAnomalyResultDTO> dimensionMapToKnownMergedAnomalies = ArrayListMultimap.create();
    dimensionMapToKnownMergedAnomalies.putAll(dimensions,
        getKnownMergedAnomaliesByDimension(anomalyFunctionSpec.getId(), startEndTimeRanges, dimensions));

    this.anomalyDetectionInputContext.setKnownMergedAnomalies(dimensionMapToKnownMergedAnomalies);
    return this;
  }

  /**
   * Fetch scaling factors in the training window
   * @param windowStart
   * the start time of the monitoring window
   * @param windowEnd
   * the end time of the monitoring window
   * @return
   * the builder of the AnomalyDetectionInputContext
   */
  public AnomalyDetectionInputContextBuilder fetchSaclingFactors(DateTime windowStart, DateTime windowEnd) {
    List<ScalingFactor> scalingFactors = OverrideConfigHelper
        .getTimeSeriesScalingFactors(DAO_REGISTRY.getOverrideConfigDAO(), anomalyFunctionSpec.getCollection(),
            anomalyFunctionSpec.getMetric(), anomalyFunctionSpec.getId(),
            anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis()));
    this.anomalyDetectionInputContext.setScalingFactors(scalingFactors);
    return this;
  }
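  // A dimension-specific sketch (illustrative; the dimension value and window variables are
  // hypothetical): this is roughly the path a UI or replay query would take, assembling the time
  // series, known anomalies, and scaling factors for one dimension combination before running detection.
  //
  //   DimensionMap dimensions = new DimensionMap();
  //   dimensions.put("country", "US");
  //   AnomalyDetectionInputContext context = builder
  //       .fetchTimeSeriesDataByDimension(windowStart, windowEnd, dimensions, true)
  //       .fetchExistingMergedAnomaliesByDimension(windowStart, windowEnd, dimensions)
  //       .fetchSaclingFactors(windowStart, windowEnd)
  //       .build();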
  /**
   * Returns existing raw anomalies in the given monitoring window
   *
   * @param functionId the id of the anomaly function
   * @param monitoringWindowStart inclusive
   * @param monitoringWindowEnd inclusive (boundary inclusiveness is immaterial for this lookup)
   *
   * @return known raw anomalies in the given window
   */
  private List<RawAnomalyResultDTO> getExistingRawAnomalies(long functionId, long monitoringWindowStart,
      long monitoringWindowEnd) {
    List<RawAnomalyResultDTO> results = new ArrayList<>();
    try {
      results.addAll(DAO_REGISTRY.getRawAnomalyResultDAO()
          .findAllByTimeAndFunctionId(monitoringWindowStart, monitoringWindowEnd, functionId));
    } catch (Exception e) {
      LOG.error("Exception in getting existing anomalies", e);
    }
    return results;
  }

  /**
   * Returns all known merged anomalies of the function id that are needed for anomaly detection, i.e., the merged
   * anomalies that overlap with the monitoring window and baseline windows.
   *
   * @param functionId the id of the anomaly function
   * @param startEndTimeRanges the time ranges for retrieving the known merged anomalies
   * @return known merged anomalies of the function id that are needed for anomaly detection
   */
  private List<MergedAnomalyResultDTO> getKnownMergedAnomalies(long functionId,
      List<Pair<Long, Long>> startEndTimeRanges) {
    List<MergedAnomalyResultDTO> results = new ArrayList<>();
    for (Pair<Long, Long> startEndTimeRange : startEndTimeRanges) {
      try {
        results.addAll(DAO_REGISTRY.getMergedAnomalyResultDAO()
            .findAllConflictByFunctionId(functionId, startEndTimeRange.getFirst(), startEndTimeRange.getSecond()));
      } catch (Exception e) {
        LOG.error("Exception in getting merged anomalies", e);
      }
    }
    return results;
  }

  /**
   * Returns all known merged anomalies of the function id and dimensions that are needed for anomaly detection,
   * i.e., the merged anomalies that overlap with the monitoring window and baseline windows.
   *
   * @param functionId the id of the anomaly function
   * @param startEndTimeRanges the time ranges for retrieving the known merged anomalies
   * @param dimensions the dimension map used to restrict the lookup
   * @return known merged anomalies of the function id that are needed for anomaly detection
   */
  private List<MergedAnomalyResultDTO> getKnownMergedAnomaliesByDimension(long functionId,
      List<Pair<Long, Long>> startEndTimeRanges, DimensionMap dimensions) {
    List<MergedAnomalyResultDTO> results = new ArrayList<>();
    for (Pair<Long, Long> startEndTimeRange : startEndTimeRanges) {
      try {
        results.addAll(DAO_REGISTRY.getMergedAnomalyResultDAO()
            .findAllConflictByFunctionIdDimensions(functionId, startEndTimeRange.getFirst(),
                startEndTimeRange.getSecond(), dimensions.toString()));
      } catch (Exception e) {
        LOG.error("Exception in getting merged anomalies", e);
      }
    }
    return results;
  }

  /**
   * Get the metric filter setting for an anomaly function
   * @param anomalyFunctionSpec
   * @return the multimap of filters defined in the function spec; empty if no filter is set
   */
  public static Multimap<String, String> getFiltersForFunction(AnomalyFunctionDTO anomalyFunctionSpec) {
    // Get the original filter
    Multimap<String, String> filters;
    String filterString = anomalyFunctionSpec.getFilters();
    if (StringUtils.isNotBlank(filterString)) {
      filters = ThirdEyeUtils.getFilterSet(filterString);
    } else {
      filters = HashMultimap.create();
    }
    return filters;
  }

  /**
   * Get the explore dimensions for an anomaly function
   * @param anomalyFunctionSpec
   * @return the list of explore dimensions; empty if none is set
   */
  public static List<String> getDimensionsForFunction(AnomalyFunctionDTO anomalyFunctionSpec) {
    List<String> groupByDimensions;
    String exploreDimensionString = anomalyFunctionSpec.getExploreDimensions();
    if (StringUtils.isNotBlank(exploreDimensionString)) {
      groupByDimensions = Arrays.asList(exploreDimensionString.trim().split(","));
    } else {
      groupByDimensions = Collections.emptyList();
    }
    return groupByDimensions;
  }
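  // For illustration: given a spec whose filters string is "country=US;country=CA" and whose explore
  // dimensions string is "country,device" (the exact filter syntax is ThirdEyeUtils.getFilterSet's
  // concern and is assumed here), the two helpers above would yield roughly:
  //
  //   Multimap<String, String> filters = getFiltersForFunction(spec);
  //   // filters -> {country=[US, CA]}
  //   List<String> dims = getDimensionsForFunction(spec);
  //   // dims -> [country, device]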
  /**
   * Returns the set of metric time series that are needed by the given anomaly function for detecting anomalies.
   *
   * The time granularity is the granularity of the function's collection, i.e., the buckets are not aggregated,
   * in order to increase the accuracy of anomaly detection.
   *
   * @param anomalyFunctionSpec spec of the anomaly function
   * @param startEndTimeRanges the time ranges to retrieve the data for constructing the time series
   * @param endTimeInclusive true if the end time is included
   *
   * @return the data that is needed by the anomaly function for detecting anomalies.
   * @throws JobExecutionException
   * @throws ExecutionException
   */
  public Map<DimensionKey, MetricTimeSeries> getTimeSeriesForAnomalyDetection(
      AnomalyFunctionDTO anomalyFunctionSpec, List<Pair<Long, Long>> startEndTimeRanges, boolean endTimeInclusive)
      throws JobExecutionException, ExecutionException {
    Multimap<String, String> filters = getFiltersForFunction(anomalyFunctionSpec);

    List<String> groupByDimensions = getDimensionsForFunction(anomalyFunctionSpec);

    TimeGranularity timeGranularity =
        new TimeGranularity(anomalyFunctionSpec.getBucketSize(), anomalyFunctionSpec.getBucketUnit());

    DatasetConfigDTO datasetConfig =
        DAORegistry.getInstance().getDatasetConfigDAO().findByDataset(anomalyFunctionSpec.getCollection());
    // Non-additive datasets cannot be rolled up
    boolean doRollUp = datasetConfig.isAdditive();

    TimeSeriesResponse timeSeriesResponse =
        getTimeSeriesResponseImpl(anomalyFunctionSpec, startEndTimeRanges, timeGranularity, filters, groupByDimensions,
            endTimeInclusive, doRollUp);

    try {
      Map<DimensionKey, MetricTimeSeries> dimensionKeyMetricTimeSeriesMap =
          TimeSeriesResponseConverter.toMap(timeSeriesResponse,
              Utils.getSchemaDimensionNames(anomalyFunctionSpec.getCollection()));
      return dimensionKeyMetricTimeSeriesMap;
    } catch (Exception e) {
      LOG.warn("Failed to get schema dimensions for constructing dimension keys: {}", e.toString());
      return Collections.emptyMap();
    }
  }
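  // A consumption sketch for the map returned above: each DimensionKey is positional (ordered by the
  // dataset's dimension names) and can be turned into a named DimensionMap, exactly as
  // fetchTimeSeriesData does. Illustrative only; spec and ranges are assumed to exist in caller scope.
  //
  //   Map<DimensionKey, MetricTimeSeries> byKey = getTimeSeriesForAnomalyDetection(spec, ranges, false);
  //   for (Map.Entry<DimensionKey, MetricTimeSeries> entry : byKey.entrySet()) {
  //     DimensionMap named = DimensionMap.fromDimensionKey(entry.getKey(), collectionDimensions);
  //     LOG.debug("{} has {} buckets", named, entry.getValue().getTimeWindowSet().size());
  //   }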
  /**
   * Returns the metric time series that were given to the anomaly function for anomaly detection. If the dimension to
   * retrieve is OTHER, this method retrieves all combinations of dimensions and calculates the metric time series for
   * the OTHER dimension on-the-fly.
   *
   * @param anomalyFunctionSpec spec of the anomaly function
   * @param startEndTimeRanges the time ranges to retrieve the data for constructing the time series
   * @param dimensionMap a dimension map that is used to construct the filter for retrieving the corresponding data
   *                     that was used to detect the anomaly
   * @param timeGranularity time granularity of the time series
   * @param endTimeInclusive set to true if the end time should be inclusive; mainly used by the query for UI
   * @return the time series in the same format as those used by the given anomaly function for anomaly detection
   *
   * @throws JobExecutionException
   * @throws ExecutionException
   */
  public MetricTimeSeries getTimeSeriesByDimension(AnomalyFunctionDTO anomalyFunctionSpec,
      List<Pair<Long, Long>> startEndTimeRanges, DimensionMap dimensionMap, TimeGranularity timeGranularity,
      boolean endTimeInclusive) throws JobExecutionException, ExecutionException {
    // Get the original filter
    Multimap<String, String> filters = getFiltersForFunction(anomalyFunctionSpec);

    // Decorate filters according to dimensionMap
    filters = ThirdEyeUtils.getFilterSetFromDimensionMap(dimensionMap, filters);

    boolean hasOTHERDimensionName = false;
    for (String dimensionValue : dimensionMap.values()) {
      if (dimensionValue.equalsIgnoreCase(ResponseParserUtils.OTHER)) {
        hasOTHERDimensionName = true;
        break;
      }
    }

    // groupByDimensions (i.e., exploreDimensions) is empty by default because the query for getting the time series
    // will have the decorated filters according to anomalies' explore dimensions.
    // However, if there exists any dimension with value "OTHER", then we need to honor the original groupBy in order
    // to construct the data for the OTHER dimension
    List<String> groupByDimensions = Collections.emptyList();
    if (hasOTHERDimensionName && StringUtils.isNotBlank(anomalyFunctionSpec.getExploreDimensions())) {
      groupByDimensions = Arrays.asList(anomalyFunctionSpec.getExploreDimensions().trim().split(","));
    }

    final boolean doRollUp = false;
    TimeSeriesResponse response =
        getTimeSeriesResponseImpl(anomalyFunctionSpec, startEndTimeRanges, timeGranularity, filters, groupByDimensions,
            endTimeInclusive, doRollUp);
    try {
      Map<DimensionKey, MetricTimeSeries> metricTimeSeriesMap = TimeSeriesResponseConverter.toMap(response,
          Utils.getSchemaDimensionNames(anomalyFunctionSpec.getCollection()));
      return extractMetricTimeSeriesByDimension(metricTimeSeriesMap);
    } catch (Exception e) {
      LOG.warn("Unable to get schema dimension name for retrieving metric time series: {}", e.toString());
      return null;
    }
  }

  /**
   * Returns a global metric for an anomaly function, used to calculate the contribution of anomalies to the global metric
   * @param anomalyFunctionSpec spec of the anomaly function
   * @param startEndTimeRanges the time ranges to retrieve the data for constructing the time series
   * @return the data that is needed by the anomaly function for knowing the global metric
   * @throws JobExecutionException
   * @throws ExecutionException
   */
  public MetricTimeSeries getGlobalMetric(AnomalyFunctionDTO anomalyFunctionSpec,
      List<Pair<Long, Long>> startEndTimeRanges) throws JobExecutionException, ExecutionException {
    // Use empty dimensions and filters
    Multimap<String, String> filters = HashMultimap.create();
    List<String> groupByDimensions = Collections.emptyList();

    TimeGranularity timeGranularity =
        new TimeGranularity(anomalyFunctionSpec.getBucketSize(), anomalyFunctionSpec.getBucketUnit());

    boolean doRollUp = false;

    List<String> metricsToFetch = new ArrayList<>();
    if (StringUtils.isNotEmpty(anomalyFunctionSpec.getGlobalMetric())) {
      metricsToFetch.add(anomalyFunctionSpec.getGlobalMetric());
    } else {
      metricsToFetch.add(anomalyFunctionSpec.getMetric());
    }
    TimeSeriesResponse timeSeriesResponse =
        getTimeSeriesResponseImpl(anomalyFunctionSpec, metricsToFetch, startEndTimeRanges, timeGranularity, filters,
            groupByDimensions, false, doRollUp);

    MetricTimeSeries globalMetric = null;
    try {
      Map<DimensionKey, MetricTimeSeries> dimensionKeyMetricTimeSeriesMap =
          TimeSeriesResponseConverter.toMap(timeSeriesResponse,
              Utils.getSchemaDimensionNames(anomalyFunctionSpec.getCollection()));

      if (MapUtils.isEmpty(dimensionKeyMetricTimeSeriesMap)) {
        LOG.error("Unable to fetch global metric for {}", anomalyFunctionSpec);
      } else {
        if (dimensionKeyMetricTimeSeriesMap.size() > 1) {
          LOG.warn("More than one time series returned when fetching the global metric for {}; taking the first one",
              anomalyFunctionSpec);
        }
        globalMetric = dimensionKeyMetricTimeSeriesMap.values().iterator().next();
      }
    } catch (Exception e) {
      LOG.warn("Failed to get schema dimensions for constructing dimension keys: {}", e.toString());
    }
    return globalMetric;
  }
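  // Why the global metric matters: detection can report an anomaly's share of the overall metric. A
  // rough sketch of such a contribution computation (this assumes MetricTimeSeries exposes per-bucket
  // values via get(timestamp, metricName); treat that accessor as an assumption, not a documented API):
  //
  //   double anomalySum = 0, globalSum = 0;
  //   for (long timestamp : globalMetric.getTimeWindowSet()) {
  //     globalSum += globalMetric.get(timestamp, metricName).doubleValue();
  //     if (anomalySeries.getTimeWindowSet().contains(timestamp)) {
  //       anomalySum += anomalySeries.get(timestamp, metricName).doubleValue();
  //     }
  //   }
  //   double contribution = globalSum == 0 ? 0 : anomalySum / globalSum;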
  /**
   * Extracts the time series used for presenting an anomaly from the parsed Pinot results. There are two possible
   * cases: 1. the time series for a specific dimension, and 2. the time series for the OTHER dimension.
   *
   * For case 1, the input map contains only one time series, so we can just return it. For case 2, the
   * input map contains all combinations of the explored dimensions, so we need to filter out the one for the OTHER
   * dimension.
   *
   * @param metricTimeSeriesMap
   *
   * @return the time series when the anomaly is detected
   */
  private MetricTimeSeries extractMetricTimeSeriesByDimension(Map<DimensionKey, MetricTimeSeries> metricTimeSeriesMap) {
    MetricTimeSeries metricTimeSeries = null;
    if (MapUtils.isNotEmpty(metricTimeSeriesMap)) {
      // For most anomalies, there should be only one time series because the filters are fully specified by the
      // anomaly's dimensions. The exception is the OTHER dimension, for which time series of different dimensions
      // are returned due to the on-the-fly calculation of the OTHER dimension.
      // Therefore, we need to pick out the time series of the OTHER dimension manually.
      if (metricTimeSeriesMap.size() == 1) {
        Iterator<MetricTimeSeries> ite = metricTimeSeriesMap.values().iterator();
        if (ite.hasNext()) {
          metricTimeSeries = ite.next();
        }
      } else {
        // Retrieve the time series of the OTHER dimension
        Iterator<Map.Entry<DimensionKey, MetricTimeSeries>> ite = metricTimeSeriesMap.entrySet().iterator();
        while (ite.hasNext()) {
          Map.Entry<DimensionKey, MetricTimeSeries> entry = ite.next();

          DimensionKey dimensionKey = entry.getKey();
          boolean foundOTHER = false;
          for (String dimensionValue : dimensionKey.getDimensionValues()) {
            if (dimensionValue.equalsIgnoreCase(ResponseParserUtils.OTHER)) {
              metricTimeSeries = entry.getValue();
              foundOTHER = true;
              break;
            }
          }
          if (foundOTHER) {
            break;
          }
        }
      }
    }
    return metricTimeSeries;
  }

  private TimeSeriesResponse getTimeSeriesResponseImpl(AnomalyFunctionDTO anomalyFunctionSpec,
      List<Pair<Long, Long>> startEndTimeRanges, TimeGranularity timeGranularity, Multimap<String, String> filters,
      List<String> groupByDimensions, boolean endTimeInclusive, boolean doRollUp)
      throws JobExecutionException, ExecutionException {
    return getTimeSeriesResponseImpl(anomalyFunctionSpec, anomalyFunctionSpec.getMetrics(), startEndTimeRanges,
        timeGranularity, filters, groupByDimensions, endTimeInclusive, doRollUp);
  }
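  // The private implementation below fans out one TimeSeriesRequest per time range through
  // TimeSeriesHandler.asyncHandle(...) and then joins the futures. A minimal standalone sketch of the
  // same fan-out/collect pattern using only java.util.concurrent (illustrative; not ThirdEye API):
  //
  //   ExecutorService pool = Executors.newFixedThreadPool(4);
  //   List<Future<Integer>> futures = new ArrayList<>();
  //   for (int i = 0; i < 8; i++) {
  //     final int n = i;
  //     futures.add(pool.submit(() -> n * n));  // stand-in for one time-range query
  //   }
  //   Set<Integer> results = new HashSet<>();
  //   for (Future<Integer> f : futures) {
  //     results.add(f.get());  // blocks until that query finishes
  //   }
  //   pool.shutdown();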
  private TimeSeriesResponse getTimeSeriesResponseImpl(AnomalyFunctionDTO anomalyFunctionSpec, List<String> metrics,
      List<Pair<Long, Long>> startEndTimeRanges, TimeGranularity timeGranularity, Multimap<String, String> filters,
      List<String> groupByDimensions, boolean endTimeInclusive, boolean doRollUp)
      throws JobExecutionException, ExecutionException {
    TimeSeriesHandler timeSeriesHandler =
        new TimeSeriesHandler(ThirdEyeCacheRegistry.getInstance().getQueryCache(), doRollUp);

    // Seed request with the top-level settings shared by all time ranges
    TimeSeriesRequest seedRequest = new TimeSeriesRequest();
    seedRequest.setCollectionName(anomalyFunctionSpec.getCollection());
    // TODO: Check low level support for multiple metrics retrieval
    String metricsToRetrieve = StringUtils.join(metrics, ",");
    List<MetricExpression> metricExpressions = Utils
        .convertToMetricExpressions(metricsToRetrieve, anomalyFunctionSpec.getMetricFunction(),
            anomalyFunctionSpec.getCollection());
    seedRequest.setMetricExpressions(metricExpressions);
    seedRequest.setAggregationTimeGranularity(timeGranularity);
    seedRequest.setFilterSet(filters);
    seedRequest.setGroupByDimensions(groupByDimensions);
    seedRequest.setEndDateInclusive(endTimeInclusive);

    LOG.info("Found [{}] time ranges to fetch data", startEndTimeRanges.size());
    for (Pair<Long, Long> timeRange : startEndTimeRanges) {
      LOG.info("Start Time [{}], End Time [{}] for anomaly analysis", new DateTime(timeRange.getFirst()),
          new DateTime(timeRange.getSecond()));
    }

    // MultiQuery request: send one request per time range and collect the futures
    List<Future<TimeSeriesResponse>> futureResponses = new ArrayList<>();
    List<TimeSeriesRequest> requests = new ArrayList<>();
    Set<TimeSeriesRow> timeSeriesRowSet = new HashSet<>();
    for (Pair<Long, Long> startEndInterval : startEndTimeRanges) {
      TimeSeriesRequest request = new TimeSeriesRequest(seedRequest);
      DateTime startTime = new DateTime(startEndInterval.getFirst());
      DateTime endTime = new DateTime(startEndInterval.getSecond());
      request.setStart(startTime);
      request.setEnd(endTime);

      Future<TimeSeriesResponse> response = timeSeriesHandler.asyncHandle(request);
      if (response != null) {
        futureResponses.add(response);
        requests.add(request);
        LOG.info("Fetching data with startTime: [{}], endTime: [{}], metricExpressions: [{}], timeGranularity: [{}]",
            startTime, endTime, metricExpressions, timeGranularity);
      }
    }

    for (int i = 0; i < futureResponses.size(); i++) {
      Future<TimeSeriesResponse> futureResponse = futureResponses.get(i);
      TimeSeriesRequest request = requests.get(i);
      try {
        TimeSeriesResponse response = futureResponse.get();
        timeSeriesRowSet.addAll(response.getRows());
      } catch (InterruptedException e) {
        // Restore the interrupt status so callers can observe the interruption
        Thread.currentThread().interrupt();
        LOG.warn("Failed to fetch data with request: [{}]", request);
      }
    }

    timeSeriesHandler.shutdownAsyncHandler();

    List<TimeSeriesRow> timeSeriesRows = new ArrayList<>();
    timeSeriesRows.addAll(timeSeriesRowSet);

    return new TimeSeriesResponse(timeSeriesRows);
  }
}