package com.linkedin.thirdeye.detector.function;

import com.linkedin.pinot.pql.parsers.utils.Pair;
import com.linkedin.thirdeye.anomaly.views.AnomalyTimelinesView;
import com.linkedin.thirdeye.api.DimensionMap;
import com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO;
import java.util.List;
import org.joda.time.DateTime;
import com.linkedin.thirdeye.api.MetricTimeSeries;
import com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO;
import com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO;
import com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO;
import com.linkedin.thirdeye.util.AnomalyOffset;

/**
 * Contract for a pluggable anomaly detection function. Implementations are configured from an
 * {@link AnomalyFunctionDTO} via {@link #init(AnomalyFunctionDTO)} and then invoked to detect
 * anomalies over metric time series, score merged anomalies, and produce time-series views for
 * the dashboard.
 */
public interface AnomalyFunction {
  /**
   * Initializes this function with its configuration; must be called before {@link #analyze}.
   *
   * @param spec the configuration (metric, properties, windows, etc.) for this function instance
   * @throws Exception if the specification cannot be parsed or applied
   */
  void init(AnomalyFunctionDTO spec) throws Exception;

  /**
   * Returns the specification for this function instance.
   *
   * @return the specification previously supplied to {@link #init(AnomalyFunctionDTO)}
   */
  AnomalyFunctionDTO getSpec();

  /**
   * Returns the time ranges of data that is used by this anomaly function. This method is useful
   * when multiple time intervals are needed for fetching current vs. baseline data.
   *
   * @param monitoringWindowStartTime start of the monitoring window in millis, inclusive
   * @param monitoringWindowEndTime end of the monitoring window in millis, exclusive
   *
   * @return the time ranges of data that is used by this anomaly function
   */
  List<Pair<Long, Long>> getDataRangeIntervals(Long monitoringWindowStartTime,
      Long monitoringWindowEndTime);

  /**
   * Analyzes a metric time series and returns any anomalous points / intervals.
   *
   * @param exploredDimensions
   *          the dimension name-value combination corresponding to timeSeries
   * @param timeSeries
   *          the metric time series data
   * @param windowStart
   *          the beginning of the range corresponding to timeSeries, inclusive
   * @param windowEnd
   *          the end of the range corresponding to timeSeries, exclusive
   * @param knownAnomalies
   *          any known anomalies in the time range
   * @return a list of anomalies that were not previously known
   * @throws Exception if the analysis cannot be performed
   */
  List<RawAnomalyResultDTO> analyze(DimensionMap exploredDimensions, MetricTimeSeries timeSeries,
      DateTime windowStart, DateTime windowEnd, List<MergedAnomalyResultDTO> knownAnomalies)
      throws Exception;

  /**
   * Analyzes a metric time series before windowStart and returns any anomalous points / intervals.
   *
   * @param exploredDimensions
   *          the dimension name-value combination corresponding to timeSeries
   * @param timeSeries
   *          the metric time series data
   * @param windowStart
   *          the beginning of the range corresponding to timeSeries, inclusive
   * @param windowEnd
   *          the end of the range corresponding to timeSeries, exclusive
   * @param knownAnomalies
   *          any known anomalies in the time range
   * @return a list of anomalies that were not previously known
   * @throws Exception if the analysis cannot be performed
   */
  List<RawAnomalyResultDTO> offlineAnalyze(DimensionMap exploredDimensions,
      MetricTimeSeries timeSeries, DateTime windowStart, DateTime windowEnd,
      List<MergedAnomalyResultDTO> knownAnomalies) throws Exception;

  /**
   * Computes the score and severity according to the current and baseline of the given timeSeries
   * and stores the information in the merged anomaly. The start and end time of the time series
   * is provided.
   *
   * @param anomalyToUpdated
   *          the merged anomaly to be updated in place with the computed score and severity
   * @param timeSeries
   *          the metric time series data
   * @param windowStart
   *          the beginning of the range corresponding to timeSeries, inclusive
   * @param windowEnd
   *          the end of the range corresponding to timeSeries, exclusive
   * @param knownAnomalies
   *          any known anomalies in the time range
   * @throws Exception if the score or severity cannot be computed
   */
  void updateMergedAnomalyInfo(MergedAnomalyResultDTO anomalyToUpdated, MetricTimeSeries timeSeries,
      DateTime windowStart, DateTime windowEnd, List<MergedAnomalyResultDTO> knownAnomalies)
      throws Exception;

  /**
   * Given any metric, this method returns the corresponding current and baseline time series to
   * be presented in the frontend. The given metric is not necessarily the data that is used for
   * detecting anomalies.
   *
   * For instance, if a function uses the average values of the past 3 weeks as the baseline for
   * anomaly detection, then this method should construct a baseline that contains the average
   * value of the past 3 weeks of any given metric.
   *
   * Note that the usage of this method should be similar to the method {@link #analyze}, i.e., it
   * does not take care of setting filters, dimension names, etc. for retrieving the data from the
   * backend database. Specifically, it only processes the given data, i.e., timeSeries, for
   * presentation purposes.
   *
   * The only difference between this method and {@link #analyze} is that their bucket sizes are
   * different. This method's bucket size is given by the frontend, which should be larger than or
   * equal to the minimum time granularity of the data. On the other hand, {@link #analyze}'s
   * bucket size is always the minimum time granularity of the data.
   *
   * @param timeSeries the time series that contains the metric to be processed
   * @param bucketMillis the size of a bucket in milliseconds
   * @param metric the metric name to retrieve the data from the given time series
   * @param viewWindowStartTime the start time bucket of the current time series, inclusive
   * @param viewWindowEndTime the end time bucket of the current time series, exclusive
   * @param knownAnomalies any known anomalies in the view window
   * @return two sets of time series: current and baseline values, to be presented in the frontend
   */
  AnomalyTimelinesView getTimeSeriesView(MetricTimeSeries timeSeries, long bucketMillis,
      String metric, long viewWindowStartTime, long viewWindowEndTime,
      List<MergedAnomalyResultDTO> knownAnomalies);

  /**
   * Returns the property keys recognized by this specific anomaly function.
   *
   * @return list of property keys applied in case of a specific anomaly function
   */
  String[] getPropertyKeys();

  /**
   * This method is added to support the viewing of anomalies on the dashboard.
   * When viewing anomalies, we need to fetch more data than just the anomaly
   * region (typically some buffer before and after the anomaly region).
   * This fetched data will be transformed, and then used to display on the front end.
   * The data we need to fetch may not follow a simple rule
   * such as, "fetch x number of days before and after for this granularity".
   * We have come to see that this data to fetch might depend on which function is fetching it,
   * as some functions are designed to already fetch all data they need, and no offset needs to
   * be supplied. Hence it is best that the function makes the decision of how much data to fetch.
   *
   * To summarize, this method will allow us to decide the offset for an anomaly region,
   * and append appropriate padding before and after the anomaly region, while FETCHING DATA FOR IT.
   *
   * @param datasetConfig the dataset configuration, used to determine granularity-dependent padding
   * @return the offset (padding before and after the anomaly region) to apply when fetching data
   */
  AnomalyOffset getAnomalyWindowOffset(DatasetConfigDTO datasetConfig);

  /**
   * This method is added to support the viewing of anomalies on the dashboard.
   * When displaying anomalies, we first fetch the data for the baseline and current.
   * Some functions fetch more historical data than just the anomaly window provided.
   * But when displaying, we should display only some of it, typically some padding before and
   * after the anomaly region. This is again logic which is specific to the function requesting it.
   * Hence it is best that the function makes the decision of how much of the fetched data it
   * wants to actually display.
   *
   * To summarize, this method will allow us to decide the offset for an anomaly region,
   * and append appropriate padding before and after the anomaly region, while VIEWING IT.
   *
   * @param datasetConfig the dataset configuration, used to determine granularity-dependent padding
   * @return the offset (padding before and after the anomaly region) to apply when displaying data
   */
  AnomalyOffset getViewWindowOffset(DatasetConfigDTO datasetConfig);
}