package com.linkedin.thirdeye.detector.function;

import com.linkedin.pinot.pql.parsers.utils.Pair;
import com.linkedin.thirdeye.anomaly.views.AnomalyTimelinesView;
import com.linkedin.thirdeye.api.DimensionMap;
import com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO;
import java.util.List;
import org.joda.time.DateTime;
import com.linkedin.thirdeye.api.MetricTimeSeries;
import com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO;
import com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO;
import com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO;
import com.linkedin.thirdeye.util.AnomalyOffset;

/**
 * Contract for a pluggable anomaly detection function. Implementations are configured from an
 * {@link AnomalyFunctionDTO} via {@link #init(AnomalyFunctionDTO)} and then invoked to detect
 * anomalies over metric time series, score merged anomalies, and produce time-series views for
 * the dashboard.
 */
public interface AnomalyFunction {
  /**
   * Initializes this function with its configuration; must be called before {@link #analyze}.
   *
   * @param spec the configuration (metric, properties, windows, etc.) for this function instance
   * @throws Exception if the specification cannot be parsed or applied
   */
  void init(AnomalyFunctionDTO spec) throws Exception;

  /**
   * Returns the specification for this function instance.
   *
   * @return the specification previously supplied to {@link #init(AnomalyFunctionDTO)}
   */
  AnomalyFunctionDTO getSpec();

  /**
   * Returns the time ranges of data that is used by this anomaly function. This method is useful
   * when multiple time intervals are needed for fetching current vs. baseline data.
   *
   * @param monitoringWindowStartTime start of the monitoring window in millis, inclusive
   * @param monitoringWindowEndTime end of the monitoring window in millis, exclusive
   *
   * @return the time ranges of data that is used by this anomaly function
   */
  List<Pair<Long, Long>> getDataRangeIntervals(Long monitoringWindowStartTime,
      Long monitoringWindowEndTime);

  /**
   * Analyzes a metric time series and returns any anomalous points / intervals.
   *
   * @param exploredDimensions
   *          the dimension name-value combination corresponding to timeSeries
   * @param timeSeries
   *          the metric time series data
   * @param windowStart
   *          the beginning of the range corresponding to timeSeries, inclusive
   * @param windowEnd
   *          the end of the range corresponding to timeSeries, exclusive
   * @param knownAnomalies
   *          any known anomalies in the time range
   * @return a list of anomalies that were not previously known
   * @throws Exception if the analysis cannot be performed
   */
  List<RawAnomalyResultDTO> analyze(DimensionMap exploredDimensions, MetricTimeSeries timeSeries,
      DateTime windowStart, DateTime windowEnd, List<MergedAnomalyResultDTO> knownAnomalies)
      throws Exception;

  /**
   * Analyzes a metric time series before windowStart and returns any anomalous points / intervals.
   *
   * @param exploredDimensions
   *          the dimension name-value combination corresponding to timeSeries
   * @param timeSeries
   *          the metric time series data
   * @param windowStart
   *          the beginning of the range corresponding to timeSeries, inclusive
   * @param windowEnd
   *          the end of the range corresponding to timeSeries, exclusive
   * @param knownAnomalies
   *          any known anomalies in the time range
   * @return a list of anomalies that were not previously known
   * @throws Exception if the analysis cannot be performed
   */
  List<RawAnomalyResultDTO> offlineAnalyze(DimensionMap exploredDimensions,
      MetricTimeSeries timeSeries, DateTime windowStart, DateTime windowEnd,
      List<MergedAnomalyResultDTO> knownAnomalies) throws Exception;

  /**
   * Computes the score and severity according to the current and baseline of the given timeSeries
   * and stores the information in the merged anomaly. The start and end time of the time series
   * is provided.
   *
   * @param anomalyToUpdated
   *          the merged anomaly to be updated in place with the computed score and severity
   * @param timeSeries
   *          the metric time series data
   * @param windowStart
   *          the beginning of the range corresponding to timeSeries, inclusive
   * @param windowEnd
   *          the end of the range corresponding to timeSeries, exclusive
   * @param knownAnomalies
   *          any known anomalies in the time range
   * @throws Exception if the score or severity cannot be computed
   */
  void updateMergedAnomalyInfo(MergedAnomalyResultDTO anomalyToUpdated, MetricTimeSeries timeSeries,
      DateTime windowStart, DateTime windowEnd, List<MergedAnomalyResultDTO> knownAnomalies)
      throws Exception;

  /**
   * Given any metric, this method returns the corresponding current and baseline time series to
   * be presented in the frontend. The given metric is not necessarily the data that is used for
   * detecting anomalies.
   *
   * For instance, if a function uses the average values of the past 3 weeks as the baseline for
   * anomaly detection, then this method should construct a baseline that contains the average
   * value of the past 3 weeks of any given metric.
   *
   * Note that the usage of this method should be similar to the method {@link #analyze}, i.e., it
   * does not take care of setting filters, dimension names, etc. for retrieving the data from the
   * backend database. Specifically, it only processes the given data, i.e., timeSeries, for
   * presentation purposes.
   *
   * The only difference between this method and {@link #analyze} is that their bucket sizes are
   * different. This method's bucket size is given by the frontend, which should be larger than or
   * equal to the minimum time granularity of the data. On the other hand, {@link #analyze}'s
   * bucket size is always the minimum time granularity of the data.
   *
   * @param timeSeries the time series that contains the metric to be processed
   * @param bucketMillis the size of a bucket in milliseconds
   * @param metric the metric name to retrieve the data from the given time series
   * @param viewWindowStartTime the start time bucket of the current time series, inclusive
   * @param viewWindowEndTime the end time bucket of the current time series, exclusive
   * @param knownAnomalies any known anomalies in the view window
   * @return two sets of time series: current and baseline values, to be presented in the frontend
   */
  AnomalyTimelinesView getTimeSeriesView(MetricTimeSeries timeSeries, long bucketMillis,
      String metric, long viewWindowStartTime, long viewWindowEndTime,
      List<MergedAnomalyResultDTO> knownAnomalies);

  /**
   * Returns the property keys recognized by this specific anomaly function.
   *
   * @return list of property keys applied in case of a specific anomaly function
   */
  String[] getPropertyKeys();

  /**
   * This method is added to support the viewing of anomalies on the dashboard.
   * When viewing anomalies, we need to fetch more data than just the anomaly
   * region (typically some buffer before and after the anomaly region).
   * This fetched data will be transformed, and then used to display on the front end.
   * The data we need to fetch may not follow a simple rule
   * such as, "fetch x number of days before and after for this granularity".
   * We have come to see that this data to fetch might depend on which function is fetching it,
   * as some functions are designed to already fetch all data they need, and no offset needs to
   * be supplied. Hence it is best that the function makes the decision of how much data to fetch.
   *
   * To summarize, this method will allow us to decide the offset for an anomaly region,
   * and append appropriate padding before and after the anomaly region, while FETCHING DATA FOR IT.
   *
   * @param datasetConfig the dataset configuration, used to determine granularity-dependent padding
   * @return the offset (padding before and after the anomaly region) to apply when fetching data
   */
  AnomalyOffset getAnomalyWindowOffset(DatasetConfigDTO datasetConfig);

  /**
   * This method is added to support the viewing of anomalies on the dashboard.
   * When displaying anomalies, we first fetch the data for the baseline and current.
   * Some functions fetch more historical data than just the anomaly window provided.
   * But when displaying, we should display only some of it, typically some padding before and
   * after the anomaly region. This is again logic which is specific to the function requesting it.
   * Hence it is best that the function makes the decision of how much of the fetched data it
   * wants to actually display.
   *
   * To summarize, this method will allow us to decide the offset for an anomaly region,
   * and append appropriate padding before and after the anomaly region, while VIEWING IT.
   *
   * @param datasetConfig the dataset configuration, used to determine granularity-dependent padding
   * @return the offset (padding before and after the anomaly region) to apply when displaying data
   */
  AnomalyOffset getViewWindowOffset(DatasetConfigDTO datasetConfig);
}