package com.linkedin.thirdeye.anomaly.detection;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import com.linkedin.pinot.pql.parsers.utils.Pair;
import com.linkedin.thirdeye.anomaly.override.OverrideConfigHelper;
import com.linkedin.thirdeye.api.DimensionKey;
import com.linkedin.thirdeye.api.DimensionMap;
import com.linkedin.thirdeye.api.MetricTimeSeries;
import com.linkedin.thirdeye.api.TimeGranularity;
import com.linkedin.thirdeye.client.DAORegistry;
import com.linkedin.thirdeye.client.MetricExpression;
import com.linkedin.thirdeye.client.ResponseParserUtils;
import com.linkedin.thirdeye.client.ThirdEyeCacheRegistry;
import com.linkedin.thirdeye.client.timeseries.TimeSeriesHandler;
import com.linkedin.thirdeye.client.timeseries.TimeSeriesRequest;
import com.linkedin.thirdeye.client.timeseries.TimeSeriesResponse;
import com.linkedin.thirdeye.client.timeseries.TimeSeriesResponseConverter;
import com.linkedin.thirdeye.client.timeseries.TimeSeriesRow;
import com.linkedin.thirdeye.dashboard.Utils;
import com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO;
import com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO;
import com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO;
import com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO;
import com.linkedin.thirdeye.detector.function.AnomalyFunctionFactory;
import com.linkedin.thirdeye.detector.function.BaseAnomalyFunction;
import com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor;
import com.linkedin.thirdeye.util.ThirdEyeUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.lang.NullArgumentException;
import org.apache.commons.lang3.StringUtils;
import org.joda.time.DateTime;
import org.quartz.JobExecutionException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Builder that assembles an {@link AnomalyDetectionInputContext} for one anomaly function run:
 * time series data, previously detected raw/merged anomalies, scaling factors, and the global
 * metric. Call {@code init(...)} first, then the desired {@code fetch*} methods, then {@code build()}.
 */
public class AnomalyDetectionInputContextBuilder {
private static final Logger LOG = LoggerFactory.getLogger(AnomalyDetectionInputContextBuilder.class);
private static final DAORegistry DAO_REGISTRY = DAORegistry.getInstance();
// Accumulates everything the fetch* methods load; returned unchanged by build().
private AnomalyDetectionInputContext anomalyDetectionInputContext;
// Persistent spec of the anomaly function being built for; set in init().
private AnomalyFunctionDTO anomalyFunctionSpec;
// Concrete detection function instantiated from the spec by the factory in init().
private BaseAnomalyFunction anomalyFunction;
private AnomalyFunctionFactory anomalyFunctionFactory;
// Dimension names of the function's dataset, loaded from the dataset config in init().
private List<String> collectionDimensions;
// Name of the dataset (collection) the function runs on; copied from the spec in init().
private String dataset;
/**
 * Creates a builder. {@code init(...)} must be called before any fetch method is used.
 *
 * @param anomalyFunctionFactory factory used to instantiate the anomaly function from its spec
 */
public AnomalyDetectionInputContextBuilder(AnomalyFunctionFactory anomalyFunctionFactory) {
this.anomalyFunctionFactory = anomalyFunctionFactory;
}
/**
 * Initializes the builder for the given function spec with a brand-new, empty
 * {@link AnomalyDetectionInputContext}.
 *
 * @param anomalyFunctionSpec spec of the anomaly function to build input for
 * @return this builder
 * @throws Exception if the function cannot be instantiated or its dataset is not found
 */
public AnomalyDetectionInputContextBuilder init(AnomalyFunctionDTO anomalyFunctionSpec) throws Exception {
return init(anomalyFunctionSpec, new AnomalyDetectionInputContext());
}
/**
 * Initializes the builder for the given function spec, reusing the supplied context so callers
 * can layer several builder passes onto one context.
 *
 * @param anomalyFunctionSpec spec of the anomaly function to build input for
 * @param anomalyDetectionInputContext the context instance the fetch methods will populate
 * @return this builder
 * @throws Exception if the anomaly function cannot be instantiated from the spec
 * @throws NullArgumentException if the spec's dataset has no dataset config in the DB
 */
public AnomalyDetectionInputContextBuilder init(AnomalyFunctionDTO anomalyFunctionSpec, AnomalyDetectionInputContext anomalyDetectionInputContext)
throws Exception {
this.anomalyFunctionSpec = anomalyFunctionSpec;
this.anomalyFunction = anomalyFunctionFactory.fromSpec(anomalyFunctionSpec);
this.anomalyDetectionInputContext = anomalyDetectionInputContext;
this.dataset = this.anomalyFunctionSpec.getCollection();
DatasetConfigDTO datasetConfig = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(dataset);
if (datasetConfig == null) {
// Fail fast: every downstream fetch needs the dataset's dimension list.
LOG.error("Dataset [" + dataset + "] is not found");
throw new NullArgumentException(
"Dataset [" + dataset + "] is not found with function : " + anomalyFunctionSpec
.toString());
}
this.collectionDimensions = datasetConfig.getDimensions();
return this;
}
/**
 * Returns the context populated by the preceding {@code fetch*} calls.
 *
 * @return the assembled {@link AnomalyDetectionInputContext}
 */
public AnomalyDetectionInputContext build() {
  return anomalyDetectionInputContext;
}
/**
 * Fetches time series data from Pinot for the given time ranges and stores the per-dimension
 * series on the input context. Series belonging to the synthetic OTHER/UNKNOWN roll-up bucket
 * are dropped, since anomalies on them are not meaningful.
 *
 * @param startEndTimeRanges the time ranges for which time series are actually fetched
 * @param endTimeInclusive whether the end of each range is included in the fetched data
 * @return this builder
 * @throws JobExecutionException
 * @throws ExecutionException
 */
public AnomalyDetectionInputContextBuilder fetchTimeSeriesData(List<Pair<Long, Long>> startEndTimeRanges, boolean endTimeInclusive)
    throws JobExecutionException, ExecutionException {
  Map<DimensionKey, MetricTimeSeries> seriesByKey =
      getTimeSeriesForAnomalyDetection(anomalyFunctionSpec, startEndTimeRanges, endTimeInclusive);
  Map<DimensionMap, MetricTimeSeries> seriesByDimensionMap = new HashMap<>();
  for (Map.Entry<DimensionKey, MetricTimeSeries> entry : seriesByKey.entrySet()) {
    // Skip the OTHER/UNKNOWN bucket: it aggregates time series whose total is below 1% of the
    // overall sum, so anomalies detected on it would be meaningless.
    if (hasOtherOrUnknownValue(entry.getKey())) {
      continue;
    }
    DimensionMap dimensionMap = DimensionMap.fromDimensionKey(entry.getKey(), collectionDimensions);
    seriesByDimensionMap.put(dimensionMap, entry.getValue());
    if (entry.getValue().getTimeWindowSet().size() < 1) {
      LOG.warn("Insufficient data for {} to run anomaly detection function", dimensionMap);
    }
  }
  this.anomalyDetectionInputContext.setDimensionKeyMetricTimeSeriesMap(seriesByDimensionMap);
  return this;
}

// Returns true if any dimension value of the key is the synthetic OTHER or UNKNOWN bucket value.
private static boolean hasOtherOrUnknownValue(DimensionKey dimensionKey) {
  for (String dimensionValue : dimensionKey.getDimensionValues()) {
    if (dimensionValue.equalsIgnoreCase(ResponseParserUtils.OTHER)
        || dimensionValue.equalsIgnoreCase(ResponseParserUtils.UNKNOWN)) {
      return true;
    }
  }
  return false;
}
/**
 * Fetches time series data from Pinot for a monitoring window. The actual data ranges are
 * computed by {@code anomalyFunction.getDataRangeIntervals}; {@code endTimeInclusive} controls
 * whether the window end is part of the fetched series.
 *
 * @param windowStart the start time of the monitoring window
 * @param windowEnd the end time of the monitoring window
 * @param endTimeInclusive true if the end time is included in the data fetching process
 * @return this builder
 */
public AnomalyDetectionInputContextBuilder fetchTimeSeriesData(DateTime windowStart, DateTime windowEnd, boolean endTimeInclusive)
    throws JobExecutionException, ExecutionException {
  return fetchTimeSeriesData(
      anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis()),
      endTimeInclusive);
}
/**
 * Fetches time series data from Pinot for a monitoring window, with the window end excluded
 * (endTimeInclusive defaults to false). The actual data ranges are computed by
 * {@code anomalyFunction.getDataRangeIntervals}.
 *
 * @param windowStart the start time of the monitoring window
 * @param windowEnd the end time of the monitoring window
 * @return this builder
 */
public AnomalyDetectionInputContextBuilder fetchTimeSeriesData(DateTime windowStart, DateTime windowEnd)
    throws JobExecutionException, ExecutionException {
  // Delegate to the three-arg overload instead of recomputing the data ranges here; the two
  // overloads previously duplicated the getDataRangeIntervals call.
  return fetchTimeSeriesData(windowStart, windowEnd, false);
}
/**
 * Fetches time series data from Pinot for the given time ranges, with the range ends excluded
 * (endTimeInclusive defaults to false).
 *
 * @param startEndTimeRanges
 *      The time ranges for which we should fetch data
 * @return
 *      the builder of the AnomalyDetectionInputContext
 */
public AnomalyDetectionInputContextBuilder fetchTimeSeriesData(List<Pair<Long, Long>> startEndTimeRanges)
throws JobExecutionException, ExecutionException {
return fetchTimeSeriesData(startEndTimeRanges, false);
}
/**
 * Fetches the time series for one specific dimension combination over a monitoring window.
 * The actual data ranges are computed by {@code anomalyFunction.getDataRangeIntervals}.
 *
 * @param windowStart the start time for retrieving the data
 * @param windowEnd the end time for retrieving the data
 * @param dimensions the dimension combination of the data
 * @param endTimeInclusive set to true if the end time should be inclusive; mainly used by UI queries
 * @return this builder
 * @throws Exception if it fails to retrieve time series from the DB
 */
public AnomalyDetectionInputContextBuilder fetchTimeSeriesDataByDimension(DateTime windowStart, DateTime windowEnd,
    DimensionMap dimensions, boolean endTimeInclusive)
    throws Exception {
  List<Pair<Long, Long>> dataRanges =
      anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis());
  return fetchTimeSeriesDataByDimension(dataRanges, dimensions, endTimeInclusive);
}
/**
 * Fetches the time series for one specific dimension combination over the given time ranges and
 * stores it on the input context, keyed by that dimension combination.
 *
 * @param startEndTimeRanges the start and end time ranges for retrieving the data
 * @param dimensions the dimension combination of the data
 * @param endTimeInclusive set to true if the end time should be inclusive; mainly used by UI queries
 * @return this builder
 * @throws Exception if it fails to retrieve time series from the DB
 */
public AnomalyDetectionInputContextBuilder fetchTimeSeriesDataByDimension(List<Pair<Long, Long>> startEndTimeRanges,
    DimensionMap dimensions, boolean endTimeInclusive)
    throws Exception {
  TimeGranularity bucketGranularity =
      new TimeGranularity(anomalyFunctionSpec.getBucketSize(), anomalyFunctionSpec.getBucketUnit());
  // Retrieve the single series matching the requested dimension combination.
  MetricTimeSeries series =
      getTimeSeriesByDimension(anomalyFunctionSpec, startEndTimeRanges, dimensions, bucketGranularity,
          endTimeInclusive);
  Map<DimensionMap, MetricTimeSeries> seriesByDimension = new HashMap<>();
  seriesByDimension.put(dimensions, series);
  this.anomalyDetectionInputContext.setDimensionKeyMetricTimeSeriesMap(seriesByDimension);
  return this;
}
/**
 * Fetches the global metric — no dimension grouping, no filters — for a monitoring window.
 * The actual data ranges are computed by {@code anomalyFunction.getDataRangeIntervals}.
 *
 * @param windowStart the start time of the monitoring window
 * @param windowEnd the end time of the monitoring window
 * @return this builder
 * @throws JobExecutionException
 * @throws ExecutionException
 */
public AnomalyDetectionInputContextBuilder fetchTimeSeriesGlobalMetric(DateTime windowStart, DateTime windowEnd)
    throws JobExecutionException, ExecutionException {
  List<Pair<Long, Long>> dataRanges =
      anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis());
  return fetchTimeSeriesGlobalMetric(dataRanges);
}
/**
 * Fetches the global metric — no dimension grouping, no filters — for the given time ranges and
 * stores it on the input context.
 *
 * @param startEndTimeRanges the time ranges for which the time series data should be fetched
 * @return this builder
 * @throws JobExecutionException
 * @throws ExecutionException
 */
public AnomalyDetectionInputContextBuilder fetchTimeSeriesGlobalMetric(List<Pair<Long, Long>> startEndTimeRanges)
    throws JobExecutionException, ExecutionException {
  MetricTimeSeries globalMetricSeries = getGlobalMetric(anomalyFunctionSpec, startEndTimeRanges);
  this.anomalyDetectionInputContext.setGlobalMetric(globalMetricSeries);
  return this;
}
/**
 * Fetches the existing RawAnomalyResults in the monitoring window, grouped by dimension, and
 * stores them on the input context. Always called so that duplicate raw anomalies are not
 * re-generated.
 *
 * <p>NOTE: the misspelled method name ("Exixting") is preserved for caller compatibility.
 *
 * @param windowStart the start time of the monitoring window
 * @param windowEnd the end time of the monitoring window
 * @return this builder
 */
public AnomalyDetectionInputContextBuilder fetchExixtingRawAnomalies(DateTime windowStart, DateTime windowEnd) {
  ArrayListMultimap<DimensionMap, RawAnomalyResultDTO> rawAnomaliesByDimension = ArrayListMultimap.create();
  for (RawAnomalyResultDTO rawAnomaly
      : getExistingRawAnomalies(anomalyFunctionSpec.getId(), windowStart.getMillis(), windowEnd.getMillis())) {
    rawAnomaliesByDimension.put(rawAnomaly.getDimensions(), rawAnomaly);
  }
  this.anomalyDetectionInputContext.setExistingRawAnomalies(rawAnomaliesByDimension);
  return this;
}
/**
 * Fetches existing MergedAnomalyResults for the monitoring window and stores them on the input
 * context. If the anomaly function uses history anomalies, all data ranges from
 * {@code anomalyFunction.getDataRangeIntervals} are queried; otherwise only the current window is
 * queried, which suffices to deduplicate raw anomalies.
 *
 * <p>NOTE: the misspelled method name ("Exixting") is preserved for caller compatibility.
 *
 * @param windowStart the start time of the monitoring window
 * @param windowEnd the end time of the monitoring window
 * @return this builder
 */
public AnomalyDetectionInputContextBuilder fetchExixtingMergedAnomalies(DateTime windowStart, DateTime windowEnd) {
  // Fix: removed the local "knownMergedAnomalies", which was declared but never used.
  if (anomalyFunction.useHistoryAnomaly()) {
    // History-based functions need anomalies from every baseline range, not just the current window.
    return fetchExixtingMergedAnomalies(
        anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis()));
  }
  // Otherwise, only the current window matters, to avoid generating duplicate raw anomalies.
  List<Pair<Long, Long>> currentTimeRange = new ArrayList<>();
  currentTimeRange.add(new Pair<>(windowStart.getMillis(), windowEnd.getMillis()));
  return fetchExixtingMergedAnomalies(currentTimeRange);
}
/**
 * Fetches existing MergedAnomalyResults of this function in the given time ranges, for all
 * combinations of dimensions, grouped by dimension, and stores them on the input context.
 *
 * @param startEndTimeRanges the time ranges for retrieving the known merged anomalies
 * @return this builder
 */
public AnomalyDetectionInputContextBuilder fetchExixtingMergedAnomalies(List<Pair<Long, Long>> startEndTimeRanges) {
  // Group every known merged anomaly of this function by its dimension combination.
  ArrayListMultimap<DimensionMap, MergedAnomalyResultDTO> mergedAnomaliesByDimension = ArrayListMultimap.create();
  for (MergedAnomalyResultDTO mergedAnomaly
      : getKnownMergedAnomalies(anomalyFunctionSpec.getId(), startEndTimeRanges)) {
    mergedAnomaliesByDimension.put(mergedAnomaly.getDimensions(), mergedAnomaly);
  }
  this.anomalyDetectionInputContext.setKnownMergedAnomalies(mergedAnomaliesByDimension);
  return this;
}
/**
 * Fetches existing MergedAnomalyResults for one specific dimension combination over a monitoring
 * window; the data ranges are computed by anomalyFunction.getDataRangeIntervals.
 *
 * @param windowStart the start time of the monitoring window
 * @param windowEnd the end time of the monitoring window
 * @param dimensions the dimension combination to retrieve anomalies for
 * @return the builder of the AnomalyDetectionInputContext
 */
public AnomalyDetectionInputContextBuilder fetchExistingMergedAnomaliesByDimension(DateTime windowStart,
DateTime windowEnd, DimensionMap dimensions) {
return fetchExistingMergedAnomaliesByDimension(
anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis()), dimensions);
}
/**
 * Fetches existing MergedAnomalyResults for one specific dimension combination over the given
 * time ranges and stores them on the input context under that dimension key.
 *
 * @param startEndTimeRanges the time ranges for retrieving the known merged anomalies
 * @param dimensions the dimension combination to retrieve anomalies for
 * @return this builder
 */
public AnomalyDetectionInputContextBuilder fetchExistingMergedAnomaliesByDimension(List<Pair<Long, Long>> startEndTimeRanges,
    DimensionMap dimensions) {
  List<MergedAnomalyResultDTO> anomalies =
      getKnownMergedAnomaliesByDimension(anomalyFunctionSpec.getId(), startEndTimeRanges, dimensions);
  ArrayListMultimap<DimensionMap, MergedAnomalyResultDTO> anomaliesByDimension = ArrayListMultimap.create();
  anomaliesByDimension.putAll(dimensions, anomalies);
  this.anomalyDetectionInputContext.setKnownMergedAnomalies(anomaliesByDimension);
  return this;
}
/**
 * Fetches the scaling factors that overlap the function's data ranges for the monitoring window
 * and stores them on the input context.
 *
 * <p>NOTE: the misspelled method name ("Sacling") is preserved for caller compatibility.
 *
 * @param windowStart the start time of the monitoring window
 * @param windowEnd the end time of the monitoring window
 * @return this builder
 */
public AnomalyDetectionInputContextBuilder fetchSaclingFactors(DateTime windowStart, DateTime windowEnd) {
  List<Pair<Long, Long>> dataRanges =
      anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis());
  List<ScalingFactor> scalingFactors = OverrideConfigHelper.getTimeSeriesScalingFactors(
      DAO_REGISTRY.getOverrideConfigDAO(), anomalyFunctionSpec.getCollection(),
      anomalyFunctionSpec.getMetric(), anomalyFunctionSpec.getId(), dataRanges);
  this.anomalyDetectionInputContext.setScalingFactors(scalingFactors);
  return this;
}
/**
 * Returns existing raw anomalies of the function in the given monitoring window.
 *
 * @param functionId the id of the anomaly function
 * @param monitoringWindowStart inclusive
 * @param monitoringWindowEnd inclusive but it doesn't matter
 * @return known raw anomalies in the given window; empty on lookup failure
 */
private List<RawAnomalyResultDTO> getExistingRawAnomalies(long functionId, long monitoringWindowStart,
    long monitoringWindowEnd) {
  List<RawAnomalyResultDTO> results = new ArrayList<>();
  try {
    results.addAll(DAO_REGISTRY.getRawAnomalyResultDAO()
        .findAllByTimeAndFunctionId(monitoringWindowStart, monitoringWindowEnd, functionId));
  } catch (Exception e) {
    // Best effort: an empty result only risks re-generating duplicate raw anomalies.
    LOG.error("Exception in getting existing anomalies", e);
  }
  return results;
}
/**
 * Returns all known merged anomalies of the function id that are needed for anomaly detection,
 * i.e., the merged anomalies that overlap with the monitoring window and baseline windows.
 *
 * @param functionId the id of the anomaly function
 * @param startEndTimeRanges the time ranges for retrieving the known merged anomalies
 * @return known merged anomalies of the function id that are needed for anomaly detection
 */
private List<MergedAnomalyResultDTO> getKnownMergedAnomalies(long functionId, List<Pair<Long, Long>> startEndTimeRanges) {
  List<MergedAnomalyResultDTO> results = new ArrayList<>();
  for (Pair<Long, Long> range : startEndTimeRanges) {
    try {
      results.addAll(DAO_REGISTRY.getMergedAnomalyResultDAO()
          .findAllConflictByFunctionId(functionId, range.getFirst(), range.getSecond()));
    } catch (Exception e) {
      // Best effort per range: one failed lookup should not abort the remaining ranges.
      LOG.error("Exception in getting merged anomalies", e);
    }
  }
  return results;
}
/**
 * Returns all known merged anomalies of the function id, restricted to one dimension combination,
 * that are needed for anomaly detection, i.e., the merged anomalies that overlap with the
 * monitoring window and baseline windows.
 *
 * @param functionId the id of the anomaly function
 * @param startEndTimeRanges the time ranges for retrieving the known merged anomalies
 * @param dimensions the dimension combination to restrict the lookup to
 * @return known merged anomalies of the function id for the given dimension combination
 */
private List<MergedAnomalyResultDTO> getKnownMergedAnomaliesByDimension(long functionId,
    List<Pair<Long, Long>> startEndTimeRanges, DimensionMap dimensions) {
  List<MergedAnomalyResultDTO> results = new ArrayList<>();
  for (Pair<Long, Long> range : startEndTimeRanges) {
    try {
      results.addAll(DAO_REGISTRY.getMergedAnomalyResultDAO()
          .findAllConflictByFunctionIdDimensions(functionId, range.getFirst(), range.getSecond(),
              dimensions.toString()));
    } catch (Exception e) {
      // Best effort per range: one failed lookup should not abort the remaining ranges.
      LOG.error("Exception in getting merged anomalies", e);
    }
  }
  return results;
}
/**
 * Returns the metric filter setting of an anomaly function as a multimap; an empty multimap when
 * the function configures no filters.
 *
 * @param anomalyFunctionSpec spec of the anomaly function
 * @return the parsed filter multimap, never null
 */
public static Multimap<String, String> getFiltersForFunction(AnomalyFunctionDTO anomalyFunctionSpec) {
  String filterString = anomalyFunctionSpec.getFilters();
  if (StringUtils.isBlank(filterString)) {
    return HashMultimap.create();
  }
  return ThirdEyeUtils.getFilterSet(filterString);
}
/**
 * Returns the explore dimensions of an anomaly function as a list; an empty list when the
 * function configures none.
 *
 * @param anomalyFunctionSpec spec of the anomaly function
 * @return the comma-separated explore dimensions split into a list, never null
 */
public static List<String> getDimensionsForFunction(AnomalyFunctionDTO anomalyFunctionSpec) {
  String exploreDimensionString = anomalyFunctionSpec.getExploreDimensions();
  if (StringUtils.isBlank(exploreDimensionString)) {
    return Collections.emptyList();
  }
  return Arrays.asList(exploreDimensionString.trim().split(","));
}
/**
 * Returns the set of metric time series that are needed by the given anomaly function for
 * detecting anomalies. The time granularity is the granularity of the function's collection,
 * i.e., the buckets are not aggregated, in order to increase detection accuracy.
 *
 * @param anomalyFunctionSpec spec of the anomaly function
 * @param startEndTimeRanges the time ranges to retrieve the data for constructing the time series
 * @param endTimeInclusive if the end time is included
 * @return the data that is needed by the anomaly function for detecting anomalies; an empty map
 *         if the schema dimension names cannot be resolved
 * @throws JobExecutionException
 * @throws ExecutionException
 */
public Map<DimensionKey, MetricTimeSeries> getTimeSeriesForAnomalyDetection(
    AnomalyFunctionDTO anomalyFunctionSpec, List<Pair<Long, Long>> startEndTimeRanges, boolean endTimeInclusive)
    throws JobExecutionException, ExecutionException {
  Multimap<String, String> filters = getFiltersForFunction(anomalyFunctionSpec);
  List<String> groupByDimensions = getDimensionsForFunction(anomalyFunctionSpec);
  TimeGranularity timeGranularity = new TimeGranularity(anomalyFunctionSpec.getBucketSize(),
      anomalyFunctionSpec.getBucketUnit());
  // Renamed from "dataset": the previous local shadowed the String field of the same name.
  DatasetConfigDTO datasetConfig =
      DAO_REGISTRY.getDatasetConfigDAO().findByDataset(anomalyFunctionSpec.getCollection());
  // Only additive datasets can be rolled up into the OTHER bucket.
  boolean doRollUp = datasetConfig.isAdditive();
  TimeSeriesResponse timeSeriesResponse =
      getTimeSeriesResponseImpl(anomalyFunctionSpec, startEndTimeRanges,
          timeGranularity, filters, groupByDimensions, endTimeInclusive, doRollUp);
  try {
    return TimeSeriesResponseConverter.toMap(timeSeriesResponse,
        Utils.getSchemaDimensionNames(anomalyFunctionSpec.getCollection()));
  } catch (Exception e) {
    // Fix: the previous LOG.info call had no "{}" placeholder, so e.toString() was silently
    // dropped by SLF4J; a failure is also worth WARN rather than INFO.
    LOG.warn("Failed to get schema dimensions for constructing dimension keys: {}", e.toString());
    return Collections.emptyMap();
  }
}
/**
 * Returns the metric time series that were given to the anomaly function for anomaly detection.
 * If the dimension to retrieve is OTHER, this method retrieves all combinations of dimensions and
 * calculates the metric time series for the OTHER dimension on-the-fly.
 *
 * @param anomalyFunctionSpec spec of the anomaly function
 * @param startEndTimeRanges the time ranges to retrieve the data for constructing the time series
 * @param dimensionMap a dimension map that is used to construct the filter for retrieving the
 *        corresponding data that was used to detect the anomaly
 * @param timeGranularity time granularity of the time series
 * @param endTimeInclusive set to true if the end time should be inclusive; mainly used by UI queries
 * @return the time series in the same format as those used by the given anomaly function for
 *         anomaly detection; null if the schema dimension names cannot be resolved
 * @throws JobExecutionException
 * @throws ExecutionException
 */
public MetricTimeSeries getTimeSeriesByDimension(AnomalyFunctionDTO anomalyFunctionSpec,
    List<Pair<Long, Long>> startEndTimeRanges, DimensionMap dimensionMap, TimeGranularity timeGranularity,
    boolean endTimeInclusive)
    throws JobExecutionException, ExecutionException {
  // Start from the function's configured filters, then narrow them to the given dimension combination.
  Multimap<String, String> filters = getFiltersForFunction(anomalyFunctionSpec);
  filters = ThirdEyeUtils.getFilterSetFromDimensionMap(dimensionMap, filters);
  boolean hasOTHERDimensionName = false;
  for (String dimensionValue : dimensionMap.values()) {
    if (dimensionValue.equalsIgnoreCase(ResponseParserUtils.OTHER)) {
      hasOTHERDimensionName = true;
      break;
    }
  }
  // groupByDimensions (i.e., exploreDimensions) is empty by default because the query already has
  // the decorated filters according to anomalies' explore dimensions. However, if any dimension
  // has the synthetic value OTHER, the original groupBy must be honored to reconstruct OTHER.
  List<String> groupByDimensions = Collections.emptyList();
  // Fix: isNotBlank is null-safe; the previous code called trim() on getExploreDimensions()
  // before the blank check and would NPE when explore dimensions were not configured.
  if (hasOTHERDimensionName && StringUtils.isNotBlank(anomalyFunctionSpec.getExploreDimensions())) {
    groupByDimensions = Arrays.asList(anomalyFunctionSpec.getExploreDimensions().trim().split(","));
  }
  final boolean doRollUp = false;
  TimeSeriesResponse response =
      getTimeSeriesResponseImpl(anomalyFunctionSpec, startEndTimeRanges,
          timeGranularity, filters, groupByDimensions, endTimeInclusive, doRollUp);
  try {
    Map<DimensionKey, MetricTimeSeries> metricTimeSeriesMap = TimeSeriesResponseConverter.toMap(response,
        Utils.getSchemaDimensionNames(anomalyFunctionSpec.getCollection()));
    return extractMetricTimeSeriesByDimension(metricTimeSeriesMap);
  } catch (Exception e) {
    LOG.warn("Unable to get schema dimension name for retrieving metric time series: {}", e.toString());
    return null;
  }
}
/**
 * Returns a global metric for an anomaly function to calculate the contribution of anomalies to
 * the global metric. Uses the function's configured global metric when set, otherwise the
 * function's own metric.
 *
 * @param anomalyFunctionSpec spec of the anomaly function
 * @param startEndTimeRanges the time ranges to retrieve the data for constructing the time series
 * @return the global metric time series, or null if it cannot be fetched
 * @throws JobExecutionException
 * @throws ExecutionException
 */
public MetricTimeSeries getGlobalMetric(AnomalyFunctionDTO anomalyFunctionSpec, List<Pair<Long, Long>> startEndTimeRanges)
    throws JobExecutionException, ExecutionException {
  // The global metric is fetched without any dimension grouping or filtering.
  Multimap<String, String> filters = HashMultimap.create();
  List<String> groupByDimensions = Collections.emptyList();
  TimeGranularity timeGranularity = new TimeGranularity(anomalyFunctionSpec.getBucketSize(),
      anomalyFunctionSpec.getBucketUnit());
  // Fix: removed an unused DatasetConfigDTO lookup that issued a needless DB query.
  boolean doRollUp = false;
  List<String> metricsToFetch = new ArrayList<>();
  if (StringUtils.isNotEmpty(anomalyFunctionSpec.getGlobalMetric())) {
    metricsToFetch.add(anomalyFunctionSpec.getGlobalMetric());
  } else {
    metricsToFetch.add(anomalyFunctionSpec.getMetric());
  }
  TimeSeriesResponse timeSeriesResponse =
      getTimeSeriesResponseImpl(anomalyFunctionSpec, metricsToFetch, startEndTimeRanges,
          timeGranularity, filters, groupByDimensions, false, doRollUp);
  MetricTimeSeries globalMetric = null;
  try {
    Map<DimensionKey, MetricTimeSeries> dimensionKeyMetricTimeSeriesMap =
        TimeSeriesResponseConverter.toMap(timeSeriesResponse,
            Utils.getSchemaDimensionNames(anomalyFunctionSpec.getCollection()));
    if (MapUtils.isEmpty(dimensionKeyMetricTimeSeriesMap)) {
      // Fix: previously this only logged and then fell through to iterator().next(),
      // throwing NoSuchElementException on an empty map.
      LOG.error("Unable to fetch global metric for {}", anomalyFunctionSpec);
      return null;
    }
    if (dimensionKeyMetricTimeSeriesMap.size() > 1) {
      // Fix: the condition was "> 2", which contradicted the "More than 1" message below.
      LOG.warn("More than 1 dimensions when fetching traffic data for {}; take the 1st dimension", anomalyFunctionSpec);
    }
    globalMetric = dimensionKeyMetricTimeSeriesMap.values().iterator().next();
  } catch (Exception e) {
    // Fix: the previous format string had no "{}" placeholder, so e.toString() was dropped by SLF4J.
    LOG.warn("Failed to get schema dimensions for constructing dimension keys: {}", e.toString());
  }
  return globalMetric;
}
/**
 * Extracts the time series on which the anomaly was detected from the parsed Pinot results.
 *
 * <p>Two cases: (1) the map holds a single series for a concrete dimension combination, which is
 * returned directly; (2) the map holds every explored dimension combination (because the anomaly
 * was on the OTHER bucket), in which case the entry whose key contains the OTHER value is returned.
 *
 * @param metricTimeSeriesMap parsed Pinot results keyed by dimension
 * @return the matching time series, or null if the map is empty or no OTHER entry exists
 */
private MetricTimeSeries extractMetricTimeSeriesByDimension(Map<DimensionKey, MetricTimeSeries> metricTimeSeriesMap) {
  if (MapUtils.isEmpty(metricTimeSeriesMap)) {
    return null;
  }
  // Single entry: the series of a concrete dimension combination — return it directly.
  if (metricTimeSeriesMap.size() == 1) {
    return metricTimeSeriesMap.values().iterator().next();
  }
  // Multiple entries: locate the series whose dimension key carries the synthetic OTHER value.
  for (Map.Entry<DimensionKey, MetricTimeSeries> entry : metricTimeSeriesMap.entrySet()) {
    for (String dimensionValue : entry.getKey().getDimensionValues()) {
      if (dimensionValue.equalsIgnoreCase(ResponseParserUtils.OTHER)) {
        return entry.getValue();
      }
    }
  }
  return null;
}
/**
 * Convenience overload that fetches the function's own metrics (anomalyFunctionSpec.getMetrics())
 * by delegating to the full implementation below.
 */
private TimeSeriesResponse getTimeSeriesResponseImpl(AnomalyFunctionDTO anomalyFunctionSpec,
List<Pair<Long, Long>> startEndTimeRanges, TimeGranularity timeGranularity, Multimap<String, String> filters,
List<String> groupByDimensions, boolean endTimeInclusive, boolean doRollUp)
throws JobExecutionException, ExecutionException {
return getTimeSeriesResponseImpl(anomalyFunctionSpec, anomalyFunctionSpec.getMetrics(), startEndTimeRanges,
timeGranularity, filters, groupByDimensions, endTimeInclusive, doRollUp);
}
/**
 * Issues one asynchronous Pinot time series query per time range and merges the resulting rows
 * (deduplicated, since overlapping ranges may return the same buckets) into a single response.
 *
 * @param anomalyFunctionSpec spec of the anomaly function; supplies collection and metric function
 * @param metrics the metric names to retrieve, joined into one expression string
 * @param startEndTimeRanges the time ranges to query
 * @param timeGranularity aggregation granularity of the request
 * @param filters filter multimap applied to every request
 * @param groupByDimensions dimensions to group by; empty for no grouping
 * @param endTimeInclusive whether each range's end instant is included
 * @param doRollUp whether small dimension values are rolled up into the OTHER bucket
 * @return a TimeSeriesResponse containing the merged, deduplicated rows
 * @throws JobExecutionException
 * @throws ExecutionException if a query future fails
 */
private TimeSeriesResponse getTimeSeriesResponseImpl(AnomalyFunctionDTO anomalyFunctionSpec, List<String> metrics,
    List<Pair<Long, Long>> startEndTimeRanges, TimeGranularity timeGranularity, Multimap<String, String> filters,
    List<String> groupByDimensions, boolean endTimeInclusive, boolean doRollUp)
    throws JobExecutionException, ExecutionException {
  TimeSeriesHandler timeSeriesHandler =
      new TimeSeriesHandler(ThirdEyeCacheRegistry.getInstance().getQueryCache(), doRollUp);
  // Seed request carries every property shared by the per-range requests.
  TimeSeriesRequest seedRequest = new TimeSeriesRequest();
  seedRequest.setCollectionName(anomalyFunctionSpec.getCollection());
  // TODO: Check low level support for multiple metrics retrieval
  String metricsToRetrieve = StringUtils.join(metrics, ",");
  List<MetricExpression> metricExpressions = Utils
      .convertToMetricExpressions(metricsToRetrieve,
          anomalyFunctionSpec.getMetricFunction(), anomalyFunctionSpec.getCollection());
  seedRequest.setMetricExpressions(metricExpressions);
  seedRequest.setAggregationTimeGranularity(timeGranularity);
  seedRequest.setFilterSet(filters);
  seedRequest.setGroupByDimensions(groupByDimensions);
  // Fix: removed a preceding setEndDateInclusive(false) that was dead code — this call always
  // overwrote it.
  seedRequest.setEndDateInclusive(endTimeInclusive);
  LOG.info("Found [{}] time ranges to fetch data", startEndTimeRanges.size());
  for (Pair<Long, Long> timeRange : startEndTimeRanges) {
    LOG.info("Start Time [{}], End Time [{}] for anomaly analysis", new DateTime(timeRange.getFirst()),
        new DateTime(timeRange.getSecond()));
  }
  // Fan out: one asynchronous request per time range.
  List<Future<TimeSeriesResponse>> futureResponses = new ArrayList<>();
  List<TimeSeriesRequest> requests = new ArrayList<>();
  Set<TimeSeriesRow> timeSeriesRowSet = new HashSet<>();
  for (Pair<Long, Long> startEndInterval : startEndTimeRanges) {
    TimeSeriesRequest request = new TimeSeriesRequest(seedRequest);
    DateTime startTime = new DateTime(startEndInterval.getFirst());
    DateTime endTime = new DateTime(startEndInterval.getSecond());
    request.setStart(startTime);
    request.setEnd(endTime);
    Future<TimeSeriesResponse> response = timeSeriesHandler.asyncHandle(request);
    if (response != null) {
      futureResponses.add(response);
      requests.add(request);
      LOG.info("Fetching data with startTime: [{}], endTime: [{}], metricExpressions: [{}], timeGranularity: [{}]",
          startTime, endTime, metricExpressions, timeGranularity);
    }
  }
  // Fan in: collect rows into a set so overlapping ranges do not duplicate buckets.
  for (int i = 0; i < futureResponses.size(); i++) {
    Future<TimeSeriesResponse> futureResponse = futureResponses.get(i);
    TimeSeriesRequest request = requests.get(i);
    try {
      TimeSeriesResponse response = futureResponse.get();
      timeSeriesRowSet.addAll(response.getRows());
    } catch (InterruptedException e) {
      // Fix: restore the interrupt flag so callers up the stack can observe the interruption,
      // and include the exception in the log instead of discarding it.
      Thread.currentThread().interrupt();
      LOG.warn("Failed to fetch data with request: [{}]", request, e);
    }
  }
  timeSeriesHandler.shutdownAsyncHandler();
  List<TimeSeriesRow> timeSeriesRows = new ArrayList<>(timeSeriesRowSet);
  return new TimeSeriesResponse(timeSeriesRows);
}
}