package com.linkedin.thirdeye.anomalydetection.datafilter; import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.thirdeye.api.DimensionMap; import com.linkedin.thirdeye.api.MetricTimeSeries; import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.NavigableMap; import java.util.TreeMap; import org.apache.commons.collections.MapUtils; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * The data filter determines whether the average value of a time series passes the threshold. * * Advance Usage 1: A bucket of the time series is taken into consider only if its value is located inside the live * zone, which is specified by minLiveZone and maxLiveZone. In other words, if a bucket's value is smaller than * minLiveZone or is larger than maxLiveZone, then this bucket is ignored when calculating the average value. * * Advance Usage 2: The threshold could be overridden for different dimensions. For instance, the default threshold * for any combination of dimensions could be 1000. However, we could override the threshold for any sub-dimensions that * belong to this dimension {country=US}, e.g., {country=US, pageName=homePage} is a sub-dimension of {country=US}. */ public class AverageThresholdDataFilter extends BaseDataFilter { private static final Logger LOG = LoggerFactory.getLogger(AverageThresholdDataFilter.class); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); public static final String METRIC_NAME_KEY = "metricName"; public static final String THRESHOLD_KEY = "threshold"; // If the value of a bucket is smaller than MIN_LIVE_ZONE_KEY, then that bucket is omitted public static final String MIN_LIVE_ZONE_KEY = "minLiveZone"; // If the value of a bucket is larger than MAX_LIVE_ZONE_KEY, then that bucket is omitted public static final String MAX_LIVE_ZONE_KEY = "maxLiveZone"; // Threshold to the percentage of live buckets among all buckets public static final String LIVE_BUCKETS_PERCENTAGE_KEY = "liveBucketsPctThreshold"; // Override threshold to different dimension map public static final String OVERRIDE_THRESHOLD_KEY = "overrideThreshold"; private static final double DEFAULT_THRESHOLD = Double.NEGATIVE_INFINITY; private static final double DEFAULT_MIN_LIVE_ZONE = Double.NEGATIVE_INFINITY; private static final double DEFAULT_MAX_LIVE_ZONE = Double.POSITIVE_INFINITY; private static final double DEFAULT_LIVE_BUCKETS_PERCENTAGE = 0.5d; private String metricName; private double threshold; private double minLiveZone; private double maxLiveZone; private double liveBucketPercentageThreshold; // The override threshold for different dimension maps, which could form a hierarchy. private NavigableMap<DimensionMap, Double> overrideThreshold = new TreeMap<>(); // For testing purpose NavigableMap<DimensionMap, Double> getOverrideThreshold() { return overrideThreshold; } @Override public void setParameters(Map<String, String> props) { super.setParameters(props); // Initialize threshold from users' setting threshold = DEFAULT_THRESHOLD; if (props.containsKey(THRESHOLD_KEY)) { threshold = Double.parseDouble(props.get(THRESHOLD_KEY)); } if (Double.isNaN(threshold)) { throw new IllegalStateException("Threshold cannot be NaN."); } // Initialize metricName from users' setting metricName = props.get(METRIC_NAME_KEY); if (StringUtils.isBlank(metricName)) { throw new IllegalArgumentException("metric name for average threshold data filter cannot be a blank String."); } // Initialize minLiveZone from users' setting minLiveZone = parseDoubleFromUserInput(MIN_LIVE_ZONE_KEY, DEFAULT_MIN_LIVE_ZONE); // Initialize maxLiveZone from users' setting maxLiveZone = parseDoubleFromUserInput(MAX_LIVE_ZONE_KEY, DEFAULT_MAX_LIVE_ZONE); // Initialize liveBucketPercentageThreshold from users' setting liveBucketPercentageThreshold = parseDoubleFromUserInput(LIVE_BUCKETS_PERCENTAGE_KEY, DEFAULT_LIVE_BUCKETS_PERCENTAGE); // Initialize the lookup table for overriding thresholds if (props.containsKey(OVERRIDE_THRESHOLD_KEY)) { String overrideJsonPayLoad = props.get(OVERRIDE_THRESHOLD_KEY); try { Map<String, Double> rawOverrideThresholdMap = OBJECT_MAPPER.readValue(overrideJsonPayLoad, HashMap.class); for (Map.Entry<String, Double> overrideThresholdEntry : rawOverrideThresholdMap.entrySet()) { DimensionMap dimensionMap = new DimensionMap(overrideThresholdEntry.getKey()); Double threshold = overrideThresholdEntry.getValue(); overrideThreshold.put(dimensionMap, threshold); } } catch (IOException e) { LOG.error("Failed to reconstruct override threshold mappings from this json string: {}", overrideJsonPayLoad); } } } @Override public boolean isQualified(MetricTimeSeries metricTimeSeries, DimensionMap dimensionMap) { return isQualified(metricTimeSeries, dimensionMap, Long.MIN_VALUE, Long.MAX_VALUE); } @Override public boolean isQualified(MetricTimeSeries metricTimeSeries, DimensionMap dimensionMap, long windowStart, long windowEnd) { double threshold = this.threshold; // Read the override threshold for the dimension of this time series if (MapUtils.isNotEmpty(overrideThreshold)) { threshold = overrideThresholdForDimensions(dimensionMap, threshold); } if (threshold == Double.NEGATIVE_INFINITY) { return true; } else if (threshold == Double.POSITIVE_INFINITY) { return false; } // Compute average values among all buckets and check if it passes the threshold double sum = 0d; int count = 0; int totalCount = 0; for (long timestamp : metricTimeSeries.getTimeWindowSet()) { if (timestamp < windowStart || timestamp >= windowEnd) { continue; } double value = metricTimeSeries.get(timestamp, metricName).doubleValue(); // TODO: Distinguish 0 and empty value if (Double.compare(0d, value) == 0) { continue; } ++totalCount; if (isLiveBucket(value, minLiveZone, maxLiveZone)) { sum += value; ++count; } } if (count > 0) { double liveBucketPercentage = count / totalCount; if (liveBucketPercentage > liveBucketPercentageThreshold) { double average = sum / count; return average > threshold; } } return false; } /** * Returns the parsed double value from users' given value that is stored in the properties. * * @param propKey the key to retrieve users' value from the properties. * @param defaultValue the default value if users' value is unreadable (e.g., NaN, NumberFormatException) * * @return the parsed double value that is stored in the properties. */ private double parseDoubleFromUserInput(String propKey, double defaultValue) { double value = defaultValue; if (props.containsKey(propKey)) { try { value = Double.parseDouble(props.get(propKey)); if (Double.isNaN(value)) { LOG.warn("This value {} for the property key {} is unreadable; default value {} is used.", value, propKey, defaultValue); value = defaultValue; } } catch (NumberFormatException e) { LOG.warn("Failed to parse this value {} for the property key {}; default value {} is used.", props.get(propKey), propKey, defaultValue); value = defaultValue; } } return value; } /** * The value of a bucket is considered during the calculation of average bucket value only if the value is not a NaN * and it is not located in the live zone, which is defined by minLiveZone and maxLiveZone. * * @param value the value to be tested * @param minLiveZone if the given value is smaller than minLiveZone, then the value is located in the live zone. * @param maxLiveZone if the given value is larger than maxLiveZone, then the value is located in the live zone. * @return true is the value should be considered when calculating the average of bucket values. */ private boolean isLiveBucket(double value, double minLiveZone, double maxLiveZone) { if (Double.isNaN(value)) { return false; } else if (Double.compare(minLiveZone, value) > 0) { return false; } else if (Double.compare(maxLiveZone, value) < 0) { return false; } else return true; } /** * Find the override threshold based on the given dimension map. The override threshold could given in a hierarchical * dimension structure. Assume that the dimension map contains two dimensions: country and pageName. We could override * the threshold in country level by specifying: overrideDimensionMap {country=US}, overrideThreshold=100. In this * case, any dimensions that contain {country=US}, e.g., {country=US, pageName=homePage}, would use the override * threshold. * * @param dimensionMap the dimension map to be used to search the override threshold. * @param defaultThreshold the default threshold if override threshold does not exist. * * @return the threshold for the given dimension map. */ private double overrideThresholdForDimensions(DimensionMap dimensionMap, double defaultThreshold) { for (DimensionMap overrideDimensionMap : overrideThreshold.descendingKeySet()) { if (dimensionMap.equalsOrChildOf(overrideDimensionMap)) { return overrideThreshold.get(overrideDimensionMap); } } return defaultThreshold; } }