package com.linkedin.thirdeye.anomalydetection.model.transform;
import com.linkedin.thirdeye.anomalydetection.context.AnomalyDetectionContext;
import com.linkedin.thirdeye.anomalydetection.context.AnomalyFeedback;
import com.linkedin.thirdeye.anomalydetection.context.TimeSeries;
import com.linkedin.thirdeye.constant.AnomalyFeedbackType;
import com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.BooleanUtils;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;
import org.joda.time.Period;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Transformation function that removes historical anomalous regions from a time series, based on
 * user feedback ("user label") and anomaly severity ("absolute weight").
 *
 * Removal means replacing the value at an anomalous timestamp with {@code Double.NaN}, so that
 * downstream detection models skip those points when learning from history.
 */
public class AnomalyRemovalFunction extends AbstractTransformationFunction {
  private static final Logger LOGGER =
      LoggerFactory.getLogger(AnomalyRemovalFunction.class);

  // Timezone information is used here to denote the nature of the data, not how it is stored in
  // the database; it is only needed to compute the offset period correctly (to avoid the
  // complications brought up by daylight saving time).
  public static final String METRIC_TIMEZONE = "metricTimezone";
  public static final String DEFAULT_METRIC_TIMEZONE = "America/Los_Angeles";

  // If the threshold is 0, the weight check always passes, i.e. the threshold does not filter
  // anything; a negative threshold disables weight-based removal entirely (see isRemovable).
  // NOTE(review): the property key spells "Weigh" (missing 't'); kept as-is because existing
  // configurations depend on the exact key.
  public static final String ANOMALY_REMOVAL_WEIGHT_THRESHOLD = "anomalyRemovalWeighThreshold";
  public static final String DEFAULT_ANOMALY_REMOVAL_WEIGHT_THRESHOLD = "0";

  // Tolerance window, right before the current monitoring window, in which weight-based anomalies
  // are NOT removed (user-confirmed anomalies are still removed there).
  public static final String ANOMALY_REMOVAL_TOLERANCE_WINDOW_SIZE = "anomalyRemovalToleranceWindowSize";
  public static final String DEFAULT_ANOMALY_REMOVAL_TOLERANCE_WINDOW_SIZE = "0";

  /**
   * Removes historical anomaly regions based on 'user label' and 'absolute weight'.
   *
   * Removal -- replace the value at the anomalous point with {@code Double.NaN}.
   *
   * @param timeSeries the time series that provides the data points to be transformed.
   * @param anomalyDetectionContext the anomaly detection context that could provide additional
   *                                information for the transformation.
   * @return a time series with anomalous points removed (set to NaN).
   */
  @Override
  public TimeSeries transform(TimeSeries timeSeries, AnomalyDetectionContext anomalyDetectionContext) {
    Interval timeSeriesInterval = timeSeries.getTimeSeriesInterval();
    long startTime = timeSeriesInterval.getStartMillis();
    long endTime = timeSeriesInterval.getEndMillis();
    long bucketSizeInMillis = anomalyDetectionContext.getBucketSizeInMS();

    // Current monitoring window of the topic metric.
    String metricName = anomalyDetectionContext.getAnomalyDetectionFunction().getSpec().getTopicMetric();
    Interval currentWindow = anomalyDetectionContext.getCurrent(metricName).getTimeSeriesInterval();

    // Historical anomalies that are candidates for removal.
    List<MergedAnomalyResultDTO> anomalyHistory = anomalyDetectionContext.getHistoricalAnomalies();

    double weightThreshold =
        Double.parseDouble(getProperties().getProperty(ANOMALY_REMOVAL_WEIGHT_THRESHOLD, DEFAULT_ANOMALY_REMOVAL_WEIGHT_THRESHOLD));
    int toleranceSize =
        Integer.parseInt(getProperties().getProperty(ANOMALY_REMOVAL_TOLERANCE_WINDOW_SIZE, DEFAULT_ANOMALY_REMOVAL_TOLERANCE_WINDOW_SIZE));

    TimeSeries transformedTimeSeries = new TimeSeries();
    transformedTimeSeries.setTimeSeriesInterval(new Interval(startTime, endTime));

    // Timestamps of removable anomalies, mapped to their user label (true = user-confirmed
    // anomaly, null = removable due to weight alone).
    Map<Long, Boolean> anomalousTimestamps =
        getAnomalousTimeStampAndLabel(anomalyHistory, bucketSizeInMillis, weightThreshold);

    // Start of the tolerance window: currentWindow.start minus toleranceSize buckets (DST-aware).
    String metricTimezone = getProperties().getProperty(METRIC_TIMEZONE, DEFAULT_METRIC_TIMEZONE);
    long windowStartOffset =
        getOffsetTimestamp(currentWindow.getStartMillis(), toleranceSize, bucketSizeInMillis, metricTimezone);

    // Generate the transformed time series.
    for (long ts : timeSeries.timestampSet()) {
      boolean isAnomalous = anomalousTimestamps.containsKey(ts);
      boolean isLabeledTrue = BooleanUtils.isTrue(anomalousTimestamps.get(ts));
      // BUG FIX: the removal condition must apply only to anomalous timestamps. The previous
      // condition (ts < windowStartOffset) alone blanked out EVERY data point older than the
      // tolerance-window start — with the default tolerance of 0 that wiped the whole training
      // history. Weight-based anomalies are removed only before the tolerance window;
      // user-confirmed anomalies are removed anywhere before the current monitoring window.
      if ((isAnomalous && ts < windowStartOffset)
          || (isLabeledTrue && ts < currentWindow.getStartMillis())) {
        transformedTimeSeries.set(ts, Double.NaN);
      } else {
        transformedTimeSeries.set(ts, timeSeries.get(ts));
      }
    }
    return transformedTimeSeries;
  }

  /**
   * Extracts the timestamps (and their labels) of historical anomalies that qualify for removal:
   * 1) the user labeled the anomaly as a true anomaly, or
   * 2) the anomaly has no label and its severity surpasses the weight (severity indicator)
   *    threshold.
   * Every bucket inside an anomaly's [start, end] interval (both ends included) is emitted.
   *
   * @param historicalAnomalies anomaly results from the past
   * @param bucketMillis the bucket size in milliseconds; must be positive
   * @param weightThreshold the threshold applied to the weight when no user label is given
   * @return map from anomalous timestamp to its user label (may be null when removal was
   *         weight-based); empty when there is no history or the bucket size is invalid
   */
  public static Map<Long, Boolean> getAnomalousTimeStampAndLabel(
      List<MergedAnomalyResultDTO> historicalAnomalies, long bucketMillis, double weightThreshold) {
    Map<Long, Boolean> anomalousTimestamps = new HashMap<>();
    if (historicalAnomalies == null || bucketMillis <= 0) {
      return anomalousTimestamps; // empty map; bucketMillis guard avoids division by zero
    }
    for (MergedAnomalyResultDTO anomaly : historicalAnomalies) {
      Boolean userLabel = getUserLabel(anomaly); // hoisted: constant per anomaly
      if (isRemovable(userLabel, anomaly.getWeight(), weightThreshold)) {
        // Both ends of the anomalous interval are included.
        int tsLength = (int) (1 + (anomaly.getEndTime() - anomaly.getStartTime()) / bucketMillis);
        for (int i = 0; i < tsLength; i++) {
          anomalousTimestamps.put(anomaly.getStartTime() + i * bucketMillis, userLabel);
        }
      }
    }
    return anomalousTimestamps;
  }

  /**
   * An anomalous range is removable if
   * 1) it is labeled as a true anomaly, or
   * 2) it is not labeled (label is null) and its absolute weight reaches the weight threshold.
   *
   * Threshold semantics: a threshold of 0 means the weight check always passes (the threshold is
   * not used at all); a negative threshold disables weight-based removal entirely. An anomaly
   * explicitly labeled NOT_ANOMALY (label false) is never removable.
   *
   * @param isLabeledTrue user label: true/false, or null when no feedback was given
   * @param weight severity indicator of the anomaly
   * @param weightThreshold threshold applied to {@code |weight|} when the label is null
   * @return true when the anomalous range should be removed from the time series
   */
  public static boolean isRemovable(Boolean isLabeledTrue, double weight, double weightThreshold) {
    return BooleanUtils.isTrue(isLabeledTrue)
        || (isLabeledTrue == null && weightThreshold >= 0 && Math.abs(weight) >= weightThreshold);
  }

  /**
   * Translates the user feedback type of a given anomaly to a user label: null, true, or false.
   * null  -- no feedback (or NO_FEEDBACK) available;
   * true  -- any feedback other than NOT_ANOMALY (i.e. a confirmed anomaly);
   * false -- feedback is NOT_ANOMALY.
   *
   * @param anomalyResultDTO the anomaly whose feedback is inspected; may be null
   * @return the user label, or null when no meaningful feedback exists
   */
  public static Boolean getUserLabel(MergedAnomalyResultDTO anomalyResultDTO) {
    if (anomalyResultDTO == null) {
      return null; // if input is null, return null
    }
    Boolean isUserLabeledAnomaly;
    AnomalyFeedbackType feedbackType;
    AnomalyFeedback feedback = anomalyResultDTO.getFeedback();
    if (feedback == null) {
      feedbackType = null;
    } else {
      feedbackType = feedback.getFeedbackType();
    }
    if (feedbackType == null || feedbackType.equals(AnomalyFeedbackType.NO_FEEDBACK)) {
      isUserLabeledAnomaly = null;
    } else {
      isUserLabeledAnomaly = !feedbackType.equals(AnomalyFeedbackType.NOT_ANOMALY);
    }
    return isUserLabeledAnomaly;
  }

  /**
   * Gets the offset timestamp given the current timestamp.
   *
   * Uses the timezone information to solve the daylight-saving-time issue: when the offset unit
   * is a whole number of days, the offset is computed as a calendar period in the metric's
   * timezone rather than as a fixed number of milliseconds.
   *
   * @param currentTS current timestamp in milliseconds
   * @param offsetSize number of offset time units
   * @param offsetUnit offset time unit in milliseconds; offsetSize * offsetUnit = number of
   *                   milliseconds to offset (when no DST correction is needed)
   * @param timezone the time zone the metric's data is aligned to; may be null
   * @return the offset timestamp in milliseconds
   */
  public static long getOffsetTimestamp(long currentTS, int offsetSize, long offsetUnit, String timezone) {
    // If there are no daylight-saving concerns (no timezone, zero offset) or the offset unit is
    // not a whole number of days, apply the direct millisecond offset.
    if (timezone == null || offsetSize == 0 || offsetUnit == 0L
        || (offsetUnit % TimeUnit.DAYS.toMillis(1) != 0L)) {
      return currentTS - offsetSize * offsetUnit;
    }
    // Let joda-time compute the actual offset in milliseconds, honoring DST transitions.
    DateTimeZone dateTimeZone = DateTimeZone.forID(timezone);
    DateTime currentDT = new DateTime(currentTS, dateTimeZone);
    int bucketSizeInDays = (int) (offsetUnit / TimeUnit.DAYS.toMillis(1));
    Period offsetPeriod = new Period(0, 0, 0, offsetSize * bucketSizeInDays, 0, 0, 0, 0);
    return currentDT.minus(offsetPeriod).getMillis();
  }
}