package com.linkedin.thirdeye.tools;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import com.linkedin.thirdeye.anomaly.utils.DetectionResourceHttpUtils;
import com.linkedin.thirdeye.dashboard.resources.OnboardResource;
import com.linkedin.thirdeye.datalayer.bao.AnomalyFunctionManager;
import com.linkedin.thirdeye.datalayer.bao.MergedAnomalyResultManager;
import com.linkedin.thirdeye.datalayer.bao.RawAnomalyResultManager;
import com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO;
import com.linkedin.thirdeye.datalayer.util.DaoProviderUtil;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.client.ClientProtocolException;
import org.joda.time.DateTime;
import org.joda.time.format.ISODateTimeFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.collections.Lists;

/**
 * Utility class to clean up all anomalies for the input datasets,
 * and to regenerate anomalies for the time range specified in the input.
 *
 * Inputs:
 * A config file for the config class CleanupAndRegenerateAnomaliesConfig.
 */
public class CleanupAndRegenerateAnomaliesTool {

  private static final Logger LOG = LoggerFactory.getLogger(CleanupAndRegenerateAnomaliesTool.class);
  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(new YAMLFactory());

  private enum Mode {
    DELETE,
    GENERATE_FOR_RANGE,
    BACKFILL_FOR_RANGE
  }

  private String monitoringWindowStartTime;
  private String monitoringWindowEndTime;
  private List<Long> functionIds;

  private int rawAnomaliesDeleted = 0;
  private int mergedAnomaliesDeleted = 0;

  private AnomalyFunctionManager anomalyFunctionDAO;
  private RawAnomalyResultManager rawResultDAO;
  private MergedAnomalyResultManager mergedResultDAO;
  private DetectionResourceHttpUtils detectionResourceHttpUtils;

  public CleanupAndRegenerateAnomaliesTool(String startTime, String endTime, String datasets,
      String functionIds, File persistenceFile, String detectionHost, int detectionPort)
      throws Exception {
    init(persistenceFile);
    this.monitoringWindowStartTime = startTime;
    this.monitoringWindowEndTime = endTime;
    this.functionIds = getFunctionIds(datasets, functionIds);
    detectionResourceHttpUtils = new DetectionResourceHttpUtils(detectionHost, detectionPort);
  }

  public void init(File persistenceFile) throws Exception {
    DaoProviderUtil.init(persistenceFile);
    anomalyFunctionDAO = DaoProviderUtil
        .getInstance(com.linkedin.thirdeye.datalayer.bao.jdbc.AnomalyFunctionManagerImpl.class);
    rawResultDAO = DaoProviderUtil
        .getInstance(com.linkedin.thirdeye.datalayer.bao.jdbc.RawAnomalyResultManagerImpl.class);
    mergedResultDAO = DaoProviderUtil
        .getInstance(com.linkedin.thirdeye.datalayer.bao.jdbc.MergedAnomalyResultManagerImpl.class);
  }

  // Resolves the set of function ids to operate on: an explicit functionIds list takes
  // precedence; otherwise every anomaly function of the given datasets is used.
  private List<Long> getFunctionIds(String datasets, String functionIds) {
    List<Long> functionIdsList = new ArrayList<>();
    if (StringUtils.isNotBlank(functionIds)) {
      String[] tokens = functionIds.split(",");
      for (String token : tokens) {
        functionIdsList.add(Long.valueOf(token));
      }
    } else if (StringUtils.isNotBlank(datasets)) {
      List<String> datasetsList = Lists.newArrayList(datasets.split(","));
      for (String dataset : datasetsList) {
        List<AnomalyFunctionDTO> anomalyFunctions = anomalyFunctionDAO.findAllByCollection(dataset);
        for (AnomalyFunctionDTO anomalyFunction : anomalyFunctions) {
          functionIdsList.add(anomalyFunction.getId());
        }
      }
    }
    return functionIdsList;
  }
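  // Illustration of the resolution above (hypothetical values): functionIds = "42,43"
  // resolves to functions 42 and 43 and takes precedence over datasets; if functionIds
  // is blank and datasets = "ds1,ds2", the ids of every anomaly function registered on
  // those two collections are used instead.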
  /**
   * Deletes raw and merged anomalies whose start time falls in the given time range,
   * except in the following two cases:
   *
   * 1. If a raw anomaly belongs to a merged anomaly whose start time is not located in
   * the given time range, then the raw anomaly is not deleted.
   *
   * 2. If a raw anomaly belongs to a merged anomaly whose start time is located in the
   * given time range, then it is deleted regardless of its own start time.
   *
   * If monitoringWindowStartTime is not given, then the start time is set to 0.
   * If monitoringWindowEndTime is not given, then the end time is set to Long.MAX_VALUE.
   */
  private void deleteExistingAnomalies() {
    long startTime = 0;
    long endTime = Long.MAX_VALUE;
    if (StringUtils.isNotBlank(monitoringWindowStartTime)) {
      startTime = ISODateTimeFormat.dateTimeParser().parseDateTime(monitoringWindowStartTime).getMillis();
    }
    if (StringUtils.isNotBlank(monitoringWindowEndTime)) {
      endTime = ISODateTimeFormat.dateTimeParser().parseDateTime(monitoringWindowEndTime).getMillis();
    }
    LOG.info("Deleting anomalies in the time range: {} -- {}", new DateTime(startTime), new DateTime(endTime));

    for (Long functionId : functionIds) {
      AnomalyFunctionDTO anomalyFunction = anomalyFunctionDAO.findById(functionId);
      if (anomalyFunction == null) {
        LOG.info("Requested functionId {} doesn't exist", functionId);
        continue;
      }
      LOG.info("Beginning cleanup of functionId {} collection {} metric {}",
          functionId, anomalyFunction.getCollection(), anomalyFunction.getMetric());
      // Clean up merged and raw anomalies of this function id
      OnboardResource onboardResource = new OnboardResource(anomalyFunctionDAO, mergedResultDAO, rawResultDAO);
      onboardResource.deleteExistingAnomalies(Long.toString(functionId), startTime, endTime);
    }
  }

  /**
   * Regenerates anomalies for the whole given range as one monitoring window.
   * @throws Exception
   */
  @Deprecated
  private void regenerateAnomaliesInRange() throws Exception {
    LOG.info("Begin regenerating anomalies for the entire range...");
    for (Long functionId : functionIds) {
      AnomalyFunctionDTO anomalyFunction = anomalyFunctionDAO.findById(functionId);
      // Guard against unknown ids as well as deactivated functions
      if (anomalyFunction == null || !anomalyFunction.getIsActive()) {
        LOG.info("Skipping missing or deactivated function {}", functionId);
        continue;
      }
      runAdhocFunctionForWindow(functionId, monitoringWindowStartTime, monitoringWindowEndTime);
    }
  }

  /**
   * Breaks down the given range into consecutive monitoring windows as per the function
   * definition, then regenerates anomalies for each window separately.
   * @throws Exception
   */
  private void regenerateAnomaliesForBucketsInRange(boolean forceBackfill) throws Exception {
    for (Long functionId : functionIds) {
      AnomalyFunctionDTO anomalyFunction = anomalyFunctionDAO.findById(functionId);
      // Guard against unknown ids as well as deactivated functions
      if (anomalyFunction == null || !anomalyFunction.getIsActive()) {
        LOG.info("Skipping missing or deactivated function {}", functionId);
        continue;
      }
      LOG.info("Sending backfill function {} for range {} to {}", functionId,
          monitoringWindowStartTime, monitoringWindowEndTime);
      String response = detectionResourceHttpUtils.runBackfillAnomalyFunction(
          String.valueOf(functionId), monitoringWindowStartTime, monitoringWindowEndTime, forceBackfill);
      LOG.info("Response {}", response);
    }
  }

  private void runAdhocFunctionForWindow(Long functionId, String monitoringWindowStart, String monitoringWindowEnd)
      throws ClientProtocolException, IOException {
    LOG.info("Running adhoc function {} for range {} to {}", functionId, monitoringWindowStart, monitoringWindowEnd);
    String response = detectionResourceHttpUtils.runAdhocAnomalyFunction(
        String.valueOf(functionId), monitoringWindowStart, monitoringWindowEnd);
    LOG.info("Response {}", response);
  }
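  /*
   * Example invocation (all values are hypothetical, and the YAML keys below are an
   * assumption based on the default Jackson field naming implied by the
   * CleanupAndRegenerateAnomaliesConfig getters used in main):
   *
   *   java -cp thirdeye-tools.jar com.linkedin.thirdeye.tools.CleanupAndRegenerateAnomaliesTool \
   *       config.yml BACKFILL_FOR_RANGE
   *
   * config.yml:
   *   persistenceFile: "/path/to/persistence.yml"
   *   detectorHost: "localhost"
   *   detectorPort: 1426
   *   startTimeIso: "2017-01-01T00:00:00Z"
   *   endTimeIso: "2017-01-08T00:00:00Z"
   *   datasets: ""
   *   functionIds: "42,43"
   *   forceBackfill: "true"
   */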
  public static void main(String[] args) throws Exception {
    if (args.length != 2) {
      System.err.println("USAGE CleanupAndRegenerateAnomaliesTool <config_yml_file> <mode>\n"
          + "Please take note:\n"
          + "DELETE mode will delete all anomalies for that functionId/dataset,\n"
          + "GENERATE_FOR_RANGE and BACKFILL_FOR_RANGE modes will generate anomalies in the time range you specify");
      System.exit(1);
    }
    File configFile = new File(args[0]);
    CleanupAndRegenerateAnomaliesConfig config =
        OBJECT_MAPPER.readValue(configFile, CleanupAndRegenerateAnomaliesConfig.class);
    String mode = args[1];

    File persistenceFile = new File(config.getPersistenceFile());
    if (!persistenceFile.exists()) {
      System.err.println("Missing file: " + persistenceFile);
      System.exit(1);
    }

    String detectorHost = config.getDetectorHost();
    int detectorPort = config.getDetectorPort();
    if (StringUtils.isBlank(detectorHost)) {
      LOG.error("Detector host and port must be provided");
      System.exit(1);
    }

    String startTimeIso = config.getStartTimeIso();
    String endTimeIso = config.getEndTimeIso();
    Mode runMode = Mode.valueOf(mode);
    if ((runMode.equals(Mode.GENERATE_FOR_RANGE) || runMode.equals(Mode.BACKFILL_FOR_RANGE))
        && (StringUtils.isBlank(startTimeIso) || StringUtils.isBlank(endTimeIso))) {
      LOG.error("startTime and endTime must be provided in generate mode");
      System.exit(1);
    }

    String datasets = config.getDatasets();
    String functionIds = config.getFunctionIds();
    if (StringUtils.isBlank(datasets) && StringUtils.isBlank(functionIds)) {
      LOG.error("Must provide one of datasets or functionIds");
      System.exit(1);
    }

    boolean doForceBackfill = false;
    String forceBackfill = config.getForceBackfill();
    if (StringUtils.isNotBlank(forceBackfill)) {
      doForceBackfill = Boolean.parseBoolean(forceBackfill);
    }

    CleanupAndRegenerateAnomaliesTool tool = new CleanupAndRegenerateAnomaliesTool(startTimeIso, endTimeIso,
        datasets, functionIds, persistenceFile, detectorHost, detectorPort);

    if (runMode.equals(Mode.DELETE)) {
      // DELETE mode deletes *ALL* anomalies for all functions in functionIds or datasets
      tool.deleteExistingAnomalies();
    } else if (runMode.equals(Mode.GENERATE_FOR_RANGE)) {
      // GENERATE_FOR_RANGE mode regenerates anomalies for all active functions in functionIds or datasets
      tool.regenerateAnomaliesInRange();
    } else if (runMode.equals(Mode.BACKFILL_FOR_RANGE)) {
      // BACKFILL_FOR_RANGE mode regenerates anomalies for all active functions in functionIds or datasets.
      // It honors the monitoring window size of the function and runs all consecutive windows, one by one,
      // to cover the entire range provided as input.
      tool.regenerateAnomaliesForBucketsInRange(doForceBackfill);
    } else {
      LOG.error("Incorrect mode {}", mode);
      System.exit(1);
    }
    // Exit explicitly because the database connection can get stuck and keep the program from terminating
    System.exit(0);
  }
}