package com.linkedin.thirdeye.rootcause.impl;

import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import com.linkedin.thirdeye.anomaly.ThirdEyeAnomalyConfiguration;
import com.linkedin.thirdeye.anomaly.events.EventDataProviderLoader;
import com.linkedin.thirdeye.anomaly.events.EventDataProviderManager;
import com.linkedin.thirdeye.anomaly.events.EventType;
import com.linkedin.thirdeye.anomaly.events.HistoricalAnomalyEventProvider;
import com.linkedin.thirdeye.anomaly.events.HolidayEventProvider;
import com.linkedin.thirdeye.client.DAORegistry;
import com.linkedin.thirdeye.client.ThirdEyeCacheRegistry;
import com.linkedin.thirdeye.client.cache.QueryCache;
import com.linkedin.thirdeye.common.ThirdEyeConfiguration;
import com.linkedin.thirdeye.datalayer.bao.DatasetConfigManager;
import com.linkedin.thirdeye.datalayer.bao.EntityToEntityMappingManager;
import com.linkedin.thirdeye.datalayer.bao.MetricConfigManager;
import com.linkedin.thirdeye.datalayer.util.DaoProviderUtil;
import com.linkedin.thirdeye.rootcause.Entity;
import com.linkedin.thirdeye.rootcause.Pipeline;
import com.linkedin.thirdeye.rootcause.RCAFramework;
import com.linkedin.thirdeye.rootcause.RCAFrameworkExecutionResult;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.Parser;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;
import org.slf4j.LoggerFactory;


/**
 * Console interface for performing root cause search using a sample pipeline configuration.
 * The user can specify the TimeRange and Baseline entities, as well as arbitrary URNs to
 * populate the search context with. The console interface allows one-off or interactive REPL
 * execution modes.
 *
 * <br/><b>Example:</b> {@code java -cp target/thirdeye-pinot-1.0-SNAPSHOT.jar
 * com.linkedin.thirdeye.rootcause.impl.RCAFrameworkRunner
 * --config-dir local-configs/ --window-size 28 --baseline-offset 28
 * --entities thirdeye:metric:pageViews,thirdeye:metric:logins}
 */
public class RCAFrameworkRunner {
  private static final String CLI_CONFIG_DIR = "config-dir";
  private static final String CLI_WINDOW_SIZE = "window-size";
  private static final String CLI_BASELINE_OFFSET = "baseline-offset";
  private static final String CLI_ENTITIES = "entities";
  private static final String CLI_PIPELINE = "pipeline";
  private static final String CLI_TIME_START = "time-start";
  private static final String CLI_TIME_END = "time-end";

  private static final String P_INPUT = RCAFramework.INPUT;
  private static final String P_EVENT_HOLIDAY = "eventHoliday";
  private static final String P_EVENT_ANOMALY = "eventAnomaly";
  private static final String P_EVENT_TOPK = "eventTopK";
  private static final String P_METRIC_DATASET_RAW = "metricDatasetRaw";
  private static final String P_METRIC_METRIC_RAW = "metricMetricRaw";
  private static final String P_METRIC_TOPK = "metricTopK";
  private static final String P_DIMENSION_METRIC_RAW = "dimensionMetricRaw";
  private static final String P_DIMENSION_REWRITE = "dimensionRewrite";
  private static final String P_DIMENSION_TOPK = "dimensionTopK";
  private static final String P_SERVICE_METRIC_RAW = "serviceMetricRaw";
  private static final String P_SERVICE_TOPK = "serviceTopK";
  private static final String P_OUTPUT = RCAFramework.OUTPUT;

  private static final DateTimeFormatter ISO8601 = ISODateTimeFormat.basicDateTimeNoMillis();

  private static final int TOPK_EVENT = 10;
  private static final int TOPK_METRIC = 5;
  private static final int TOPK_DIMENSION = 10;
  private static final int TOPK_SERVICE = 5;

  private static final long DAY_IN_MS = 24 * 3600 * 1000;
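  /**
   * Entry point. Parses the command line, bootstraps the ThirdEye DAO and cache layers,
   * assembles the RCA framework (either from a pipeline YAML config or the built-in static
   * pipeline), and executes it in one-off or interactive REPL mode.
   *
   * @param args command line arguments
   * @throws Exception if configuration loading or framework setup fails
   */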
  public static void main(String[] args) throws Exception {
    Options options = new Options();

    Option optConfig = new Option("c", CLI_CONFIG_DIR, true, "ThirdEye configuration directory");
    optConfig.setRequired(true);
    options.addOption(optConfig);

    options.addOption(null, CLI_WINDOW_SIZE, true,
        "window size for search window (in days, defaults to '7')");
    options.addOption(null, CLI_BASELINE_OFFSET, true,
        "baseline offset (in days, from start of window)");
    options.addOption(null, CLI_ENTITIES, true,
        "search context metric entities (not specifying this will activate interactive REPL mode)");
    options.addOption(null, CLI_PIPELINE, true,
        "pipeline config YAML file (not specifying this will launch default pipeline)");
    options.addOption(null, CLI_TIME_START, true,
        "start time of the search window (ISO 8601, e.g. '20170701T150000Z')");
    options.addOption(null, CLI_TIME_END, true,
        "end time of the search window (ISO 8601, e.g. '20170831T030000Z', defaults to now)");

    Parser parser = new BasicParser();
    CommandLine cmd = null;
    try {
      cmd = parser.parse(options, args);
    } catch (ParseException e) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp(RCAFrameworkRunner.class.getSimpleName(), options);
      System.exit(1);
    }

    if (cmd.hasOption(CLI_WINDOW_SIZE) && cmd.hasOption(CLI_TIME_START)) {
      System.out.println(String.format("--%s and --%s are mutually exclusive", CLI_WINDOW_SIZE, CLI_TIME_START));
      System.exit(1);
    }

    // runtime logger config
    ((Logger) LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME)).setLevel(Level.WARN);
    ((Logger) LoggerFactory.getLogger("com.linkedin.thirdeye.rootcause")).setLevel(Level.INFO);

    // config
    File config = new File(cmd.getOptionValue(CLI_CONFIG_DIR));

    File daoConfig = new File(config.getAbsolutePath() + "/persistence.yml");
    DaoProviderUtil.init(daoConfig);

    ThirdEyeConfiguration thirdEyeConfig = new ThirdEyeAnomalyConfiguration();
    thirdEyeConfig.setRootDir(config.getAbsolutePath());
    ThirdEyeCacheRegistry.initializeCaches(thirdEyeConfig);

    EventDataProviderManager eventProvider = EventDataProviderManager.getInstance();
    eventProvider.registerEventDataProvider(EventType.HOLIDAY.toString(), new HolidayEventProvider());
    eventProvider.registerEventDataProvider(EventType.HISTORICAL_ANOMALY.toString(), new HistoricalAnomalyEventProvider());

    // ************************************************************************
    // Framework setup
    // ************************************************************************
    RCAFramework framework;
    if (cmd.hasOption(CLI_PIPELINE)) {
      File rcaConfig = new File(cmd.getOptionValue(CLI_PIPELINE));
      EventDataProviderLoader.registerEventDataProvidersFromConfig(rcaConfig, eventProvider);
      List<Pipeline> pipelines = PipelineLoader.getPipelinesFromConfig(rcaConfig);

      // Executor
      ExecutorService executor = Executors.newSingleThreadExecutor();

      framework = new RCAFramework(pipelines, executor);
    } else {
      framework = makeStaticFramework(eventProvider);
    }

    // ************************************************************************
    // Entities
    // ************************************************************************
    Set<Entity> entities = new HashSet<>();

    // time range and baseline
    long now = System.currentTimeMillis();

    long windowEnd = now;
    if (cmd.hasOption(CLI_TIME_END)) {
      windowEnd = ISO8601.parseDateTime(cmd.getOptionValue(CLI_TIME_END)).getMillis();
    }

    long windowSize = 7 * DAY_IN_MS;
    if (cmd.hasOption(CLI_TIME_START)) {
      windowSize = windowEnd - ISO8601.parseDateTime(cmd.getOptionValue(CLI_TIME_START)).getMillis();
    } else if (cmd.hasOption(CLI_WINDOW_SIZE)) {
      windowSize = Long.parseLong(cmd.getOptionValue(CLI_WINDOW_SIZE)) * DAY_IN_MS;
    }

    long baselineOffset = 0;
    if (cmd.hasOption(CLI_BASELINE_OFFSET)) {
      baselineOffset = Long.parseLong(cmd.getOptionValue(CLI_BASELINE_OFFSET)) * DAY_IN_MS;
    }

    long windowStart = windowEnd - windowSize;
    long baselineEnd = windowStart - baselineOffset;
    long baselineStart = baselineEnd - windowSize;

    System.out.println(String.format("Using current time range '%d' (%s) to '%d' (%s)",
        windowStart, ISO8601.print(windowStart), windowEnd, ISO8601.print(windowEnd)));
    System.out.println(String.format("Using baseline time range '%d' (%s) to '%d' (%s)",
        baselineStart, ISO8601.print(baselineStart), baselineEnd, ISO8601.print(baselineEnd)));

    entities.add(TimeRangeEntity.fromRange(1.0, TimeRangeEntity.TYPE_CURRENT, windowStart, windowEnd));
    entities.add(TimeRangeEntity.fromRange(1.0, TimeRangeEntity.TYPE_BASELINE, baselineStart, baselineEnd));

    // ************************************************************************
    // Framework execution
    // ************************************************************************
    if (cmd.hasOption(CLI_ENTITIES)) {
      entities.addAll(parseURNSequence(cmd.getOptionValue(CLI_ENTITIES), 1.0));
      runFramework(framework, entities);
    } else {
      try {
        readExecutePrintLoop(framework, entities);
      } catch (InterruptedIOException ignore) {
        // left blank, exit
      }
    }

    System.out.println("done.");

    // Pinot connection workaround
    System.exit(0);
  }
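  /**
   * Assembles the built-in sample pipeline configuration: raw entity generation for metrics,
   * dimensions, services, and events, per-type top-K filtering, and a final linear score
   * aggregation stage.
   *
   * @param eventProvider event data provider manager for holiday and anomaly events
   * @return configured RCAFramework instance
   */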
  private static RCAFramework makeStaticFramework(EventDataProviderManager eventProvider) {
    Set<Pipeline> pipelines = new HashSet<>();

    MetricConfigManager metricDAO = DAORegistry.getInstance().getMetricConfigDAO();
    DatasetConfigManager datasetDAO = DAORegistry.getInstance().getDatasetConfigDAO();
    EntityToEntityMappingManager entityDAO = DAORegistry.getInstance().getEntityToEntityMappingDAO();

    // Metrics
    pipelines.add(new MetricDatasetPipeline(P_METRIC_DATASET_RAW, asSet(P_INPUT), metricDAO, datasetDAO));
    pipelines.add(new EntityMappingPipeline(P_METRIC_METRIC_RAW, asSet(P_INPUT), entityDAO, "METRIC_TO_METRIC", false, false));
    pipelines.add(new TopKPipeline(P_METRIC_TOPK, asSet(P_INPUT, P_METRIC_DATASET_RAW, P_METRIC_METRIC_RAW), MetricEntity.class, TOPK_METRIC));

    // Dimensions (from metrics)
    QueryCache cache = ThirdEyeCacheRegistry.getInstance().getQueryCache();
    ExecutorService executorScorer = Executors.newFixedThreadPool(3);
    pipelines.add(new DimensionAnalysisPipeline(P_DIMENSION_METRIC_RAW, asSet(P_INPUT, P_METRIC_TOPK), metricDAO, datasetDAO, cache, executorScorer));
    pipelines.add(new EntityMappingPipeline(P_DIMENSION_REWRITE, asSet(P_DIMENSION_METRIC_RAW), entityDAO, "DIMENSION_TO_DIMENSION", true, true));
    pipelines.add(new TopKPipeline(P_DIMENSION_TOPK, asSet(P_INPUT, P_DIMENSION_REWRITE), DimensionEntity.class, TOPK_DIMENSION));

    // Services (from metrics)
    pipelines.add(new EntityMappingPipeline(P_SERVICE_METRIC_RAW, asSet(P_METRIC_TOPK), entityDAO, "METRIC_TO_SERVICE", false, false));
    pipelines.add(new TopKPipeline(P_SERVICE_TOPK, asSet(P_INPUT, P_SERVICE_METRIC_RAW), ServiceEntity.class, TOPK_SERVICE));

    // Events (from metrics and dimensions)
    pipelines.add(new AnomalyEventsPipeline(P_EVENT_ANOMALY, asSet(P_INPUT, P_METRIC_TOPK), eventProvider, metricDAO));
    pipelines.add(new HolidayEventsPipeline(P_EVENT_HOLIDAY, asSet(P_INPUT, P_DIMENSION_TOPK), eventProvider));
    pipelines.add(new TopKPipeline(P_EVENT_TOPK, asSet(P_INPUT, P_EVENT_ANOMALY, P_EVENT_HOLIDAY), EventEntity.class, TOPK_EVENT));

    // Aggregation
    pipelines.add(new LinearAggregationPipeline(P_OUTPUT, asSet(P_EVENT_TOPK, P_METRIC_TOPK, P_DIMENSION_TOPK, P_SERVICE_TOPK), -1));

    // Executor
    ExecutorService executor = Executors.newSingleThreadExecutor();

    // Framework
    return new RCAFramework(pipelines, executor);
  }

  /**
   * Reads URNs from stdin line by line and executes the framework once per input line,
   * merging the parsed entities with the given base entities.
   */
  private static void readExecutePrintLoop(RCAFramework framework, Collection<Entity> baseEntities)
      throws IOException {
    // search loop
    System.out.println("Enter search context metric entities' URNs (separated by comma \",\"):");
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    String line;
    while ((line = br.readLine()) != null) {
      Set<Entity> entities = new HashSet<>();
      entities.addAll(baseEntities);
      entities.addAll(parseURNSequence(line, 1.0));

      runFramework(framework, entities);

      System.out.println("Enter search context metric entities' URNs (separated by comma \",\"):");
    }
  }
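  /**
   * Executes the framework on the given search context and prints the linear (score-sorted)
   * results, followed by the top 3 results per entity type.
   *
   * @param framework RCA framework to execute
   * @param entities search context entities
   */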
  private static void runFramework(RCAFramework framework, Set<Entity> entities) {
    System.out.println("*** Search context:");
    for (Entity e : entities) {
      System.out.println(formatEntity(e));
    }

    RCAFrameworkExecutionResult result = null;
    try {
      result = framework.run(entities);
    } catch (Exception e) {
      System.out.println("*** Exception while running framework:");
      e.printStackTrace();
      return;
    }

    System.out.println("*** Linear results:");
    List<Entity> results = new ArrayList<>(result.getResults());
    Collections.sort(results, Entity.HIGHEST_SCORE_FIRST);
    for (Entity e : results) {
      System.out.println(formatEntity(e));
    }

    System.out.println("*** Grouped results:");
    Map<String, Collection<Entity>> grouped = topKPerType(results, 3);
    for (Map.Entry<String, Collection<Entity>> entry : grouped.entrySet()) {
      System.out.println(entry.getKey());
      for (Entity e : entry.getValue()) {
        System.out.println(formatEntity(e));
      }
    }
  }

  /**
   * Parses a comma-separated sequence of URNs into entities with the given score.
   */
  private static Collection<Entity> parseURNSequence(String urns, double score) {
    Set<Entity> entities = new HashSet<>();
    String[] parts = urns.split(",");
    for (String part : parts) {
      entities.add(EntityUtils.parseURN(part, score));
    }
    return entities;
  }

  /**
   * Returns the top K (first K) results per entity type from a collection of entities.
   *
   * @param entities aggregated entities
   * @param k maximum number of entities per entity type
   * @return mapping of entity types to list of entities
   */
  private static Map<String, Collection<Entity>> topKPerType(Collection<Entity> entities, int k) {
    Map<String, Collection<Entity>> map = new HashMap<>();
    for (Entity e : entities) {
      String prefix = extractPrefix(e);

      if (!map.containsKey(prefix)) {
        map.put(prefix, new ArrayList<Entity>());
      }

      Collection<Entity> current = map.get(prefix);
      if (current.size() < k) {
        current.add(e);
      }
    }
    return map;
  }

  static String formatEntity(Entity e) {
    return String.format("%.3f [%s] %s", e.getScore(), e.getClass().getSimpleName(), e.getUrn());
  }

  static Set<String> asSet(String... s) {
    return new HashSet<>(Arrays.asList(s));
  }

  /**
   * Extracts the URN prefix (the first two ':'-separated fields, e.g. "thirdeye:metric:")
   * that identifies an entity's type.
   */
  static String extractPrefix(Entity e) {
    String[] parts = e.getUrn().split(":", 3);
    return parts[0] + ":" + parts[1] + ":";
  }
}