package com.linkedin.thirdeye.rootcause;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Container class for configuring and executing a root cause search with multiple pipelines.
 * The framework is instantiated with a set of (named) pipelines and an executor. The run()
 * method then executes the configured pipelines for arbitrary inputs without maintaining
 * any additional state within the RCAFramework.
 *
 * RCAFramework supports parallel DAG execution and requires pipelines to form a valid path
 * from {@code INPUT} to {@code OUTPUT}. The execution order of pipelines is guaranteed to be
 * compatible with a serial execution in a single thread.
 */
/*
 *                    /-> pipeline.run() --> pipeline.run() \
 *                   /                                       \
 * INPUT --> run() ---> pipeline.run() ---> pipeline.run() --> OUTPUT
 *                   \                                       /
 *                    \-> pipeline.run() -------------------/
 */
public class RCAFramework {
  private static final Logger LOG = LoggerFactory.getLogger(RCAFramework.class);

  public static final String INPUT = "INPUT";
  public static final String OUTPUT = "OUTPUT";

  // Timeout for retrieving each pipeline result, in milliseconds
  public static final long TIMEOUT = 600000;

  private final Map<String, Pipeline> pipelines;
  private final ExecutorService executor;

  public RCAFramework(Collection<Pipeline> pipelines, ExecutorService executor) {
    this.executor = executor;

    if(!isValidDAG(pipelines))
      throw new IllegalArgumentException(String.format("Invalid DAG. Output '%s' not reachable from input '%s'", OUTPUT, INPUT));

    this.pipelines = new HashMap<>();
    for(Pipeline p : pipelines) {
      if(INPUT.equals(p.getOutputName()))
        throw new IllegalArgumentException(String.format("Must not contain a pipeline with output name '%s'", INPUT));
      if(this.pipelines.containsKey(p.getOutputName()))
        throw new IllegalArgumentException(String.format("Already contains pipeline with output name '%s'", p.getOutputName()));
      this.pipelines.put(p.getOutputName(), p);
    }

    if(!this.pipelines.containsKey(OUTPUT))
      throw new IllegalArgumentException(String.format("Must contain a pipeline with output name '%s'", OUTPUT));
  }

  /**
   * Performs root cause search for a user-specified set of input entities.
   * Fans out entities to individual pipelines, collects results, and aggregates them.
   *
   * @param input user-specified search entities
   * @return aggregated results
   */
  public RCAFrameworkExecutionResult run(Set<Entity> input) throws Exception {
    Map<String, Pipeline> pipelines = new HashMap<>(this.pipelines);

    // Seed the DAG with a static pipeline that emits the user-provided entities under INPUT
    pipelines.put(INPUT, new StaticPipeline(INPUT, Collections.<String>emptySet(), input));

    LOG.info("Constructing flow for input '{}'", input);
    Map<String, Future<PipelineResult>> flow = constructDAG(pipelines);

    Map<String, PipelineResult> results = new HashMap<>();
    for(Map.Entry<String, Future<PipelineResult>> e : flow.entrySet()) {
      PipelineResult r = e.getValue().get(TIMEOUT, TimeUnit.MILLISECONDS);
      if(LOG.isDebugEnabled())
        logResultDetails(r);
      results.put(e.getKey(), r);
    }

    return new RCAFrameworkExecutionResult(results.get(OUTPUT).getEntities(), results);
  }

  static void logResultDetails(PipelineResult result) {
    // Log entities in descending order of score, rounded to three decimal places
    List<Entity> entities = new ArrayList<>(result.getEntities());
    Collections.sort(entities, new Comparator<Entity>() {
      @Override
      public int compare(Entity o1, Entity o2) {
        return -Double.compare(o1.getScore(), o2.getScore());
      }
    });

    for(Entity e : entities) {
      LOG.debug("{} [{}] {}", Math.round(e.getScore() * 1000) / 1000.0, e.getClass().getSimpleName(), e.getUrn());
    }
  }

  static boolean isValidDAG(Collection<Pipeline> pipelines) {
    // Fixpoint iteration: starting from INPUT, mark every pipeline output whose
    // inputs are all reachable, until no new outputs can be added
    Set<String> visited = new HashSet<>();
    visited.add(INPUT);

    int prevSize = 0;
    while(prevSize < visited.size()) {
      prevSize = visited.size();
      for (Pipeline p : pipelines) {
        if (visited.containsAll(p.getInputNames()))
          visited.add(p.getOutputName());
      }
    }

    return visited.contains(OUTPUT);
  }

  Map<String, Future<PipelineResult>> constructDAG(Map<String, Pipeline> pipelines) {
    // TODO purge pipelines not on critical path
    Map<String, Future<PipelineResult>> tasks = new HashMap<>();

    Pipeline input = pipelines.get(INPUT);
    PipelineCallable inputCallable = new PipelineCallable(Collections.<String, Future<PipelineResult>>emptyMap(), input);
    tasks.put(INPUT, this.executor.submit(inputCallable));

    // Submit each pipeline once all of its dependencies have been submitted;
    // iterate until no further pipelines become schedulable
    int prevSize = 0;
    while(prevSize < tasks.size()) {
      prevSize = tasks.size();
      for(Pipeline p : pipelines.values()) {
        if(!tasks.containsKey(p.getOutputName()) && tasks.keySet().containsAll(p.getInputNames())) {
          Map<String, Future<PipelineResult>> dependencies = new HashMap<>();
          for(String inputName : p.getInputNames()) {
            dependencies.put(inputName, tasks.get(inputName));
          }
          PipelineCallable c = new PipelineCallable(dependencies, p);
          tasks.put(p.getOutputName(), this.executor.submit(c));
        }
      }
    }

    return tasks;
  }
}
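
/*
 * Minimal usage sketch (illustrative only, not part of the framework): the example class,
 * thread pool size, and wiring below are assumptions; it relies solely on the StaticPipeline
 * constructor already used by run() above. A single pipeline that reads from INPUT and
 * writes to OUTPUT forms the smallest DAG the RCAFramework constructor accepts.
 */
class RCAFrameworkUsageExample {
  public static void main(String[] args) throws Exception {
    ExecutorService executor = java.util.concurrent.Executors.newFixedThreadPool(2);
    try {
      // Pipeline emitting a fixed (here empty) entity set under the name OUTPUT,
      // depending only on INPUT; real deployments register domain-specific pipelines
      Pipeline outputPipeline = new StaticPipeline(RCAFramework.OUTPUT,
          Collections.singleton(RCAFramework.INPUT), Collections.<Entity>emptySet());

      RCAFramework framework = new RCAFramework(Collections.singletonList(outputPipeline), executor);

      // Run the search; real callers pass their seed search entities instead of an empty set
      RCAFrameworkExecutionResult result = framework.run(Collections.<Entity>emptySet());
    } finally {
      executor.shutdown();
    }
  }
}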