package com.linkedin.thirdeye.rootcause.impl; import com.linkedin.thirdeye.rootcause.Entity; import com.linkedin.thirdeye.rootcause.Pipeline; import com.linkedin.thirdeye.rootcause.PipelineContext; import com.linkedin.thirdeye.rootcause.PipelineResult; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; /** * TopKPipeline is a generic pipeline implementation for ordering, filtering, and truncating incoming * Entities. The pipeline first filters incoming Entities based on their {@code class} and then * orders them based on score from highest to lowest. It finally truncates the result to at most * {@code k} elements and emits the result. */ public class TopKPipeline extends Pipeline { public static final String PROP_K = "k"; public static final String PROP_CLASS = "class"; public static final String PROP_CLASS_DEFAULT = Entity.class.getName(); private final int k; private final Class<? extends Entity> clazz; /** * Constructor for dependency injection * * @param outputName pipeline output name * @param inputNames input pipeline names * @param clazz (super) class to filter by * @param k maximum number of result elements */ public TopKPipeline(String outputName, Set<String> inputNames, Class<? extends Entity> clazz, int k) { super(outputName, inputNames); this.k = k; this.clazz = clazz; } /** * Alternate constructor for PipelineLoader * * @param outputName pipeline output name * @param inputNames input pipeline names * @param properties configuration properties ({@code PROP_K}, {@code PROP_CLASS}) */ public TopKPipeline(String outputName, Set<String> inputNames, Map<String, String> properties) throws Exception { super(outputName, inputNames); if(!properties.containsKey(PROP_K)) throw new IllegalArgumentException(String.format("Property '%s' required, but not found", PROP_K)); this.k = Integer.parseInt(properties.get(PROP_K)); String classProp = PROP_CLASS_DEFAULT; if(properties.containsKey(PROP_CLASS)) classProp = properties.get(PROP_CLASS); this.clazz = (Class<? extends Entity>)Class.forName(classProp); } @Override public PipelineResult run(PipelineContext context) { List<Entity> entities = new ArrayList<>(context.filter(this.clazz)); Collections.sort(entities, Entity.HIGHEST_SCORE_FIRST); return new PipelineResult(context, new HashSet<>(entities.subList(0, Math.min(entities.size(), this.k)))); } }