package com.linkedin.thirdeye.rootcause.impl; import com.linkedin.thirdeye.client.DAORegistry; import com.linkedin.thirdeye.datalayer.bao.DatasetConfigManager; import com.linkedin.thirdeye.datalayer.bao.MetricConfigManager; import com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO; import com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO; import com.linkedin.thirdeye.rootcause.Entity; import com.linkedin.thirdeye.rootcause.Pipeline; import com.linkedin.thirdeye.rootcause.PipelineContext; import com.linkedin.thirdeye.rootcause.PipelineResult; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Pipeline for identifying relevant metrics based on dataset * association. The pipeline first fetches metric entities from the context and then * searches Thirdeye's internal database for metrics contained in the same datasets as * any metric entities in the search context. All found metrics are scored equally. */ public class MetricDatasetPipeline extends Pipeline { private static final Logger LOG = LoggerFactory.getLogger(MetricDatasetPipeline.class); final MetricConfigManager metricDAO; final DatasetConfigManager datasetDAO; /** * Constructor for dependency injection * * @param outputName pipeline output name * @param inputNames input pipeline names * @param metricDAO metric config DAO * @param datasetDAO dataset config DAO */ public MetricDatasetPipeline(String outputName, Set<String> inputNames, MetricConfigManager metricDAO, DatasetConfigManager datasetDAO) { super(outputName, inputNames); this.metricDAO = metricDAO; this.datasetDAO = datasetDAO; } /** * Alternate constructor for PipelineLoader * * @param outputName pipeline output name * @param inputNames input pipeline names * @param ignore configuration properties (none) */ public MetricDatasetPipeline(String outputName, Set<String> inputNames, Map<String, String> ignore) { super(outputName, inputNames); this.metricDAO = DAORegistry.getInstance().getMetricConfigDAO(); this.datasetDAO = DAORegistry.getInstance().getDatasetConfigDAO(); } @Override public PipelineResult run(PipelineContext context) { Set<MetricEntity> metrics = context.filter(MetricEntity.class); Set<String> datasets = new HashSet<>(); Map<String, Double> datasetScores = new HashMap<>(); for(MetricEntity me : metrics) { MetricConfigDTO metricDTO = this.metricDAO.findById(me.getId()); String d = metricDTO.getDataset(); datasets.add(d); double metricScore = me.getScore(); if(!datasetScores.containsKey(d)) datasetScores.put(d, 0.0d); datasetScores.put(d, datasetScores.get(d) + metricScore); } Set<Entity> entities = new HashSet<>(); for(String d : datasets) { DatasetConfigDTO dataset = datasetDAO.findByDataset(d); if(dataset == null) { LOG.warn("Could not find dataset '{}'", d); continue; } double datasetScore = datasetScores.get(d); Collection<MetricConfigDTO> dtos = metricDAO.findByDataset(d); dtos = removeExisting(dtos, metrics); for(MetricConfigDTO dto : dtos) { double score = datasetScore / dtos.size(); entities.add(MetricEntity.fromMetric(score, dto.getId())); } } return new PipelineResult(context, entities); } static Collection<MetricConfigDTO> removeExisting(Iterable<MetricConfigDTO> dtos, Iterable<MetricEntity> existing) { Collection<MetricConfigDTO> out = new ArrayList<>(); for(MetricConfigDTO dto : dtos) { if(!findExisting(dto, existing)) out.add(dto); } return out; } static boolean findExisting(MetricConfigDTO dto, Iterable<MetricEntity> existing) { for(MetricEntity me : existing) { if(me.getId() == dto.getId()) { return true; } } return false; } }