package com.linkedin.thirdeye.completeness.checker; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; import com.linkedin.thirdeye.anomaly.job.JobConstants.JobStatus; import com.linkedin.thirdeye.anomaly.job.JobRunner; import com.linkedin.thirdeye.anomaly.task.TaskConstants.TaskStatus; import com.linkedin.thirdeye.anomaly.task.TaskConstants.TaskType; import com.linkedin.thirdeye.client.DAORegistry; import com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO; import com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO; import com.linkedin.thirdeye.datalayer.dto.JobDTO; import com.linkedin.thirdeye.datalayer.dto.TaskDTO; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.concurrent.TimeUnit; import org.joda.time.DateTime; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** job runner for data completeness job * */ public class DataCompletenessJobRunner implements JobRunner { private static final Logger LOG = LoggerFactory.getLogger(DataCompletenessJobRunner.class); private static final DAORegistry DAO_REGISTRY = DAORegistry.getInstance(); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); private DataCompletenessJobContext dataCompletenessJobContext; private DateTimeFormatter dateTimeFormatter = DateTimeFormat.forPattern("yyyyMMddHHmm"); public DataCompletenessJobRunner(DataCompletenessJobContext dataCompletenessJobContext) { this.dataCompletenessJobContext = dataCompletenessJobContext; } @Override public void run() { DateTime now = new DateTime(); long checkDurationEndTime = now.getMillis(); long checkDurationStartTime = now.minus(TimeUnit.MILLISECONDS.convert( DataCompletenessConstants.LOOKBACK_TIME_DURATION, DataCompletenessConstants.LOOKBACK_TIMEUNIT)).getMillis(); String checkerEndTime = dateTimeFormatter.print(checkDurationEndTime); String checkerStartTime = dateTimeFormatter.print(checkDurationStartTime); String jobName = String.format("%s-%s-%s", TaskType.DATA_COMPLETENESS.toString(), checkerStartTime, checkerEndTime); dataCompletenessJobContext.setCheckDurationStartTime(checkDurationStartTime); dataCompletenessJobContext.setCheckDurationEndTime(checkDurationEndTime); dataCompletenessJobContext.setJobName(jobName); Set<String> datasetsToCheck = new HashSet<>(); for (DatasetConfigDTO datasetConfig : DAO_REGISTRY.getDatasetConfigDAO().findActiveRequiresCompletenessCheck()) { datasetsToCheck.add(datasetConfig.getDataset()); } for (AnomalyFunctionDTO anomalyFunction : DAO_REGISTRY.getAnomalyFunctionDAO().findAllActiveFunctions()) { if (anomalyFunction.isRequiresCompletenessCheck()) { datasetsToCheck.add(anomalyFunction.getCollection()); } } dataCompletenessJobContext.setDatasetsToCheck(Lists.newArrayList(datasetsToCheck)); // create data completeness job long jobExecutionId = createJob(); dataCompletenessJobContext.setJobExecutionId(jobExecutionId); // create data completeness tasks createTasks(); } public Long createJob() { Long jobExecutionId = null; try { LOG.info("Creating data completeness job"); JobDTO jobSpec = new JobDTO(); jobSpec.setJobName(dataCompletenessJobContext.getJobName()); jobSpec.setScheduleStartTime(System.currentTimeMillis()); jobSpec.setStatus(JobStatus.SCHEDULED); jobSpec.setTaskType(TaskType.DATA_COMPLETENESS); jobExecutionId = DAO_REGISTRY.getJobDAO().save(jobSpec); LOG.info("Created JobSpec {} with jobExecutionId {}", jobSpec, jobExecutionId); } catch (Exception e) { LOG.error("Exception in creating data completeness job", e); } return jobExecutionId; } protected List<DataCompletenessTaskInfo> createDataCompletenessTasks(DataCompletenessJobContext dataCompletenessJobContext) { List<DataCompletenessTaskInfo> tasks = new ArrayList<>(); // create 1 task, which will get data and perform check DataCompletenessTaskInfo dataCompletenessCheck = new DataCompletenessTaskInfo(); dataCompletenessCheck.setDataCompletenessType(DataCompletenessConstants.DataCompletenessType.CHECKER); dataCompletenessCheck.setDataCompletenessStartTime(dataCompletenessJobContext.getCheckDurationStartTime()); dataCompletenessCheck.setDataCompletenessEndTime(dataCompletenessJobContext.getCheckDurationEndTime()); dataCompletenessCheck.setDatasetsToCheck(dataCompletenessJobContext.getDatasetsToCheck()); tasks.add(dataCompletenessCheck); // create 1 task, for cleanup DataCompletenessTaskInfo cleanup = new DataCompletenessTaskInfo(); cleanup.setDataCompletenessType(DataCompletenessConstants.DataCompletenessType.CLEANUP); tasks.add(cleanup); return tasks; } public List<Long> createTasks() { List<Long> taskIds = new ArrayList<>(); try { LOG.info("Creating data completeness checker tasks"); List<DataCompletenessTaskInfo> dataCompletenessTasks = createDataCompletenessTasks(dataCompletenessJobContext); LOG.info("DataCompleteness tasks {}", dataCompletenessTasks); for (DataCompletenessTaskInfo taskInfo : dataCompletenessTasks) { String taskInfoJson = null; try { taskInfoJson = OBJECT_MAPPER.writeValueAsString(taskInfo); } catch (JsonProcessingException e) { LOG.error("Exception when converting DataCompletenessTaskInfo {} to jsonString", taskInfo, e); } TaskDTO taskSpec = new TaskDTO(); taskSpec.setTaskType(TaskType.DATA_COMPLETENESS); taskSpec.setJobName(dataCompletenessJobContext.getJobName()); taskSpec.setStatus(TaskStatus.WAITING); taskSpec.setStartTime(System.currentTimeMillis()); taskSpec.setTaskInfo(taskInfoJson); taskSpec.setJobId(dataCompletenessJobContext.getJobExecutionId()); long taskId = DAO_REGISTRY.getTaskDAO().save(taskSpec); taskIds.add(taskId); LOG.info("Created dataCompleteness task {} with taskId {}", taskSpec, taskId); } } catch (Exception e) { LOG.error("Exception in creating data completeness tasks", e); } return taskIds; } }