package com.linkedin.thirdeye.completeness.checker;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.TimeUnit;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.linkedin.thirdeye.api.TimeSpec;
import com.linkedin.thirdeye.client.DAORegistry;
import com.linkedin.thirdeye.completeness.checker.DataCompletenessConstants.DataCompletenessAlgorithmName;
import com.linkedin.thirdeye.datalayer.bao.DataCompletenessConfigManager;
import com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO;
/**
* This is the implementation of the WO4W Average function or checking data completeness of datasets
*/
public class Wo4WAvgDataCompletenessAlgorithm implements DataCompletenessAlgorithm {
public static double DEFAULT_EXPECTED_COMPLETENESS = 80;
private static double CONSIDER_COMPLETE_AFTER = 95;
private static final DAORegistry DAO_REGISTRY = DAORegistry.getInstance();
private static final Logger LOG = LoggerFactory.getLogger(Wo4WAvgDataCompletenessAlgorithm.class);
private DataCompletenessConfigManager dataCompletenessConfigDAO = null;
public Wo4WAvgDataCompletenessAlgorithm() {
dataCompletenessConfigDAO = DAO_REGISTRY.getDataCompletenessConfigDAO();
}
@Override
public void computeBaselineCountsIfNotPresent(String dataset, Map<String, Long> bucketNameToBucketValueMS,
DateTimeFormatter dateTimeFormatter, TimeSpec timeSpec, DateTimeZone zone) {
// look for the past 4 weeks
for (int i = 0; i < 4; i ++) {
Period baselineOffsetPeriod = new Period(0, 0, 0, 7*(i+1), 0, 0, 0, 0);
LOG.info("Checking for {} week ago for dataset {}", (i+1), dataset);
// check if baseline is present in database
Map<String, Long> baselineBucketNameToBucketValueMS = new HashMap<>();
for (Entry<String, Long> entry : bucketNameToBucketValueMS.entrySet()) {
DateTime bucketValueDateTime = new DateTime(entry.getValue(), zone);
Long baselineBucketValueMS = bucketValueDateTime.minus(baselineOffsetPeriod).getMillis();
String baselineBucketName = dateTimeFormatter.print(baselineBucketValueMS);
DataCompletenessConfigDTO configDTO = dataCompletenessConfigDAO.findByDatasetAndDateSDF(dataset, baselineBucketName);
if (configDTO == null) {
baselineBucketNameToBucketValueMS.put(baselineBucketName, baselineBucketValueMS);
}
}
// for all baseline values not present in database, fetch their counts, and update in database
LOG.info("Missing baseline buckets {} for dataset {}", baselineBucketNameToBucketValueMS.keySet(), dataset);
if (!baselineBucketNameToBucketValueMS.isEmpty()) {
Map<String, Long> baselineCountsForBuckets =
DataCompletenessUtils.getCountsForBucketsOfDataset(dataset, timeSpec, baselineBucketNameToBucketValueMS);
LOG.info("Baseline bucket counts {}", baselineCountsForBuckets);
for (Entry<String, Long> entry : baselineCountsForBuckets.entrySet()) {
String baselineBucketName = entry.getKey();
Long baselineBucketCount = entry.getValue();
Long baselineBucketValueMS = baselineBucketNameToBucketValueMS.get(baselineBucketName);
DataCompletenessConfigDTO createBaselineConfig = new DataCompletenessConfigDTO();
createBaselineConfig.setDataset(dataset);
createBaselineConfig.setDateToCheckInSDF(baselineBucketName);
createBaselineConfig.setDateToCheckInMS(baselineBucketValueMS);
createBaselineConfig.setCountStar(baselineBucketCount);
dataCompletenessConfigDAO.save(createBaselineConfig);
}
LOG.info("Saved {} number of baseline counts in database for dataset {}",
baselineCountsForBuckets.size(), dataset);
}
}
}
@Override
public List<Long> getBaselineCounts(String dataset, Long bucketValue) {
long weekInMillis = TimeUnit.MILLISECONDS.convert(7, TimeUnit.DAYS);
long baselineInMS = bucketValue;
List<Long> baselineCounts = new ArrayList<>();
for (int i = 0; i < 4; i++) {
long count = 0;
baselineInMS = baselineInMS - weekInMillis;
DataCompletenessConfigDTO config = dataCompletenessConfigDAO.findByDatasetAndDateMS(dataset, baselineInMS);
if (config != null) {
count = config.getCountStar();
}
baselineCounts.add(count);
}
return baselineCounts;
}
@Override
public double getPercentCompleteness(List<Long> baselineCounts, Long currentCount) {
PercentCompletenessFunctionInput input = new PercentCompletenessFunctionInput();
input.setAlgorithm(DataCompletenessAlgorithmName.WO4W_AVERAGE);
input.setBaselineCounts(baselineCounts);
input.setCurrentCount(currentCount);
double percentCompleteness = DataCompletenessUtils.getPercentCompleteness(input);
return percentCompleteness;
}
@Override
public boolean isDataComplete(Double percentComplete, Double expectedCompleteness) {
boolean isDataComplete = false;
if (expectedCompleteness == null) {
expectedCompleteness = DEFAULT_EXPECTED_COMPLETENESS;
}
if (percentComplete >= expectedCompleteness) {
isDataComplete = true;
}
return isDataComplete;
}
@Override
public double getConsiderCompleteAfter() {
return CONSIDER_COMPLETE_AFTER;
}
}