package com.linkedin.thirdeye.completeness.checker;
import java.util.List;
import java.util.Map;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormatter;
import com.linkedin.thirdeye.api.TimeSpec;
/**
 * Contract implemented by every algorithm that can be plugged in to the completeness checker.
 */
public interface DataCompletenessAlgorithm {

  /**
   * Fetches all baseline values required for the dataset, corresponding to the bucket value
   * that is being checked.
   *
   * @param dataset the dataset whose baseline values are requested
   * @param bucketValue the bucket value being checked
   * @return the baseline counts for the given dataset and bucket value
   */
  List<Long> getBaselineCounts(String dataset, Long bucketValue);

  /**
   * Derives the percent completeness from the baseline counts and the current count.
   *
   * @param baselineCounts the baseline counts to compare against
   * @param currentCount the current count
   * @return the percent completeness
   */
  double getPercentCompleteness(List<Long> baselineCounts, Long currentCount);

  /**
   * Verifies whether the data completeness percentage passes the expectations.
   *
   * @param percentComplete the completeness percentage to evaluate
   * @param expectedCompleteness the completeness that is expected
   * @return whether the completeness percentage passes the expectations
   */
  boolean isDataComplete(Double percentComplete, Double expectedCompleteness);

  /**
   * Returns the percentage after which an entry can be considered complete and does not need
   * to be checked again.
   *
   * <p>This percentage should typically be higher than the expected completeness. Even after an
   * entry has passed the expected completeness and been marked as complete, it continues to be
   * checked in case the percentage has improved. The aim is to avoid looking at datasets after
   * they have reached ~100%.
   *
   * @return the percentage after which an entry is no longer checked
   */
  double getConsiderCompleteAfter();

  /**
   * Helps in the case of cold start.
   *
   * <p>When the system starts, or a new dataset is added, baseline entries will not be present.
   * This method checks whether the required baseline entries are available and, if they are not,
   * computes and stores them for the rest of the computation to use.
   *
   * @param dataset the dataset whose baseline entries are checked
   * @param bucketNameToBucketValueMS map from bucket name to bucket value
   *     (NOTE(review): the "MS" suffix suggests milliseconds - confirm against callers)
   * @param dateTimeFormatter date-time formatter made available to the implementation
   * @param timeSpec time specification made available to the implementation
   * @param zone time zone made available to the implementation
   */
  void computeBaselineCountsIfNotPresent(
      String dataset,
      Map<String, Long> bucketNameToBucketValueMS,
      DateTimeFormatter dateTimeFormatter,
      TimeSpec timeSpec,
      DateTimeZone zone);
}