package com.linkedin.thirdeye.completeness.checker; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.TimeUnit; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormatter; import org.testng.Assert; import org.testng.annotations.Test; import com.google.common.collect.ListMultimap; import com.google.common.collect.Lists; import com.linkedin.thirdeye.api.TimeGranularity; import com.linkedin.thirdeye.api.TimeSpec; public class DataCompletenessUtilsTest { @Test public void testGetAdjustedStartForDataset() throws Exception { DateTimeZone zone = DateTimeZone.forID("America/Los_Angeles"); DateTime dateTime1 = new DateTime(2017, 01, 12, 15, 46, zone); // SDF, DAYS long startTime = dateTime1.getMillis(); String columnName = "Date"; TimeGranularity timeGranularity = new TimeGranularity(1, TimeUnit.DAYS); String timeFormat = "SIMPLE_DATE_FORMAT:yyyyMMdd"; TimeSpec timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); long adjustedStartTime = DataCompletenessUtils.getAdjustedTimeForDataset(timeSpec, startTime, zone); Assert.assertEquals(adjustedStartTime, new DateTime(2017, 01, 12, 0, 0, zone).getMillis()); // EPOCH timeFormat = TimeSpec.SINCE_EPOCH_FORMAT; // HOURS zone = DateTimeZone.UTC; dateTime1 = new DateTime(2017, 01, 12, 15, 46, zone); startTime = dateTime1.getMillis(); timeGranularity = new TimeGranularity(1, TimeUnit.HOURS); timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); adjustedStartTime = DataCompletenessUtils.getAdjustedTimeForDataset(timeSpec, startTime, zone); Assert.assertEquals(adjustedStartTime, new DateTime(2017, 01, 12, 15, 0, zone).getMillis()); // DEFAULT timeGranularity = new TimeGranularity(1, TimeUnit.MILLISECONDS); timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); adjustedStartTime = DataCompletenessUtils.getAdjustedTimeForDataset(timeSpec, startTime, zone); Assert.assertEquals(adjustedStartTime, new DateTime(2017, 01, 12, 15, 0, zone).getMillis()); // MINUTES timeGranularity = new TimeGranularity(5, TimeUnit.MINUTES); timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); adjustedStartTime = DataCompletenessUtils.getAdjustedTimeForDataset(timeSpec, startTime, zone); Assert.assertEquals(adjustedStartTime, new DateTime(2017, 01, 12, 15, 30, zone).getMillis()); DateTime dateTime2 = new DateTime(2017, 01, 12, 15, 00, zone); DateTime dateTime3 = new DateTime(2017, 01, 12, 15, 03, zone); startTime = dateTime2.getMillis(); adjustedStartTime = DataCompletenessUtils.getAdjustedTimeForDataset(timeSpec, startTime, zone); Assert.assertEquals(adjustedStartTime, new DateTime(2017, 01, 12, 15, 0, zone).getMillis()); startTime = dateTime3.getMillis(); adjustedStartTime = DataCompletenessUtils.getAdjustedTimeForDataset(timeSpec, startTime, zone); Assert.assertEquals(adjustedStartTime, new DateTime(2017, 01, 12, 15, 0, zone).getMillis()); } @Test public void testGetBucketSizeForDataset() throws Exception { String columnName = "Date"; // DAYS bucket TimeGranularity timeGranularity = new TimeGranularity(1, TimeUnit.DAYS); String timeFormat = TimeSpec.SINCE_EPOCH_FORMAT; TimeSpec timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); long bucketSize = DataCompletenessUtils.getBucketSizeInMSForDataset(timeSpec); Assert.assertEquals(bucketSize, 24*60*60_000); // HOURS bucket timeGranularity = new TimeGranularity(1, TimeUnit.HOURS); timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); bucketSize = DataCompletenessUtils.getBucketSizeInMSForDataset(timeSpec); Assert.assertEquals(bucketSize, 60*60_000); // MINUTES returns 30 MINUTES bucket timeGranularity = new TimeGranularity(1, TimeUnit.MINUTES); timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); bucketSize = DataCompletenessUtils.getBucketSizeInMSForDataset(timeSpec); Assert.assertEquals(bucketSize, 30*60_000); // DEFAULT bucket timeGranularity = new TimeGranularity(1, TimeUnit.MILLISECONDS); timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); bucketSize = DataCompletenessUtils.getBucketSizeInMSForDataset(timeSpec); Assert.assertEquals(bucketSize, 60*60_000); } @Test public void testGetDateTimeFormatterForDataset() { DateTimeZone zone = DateTimeZone.UTC; long dateTimeInMS = new DateTime(2017, 01, 12, 15, 30, zone).getMillis(); String columnName = "Date"; // DAYS bucket TimeGranularity timeGranularity = new TimeGranularity(1, TimeUnit.DAYS); String timeFormat = TimeSpec.SINCE_EPOCH_FORMAT; TimeSpec timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); DateTimeFormatter dateTimeFormatter = DataCompletenessUtils.getDateTimeFormatterForDataset(timeSpec, zone); Assert.assertEquals(dateTimeFormatter.print(dateTimeInMS), "20170112"); zone = DateTimeZone.forID("America/Los_Angeles"); long dateTimeInMS1 = new DateTime(2017, 01, 12, 05, 30, zone).getMillis(); // DAYS bucket timeGranularity = new TimeGranularity(1, TimeUnit.DAYS); timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); dateTimeFormatter = DataCompletenessUtils.getDateTimeFormatterForDataset(timeSpec, zone); Assert.assertEquals(dateTimeFormatter.print(dateTimeInMS1), "20170112"); // HOURS bucket zone = DateTimeZone.UTC; dateTimeInMS = new DateTime(2017, 01, 12, 15, 30, zone).getMillis(); timeGranularity = new TimeGranularity(1, TimeUnit.HOURS); timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); dateTimeFormatter = DataCompletenessUtils.getDateTimeFormatterForDataset(timeSpec, zone); Assert.assertEquals(dateTimeFormatter.print(dateTimeInMS), "2017011215"); // MINUTES bucket timeGranularity = new TimeGranularity(1, TimeUnit.MINUTES); timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); dateTimeFormatter = DataCompletenessUtils.getDateTimeFormatterForDataset(timeSpec, zone); Assert.assertEquals(dateTimeFormatter.print(dateTimeInMS), "201701121530"); // DEFAULT bucket timeGranularity = new TimeGranularity(1, TimeUnit.MILLISECONDS); timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); dateTimeFormatter = DataCompletenessUtils.getDateTimeFormatterForDataset(timeSpec, zone); Assert.assertEquals(dateTimeFormatter.print(dateTimeInMS), "2017011215"); } @Test public void testGetBucketNameToTimeValuesMap() { DateTimeZone zone = DateTimeZone.forID("America/Los_Angeles"); // SDF String columnName = "Date"; TimeGranularity timeGranularity = new TimeGranularity(1, TimeUnit.DAYS); String timeFormat = "SIMPLE_DATE_FORMAT:yyyyMMdd"; TimeSpec timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); // DAYS Map<String, Long> bucketNameToBucketValue = new HashMap<>(); bucketNameToBucketValue.put("20170112", new DateTime(2017, 01, 12, 0, 0, zone).getMillis()); bucketNameToBucketValue.put("20170113", new DateTime(2017, 01, 13, 0, 0, zone).getMillis()); bucketNameToBucketValue.put("20170114", new DateTime(2017, 01, 14, 0, 0, zone).getMillis()); Map<String, Long> expectedValues = new HashMap<>(); expectedValues.put("20170112", 20170112L); expectedValues.put("20170113", 20170113L); expectedValues.put("20170114", 20170114L); ListMultimap<String,Long> bucketNameToTimeValuesMap = DataCompletenessUtils.getBucketNameToTimeValuesMap(timeSpec, bucketNameToBucketValue); for (Entry<String, Long> entry : bucketNameToTimeValuesMap.entries()) { String bucketName = entry.getKey(); Assert.assertEquals(entry.getValue(), expectedValues.get(bucketName)); } // EPOCH zone = DateTimeZone.UTC; timeFormat = TimeSpec.SINCE_EPOCH_FORMAT; timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); // HOURS timeGranularity = new TimeGranularity(1, TimeUnit.HOURS); timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); bucketNameToBucketValue = new HashMap<>(); bucketNameToBucketValue.put("2017011200", new DateTime(2017, 01, 12, 0, 0, zone).getMillis()); bucketNameToBucketValue.put("2017011201", new DateTime(2017, 01, 12, 1, 0, zone).getMillis()); bucketNameToBucketValue.put("2017011202", new DateTime(2017, 01, 12, 2, 0, zone).getMillis()); expectedValues = new HashMap<>(); expectedValues.put("2017011200", 412272L); // hours since epoch values expectedValues.put("2017011201", 412273L); expectedValues.put("2017011202", 412274L); bucketNameToTimeValuesMap = DataCompletenessUtils.getBucketNameToTimeValuesMap(timeSpec, bucketNameToBucketValue); for (Entry<String, Long> entry : bucketNameToTimeValuesMap.entries()) { String bucketName = entry.getKey(); Assert.assertEquals(entry.getValue(), expectedValues.get(bucketName)); } // MINUTES timeGranularity = new TimeGranularity(10, TimeUnit.MINUTES); timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat); bucketNameToBucketValue = new HashMap<>(); bucketNameToBucketValue.put("201701120000", new DateTime(2017, 01, 12, 0, 0, zone).getMillis()); bucketNameToBucketValue.put("201701120030", new DateTime(2017, 01, 12, 0, 30, zone).getMillis()); bucketNameToBucketValue.put("201701120100", new DateTime(2017, 01, 12, 1, 00, zone).getMillis()); bucketNameToBucketValue.put("201701120130", new DateTime(2017, 01, 12, 1, 30, zone).getMillis()); Map<String, List<Long>> expectedValuesList = new HashMap<>(); expectedValuesList.put("201701120000", Lists.newArrayList(2473632L, 2473633L, 2473634L)); // 10 minutes since epoch values expectedValuesList.put("201701120030", Lists.newArrayList(2473635L, 2473636L, 2473637L)); expectedValuesList.put("201701120100", Lists.newArrayList(2473638L, 2473639L, 2473640L)); expectedValuesList.put("201701120130", Lists.newArrayList(2473641L, 2473642L, 2473643L)); bucketNameToTimeValuesMap = DataCompletenessUtils.getBucketNameToTimeValuesMap(timeSpec, bucketNameToBucketValue); for (String bucketName : bucketNameToTimeValuesMap.keySet()) { List<Long> timeValues = bucketNameToTimeValuesMap.get(bucketName); Collections.sort(timeValues); Assert.assertEquals(timeValues, expectedValuesList.get(bucketName)); } } }