package gobblin.source.extractor.partition; import java.util.ArrayList; import java.util.Collections; import java.util.List; import org.joda.time.DateTime; import org.testng.Assert; import org.testng.annotations.Test; import gobblin.configuration.ConfigurationKeys; import gobblin.configuration.SourceState; import gobblin.source.extractor.extract.ExtractType; import gobblin.source.extractor.utils.Utils; import gobblin.source.extractor.watermark.WatermarkType; /** * Unit tests for {@link PartitionerTest} */ public class PartitionerTest { @Test public void testGetPartitionList() { List<Partition> expectedPartitions = new ArrayList<>(); SourceState sourceState = new SourceState(); sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_IS_WATERMARK_OVERRIDE, true); TestPartitioner partitioner = new TestPartitioner(sourceState); long defaultValue = ConfigurationKeys.DEFAULT_WATERMARK_VALUE; expectedPartitions.add(new Partition(defaultValue, defaultValue, true, false)); // Watermark doesn't exist Assert.assertEquals(partitioner.getPartitionList(-1), expectedPartitions); // Set watermark sourceState.setProp(ConfigurationKeys.EXTRACT_DELTA_FIELDS_KEY, "time"); // Set other properties sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, "hour"); sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE, "SNAPSHOT"); sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_PARTITION_INTERVAL, "2"); sourceState.setProp(ConfigurationKeys.SOURCE_MAX_NUMBER_OF_PARTITIONS, "2"); sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_IS_WATERMARK_OVERRIDE, true); expectedPartitions.clear(); expectedPartitions.add(new Partition(defaultValue, Long.parseLong(TestPartitioner.currentTimeString), true, false)); // No user specified watermarks Assert.assertEquals(partitioner.getPartitionList(-1), expectedPartitions); // Set user specified low and high watermarks sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_START_VALUE, "20170101002010"); sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_END_VALUE, "20170101122010"); expectedPartitions.clear(); expectedPartitions.add(new Partition(20170101000000L, 20170101060000L)); expectedPartitions.add(new Partition(20170101060000L, 20170101120000L, true, true)); List<Partition> partitions = partitioner.getPartitionList(-1); Collections.sort(partitions, Partitioner.ascendingComparator); Assert.assertEquals(partitions, expectedPartitions); } @Test public void testGetUserSpecifiedPartitionList() { List<Partition> expectedPartitions = new ArrayList<>(); SourceState sourceState = new SourceState(); sourceState.setProp(Partitioner.HAS_USER_SPECIFIED_PARTITIONS, true); TestPartitioner partitioner = new TestPartitioner(sourceState); long defaultValue = ConfigurationKeys.DEFAULT_WATERMARK_VALUE; expectedPartitions.add(new Partition(defaultValue, defaultValue, true, true)); sourceState.setProp(Partitioner.USER_SPECIFIED_PARTITIONS, ""); // Partition list doesn't exist Assert.assertEquals(partitioner.getPartitionList(-1), expectedPartitions); // Date partitions sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, "date"); // Only one partition point sourceState.setProp(Partitioner.USER_SPECIFIED_PARTITIONS, "20140101030201"); expectedPartitions.clear(); expectedPartitions.add(new Partition(20140101000000L, 20170101000000L, true, false)); Assert.assertEquals(partitioner.getPartitionList(-1), expectedPartitions); // Keep upper bounds for append_daily job sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE, "APPEND_DAILY"); sourceState.setProp(Partitioner.USER_SPECIFIED_PARTITIONS, "20140101030201, 20140102040201"); expectedPartitions.clear(); expectedPartitions.add(new Partition(20140101000000L, 20140102000000L, true, true)); Assert.assertEquals(partitioner.getPartitionList(-1), expectedPartitions); // Hour partitions, snapshot extract sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, "hour"); sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_EXTRACT_TYPE, "SNAPSHOT"); expectedPartitions.clear(); expectedPartitions.add(new Partition(20140101030000L, 20140102040000L, true, false)); Assert.assertEquals(partitioner.getPartitionList(-1), expectedPartitions); // Hour partitions, timestamp extract sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_WATERMARK_TYPE, "timestamp"); expectedPartitions.clear(); expectedPartitions.add(new Partition(20140101030201L, 20140102040201L, true,false)); Assert.assertEquals(partitioner.getPartitionList(-1), expectedPartitions); } /** * Test getLowWatermark. Is watermark override: true. */ @Test public void testGetLowWatermarkOnUserOverride() { String startValue = "20140101000000"; SourceState sourceState = new SourceState(); sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_IS_WATERMARK_OVERRIDE, true); sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_START_VALUE, startValue); TestPartitioner partitioner = new TestPartitioner(sourceState); Assert.assertEquals( partitioner.getLowWatermark(null, null, -1, 0), Long.parseLong(startValue), "Low watermark should be " + startValue); // It works for full dump too sourceState.removeProp(ConfigurationKeys.SOURCE_QUERYBASED_IS_WATERMARK_OVERRIDE); sourceState.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, true); Assert.assertEquals( partitioner.getLowWatermark(null, null, -1, 0), Long.parseLong(startValue), "Low watermark should be " + startValue); // Should return ConfigurationKeys.DEFAULT_WATERMARK_VALUE if no SOURCE_QUERYBASED_START_VALUE is specified sourceState.removeProp(ConfigurationKeys.SOURCE_QUERYBASED_START_VALUE); Assert.assertEquals( partitioner.getLowWatermark(null, null, -1, 0), ConfigurationKeys.DEFAULT_WATERMARK_VALUE, "Low watermark should be " + ConfigurationKeys.DEFAULT_WATERMARK_VALUE); } /** * Test getLowWatermark. Extract type: Snapshot. */ @Test public void testGetLowWatermarkOnSnapshotExtract() { SourceState sourceState = new SourceState(); String startValue = "20140101000000"; sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_START_VALUE, startValue); TestPartitioner partitioner = new TestPartitioner(sourceState); ExtractType extractType = ExtractType.SNAPSHOT; int delta = 1; // No previous watermark Assert.assertEquals( partitioner.getLowWatermark(extractType, null, ConfigurationKeys.DEFAULT_WATERMARK_VALUE, delta), Long.parseLong(startValue), "Low watermark should be " + startValue); // With previous watermark long previousWatermark = 20140101000050L; long expected = previousWatermark + delta; Assert.assertEquals( partitioner.getLowWatermark(extractType, WatermarkType.SIMPLE, previousWatermark, delta), expected, "Low watermark should be " + expected); Assert.assertEquals( partitioner.getLowWatermark(extractType, WatermarkType.TIMESTAMP, previousWatermark, delta), expected, "Low watermark should be " + expected); // With SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS int backupSecs = 10; expected = previousWatermark + delta - backupSecs; sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS, backupSecs); Assert.assertEquals( partitioner.getLowWatermark(extractType, WatermarkType.SIMPLE, previousWatermark, delta), expected, "Low watermark should be " + expected); Assert.assertEquals( partitioner.getLowWatermark(extractType, WatermarkType.TIMESTAMP, previousWatermark, delta), expected, "Low watermark should be " + expected); } /** * Test getLowWatermark. Extract type: Append. */ @Test public void testGetLowWatermarkOnAppendExtract() { SourceState sourceState = new SourceState(); String startValue = "20140101000000"; sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_START_VALUE, startValue); TestPartitioner partitioner = new TestPartitioner(sourceState); ExtractType extractType = ExtractType.APPEND_DAILY; int delta = 1; // No previous watermark Assert.assertEquals( partitioner.getLowWatermark(extractType, null, ConfigurationKeys.DEFAULT_WATERMARK_VALUE, delta), Long.parseLong(startValue), "Low watermark should be " + startValue); // With previous watermark long previousWatermark = 20140101000050L; long expected = previousWatermark + delta; Assert.assertEquals( partitioner.getLowWatermark(extractType, WatermarkType.SIMPLE, previousWatermark, delta), expected, "Low watermark should be " + expected); Assert.assertEquals( partitioner.getLowWatermark(extractType, WatermarkType.TIMESTAMP, previousWatermark, delta), expected, "Low watermark should be " + expected); // The result has nothing to do with SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS int backupSecs = 10; sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_LOW_WATERMARK_BACKUP_SECS, backupSecs); Assert.assertEquals( partitioner.getLowWatermark(extractType, WatermarkType.SIMPLE, previousWatermark, delta), expected, "Low watermark should be " + expected); Assert.assertEquals( partitioner.getLowWatermark(extractType, WatermarkType.TIMESTAMP, previousWatermark, delta), expected, "Low watermark should be " + expected); } /** * Test getHighWatermark. Is watermark override: true. */ @Test public void testGetHighWatermarkOnUserOverride() { String endValue = "20140101000000"; SourceState sourceState = new SourceState(); sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_IS_WATERMARK_OVERRIDE, true); sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_END_VALUE, endValue); TestPartitioner partitioner = new TestPartitioner(sourceState); Assert.assertEquals( partitioner.getHighWatermark(null, null), Long.parseLong(endValue), "High watermark should be " + endValue); Assert.assertEquals( partitioner.getUserSpecifiedHighWatermark(), true, "Should mark as user specified high watermark"); partitioner.reset(); // Should return current time if no SOURCE_QUERYBASED_END_VALUE is specified sourceState.removeProp(ConfigurationKeys.SOURCE_QUERYBASED_END_VALUE); long expected = Long.parseLong(TestPartitioner.currentTimeString); Assert.assertEquals( partitioner.getHighWatermark(null, null), expected, "High watermark should be " + expected); Assert.assertEquals( partitioner.getUserSpecifiedHighWatermark(), false, "Should not mark as user specified high watermark"); } /** * Test getHighWatermark. Extract type: Snapshot. */ @Test public void testGetHighWatermarkOnSnapshotExtract() { String endValue = "20140101000000"; SourceState sourceState = new SourceState(); // It won't use SOURCE_QUERYBASED_END_VALUE when extract is full sourceState.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, true); sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_END_VALUE, endValue); ExtractType extractType = ExtractType.SNAPSHOT; TestPartitioner partitioner = new TestPartitioner(sourceState); Assert.assertEquals( partitioner.getHighWatermark(extractType, WatermarkType.SIMPLE), ConfigurationKeys.DEFAULT_WATERMARK_VALUE, "High watermark should be " + ConfigurationKeys.DEFAULT_WATERMARK_VALUE); Assert.assertEquals( partitioner.getUserSpecifiedHighWatermark(), false, "Should not mark as user specified high watermark"); long expected = Long.parseLong(TestPartitioner.currentTimeString); Assert.assertEquals( partitioner.getHighWatermark(extractType, WatermarkType.TIMESTAMP), expected, "High watermark should be " + expected); Assert.assertEquals( partitioner.getUserSpecifiedHighWatermark(), false, "Should not mark as user specified high watermark"); } /** * Test getHighWatermark. Extract type: Append. */ @Test public void testGetHighWatermarkOnAppendExtract() { String endValue = "20140101000000"; SourceState sourceState = new SourceState(); sourceState.setProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY, true); sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_END_VALUE, endValue); ExtractType extractType = ExtractType.APPEND_DAILY; TestPartitioner partitioner = new TestPartitioner(sourceState); Assert.assertEquals( partitioner.getHighWatermark(extractType, null), Long.parseLong(endValue), "High watermark should be " + endValue); Assert.assertEquals( partitioner.getUserSpecifiedHighWatermark(), true, "Should mark as user specified high watermark"); partitioner.reset(); // Test non-full-dump cases below sourceState.removeProp(ConfigurationKeys.EXTRACT_IS_FULL_KEY); // No limit type Assert.assertEquals( partitioner.getHighWatermark(ExtractType.APPEND_BATCH, null), ConfigurationKeys.DEFAULT_WATERMARK_VALUE, "High watermark should be " + ConfigurationKeys.DEFAULT_WATERMARK_VALUE); Assert.assertEquals( partitioner.getUserSpecifiedHighWatermark(), false, "Should not mark as user specified high watermark"); // No limit delta long expected = Long.parseLong(TestPartitioner.currentTimeString); Assert.assertEquals( partitioner.getHighWatermark(extractType, null), expected, "High watermark should be " + expected); Assert.assertEquals( partitioner.getUserSpecifiedHighWatermark(), false, "Should not mark as user specified high watermark"); // CURRENTDATE - 1 String maxLimit = "CURRENTDATE-1"; sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_APPEND_MAX_WATERMARK_LIMIT, maxLimit); Assert.assertEquals( partitioner.getHighWatermark(extractType, null), 20161231235959L, "High watermark should be 20161231235959"); Assert.assertEquals( partitioner.getUserSpecifiedHighWatermark(), true, "Should not mark as user specified high watermark"); partitioner.reset(); // CURRENTHOUR - 1 maxLimit = "CURRENTHOUR-1"; sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_APPEND_MAX_WATERMARK_LIMIT, maxLimit); Assert.assertEquals( partitioner.getHighWatermark(extractType, null), 20161231235959L, "High watermark should be 20161231235959"); Assert.assertEquals( partitioner.getUserSpecifiedHighWatermark(), true, "Should not mark as user specified high watermark"); partitioner.reset(); // CURRENTMINUTE - 1 maxLimit = "CURRENTMINUTE-1"; sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_APPEND_MAX_WATERMARK_LIMIT, maxLimit); Assert.assertEquals( partitioner.getHighWatermark(extractType, null), 20161231235959L, "High watermark should be 20161231235959"); Assert.assertEquals( partitioner.getUserSpecifiedHighWatermark(), true, "Should not mark as user specified high watermark"); partitioner.reset(); // CURRENTSECOND - 1 maxLimit = "CURRENTSECOND-1"; sourceState.setProp(ConfigurationKeys.SOURCE_QUERYBASED_APPEND_MAX_WATERMARK_LIMIT, maxLimit); Assert.assertEquals( partitioner.getHighWatermark(extractType, null), 20161231235959L, "High watermark should be 20161231235959"); Assert.assertEquals( partitioner.getUserSpecifiedHighWatermark(), true, "Should not mark as user specified high watermark"); } private class TestPartitioner extends Partitioner { static final String currentTimeString = "20170101000000"; private DateTime currentTime; TestPartitioner(SourceState state) { super(state); currentTime = Utils.toDateTime(currentTimeString, "yyyyMMddHHmmss", ConfigurationKeys.DEFAULT_SOURCE_TIMEZONE); } boolean getUserSpecifiedHighWatermark() { return hasUserSpecifiedHighWatermark; } @Override public DateTime getCurrentTime(String timeZone) { return currentTime; } void reset() { hasUserSpecifiedHighWatermark = false; } } }