package gobblin.compaction.verify; import gobblin.compaction.dataset.TimeBasedSubDirDatasetsFinder; import gobblin.compaction.mapreduce.MRCompactor; import gobblin.compaction.parser.CompactionPathParser; import gobblin.configuration.State; import gobblin.dataset.FileSystemDataset; import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.Period; import org.joda.time.format.PeriodFormatter; import org.joda.time.format.PeriodFormatterBuilder; /** * A simple class which verify current dataset belongs to a specific time range. Will skip to do * compaction if dataset is not in a correct time range. */ @Slf4j @AllArgsConstructor public class CompactionTimeRangeVerifier implements CompactionVerifier<FileSystemDataset> { public final static String COMPACTION_VERIFIER_TIME_RANGE = COMPACTION_VERIFIER_PREFIX + "time-range"; protected State state; public boolean verify (FileSystemDataset dataset) { CompactionPathParser.CompactionParserResult result = new CompactionPathParser(state).parse(dataset); DateTime folderTime = result.getTime(); DateTimeZone timeZone = DateTimeZone.forID (this.state.getProp(MRCompactor.COMPACTION_TIMEZONE, MRCompactor.DEFAULT_COMPACTION_TIMEZONE)); DateTime current = new DateTime(timeZone); PeriodFormatter formatter = new PeriodFormatterBuilder().appendMonths().appendSuffix("m").appendDays().appendSuffix("d").appendHours() .appendSuffix("h").toFormatter(); // get earliest time String maxTimeAgoStr = this.state.getProp (TimeBasedSubDirDatasetsFinder.COMPACTION_TIMEBASED_MAX_TIME_AGO, TimeBasedSubDirDatasetsFinder.DEFAULT_COMPACTION_TIMEBASED_MAX_TIME_AGO); Period maxTimeAgo = formatter.parsePeriod(maxTimeAgoStr); DateTime earliest = current.minus(maxTimeAgo); // get latest time String minTimeAgoStr = this.state.getProp (TimeBasedSubDirDatasetsFinder.COMPACTION_TIMEBASED_MIN_TIME_AGO, TimeBasedSubDirDatasetsFinder.DEFAULT_COMPACTION_TIMEBASED_MIN_TIME_AGO); Period minTimeAgo = formatter.parsePeriod(minTimeAgoStr); DateTime latest = current.minus(minTimeAgo); if (earliest.isBefore(folderTime) && latest.isAfter(folderTime)) { log.info ("{} falls in the user defined time range", dataset.datasetRoot()); return true; } return false; } public String getName() { return COMPACTION_VERIFIER_TIME_RANGE; } }