/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gobblin.data.management.copy.hive.filter;
import java.util.Arrays;
import java.util.Properties;
import org.joda.time.DateTime;
import org.joda.time.Period;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import com.google.common.base.Preconditions;
import gobblin.data.management.copy.hive.HiveDataset;
import gobblin.data.management.copy.hive.HiveDatasetFinder;
import gobblin.data.management.copy.hive.PartitionFilterGenerator;
/**
* Filters partitions according to a lookback period. The partition values must be time formatted. User must specify
* the partition column, lookback period (as ISO 8601 period), and datetime format of the column values.
*
* <p>
* The generated filter is of the form "datePartition >= 'date'", so the column must be of string type and its format
* must be such that lexycographical string and date ordering are compatible.
* </p>
*/
public class LookbackPartitionFilterGenerator implements PartitionFilterGenerator {
public static final String PARTITION_COLUMN = HiveDatasetFinder.HIVE_DATASET_PREFIX + ".partition.filter.datetime.column";
public static final String LOOKBACK = HiveDatasetFinder.HIVE_DATASET_PREFIX + ".partition.filter.datetime.lookback";
public static final String DATETIME_FORMAT = HiveDatasetFinder.HIVE_DATASET_PREFIX + ".partition.filter.datetime.format";
private static final String ERROR_MESSAGE = LookbackPartitionFilterGenerator.class.getName()
+ " requires the following properties " + Arrays.toString(new String[]{PARTITION_COLUMN, LOOKBACK, DATETIME_FORMAT});
private final String partitionColumn;
private final Period lookback;
private final DateTimeFormatter formatter;
public LookbackPartitionFilterGenerator(Properties properties) {
Preconditions.checkArgument(properties.containsKey(PARTITION_COLUMN), ERROR_MESSAGE);
Preconditions.checkArgument(properties.containsKey(LOOKBACK), ERROR_MESSAGE);
Preconditions.checkArgument(properties.containsKey(DATETIME_FORMAT), ERROR_MESSAGE);
this.partitionColumn = properties.getProperty(PARTITION_COLUMN);
this.lookback = Period.parse(properties.getProperty(LOOKBACK));
this.formatter = DateTimeFormat.forPattern(properties.getProperty(DATETIME_FORMAT));
}
@Override
public String getFilter(HiveDataset hiveDataset) {
DateTime limitDate = (new DateTime()).minus(this.lookback);
return String.format("%s >= \"%s\"", this.partitionColumn, this.formatter.print(limitDate));
}
}