package com.linkedin.camus.etl.kafka.partitioner;

import com.linkedin.camus.etl.IEtlKey;
import com.linkedin.camus.etl.Partitioner;
import com.linkedin.camus.etl.kafka.common.DateUtils;
import org.apache.hadoop.mapreduce.JobContext;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormatter;

import java.util.Locale;

/**
 * Base class for time based partitioners.
 * Can be configured via {@link #init(long, String, java.util.Locale, org.joda.time.DateTimeZone)}.
 */
public abstract class BaseTimeBasedPartitioner extends Partitioner {

  public static final String DEFAULT_TIME_ZONE = "America/Los_Angeles";

  /** Size of a partition in milliseconds. Remains {@code 0} until {@link #init} is invoked. */
  private long outfilePartitionMillis = 0;

  /** Path formatter with the configured time zone and locale applied; set by {@link #init}. */
  private DateTimeFormatter outputDirFormatter;

  /**
   * Initialize the partitioner.
   * This method must be invoked once, and before any other method.
   *
   * @param outfilePartitionMillis duration of a partition, e.g. {@code 3,600,000} for hour partitions
   * @param destSubTopicPathFormat format of output sub-dir to be created under topic directory,
   *          typically something like {@code "'hourly'/YYYY/MM/dd/HH"}.
   *          For formatting rules see {@link org.joda.time.format.DateTimeFormat}.
   * @param locale locale to use for formatting of path
   * @param outputTimeZone time zone to use for date calculations
   */
  protected void init(long outfilePartitionMillis, String destSubTopicPathFormat, Locale locale,
      DateTimeZone outputTimeZone) {
    this.outfilePartitionMillis = outfilePartitionMillis;
    this.outputDirFormatter =
        DateUtils.getDateTimeFormatter(destSubTopicPathFormat, outputTimeZone).withLocale(locale);
  }

  /**
   * Encodes the record's timestamp as its partition bucket, using the configured partition
   * duration and the formatter's time zone.
   *
   * @throws IllegalStateException if {@link #init} was not called with a positive partition duration
   */
  @Override
  public String encodePartition(JobContext context, IEtlKey key) {
    // Fail fast with a clear message rather than letting a 0-ms partition size surface as an
    // obscure arithmetic error inside DateUtils.getPartition.
    if (outfilePartitionMillis <= 0) {
      throw new IllegalStateException(
          "init() must be invoked with a positive outfilePartitionMillis before encodePartition()");
    }
    return Long.toString(DateUtils.getPartition(outfilePartitionMillis, key.getTime(), outputDirFormatter.getZone()));
  }

  /**
   * Builds the output sub-path {@code topic/<formatted bucket>} for an encoded partition.
   * The formatter carries its own time zone, so printing is independent of the DateTime's
   * default-zone construction.
   */
  @Override
  public String generatePartitionedPath(JobContext context, String topic, String encodedPartition) {
    // parseLong: primitive parse, avoids the needless boxing of Long.valueOf.
    DateTime bucket = new DateTime(Long.parseLong(encodedPartition));
    return topic + "/" + bucket.toString(outputDirFormatter);
  }

  /** Builds the final file name: {@code topic.brokerId.partitionId.count.offset.encodedPartition}. */
  @Override
  public String generateFileName(JobContext context, String topic, String brokerId, int partitionId, int count,
      long offset, String encodedPartition) {
    return topic + "." + brokerId + "." + partitionId + "." + count + "." + offset + "." + encodedPartition;
  }

  /**
   * Builds the working (in-progress) file name, prefixed with {@code data.}.
   * Dots in the topic are replaced with underscores — presumably so '.' stays a reliable
   * field delimiter in the name; NOTE(review): confirm against whatever parses these names.
   */
  @Override
  public String getWorkingFileName(JobContext context, String topic, String brokerId, int partitionId,
      String encodedPartition) {
    return "data." + topic.replace('.', '_') + "." + brokerId + "." + partitionId + "." + encodedPartition;
  }
}