package com.linkedin.camus.etl.kafka.partitioner;
import com.linkedin.camus.etl.IEtlKey;
import com.linkedin.camus.etl.Partitioner;
import com.linkedin.camus.etl.kafka.common.DateUtils;
import com.linkedin.camus.etl.kafka.mapred.EtlMultiOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormatter;
/**
* Partitions incoming data into time-based partitions (hourly by default) and generates pathnames of the form:
* {@code etl.destination.path/topic-name/hourly/YYYY/MM/dd/HH}.
*
* The following configurations are supported:
* <ul>
* <li>{@code etl.destination.path} - top-level data output directory, required</li>
* <li>{@code etl.destination.path.topic.sub.dir} - sub-dir to create under topic dir, defaults to {@code hourly}</li>
* <li>{@code etl.default.timezone} - timezone of the events, defaults to {@code America/Los_Angeles}</li>
* <li>{@code etl.output.file.time.partition.mins} - partition size in minutes, defaults to {@code 60}</li>
* </ul>
*/
public class DefaultPartitioner extends Partitioner {
  /** Joda-Time pattern used to render the date portion of a partitioned path. */
  protected static final String OUTPUT_DATE_FORMAT = "YYYY/MM/dd/HH";

  /** Milliseconds per minute; converts the configured partition size (minutes) to milliseconds. */
  private static final long MILLIS_PER_MINUTE = 60000L;

  /**
   * Formatter for {@link #OUTPUT_DATE_FORMAT}, created by {@link #setConf(Configuration)} when it
   * is given a non-null configuration. Remains {@code null} until then.
   */
  protected DateTimeFormatter outputDateFormatter = null;

  /**
   * Encodes the partition that the given key belongs to.
   *
   * <p>The partition size is read from the job context in minutes and converted to milliseconds;
   * the value itself is computed by {@code DateUtils.getPartition} from that size, the key's time
   * and the formatter's time zone.
   *
   * <p>This method dereferences {@link #outputDateFormatter}, so
   * {@link #setConf(Configuration)} must have been called with a non-null configuration first.
   *
   * @param context job context the partition size is read from
   * @param key key whose time determines the partition
   * @return the string form of the value returned by {@code DateUtils.getPartition}
   */
  @Override
  public String encodePartition(JobContext context, IEtlKey key) {
    long outfilePartitionMs =
        EtlMultiOutputFormat.getEtlOutputFileTimePartitionMins(context) * MILLIS_PER_MINUTE;
    return String.valueOf(
        DateUtils.getPartition(outfilePartitionMs, key.getTime(), outputDateFormatter.getZone()));
  }

  /**
   * Builds the relative path {@code <topic>/<topic sub dir>/<formatted partition date>}.
   *
   * @param context job context the topic sub-directory is read from
   * @param topic topic name, used as the first path component
   * @param encodedPartition partition as produced by {@link #encodePartition}; parsed as a
   *     {@code long} instant in milliseconds
   * @return the partitioned path, without a leading or trailing slash being added here
   * @throws NumberFormatException if {@code encodedPartition} is not a parsable {@code long}
   */
  @Override
  public String generatePartitionedPath(JobContext context, String topic, String encodedPartition) {
    String topicSubDir = EtlMultiOutputFormat.getDestPathTopicSubDir(context);
    // parseLong yields a primitive, so the DateTime(long) constructor is selected directly rather
    // than DateTime(Object), which would route a boxed Long through a converter lookup.
    DateTime bucket = new DateTime(Long.parseLong(encodedPartition));
    return topic + "/" + topicSubDir + "/" + bucket.toString(outputDateFormatter);
  }

  /**
   * Builds the output file name
   * {@code <topic>.<brokerId>.<partitionId>.<count>.<offset>.<encodedPartition>}.
   *
   * @param context job context (not used by this implementation)
   * @param topic topic name, used verbatim
   * @param brokerId broker identifier
   * @param partitionId partition identifier
   * @param count count component of the name
   * @param offset offset component of the name
   * @param encodedPartition partition as produced by {@link #encodePartition}
   * @return the dot-separated file name
   */
  @Override
  public String generateFileName(JobContext context, String topic, String brokerId, int partitionId, int count,
      long offset, String encodedPartition) {
    return topic + "." + brokerId + "." + partitionId + "." + count + "." + offset + "." + encodedPartition;
  }

  /**
   * Builds the working file name
   * {@code data.<topic>.<brokerId>.<partitionId>.<encodedPartition>}, with every {@code '.'} in
   * the topic replaced by {@code '_'}.
   *
   * @param context job context (not used by this implementation)
   * @param topic topic name; must not be {@code null}
   * @param brokerId broker identifier
   * @param partitionId partition identifier
   * @param encodedPartition partition as produced by {@link #encodePartition}
   * @return the dot-separated working file name
   */
  @Override
  public String getWorkingFileName(JobContext context, String topic, String brokerId, int partitionId,
      String encodedPartition) {
    // A plain character replacement is equivalent to the regex "\\." -> "_" and avoids regex work.
    String sanitizedTopic = topic.replace('.', '_');
    return "data." + sanitizedTopic + "." + brokerId + "." + partitionId + "." + encodedPartition;
  }

  /**
   * Stores the configuration and, when it is non-null, (re)creates the output date formatter in
   * the time zone named by {@code EtlMultiOutputFormat.ETL_DEFAULT_TIMEZONE}, falling back to
   * {@code America/Los_Angeles} when that key is unset.
   *
   * <p>A {@code null} configuration leaves the current formatter untouched but is still passed on
   * to the superclass.
   *
   * @param conf configuration to apply, may be {@code null}
   */
  @Override
  public void setConf(Configuration conf) {
    if (conf != null) {
      String zoneId = conf.get(EtlMultiOutputFormat.ETL_DEFAULT_TIMEZONE, "America/Los_Angeles");
      outputDateFormatter = DateUtils.getDateTimeFormatter(OUTPUT_DATE_FORMAT, DateTimeZone.forID(zoneId));
    }
    super.setConf(conf);
  }
}