package com.linkedin.camus.etl;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.mapreduce.JobContext;

/**
 * Partitions incoming events and decides the directory layout and file names
 * under which those events are stored.
 *
 * <p>Concrete subclasses supply four pieces of naming logic: an encoding of the
 * partition values, the partitioned destination directory, the final file name,
 * and the working file name.
 */
public abstract class Partitioner
    extends Configured {

  /**
   * Encodes the partition values into a single string that is embedded into the
   * working file name. The encoded string must not contain {@code '/'} or
   * {@code ':'}.
   *
   * <p>The partition values should be taken from the {@code etlKey}; they are
   * expected to have been extracted from the record and handed to the
   * CamusWrapper beforehand.
   *
   * <p>The value returned here is later supplied as the
   * {@code encodedPartition} argument of
   * {@link #generatePartitionedPath(JobContext, String, String)}.
   *
   * @param context The JobContext.
   * @param etlKey The EtlKey holding the values extracted from the record by the MessageDecoder.
   * @return A string that encodes the partitioning values.
   */
  public abstract String encodePartition(
      JobContext context,
      IEtlKey etlKey);

  /**
   * Builds the partitioned directory structure into which the .avro files are
   * moved.
   *
   * <p>For example, a timestamp-based scheme using Hive-style partitioning
   * would return
   * {@code topic-name/year=2012/month=02/day=04/hour=12}.
   *
   * <p>The returned string is prepended with the value of the property
   * {@code etl.destination.path}. Most implementations will want the path to
   * begin with the topic name.
   *
   * @param context The JobContext.
   * @param topic The topic name.
   * @param encodedPartition The encoded partition values, i.e. the result of
   *     {@link #encodePartition(JobContext, IEtlKey)}.
   * @return A path string naming the directory the avro files are moved to.
   */
  public abstract String generatePartitionedPath(
      JobContext context,
      String topic,
      String encodedPartition);

  /**
   * Builds the target file name under which the data is finally stored.
   *
   * @param context The JobContext.
   * @param topic The topic name.
   * @param brokerId The broker id.
   * @param partitionId The partition id.
   * @param count The total number of events in the file.
   * @param offset The final offset the partition was read to.
   * @param encodedPartition The encoded partition values, i.e. the result of
   *     {@link #encodePartition(JobContext, IEtlKey)}.
   * @return The target file name for the data.
   */
  public abstract String generateFileName(
      JobContext context,
      String topic,
      String brokerId,
      int partitionId,
      int count,
      long offset,
      String encodedPartition);

  /**
   * Builds the working file name for the data of one topic partition.
   *
   * <p>NOTE(review): the previous description of this method was identical to
   * that of {@link #generateFileName}; judging by the method name this is
   * presumably the name used while data is still being written, before it is
   * moved to its target file - confirm against the callers.
   *
   * @param context The JobContext.
   * @param topic The topic name.
   * @param brokerId The broker id.
   * @param partitionId The partition id.
   * @param encodedPartition The encoded partition values, i.e. the result of
   *     {@link #encodePartition(JobContext, IEtlKey)}.
   * @return The working file name.
   */
  public abstract String getWorkingFileName(
      JobContext context,
      String topic,
      String brokerId,
      int partitionId,
      String encodedPartition);
}