package com.netflix.aegisthus.output;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.Locale;
/**
 * A {@link FileOutputFormat} whose default work file is named from the configured output
 * name, the zero-padded task id, and a caller-supplied extension.
 *
 * @param <K> the output key type
 * @param <V> the output value type
 */
public abstract class CustomFileNameFileOutputFormat<K, V> extends FileOutputFormat<K, V> {
    /**
     * Formatter used to zero-pad the task id to at least five digits.
     *
     * <p>It is pinned to {@link Locale#ROOT} so the generated file names do not depend on the
     * JVM's default locale. {@link NumberFormat} is not thread-safe and this instance is shared
     * by every output format in the JVM, so every use must hold the formatter's own monitor.
     */
    private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance(Locale.ROOT);

    static {
        NUMBER_FORMAT.setMinimumIntegerDigits(5);
        NUMBER_FORMAT.setGroupingUsed(false);
    }

    /**
     * Generate a filename based on the task id, name, and extension.
     *
     * @param context the task that is calling this
     * @param name the base filename
     * @param extension the filename extension, appended verbatim (no separator is inserted)
     * @return a string like {@code $name-$id$extension}, where {@code $id} is the task id
     *         left-padded with zeros to at least five digits
     */
    protected String getCustomFileName(TaskAttemptContext context,
                                       String name,
                                       String extension) {
        TaskID taskId = context.getTaskAttemptID().getTaskID();
        int partition = taskId.getId();

        String paddedPartition;
        // Lock on the shared static formatter rather than on this instance: an instance-level
        // lock would not stop two different output format instances from formatting at once.
        synchronized (NUMBER_FORMAT) {
            paddedPartition = NUMBER_FORMAT.format(partition);
        }
        return name + '-' + paddedPartition + extension;
    }

    /**
     * Get the default path and filename for the output format.
     *
     * @param context the task context
     * @param extension an extension to add to the filename
     * @return the committer's work path joined with {@code $outputName-$id$extension}
     * @throws IOException if the output committer or its work path cannot be obtained
     */
    @Override
    public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
        // NOTE(review): this cast assumes the configured committer is a FileOutputCommitter;
        // any other committer type would fail here with a ClassCastException - confirm.
        FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);
        String fileName = getCustomFileName(context, getOutputName(context), extension);
        return new Path(committer.getWorkPath(), fileName);
    }
}