package com.netflix.hadoop.output;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobStatus.State;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import com.netflix.aegisthus.tools.StorageHelper;

/**
 * An {@link OutputCommitter} that delegates all file movement and cleanup to
 * {@link StorageHelper}. Task output is staged in a per-task-attempt temp
 * location and promoted to its final location on task commit; the shared temp
 * area is removed when the job commits or aborts.
 *
 * <p>Only reduce tasks participate in task commit — see
 * {@link #needsTaskCommit(TaskAttemptContext)}.
 */
public class CleanOutputCommitter extends OutputCommitter {
    private static final Log LOG = LogFactory.getLog(CleanOutputCommitter.class);

    /**
     * No-op: per-attempt temp output is left in place and is cleaned up by
     * job-level abort/commit; a retried attempt writes to its own location.
     */
    @Override
    public void abortTask(TaskAttemptContext ctx) throws IOException {
        // NOOP - everything will be cleaned up by the job at the end.
        // Things that are written out will be handled by future attempts
    }

    /**
     * Removes already-committed output, then — even if that fails — removes
     * the job's temp area, so a failed job never leaves partial results behind.
     *
     * @param job   the job being aborted
     * @param state the final job state (unused here)
     * @throws IOException if either deletion fails
     */
    @Override
    public void abortJob(JobContext job, State state) throws IOException {
        LOG.info("aborting job");
        StorageHelper sh = new StorageHelper(job.getConfiguration());
        try {
            LOG.info("deleting committed files");
            sh.deleteCommitted();
        } finally {
            // Always attempt temp cleanup, even when deleteCommitted() throws.
            LOG.info("deleting temp files");
            sh.deleteBaseTempLocation();
        }
    }

    /**
     * Finalizes the job by deleting the shared temp area. Task outputs were
     * already promoted in {@link #commitTask(TaskAttemptContext)}.
     *
     * @param job the job being committed
     * @throws IOException if the temp area cannot be deleted
     */
    @Override
    public void commitJob(JobContext job) throws IOException {
        LOG.info("committing job");
        StorageHelper sh = new StorageHelper(job.getConfiguration());
        sh.deleteBaseTempLocation();
    }

    /**
     * Promotes this task attempt's staged output to the final location.
     *
     * @param ctx the task attempt being committed
     * @throws IOException if the move fails
     */
    @Override
    public void commitTask(TaskAttemptContext ctx) throws IOException {
        LOG.info("committing task");
        StorageHelper sh = new StorageHelper(ctx);
        sh.moveTaskOutputToFinal();
    }

    /**
     * Only reduce tasks produce output that needs committing; map tasks are
     * skipped. (Presumably map output is consumed via the shuffle rather than
     * written through this committer — confirm against the job setup.)
     *
     * @param ctx the task attempt to check
     * @return {@code true} for reduce attempts, {@code false} for map attempts
     */
    @Override
    public boolean needsTaskCommit(TaskAttemptContext ctx) throws IOException {
        return !ctx.getTaskAttemptID().isMap();
    }

    /**
     * No filesystem setup is required; just logs the job's temp location for
     * debugging.
     */
    @Override
    public void setupJob(JobContext job) throws IOException {
        StorageHelper sh = new StorageHelper(job.getConfiguration());
        LOG.info(String.format("temp location for job: %s", sh.getBaseTempLocation()));
    }

    /**
     * No filesystem setup is required; just logs the attempt's temp location
     * for debugging.
     */
    @Override
    public void setupTask(TaskAttemptContext ctx) throws IOException {
        StorageHelper sh = new StorageHelper(ctx);
        LOG.info(String.format("temp location for task: %s",
                sh.getBaseTaskAttemptTempLocation()));
    }
}