package com.skp.experiment.common;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.common.HadoopUtil;
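
/**
 * Helpers for preparing Hadoop jobs with cluster-aware configuration.
 */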
public class JobUtils {
  /** Per-TaskTracker multiplier for the mapred.map.tasks hint (see below). */
  private static final int MAP_TASKS = 100000;
  /** Task timeout: 6 minutes, in milliseconds. */
  private static final long LONG_TIMEOUT = 60000L * 6L;
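  /**
   * Prepares a map-only {@link Job} with a long task timeout and speculative
   * execution disabled, as the name suggests for mappers that work with a
   * shared reference under a lock.
   *
   * <p>A minimal usage sketch; {@code MyMapper} and the input/output paths are
   * hypothetical placeholders, not part of this class:
   *
   * <pre>{@code
   * Configuration conf = new Configuration();
   * Job job = JobUtils.prepareReferenceWithLockMapOnlyJob(
   *     new Path("/data/in"), new Path("/data/out"),
   *     TextInputFormat.class, MyMapper.class,
   *     Text.class, Text.class,
   *     TextOutputFormat.class, conf);
   * job.waitForCompletion(true);
   * }</pre>
   */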
  public static Job prepareReferenceWithLockMapOnlyJob(Path inputPath, Path outputPath,
      Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper,
      Class<? extends Writable> mapperKey,
      Class<? extends Writable> mapperValue,
      Class<? extends OutputFormat> outputFormat,
      Configuration conf) throws IOException {
conf.setLong("mapred.task.timeout", LONG_TIMEOUT);
conf.setBoolean("mapred.map.tasks.speculative.execution", false);
conf.setBoolean("mapred.map.tasks.speculative.execution", false);
conf.setInt("mapred.map.tasks", HadoopClusterUtil.getNumberOfTaskTrackers(conf) * MAP_TASKS);
conf.setLong("mapred.min.split.size", HadoopClusterUtil.getMinInputSplitSizeMax(conf, inputPath));
conf.setLong("mapred.max.split.size", HadoopClusterUtil.getMinInputSplitSizeMax(conf, inputPath));
    /*
    conf.set("mapred.child.java.opts", "-Xmx8g");
    conf.set("mapred.map.child.java.opts", "-Xmx8g");
    conf.setLong("dfs.block.size", HadoopClusterUtil.getMaxBlockSize(conf, pathToTransformed()));
    */
    Job job = HadoopUtil.prepareJob(inputPath, outputPath,
        inputFormat, mapper, mapperKey, mapperValue, outputFormat, conf);
    // Map-only job: the base Reducer class is passed only to build the job name.
    job.setJobName(HadoopUtil.getCustomJobName(JobUtils.class.getSimpleName(), job, mapper, Reducer.class));
    return job;
  }
}