package com.skp.experiment.common; import java.io.IOException; import java.util.Map; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.ToolRunner; import org.apache.mahout.common.AbstractJob; import com.skp.experiment.common.mapreduce.KeyValuesCountMapper; import com.skp.experiment.common.mapreduce.KeyValuesCountReducer; public class KeyValuesCountJob extends AbstractJob { private static final String DELIMETER = ","; public static void main(String[] args) throws Exception { ToolRunner.run(new KeyValuesCountJob(), args); } @Override public int run(String[] args) throws Exception { addInputOption(); addOutputOption(); addOption("inputFileType", "inType", "inut file format{seq, text}", "text"); addOption("outputFileType", "outType", "output file format{seq, text}", "text"); Map<String, String> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } String inputFileType = getOption("inputFileType"); String outputFileType = getOption("outputFileType"); runJob(inputFileType, outputFileType); return 0; } @SuppressWarnings("rawtypes") private void runJob(String inputFileType, String outputFileType) throws IOException, InterruptedException, ClassNotFoundException { Class<? extends InputFormat> inputClass = inputFileType.equals("text") ? TextInputFormat.class : SequenceFileInputFormat.class; Class<? extends OutputFormat> outputClass = outputFileType.equals("text") ? TextOutputFormat.class : SequenceFileOutputFormat.class; Class<? extends WritableComparable> outKeyClass = outputFileType.equals("text") ? NullWritable.class : Text.class; Class<? extends Writable> outValueClass = outputFileType.equals("text") ? Text.class : IntWritable.class; String outputType = outputFileType.equals("text") ? "text" : "seq"; Job job = prepareJob(getInputPath(), getOutputPath(), inputClass, KeyValuesCountMapper.class, Text.class, IntWritable.class, KeyValuesCountReducer.class, outKeyClass, outValueClass, outputClass); job.getConfiguration().set(KeyValuesCountReducer.DELIMETER_KEY, DELIMETER); job.getConfiguration().set(KeyValuesCountReducer.OUTPUT_TYPE, outputType); job.waitForCompletion(true); } }