package edu.umd.cloud9.example.hits;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
/**
 * Utility MapReduce job for outputting serialized HITSNode data in
 * human-readable text form.
 *
 * <p>The job reads (IntWritable, HITSNode) records from SequenceFile input and
 * passes them unchanged through an identity mapper and an identity reducer.
 * No output format is set explicitly, so the JobConf default is used.
 *
 * <p>Usage: {@code [input-path] [output-path] [num-mappers] [num-reducers]}.
 * Any existing output directory is deleted before the job is submitted.
 *
 * @author michaelmcgrath
 */
public class TextOutput extends Configured implements Tool {
  private static final Logger sLogger = Logger.getLogger(TextOutput.class);

  /** Number of positional command-line arguments this tool requires. */
  private static final int EXPECTED_ARGS = 4;

  /**
   * Prints the command-line usage of this tool, followed by the generic
   * Hadoop options, to standard output.
   *
   * @return always -1, so callers can use it directly as a failure exit code
   */
  private static int printUsage() {
    System.out
        .println("usage: [input-path] [output-path] [num-mappers] [num-reducers]");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  /**
   * Configures and runs the conversion job.
   *
   * @param args input path, output path, number of map tasks, number of
   *        reduce tasks (in that order)
   * @return 0 when the job completes, -1 when the arguments are invalid
   * @throws Exception if the output directory cannot be deleted or the job
   *         cannot be run
   */
  public int run(String[] args) throws Exception {
    if (args.length != EXPECTED_ARGS) {
      return printUsage();
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks;
    int reduceTasks;
    try {
      mapTasks = Integer.parseInt(args[2]);
      reduceTasks = Integer.parseInt(args[3]);
    } catch (NumberFormatException e) {
      // Report malformed task counts as a usage error instead of a stack trace.
      sLogger.error("num-mappers and num-reducers must be integers", e);
      return printUsage();
    }

    sLogger.info("Tool: TextOutputter");
    sLogger.info(" - input paths: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    // Start from the configuration injected by ToolRunner so that generic
    // options (-D, -fs, -jt, -conf, ...) are honoured. Fall back to a fresh
    // JobConf when run() is invoked directly without a configuration set.
    Configuration baseConf = getConf();
    JobConf conf = (baseConf == null)
        ? new JobConf(TextOutput.class)
        : new JobConf(baseConf, TextOutput.class);
    conf.setJobName("TextOutput");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    Path outputDir = new Path(outputPath);
    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, outputDir);
    FileOutputFormat.setCompressOutput(conf, false);

    // The output format is deliberately not set; the JobConf default applies.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(HITSNode.class);

    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    // Delete the output directory if it exists already. Resolve the file
    // system from the path itself so that output locations outside the
    // default file system are handled too.
    FileSystem outputFs = outputDir.getFileSystem(conf);
    outputFs.delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in "
        + (System.currentTimeMillis() - startTime) / 1000.0
        + " seconds");

    return 0;
  }

  /**
   * Command-line entry point; delegates to {@link ToolRunner} and exits with
   * the code returned by {@link #run(String[])}.
   *
   * @param args command-line arguments, including any generic Hadoop options
   * @throws Exception if the tool fails
   */
  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(), new TextOutput(), args);
    System.exit(res);
  }
}