package edu.umd.cloud9.example.hits; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.mapred.lib.IdentityMapper; import org.apache.hadoop.mapred.lib.IdentityReducer; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Logger; /** * * Utility MR job for outputting serialized HITSNode data in human-readable text form * * @author michaelmcgrath * */ public class TextOutput extends Configured implements Tool { private static final Logger sLogger = Logger.getLogger(AFormatterWG.class); private static int printUsage() { System.out .println("usage: [input-path] [output-path] [num-mappers] [num-reducers]"); ToolRunner.printGenericCommandUsage(System.out); return -1; } public int run(String[] args) throws Exception { // TODO Auto-generated method stub if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int mapTasks = Integer.parseInt(args[2]); int reduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: TextOutputter"); sLogger.info(" - input paths: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + mapTasks); sLogger.info(" - number of reducers: " + reduceTasks); JobConf conf = new JobConf(TextOutput.class); conf.setJobName("TextOutput"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputKeyClass(IntWritable.class); conf.setOutputValueClass(HITSNode.class); // conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setMapperClass(IdentityMapper.class); conf.setReducerClass(IdentityReducer.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; } public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new TextOutput(), args); System.exit(res); } }