package ivory.lsh.eval;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;
import tl.lin.data.pair.PairOfInts;
/**
* Read in sequence file format and output as text format.
*
* @author ferhanture
*
*/
public class OutputResultsAsText extends Configured implements Tool {
public static final String[] RequiredParameters = {};
private static final Logger sLogger = Logger.getLogger(OutputResultsAsText.class);
static enum mapoutput {
count
};
private static int printUsage() {
System.out.println("usage: [input-path] [output-path]");
return -1;
}
public OutputResultsAsText() {
super();
}
public String[] getRequiredParameters() {
return RequiredParameters;
}
public int run(String[] args) throws Exception {
if (args.length != 2) {
printUsage();
return -1;
}
JobConf job = new JobConf(getConf(), OutputResultsAsText.class);
job.setJobName("OutputAsText");
FileSystem fs = FileSystem.get(job);
String inputPath = args[0];
String outputPath = args[1];
int numMappers = 300;
int numReducers = 1;
if (fs.exists(new Path(outputPath))) {
sLogger.info("Output already exists! Quitting...");
return 0;
}
FileInputFormat.setInputPaths(job, new Path(inputPath));
FileOutputFormat.setOutputPath(job, new Path(outputPath));
FileOutputFormat.setCompressOutput(job, false);
job.set("mapred.child.java.opts", "-Xmx2048m");
job.setInt("mapred.map.max.attempts", 10);
job.setInt("mapred.reduce.max.attempts", 10);
job.setInt("mapred.task.timeout", 6000000);
sLogger.info("Running job " + job.getJobName());
sLogger.info("Input directory: " + inputPath);
sLogger.info("Output directory: " + outputPath);
job.setMapperClass(IdentityMapper.class);
job.setReducerClass(IdentityReducer.class);
job.setMapOutputKeyClass(PairOfInts.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(PairOfInts.class);
job.setOutputValueClass(IntWritable.class);
job.setNumMapTasks(numMappers);
job.setNumReduceTasks(numReducers);
job.setInputFormat(SequenceFileInputFormat.class);
JobClient.runJob(job);
return 0;
}
public static void main(String[] args) throws Exception {
ToolRunner.run(new Configuration(), new OutputResultsAsText(), args);
return;
}
}