package ${package};

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.lilyproject.client.LilyClient;
import org.lilyproject.mapreduce.LilyMapReduceUtil;
import org.lilyproject.repository.api.QName;
import org.lilyproject.repository.api.RecordScan;
import org.lilyproject.repository.api.LRepository;
import org.lilyproject.repository.api.filter.RecordTypeFilter;
import org.lilyproject.util.io.Closer;

import java.io.IOException;

/**
 * Sets up and launches a Lily-based MapReduce job.
 */
public class MyJob extends Configured implements Tool {
    private String zkConnectString;

    public static void main(String[] args) throws Exception {
        // Let ToolRunner handle generic command-line options
        int res = ToolRunner.run(new Configuration(), new MyJob(), args);
        System.exit(res);
    }

    @Override
    public int run(String[] args) throws Exception {
        int result = parseArgs(args);
        if (result != 0) {
            return result;
        }

        Configuration config = getConf();

        Job job = new Job(config, "MyJob");
        job.setJarByClass(MyJob.class);

        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(1);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // The reducer writes directly to Lily, so there is no output for Hadoop to produce
        job.setOutputFormatClass(NullOutputFormat.class);

        // The RecordScan defines what subset of the records will be offered as input
        // to the map task.
        RecordScan scan = new RecordScan();
        scan.setRecordFilter(new RecordTypeFilter(new QName("mrsample", "Document")));

        // We need a LilyClient here just to be able to serialize the RecordScan.
        // This is a bit lame, will improve in the future.
        LilyClient lilyClient = new LilyClient(zkConnectString, 30000);
        LRepository repository = lilyClient.getDefaultRepository();

        // Utility method that configures everything related to LilyInputFormat
        LilyMapReduceUtil.initMapperJob(scan, zkConnectString, repository, job);

        Closer.close(lilyClient);

        // Launch the job and wait for it to finish
        boolean success = job.waitForCompletion(true);
        if (!success) {
            throw new IOException("Error executing job!");
        }

        return 0;
    }

    @SuppressWarnings("static-access")
    protected int parseArgs(String[] args) {
        Options cliOptions = new Options();

        Option zkOption = OptionBuilder
                .isRequired()
                .withArgName("connection-string")
                .hasArg()
                .withDescription("ZooKeeper connection string: hostname1:port,hostname2:port,...")
                .withLongOpt("zookeeper")
                .create("z");
        cliOptions.addOption(zkOption);

        CommandLineParser parser = new PosixParser();
        CommandLine cmd;
        try {
            cmd = parser.parse(cliOptions, args);
        } catch (ParseException e) {
            System.out.println(e.getMessage());
            System.out.println();
            HelpFormatter help = new HelpFormatter();
            help.printHelp(getClass().getSimpleName(), cliOptions, true);
            return 1;
        }

        zkConnectString = cmd.getOptionValue(zkOption.getOpt());

        return 0;
    }
}
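
// ---------------------------------------------------------------------------
// Note (not part of the original sample): once packaged, the job is typically
// launched through "hadoop jar". The jar name below is a placeholder; adapt it
// to your build. ToolRunner also accepts generic Hadoop options such as -D and
// -conf in front of the job-specific -z/--zookeeper argument.
//
//   hadoop jar myjob-1.0-SNAPSHOT-job.jar ${package}.MyJob \
//       -z zkhost1:2181,zkhost2:2181
//
// The job refers to MyMapper and MyReducer, which live in their own source
// files. As a rough, hypothetical sketch of their shape: the input key/value
// classes RecordIdWritable and RecordWritable are assumed to be what Lily's
// LilyInputFormat hands to the mapper -- verify the exact class names against
// the Lily version you build against.
//
//   public class MyMapper extends Mapper<RecordIdWritable, RecordWritable, Text, IntWritable> {
//       @Override
//       protected void map(RecordIdWritable key, RecordWritable value, Context context)
//               throws IOException, InterruptedException {
//           // Emit one count per record matched by the RecordScan.
//           context.write(new Text("documents"), new IntWritable(1));
//       }
//   }
//
//   public class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
//       @Override
//       protected void reduce(Text key, Iterable<IntWritable> values, Context context)
//               throws IOException, InterruptedException {
//           int sum = 0;
//           for (IntWritable value : values) {
//               sum += value.get();
//           }
//           // A real reducer would write this result back to Lily (e.g. via
//           // LilyClient); with NullOutputFormat the emitted pair is discarded.
//           context.write(key, new IntWritable(sum));
//       }
//   }
// ---------------------------------------------------------------------------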