package sizzle.runtime;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import sizzle.io.EmitKey;
import sizzle.io.EmitValue;
public abstract class SizzleRunner {
/**
* Create a {@link Job} describing the work to be done by this Sizzle job.
*
* @param ins
* An array of {@link Path} containing the locations of the input
* files
*
* @param out
* A {@link Path} containing the location of the output file
*
* @param robust
* A boolean representing whether the job should ignore most
* exceptions
*
* @return A {@link Job} describing the work to be done by this Sizzle job
* @throws IOException
*/
public Job job(final Path[] ins, final Path out, final boolean robust) throws IOException {
final Configuration configuration = new Configuration();
configuration.setBoolean("sizzle.runtime.robust", robust);
final Job job = new Job(configuration);
for (final Path in : ins)
FileInputFormat.addInputPath(job, in);
FileOutputFormat.setOutputPath(job, out);
job.setMapOutputKeyClass(EmitKey.class);
job.setMapOutputValueClass(EmitValue.class);
// TODO: get map output compression working again
// TODO: support protobufs/sequence files/avro here
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
return job;
}
public abstract SizzleMapper getMapper();
public abstract SizzleCombiner getCombiner();
public abstract SizzleReducer getReducer();
}