package org.bigtop.bigpetstore.generator; import java.io.IOException; import java.util.Date; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.lib.MultipleOutputs; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Mapper.Context; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.bigtop.bigpetstore.util.BigPetStoreConstants; import org.bigtop.bigpetstore.util.DeveloperTools; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * This is a mapreduce implementation of a generator of a large sentiment * analysis data set. The scenario is as follows: * * The number of records will (roughly) correspond to the output size - each * record is about 80 bytes. * * 1KB set bigpetstore_records=10 1MB set bigpetstore_records=10,000 1GB set * bigpetstore_records=10,000,000 1TB set bigpetstore_records=10,000,000,000 */ public class BPSGenerator { final static Logger log = LoggerFactory.getLogger(BPSGenerator.class); public enum props { // bigpetstore_splits, bigpetstore_records } public static Job createJob(Path output, int records) throws IOException { Configuration c = new Configuration(); c.setInt(props.bigpetstore_records.name(), 10); return createJob(output, c); } public static Job createJob(Path output, Configuration conf) throws IOException { Job job = new Job(conf, "PetStoreTransaction_ETL_" + System.currentTimeMillis()); // recursively delete the data set if it exists. FileSystem.get(output.toUri(),conf).delete(output, true); job.setJarByClass(BPSGenerator.class); job.setMapperClass(MyMapper.class); // use the default reducer // job.setReducerClass(PetStoreTransactionGeneratorJob.Red.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setInputFormatClass(GeneratePetStoreTransactionsInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileOutputFormat.setOutputPath(job, output); return job; } public static class MyMapper extends Mapper<Text, Text, Text, Text> { @Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); } protected void map(Text key, Text value, Context context) throws java.io.IOException, InterruptedException { context.write(key, value); // TODO: Add multiple outputs here which writes mock addresses for // generated users // to a corresponding data file. }; } public static void main(String args[]) throws Exception { if (args.length != 2) { System.err.println("USAGE : [number of records] [output path]"); System.exit(0); } else { Configuration conf = new Configuration(); DeveloperTools.validate( args, "# of records", "output path"); conf.setInt( GeneratePetStoreTransactionsInputFormat.props.bigpetstore_records.name(), Integer.parseInt(args[0])); createJob(new Path(args[1]), conf).waitForCompletion(true); } } }