package com.github.projectflink.testPlan; import java.io.IOException; import java.net.URI; import org.apache.commons.lang.RandomStringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Text; /** * Very simple generator to create a (large) sequencefile to test in * conjunction with the large Stratosphere test job. * * Based on http://stackoverflow.com/questions/5377118/how-to-convert-txt-file-to-hadoops-sequence-file-format * */ public class SequenceFileGenerator { public SequenceFileGenerator() { } public static void main(String[] args) throws IOException { if(args.length < 3) { System.err.println("Usage: <outFilePath> <KV Count> <String Length>"); System.exit(1); } String uri = args[0]; int kvCount = Integer.parseInt(args[1]); int strlen = Integer.parseInt(args[2]); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create( uri), conf); Path path = new Path(uri); LongWritable key = new LongWritable(); Text value = new Text(); SequenceFile.Writer writer = null; try { writer = SequenceFile.createWriter( fs, conf, path, key.getClass(), value.getClass()); for (int i = 0; i < kvCount; i ++) { key.set(i); value.set(i+"-"+RandomStringUtils.randomNumeric(strlen)); writer.append( key, value); } } finally { IOUtils.closeStream( writer); } } }