package org.apache.hadoop.mapred;

import org.apache.hadoop.hdfs.Constant;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

/**
 * Mapper that writes files of a requested size and reports the measured write rate.
 *
 * <p>For every input record the mapper first records the file name and requested size in a
 * {@link SequenceFile} under {@code DFS_INPUT}, then writes {@code dfs.nTasks} files under
 * {@code INPUT}, each filled with pseudo-random bytes, and emits one rate value per file.
 *
 * <p>{@code DFS_INPUT}, {@code INPUT}, {@code BUFFERLIMIT} and {@code MEGA} are inherited from
 * {@link Constant}; their values are not visible in this file.
 */
@SuppressWarnings("deprecation")
public class WriteMapper extends MapReduceBase
    implements Mapper<Text, LongWritable, Text, Text>, Constant {

  private byte[] buffer;
  private int bufferSize;
  private long totalSize; // in mb
  public FileSystem fs;
  private Configuration conf;

  /**
   * Keeps a private copy of the job configuration for later use in {@link #map}.
   *
   * @param configuration the job configuration supplied by the framework
   */
  @Override
  public void configure(JobConf configuration) {
    conf = new Configuration(configuration);
  }

  /**
   * Writes the test files for one input record and emits the write rate of each file.
   *
   * @param key base name of the files to create
   * @param value requested file size; it is multiplied by {@code MEGA} to obtain the number of
   *     bytes to write (the field comment says the unit is mb)
   * @param output receives one {@code ("1", rate)} pair per written file
   * @param reporter receives status updates carrying the percentage still left to write
   * @throws IOException if any file system operation fails
   */
  @Override
  public void map(Text key, LongWritable value, OutputCollector<Text, Text> output,
      Reporter reporter) throws IOException {
    // Extra head room, in percent, requested for the output stream's buffer.
    final int slackPercent = 10;

    bufferSize = (int) BUFFERLIMIT;
    buffer = new byte[bufferSize];
    // Fixed seed: every run produces the same byte sequence.
    Random rand = new Random(255);

    String taskID = conf.get("mapred.task.id");
    String name = key.toString() + taskID;
    Path pathSequence = new Path(DFS_INPUT, name);
    fs = FileSystem.get(conf);
    totalSize = value.get();

    fs.delete(new Path(DFS_INPUT, key.toString()), true);

    // Record the generated file name and its requested size.
    SequenceFile.Writer write = null;
    try {
      write = SequenceFile.createWriter(fs, conf, pathSequence, Text.class, LongWritable.class,
          CompressionType.NONE);
      write.append(new Text(name), new LongWritable(totalSize));
    } finally {
      if (write != null) {
        write.close();
      }
    }

    totalSize *= MEGA;

    // Multiply before dividing and use long arithmetic: the previous
    // "percent / 100 * bufferSize" was always 0 because of integer division.
    int streamBufferSize = (int) Math.min((long) Integer.MAX_VALUE,
        bufferSize + (long) bufferSize * slackPercent / 100);

    long ntasks = Long.parseLong(conf.get("dfs.nTasks"));
    for (int task = 0; task < ntasks; task++) {
      Path pathInput = new Path(INPUT, name + task);
      OutputStream out = fs.create(pathInput, true, streamBufferSize);
      rand.nextBytes(buffer);

      long startTime = System.currentTimeMillis();
      try {
        long per = 100; // lowest "percent remaining" value reported so far
        for (long remain = totalSize; remain > 0; remain -= bufferSize) {
          int chunk = (remain > bufferSize) ? bufferSize : (int) remain;
          out.write(buffer, 0, chunk);

          long t = remain * 100 / totalSize;
          if (t < per) {
            // Remember the percentage (not the chunk length) so that the status is
            // only updated when the percentage actually drops.
            per = t;
            reporter.setStatus(String.valueOf(per));
          }
        }
      } finally {
        out.close();
      }
      long endTime = System.currentTimeMillis();

      // Clamp to 1 ms so a very fast write cannot divide by zero and emit Infinity/NaN.
      long execTime = Math.max(1L, endTime - startTime);
      // Bytes per millisecond scaled to MEGA units per second
      // (presumably MB/s -- confirm against the definition of MEGA in Constant).
      float ioRate = (float) (totalSize * 1000.0 / (execTime * MEGA));
      output.collect(new Text("1"), new Text(String.valueOf(ioRate)));
    }
  }
}