package org.apache.hadoop.mapred;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.Constant;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

/**
 * Mapper that measures HDFS read throughput. For each control record
 * (key = file name, value = file length) it sequentially reads the named
 * file parts and emits the observed I/O rate in MB/s under the fixed key
 * "1", so a single reducer can aggregate all rates.
 */
@SuppressWarnings("deprecation")
public class ReadMapper extends MapReduceBase
        implements Mapper<Text, LongWritable, Text, Text>, Constant {

    private byte[] buffer;
    private int bufferSize;
    private long totalSize;
    private FileSystem fs;
    private Configuration conf;

    @Override
    public void configure(JobConf configuration) {
        conf = new Configuration(configuration);
    }

    @Override
    public void map(Text key, LongWritable value,
            OutputCollector<Text, Text> output, Reporter report)
            throws IOException {
        String name = key.toString();
        // FileSystem.get() returns a cached instance, so calling it per
        // record is cheap; do not close it here.
        fs = FileSystem.get(conf);
        totalSize = 0;
        bufferSize = (int) BUFFERLIMIT;
        buffer = new byte[bufferSize];
        long ntasks = Long.parseLong(conf.get("dfs.nTasks"));

        for (int task = 0; task < ntasks; task++) {
            FSDataInputStream in = fs.open(new Path(INPUT, name + task));
            long taskSize = 0;
            long startTime = System.currentTimeMillis();
            try {
                // read() may return fewer bytes than requested before EOF and
                // returns -1 at EOF, so loop until EOF rather than until the
                // first short read, and never add -1 to the byte count (both
                // bugs in the original loop condition).
                int bytesRead;
                while ((bytesRead = in.read(buffer, 0, bufferSize)) >= 0) {
                    taskSize += bytesRead;
                }
            } finally {
                in.close();
            }
            long execTime = System.currentTimeMillis() - startTime;
            totalSize += taskSize;
            // Rate for this task only: bytes read this task over this task's
            // elapsed time. (The original divided the cumulative byte count
            // by the per-task time, inflating every rate after the first
            // task.) Clamp execTime to 1 ms so a sub-millisecond read does
            // not produce an infinite rate.
            float ioRate = (float) (taskSize * 1000.0
                    / (Math.max(execTime, 1) * MEGA));
            output.collect(new Text("1"), new Text(String.valueOf(ioRate)));
        }
    }
}
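
/*
 * Hypothetical driver sketch, not part of the original source: one way this
 * mapper could be wired into a job with the old mapred API. The class name
 * ReadDriver, the "benchmark/control" and "benchmark/read-output" paths, the
 * IdentityReducer, and the dfs.nTasks value are illustrative assumptions;
 * INPUT, BUFFERLIMIT, and MEGA still come from the project's Constant
 * interface. Package-private, so it may share this file with ReadMapper.
 */
class ReadDriver {
    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf(new Configuration(), ReadDriver.class);
        job.setJobName("read-benchmark");
        // The mapper expects one control record per file:
        // key = file name (Text), value = file length (LongWritable),
        // which a SequenceFile control directory provides directly.
        job.setInputFormat(SequenceFileInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path("benchmark/control")); // assumed path
        FileOutputFormat.setOutputPath(job, new Path("benchmark/read-output")); // assumed path
        job.setMapperClass(ReadMapper.class);
        job.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(1); // all rates share key "1", so one reducer collects them
        job.set("dfs.nTasks", "1"); // per-file part count consumed by the mapper
        JobClient.runJob(job);
    }
}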