package org.apache.hadoop.mapred;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.Constant;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

/**
 * Mapper that measures HDFS read throughput. For each control record
 * (key = file name, value = file length) it sequentially reads the named
 * file parts and emits the observed I/O rate in MB/s under the fixed key
 * "1", so a single reducer can aggregate all rates.
 */
@SuppressWarnings("deprecation")
public class ReadMapper extends MapReduceBase
        implements Mapper<Text, LongWritable, Text, Text>, Constant {

    private byte[] buffer;
    private int bufferSize;
    private long totalSize;
    private FileSystem fs;
    private Configuration conf;

    @Override
    public void configure(JobConf configuration) {
        conf = new Configuration(configuration);
    }

    @Override
    public void map(Text key, LongWritable value,
            OutputCollector<Text, Text> output, Reporter report)
            throws IOException {
        String name = key.toString();
        // FileSystem.get() returns a cached instance, so calling it per
        // record is cheap; do not close it here.
        fs = FileSystem.get(conf);
        totalSize = 0;
        bufferSize = (int) BUFFERLIMIT;
        buffer = new byte[bufferSize];
        long ntasks = Long.parseLong(conf.get("dfs.nTasks"));

        for (int task = 0; task < ntasks; task++) {
            FSDataInputStream in = fs.open(new Path(INPUT, name + task));
            long taskSize = 0;
            long startTime = System.currentTimeMillis();
            try {
                // read() may return fewer bytes than requested before EOF and
                // returns -1 at EOF, so loop until EOF rather than until the
                // first short read, and never add -1 to the byte count (both
                // bugs in the original loop condition).
                int bytesRead;
                while ((bytesRead = in.read(buffer, 0, bufferSize)) >= 0) {
                    taskSize += bytesRead;
                }
            } finally {
                in.close();
            }
            long execTime = System.currentTimeMillis() - startTime;
            totalSize += taskSize;
            // Rate for this task only: bytes read this task over this task's
            // elapsed time. (The original divided the cumulative byte count
            // by the per-task time, inflating every rate after the first
            // task.) Clamp execTime to 1 ms so a sub-millisecond read does
            // not produce an infinite rate.
            float ioRate = (float) (taskSize * 1000.0
                    / (Math.max(execTime, 1) * MEGA));
            output.collect(new Text("1"), new Text(String.valueOf(ioRate)));
        }
    }
}
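
/*
 * Hypothetical driver sketch, not part of the original source: one way this
 * mapper could be wired into a job with the old mapred API. The class name
 * ReadDriver, the "benchmark/control" and "benchmark/read-output" paths, the
 * IdentityReducer, and the dfs.nTasks value are illustrative assumptions;
 * INPUT, BUFFERLIMIT, and MEGA still come from the project's Constant
 * interface. Package-private, so it may share this file with ReadMapper.
 */
class ReadDriver {
    public static void main(String[] args) throws IOException {
        JobConf job = new JobConf(new Configuration(), ReadDriver.class);
        job.setJobName("read-benchmark");
        // The mapper expects one control record per file:
        // key = file name (Text), value = file length (LongWritable),
        // which a SequenceFile control directory provides directly.
        job.setInputFormat(SequenceFileInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path("benchmark/control")); // assumed path
        FileOutputFormat.setOutputPath(job, new Path("benchmark/read-output")); // assumed path
        job.setMapperClass(ReadMapper.class);
        job.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setNumReduceTasks(1); // all rates share key "1", so one reducer collects them
        job.set("dfs.nTasks", "1"); // per-file part count consumed by the mapper
        JobClient.runJob(job);
    }
}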