/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.hadoop.fs;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

import java.io.Closeable;
import java.io.IOException;
import java.net.InetAddress;
import java.net.UnknownHostException;
/**
* Base mapper class for IO operations.
* <p>
* Two abstract methods {@link #doIO(Reporter, String, long)} and
* {@link #collectStats(OutputCollector, String,long, Object)} should be
* overloaded in derived classes to define the IO operation and the statistics data to be collected
* by subsequent reducers.
*
*/
public abstract class AbstractIOMapper<T> extends Configured implements
    Mapper<Text, LongWritable, Text, Text> {
  /** Scratch buffer available to subclasses for read/write operations. */
  protected byte[] mBuffer;
  /** Size of {@link #mBuffer}, read from "test.io.file.buffer.size" (default 4096). */
  protected int mBufferSize;
  /** File system under test, created from the job configuration. */
  protected FileSystem mFS;
  /** Name of the host this mapper runs on; used only in status messages. */
  protected String mHostname;
  /** Stream returned by {@link #getIOStream(String)}; closed after each {@code map} call. */
  protected Closeable mStream;

  public AbstractIOMapper() {}

  @Override
  public void configure(JobConf conf) {
    setConf(conf);
    try {
      mFS = FileSystem.get(conf);
    } catch (IOException e) {
      // Narrowed from a blanket Exception catch: FileSystem.get declares only IOException,
      // and catching Exception would also mask programming errors such as NPEs.
      throw new RuntimeException("Cannot create file system.", e);
    }
    mBufferSize = conf.getInt("test.io.file.buffer.size", 4096);
    mBuffer = new byte[mBufferSize];
    try {
      mHostname = InetAddress.getLocalHost().getHostName();
    } catch (UnknownHostException e) {
      // Best effort: fall back to a placeholder so status messages still render.
      mHostname = "localhost";
    }
  }

  @Override
  public void close() throws IOException {}

  /**
   * Perform io operation, usually read or write.
   *
   * @param reporter the reporter used to signal progress and update status
   * @param name file name
   * @param value offset within the file
   * @return object that is passed as a parameter to
   *         {@link #collectStats(OutputCollector, String, long, Object)}
   * @throws IOException if the IO operation fails
   */
  abstract T doIO(Reporter reporter, String name, long value) throws IOException;

  /**
   * Create an input or output stream based on the specified file. Subclasses should override this
   * method to provide an actual stream.
   *
   * @param name file name
   * @return the stream, or {@code null} if the subclass does not use a stream
   * @throws IOException if the stream cannot be created
   */
  public Closeable getIOStream(String name) throws IOException {
    return null;
  }

  /**
   * Collect stat data to be combined by a subsequent reducer.
   *
   * @param output the collector that receives the stat records
   * @param name file name
   * @param execTime IO execution time
   * @param doIOReturnValue value returned by {@link #doIO(Reporter, String, long)}
   * @throws IOException if emitting the stats fails
   */
  abstract void collectStats(OutputCollector<Text, Text> output, String name, long execTime,
      T doIOReturnValue) throws IOException;

  /**
   * Map file name and offset into statistical data.
   * <p>
   * The map task is to get the <tt>key</tt>, which contains the file name, and the <tt>value</tt>,
   * which is the offset within the file.
   *
   * The parameters are passed to the abstract method
   * {@link #doIO(Reporter, String, long)}, which performs the io operation,
   * usually read or write data, and then
   * {@link #collectStats(OutputCollector, String, long, Object)} is called
   * to prepare stat data for a subsequent reducer.
   */
  @Override
  public void map(Text key, LongWritable value, OutputCollector<Text, Text> output,
      Reporter reporter) throws IOException {
    String name = key.toString();
    long longValue = value.get();
    reporter.setStatus("starting " + name + " ::host = " + mHostname);

    // Open the (optional) stream before starting the clock so that setup cost
    // is excluded from the measured execution time.
    mStream = getIOStream(name);
    T statValue = null;
    long tStart = System.currentTimeMillis();
    try {
      statValue = doIO(reporter, name, longValue);
    } finally {
      // Always release the stream, even if doIO throws.
      if (mStream != null) {
        mStream.close();
      }
    }
    long tEnd = System.currentTimeMillis();
    long execTime = tEnd - tStart;
    collectStats(output, name, execTime, statValue);
    reporter.setStatus("finished " + name + " ::host = " + mHostname);
  }
}