/*
* Copyright 2013-2014 eXascale Infolab, University of Fribourg. All rights reserved.
*/
package org.apache.hadoop.hadaps;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class ReadMode {
private static final Logger LOG = LoggerFactory.getLogger(ReadMode.class);
private static final int ONE_MEGABYTE = 1024 * 1024;
private final Parameters parameters;
private final Configuration configuration;
private final FileContext fileContext;
/**
 * Mapper that reads one file per input record and reports how long the read took.
 *
 * <p>The input key is a file name carrying a trailing dot-suffix, which is stripped to
 * obtain the file to read; the input value is the number of bytes to read from it. For
 * each record the mapper emits the file name as key and the string
 * {@code "<replication> <bytesRead> <durationMillis>"} as value.
 */
public static class ReadModeMapper extends Mapper<Text, LongWritable, Text, Text> {

  /** Interval, in milliseconds, between progress reports sent while a file is being read. */
  private static final long PROGRESS_INTERVAL_MS = 60000L;

  /** Number of nanoseconds in one millisecond. */
  private static final long NANOS_PER_MILLI = 1000000L;

  private FileContext fileContext;

  /** Obtains the {@link FileContext} for the task's configuration. */
  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    fileContext = FileContext.getFileContext(context.getConfiguration());
  }

  /**
   * Reads up to {@code value} bytes of the file named by {@code key} (without its suffix)
   * and writes the replication factor, the number of bytes read and the elapsed time.
   *
   * @param key file name followed by a dot-suffix
   * @param value number of bytes to read; reading also stops at end of file
   * @param context task context used for progress reporting and output
   * @throws IOException if the file status cannot be obtained or the file cannot be read
   */
  @Override
  protected void map(Text key, LongWritable value, final Context context) throws IOException, InterruptedException {
    String filename = stripSuffix(key.toString());
    Path file = new Path(filename);
    long size = value.get();
    short replication = fileContext.getFileStatus(file).getReplication();

    Timer timer = new Timer(true);
    // try-with-resources: a failure in close() no longer hides the exception that aborted the read.
    try (InputStream inputStream = fileContext.open(file)) {
      // Report progress periodically from a daemon timer thread while the read loop is busy.
      timer.schedule(new TimerTask() {
        @Override
        public void run() {
          context.progress();
        }
      }, 0, PROGRESS_INTERVAL_MS);

      long currentSize = 0;
      byte[] bytes = new byte[ONE_MEGABYTE];

      // Use the monotonic clock: wall-clock time can jump (e.g. NTP adjustments) and would
      // corrupt the measured duration.
      long startTime = System.nanoTime();
      while (currentSize < size) {
        int length = inputStream.read(bytes, 0, bytes.length);
        if (length == -1) {
          break;
        }
        currentSize += length;
      }
      long duration = (System.nanoTime() - startTime) / NANOS_PER_MILLI;

      context.write(new Text(filename), new Text(replication + " " + currentSize + " " + duration));
    } finally {
      timer.cancel();
    }
  }

  /**
   * Removes the trailing dot-suffix from the last path component of {@code name}.
   *
   * <p>A name whose last component contains no dot is returned unchanged instead of
   * failing, and a dot inside a parent directory name is never treated as the suffix
   * separator.
   */
  private static String stripSuffix(String name) {
    int dot = name.lastIndexOf('.');
    int slash = name.lastIndexOf('/');
    return dot > slash ? name.substring(0, dot) : name;
  }
}
/**
 * Pass-through reducer: every value received for a key is written out unchanged
 * under that same key.
 */
public static class ReadModeReducer extends Reducer<Text, Text, Text, Text> {

  /** Emits one output record per incoming value, keeping the key as is. */
  @Override
  protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    java.util.Iterator<Text> remaining = values.iterator();
    while (remaining.hasNext()) {
      Text current = remaining.next();
      context.write(key, current);
    }
  }
}
/**
 * Creates a read-mode runner.
 *
 * @param parameters run parameters; must not be {@code null}
 * @param configuration Hadoop configuration used for the file context and the jobs; must not be {@code null}
 * @throws IllegalArgumentException if either argument is {@code null}
 * @throws UnsupportedFileSystemException propagated from {@link FileContext#getFileContext(Configuration)}
 */
public ReadMode(Parameters parameters, Configuration configuration) throws UnsupportedFileSystemException {
  // Name the offending argument so a failure is diagnosable from the stack trace alone.
  if (parameters == null) {
    throw new IllegalArgumentException("parameters must not be null");
  }
  if (configuration == null) {
    throw new IllegalArgumentException("configuration must not be null");
  }
  this.parameters = parameters;
  this.configuration = configuration;
  fileContext = FileContext.getFileContext(configuration);
}
/**
 * Runs the whole read test: collects the control files, executes the read iterations
 * and writes the gathered statistics to the CSV file named by the parameters.
 *
 * @throws IOException if file system access, the job, or the CSV output fails
 */
void run() throws IOException, InterruptedException, ClassNotFoundException {
  // NOTE(review): getFiles() changes the file context's working directory, so a relative
  // output directory presumably has to be qualified before that call - keep this order.
  Path qualifiedOutput = fileContext.makeQualified(new Path(parameters.outputDirectory));

  List<Path> controlFiles = getFiles();
  LOG.debug("Using files: {}", controlFiles.toString());

  // Nothing needs closing if the constructor itself fails, so open outside the try.
  Csv csv = new Csv(parameters.csv);
  try {
    String csvLocation = Paths.get(parameters.csv).toAbsolutePath().toString();
    LOG.info("Created csv file {}", csvLocation);

    List<Statistic> collected = read(controlFiles, qualifiedOutput);

    LOG.info("Writing statistics to csv file");
    csv.write(collected);
  } finally {
    csv.close();
  }
}
/**
 * Makes the configured input directory the working directory and returns every
 * {@code .control} file found beneath it.
 *
 * @return the control files, in the order they were discovered
 * @throws FileNotFoundException if the input directory is missing or is not a directory
 * @throws IOException if the file system cannot be queried
 */
private List<Path> getFiles() throws IOException {
  Path root = fileContext.makeQualified(new Path(parameters.inputDirectory));

  boolean present = fileContext.util().exists(root);
  if (!present) {
    throw new FileNotFoundException("Directory does not exist: " + root);
  }
  boolean directory = fileContext.getFileStatus(root).isDirectory();
  if (!directory) {
    throw new FileNotFoundException("Is not a directory: " + root);
  }

  fileContext.setWorkingDirectory(root);
  Path workingDirectory = fileContext.getWorkingDirectory();
  LOG.debug("Working directory is now {}", workingDirectory.toString());

  List<Path> controlFiles = new ArrayList<Path>();
  populateFiles(controlFiles, fileContext, root);
  return controlFiles;
}
/**
 * Recursively walks {@code path} and appends every regular file whose name ends with
 * {@code .control} to {@code files}.
 *
 * @param files accumulator receiving the matching paths
 * @param fileContext file context used to inspect and list paths
 * @param path file or directory to examine
 * @throws IOException if a status lookup or directory listing fails
 */
private void populateFiles(List<Path> files, FileContext fileContext, Path path) throws IOException {
  assert files != null;
  assert fileContext != null;
  assert path != null;

  FileStatus current = fileContext.getFileStatus(path);

  if (current.isFile()) {
    boolean isControlFile = path.getName().endsWith(".control");
    if (isControlFile) {
      files.add(path);
    }
    return;
  }

  // Anything that is neither a regular file nor a directory is ignored.
  if (!current.isDirectory()) {
    return;
  }

  for (RemoteIterator<FileStatus> children = fileContext.listStatus(path); children.hasNext(); ) {
    Path child = children.next().getPath();
    populateFiles(files, fileContext, child);
  }
}
/**
 * Runs the read job {@code parameters.iteration} times and gathers the statistics of
 * every successful iteration.
 *
 * <p>Before each iteration the output directory is deleted recursively. A failed job is
 * logged as a warning and contributes no statistics; the remaining iterations still run.
 *
 * @param files control files used as job input
 * @param outputDirectory directory the job writes its result to
 * @return statistics of all successful iterations, in iteration order
 * @throws IOException if the output directory cannot be deleted, or the job cannot be
 *     set up or its result cannot be read
 */
private List<Statistic> read(List<Path> files, Path outputDirectory)
    throws IOException, ClassNotFoundException, InterruptedException {
  assert files != null;
  assert outputDirectory != null;

  List<Statistic> statistics = new ArrayList<Statistic>();

  // "<=" instead of "< iteration + 1": the latter overflows for the largest possible count.
  for (int i = 1; i <= parameters.iteration; ++i) {
    LOG.info("Starting iteration {}", i);

    LOG.info("Deleting output directory {}", outputDirectory);
    fileContext.delete(outputDirectory, true);

    Job job = createJob(files, outputDirectory);

    // Measure with the monotonic clock so wall-clock adjustments during the job cannot
    // distort (or even negate) the reported duration. The unit stays milliseconds.
    long startTime = Time.monotonicNow();
    boolean result = job.waitForCompletion(true);
    long totalDuration = Time.monotonicNow() - startTime;

    if (result) {
      statistics.addAll(analyze(i, totalDuration, outputDirectory));
    } else {
      LOG.warn("Job failed for iteration {}!", i);
    }
  }

  return statistics;
}

/** Builds a read job over {@code files} that writes its single reducer output to {@code outputDirectory}. */
private Job createJob(List<Path> files, Path outputDirectory) throws IOException {
  Job job = Job.getInstance(configuration);
  job.setJarByClass(HadapsTest.class);
  job.setJobName(HadapsTest.class.getSimpleName());

  for (Path file : files) {
    FileInputFormat.addInputPath(job, file);
  }
  job.setInputFormatClass(SequenceFileInputFormat.class);

  job.setMapperClass(ReadModeMapper.class);
  job.setReducerClass(ReadModeReducer.class);

  FileOutputFormat.setOutputPath(job, outputDirectory);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  // A single reducer: analyze() reads only the output file "part-r-00000".
  job.setNumReduceTasks(1);
  return job;
}
/**
 * One result line: file name, replication, size and duration, each separated by a single
 * whitespace character, with optional trailing whitespace. The three numeric fields are
 * anchored at the end of the line so that a file name containing whitespace still parses.
 */
private static final Pattern RESULT_LINE =
    Pattern.compile("(.*)\\s(\\S+)\\s(\\S+)\\s(\\S+)\\s*", Pattern.DOTALL);

/**
 * Parses the job result file {@code part-r-00000} in {@code outputDirectory} into statistics.
 *
 * <p>Every line yields one {@link Statistic}; a final {@code "TOTAL"} entry carrying
 * {@code totalDuration} is appended after the last line.
 *
 * @param i iteration number recorded in every statistic
 * @param totalDuration duration of the whole job, recorded in the {@code "TOTAL"} entry
 * @param outputDirectory directory containing the job result file
 * @return the parsed statistics followed by the {@code "TOTAL"} entry
 * @throws IOException if the result file cannot be opened or read
 * @throws IllegalStateException if a line does not have the expected shape
 * @throws NumberFormatException if a numeric field of a line cannot be parsed
 */
private List<Statistic> analyze(int i, long totalDuration, Path outputDirectory) throws IOException {
  assert outputDirectory != null;

  List<Statistic> statistics = new ArrayList<Statistic>();
  Path resultFile = new Path(outputDirectory, "part-r-00000");

  // Decode explicitly as UTF-8 (the encoding Hadoop's text output uses) instead of
  // relying on the platform default charset; try-with-resources closes the reader
  // without masking an exception thrown while parsing.
  try (BufferedReader reader = new BufferedReader(
      new InputStreamReader(fileContext.open(resultFile), StandardCharsets.UTF_8))) {
    String line;
    while ((line = reader.readLine()) != null) {
      Matcher fields = RESULT_LINE.matcher(line);
      if (!fields.matches()) {
        throw new IllegalStateException("Invalid number of tokens in result line: " + line);
      }

      String filename = fields.group(1);
      short replication = Short.parseShort(fields.group(2));
      long size = Long.parseLong(fields.group(3));
      long duration = Long.parseLong(fields.group(4));

      statistics.add(new Statistic(i, filename, replication, size, duration));
    }

    statistics.add(new Statistic(i, "TOTAL", (short) 0, 0, totalDuration));
    return statistics;
  }
}
}