/*
* Copyright (c) 2010 Chris Smowton <chris.smowton@cl.cam.ac.uk>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
package uk.co.mrry.mercator.mapreduce;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
/**
 * Record reader that yields one record per line of a local file.
 *
 * <p>The key of each record is the (estimated) byte offset at which the line
 * starts and the value is the line's text without its line terminator. The
 * whole file named by the split is read, starting at offset 0; the split's own
 * start/length are not consulted.
 *
 * <p>Offsets are estimated as the UTF-8 length of each line plus one byte for
 * the line terminator, because {@link BufferedReader#readLine()} does not
 * report how many bytes it consumed. They are therefore exact only for
 * UTF-8 text with single-byte line terminators.
 */
public class SWLineRecordReader extends RecordReader<LongWritable, Text> {
    private BufferedReader in = null;
    // Estimated byte offset of the next line to be read
    private long pos = 0;
    // Byte range covered by this reader: [start, end)
    private long start = 0;
    private long end = 0;
    private LongWritable key = null;
    private Text value = null;

    /**
     * Closes the underlying reader, if one was opened.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        if (in != null) {
            in.close();
        }
    }

    /**
     * Returns the key of the current record.
     *
     * @return the current key, or {@code null} before the first record has
     *         been read and after the end of input has been reached
     */
    @Override
    public LongWritable getCurrentKey() throws IOException, InterruptedException {
        return key;
    }

    /**
     * Returns the value of the current record.
     *
     * @return the current line, or {@code null} before the first record has
     *         been read and after the end of input has been reached
     */
    @Override
    public Text getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    /**
     * Reports how far through the file the reader has advanced.
     *
     * @return a value in the range [0, 1]; 0 when the file is empty
     */
    @Override
    public float getProgress() throws IOException, InterruptedException {
        if (start == end) {
            return 0.0f;
        }
        // Subtract as longs first so large offsets do not lose precision,
        // and clamp because pos is only an estimate of the bytes consumed.
        float fraction = (pos - start) / (float) (end - start);
        return Math.max(0.0f, Math.min(1.0f, fraction));
    }

    /**
     * Opens the file named by the split and resets the reader state.
     *
     * @param genericSplit the split to read; must be a {@link FileSplit}
     * @param tac the task attempt context (unused)
     * @throws IOException if the file cannot be opened or queried
     */
    @Override
    public void initialize(InputSplit genericSplit, TaskAttemptContext tac)
            throws IOException, InterruptedException {
        // Cast to FileSplit as this is closest to what we're using
        FileSplit split = (FileSplit) genericSplit;

        // FileInputStream needs a plain filesystem path, so drop a "file:"
        // scheme if present. Paths with any other scheme are passed through
        // unchanged, as before.
        String scheme = split.getPath().toUri().getScheme();
        String filePath;
        if (scheme == null || "file".equals(scheme)) {
            filePath = split.getPath().toUri().getPath();
        } else {
            filePath = split.getPath().toString();
        }

        FileInputStream fis = new FileInputStream(filePath);
        boolean opened = false;
        try {
            // Query the channel before any buffered read moves its position
            pos = fis.getChannel().position();
            start = 0;
            end = fis.getChannel().size();
            in = new BufferedReader(new InputStreamReader(fis));
            key = null;
            value = null;
            opened = true;
        } finally {
            if (!opened) {
                // Do not leak the stream if set-up failed part-way through
                try {
                    fis.close();
                } catch (IOException ignored) {
                    // The original failure is the one worth reporting
                }
            }
        }
    }

    /**
     * Advances to the next line of the file.
     *
     * <p>Empty lines are returned as records with an empty value; only the
     * real end of the file ends the iteration.
     *
     * @return {@code true} if a record was read, {@code false} at end of input
     * @throws IOException if reading from the file fails
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        String line = in.readLine();
        if (line == null) {
            // End of file: invalidate key and value
            key = null;
            value = null;
            return false;
        }

        // Create key and value objects on first use (or after invalidation)
        if (key == null) {
            key = new LongWritable();
        }
        if (value == null) {
            value = new Text();
        }

        // The key is the offset at which this line starts
        key.set(pos);
        value.set(line);

        // Text holds UTF-8 bytes, so getLength() is the encoded size of the
        // line; add one byte for the terminator that readLine() stripped.
        pos += value.getLength() + 1;
        return true;
    }
}