package skywriting.examples.skyhout.pagerank;
import java.io.IOException;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
import skywriting.examples.skyhout.common.ClosableOutputCollector;
import skywriting.examples.skyhout.common.IntArrayWritable;
import skywriting.examples.skyhout.common.LineRecordFileMapDriver;
import skywriting.examples.skyhout.common.Mapper;
import skywriting.examples.skyhout.common.SkyhoutTask;
import skywriting.examples.skyhout.common.SkywritingTaskFileSystem;
import skywriting.examples.skyhout.common.SortedPartitionedOutputCollector;
/**
 * Task that converts {@code [from] [to]} text lines into combined
 * {@code [from] [array of to]} SequenceFiles.
 *
 * <p>Every usable input line is turned into a ({@code from}, {@code to}) pair of
 * {@link IntWritable}s by {@link PageRankInitMapper}; the pairs are handed to a
 * {@link SortedPartitionedOutputCollector} that is configured with a
 * {@link HashPartitioner}, an {@link IntListCombiner}, and {@link IntWritable} /
 * {@link IntArrayWritable} as its output key and value classes.
 */
public class PageRankInitTask extends SkyhoutTask {

    /**
     * Mapper that parses a single {@code "from to"} line into an integer pair.
     *
     * <p>Lines are skipped (nothing is collected) when they start with {@code #},
     * when they do not consist of exactly two single-space-separated tokens, or
     * when either token is not a valid decimal {@code int}. The latter two cases
     * are reported on standard error.
     */
    private static class PageRankInitMapper implements Mapper<LongWritable, Text, IntWritable, IntWritable> {

        /** Token separator: exactly one space character. */
        private static final Pattern SPLIT_PATTERN = Pattern.compile(" ");

        /**
         * Parses one input line and emits its ({@code from}, {@code to}) pair.
         *
         * @param key    key supplied by the map driver; not used by this mapper
         * @param value  the text of one input line
         * @param output collector that receives the parsed pair
         * @throws IOException if {@code output.collect} fails
         */
        @Override
        public void map(LongWritable key, Text value,
                OutputCollector<IntWritable, IntWritable> output) throws IOException {
            // Convert once; the original converted the Text twice.
            String line = value.toString();

            // Lines beginning with '#' are treated as comments and ignored silently.
            if (line.startsWith("#")) {
                return;
            }

            String[] splitLine = SPLIT_PATTERN.split(line);
            if (splitLine.length != 2) {
                System.err.println("Discarding line: " + line);
                return;
            }

            int from;
            int to;
            try {
                from = Integer.parseInt(splitLine[0]);
                to = Integer.parseInt(splitLine[1]);
            } catch (NumberFormatException e) {
                // A non-numeric token (e.g. a stray '\r' or text) is handled like any
                // other malformed line instead of aborting the whole task.
                System.err.println("Discarding line: " + line);
                return;
            }

            output.collect(new IntWritable(from), new IntWritable(to));
        }
    }

    /**
     * Runs the map phase over the task's inputs.
     *
     * @param fs   the task file system, passed to both the output collector and
     *             the map driver
     * @param args task arguments; not used by this task
     * @throws IOException if constructing the collector or running the map fails
     */
    @Override
    public void invoke(SkywritingTaskFileSystem fs, String[] args)
            throws IOException {
        ClosableOutputCollector<IntWritable, IntWritable> output =
                new SortedPartitionedOutputCollector<IntWritable, IntWritable, List<Integer>, IntArrayWritable>(
                        fs,
                        new HashPartitioner<IntWritable, IntWritable>(),
                        new IntListCombiner(),
                        IntWritable.class,
                        IntArrayWritable.class);

        // NOTE(review): output is never closed explicitly here; presumably
        // runMap() closes the collector when it finishes -- confirm.
        new LineRecordFileMapDriver<IntWritable, IntWritable>(fs, output, new PageRankInitMapper()).runMap();
    }
}