package org.apache.hadoop.examples;
import java.io.*;
import java.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.MapReduceBase;
public class PageRank {
/**
 * Map phase of one PageRank iteration.
 *
 * <p>Each input value is read as a line of tab-separated fields:
 * {@code url <TAB> rank <TAB> O:target1,target2,...}. Fields beyond the third are ignored.
 * For every record the mapper emits:
 * <ul>
 *   <li>one {@code (target, rank / outdegree)} pair per comma-separated target, and</li>
 *   <li>one {@code (url, O:target1,...)} pair that carries the link list through to the
 *       reducer, which recognises it by its {@code O:} prefix.</li>
 * </ul>
 *
 * <p>Blank lines are skipped. Any other record that does not have three fields, or whose
 * third field does not start with {@code O:}, is rejected with an {@link IOException} that
 * quotes the record, instead of failing with an index error here or a number-format error
 * later in the reducer.
 */
public static class MapClass extends MapReduceBase implements Mapper {
    /** Marker that prefixes the out-link list; the reducer uses it to tell link lists from rank shares. */
    private static final String LINK_PREFIX = "O:";

    // Output holders, reused across map() calls.
    private Text toUrl = new Text();
    private Text toUrlList = new Text();
    private Text fromUrl = new Text();
    private Text prValue = new Text();

    /**
     * Splits one record and emits the rank shares plus the link-list record.
     *
     * @param key      input key supplied by the framework; not used
     * @param value    the input line, expected to be a {@code Text}
     * @param output   collector receiving {@code (Text, Text)} pairs
     * @param reporter progress reporter; not used
     * @throws IOException if the record is malformed or the collector fails
     */
    public void map(WritableComparable key, Writable value, OutputCollector output, Reporter reporter)
            throws IOException {
        String inputString = ((Text) value).toString();
        if (inputString.trim().length() == 0) {
            // Blank lines carry no page; tolerate them rather than failing the task.
            return;
        }

        String[] splitString = inputString.split("\t");
        if (splitString.length < 3) {
            throw new IOException("Malformed PageRank record, expected url<TAB>rank<TAB>"
                    + LINK_PREFIX + "links but got: " + inputString);
        }

        String prValString = splitString[1].trim();
        String toUrlListString = splitString[2].trim();
        if (!toUrlListString.startsWith(LINK_PREFIX)) {
            // Without this check the first two characters would be dropped blindly and the
            // reducer would later try to parse the link list as a number.
            throw new IOException("Malformed PageRank record, link list must start with \""
                    + LINK_PREFIX + "\": " + inputString);
        }

        fromUrl.set(splitString[0].trim());
        toUrlList.set(toUrlListString);

        String targets = toUrlListString.substring(LINK_PREFIX.length());
        if (targets.length() > 0) {
            // "".split(",") would yield one empty element, hence the emptiness guard above;
            // a list made only of commas yields zero elements, hence the length guard below.
            String[] toUrls = targets.split(",");
            int outdegree = toUrls.length;
            if (outdegree > 0) {
                double share = parseRank(prValString, inputString) / outdegree;
                prValue.set(Double.toString(share));
                for (int i = 0; i < outdegree; i++) {
                    toUrl.set(toUrls[i].trim());
                    output.collect(toUrl, prValue);
                }
            }
        }

        // Always forward the link structure so the next iteration can read it back.
        output.collect(fromUrl, toUrlList);
    }

    /** Parses the rank field, reporting the whole record if it is not a valid number. */
    private static double parseRank(String rankField, String record) throws IOException {
        try {
            return Double.parseDouble(rankField);
        } catch (NumberFormatException e) {
            IOException failure = new IOException("Invalid rank value in PageRank record: " + record);
            failure.initCause(e);
            throw failure;
        }
    }
}
/**
 * Reduce phase of one PageRank iteration.
 *
 * <p>The values for a key are a mix of numeric rank shares and, at most usefully, one
 * link-list record recognisable by its {@code O:} prefix. The shares are summed and the
 * new rank is {@code sum * 0.85 + 0.15}. A key whose link list is empty, or for which no
 * link-list record arrived, gets the fixed rank {@code 0.15} and its accumulated shares
 * are discarded. The output value is {@code rank <TAB> link-list}.
 */
public static class Reduce extends MapReduceBase implements Reducer {
    /**
     * Combines all values of one URL into its new rank and link list.
     *
     * @param key      the URL being reduced
     * @param values   rank shares and link-list records for that URL
     * @param output   collector receiving the {@code (key, Text)} result
     * @param reporter progress reporter; not used
     * @throws IOException if the collector fails
     */
    public void reduce(WritableComparable key, Iterator values, OutputCollector output, Reporter reporter)
            throws IOException {
        String outLinks = "O:";
        double incoming = 0.0;

        while (values.hasNext()) {
            String current = values.next().toString();
            if (!current.startsWith("O:")) {
                // Plain number: a rank share contributed by a page linking here.
                incoming += Double.parseDouble(current);
                continue;
            }
            // Link-list record; if several arrive, the last one wins.
            outLinks = current;
        }

        double rank;
        if (outLinks.equals("O:")) {
            // No out-links: drop the accumulated shares and keep only the constant term.
            rank = 0.15;
        } else {
            rank = incoming * 0.85;
            rank += 0.15;
        }

        output.collect(key, new Text(Double.toString(rank) + "\t" + outLinks));
    }
}
/**
 * Configures and runs a single PageRank iteration as a MapReduce job.
 *
 * @param args {@code args[0]} is the input path and {@code args[1]} the output path;
 *             both are trimmed, and any further arguments are ignored
 * @throws IOException if the job submission or execution fails
 */
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
        System.err.println("Usage: PageRank <input path> <output path>");
        System.exit(2);
    }

    JobConf conf = new JobConf(PageRank.class);
    conf.setJobName("pageRank");

    // Both the map output and the final output are (Text, Text) pairs.
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    // No combiner: Reduce applies the rank formula, so it must see all values for a key at once.
    conf.setReducerClass(Reduce.class);

    conf.setInputPath(new Path(args[0].trim()));
    conf.setOutputPath(new Path(args[1].trim()));

    JobClient.runJob(conf);
}
}