package hip.ch7.pagerank.mr;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Arrays;
/**
 * Command-line driver that repeatedly runs a PageRank MapReduce job
 * ({@link Map} / {@link Reduce}) until the per-node convergence value
 * reported by the job drops below a fixed threshold.
 *
 * <p>Usage: {@code Main <input-file> <output-dir>}. The output directory
 * is deleted and recreated on every run.
 */
public final class Main {

  /** Iteration stops once the computed convergence falls below this. */
  private static final double DESIRED_CONVERGENCE = 0.01;

  /** Charset used both to read the graph file and to write the seeded copy. */
  private static final String ENCODING = "UTF-8";

  /**
   * Program entry point.
   *
   * @param args {@code args[0]} is the input graph file, {@code args[1]}
   *             is the output directory
   * @throws IllegalArgumentException if fewer than two arguments are given
   * @throws Exception if file preparation or any job fails
   */
  public static void main(String... args) throws Exception {
    if (args == null || args.length < 2) {
      System.err.println("Usage: Main <input-file> <output-dir>");
      throw new IllegalArgumentException(
          "Expected 2 arguments: <input-file> <output-dir>");
    }
    String inputFile = args[0];
    String outputDir = args[1];
    iterate(inputFile, outputDir);
  }

  /**
   * Prepares the working directory and runs PageRank jobs until the
   * convergence threshold is met.
   *
   * <p>The output directory is removed recursively and recreated, the
   * input graph is rewritten to {@code <output>/input.txt} with an initial
   * rank per node, and then iteration {@code n} writes to
   * {@code <output>/n} and is used as the input of iteration {@code n+1}.
   *
   * @param input  path of the graph file
   * @param output path of the working/output directory (will be wiped)
   * @throws Exception if file preparation or any job fails
   */
  public static void iterate(String input, String output)
      throws Exception {
    Configuration conf = new Configuration();

    Path outputPath = new Path(output);
    FileSystem outputFs = outputPath.getFileSystem(conf);
    outputFs.delete(outputPath, true);
    outputFs.mkdirs(outputPath);

    Path inputPath = new Path(outputPath, "input.txt");
    int numNodes = createInputFile(new Path(input), inputPath);

    int iter = 1;
    while (true) {
      Path jobOutputPath =
          new Path(outputPath, String.valueOf(iter));

      System.out.println("======================================");
      System.out.println("=  Iteration:    " + iter);
      System.out.println("=  Input path:   " + inputPath);
      System.out.println("=  Output path:  " + jobOutputPath);
      System.out.println("======================================");

      if (calcPageRank(inputPath, jobOutputPath, numNodes) <
          DESIRED_CONVERGENCE) {
        System.out.println(
            "Convergence is below " + DESIRED_CONVERGENCE +
                ", we're done");
        break;
      }
      // The output of this pass feeds the next one.
      inputPath = jobOutputPath;
      iter++;
    }
  }

  /**
   * Rewrites the graph file into the format consumed by the job: one line
   * per node of the form {@code name <TAB> node}, where {@code node} is a
   * {@link Node} carrying an initial page rank of {@code 1 / numNodes} and
   * the remaining whitespace-separated tokens of the line as adjacent
   * node names.
   *
   * <p>Blank lines in the source file are skipped. The target file is
   * created on the filesystem that owns {@code targetFile}, which may
   * differ from the one holding {@code file}.
   *
   * @param file       graph file to read
   * @param targetFile file to create
   * @return the number of nodes (non-blank lines) in {@code file}
   * @throws IOException if reading or writing fails, or if the graph
   *                     contains no nodes
   */
  public static int createInputFile(Path file, Path targetFile)
      throws IOException {
    Configuration conf = new Configuration();
    FileSystem sourceFs = file.getFileSystem(conf);
    FileSystem targetFs = targetFile.getFileSystem(conf);

    int numNodes = getNumNodes(file);
    if (numNodes == 0) {
      // With zero nodes the convergence value becomes NaN, which never
      // compares below the threshold, so iterate() would loop forever.
      throw new IOException("Input file " + file + " contains no nodes");
    }
    double initialPageRank = 1.0 / (double) numNodes;

    InputStream is = null;
    OutputStream os = null;
    try {
      is = sourceFs.open(file);
      os = targetFs.create(targetFile);

      LineIterator iter = IOUtils.lineIterator(is, ENCODING);
      while (iter.hasNext()) {
        String line = iter.nextLine();
        if (StringUtils.isBlank(line)) {
          continue;
        }
        String[] parts = StringUtils.split(line);

        Node node = new Node()
            .setPageRank(initialPageRank)
            .setAdjacentNodeNames(
                Arrays.copyOfRange(parts, 1, parts.length));
        // Encode explicitly; the two-argument write() would use the
        // platform default charset.
        IOUtils.write(
            parts[0] + '\t' + node.toString() + '\n', os, ENCODING);
      }

      // Close on the success path so that a failing flush is reported
      // instead of being swallowed by closeQuietly below.
      os.close();
      os = null;
    } finally {
      IOUtils.closeQuietly(os);
      IOUtils.closeQuietly(is);
    }
    return numNodes;
  }

  /**
   * Counts the nodes in a graph file, one node per non-blank line.
   *
   * @param file graph file to read
   * @return number of non-blank lines in {@code file}
   * @throws IOException if the file cannot be opened
   */
  public static int getNumNodes(Path file) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = file.getFileSystem(conf);

    InputStream is = fs.open(file);
    try {
      int count = 0;
      // Stream the file rather than loading every line into memory.
      LineIterator iter = IOUtils.lineIterator(is, ENCODING);
      while (iter.hasNext()) {
        if (!StringUtils.isBlank(iter.nextLine())) {
          count++;
        }
      }
      return count;
    } finally {
      IOUtils.closeQuietly(is);
    }
  }

  /**
   * Runs a single PageRank MapReduce pass and derives its convergence.
   *
   * <p>The convergence is the job's {@code Reduce.Counter.CONV_DELTAS}
   * counter divided by {@code Reduce.CONVERGENCE_SCALING_FACTOR} and then
   * by {@code numNodes}.
   *
   * @param inputPath  job input (the seeded file or a previous pass's output)
   * @param outputPath job output directory
   * @param numNodes   number of nodes in the graph; also passed to the
   *                   reducer via {@code Reduce.CONF_NUM_NODES_GRAPH}
   * @return the convergence value for this pass
   * @throws IllegalArgumentException if {@code numNodes} is not positive
   * @throws Exception if the job does not complete successfully
   */
  public static double calcPageRank(Path inputPath, Path outputPath, int numNodes)
      throws Exception {
    if (numNodes <= 0) {
      throw new IllegalArgumentException(
          "numNodes must be positive: " + numNodes);
    }

    Configuration conf = new Configuration();
    conf.setInt(Reduce.CONF_NUM_NODES_GRAPH, numNodes);

    Job job = new Job(conf);
    job.setJarByClass(Main.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(KeyValueTextInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    if (!job.waitForCompletion(true)) {
      throw new Exception("Job failed");
    }

    long summedConvergence = job.getCounters().findCounter(
        Reduce.Counter.CONV_DELTAS).getValue();
    double convergence =
        ((double) summedConvergence /
            Reduce.CONVERGENCE_SCALING_FACTOR) /
            (double) numNodes;

    System.out.println("======================================");
    System.out.println("=  Num nodes:           " + numNodes);
    System.out.println("=  Summed convergence:  " + summedConvergence);
    System.out.println("=  Convergence:         " + convergence);
    System.out.println("======================================");

    return convergence;
  }
}