HubsAndAuthoritiesSchimmy.java example

Explorer
Cloud9-master
- src
/**
 * 
 */
package edu.umd.cloud9.example.hits;

import java.io.IOException;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.lib.HashPartitioner;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

import tl.lin.data.array.ArrayListOfIntsWritable;
import tl.lin.data.map.HMapIF;
import tl.lin.data.map.MapIF;

/**
 * <p>
 * Main driver program for running the schimmy version of Kleinberg's
 * Hubs and Authorities/Hyperlink-Induced Topic Search (HITS) algorithm
 * Command line arguments are:
 * </p>
 * 
 * <ul>
 * <li>[basePath]: the base path</li>
 * <li>[numNodes]: number of nodes in the graph</li>
 * <li>[start]: starting iteration</li>
 * <li>[end]: ending iteration</li>
 * <li>[useCombiner?]: 1 for using combiner, 0 for not</li>
 * <li>[useInMapCombiner?]: 1 for using in-mapper combining, 0 for not</li>
 * <li>[useRange?]: 1 for range partitioning, 0 for not</li>
 * <li>[num Mappers]: number of mappers to use</li>
 * <li>[numReducers]: number of reducers to use. This should remain constant between iterations</li>
 * </ul>
 * 
 * <p>
 * The starting and ending iterations will correspond to paths
 * <code>/base/path/iterXXXX</code> and <code>/base/path/iterYYYY</code>. As a
 * example, if you specify 0 and 10 as the starting and ending iterations, the
 * driver program will start with the graph structure stored at
 * <code>/base/path/iter0000</code>; final results will be stored at
 * <code>/base/path/iter0010</code>.
 * </p> 
 * 
 * @see HubsAndAuthorities
 * @author Mike McGrath
 * 
 */

public class HubsAndAuthoritiesSchimmy extends Configured implements Tool {

	private static final Logger sLogger = Logger
			.getLogger(HubsAndAuthoritiesSchimmy.class);

	/**
	 * @param args
	 */
	private static class HAMapper extends MapReduceBase implements
			Mapper<IntWritable, HITSNode, IntWritable, HITSNode> {
		// private Tuple valIn = MAP_SCHEMA.instantiate();
		private HITSNode valOut = new HITSNode();
		private ArrayListOfIntsWritable empty = new ArrayListOfIntsWritable();

		public void map(IntWritable key, HITSNode value,
				OutputCollector<IntWritable, HITSNode> output, Reporter reporter)
				throws IOException {

			int typeOut = 0;

			valOut.setType(typeOut);
			valOut.setARank(value.getARank());
			valOut.setHRank(value.getHRank());
			valOut.setType(HITSNode.TYPE_NODE_MASS);
			valOut.setNodeId(value.getNodeId());

			output.collect(key, valOut);

			int curr;
			//auth score for a node X is sum of all hub scores from nodes linking to X
			// so for each outgoing link X1...XN, contribute this node's hub score as part of node X1...XN's auth score
			// ( total auth score will be summed in reducer)
			typeOut = HITSNode.TYPE_AUTH_MASS;
			ArrayListOfIntsWritable adjList = value.getOutlinks();
			
			for (int i = 0; i < adjList.size(); i++) {
				curr = adjList.get(i);
				valOut.setType(typeOut);
				valOut.setARank(value.getHRank());
				output.collect(new IntWritable(curr), valOut);
			}
			
			//hub score for a node X is sum of all auth scores from nodes linked from X
			// so for each incoming link X1...XN, contribute this node's auth score as part of node X1...XN's hub score
			// ( total hub score will be summed in reducer)
			typeOut = HITSNode.TYPE_HUB_MASS;
			adjList = value.getInlinks();
			
			for (int i = 0; i < adjList.size(); i++) {
				curr = adjList.get(i);
				valOut.setType(typeOut);
				valOut.setHRank(value.getARank());
				output.collect(new IntWritable(curr), valOut);
			}
		}

	}

	// mapper using in-mapper combining
	private static class HAMapperIMC extends MapReduceBase implements
			Mapper<IntWritable, HITSNode, IntWritable, HITSNode> {

		// for buffering rank values
		private static HMapIF rankmapA = new HMapIF();
		private static HMapIF rankmapH = new HMapIF();

		// save a reference to the output collector
		private static OutputCollector<IntWritable, HITSNode> mOutput;

		private static HITSNode valOut = new HITSNode();

		// private static ArrayListOfIntsWritable empty = new
		// ArrayListOfIntsWritable();

		public void configure(JobConf job) {
			rankmapA.clear();
			rankmapH.clear();
		}

		public void map(IntWritable key, HITSNode value,
				OutputCollector<IntWritable, HITSNode> output, Reporter reporter)
				throws IOException {

			mOutput = output;

			ArrayListOfIntsWritable adjList;
			valOut.setNodeId(value.getNodeId());
			valOut.setType(HITSNode.TYPE_NODE_MASS);
			valOut.setARank(value.getARank());
			valOut.setHRank(value.getHRank());
			output.collect(key, valOut);

			// check type using new types
			//emit hvals to outlinks as avals



			//emit avals to inlinks as hvals


			int curr;
			
			adjList = value.getOutlinks();
			for (int i = 0; i < adjList.size(); i++) {
				curr = adjList.get(i);
				// System.out.println("[key: " + key.toString() + "] [curr: " +
				// curr + "]");
				if (rankmapA.containsKey(curr)) {
					rankmapA.put(curr, sumLogProbs(rankmapA.get(curr),
							value.getHRank()));
				} else {
					rankmapA.put(curr, value.getHRank());
				}
			}
			
			adjList = value.getInlinks();
			for (int i = 0; i < adjList.size(); i++) {
				curr = adjList.get(i);
				if (rankmapH.containsKey(curr)) {
					rankmapH.put(curr, sumLogProbs(rankmapH.get(curr),
							value.getARank()));
				} else {
					rankmapH.put(curr, value.getARank());
				}
			}
		}

		public void close() throws IOException {
			IntWritable n = new IntWritable();
			HITSNode mass = new HITSNode();
			for (MapIF.Entry e : rankmapH.entrySet()) {
				n.set(e.getKey());
				mass.setType(HITSNode.TYPE_HUB_MASS);
				mass.setHRank(e.getValue());
				mass.setNodeId(e.getKey());
				// System.out.println(e.getKey() + " " + e.getValue());
				mOutput.collect(n, mass);
			}
			for (MapIF.Entry e : rankmapA.entrySet()) {
				n.set(e.getKey());
				mass.setType(HITSNode.TYPE_AUTH_MASS);
				mass.setARank(e.getValue());
				mass.setNodeId(e.getKey());
				// System.out.println(e.getKey() + " " + e.getValue());
				mOutput.collect(n, mass);
			}
		}

	}

	private static class HAReducer extends MapReduceBase implements
			Reducer<IntWritable, HITSNode, IntWritable, HITSNode> {
		private HITSNode valIn;
		private HITSNode valOut = new HITSNode();

		private OutputCollector<IntWritable, HITSNode> mOutput;
		private Reporter mReporter;

		private JobConf mJobConf;
		private String mTaskId;

		private SequenceFile.Reader reader;

		private IntWritable mStateNid = new IntWritable();
		private HITSNode mStateNode = new HITSNode();

		private int jobIter = 0;

		public void configure(JobConf jconf) {
			mJobConf = jconf;
			mTaskId = jconf.get("mapred.task.id");
			jobIter = jconf.getInt("jobIter", 0);

			// we want to reconstruct the mapping from partition file stored on
			// disk and the actual partition...
			String pMappingString = jconf.get("PartitionMapping");

			Map<Integer, String> m = new HashMap<Integer, String>();
			for (String s : pMappingString.split("\\t")) {
				String[] arr = s.split("=");

				sLogger.info(arr[0] + "\t" + arr[1]);

				m.put(Integer.parseInt(arr[0]), arr[1]);
			}

			int partno = Integer.parseInt(mTaskId.substring(
					mTaskId.length() - 7, mTaskId.length() - 2));
			String f = m.get(partno);

			sLogger.info("task id: " + mTaskId);
			sLogger.info("partno: " + partno);
			sLogger.info("file: " + f);

			try {
				FileSystem fs = FileSystem.get(jconf);
				reader = new SequenceFile.Reader(fs, new Path(f), jconf);
			} catch (IOException e) {
				e.printStackTrace();
				throw new RuntimeException("Couldn't open + " + f
						+ " for partno: " + partno + " within: " + mTaskId);
			}
		}

		public void reduce(IntWritable key, Iterator<HITSNode> values,
				OutputCollector<IntWritable, HITSNode> output, Reporter reporter)
				throws IOException {
			ArrayListOfIntsWritable adjList = new ArrayListOfIntsWritable();

			float hrank = Float.NEGATIVE_INFINITY;
			float arank = Float.NEGATIVE_INFINITY;
			long pos;

			valOut.setInlinks(adjList);
			valOut.setOutlinks(adjList);

			mOutput = output;
			mReporter = reporter;

			// we're going to read the node structure until we get to the node
			// of the current message we're processing...
			while (reader.next(mStateNid, mStateNode)) {

				/*
				 * if (mStateNid.get() == key.get() && (mStateNode.getType() ==
				 * HITSNode.TYPE_AUTH_COMPLETE || mStateNode.getType() ==
				 * HITSNode.TYPE_AUTH_STRUCTURE)) { afound = true; } if
				 * (mStateNid.get() == key.get() && (mStateNode.getType() ==
				 * HITSNode.TYPE_HUB_COMPLETE || mStateNode.getType() ==
				 * HITSNode.TYPE_HUB_STRUCTURE)) { hfound = true; }
				 */
				if (mStateNid.get() == key.get())
					break;

				// nodes are sorted in each partition, so if we come across a
				// larger nid than the current message we're processing, there's
				// something seriously wrong...
				if (mStateNid.get() > key.get()) {
					Partitioner<WritableComparable, Writable> p = new HashPartitioner<WritableComparable, Writable>();

					int sp = p.getPartition(mStateNid, mStateNode, mJobConf
							.getNumReduceTasks());
					int kp = p.getPartition(key, mStateNode, mJobConf
							.getNumReduceTasks());

					throw new RuntimeException(
							"Unexpected Schimmy failure during merge! nids: "
									+ mStateNid.get() + " " + key.get()
									+ " parts: " + sp + " " + kp);
				}

				// mStateNode.setHARank(Float.NEGATIVE_INFINITY);

				// do something smarter here
				// output.collect(mStateNid, mStateNode);
			}

			while (values.hasNext()) {
				valIn = values.next();

				// get type
				int type = valIn.getType();
				float arankIn = valIn.getARank();
				float hrankIn = valIn.getHRank();
				if (type == HITSNode.TYPE_HUB_MASS ) {
					// hrank += rankIn;
					hrank = sumLogProbs(hrank, hrankIn);
				} else if (type == HITSNode.TYPE_AUTH_MASS) {
					// arank += rankIn;
					arank = sumLogProbs(arank, arankIn);
				}
			}
			// System.out.println(key.toString() + " " + "H" + " " +
			// hpayloadOut.toString());

			// if this is the first run, set rank to 0 for nodes with no inlinks
			// or outlinks
			if (jobIter == 0) {
				if (hrank == Float.NEGATIVE_INFINITY) {
					hrank = 0;
				}
				if (arank == Float.NEGATIVE_INFINITY) {
					arank = 0;
				}
			}
			// build output tuple and write to output
			if (mStateNode.getType() == HITSNode.TYPE_NODE_COMPLETE)
			{
				valOut.setInlinks(mStateNode.getInlinks()); //????
				valOut.setOutlinks(mStateNode.getOutlinks());
			}
			/*
			pos = reader.getPosition();
			// read ahead to seek if there is another adjlist
			reader.next(mStateNid, mStateNode);
			if (mStateNid.get() == key.get()) {
				if (mStateNode.getType() == HITSNode.TYPE_AUTH_COMPLETE)
					avalOut.setAdjacencyList(mStateNode.getAdjacencyList());
				else if (mStateNode.getType() == HITSNode.TYPE_HUB_COMPLETE)
					hvalOut.setAdjacencyList(mStateNode.getAdjacencyList());
			}
			// if not, go back
			else {
				reader.seek(pos);
			}*/
			valOut.setHRank(hrank);
			valOut.setARank(arank);
			valOut.setType(HITSNode.TYPE_NODE_COMPLETE);
			valOut.setNodeId(key.get());

			output.collect(key, valOut);
		}

		public void close() throws IOException {

			// we have to write out the rest of the nodes we haven't finished
			// reading yet (i.e., these are the ones who don't have any messages
			// sent to them)
			// while (reader.next(mStateNid, mStateNode)) {
			// mStateNode.setHARank(Float.NEGATIVE_INFINITY);
			// mOutput.collect(mStateNid, mStateNode);
			// }

			reader.close();
		}
	}

	private static class Norm1Mapper extends MapReduceBase implements
			Mapper<IntWritable, HITSNode, Text, FloatWritable> {

		FloatWritable rank = new FloatWritable();

		public void map(IntWritable key, HITSNode value,
				OutputCollector<Text, FloatWritable> output, Reporter reporter)
				throws IOException {

			int type = value.getType();

			// System.out.println(key.toString() + " " + valOut.toString());
			if (type == HITSNode.TYPE_NODE_COMPLETE) {
				rank.set(value.getARank() * 2);
				output.collect(new Text("A"), rank);
				rank.set(value.getHRank() * 2);
				output.collect(new Text("H"), rank);
			} else {
				System.err.println("Bad Type: " + type);
			}
		}

	}

	private static class Norm1MapperIMC extends MapReduceBase implements
			Mapper<IntWritable, HITSNode, Text, FloatWritable> {

		private static float hsum = Float.NEGATIVE_INFINITY;
		private static float asum = Float.NEGATIVE_INFINITY;
		private static OutputCollector<Text, FloatWritable> mOutput;

		public void configure(JobConf conf) {
			hsum = Float.NEGATIVE_INFINITY;
			asum = Float.NEGATIVE_INFINITY;
		}

		public void map(IntWritable key, HITSNode value,
				OutputCollector<Text, FloatWritable> output, Reporter reporter)
				throws IOException {

			mOutput = output;

			int type = value.getType();
			float arank = value.getARank() * 2;
			float hrank = value.getHRank() * 2;// <===FIXME

			if (type == HITSNode.TYPE_NODE_COMPLETE) {
				asum = sumLogProbs(asum, arank);
				hsum = sumLogProbs(hsum, hrank);
			} else {
				System.err.println("Bad Type: " + type);
			}
		}

		public void close() throws IOException {
			if (hsum != Float.NEGATIVE_INFINITY)
				mOutput.collect(new Text("H"), new FloatWritable(hsum));
			if (asum != Float.NEGATIVE_INFINITY)
				mOutput.collect(new Text("A"), new FloatWritable(asum));
		}

	}

	private static class Norm1Combiner extends MapReduceBase implements
			Reducer<Text, FloatWritable, Text, FloatWritable> {

		public void reduce(Text key, Iterator<FloatWritable> values,
				OutputCollector<Text, FloatWritable> output, Reporter reporter)
				throws IOException {
			float sum = Float.NEGATIVE_INFINITY;
			FloatWritable valIn;

			while (values.hasNext()) {
				valIn = values.next();
				sum = sumLogProbs(sum, valIn.get());
			}

			if (sum != Float.NEGATIVE_INFINITY)
				output.collect(key, new FloatWritable(sum));
		}
	}

	private static class Norm1Reducer extends MapReduceBase implements
			Reducer<Text, FloatWritable, Text, FloatWritable> {

		public void reduce(Text key, Iterator<FloatWritable> values,
				OutputCollector<Text, FloatWritable> output, Reporter reporter)
				throws IOException {
			float sum = Float.NEGATIVE_INFINITY;
			FloatWritable valIn;

			while (values.hasNext()) {
				valIn = values.next();
				sum = sumLogProbs(sum, valIn.get());
			}

			sum = sum / 2; // sqrt

			output.collect(key, new FloatWritable(sum));
		}
	}

	private static class Norm2Mapper extends MapReduceBase implements
			Mapper<IntWritable, HITSNode, IntWritable, HITSNode> {

		private HITSNode nodeOut = new HITSNode();

		private float rootSumA;
		private float rootSumH;

		public void configure(JobConf jconf) {
			rootSumA = jconf.getFloat("rootSumA", 0);
			rootSumH = jconf.getFloat("rootSumH", 0);
		}

		public void map(IntWritable key, HITSNode value,
				OutputCollector<IntWritable, HITSNode> output, Reporter reporter)
				throws IOException {

			// System.out.println("H: " + rootSumH);
			// System.out.println("A: " + rootSumA);
			float arank = value.getARank();
			float hrank = value.getHRank();

			hrank = hrank - rootSumH;
			arank = arank - rootSumA;

			nodeOut.setNodeId(key.get());
			nodeOut.setType(HITSNode.TYPE_NODE_COMPLETE);
			nodeOut.setARank(arank);
			nodeOut.setHRank(hrank);
			nodeOut.setInlinks(value.getInlinks());
			nodeOut.setOutlinks(value.getOutlinks());
			// System.out.println(tupleOut.toString());

			// System.out.println(key.toString() + " " + valOut.toString());
			output.collect(key, nodeOut);
	
		}

	}

	private ArrayList<Float> readSums(JobConf jconf, String pathIn)
			throws Exception {
		ArrayList<Float> output = new ArrayList<Float>();
		float rootSumA = -1;
		float rootSumH = -1;
		SequenceFile.Reader reader = null;
		try {
			Configuration cfg = new Configuration();
			FileSystem fs = FileSystem.get(cfg);
			Path sumsIn = new Path(pathIn);
			// FSDataInputStream in = fs.open(sumsIn);

			reader = new SequenceFile.Reader(fs, sumsIn, jconf);
			Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(),
					jconf);
			FloatWritable value = (FloatWritable) ReflectionUtils.newInstance(
					reader.getValueClass(), jconf);

			while (reader.next(key, value)) {
				// System.out.printf("%s\t%s\n", key, value);
				if (key.toString().equals("A")) {
					rootSumA = value.get();
				} else if (key.toString().equals("H")) {
					rootSumH = value.get();
				} else {
					System.out.println("PROBLEM");
				}
			}
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			IOUtils.closeStream(reader);
		}

		if (rootSumA == -1 || rootSumH == -1) {
			throw new Exception("error: rootSum == - 1");
		}

		output.add(new Float(rootSumA));
		output.add(new Float(rootSumH));

		return output;
	}

	// adds two log probs
	private static float sumLogProbs(float a, float b) {
		if (a == Float.NEGATIVE_INFINITY)
			return b;

		if (b == Float.NEGATIVE_INFINITY)
			return a;

		if (a < b) {
			return (float) (b + StrictMath.log1p(StrictMath.exp(a - b)));
		}

		return (float) (a + StrictMath.log1p(StrictMath.exp(b - a)));
	}

	public int run(String[] args) throws Exception {

		if (args.length != 9) {
			printUsage();
			return -1;
		}

		String basePath = args[0];
		int n = Integer.parseInt(args[1]);
		int s = Integer.parseInt(args[2]);
		int e = Integer.parseInt(args[3]);
		boolean useCombiner = Integer.parseInt(args[4]) != 0;
		boolean useInmapCombiner = Integer.parseInt(args[5]) != 0;
		boolean useRange = Integer.parseInt(args[6]) != 0;
		int mapTasks = Integer.parseInt(args[7]);
		int reduceTasks = Integer.parseInt(args[8]);

		sLogger.info("Tool name: HubsAndAuthorities");
		sLogger.info(" - base dir: " + basePath);
		sLogger.info(" - node count: " + n);
		sLogger.info(" - start iteration: " + s);
		sLogger.info(" - end iteration: " + e);
		sLogger.info(" - useCombiner: " + useCombiner);
		sLogger.info(" - useInmapCombiner: " + useInmapCombiner);
		sLogger.info(" - useRange: " + useRange);
		sLogger.info(" - number of mappers: " + mapTasks);
		sLogger.info(" - number of reducers: " + reduceTasks);

		for (int i = s; i < e; i++) {
			iterateHA(basePath, i, i + 1, n, useCombiner, useInmapCombiner,
					useRange, mapTasks, reduceTasks);
		}

		return 0;
	}

	public HubsAndAuthoritiesSchimmy() {
	}

	private NumberFormat sFormat = new DecimalFormat("0000");

	private void iterateHA(String path, int i, int j, int n,
			boolean useCombiner, boolean useInmapCombiner, boolean useRange,
			int mapTasks, int reduceTasks) throws IOException {
		HACalc(path, i, j, n, useCombiner, useInmapCombiner, useRange,
				mapTasks, reduceTasks);
		Norm(path, i, j, n, useCombiner, useInmapCombiner, useRange, mapTasks,
				reduceTasks);
	}

	private static int printUsage() {
		System.out
				.println("usage: [base-path] [num-nodes] [start] [end] [useCombiner?] [useInMapCombiner?] [useRange?] [num-mappers] [num-reducers]");
		ToolRunner.printGenericCommandUsage(System.out);
		return -1;
	}

	public int HACalc(String path, int iter, int jter, int nodeCount,
			boolean useCombiner, boolean useInmapCombiner, boolean useRange,
			int mapTasks, int reduceTasks) throws IOException {

		JobConf conf = new JobConf(HubsAndAuthoritiesSchimmy.class);

		String inputPath = path + "/iter" + sFormat.format(iter);
		String outputPath = path + "/iter" + sFormat.format(jter) + "t";

		FileSystem fs = FileSystem.get(conf);

		// int numPartitions = FileSystem.get(conf).listStatus(new
		// Path(inputPath)).length - 1;
		// we need to actually count the number of part files to get the number
		// of partitions (because the directory might contain _log)
		int numPartitions = 0;
		for (FileStatus s : FileSystem.get(conf)
				.listStatus(new Path(inputPath))) {
			if (s.getPath().getName().contains("part-"))
				numPartitions++;
		}
		conf.setInt("NodeCount", nodeCount);

		Partitioner p = null;

		if (useRange) {
			p = new RangePartitioner<IntWritable, Writable>();
			p.configure(conf);
		} else {
			p = new HashPartitioner<WritableComparable, Writable>();
		}

		// this is really annoying: the mapping between the partition numbers on
		// disk (i.e., part-XXXX) and what partition the file contains (i.e.,
		// key.hash % #reducer) is arbitrary... so this means that we need to
		// open up each partition, peek inside to find out.
		IntWritable key = new IntWritable();
		HITSNode value = new HITSNode();
		FileStatus[] status = fs.listStatus(new Path(inputPath));

		StringBuilder sb = new StringBuilder();

		for (FileStatus f : status) {
			if (f.getPath().getName().contains("_logs"))
				continue;

			SequenceFile.Reader reader = new SequenceFile.Reader(fs, f
					.getPath(), conf);

			reader.next(key, value);
			@SuppressWarnings("unchecked")
			int np = p.getPartition(key, value, numPartitions);
			reader.close();

			sLogger.info(f.getPath() + "\t" + np);
			sb.append(np + "=" + f.getPath() + "\t");
		}

		sLogger.info(sb.toString().trim());

		sLogger.info("Tool: HubsAndAuthorities");
		sLogger.info(" - iteration: " + iter);
		sLogger.info(" - number of mappers: " + mapTasks);
		sLogger.info(" - number of reducers: " + reduceTasks);

		conf.setJobName("Iter" + iter + "HubsAndAuthorities");

		conf.setNumMapTasks(mapTasks);
		conf.setNumReduceTasks(reduceTasks);

		FileInputFormat.setInputPaths(conf, new Path(inputPath));
		FileOutputFormat.setOutputPath(conf, new Path(outputPath));
		FileOutputFormat.setCompressOutput(conf, false);

		conf.setInputFormat(SequenceFileInputFormat.class);
		conf.setOutputKeyClass(IntWritable.class);
		conf.setOutputValueClass(HITSNode.class);
		conf.setOutputFormat(SequenceFileOutputFormat.class);

		if (useInmapCombiner == true) {
			conf.setMapperClass(HAMapperIMC.class);
		} else {
			conf.setMapperClass(HAMapper.class);
		}

		if (useRange == true) {
			conf.setPartitionerClass(RangePartitioner.class);
		}
		conf.setReducerClass(HAReducer.class);

		conf.setInt("jobIter", iter);
		conf.setInt("NodeCount", nodeCount);
		conf.set("PartitionMapping", sb.toString().trim());

		// Delete the output directory if it exists already
		Path outputDir = new Path(outputPath);
		FileSystem.get(conf).delete(outputDir, true);

		long startTime = System.currentTimeMillis();
		JobClient.runJob(conf);
		sLogger.info("Job Finished in "
				+ (System.currentTimeMillis() - startTime) / 1000.0
				+ " seconds");

		return 0;
	}

	public int Norm(String path, int iter, int jter, int nodeCount,
			boolean useCombiner, boolean useInmapCombiner, boolean useRange,
			int mapTasks, int reduceTasks) throws IOException {

		// FIXME
		String inputPath = path + "/iter" + sFormat.format(jter) + "t";
		String outputPath = path + "/iter" + sFormat.format(jter);
		String tempPath = path + "/sqrt";

		sLogger.info("Tool: Normalizer");
		sLogger.info(" - input path: " + inputPath);
		sLogger.info(" - output path: " + outputPath);
		sLogger.info(" - iteration: " + iter);
		sLogger.info(" - number of mappers: " + mapTasks);
		sLogger.info(" - number of reducers: " + reduceTasks);

		JobConf conf = new JobConf(HubsAndAuthoritiesSchimmy.class);
		conf.setJobName("Iter" + iter + "NormalizerStep1");

		conf.setNumMapTasks(mapTasks);
		conf.setNumReduceTasks(1);

		FileInputFormat.setInputPaths(conf, new Path(inputPath));
		FileOutputFormat.setOutputPath(conf, new Path(tempPath));
		FileOutputFormat.setCompressOutput(conf, false);

		conf.setInputFormat(SequenceFileInputFormat.class);
		conf.setOutputKeyClass(Text.class);
		conf.setOutputValueClass(FloatWritable.class);
		conf.setOutputFormat(SequenceFileOutputFormat.class);

		if (useInmapCombiner == true) {
			conf.setMapperClass(Norm1MapperIMC.class);
		} else {
			conf.setMapperClass(Norm1Mapper.class);
		}
		if (useCombiner == true) {
			conf.setCombinerClass(Norm1Combiner.class);
		}
		conf.setReducerClass(Norm1Reducer.class);

		JobConf conf2 = new JobConf(HubsAndAuthoritiesSchimmy.class);
		conf2.setJobName("Iter" + iter + "NormalizerStep2");
		conf2.setInt("NodeCount", nodeCount);

		conf2.setNumMapTasks(mapTasks);
		conf2.setNumReduceTasks(reduceTasks);

		FileInputFormat.setInputPaths(conf2, new Path(inputPath));
		FileOutputFormat.setOutputPath(conf2, new Path(outputPath));
		FileOutputFormat.setCompressOutput(conf2, false);

		conf2.setInputFormat(SequenceFileInputFormat.class);
		conf2.setOutputKeyClass(IntWritable.class);
		conf2.setOutputValueClass(HITSNode.class);
		conf2.setOutputFormat(SequenceFileOutputFormat.class);

		conf2.setMapperClass(Norm2Mapper.class);
		if (useRange == true) {
			conf2.setPartitionerClass(RangePartitioner.class);
		}
		conf2.setReducerClass(IdentityReducer.class);

		// Delete the output directory if it exists already
		Path tempDir = new Path(tempPath);
		FileSystem.get(conf).delete(tempDir, true);

		long startTime = System.currentTimeMillis();
		JobClient.runJob(conf);
		sLogger.info("Job Finished in "
				+ (System.currentTimeMillis() - startTime) / 1000.0
				+ " seconds");

		Path outputDir = new Path(outputPath);

		// read sums
		ArrayList<Float> sums = new ArrayList<Float>();
		try {
			sums = readSums(conf2, tempPath + "/part-00000");
		} catch (Exception e) {
			System.err.println("Failed to read in Sums");
			System.exit(1);
		}

		// conf2.set("rootSumA", sums.get(0).toString());
		conf2.setFloat("rootSumA", sums.get(0));
		// conf2.set("rootSumH", sums.get(1).toString());
		conf2.setFloat("rootSumH", sums.get(1));

		FileSystem.get(conf2).delete(outputDir, true);

		startTime = System.currentTimeMillis();
		JobClient.runJob(conf2);
		sLogger.info("Job Finished in "
				+ (System.currentTimeMillis() - startTime) / 1000.0
				+ " seconds");

		return 0;
	}

	public static void main(String[] args) throws Exception {
		int res = ToolRunner.run(new Configuration(),
				new HubsAndAuthoritiesSchimmy(), args);
		System.exit(res);
	}

}