package com.github.projectflink.pagerank; /* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import org.apache.flink.api.common.functions.FlatJoinFunction; import org.apache.flink.api.common.functions.MapFunction; import org.apache.flink.api.common.functions.RichGroupReduceFunction; import org.apache.flink.api.common.operators.base.JoinOperatorBase.JoinHint; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.ExecutionEnvironment; import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields; import org.apache.flink.api.java.operators.IterativeDataSet; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.core.fs.FileSystem.WriteMode; import org.apache.flink.util.Collector; @SuppressWarnings("serial") public class PageRankStephan { public static void main(String[] args) throws Exception { String adjacencyPath = args[0]; //"/data/demodata/pagerank/adjacency/adjacency.csv"; String outpath = args[1]; //"/home/cicero/Desktop/out.txt"; int numIterations = Integer.valueOf(args[2]); long numVertices = Integer.valueOf(args[3]); final double threshold = 0.005 / numVertices; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Long, long[]>> adjacency = env.readTextFile(adjacencyPath).map(new AdjacencyBuilder()); DataSet<Tuple2<Long, long[]>> adjacency2 = env.readTextFile(adjacencyPath).map(new AdjacencyBuilder()); DataSet<Tuple2<Long, Double>> initialRanks = adjacency.map(new VertexInitializer(1.0 / numVertices)); IterativeDataSet<Tuple2<Long, Double>> iteration = initialRanks.iterate(numIterations); DataSet<Tuple2<Long, Double>> newRanks = iteration .join(adjacency2, JoinHint.REPARTITION_HASH_SECOND).where(0).equalTo(0).with(new RankDistributor(0.85, numVertices)) .groupBy(0) .reduceGroup(new Adder()); DataSet<Integer> tc = iteration.join(newRanks).where(0).equalTo(0).with(new FlatJoinFunction<Tuple2<Long, Double>, Tuple2<Long, Double>, Integer>() { @Override public void join(Tuple2<Long, Double> longDoubleTuple2, Tuple2<Long, Double> longDoubleTuple22, Collector<Integer> collector) throws Exception { double delta = Math.abs(longDoubleTuple2.f1 - longDoubleTuple22.f1); if(delta > threshold) { collector.collect(1); } } }); iteration.closeWith(newRanks, tc).writeAsCsv(outpath+"_fastbulk", WriteMode.OVERWRITE); // System.out.println(env.getExecutionPlan()); env.execute("Page Rank Optimized"); } public static final class AdjacencyBuilder implements MapFunction<String, Tuple2<Long, long[]>> { @Override public Tuple2<Long, long[]> map(String value) throws Exception { String[] parts = value.split(" "); if (parts.length < 1) { throw new Exception("Malformed line: " + value); } long id = Long.parseLong(parts[0]); long[] targets = new long[parts.length - 1]; for (int i = 0; i < targets.length; i++) { targets[i] = Long.parseLong(parts[i+1]); } return new Tuple2<Long, long[]>(id, targets); } } public static final class VertexInitializer implements MapFunction<Tuple2<Long, long[]>, Tuple2<Long, Double>> { private final Double initialRank; public VertexInitializer(double initialRank) { this.initialRank = initialRank; } @Override public Tuple2<Long, Double> map(Tuple2<Long, long[]> value) { return new Tuple2<Long, Double>(value.f0, initialRank); } } public static final class RankDistributor implements FlatJoinFunction<Tuple2<Long, Double>, Tuple2<Long, long[]>, Tuple2<Long, Double>> { private final Tuple2<Long, Double> tuple = new Tuple2<Long, Double>(); private final double dampeningFactor; private final long numVertices; public RankDistributor(double dampeningFactor, long numVertices) { this.dampeningFactor = dampeningFactor; this.numVertices = numVertices; } @Override public void join(Tuple2<Long, Double> page, Tuple2<Long, long[]> neighbors, Collector<Tuple2<Long, Double>> out) { long[] targets = neighbors.f1; double rankPerTarget = dampeningFactor * page.f1 / targets.length; double randomJump = (1-dampeningFactor) / numVertices; // emit random jump to self tuple.f0 = page.f0; tuple.f1 = randomJump; out.collect(tuple); tuple.f1 = rankPerTarget; for (long target : targets) { tuple.f0 = target; out.collect(tuple); } } } @ForwardedFields("0") public static final class Adder extends RichGroupReduceFunction<Tuple2<Long, Double>, Tuple2<Long, Double>> { @Override public void reduce(Iterable<Tuple2<Long, Double>> values, Collector<Tuple2<Long, Double>> out) { Long key = null; double agg = 0.0; for (Tuple2<Long, Double> t : values) { key = t.f0; agg += t.f1; } out.collect(new Tuple2<Long, Double>(key, agg)); } } }