/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.graph.library.link_analysis; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.graph.asm.AsmTestBase; import org.apache.flink.graph.asm.dataset.Collect; import org.apache.flink.graph.library.link_analysis.PageRank.Result; import org.apache.flink.types.DoubleValue; import org.apache.flink.types.IntValue; import org.apache.flink.types.LongValue; import org.apache.flink.types.NullValue; import org.junit.Test; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import static org.junit.Assert.assertEquals; public class PageRankTest extends AsmTestBase { private static final double DAMPING_FACTOR = 0.85; /* * This test result can be verified with the following Python script. import networkx as nx graph=nx.read_edgelist('directedSimpleGraph.csv', delimiter=',', create_using=nx.DiGraph()) pagerank=nx.algorithms.link_analysis.pagerank(graph) for key in sorted(pagerank): print('{}: {}'.format(key, pagerank[key])) */ @Test public void testWithSimpleGraph() throws Exception { DataSet<Result<IntValue>> pr = new PageRank<IntValue, NullValue, NullValue>(DAMPING_FACTOR, 10) .run(directedSimpleGraph); List<Double> expectedResults = new ArrayList<>(); expectedResults.add(0.09091296131286301); expectedResults.add(0.27951855944178117); expectedResults.add(0.12956847924535586); expectedResults.add(0.22329643739217675); expectedResults.add(0.18579060129496028); expectedResults.add(0.09091296131286301); for (Tuple2<IntValue, DoubleValue> result : pr.collect()) { int id = result.f0.getValue(); assertEquals(expectedResults.get(id), result.f1.getValue(), 0.000001); } } @Test public void testWithCompleteGraph() throws Exception { double expectedScore = 1.0 / completeGraphVertexCount; DataSet<Result<LongValue>> pr = new PageRank<LongValue, NullValue, NullValue>(DAMPING_FACTOR, 0.000001) .run(completeGraph); List<Result<LongValue>> results = pr.collect(); assertEquals(completeGraphVertexCount, results.size()); for (Tuple2<LongValue, DoubleValue> result : results) { assertEquals(expectedScore, result.f1.getValue(), 0.000001); } } /* * This test result can be verified with the following Python script. import networkx as nx graph=nx.read_edgelist('directedRMatGraph.csv', delimiter=',', create_using=nx.DiGraph()) pagerank=nx.algorithms.link_analysis.pagerank(graph) for key in [0, 1, 2, 8, 13, 29, 109, 394, 652, 1020]: print('{}: {}'.format(key, pagerank[str(key)])) */ @Test public void testWithRMatGraph() throws Exception { DataSet<Result<LongValue>> pr = new PageRank<LongValue, NullValue, NullValue>(DAMPING_FACTOR, 0.000001) .run(directedRMatGraph(10, 16)); Map<Long, Result<LongValue>> results = new HashMap<>(); for (Result<LongValue> result : new Collect<Result<LongValue>>().run(pr).execute()) { results.put(result.getVertexId0().getValue(), result); } assertEquals(902, results.size()); Map<Long, Double> expectedResults = new HashMap<>(); // a pseudo-random selection of results, both high and low expectedResults.put(0L, 0.027111807822); expectedResults.put(1L, 0.0132842310382); expectedResults.put(2L, 0.0121818392504); expectedResults.put(8L, 0.0115916809743); expectedResults.put(13L, 0.00183249490033); expectedResults.put(29L, 0.000848095047082); expectedResults.put(109L, 0.000308507844048); expectedResults.put(394L, 0.000828743280246); expectedResults.put(652L, 0.000684102931253); expectedResults.put(1020L, 0.000250487135148); for (Map.Entry<Long, Double> expected : expectedResults.entrySet()) { double value = results.get(expected.getKey()).getPageRankScore().getValue(); assertEquals(expected.getValue(), value, 0.00001); } } }