/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.flink.graph.library.link_analysis; import org.apache.flink.api.java.DataSet; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.graph.asm.AsmTestBase; import org.apache.flink.graph.asm.dataset.Collect; import org.apache.flink.graph.library.link_analysis.HITS.Result; import org.apache.flink.types.IntValue; import org.apache.flink.types.LongValue; import org.apache.flink.types.NullValue; import org.junit.Test; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import static org.junit.Assert.assertEquals; public class HITSTest extends AsmTestBase { /* * This test result can be verified with the following Python script. import math import networkx as nx graph=nx.read_edgelist('directedSimpleGraph.csv', delimiter=',', create_using=nx.DiGraph()) hits=nx.algorithms.link_analysis.hits(graph) hubbiness_norm=math.sqrt(sum(v*v for v in hits[0].values())) authority_norm=math.sqrt(sum(v*v for v in hits[1].values())) for key in sorted(hits[0]): print('{}: {}, {}'.format(key, hits[0][key]/hubbiness_norm, hits[1][key]/authority_norm)) */ @Test public void testWithSimpleGraph() throws Exception { DataSet<Result<IntValue>> hits = new HITS<IntValue, NullValue, NullValue>(20) .run(directedSimpleGraph); List<Tuple2<Double, Double>> expectedResults = new ArrayList<>(); expectedResults.add(Tuple2.of(0.544643396306, 0.0)); expectedResults.add(Tuple2.of(0.0, 0.836329395866)); expectedResults.add(Tuple2.of(0.607227031134, 0.268492526138)); expectedResults.add(Tuple2.of(0.544643396306, 0.395444899355)); expectedResults.add(Tuple2.of(0.0, 0.268492526138)); expectedResults.add(Tuple2.of(0.194942233447, 0.0)); for (Result<IntValue> result : hits.collect()) { int id = result.f0.getValue(); assertEquals(expectedResults.get(id).f0, result.getHubScore().getValue(), 0.000001); assertEquals(expectedResults.get(id).f1, result.getAuthorityScore().getValue(), 0.000001); } } @Test public void testWithCompleteGraph() throws Exception { double expectedScore = 1.0 / Math.sqrt(completeGraphVertexCount); DataSet<Result<LongValue>> hits = new HITS<LongValue, NullValue, NullValue>(0.000001) .run(completeGraph); List<Result<LongValue>> results = hits.collect(); assertEquals(completeGraphVertexCount, results.size()); for (Result<LongValue> result : results) { assertEquals(expectedScore, result.getHubScore().getValue(), 0.000001); assertEquals(expectedScore, result.getAuthorityScore().getValue(), 0.000001); } } /* * This test result can be verified with the following Python script. import math import networkx as nx graph=nx.read_edgelist('directedRMatGraph.csv', delimiter=',', create_using=nx.DiGraph()) hits=nx.algorithms.link_analysis.hits(graph) hubbiness_norm=math.sqrt(sum(v*v for v in hits[0].values())) authority_norm=math.sqrt(sum(v*v for v in hits[1].values())) for key in [0, 1, 2, 8, 13, 29, 109, 394, 652, 1020]: print('{}: {}, {}'.format(key, hits[0][str(key)]/hubbiness_norm, hits[1][str(key)]/authority_norm)) */ @Test public void testWithRMatGraph() throws Exception { DataSet<Result<LongValue>> hits = directedRMatGraph(10, 16) .run(new HITS<LongValue, NullValue, NullValue>(0.000001)); Map<Long, Result<LongValue>> results = new HashMap<>(); for (Result<LongValue> result : new Collect<Result<LongValue>>().run(hits).execute()) { results.put(result.f0.getValue(), result); } assertEquals(902, results.size()); Map<Long, Tuple2<Double, Double>> expectedResults = new HashMap<>(); // a pseudo-random selection of results, both high and low expectedResults.put(0L, Tuple2.of(0.231077034747, 0.238110214937)); expectedResults.put(1L, Tuple2.of(0.162364053933, 0.169679504287)); expectedResults.put(2L, Tuple2.of(0.162412612499, 0.161015667261)); expectedResults.put(8L, Tuple2.of(0.167064641724, 0.158592966505)); expectedResults.put(13L, Tuple2.of(0.041915595624, 0.0407091625629)); expectedResults.put(29L, Tuple2.of(0.0102017346511, 0.0146218045999)); expectedResults.put(109L, Tuple2.of(0.00190531000389, 0.00481944993023)); expectedResults.put(394L, Tuple2.of(0.0122287016161, 0.0147987969538)); expectedResults.put(652L, Tuple2.of(0.010966659242, 0.0113713306749)); expectedResults.put(1020L, Tuple2.of(0.0, 0.000326973732127)); for (Map.Entry<Long, Tuple2<Double, Double>> expected : expectedResults.entrySet()) { double hubScore = results.get(expected.getKey()).getHubScore().getValue(); double authorityScore = results.get(expected.getKey()).getAuthorityScore().getValue(); assertEquals(expected.getValue().f0, hubScore, 0.00001); assertEquals(expected.getValue().f1, authorityScore, 0.00001); } } }