/* * Created on Jul 15, 2007 * * Copyright (c) 2007, the JUNG Project and the Regents of the University * of California * All rights reserved. * * This software is open-source under the BSD license; see either * "license.txt" or * http://jung.sourceforge.net/license.txt for a description. */ package edu.uci.ics.jung.algorithms.scoring; import org.apache.commons.collections15.Transformer; import edu.uci.ics.jung.algorithms.scoring.util.ScoringUtils; import edu.uci.ics.jung.graph.Graph; /** * Assigns hub and authority scores to each vertex depending on the topology of * the network. The essential idea is that a vertex is a hub to the extent that * it links to authoritative vertices, and is an authority to the extent that it * links to 'hub' vertices. * * <p> * The classic HITS algorithm essentially proceeds as follows: * * <pre> * assign equal initial hub and authority values to each vertex * repeat * for each vertex w: * w.hub = sum over successors x of x.authority * w.authority = sum over predecessors v of v.hub * normalize hub and authority scores so that the sum of the squares of each = 1 * until scores converge * </pre> * * HITS is somewhat different from random walk/eigenvector-based algorithms such * as PageRank in that: * <ul> * <li/>there are two mutually recursive scores being calculated, rather than a * single value * <li/>the edge weights are effectively all 1, i.e., they can't be interpreted * as transition probabilities. This means that the more inlinks and outlinks * that a vertex has, the better, since adding an inlink (or outlink) does not * dilute the influence of the other inlinks (or outlinks) as in random * walk-based algorithms. * <li/>the scores cannot be interpreted as posterior probabilities (due to the * different normalization) * </ul> * * This implementation has the classic behavior by default. However, it has been * generalized somewhat so that it can act in a more "PageRank-like" fashion: * <ul> * <li/>this implementation has an optional 'random jump probability' parameter * analogous to the 'alpha' parameter used by PageRank. Varying this value * between 0 and 1 allows the user to vary between the classic HITS behavior and * one in which the scores are smoothed to a uniform distribution. The default * value for this parameter is 0 (no random jumps possible). * <li/>the edge weights can be set to anything the user likes, and in * particular they can be set up (e.g. using <code>UniformDegreeWeight</code>) * so that the weights of the relevant edges incident to a vertex sum to 1. * <li/>The vertex score normalization has been factored into its own method so * that it can be overridden by a subclass. Thus, for example, since the * vertices' values are set to sum to 1 initially, if the weights of the * relevant edges incident to a vertex sum to 1, then the vertices' values will * continue to sum to 1 if the "sum-of-squares" normalization code is overridden * to a no-op. (Other normalization methods may also be employed.) * </ul> * * @param <V> * the vertex type * @param <E> * the edge type * * @see "'Authoritative sources in a hyperlinked environment' by Jon Kleinberg, 1997" */ public class HITS<V, E> extends HITSWithPriors<V, E> { /** * Creates an instance for the specified graph, edge weights, and alpha * (random jump probability) parameter. * * @param g * the input graph * @param edge_weights * the weights to use for each edge * @param alpha * the probability of a hub giving some authority to all * vertices, and of an authority increasing the score of all hubs * (not just those connected via links) */ public HITS(Graph<V, E> g, Transformer<E, Double> edge_weights, double alpha) { super(g, edge_weights, ScoringUtils.getHITSUniformRootPrior(g.getVertices()), alpha); } /** * Creates an instance for the specified graph and alpha (random jump * probability) parameter. The edge weights are all set to 1. * * @param g * the input graph * @param alpha * the probability of a hub giving some authority to all * vertices, and of an authority increasing the score of all hubs * (not just those connected via links) */ public HITS(Graph<V, E> g, double alpha) { super(g, ScoringUtils.getHITSUniformRootPrior(g.getVertices()), alpha); } /** * Creates an instance for the specified graph. The edge weights are all set * to 1 and alpha is set to 0. * * @param g * the input graph */ public HITS(Graph<V, E> g) { this(g, 0.0); } /** * Maintains hub and authority score information for a vertex. */ public static class Scores { /** * The hub score for a vertex. */ public double hub; /** * The authority score for a vertex. */ public double authority; /** * Creates an instance with the specified hub and authority score. */ public Scores(double hub, double authority) { this.hub = hub; this.authority = authority; } @Override public String toString() { // TODO: improve the format of string. Look at the next row. // return String.format("[h:%.4f,a:%.4f]", this.hub, // this.authority); return "[h:" + this.hub + ",a:" + this.authority + "]"; } } }