/*
 * Copyright (c) 2003, the JUNG Project and the Regents of the University
 * of California
 * All rights reserved.
 *
 * This software is open-source under the BSD license; see either
 * "license.txt" or
 * http://jung.sourceforge.net/license.txt for a description.
 */
package edu.uci.ics.jung.algorithms.cluster;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.collections15.Transformer;

import edu.uci.ics.jung.algorithms.scoring.BetweennessCentrality;
import edu.uci.ics.jung.graph.Graph;
import edu.uci.ics.jung.graph.util.Pair;

/**
 * An algorithm for computing clusters (community structure) in graphs based on
 * edge betweenness. The betweenness of an edge is defined as the extent to
 * which that edge lies along shortest paths between all pairs of vertices.
 *
 * This algorithm works by iteratively repeating a two-step process:
 * <ul>
 * <li>compute the edge betweenness of all edges in the current graph;</li>
 * <li>remove the edge with the highest betweenness.</li>
 * </ul>
 * <p>
 * The running time is O(kmn), where k is the number of edges to remove, m is
 * the total number of edges, and n is the total number of vertices. For very
 * sparse graphs the running time is closer to O(kn^2), and for graphs with
 * strong community structure the complexity is even lower.
 * <p>
 * This algorithm is a slight modification of the algorithm discussed in the
 * reference below, in that the number of edges to be removed is parameterized.
 *
 * @author Scott White
 * @author Tom Nelson (converted to jung2)
 * @see "Community structure in social and biological networks by Michelle Girvan and Mark Newman"
 */
public class EdgeBetweennessClusterer<V, E> implements
        Transformer<Graph<V, E>, Set<Set<V>>> {
    private int mNumEdgesToRemove;
    private Map<E, Pair<V>> edges_removed;

    /**
     * Constructs a new clusterer.
     *
     * @param numEdgesToRemove
     *            the number of edges to be progressively removed from the graph
     */
    public EdgeBetweennessClusterer(int numEdgesToRemove) {
        mNumEdgesToRemove = numEdgesToRemove;
        edges_removed = new LinkedHashMap<E, Pair<V>>();
    }

    /**
     * Finds the set of clusters that results from removing the specified
     * number of highest-betweenness edges. The more edges removed, the
     * smaller and more cohesive the clusters.
     *
     * @param graph
     *            the graph
     */
    @Override
    public Set<Set<V>> transform(Graph<V, E> graph) {
        if (mNumEdgesToRemove < 0 || mNumEdgesToRemove > graph.getEdgeCount()) {
            throw new IllegalArgumentException(
                    "Invalid number of edges passed in.");
        }

        edges_removed.clear();

        for (int k = 0; k < mNumEdgesToRemove; k++) {
            // Betweenness must be recomputed after every removal, since
            // deleting an edge reroutes shortest paths.
            BetweennessCentrality<V, E> bc = new BetweennessCentrality<V, E>(
                    graph);
            E to_remove = null;
            double score = 0;
            // Find the edge with the highest betweenness score.
            for (E e : graph.getEdges()) {
                if (bc.getEdgeScore(e) > score) {
                    to_remove = e;
                    score = bc.getEdgeScore(e);
                }
            }
            // Record the edge's endpoints so it can be restored afterwards.
            edges_removed.put(to_remove, graph.getEndpoints(to_remove));
            graph.removeEdge(to_remove);
        }

        // The clusters are the weakly connected components of the reduced graph.
        WeakComponentClusterer<V, E> wcSearch = new WeakComponentClusterer<V, E>();
        Set<Set<V>> clusterSet = wcSearch.transform(graph);

        // Restore the removed edges, leaving the input graph unmodified.
        for (Map.Entry<E, Pair<V>> entry : edges_removed.entrySet()) {
            Pair<V> endpoints = entry.getValue();
            graph.addEdge(entry.getKey(), endpoints.getFirst(),
                    endpoints.getSecond());
        }
        return clusterSet;
    }

    /**
     * Retrieves the list of all edges that were removed (assuming
     * transform(...) was previously called). The edges are returned in the
     * order in which they were removed.
     *
     * @return the edges removed from the original graph, in removal order
     */
    public List<E> getEdgesRemoved() {
        return new ArrayList<E>(edges_removed.keySet());
    }
}
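
/*
 * Usage sketch (illustrative; not part of the original JUNG distribution):
 * builds two triangles joined by a single bridge edge and asks the clusterer
 * to remove one edge. Every shortest path between the two triangles crosses
 * the bridge, so it has the highest betweenness and is removed first, leaving
 * the two triangles as clusters. SparseMultigraph is referenced by its fully
 * qualified name so the import list above stays untouched; the vertex and
 * edge labels are hypothetical.
 */
class EdgeBetweennessClustererExample {
    public static void main(String[] args) {
        Graph<String, Integer> g =
                new edu.uci.ics.jung.graph.SparseMultigraph<String, Integer>();
        for (String v : new String[] { "a", "b", "c", "d", "e", "f" }) {
            g.addVertex(v);
        }
        // First triangle: a-b-c.
        g.addEdge(0, "a", "b");
        g.addEdge(1, "b", "c");
        g.addEdge(2, "c", "a");
        // Second triangle: d-e-f.
        g.addEdge(3, "d", "e");
        g.addEdge(4, "e", "f");
        g.addEdge(5, "f", "d");
        // Bridge connecting the two triangles.
        g.addEdge(6, "c", "d");

        EdgeBetweennessClusterer<String, Integer> clusterer =
                new EdgeBetweennessClusterer<String, Integer>(1);
        Set<Set<String>> clusters = clusterer.transform(g);
        // Two clusters, e.g. [a, b, c] and [d, e, f] (iteration order unspecified).
        System.out.println(clusters);
        // The bridge edge, printed as [6].
        System.out.println(clusterer.getEdgesRemoved());
    }
}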