/*
* File: CommunityMetrics.java
* Authors: Jeremy D. Wendt
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright 2016, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government.
* Export of this program may require a license from the United States
* Government. See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.graph.community;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.graph.DirectedNodeEdgeGraph;
import gov.sandia.cognition.graph.DirectedWeightedNodeEdgeGraph;
import gov.sandia.cognition.graph.GraphMetrics;
import gov.sandia.cognition.util.Pair;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
/**
* This class stores several static methods for computing metrics specific to a
* graph and a set of communities. Unlike the GraphMetrics class, this does not
* preserve values for the community structure nor the graph internally -- each
* method is a separate call.
*
* @author jdwendt
*/
public class CommunityMetrics
{

    /**
     * Computes the modularity of the input graph into the input set of
     * communities. NOTE: Modularity is intended for exclusive community
     * detection (every node is in exactly one community), but this code does
     * not check that the communities are constituted correctly. Therefore, if
     * you insert garbage, be prepared for bad results!
     *
     * This builds a fresh GraphMetrics for the graph on every call; use the
     * overload that accepts a GraphMetrics to reuse one across calls.
     *
     * @param <NodeNameType> The type used to identify nodes in the graph
     * @param graph The graph that has been partitioned
     * @param communities The partitions created on the input graph
     * @return The modularity score
     */
    public static <NodeNameType> double computeModularity(
        DirectedNodeEdgeGraph<NodeNameType> graph,
        Set<Set<NodeNameType>> communities)
    {
        return computeModularity(communities, new GraphMetrics<>(graph));
    }

    /**
     * Computes the modularity of the graph (whose metrics were passed in) into
     * the input set of communities. NOTE: Modularity is intended for exclusive
     * community detection (every node is in exactly one community), but this
     * code does not check that the communities are constituted correctly.
     * Therefore, if you insert garbage, be prepared for bad results!
     *
     * The per-community sums are normalized by twice the edge count reported
     * by the metrics object; if that count is zero the division is by zero and
     * the result is not a finite number.
     *
     * @param <NodeNameType> The type used to identify nodes in the graph
     * @param communities The partitions created on the input graph
     * @param graphMetrics The complete set of metrics for the partitioned graph
     * @return The modularity score
     */
    public static <NodeNameType> double computeModularity(
        Set<Set<NodeNameType>> communities,
        GraphMetrics<NodeNameType> graphMetrics)
    {
        double modularity = 0.0;
        for (Set<NodeNameType> community : communities)
        {
            modularity += modularityPartForCommunity(community, graphMetrics);
        }

        return modularity / (2.0 * graphMetrics.numEdges());
    }

    /**
     * Computes the modularity of the input graph into the input set of
     * communities. NOTE: Modularity is intended for exclusive community
     * detection (every node is in exactly one community), but this code does
     * not check that the communities are constituted correctly. Therefore, if
     * you insert garbage, be prepared for bad results!
     *
     * This builds a fresh GraphMetrics for the graph on every call; use the
     * overload that accepts a GraphMetrics to reuse one across calls.
     *
     * @param <NodeNameType> The type used to identify nodes in the graph
     * @param graph The graph that has been partitioned
     * @param communities The partitions created on the input graph
     * @return The modularity score
     */
    public static <NodeNameType> double computeModularity(
        DirectedNodeEdgeGraph<NodeNameType> graph,
        NodePartitioning<NodeNameType> communities)
    {
        return computeModularity(communities, new GraphMetrics<>(graph));
    }

    /**
     * Computes the modularity of the graph (whose metrics are passed in) into
     * the input set of communities. NOTE: Modularity is intended for exclusive
     * community detection (every node is in exactly one community), but this
     * code does not check that the communities are constituted correctly.
     * Therefore, if you insert garbage, be prepared for bad results!
     *
     * If the partitioning already carries a modularity value (its
     * getModularity() is non-null), that value is returned as-is and nothing
     * is recomputed. Otherwise the per-partition sums are normalized by twice
     * the edge count reported by the metrics object; if that count is zero the
     * division is by zero and the result is not a finite number.
     *
     * @param <NodeNameType> The type used to identify nodes in the graph
     * @param communities The partitions created on the input graph
     * @param graphMetrics The complete set of metrics for the partitioned graph
     * @return The modularity score
     */
    @PublicationReference(author = "Wikipedia", title = "Modularity (networks)",
        type = PublicationType.WebPage, year = 2016, url
        = "https://en.wikipedia.org/wiki/Modularity_(networks)")
    public static <NodeNameType> double computeModularity(
        NodePartitioning<NodeNameType> communities,
        GraphMetrics<NodeNameType> graphMetrics)
    {
        // Prefer the value the partitioning already knows, when it has one
        Double internalMod = communities.getModularity();
        if (internalMod != null)
        {
            return internalMod;
        }

        double modularity = 0.0;
        int numPartitions = communities.getNumPartitions();
        for (int i = 0; i < numPartitions; ++i)
        {
            modularity += modularityPartForCommunity(
                communities.getPartitionMembers(i), graphMetrics);
        }

        return modularity / (2.0 * graphMetrics.numEdges());
    }

    /**
     * Helper that performs the meat of the modularity computation. This allows
     * the above methods to all work given different input types but need not
     * reimplement too much work.
     *
     * For every ordered pair (i, j) of community members (including i == j)
     * this adds 1 when j is among i's neighbors and subtracts
     * degree(i) * degree(j) / (2 * numEdges).
     *
     * @param <NodeNameType> The type used to identify nodes in the graph
     * @param community The community to compute part of the modularity score
     * @param graphMetrics The metrics for the graph that contains the input
     * community
     * @return the non-normalized modularity for this community in part of the
     * graph
     */
    private static <NodeNameType> double modularityPartForCommunity(
        Set<NodeNameType> community,
        GraphMetrics<NodeNameType> graphMetrics)
    {
        double oneOverTwoM = 1.0 / (2.0 * graphMetrics.numEdges());
        double modularityPart = 0.0;
        for (NodeNameType nodei : community)
        {
            Set<NodeNameType> neighbors = graphMetrics.neighbors(nodei);
            // Held as a double so the degree product below is computed in
            // floating point; an int * int product overflows once both
            // degrees exceed roughly 46,000.
            double degi = graphMetrics.degree(nodei);
            for (NodeNameType nodej : community)
            {
                if (neighbors.contains(nodej))
                {
                    modularityPart += 1;
                }
                modularityPart -= (graphMetrics.degree(nodej) * degi)
                    * oneOverTwoM;
            }
        }

        return modularityPart;
    }

    /**
     * Helper that computes the conductance resulting from the cut between the
     * input community and the rest of the graph. Note this supports weighted or
     * unweighted graphs: edge weights are used when the graph is a
     * DirectedWeightedNodeEdgeGraph, otherwise every edge counts as 1.
     *
     * The result is the total weight of edges with exactly one endpoint in the
     * community, divided by the smaller of the two side volumes (where each
     * edge contributes its weight once per endpoint to the side that endpoint
     * is on). If the smaller volume is zero (e.g., an empty community, or one
     * that touches every edge endpoint) the division is by zero and the result
     * is not a finite number.
     *
     * @param <NodeNameType> The type used to identify nodes in the graph
     * @param graph The graph that is being cut by separating the input
     * community
     * @param community The set of nodes that are proposed to be their own
     * community
     * @return the conductance resulting by the cut of this community from the
     * graph
     */
    @PublicationReference(author = "Wikipedia", title = "Conductance (graph)",
        type = PublicationType.WebPage, year = 2016, url
        = "https://en.wikipedia.org/wiki/Conductance_(graph)")
    public static <NodeNameType> double computeConductance(
        DirectedNodeEdgeGraph<NodeNameType> graph,
        Set<NodeNameType> community)
    {
        // Resolve the weighted view once rather than casting on every edge
        DirectedWeightedNodeEdgeGraph<NodeNameType> weightedGraph = null;
        if (graph instanceof DirectedWeightedNodeEdgeGraph)
        {
            weightedGraph = (DirectedWeightedNodeEdgeGraph<NodeNameType>) graph;
        }

        double edgesCut = 0;
        double volumeInside = 0;
        double volumeOutside = 0;
        int numEdges = graph.getNumEdges();
        for (int i = 0; i < numEdges; ++i)
        {
            Pair<Integer, Integer> edge = graph.getEdgeEndpointIds(i);
            double w = (weightedGraph != null)
                ? weightedGraph.getEdgeWeight(i) : 1.0;
            boolean firstInside
                = community.contains(graph.getNode(edge.getFirst()));
            boolean secondInside
                = community.contains(graph.getNode(edge.getSecond()));

            // An edge is cut when its endpoints fall on different sides
            if (firstInside != secondInside)
            {
                edgesCut += w;
            }

            // Each endpoint adds the edge's weight to its own side's volume
            if (firstInside)
            {
                volumeInside += w;
            }
            else
            {
                volumeOutside += w;
            }
            if (secondInside)
            {
                volumeInside += w;
            }
            else
            {
                volumeOutside += w;
            }
        }

        return edgesCut / Math.min(volumeInside, volumeOutside);
    }

    /**
     * This computes the clustering coefficient for a subgraph of a graph. The
     * permanence paper makes it seem they will use this, but it turns out what
     * they call internal clustering coefficient is actually internal edge
     * density. Method preserved in case it is needed in the future.
     *
     * @param <NodeNameType> The type used to identify nodes in the graph
     * @param metrics The graph metrics object to be used
     * @param nodesInSubgraph The set of nodes that comprise the subgraph
     * @return The clustering coefficient for the graph created by the input
     * subgraph, or 0 if the subgraph contains no wedges
     */
    private static <NodeNameType> double getSubgraphClusteringCoefficient(
        GraphMetrics<NodeNameType> metrics,
        Set<NodeNameType> nodesInSubgraph)
    {
        // Accumulated as longs: n * (n - 1) overflows int for large n
        long totalNumInternalWedges = 0;
        long totalNumInternalTriangles = 0;
        for (NodeNameType node : nodesInSubgraph)
        {
            long numInternalNeighbors = 0;
            for (NodeNameType neighbor : metrics.neighbors(node))
            {
                if (nodesInSubgraph.contains(neighbor))
                {
                    ++numInternalNeighbors;
                }
            }
            totalNumInternalWedges += numInternalNeighbors
                * (numInternalNeighbors - 1);

            // Only triangles whose other two corners are also internal count
            for (Pair<NodeNameType, NodeNameType> others
                : metrics.getNodeTriangleEndpoints(node))
            {
                if (nodesInSubgraph.contains(others.getFirst())
                    && nodesInSubgraph.contains(others.getSecond()))
                {
                    ++totalNumInternalTriangles;
                }
            }
        }

        if (totalNumInternalWedges <= 0)
        {
            return 0;
        }

        return ((double) totalNumInternalTriangles)
            / ((double) totalNumInternalWedges);
    }

    /**
     * Returns the edge density of the neighbors of node internal to subgraph.
     *
     * @param <NodeNameType> The type used to identify nodes in the graph
     * @param metrics The graph metrics object to be used herein
     * @param nodesInSubgraph The ids of the subset of nodes that are within
     * the input community
     * @param nodeId The id of the node (must be within the community) whose
     * edge density is desired
     * @return The ratio of the existing edges and the total number of possible
     * edges among the internal neighbors of node, or 0 if the node has fewer
     * than two internal neighbors.
     */
    private static <NodeNameType> double getInternalEdgeDensity(
        GraphMetrics<NodeNameType> metrics,
        Set<Integer> nodesInSubgraph,
        Integer nodeId)
    {
        // Count the node's neighbors that are inside the subgraph. This is a
        // set intersection size, so iterate over whichever set is smaller and
        // do the O(1) contains check against the larger one.
        Set<Integer> smaller = metrics.neighborIds(nodeId);
        Set<Integer> larger = nodesInSubgraph;
        if (smaller.size() > larger.size())
        {
            Set<Integer> tmp = smaller;
            smaller = larger;
            larger = tmp;
        }
        int numInternalNeighbors = 0;
        for (Integer candidate : smaller)
        {
            if (larger.contains(candidate))
            {
                ++numInternalNeighbors;
            }
        }

        // Each triangle through this node whose other two corners are both
        // internal is one existing edge among the internal neighbors.
        int numInternalTriangles = 0;
        for (Pair<Integer, Integer> others
            : metrics.getNodeTriangleEndpointIds(nodeId))
        {
            if (nodesInSubgraph.contains(others.getFirst())
                && nodesInSubgraph.contains(others.getSecond()))
            {
                ++numInternalTriangles;
            }
        }

        // If there are no internal triangles possible, this returns 0
        if (numInternalNeighbors < 2)
        {
            return 0;
        }

        // n choose 2, computed in floating point so that a large neighbor
        // count cannot overflow an int product
        double numPossibleEdges
            = ((numInternalNeighbors - 1.0) * numInternalNeighbors) / 2.0;
        return numInternalTriangles / numPossibleEdges;
    }

    /**
     * Per-thread scratch buffer used by getInternalToMaxExternalRatio. As that
     * method is called many times when maximizing permanence, initializing an
     * array over-and-over was getting expensive. The buffer is kept per thread
     * (rather than in a single shared static array) so that concurrent callers
     * of the public permanence methods cannot overwrite each other's counts.
     */
    private static final ThreadLocal<int[]> CONNECTIONS_SCRATCH
        = new ThreadLocal<int[]>();

    /**
     * Returns the ratio of internal neighbors of node to the maximum number
     * of node's neighbors in any other community. The denominator is never
     * less than 1, so a node with no external neighbors yields its internal
     * neighbor count.
     *
     * @param <NodeNameType> The type used to identify nodes in the graph
     * @param metrics The graph metrics to be used herein
     * @param partitions The partitioning of the graph
     * @param nodeId The id of the node whose ratio is desired
     * @return the ratio of internal neighbors to the maximum number of
     * neighbors in any other one community
     */
    private static <NodeNameType> double getInternalToMaxExternalRatio(
        GraphMetrics<NodeNameType> metrics,
        NodePartitioning<NodeNameType> partitions,
        int nodeId)
    {
        int nodesPartition = partitions.getPartitionById(nodeId);
        int numPartitions = partitions.getNumPartitions();

        // Reuse this thread's buffer when it is big enough; grow it otherwise
        int[] connections = CONNECTIONS_SCRATCH.get();
        if (connections == null || connections.length < numPartitions)
        {
            connections = new int[numPartitions];
            CONNECTIONS_SCRATCH.set(connections);
        }
        Arrays.fill(connections, 0);

        // Starts at 1 so the ratio is well defined with no external neighbors
        int maxExternal = 1;
        for (int neighbor : metrics.neighborIds(nodeId))
        {
            int partId = partitions.getPartitionById(neighbor);
            ++connections[partId];
            if (partId != nodesPartition)
            {
                maxExternal = Math.max(maxExternal, connections[partId]);
            }
        }

        return ((double) connections[nodesPartition]) / ((double) maxExternal);
    }

    /**
     * Computes the permanence for one node in the graph.
     *
     * @param <NodeNameType> The type used to identify nodes in the graph
     * @param metrics The graph metrics object to be used herein
     * @param partitions The partitioning on the graph
     * @param node The node whose permanence is requested
     * @param graph The graph being partitioned
     * @return the permanence for the input node
     */
    @PublicationReference(author = "Tanmoy Chakraborty, Sriram Srinivasan, "
        + "Niloy Ganguly, Animesh Mukherjee, and Sanjukta Bhowmick",
        title = "On the permanence of vertices in network communities", year
        = 2014, type = PublicationType.Conference)
    public static <NodeNameType> double computeOneNodePermanence(
        GraphMetrics<NodeNameType> metrics,
        NodePartitioning<NodeNameType> partitions,
        NodeNameType node,
        DirectedNodeEdgeGraph<NodeNameType> graph)
    {
        return computeOneNodePermanenceById(metrics, partitions,
            graph.getNodeId(node), graph);
    }

    /**
     * Computes the permanence for one node in the graph. The value is
     * (internal-to-max-external neighbor ratio / degree) minus
     * (1 - internal edge density), where a degree of zero is treated as 1.
     *
     * @param <NodeNameType> The type used to identify nodes in the graph
     * @param metrics The graph metrics object to be used herein
     * @param partitions The partitioning on the graph
     * @param nodeId The id of the node whose permanence is requested
     * @param graph The graph being partitioned
     * @return the permanence for the input node
     */
    public static <NodeNameType> double computeOneNodePermanenceById(
        GraphMetrics<NodeNameType> metrics,
        NodePartitioning<NodeNameType> partitions,
        int nodeId,
        DirectedNodeEdgeGraph<NodeNameType> graph)
    {
        // Clamp to 1 so that an isolated node does not divide by zero
        int degree = Math.max(1, metrics.degree(nodeId));
        double pull
            = getInternalToMaxExternalRatio(metrics, partitions, nodeId)
            / degree;
        double internalDensity = getInternalEdgeDensity(metrics,
            getPartitionIds(partitions, nodeId, graph), nodeId);

        return pull - (1 - internalDensity);
    }

    /**
     * Helper that gets the ids of the nodes within the input node's
     * partitioning. If an instance of MutableNodePartitioning is passed in, the
     * faster method that implementation provides is used.
     *
     * @param <NodeNameType> The type used to identify nodes in the graph
     * @param partitioning The partitioning to use
     * @param nodeId The id of the node whose partition members are wanted
     * @param graph The graph that's partitioned
     * @return the ids of the nodes in the same partition as the input node
     */
    private static <NodeNameType> Set<Integer> getPartitionIds(
        NodePartitioning<NodeNameType> partitioning,
        int nodeId,
        DirectedNodeEdgeGraph<NodeNameType> graph)
    {
        int partition = partitioning.getPartitionById(nodeId);
        if (partitioning instanceof MutableNodePartitioning)
        {
            return ((MutableNodePartitioning<NodeNameType>) partitioning)
                .getPartitionMemberIds(partition);
        }

        // General case: translate each member node to its id in the graph
        Set<NodeNameType> nodes = partitioning.getPartitionMembers(partition);
        Set<Integer> ret = new HashSet<>();
        for (NodeNameType n : nodes)
        {
            ret.add(graph.getNodeId(n));
        }

        return ret;
    }

    /**
     * Computes the average permanence for the partitioning of the entire graph
     *
     * @param <NodeNameType> The type used to identify nodes in the graph
     * @param graph The graph to partition
     * @param metrics The metrics on that graph
     * @param partitions The partitioning of the graph
     * @return the average permanence for the partitioning
     */
    public static <NodeNameType> double computeGraphPermanance(
        DirectedNodeEdgeGraph<NodeNameType> graph,
        GraphMetrics<NodeNameType> metrics,
        NodePartitioning<NodeNameType> partitions)
    {
        double sum = 0;
        int numNodes = graph.getNumNodes();
        for (int i = 0; i < numNodes; ++i)
        {
            sum += computeOneNodePermanenceById(metrics, partitions, i, graph);
        }

        return sum / metrics.numNodes();
    }

}