CommunityMetrics.java example

Explorer
Foundry-master
- Components
/*
 * File:                CommunityMetrics.java
 * Authors:             Jeremy D. Wendt
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 * 
 * Copyright 2016, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 * license for use of this work by or on behalf of the U.S. Government. 
 * Export of this program may require a license from the United States
 * Government. See CopyrightHistory.txt for complete details.
 * 
 */

package gov.sandia.cognition.graph.community;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.graph.DirectedNodeEdgeGraph;
import gov.sandia.cognition.graph.DirectedWeightedNodeEdgeGraph;
import gov.sandia.cognition.graph.GraphMetrics;
import gov.sandia.cognition.util.Pair;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

/**
 * This class stores several static methods for computing metrics specific to a
 * graph and a set of communities. Unlike the GraphMetrics class, this does not
 * preserve values for the community structure nor the graph internally -- each
 * method is a separate call.
 *
 * @author jdwendt
 */
public class CommunityMetrics
{

    /**
     * Scores the supplied communities against the supplied graph by computing
     * their modularity. This is a convenience overload: it builds a new
     * {@link GraphMetrics} for the graph and hands the work to the overload
     * that accepts precomputed metrics.
     * <p>
     * Modularity is defined for exclusive community detection (each node
     * belongs to exactly one community). The input is not validated against
     * that requirement, so malformed communities silently yield a meaningless
     * score.
     *
     * @param <NodeNameType> The type used to name the nodes of the graph
     * @param graph The graph that has been partitioned
     * @param communities The partitions created on the input graph
     * @return The modularity score
     */
    public static <NodeNameType> double computeModularity(
        DirectedNodeEdgeGraph<NodeNameType> graph,
        Set<Set<NodeNameType>> communities)
    {
        GraphMetrics<NodeNameType> graphMetrics = new GraphMetrics<>(graph);

        return computeModularity(communities, graphMetrics);
    }

    /**
     * Computes the modularity of a set of communities using metrics that were
     * already gathered for the partitioned graph. Each community contributes
     * its non-normalized part, and the accumulated total is divided by twice
     * the edge count reported by the metrics.
     * <p>
     * Modularity is defined for exclusive community detection (each node
     * belongs to exactly one community). The input is not validated against
     * that requirement, so malformed communities silently yield a meaningless
     * score. No guard is applied for a zero edge count, so the division is
     * then by zero.
     *
     * @param <NodeNameType> The type used to name the nodes of the graph
     * @param communities The partitions created on the input graph
     * @param graphMetrics The complete set of metrics for the partitioned graph
     * @return The modularity score
     */
    public static <NodeNameType> double computeModularity(
        Set<Set<NodeNameType>> communities,
        GraphMetrics<NodeNameType> graphMetrics)
    {
        double unnormalized = 0.0;
        for (Set<NodeNameType> members : communities)
        {
            double part = modularityPartForCommunity(members, graphMetrics);
            unnormalized += part;
        }

        double twiceEdgeCount = 2.0 * graphMetrics.numEdges();
        return unnormalized / twiceEdgeCount;
    }

    /**
     * Scores the supplied partitioning against the supplied graph by computing
     * its modularity. This is a convenience overload: it builds a new
     * {@link GraphMetrics} for the graph and hands the work to the overload
     * that accepts precomputed metrics.
     * <p>
     * Modularity is defined for exclusive community detection (each node
     * belongs to exactly one community). The input is not validated against
     * that requirement, so a malformed partitioning silently yields a
     * meaningless score.
     *
     * @param <NodeNameType> The type used to name the nodes of the graph
     * @param graph The graph that has been partitioned
     * @param communities The partitions created on the input graph
     * @return The modularity score
     */
    public static <NodeNameType> double computeModularity(
        DirectedNodeEdgeGraph<NodeNameType> graph,
        NodePartitioning<NodeNameType> communities)
    {
        GraphMetrics<NodeNameType> graphMetrics = new GraphMetrics<>(graph);

        return computeModularity(communities, graphMetrics);
    }

    /**
     * Computes the modularity of a partitioning using metrics that were
     * already gathered for the partitioned graph. When the partitioning itself
     * reports a modularity (its {@code getModularity()} is non-null), that
     * value is returned unchanged and nothing is recomputed. Otherwise each
     * partition contributes its non-normalized part and the total is divided
     * by twice the edge count reported by the metrics.
     * <p>
     * Modularity is defined for exclusive community detection (each node
     * belongs to exactly one community). The input is not validated against
     * that requirement, so a malformed partitioning silently yields a
     * meaningless score.
     *
     * @param <NodeNameType> The type used to name the nodes of the graph
     * @param communities The partitions created on the input graph
     * @param graphMetrics The complete set of metrics for the partitioned graph
     * @return The modularity score
     */
    @PublicationReference(
        author = "Wikipedia",
        title = "Modularity (networks)",
        type = PublicationType.WebPage,
        year = 2016,
        url = "https://en.wikipedia.org/wiki/Modularity_(networks)")
    public static <NodeNameType> double computeModularity(
        NodePartitioning<NodeNameType> communities,
        GraphMetrics<NodeNameType> graphMetrics)
    {
        // Prefer the score carried by the partitioning, if it has one
        Double precomputed = communities.getModularity();
        if (precomputed != null)
        {
            return precomputed;
        }

        double unnormalized = 0.0;
        int partitionCount = communities.getNumPartitions();
        for (int partition = 0; partition < partitionCount; ++partition)
        {
            Set<NodeNameType> members
                = communities.getPartitionMembers(partition);
            unnormalized += modularityPartForCommunity(members, graphMetrics);
        }

        double twiceEdgeCount = 2.0 * graphMetrics.numEdges();
        return unnormalized / twiceEdgeCount;
    }

    /**
     * Helper that performs the core of the modularity computation for a single
     * community, so the public overloads can share one implementation.
     * <p>
     * For every ordered pair (i, j) of community members, including i == j,
     * this adds 1 when j is in the neighbor set the metrics report for i, and
     * subtracts degree(i) * degree(j) / (2 * numEdges). The caller is
     * responsible for the final normalization by 2 * numEdges.
     *
     * @param <NodeNameType> The type used to name the nodes of the graph
     * @param community The community to compute part of the modularity score
     * @param graphMetrics The metrics for the graph that contains the input
     * community
     * @return the non-normalized modularity for this community in part of the
     * graph
     */
    private static <NodeNameType> double modularityPartForCommunity(
        Set<NodeNameType> community,
        GraphMetrics<NodeNameType> graphMetrics)
    {
        double oneOverTwoM = 1.0 / (2.0 * graphMetrics.numEdges());
        double modularityPart = 0.0;
        for (NodeNameType nodei : community)
        {
            Set<NodeNameType> neighbors = graphMetrics.neighbors(nodei);
            // Held as a double so the degree product below is formed in
            // floating point. As an int * int product it would silently wrap
            // once it exceeded Integer.MAX_VALUE (two degrees above ~46k).
            double degi = graphMetrics.degree(nodei);
            for (NodeNameType nodej : community)
            {
                if (neighbors.contains(nodej))
                {
                    modularityPart += 1;
                }
                modularityPart -= (graphMetrics.degree(nodej) * degi)
                    * oneOverTwoM;
            }
        }

        return modularityPart;
    }

    /**
     * Computes the conductance resulting from the cut between the input
     * community and the rest of the graph. Weighted and unweighted graphs are
     * both supported: when the graph is a
     * {@link DirectedWeightedNodeEdgeGraph} each edge counts with its weight,
     * otherwise each edge counts as 1.
     * <p>
     * Every edge adds its weight to the volume of the side (inside or outside
     * the community) of each of its two endpoints, and additionally to the cut
     * when its endpoints are on different sides. The result is the cut divided
     * by the smaller of the two volumes. No guard is applied when that smaller
     * volume is zero (for example an empty community), so the floating-point
     * division by zero then yields NaN or an infinity.
     *
     * @param <NodeNameType> The type used to name the nodes of the graph
     * @param graph The graph that is being cut by separating the input
     * community
     * @param community The set of nodes that are proposed to be their own
     * community
     * @return the conductance resulting by the cut of this community from the
     * graph
     */
    @PublicationReference(author = "Wikipedia", title = "Conductance (graph)",
        type = PublicationType.WebPage, year = 2016, url
        = "https://en.wikipedia.org/wiki/Conductance_(graph)")
    public static <NodeNameType> double computeConductance(
        DirectedNodeEdgeGraph<NodeNameType> graph,
        Set<NodeNameType> community)
    {
        // Resolve the weighted view once, with its type argument, rather than
        // casting to the raw type for every edge. Stays null when unweighted.
        DirectedWeightedNodeEdgeGraph<NodeNameType> weightedGraph = null;
        if (graph instanceof DirectedWeightedNodeEdgeGraph)
        {
            weightedGraph
                = (DirectedWeightedNodeEdgeGraph<NodeNameType>) graph;
        }

        double edgesCut = 0;
        double volumeInside = 0;
        double volumeOutside = 0;
        for (int i = 0; i < graph.getNumEdges(); ++i)
        {
            Pair<Integer, Integer> edge = graph.getEdgeEndpointIds(i);
            double w = 1.0;
            if (weightedGraph != null)
            {
                w = weightedGraph.getEdgeWeight(i);
            }
            boolean firstInside
                = community.contains(graph.getNode(edge.getFirst()));
            boolean secondInside
                = community.contains(graph.getNode(edge.getSecond()));

            // An edge is cut when exactly one endpoint is in the community
            if (firstInside != secondInside)
            {
                edgesCut += w;
            }

            // Each endpoint contributes the edge weight to its side's volume
            if (firstInside)
            {
                volumeInside += w;
            }
            else
            {
                volumeOutside += w;
            }
            if (secondInside)
            {
                volumeInside += w;
            }
            else
            {
                volumeOutside += w;
            }
        }

        return edgesCut / Math.min(volumeInside, volumeOutside);
    }

    /**
     * Computes a clustering coefficient for a subgraph of a graph. The
     * permanence paper makes it seem this is what it uses, but what it calls
     * the internal clustering coefficient turns out to be the internal edge
     * density. This method is kept in case it is needed in the future.
     * <p>
     * For every node of the subgraph this counts k, the number of its
     * neighbors that are also in the subgraph, and adds k * (k - 1) to the
     * wedge total. It also adds to the triangle total the number of triangle
     * endpoint pairs reported for the node whose two endpoints are both in the
     * subgraph. The result is triangles divided by wedges.
     * <p>
     * NOTE(review): the wedge term is k * (k - 1) rather than k * (k - 1) / 2;
     * whether that matches how the metrics enumerate triangle endpoint pairs
     * should be confirmed before relying on the absolute value.
     *
     * @param <NodeNameType> The type used to name the nodes of the graph
     * @param metrics The graph metrics object to be used
     * @param nodesInSubgraph The set of nodes that comprise the subgraph
     * @return The clustering coefficient for the graph created by the input
     * subgraph, or 0 when the subgraph contains no wedges
     */
    private static <NodeNameType> double getSubgraphClusteringCoefficient(
        GraphMetrics<NodeNameType> metrics,
        Set<NodeNameType> nodesInSubgraph)
    {
        // Totals are longs: k * (k - 1) alone exceeds Integer.MAX_VALUE for k
        // above ~46k, and a wrapped (negative) total would wrongly trip the
        // "no wedges" guard below.
        long totalNumInternalWedges = 0;
        long totalNumInternalTriangles = 0;
        for (NodeNameType node : nodesInSubgraph)
        {
            int numInternalNeighbors = 0;
            for (NodeNameType neighbor : metrics.neighbors(node))
            {
                if (nodesInSubgraph.contains(neighbor))
                {
                    ++numInternalNeighbors;
                }
            }
            totalNumInternalWedges += ((long) numInternalNeighbors)
                * (numInternalNeighbors - 1);

            for (Pair<NodeNameType, NodeNameType> others
                : metrics.getNodeTriangleEndpoints(node))
            {
                if (nodesInSubgraph.contains(others.getFirst())
                    && nodesInSubgraph.contains(others.getSecond()))
                {
                    ++totalNumInternalTriangles;
                }
            }
        }
        if (totalNumInternalWedges <= 0)
        {
            return 0;
        }
        return ((double) totalNumInternalTriangles)
            / ((double) totalNumInternalWedges);
    }

    /**
     * Returns the edge density among the neighbors of a node that lie inside
     * the given subgraph (community).
     * <p>
     * With k the number of the node's neighbors that are in the subgraph, the
     * result is the number of triangle endpoint pairs reported for the node
     * whose two endpoints are both in the subgraph, divided by the
     * k * (k - 1) / 2 possible pairs of internal neighbors.
     *
     * @param <NodeNameType> The type used to name the nodes of the graph
     * @param metrics The graph metrics object to be used herein
     * @param nodesInSubgraph The ids of the nodes that are within the input
     * community
     * @param nodeId The id of the node whose edge density is desired
     * @return The ratio of the existing edges and the total number of possible
     * edges among the internal neighbors of the node, or 0 when the node has
     * fewer than two internal neighbors
     */
    private static <NodeNameType> double getInternalEdgeDensity(
        GraphMetrics<NodeNameType> metrics,
        Set<Integer> nodesInSubgraph,
        Integer nodeId)
    {
        // Count the neighbors that are inside the subgraph. The intersection
        // size is symmetric, so iterate the smaller set and probe the larger.
        Set<Integer> smaller = metrics.neighborIds(nodeId);
        Set<Integer> larger = nodesInSubgraph;
        if (smaller.size() > larger.size())
        {
            Set<Integer> tmp = smaller;
            smaller = larger;
            larger = tmp;
        }
        int numInternalNeighbors = 0;
        for (Integer candidate : smaller)
        {
            if (larger.contains(candidate))
            {
                ++numInternalNeighbors;
            }
        }

        // With fewer than two internal neighbors no internal triangle is
        // possible, so the answer is 0 and the triangle scan can be skipped
        if (numInternalNeighbors < 2)
        {
            return 0;
        }

        int numInternalTriangles = 0;
        for (Pair<Integer, Integer> others
            : metrics.getNodeTriangleEndpointIds(nodeId))
        {
            if (nodesInSubgraph.contains(others.getFirst())
                && nodesInSubgraph.contains(others.getSecond()))
            {
                ++numInternalTriangles;
            }
        }

        // Formed in floating point: as an int product (k - 1) * k wraps once
        // k exceeds ~46k
        double possiblePairs = ((numInternalNeighbors - 1.0)
            * numInternalNeighbors) / 2.0;
        return numInternalTriangles / possiblePairs;
    }

    /**
     * Scratch buffer used by getInternalToMaxExternalRatio to tally, per
     * partition id, how many neighbors of the node being examined fall in that
     * partition. It is kept in a static field because that method is called
     * many times when maximizing permanence, and allocating a fresh array on
     * every call was getting expensive. It starts out null and is allocated
     * (and re-allocated when too short) by that method.
     * <p>
     * The buffer is shared by every caller and is read and written without any
     * synchronization in this class, so concurrent calls would operate on the
     * same array.
     */
    private static int[] connections = null;

    /**
     * Returns the ratio of the number of a node's neighbors that share its
     * partition to the largest number of its neighbors found in any single
     * other partition. The denominator never drops below 1, so a node with no
     * external neighbors yields its internal neighbor count.
     * <p>
     * Uses (and may re-allocate) the shared static {@code connections}
     * scratch buffer.
     *
     * @param <NodeNameType> The type used to name the nodes of the graph
     * @param metrics The graph metrics to be used herein
     * @param partitions The partitioning of the graph
     * @param nodeId The id of the node whose ratio is desired
     * @return the ratio of internal neighbors to the maximum number of
     * neighbors in any other one community
     */
    private static <NodeNameType> double getInternalToMaxExternalRatio(
        GraphMetrics<NodeNameType> metrics,
        NodePartitioning<NodeNameType> partitions,
        int nodeId)
    {
        int homePartition = partitions.getPartitionById(nodeId);
        int partitionCount = partitions.getNumPartitions();

        // Grow the shared tally buffer only when it cannot hold every
        // partition, then reset all of its counters
        boolean bufferTooSmall = (connections == null)
            || (connections.length < partitionCount);
        if (bufferTooSmall)
        {
            connections = new int[partitionCount];
        }
        Arrays.fill(connections, 0);

        // Starts at 1 so the final division is never by zero
        int largestExternal = 1;
        for (int neighborId : metrics.neighborIds(nodeId))
        {
            int neighborPartition = partitions.getPartitionById(neighborId);
            connections[neighborPartition] += 1;
            if (neighborPartition == homePartition)
            {
                continue;
            }
            if (connections[neighborPartition] > largestExternal)
            {
                largestExternal = connections[neighborPartition];
            }
        }

        double internalCount = connections[homePartition];
        return internalCount / largestExternal;
    }

    /**
     * Computes the permanence for one node in the graph, identified by its
     * name. The name is translated to the graph's node id and the work is
     * delegated to {@code computeOneNodePermanenceById}.
     *
     * @param <NodeNameType> The type used to name the nodes of the graph
     * @param metrics The graph metrics object to be used herein
     * @param partitions The partitioning on the graph
     * @param node The node whose permanence is requested
     * @param graph The graph being partitioned
     * @return the permanence for the input node
     */
    @PublicationReference(
        author = "Tanmoy Chakraborty, Sriram Srinivasan, "
        + "Niloy Ganguly, Animesh Mukherjee, and Sanjukta Bhowmick",
        title = "On the permanence of vertices in network communities",
        year = 2014,
        type = PublicationType.Conference)
    public static <NodeNameType> double computeOneNodePermanence(
        GraphMetrics<NodeNameType> metrics,
        NodePartitioning<NodeNameType> partitions,
        NodeNameType node,
        DirectedNodeEdgeGraph<NodeNameType> graph)
    {
        int nodeId = graph.getNodeId(node);

        return computeOneNodePermanenceById(metrics, partitions, nodeId,
            graph);
    }

    /**
     * Computes the permanence for one node in the graph, identified by its id.
     * The value is
     * (internal-to-max-external ratio / degree) - (1 - internal edge density),
     * where the degree is clamped to at least 1 so an isolated node does not
     * cause a division by zero.
     *
     * @param <NodeNameType> The type used to name the nodes of the graph
     * @param metrics The graph metrics object to be used herein
     * @param partitions The partitioning on the graph
     * @param nodeId The id of the node whose permanence is requested
     * @param graph The graph being partitioned
     * @return the permanence for the input node
     */
    public static <NodeNameType> double computeOneNodePermanenceById(
        GraphMetrics<NodeNameType> metrics,
        NodePartitioning<NodeNameType> partitions,
        int nodeId,
        DirectedNodeEdgeGraph<NodeNameType> graph)
    {
        int clampedDegree = Math.max(1, metrics.degree(nodeId));

        // How strongly the node is held by its own community relative to the
        // best competing community
        double internalToExternal
            = getInternalToMaxExternalRatio(metrics, partitions, nodeId);

        // How densely the node's internal neighbors are connected
        Set<Integer> communityIds = getPartitionIds(partitions, nodeId, graph);
        double internalDensity
            = getInternalEdgeDensity(metrics, communityIds, nodeId);

        return (internalToExternal / clampedDegree) - (1 - internalDensity);
    }

    /**
     * Helper that gets the ids of the nodes in the same partition as the input
     * node. When the partitioning is a {@code MutableNodePartitioning}, its
     * direct id lookup is used and the set it returns is handed back as is (no
     * copy is made here). Otherwise the partition's member names are fetched
     * and each is translated to its id through the graph into a new set.
     *
     * @param <NodeNameType> The type used to name the nodes of the graph
     * @param partitioning The partitioning to use
     * @param nodeId The id of the node whose partition members are wanted
     * @param graph The graph that's partitioned
     * @return the ids of the nodes in the same partition as the input node
     */
    private static <NodeNameType> Set<Integer> getPartitionIds(
        NodePartitioning<NodeNameType> partitioning,
        int nodeId,
        DirectedNodeEdgeGraph<NodeNameType> graph)
    {
        int partitionIndex = partitioning.getPartitionById(nodeId);

        if (!(partitioning instanceof MutableNodePartitioning))
        {
            // General path: map every member name to its id
            Set<Integer> memberIds = new HashSet<>();
            for (NodeNameType member
                : partitioning.getPartitionMembers(partitionIndex))
            {
                memberIds.add(graph.getNodeId(member));
            }
            return memberIds;
        }

        // Fast path: the mutable partitioning already tracks member ids
        MutableNodePartitioning<NodeNameType> mutablePartitioning
            = (MutableNodePartitioning<NodeNameType>) partitioning;
        return mutablePartitioning.getPartitionMemberIds(partitionIndex);
    }

    /**
     * Computes the average permanence for the partitioning of the entire
     * graph: the permanence of every node id from 0 up to the graph's node
     * count is summed, and the sum is divided by the node count reported by
     * the metrics.
     * <p>
     * NOTE(review): the loop bound comes from the graph while the divisor
     * comes from the metrics; this presumably assumes the metrics were built
     * from this same graph -- confirm with callers.
     *
     * @param <NodeNameType> The type used to name the nodes of the graph
     * @param graph The graph to partition
     * @param metrics The metrics on that graph
     * @param partitions The partitioning of the graph
     * @return the average permanence for the partitioning
     */
    public static <NodeNameType> double computeGraphPermanance(
        DirectedNodeEdgeGraph<NodeNameType> graph,
        GraphMetrics<NodeNameType> metrics,
        NodePartitioning<NodeNameType> partitions)
    {
        double total = 0;
        int nodeCount = graph.getNumNodes();
        for (int nodeId = 0; nodeId < nodeCount; ++nodeId)
        {
            double permanence = computeOneNodePermanenceById(metrics,
                partitions, nodeId, graph);
            total += permanence;
        }

        return total / metrics.numNodes();
    }

}