/*
* File: GraphMetrics.java
* Authors: Jeremy D. Wendt
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright 2016, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government.
* Export of this program may require a license from the United States
* Government. See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.graph;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.util.DefaultKeyValuePair;
import gov.sandia.cognition.collection.DoubleArrayList;
import gov.sandia.cognition.collection.IntArrayList;
import gov.sandia.cognition.util.Pair;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;
import java.util.Stack;
/**
* This class is intended to allow programmers to get any number of graph
* metrics for any input graph without bloating the intentionally spartan graph
* implementations and interfaces. This class computes all values of a given
* type when one of them is requested as the runtime costs for all is often
* quite similar to the runtime costs for one -- although at the cost of
* increased storage. However, if no instances of a metric are ever requested
* (e.g., no Jaccard similarities are requested), then they are never computed
* or stored (unless required as a prerequisite for some other requested
* metric).
*
* NOTE: If the graph is changed after any metric is computed, this has no way
* of knowing intrinsically. Thus, if you have altered the graph by adding a
* node or an edge, you need to call the clear method herein to remove old
* cached values.
*
* @author jdwendt
* @param <NodeNameType> The type for node names in the input graph
*/
public class GraphMetrics<NodeNameType>
{
/**
* The graph whose metrics are computed and stored by this class
*/
private final DirectedNodeEdgeGraph<NodeNameType> graph;
/**
* The set of neighbor (undirected) node ids for all nodes in the graph,
* indexed by node id. Null until computed.
*/
private List<Set<Integer>> allNodeNeighbors;
/**
* The set of successor (directed) node ids for all nodes in the graph,
* indexed by node id. Null until computed.
*/
private List<Set<Integer>> allNodeSuccessors;
/**
* The list of degrees for all nodes in the graph, indexed by node id. Null
* until computed.
*/
private IntArrayList allNodeDegrees;
/**
* The other two node ids for all triangles that each node in the graph is
* in, indexed by node id. Null until computed.
*/
private List<Set<Pair<Integer, Integer>>> allNodeTriangles;
/**
* The assortativity for the whole graph. Holds the sentinel
* -Double.MAX_VALUE until computed.
*/
private double degreeAssortativity;
/**
* The Jaccard similarity for all edges in the graph -- That is, the Jaccard
* similarity of the neighbors (undirected) for the endpoints on each edge.
* Indexed by edge id. Null until computed.
*/
private DoubleArrayList perEdgeJaccardSimilarity;
/**
* The other node involved in all triangles that any edge is in, indexed by
* edge id. Null until computed (it is filled in together with
* allNodeTriangles).
*/
private List<Set<Integer>> allEdgeTriangles;
/**
* The percentage of closed triangles compared to the total number that any
* edge could be in, indexed by edge id. Null until computed.
*/
private DoubleArrayList perEdgeTriangleDensity;
/**
* The eccentricities of all nodes in the unweighted graph, indexed by node
* id. Null until computed.
*/
private IntArrayList perNodeEccentricity;
/**
* The betweenness centralities for all nodes in the unweighted graph. Null
* after construction.
*/
private DoubleArrayList perNodeBetweenCentrality;
/**
* The smallest eccentricity found while computing per-node eccentricities.
* Holds Integer.MAX_VALUE before that computation, and is reset to
* Integer.MAX_VALUE by it when the graph turns out not to be connected.
*/
private int radius;
/**
* The largest eccentricity found while computing per-node eccentricities.
* Holds Integer.MAX_VALUE before that computation, and is reset to
* Integer.MAX_VALUE by it when the graph turns out not to be connected.
*/
private int diameter;
/**
* Whether every node was reachable during the eccentricity computation.
* Null until that computation has run.
* NOTE(review): "Wcc" presumably abbreviates "weakly connected component"
* -- confirm.
*/
private Boolean isWcc;
/**
 * Wraps the input graph in a metrics cache that starts out empty: nothing is
 * computed until a metric is first requested. If the graph is modified after
 * this object is built, call the clear method so stale values are dropped.
 *
 * @param graph The graph whose metrics will be computed and returned by
 * this
 */
public GraphMetrics(DirectedNodeEdgeGraph<NodeNameType> graph)
{
    this.graph = graph;

    // Per-node caches (null marks "not computed yet")
    this.allNodeNeighbors = null;
    this.allNodeSuccessors = null;
    this.allNodeDegrees = null;
    this.allNodeTriangles = null;
    this.perNodeEccentricity = null;
    this.perNodeBetweenCentrality = null;

    // Per-edge caches (null marks "not computed yet")
    this.perEdgeJaccardSimilarity = null;
    this.allEdgeTriangles = null;
    this.perEdgeTriangleDensity = null;

    // Whole-graph values, each with its own "not computed yet" sentinel
    this.degreeAssortativity = -Double.MAX_VALUE;
    this.radius = Integer.MAX_VALUE;
    this.diameter = Integer.MAX_VALUE;
    this.isWcc = null;
}
/**
 * Clears all cached metrics for the originally input graph. This class has
 * no way of knowing when the graph passed to its constructor is modified
 * afterwards, so callers must invoke this method after changing the graph.
 * Every cache is returned to the same "not computed yet" state the
 * constructor establishes, so each metric is recomputed on its next request.
 */
public void clear()
{
    allNodeNeighbors = null;
    allNodeSuccessors = null;
    allNodeDegrees = null;
    allNodeTriangles = null;
    degreeAssortativity = -Double.MAX_VALUE;
    perEdgeJaccardSimilarity = null;
    allEdgeTriangles = null;
    perEdgeTriangleDensity = null;
    radius = diameter = Integer.MAX_VALUE;
    perNodeEccentricity = null;
    // Must be reset along with every other cache; otherwise centralities
    // computed for the old graph would outlive the clear.
    perNodeBetweenCentrality = null;
    isWcc = null;
}
/**
 * Reports how many nodes the wrapped graph contains. The value is read from
 * the graph on every call and is not cached here.
 *
 * @return the number of nodes in the graph
 */
public int numNodes()
{
    final int nodeCount = graph.getNumNodes();
    return nodeCount;
}
/**
 * Reports how many edges the wrapped graph contains. The value is read from
 * the graph on every call and is not cached here.
 *
 * @return the number of edges in the graph
 */
public int numEdges()
{
    final int edgeCount = graph.getNumEdges();
    return edgeCount;
}
/**
 * Private helper reporting whether the node-degree cache currently exists.
 *
 * @return true if the degrees have been computed, else false
 */
private boolean isInitializedNodeDegrees()
{
    return !(allNodeDegrees == null);
}
/**
 * Builds the unweighted degree of every node in one pass over the edges.
 * Each edge adds one to the count of both of its endpoints, so repeated
 * edges count once per repeat and a self-loop raises its node's degree by 2.
 * Any previously cached degrees are replaced.
 */
public void initializeNodeDegrees()
{
    final int nodeCount = numNodes();
    allNodeDegrees = new IntArrayList(nodeCount);
    // Start every node at degree zero
    int filled = 0;
    while (filled < nodeCount)
    {
        allNodeDegrees.add(0);
        ++filled;
    }

    // Credit each edge to both of its endpoints
    final int edgeCount = numEdges();
    for (int edgeId = 0; edgeId < edgeCount; ++edgeId)
    {
        final Pair<Integer, Integer> endpoints = graph.getEdgeEndpointIds(
            edgeId);
        allNodeDegrees.plusEquals(endpoints.getFirst(), 1);
        allNodeDegrees.plusEquals(endpoints.getSecond(), 1);
    }
}
/**
 * Looks up the degree of the node with the input id. The degrees of all
 * nodes are computed on the first request and served from the cache
 * afterwards.
 *
 * @param nodeId The node id of the node whose degree is wanted
 * @return the degree for the input nodeId
 */
public int degree(int nodeId)
{
    if (allNodeDegrees == null)
    {
        initializeNodeDegrees();
    }
    final int nodeDegree = allNodeDegrees.get(nodeId);
    return nodeDegree;
}
/**
 * Looks up the degree of the named node by translating the name to its id
 * and delegating to the id-based degree method.
 *
 * @param nodeName The name of the node whose degree is wanted
 * @return the degree for the input nodeName
 */
public int degree(NodeNameType nodeName)
{
    final int nodeId = graph.getNodeId(nodeName);
    return degree(nodeId);
}
/**
 * Private helper reporting whether the neighbor cache currently exists.
 *
 * @return true if the neighbors have been computed, else false
 */
private boolean isInitializedNodeNeighbors()
{
    return !(allNodeNeighbors == null);
}
/**
 * Builds the undirected neighbor set of every node in one pass over the
 * edges. Each edge makes its two endpoints neighbors of one another; because
 * sets are used, repeated edges contribute a neighbor only once, and a
 * self-loop makes a node a neighbor of itself. Node degrees are computed
 * first if needed, as they are used to size each set.
 */
public void initializeNodeNeighbors()
{
    if (allNodeDegrees == null)
    {
        initializeNodeDegrees();
    }

    // One (initially empty) set per node, sized by that node's degree
    final int nodeCount = numNodes();
    allNodeNeighbors = new ArrayList<>(nodeCount);
    for (int nodeId = 0; nodeId < nodeCount; ++nodeId)
    {
        final int expectedSize = degree(nodeId);
        allNodeNeighbors.add(new HashSet<>(expectedSize));
    }

    // Record each edge in both directions
    final int edgeCount = numEdges();
    for (int edgeId = 0; edgeId < edgeCount; ++edgeId)
    {
        final Pair<Integer, Integer> endpoints = graph.getEdgeEndpointIds(
            edgeId);
        allNodeNeighbors.get(endpoints.getFirst()).add(
            endpoints.getSecond());
        allNodeNeighbors.get(endpoints.getSecond()).add(
            endpoints.getFirst());
    }
}
/**
 * Counts the distinct neighbors of the node with the input id. Unlike
 * degree, a neighbor reached through repeated edges is counted once, and a
 * self-loop adds the node itself as a single neighbor. The neighbor sets of
 * all nodes are computed on the first request and cached afterwards.
 *
 * @param nodeId The node whose number of neighbors is wanted
 * @return The number of neighbors for the input node
 */
public int numNeighbors(int nodeId)
{
    if (allNodeNeighbors == null)
    {
        initializeNodeNeighbors();
    }
    final Set<Integer> adjacent = allNodeNeighbors.get(nodeId);
    return adjacent.size();
}
/**
 * Counts the distinct neighbors of the named node by translating the name
 * to its id and delegating to the id-based method. Unlike degree, a
 * neighbor reached through repeated edges is counted once.
 *
 * @param nodeName The node whose number of neighbors is wanted
 * @return the number of neighbors for the input node
 */
public int numNeighbors(NodeNameType nodeName)
{
    final int nodeId = graph.getNodeId(nodeName);
    return numNeighbors(nodeId);
}
/**
 * Returns a read-only view of the ids of all neighbors of the node with the
 * input id. A neighbor connected by several repeated edges appears only
 * once. The neighbor sets of all nodes are computed on the first request
 * and cached afterwards.
 *
 * @param nodeId The node whose neighbor ids are wanted
 * @return the ids of all neighbors, as an unmodifiable set
 */
public Set<Integer> neighborIds(int nodeId)
{
    if (allNodeNeighbors == null)
    {
        initializeNodeNeighbors();
    }
    final Set<Integer> adjacent = allNodeNeighbors.get(nodeId);
    return Collections.unmodifiableSet(adjacent);
}
/**
 * Returns a read-only view of the ids of all neighbors of the named node by
 * translating the name to its id and delegating to the id-based method. A
 * neighbor connected by several repeated edges appears only once.
 *
 * @param nodeName The node whose neighbor ids are wanted
 * @return the ids of all neighbors, as an unmodifiable set
 */
public Set<Integer> neighborIds(NodeNameType nodeName)
{
    final int nodeId = graph.getNodeId(nodeName);
    return neighborIds(nodeId);
}
/**
 * Returns the names of all neighbors of the node with the input id. The
 * result is a newly built set on every call, obtained by translating each
 * neighbor id to its name. A neighbor connected by several repeated edges
 * appears only once.
 *
 * @param nodeId The node whose neighbor names are wanted
 * @return the names of all neighbors
 */
public Set<NodeNameType> neighbors(int nodeId)
{
    final Set<Integer> adjacentIds = neighborIds(nodeId);
    final Set<NodeNameType> adjacentNames = new HashSet<>(
        adjacentIds.size());
    final Iterator<Integer> cursor = adjacentIds.iterator();
    while (cursor.hasNext())
    {
        adjacentNames.add(graph.getNode(cursor.next()));
    }
    return adjacentNames;
}
/**
 * Returns the names of all neighbors of the named node by translating the
 * name to its id and delegating to the id-based method. A neighbor
 * connected by several repeated edges appears only once.
 *
 * @param nodeName The node whose neighbor names are wanted
 * @return the names of all neighbors
 */
public Set<NodeNameType> neighbors(NodeNameType nodeName)
{
    final int nodeId = graph.getNodeId(nodeName);
    return neighbors(nodeId);
}
/**
 * Private helper reporting whether the successor cache currently exists.
 *
 * @return true if the successors have been computed, else false
 */
private boolean isInitializedNodeSuccessors()
{
    return !(allNodeSuccessors == null);
}
/**
 * Builds the successor set (the directed counterpart of neighbors) of every
 * node in one pass over the edges. For each edge, the second endpoint is
 * recorded as a successor of the first endpoint only. Because sets are
 * used, repeated edges contribute a successor only once.
 */
public void initializeNodeSuccessors()
{
    // One (initially empty) set per node
    final int nodeCount = numNodes();
    allNodeSuccessors = new ArrayList<>(nodeCount);
    int created = 0;
    while (created < nodeCount)
    {
        allNodeSuccessors.add(new HashSet<>());
        ++created;
    }

    // Record each edge in its stored direction only
    final int edgeCount = numEdges();
    for (int edgeId = 0; edgeId < edgeCount; ++edgeId)
    {
        final Pair<Integer, Integer> endpoints = graph.getEdgeEndpointIds(
            edgeId);
        final Set<Integer> fromSuccessors = allNodeSuccessors.get(
            endpoints.getFirst());
        fromSuccessors.add(endpoints.getSecond());
    }
}
/**
 * Counts the distinct direct successors of the node with the input id. This
 * differs from degree in two ways: repeated edges are counted once, and
 * only edges that originate at this node are considered. The successor sets
 * of all nodes are computed on the first request and cached afterwards.
 *
 * @param nodeId The node whose successor count is desired
 * @return the number of successors
 */
public int numSuccessors(int nodeId)
{
    if (allNodeSuccessors == null)
    {
        initializeNodeSuccessors();
    }
    final Set<Integer> reachable = allNodeSuccessors.get(nodeId);
    return reachable.size();
}
/**
 * Counts the distinct direct successors of the named node by translating
 * the name to its id and delegating to the id-based method. Repeated edges
 * are counted once, and only edges that originate at this node are
 * considered.
 *
 * @param nodeName The node whose successor count is desired
 * @return the number of successors
 */
public int numSuccessors(NodeNameType nodeName)
{
    final int nodeId = graph.getNodeId(nodeName);
    return numSuccessors(nodeId);
}
/**
 * Returns a read-only view of the ids of all direct successors of the node
 * with the input id. Unlike neighbors this is directed: only nodes reached
 * by an edge originating at this node are included. The successor sets of
 * all nodes are computed on the first request and cached afterwards.
 *
 * @param nodeId The node whose successors' ids is desired
 * @return the ids of successors for this node, as an unmodifiable set
 */
public Set<Integer> successorIds(int nodeId)
{
    if (allNodeSuccessors == null)
    {
        initializeNodeSuccessors();
    }
    final Set<Integer> reachable = allNodeSuccessors.get(nodeId);
    return Collections.unmodifiableSet(reachable);
}
/**
 * Returns a read-only view of the ids of all direct successors of the named
 * node by translating the name to its id and delegating to the id-based
 * method. Unlike neighbors this is directed: only nodes reached by an edge
 * originating at this node are included.
 *
 * @param nodeName The node whose successors' ids is desired
 * @return the ids of successors for this node, as an unmodifiable set
 */
public Set<Integer> successorIds(NodeNameType nodeName)
{
    final int nodeId = graph.getNodeId(nodeName);
    return successorIds(nodeId);
}
/**
 * Returns the names of all direct successors of the node with the input id.
 * The result is a newly built set on every call, obtained by translating
 * each successor id to its name. Unlike neighbors this is directed: only
 * nodes reached by an edge originating at this node are included.
 *
 * @param nodeId The node whose successors' names is desired
 * @return the names of successors for this node
 */
public Set<NodeNameType> successors(int nodeId)
{
    final Set<Integer> reachableIds = successorIds(nodeId);
    final Set<NodeNameType> reachableNames = new HashSet<>(
        reachableIds.size());
    final Iterator<Integer> cursor = reachableIds.iterator();
    while (cursor.hasNext())
    {
        reachableNames.add(graph.getNode(cursor.next()));
    }
    return reachableNames;
}
/**
 * Returns the names of all direct successors of the named node by
 * translating the name to its id and delegating to the id-based method.
 * Unlike neighbors this is directed: only nodes reached by an edge
 * originating at this node are included.
 *
 * @param nodeName The node whose successors' names is desired
 * @return the names of successors for this node
 */
public Set<NodeNameType> successors(NodeNameType nodeName)
{
    final int nodeId = graph.getNodeId(nodeName);
    return successors(nodeId);
}
/**
 * Private helper reporting whether the per-node triangle cache currently
 * exists.
 *
 * @return true if the node triangles have been computed, else false
 */
private boolean isInitializedNodeTriangles()
{
    return !(allNodeTriangles == null);
}
/**
* Initializes the datastructure for all triangles that all nodes and edges
* participate in: this fills both allNodeTriangles (per node, the other two
* node ids of each triangle) and allEdgeTriangles (per edge, the third node
* id of each triangle). Note that this implementation uses the neighbors for
* each node, so does not allow triangles with only two nodes (where the
* third edge is a self-loop) nor does it create repeated triangles for nodes
* with repeated edges. According to the publication, this is O(m^(3/2)) in
* the worst case and O(m) at best.
*
* Node degrees and node neighbors are initialized first if they have not
* been already.
*/
@PublicationReference(author = "Siddharth Suri and Sergei Vassilvitskii",
title = "Counting Triangles and the Curse of the Last Reducer",
year = 2011,
publication = "Proceedings of the World Wide Web Conference (WWW)",
type = PublicationType.Conference)
public void initializeNodeTriangles()
{
if (!isInitializedNodeDegrees())
{
initializeNodeDegrees();
}
if (!isInitializedNodeNeighbors())
{
initializeNodeNeighbors();
}
int n = numNodes();
int m = numEdges();
// First I need to create a strict ordering on the nodes based on degree.
// Each entry pairs a node id (first) with its degree (second).
List<Pair<Integer, Integer>> degreeList = new ArrayList<>(n);
for (int i = 0; i < n; ++i)
{
degreeList.add(new DefaultKeyValuePair<>(i, degree(i)));
}
// Sort ascending by degree. Collections.sort is stable, so nodes of equal
// degree keep their id order and every node ends up with a unique rank.
Collections.sort(degreeList, new Comparator<Pair<Integer, Integer>>()
{
@Override
public int compare(Pair<Integer, Integer> o1,
Pair<Integer, Integer> o2)
{
return Integer.compare(o1.getSecond(), o2.getSecond());
}
});
// nodeOrder.get(nodeId) is that node's rank (position) in the sorted list
List<Integer> nodeOrder = new ArrayList<>(n);
for (int i = 0; i < n; ++i)
{
nodeOrder.add(0);
}
for (int i = 0; i < n; ++i)
{
nodeOrder.set(degreeList.get(i).getFirst(), i);
}
// Next I need to make a map from edge pairs to edge index. Both endpoint
// orders are stored as keys, and each key maps to the ids of all (possibly
// repeated) edges joining those two nodes.
// NOTE(review): assumes the pairs returned by the graph and
// DefaultKeyValuePair compare equal by value (equals/hashCode) -- confirm.
Map<Pair<Integer, Integer>, Set<Integer>> edgeMap = new HashMap<>(2 * m);
allEdgeTriangles = new ArrayList<>(m);
for (int i = 0; i < m; ++i)
{
Pair<Integer, Integer> edge = graph.getEdgeEndpointIds(i);
allEdgeTriangles.add(new HashSet<>());
if (!edgeMap.containsKey(edge))
{
edgeMap.put(edge, new HashSet<>());
}
edgeMap.get(edge).add(i);
Pair<Integer, Integer> otherOrder = new DefaultKeyValuePair<>(
edge.getSecond(), edge.getFirst());
if (!edgeMap.containsKey(otherOrder))
{
edgeMap.put(otherOrder, new HashSet<>());
}
edgeMap.get(otherOrder).add(i);
}
// Start every node with an empty triangle set
allNodeTriangles = new ArrayList<>(n);
for (int i = 0; i < n; ++i)
{
allNodeTriangles.add(new HashSet<>());
}
// Each triangle is discovered exactly once: from its lowest-ranked node i,
// as an unordered pair (neighborj, neighbork) of higher-ranked neighbors of
// i that are also neighbors of each other.
for (int i = 0; i < n; ++i)
{
// A node with fewer than two edge endpoints cannot be in a triangle
if (degree(i) <= 1)
{
continue;
}
int iidx = nodeOrder.get(i);
// jcnt and kcnt are 1-based positions within the two traversals of i's
// neighbor set; they are used below to visit each pair only once.
int jcnt = 0;
for (Integer neighborj : allNodeNeighbors.get(i))
{
++jcnt;
// Ignore self loops
if (i == neighborj)
{
continue;
}
// Only the lowest-ranked node of a triangle records it
if (iidx > nodeOrder.get(neighborj))
{
continue;
}
int kcnt = 0;
for (Integer neighbork : allNodeNeighbors.get(i))
{
++kcnt;
// Ignore self loops
if (i == neighbork)
{
continue;
}
// Only consider k strictly after j in the traversal, so that each
// unordered pair is examined once (and j is never paired with itself)
if (kcnt <= jcnt)
{
continue;
}
// Only the lowest-ranked node of a triangle records it
if (iidx > nodeOrder.get(neighbork))
{
continue;
}
// j and k are both adjacent to i; if they are adjacent to each other
// then (i, j, k) is a triangle
if (allNodeNeighbors.get(neighborj).contains(neighbork))
{
// Record the triangle on each of its three nodes
allNodeTriangles.get(i).add(new DefaultKeyValuePair<>(
neighborj, neighbork));
allNodeTriangles.get(neighborj).add(
new DefaultKeyValuePair<>(i, neighbork));
allNodeTriangles.get(neighbork).add(
new DefaultKeyValuePair<>(i, neighborj));
// Record the opposite node on every edge id of each of the three sides
Pair<Integer, Integer> edge = new DefaultKeyValuePair<>(
i, neighborj);
for (int edgeId : edgeMap.get(edge))
{
allEdgeTriangles.get(edgeId).add(neighbork);
}
edge = new DefaultKeyValuePair<>(i, neighbork);
for (int edgeId : edgeMap.get(edge))
{
allEdgeTriangles.get(edgeId).add(neighborj);
}
edge = new DefaultKeyValuePair<>(neighborj, neighbork);
for (int edgeId : edgeMap.get(edge))
{
allEdgeTriangles.get(edgeId).add(i);
}
}
}
}
}
}
/**
 * Counts the triangles that include the node with the input id. Triangles
 * made of only two nodes (where the third edge is a self-loop) are not
 * counted, and repeated edges do not produce repeated triangles. All
 * triangles are computed on the first call to any triangle method and
 * served from the cache afterwards.
 *
 * @param nodeId The node whose number of triangles is desired
 * @return The number of triangles which use this node
 */
public int numNodeTriangles(int nodeId)
{
    if (allNodeTriangles == null)
    {
        initializeNodeTriangles();
    }
    final Set<Pair<Integer, Integer>> triangles = allNodeTriangles.get(
        nodeId);
    return triangles.size();
}
/**
 * Counts the triangles that include the named node by translating the name
 * to its id and delegating to the id-based method. Triangles made of only
 * two nodes (where the third edge is a self-loop) are not counted, and
 * repeated edges do not produce repeated triangles.
 *
 * @param nodeName The node whose number of triangles is desired
 * @return The number of triangles which use this node
 */
public int numNodeTriangles(NodeNameType nodeName)
{
    final int nodeId = graph.getNodeId(nodeName);
    return numNodeTriangles(nodeId);
}
/**
 * Returns a read-only view of the triangles that include the node with the
 * input id, each given as the ids of the other two nodes. Triangles made of
 * only two nodes (where the third edge is a self-loop) are not included,
 * and repeated edges do not produce repeated triangles. All triangles are
 * computed on the first call to any triangle method and served from the
 * cache afterwards.
 *
 * @param nodeId The node whose triangles are requested
 * @return The other two endpoint ids for all triangles which use this node,
 * as an unmodifiable set
 */
public Set<Pair<Integer, Integer>> getNodeTriangleEndpointIds(int nodeId)
{
    if (allNodeTriangles == null)
    {
        initializeNodeTriangles();
    }
    final Set<Pair<Integer, Integer>> triangles = allNodeTriangles.get(
        nodeId);
    return Collections.unmodifiableSet(triangles);
}
/**
 * Returns a read-only view of the triangles that include the named node,
 * each given as the ids of the other two nodes, by translating the name to
 * its id and delegating to the id-based method. Triangles made of only two
 * nodes (where the third edge is a self-loop) are not included, and
 * repeated edges do not produce repeated triangles.
 *
 * @param nodeName The node whose triangles are requested
 * @return The other two endpoint ids for all triangles which use this node,
 * as an unmodifiable set
 */
public Set<Pair<Integer, Integer>> getNodeTriangleEndpointIds(
    NodeNameType nodeName)
{
    final int nodeId = graph.getNodeId(nodeName);
    return getNodeTriangleEndpointIds(nodeId);
}
/**
 * Returns the triangles that include the node with the input id, each given
 * as the names of the other two nodes. The result is a newly built set on
 * every call, obtained by translating both ids of each cached triangle to
 * names. Triangles made of only two nodes (where the third edge is a
 * self-loop) are not included, and repeated edges do not produce repeated
 * triangles.
 *
 * @param nodeId The node whose triangles are requested
 * @return The other two endpoint names for all triangles which use this
 * node
 */
public Set<Pair<NodeNameType, NodeNameType>> getNodeTriangleEndpoints(
    int nodeId)
{
    final Set<Pair<Integer, Integer>> idPairs
        = getNodeTriangleEndpointIds(nodeId);
    final Set<Pair<NodeNameType, NodeNameType>> namePairs = new HashSet<>(
        idPairs.size());
    for (Pair<Integer, Integer> idPair : idPairs)
    {
        final NodeNameType firstName = graph.getNode(idPair.getFirst());
        final NodeNameType secondName = graph.getNode(idPair.getSecond());
        namePairs.add(new DefaultKeyValuePair<>(firstName, secondName));
    }
    return namePairs;
}
/**
 * Returns the triangles that include the named node, each given as the
 * names of the other two nodes, by translating the name to its id and
 * delegating to the id-based method. Triangles made of only two nodes
 * (where the third edge is a self-loop) are not included, and repeated
 * edges do not produce repeated triangles.
 *
 * @param nodeName The node whose triangles are requested
 * @return The other two endpoint names for all triangles which use this
 * node
 */
public Set<Pair<NodeNameType, NodeNameType>> getNodeTriangleEndpoints(
    NodeNameType nodeName)
{
    final int nodeId = graph.getNodeId(nodeName);
    return getNodeTriangleEndpoints(nodeId);
}
/**
 * Private helper reporting whether the degree assortativity has been
 * computed, i.e. whether the field no longer holds its -Double.MAX_VALUE
 * "not computed yet" sentinel.
 *
 * @return true if initialized, else false
 */
private boolean isInitializedDegreeAssortativity()
{
    final boolean stillSentinel = (degreeAssortativity == -Double.MAX_VALUE);
    return !stillSentinel;
}
/**
 * Computes the degree assortativity of the whole graph in one pass over the
 * edges, following the referenced publication. Node degrees are computed
 * first if needed. When the two endpoints of every edge have equal degree
 * (which includes a graph with no edges), the result is set to 1.0 directly
 * because the general formula would otherwise evaluate 0 / 0.
 */
@PublicationReference(author = "M. E. J. Newman",
    title = "Assortative mixing in networks",
    type = PublicationType.Journal,
    year = 2002,
    publication = "Physical Review Letters")
public void initializeDegreeAssortativity()
{
    if (allNodeDegrees == null)
    {
        initializeNodeDegrees();
    }
    final int edgeCount = numEdges();
    final double inverseEdgeCount = 1.0 / edgeCount;

    // Running sums over all edges of: the product of the endpoint degrees,
    // the sum of the endpoint degrees, and the sum of their squares
    double productSum = 0;
    double endpointSum = 0;
    double squareSum = 0;
    boolean everyEdgeBalanced = true;
    for (int edgeId = 0; edgeId < edgeCount; ++edgeId)
    {
        final Pair<Integer, Integer> endpoints = graph.getEdgeEndpointIds(
            edgeId);
        final double firstDegree = degree(endpoints.getFirst());
        final double secondDegree = degree(endpoints.getSecond());
        if (firstDegree != secondDegree)
        {
            everyEdgeBalanced = false;
        }
        productSum += firstDegree * secondDegree;
        endpointSum += firstDegree + secondDegree;
        squareSum += firstDegree * firstDegree + secondDegree * secondDegree;
    }

    // Degenerate case: the denominator below would be zero
    if (everyEdgeBalanced)
    {
        degreeAssortativity = 1.0;
        return;
    }

    // Shared term: (sum of endpoint degrees)^2 / (4 m)
    double squaredMeanTerm = endpointSum * endpointSum;
    squaredMeanTerm *= inverseEdgeCount * 0.25;
    final double numerator = (productSum) - squaredMeanTerm;
    final double denominator = (0.5 * squareSum) - squaredMeanTerm;
    degreeAssortativity = numerator / denominator;
}
/**
 * Returns the degree assortativity of the whole graph. The value is
 * computed on the first call and served from the cache afterwards.
 *
 * @return the whole-graph degree assortativity
 */
public double degreeAssortativity()
{
    final boolean alreadyComputed = isInitializedDegreeAssortativity();
    if (!alreadyComputed)
    {
        initializeDegreeAssortativity();
    }
    return this.degreeAssortativity;
}
/**
 * Private helper reporting whether the per-edge Jaccard similarity cache
 * currently exists.
 *
 * @return true if initialized; else false
 */
private boolean isInitializedPerEdgeJaccardSimilarity()
{
    return !(perEdgeJaccardSimilarity == null);
}
/**
 * Computes the Jaccard similarity of every edge: the number of neighbors
 * (undirected) shared by the edge's two endpoints divided by the number of
 * distinct neighbors either endpoint has. Node neighbors are computed first
 * if needed.
 */
@PublicationReference(title = "Jaccard index",
    type = PublicationType.WebPage,
    year = 2015,
    author = "Wikipedia",
    url = "https://en.wikipedia.org/wiki/Jaccard_index")
public void initializePerEdgeJaccardSimilarity()
{
    if (allNodeNeighbors == null)
    {
        initializeNodeNeighbors();
    }
    final int edgeCount = numEdges();
    perEdgeJaccardSimilarity = new DoubleArrayList(edgeCount);
    for (int edgeId = 0; edgeId < edgeCount; ++edgeId)
    {
        final Pair<Integer, Integer> endpoints = graph.getEdgeEndpointIds(
            edgeId);
        final Set<Integer> firstNeighbors = neighborIds(
            endpoints.getFirst());
        final Set<Integer> secondNeighbors = neighborIds(
            endpoints.getSecond());

        // Size of the intersection: members of the first set also in the
        // second
        int sharedCount = 0;
        for (Integer candidate : firstNeighbors)
        {
            if (secondNeighbors.contains(candidate))
            {
                ++sharedCount;
            }
        }

        // Size of the union, by inclusion-exclusion
        final int unionCount = firstNeighbors.size() + secondNeighbors.size()
            - sharedCount;
        perEdgeJaccardSimilarity.add(((double) sharedCount)
            / ((double) unionCount));
    }
}
/**
 * Returns the Jaccard similarity of the input edge. The similarities of all
 * edges are computed on the first call and served from the cache
 * afterwards.
 *
 * @param edgeId The [0..m) edge index
 * @return The Jaccard Similarity for the neighbors of the endpoints of the
 * edge.
 */
public double getEdgeJaccardSimilarity(int edgeId)
{
    if (perEdgeJaccardSimilarity == null)
    {
        initializePerEdgeJaccardSimilarity();
    }
    final double similarity = perEdgeJaccardSimilarity.get(edgeId);
    return similarity;
}
/**
 * Private helper reporting whether the per-edge triangle cache currently
 * exists.
 *
 * @return true if initialized, else false
 */
private boolean isInitializedEdgeTriangles()
{
    return !(allEdgeTriangles == null);
}
/**
 * Computes the triangles of every edge. Node and edge triangles are found
 * by the same computation, so this simply runs the node-triangle
 * initialization, which fills both caches. Triangles made of only two nodes
 * (where the third edge is a self-loop) are not included, and repeated
 * edges do not produce repeated triangles.
 */
public void initializeEdgeTriangles()
{
    // Edge triangles are a by-product of the node-triangle computation
    this.initializeNodeTriangles();
}
/**
 * Counts the triangles that include the input edge. All triangles are
 * computed on the first call to any triangle method and served from the
 * cache afterwards.
 *
 * @param edgeId The edge whose triangle count is wanted
 * @return the number of triangles the input edge participates in
 */
public int numEdgeTriangles(int edgeId)
{
    if (allEdgeTriangles == null)
    {
        initializeEdgeTriangles();
    }
    final Set<Integer> thirdNodes = allEdgeTriangles.get(edgeId);
    return thirdNodes.size();
}
/**
 * Returns a read-only view of the ids of the third nodes of all triangles
 * that include the input edge. All triangles are computed on the first call
 * to any triangle method and served from the cache afterwards.
 *
 * @param edgeId The edge whose triangles' third nodes are requested
 * @return the ids for the third nodes for all triangles this edge
 * participates in, as an unmodifiable set
 */
public Set<Integer> getEdgeTriangleOtherEndpointIds(int edgeId)
{
    if (allEdgeTriangles == null)
    {
        initializeEdgeTriangles();
    }
    final Set<Integer> thirdNodes = allEdgeTriangles.get(edgeId);
    return Collections.unmodifiableSet(thirdNodes);
}
/**
 * Returns the names of the third nodes of all triangles that include the
 * input edge. The result is a newly built set on every call, obtained by
 * translating each cached third-node id to its name.
 *
 * @param edgeId The edge whose triangles' third names are requested
 * @return the names for the third nodes for all triangles this edge
 * participates in
 */
public Set<NodeNameType> getEdgeTriangleOtherEndpointNames(int edgeId)
{
    final Set<Integer> thirdNodeIds = getEdgeTriangleOtherEndpointIds(
        edgeId);
    final Set<NodeNameType> thirdNodeNames = new HashSet<>(
        thirdNodeIds.size());
    final Iterator<Integer> cursor = thirdNodeIds.iterator();
    while (cursor.hasNext())
    {
        thirdNodeNames.add(graph.getNode(cursor.next()));
    }
    return thirdNodeNames;
}
/**
 * Private helper reporting whether the per-edge triangle density cache
 * currently exists.
 *
 * @return true if initialized, else false
 */
private boolean isInitializedPerEdgeTriangleDensity()
{
    return !(perEdgeTriangleDensity == null);
}
/**
 * Initializes the per-edge triangle density. This measure is based on the
 * degrees of the two endpoints of the edge. Intuitively, this looks at how
 * many of the possible connections that come off the endpoints pair off
 * into triangles: for an edge whose endpoints have degrees di and dj that
 * is in t triangles, the density is 2t / (di + dj - 2). If edgeTriangles
 * and nodeDegrees are not already initialized, this initializes those.
 *
 * An edge whose endpoints both have degree 1 has no other connections that
 * could close a triangle (di + dj - 2 is 0); its density is defined as 0.0
 * rather than the NaN that 0 / 0 would produce.
 */
public void initializePerEdgeTriangleDensity()
{
    if (!isInitializedEdgeTriangles())
    {
        initializeEdgeTriangles();
    }
    if (!isInitializedNodeDegrees())
    {
        initializeNodeDegrees();
    }
    int m = numEdges();
    perEdgeTriangleDensity = new DoubleArrayList(m);
    for (int i = 0; i < m; ++i)
    {
        Pair<Integer, Integer> edge = graph.getEdgeEndpointIds(i);
        int di = degree(edge.getFirst());
        int dj = degree(edge.getSecond());
        // Number of edge endpoints at the two nodes, not counting this
        // edge's own two endpoints
        int otherConnections = di + dj - 2;
        double density = 0.0;
        // Guard against 0 / 0 for an isolated edge
        if (otherConnections > 0)
        {
            density = (2.0 * numEdgeTriangles(i))
                / ((double) otherConnections);
        }
        perEdgeTriangleDensity.add(density);
    }
}
/**
 * Returns the triangle density of the input edge. The densities of all
 * edges are computed on the first call and served from the cache
 * afterwards.
 *
 * @param edgeId The edge whose density is requested
 * @return the per-edge triangle density for the input edge
 */
public double getPerEdgeTriangleDensity(int edgeId)
{
    if (perEdgeTriangleDensity == null)
    {
        initializePerEdgeTriangleDensity();
    }
    final double density = perEdgeTriangleDensity.get(edgeId);
    return density;
}
/**
 * Private helper reporting whether the per-node eccentricity cache
 * currently exists.
 *
 * @return true if initialized, else false
 */
private boolean isInitializedPerNodeEccentricity()
{
    return !(perNodeEccentricity == null);
}
/**
 * Initializes the per-node eccentricity, and with it the radius, diameter
 * and connectedness flag of the graph. Node neighbors and degrees are
 * initialized first if needed.
 *
 * Following the referenced publication, a lower and an upper bound are kept
 * on each node's eccentricity. Nodes of degree greater than 1 are processed
 * one at a time: an all-distances search gives the chosen node's exact
 * eccentricity, which tightens the bounds of every other pending node; a
 * pending node whose bounds meet is resolved without a search of its own.
 * A node of degree 1 is never searched from: its eccentricity is one more
 * than that of its only neighbor, and is assigned when that neighbor is
 * resolved (by either route). Nodes still unresolved at the end are
 * isolated nodes (eccentricity 0) or the two ends of an isolated edge
 * (eccentricity 1).
 *
 * If any node is unreachable from another, the graph is flagged as not
 * connected and radius and diameter are both set to Integer.MAX_VALUE.
 * Otherwise radius and diameter are the smallest and largest eccentricity.
 *
 * @throws RuntimeException if an internal consistency check fails
 */
@PublicationReference(author = "Frank W. Takes and Walter A. Kosters", title
    = "Computing the Eccentricity Distribution of Large Graphs", type
    = PublicationType.Journal, publication
    = "Algorithms - Open Access Journal", year = 2013, pages =
    {
        100, 118
    },
    url = "http://www.mdpi.com/1999-4893/6/1/100")
public void initializePerNodeEccentricity()
{
    if (!isInitializedNodeNeighbors())
    {
        initializeNodeNeighbors();
    }
    if (!isInitializedNodeDegrees())
    {
        initializeNodeDegrees();
    }
    int n = numNodes();
    perNodeEccentricity = new IntArrayList(n);
    int[] minEccentricity = new int[n];
    int[] maxEccentricity = new int[n];
    radius = Integer.MAX_VALUE;
    diameter = Integer.MIN_VALUE;
    isWcc = true;
    Set<Integer> unvisitedNodes = new HashSet<>(n);
    // Initialize all values
    for (int i = 0; i < n; ++i)
    {
        // Integer.MAX_VALUE marks "eccentricity not yet known"
        perNodeEccentricity.add(Integer.MAX_VALUE);
        minEccentricity[i] = Integer.MIN_VALUE;
        maxEccentricity[i] = Integer.MAX_VALUE;
        // I'll handle degree 1 nodes elsewhere
        if (degree(i) > 1)
        {
            unvisitedNodes.add(i);
        }
    }
    // Pre-fill the trivial one- and two-node graphs
    if (n == 1)
    {
        perNodeEccentricity.set(0, 0);
    }
    else if (n == 2)
    {
        if (numEdges() >= 1)
        {
            perNodeEccentricity.set(0, 1);
            perNodeEccentricity.set(1, 1);
        }
    }
    // Remembers whether any all-distances search ran; connectivity is only
    // detected by such a search
    boolean searched = false;
    while (!unvisitedNodes.isEmpty())
    {
        searched = true;
        // Can't keep the iterator as I'll possibly be removing other nodes during this pass
        int v = unvisitedNodes.iterator().next();
        unvisitedNodes.remove(v);
        int[] allDistances = computeAllDistancesForNode(v);
        // Eccentricity of v: the largest finite distance from v
        int e_v = 0;
        for (int j = 0; j < n; ++j)
        {
            if (allDistances[j] == Integer.MAX_VALUE)
            {
                isWcc = false;
                continue;
            }
            e_v = Math.max(e_v, allDistances[j]);
        }
        if (e_v > maxEccentricity[v] || e_v < minEccentricity[v])
        {
            throw new RuntimeException(
                "This should be impossible. Please report bug.");
        }
        perNodeEccentricity.set(v, e_v);
        radius = Math.min(radius, e_v);
        diameter = Math.max(diameter, e_v);
        // Degree-1 neighbors of v are one step further from everything
        // NOTE(review): this over-counts by one when v's component is just
        // v (with a self-loop) and a single leaf -- confirm whether that
        // case matters.
        for (int neighbor : neighborIds(v))
        {
            // Ignore self loops
            if (neighbor == v)
            {
                continue;
            }
            if (degree(neighbor) == 1)
            {
                perNodeEccentricity.set(neighbor, e_v + 1);
                diameter = Math.max(diameter, e_v + 1);
            }
        }
        // Tighten the bounds of every node still pending
        for (Iterator<Integer> iter = unvisitedNodes.iterator();
            iter.hasNext();)
        {
            int node = iter.next();
            // No way to update for unreachable nodes
            if (allDistances[node] == Integer.MAX_VALUE)
            {
                continue;
            }
            minEccentricity[node] = Math.max(minEccentricity[node],
                Math.max(e_v - allDistances[node], allDistances[node]));
            maxEccentricity[node] = Math.min(maxEccentricity[node], e_v
                + allDistances[node]);
            if (minEccentricity[node] == maxEccentricity[node])
            {
                // Bounds met: the eccentricity is known without a search
                int resolved = minEccentricity[node];
                perNodeEccentricity.set(node, resolved);
                radius = Math.min(radius, resolved);
                diameter = Math.max(diameter, resolved);
                iter.remove();
                // This node will never be searched from, so its degree-1
                // neighbors must be assigned here; otherwise they would be
                // mistaken for the ends of an isolated edge below.
                for (int neighbor : neighborIds(node))
                {
                    if ((neighbor != node) && (degree(neighbor) == 1))
                    {
                        perNodeEccentricity.set(neighbor, resolved + 1);
                        diameter = Math.max(diameter, resolved + 1);
                    }
                }
            }
        }
    }
    for (int i = 0; i < perNodeEccentricity.size(); ++i)
    {
        // This node was never visited
        if (perNodeEccentricity.get(i) == Integer.MAX_VALUE)
        {
            // In this case, it's in a 2-node disconnected barbell
            if ((degree(i) == 1) && (neighborIds(i).iterator().next() != i))
            {
                perNodeEccentricity.set(i, 1);
                continue;
            }
            // In this case, it's in a 1-node spot
            if (degree(i) == 0)
            {
                perNodeEccentricity.set(i, 0);
                continue;
            }
            throw new RuntimeException("Found node " + i + " has degree "
                + degree(i) + ", but never visited? Please report bug "
                + "with this exception and example graph");
        }
    }
    if (!searched && (n > 0))
    {
        // No node has degree > 1, so the graph consists solely of isolated
        // nodes and isolated edges. It is connected only when it is a
        // single node or a single edge joining two nodes.
        isWcc = (n == 1) || ((n == 2) && (numEdges() >= 1));
        if (isWcc)
        {
            // No search updated these, so take them from the filled-in
            // eccentricities
            for (int i = 0; i < n; ++i)
            {
                radius = Math.min(radius, perNodeEccentricity.get(i));
                diameter = Math.max(diameter, perNodeEccentricity.get(i));
            }
        }
    }
    if (!isWcc)
    {
        radius = Integer.MAX_VALUE;
        diameter = Integer.MAX_VALUE;
    }
}
/**
 * Helper which computes the shortest-path distance (in number of edges)
 * from the input node to every node in the graph. The graph is treated as
 * unweighted, so every edge costs 1. The input node's own distance is 0.
 * Any node that cannot be reached from the input node is reported as
 * Integer.MAX_VALUE.
 *
 * With unit edge costs Dijkstra's algorithm reduces to a breadth-first
 * search: nodes leave the queue in non-decreasing distance order, so the
 * first distance assigned to a node is already its final one. A plain FIFO
 * queue is therefore sufficient and no priority queue or re-relaxation of
 * already-discovered nodes is required.
 *
 * @param nodeId The node from which to start the search
 * @return an array indexed by node id holding the distance from nodeId to
 * each node (Integer.MAX_VALUE for unreachable nodes)
 * @throws ArrayIndexOutOfBoundsException if nodeId is not a valid index
 * into an array of numNodes() entries
 */
@PublicationReference(author = "Wikipedia", title = "Dijkstra's algorithm",
    type = PublicationType.WebPage, url
    = "https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm", year = 2016)
public int[] computeAllDistancesForNode(int nodeId)
{
    int n = numNodes();
    int[] allDistances = new int[n];
    // Integer.MAX_VALUE doubles as the "not yet discovered" marker
    for (int i = 0; i < n; ++i)
    {
        allDistances[i] = Integer.MAX_VALUE;
    }
    allDistances[nodeId] = 0;

    Queue<Integer> frontier = new LinkedList<>();
    frontier.add(nodeId);
    while (!frontier.isEmpty())
    {
        int current = frontier.remove();
        // Every neighbor discovered from here is exactly one edge farther
        int newLen = allDistances[current] + 1;
        for (int neighbor : neighborIds(current))
        {
            // Only undiscovered nodes are updated; a node seen before was
            // reached at an equal or shorter distance (this also makes
            // self loops and repeated neighbors harmless)
            if (allDistances[neighbor] == Integer.MAX_VALUE)
            {
                allDistances[neighbor] = newLen;
                frontier.add(neighbor);
            }
        }
    }

    return allDistances;
}
/**
 * Looks up the eccentricity cached for the node with the input id. If the
 * per-node eccentricities have not been computed yet, they are computed
 * (for all nodes) before the lookup.
 *
 * @param nodeId The id of the node whose eccentricity is requested
 * @return the eccentricity stored for the input node id
 */
public int getPerNodeEccentricityById(int nodeId)
{
    if (isInitializedPerNodeEccentricity())
    {
        // Already cached -- nothing to compute
        return perNodeEccentricity.get(nodeId);
    }

    initializePerNodeEccentricity();
    return perNodeEccentricity.get(nodeId);
}
/**
 * Looks up the eccentricity cached for the input node. If the per-node
 * eccentricities have not been computed yet, they are computed (for all
 * nodes) first; the node is then translated to its id via the graph and
 * the cached value for that id is returned.
 *
 * @param node The node whose eccentricity is requested
 * @return the eccentricity stored for the input node
 */
public int getPerNodeEccentricity(NodeNameType node)
{
    if (isInitializedPerNodeEccentricity())
    {
        // Already cached -- nothing to compute
        return perNodeEccentricity.get(graph.getNodeId(node));
    }

    initializePerNodeEccentricity();
    return perNodeEccentricity.get(graph.getNodeId(node));
}
/**
 * Returns the graph's radius, computing the per-node eccentricities first
 * if they are not cached yet. NOTE: The radius is ill-defined when the
 * graph is not a single weakly connected component (WCC) -- there would be
 * one radius per component -- so Integer.MAX_VALUE is returned in that
 * case.
 *
 * @return the radius for the graph if it is WCC, else Integer.MAX_VALUE
 */
public int getRadius()
{
    boolean alreadyComputed = isInitializedPerNodeEccentricity();
    if (!alreadyComputed)
    {
        // Radius is a by-product of the eccentricity computation
        initializePerNodeEccentricity();
    }

    return radius;
}
/**
 * Returns the graph's diameter, computing the per-node eccentricities
 * first if they are not cached yet. NOTE: The diameter is ill-defined when
 * the graph is not a single weakly connected component (WCC) -- there
 * would be one diameter per component -- so Integer.MAX_VALUE is returned
 * in that case.
 *
 * @return the diameter for the graph if it is WCC, else Integer.MAX_VALUE
 */
public int getDiameter()
{
    boolean alreadyComputed = isInitializedPerNodeEccentricity();
    if (!alreadyComputed)
    {
        // Diameter is a by-product of the eccentricity computation
        initializePerNodeEccentricity();
    }

    return diameter;
}
/**
 * Reports whether the graph is a single weakly connected component (WCC).
 * The answer is determined as part of the per-node eccentricity
 * computation, which is run first if its results are not cached yet.
 *
 * @return true if this is WCC, else false
 */
public boolean isWcc()
{
    if (isInitializedPerNodeEccentricity())
    {
        // Flag was already set by an earlier eccentricity computation
        return isWcc;
    }

    initializePerNodeEccentricity();
    return isWcc;
}
/**
 * Private helper that reports whether the per-node betweenness centrality
 * cache currently exists (i.e., is non-null).
 *
 * @return true if initialized, else false
 */
private boolean isInitializedPerNodeBetweennessCentrality()
{
    boolean notComputedYet = (perNodeBetweenCentrality == null);

    return !notComputedYet;
}
/**
 * Initializes the per-node betweenness centrality for every node in the
 * graph and caches the results. Each node in turn is used as the source of
 * a breadth-first search that counts shortest paths; the dependencies of
 * that source on all other nodes are then accumulated in reverse search
 * order (see the referenced paper). Finally, every score is scaled by
 * 2 / ((n - 1) * (n - 2)), where n is the number of nodes.
 *
 * For graphs with fewer than three nodes there are no pairs of "other"
 * nodes for any node to lie between, so all scores are left at 0 and the
 * scaling (which would otherwise divide by zero and produce NaN) is
 * skipped.
 */
@PublicationReference(author = "Ulrik Brandes", title
    = "A Faster Algorithm for Betweenness Centrality", type
    = PublicationType.Journal, publication
    = "Journal of Mathematical Sociology", year = 2001, pages =
    {
        163, 177
    })
public void initializePerNodeBetweennessCentrality()
{
    int n = numNodes();
    perNodeBetweenCentrality = new DoubleArrayList(n);
    for (int i = 0; i < n; ++i)
    {
        perNodeBetweenCentrality.add(0);
    }

    // Variable names follow the paper:
    //   P[w]     = predecessors of w on shortest paths from the source
    //   Q        = breadth-first search queue
    //   S        = nodes in the order they were dequeued
    //   sigma[w] = number of shortest paths from the source to w
    //   d[w]     = distance from the source to w (-1 = not yet found)
    //   delta[w] = dependency of the source on w
    // The predecessor lists are allocated once and cleared per source
    List<List<Integer>> P = new ArrayList<>(n);
    for (int j = 0; j < n; ++j)
    {
        P.add(new ArrayList<Integer>());
    }
    Queue<Integer> Q = new LinkedList<>();
    Stack<Integer> S = new Stack<>();
    double[] sigma = new double[n];
    int[] d = new int[n];
    double[] delta = new double[n];

    for (int s = 0; s < n; ++s)
    {
        // Reset all per-source state
        S.clear();
        Q.clear();
        for (int j = 0; j < n; ++j)
        {
            P.get(j).clear();
            sigma[j] = 0;
            d[j] = -1;
            delta[j] = 0;
        }
        sigma[s] = 1;
        d[s] = 0;
        Q.add(s);

        // Breadth-first search from s, counting shortest paths
        while (!Q.isEmpty())
        {
            int v = Q.remove();
            S.push(v);
            for (int w : neighborIds(v))
            {
                // w found for the first time?
                if (d[w] < 0)
                {
                    Q.add(w);
                    d[w] = d[v] + 1;
                }
                // shortest path to w via v?
                if (d[w] == (d[v] + 1))
                {
                    sigma[w] += sigma[v];
                    P.get(w).add(v);
                }
            }
        }

        // S returns vertices in order of non-increasing distance from s
        while (!S.isEmpty())
        {
            int w = S.pop();
            for (int v : P.get(w))
            {
                delta[v] += (sigma[v] / sigma[w]) * (1 + delta[w]);
            }
            if (w != s)
            {
                perNodeBetweenCentrality.plusEquals(w, delta[w]);
            }
        }
    }

    // Different from paper, but see https://en.wikipedia.org/wiki/Betweenness_centrality
    // Normalize by number of pairs not including each node.
    // Skipped when n <= 2: the pair count is zero there, and 0 * (2.0 / 0)
    // would turn every (correctly zero) score into NaN.
    if (n > 2)
    {
        // Multiply as doubles; the int product overflows for large n
        double normalizeBy = 2.0 / ((n - 1.0) * (n - 2.0));
        for (int i = 0; i < n; ++i)
        {
            perNodeBetweenCentrality.set(i, normalizeBy
                * perNodeBetweenCentrality.get(i));
        }
    }
}
/**
 * Looks up the betweenness centrality cached for the node with the input
 * id. If the per-node betweenness centralities have not been computed yet,
 * they are computed (for all nodes) before the lookup.
 *
 * @param nodeId The id of the node whose betweenness centrality is
 * requested
 * @return the betweenness centrality stored for the input node id
 */
public double getPerNodeBetweennessCentralityById(int nodeId)
{
    if (isInitializedPerNodeBetweennessCentrality())
    {
        // Already cached -- nothing to compute
        return perNodeBetweenCentrality.get(nodeId);
    }

    initializePerNodeBetweennessCentrality();
    return perNodeBetweenCentrality.get(nodeId);
}
/**
 * Looks up the betweenness centrality cached for the input node. If the
 * per-node betweenness centralities have not been computed yet, they are
 * computed (for all nodes) first; the node is then translated to its id
 * via the graph and the cached value for that id is returned.
 *
 * @param node The node whose betweenness centrality is requested
 * @return the betweenness centrality stored for the input node
 */
public double getPerNodeBetweennessCentrality(NodeNameType node)
{
    if (isInitializedPerNodeBetweennessCentrality())
    {
        // Already cached -- nothing to compute
        return perNodeBetweenCentrality.get(graph.getNodeId(node));
    }

    initializePerNodeBetweennessCentrality();
    return perNodeBetweenCentrality.get(graph.getNodeId(node));
}
}