package uk.ac.rhul.cs.cl1; import java.util.Arrays; import java.util.Collection; import java.util.Iterator; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import uk.ac.rhul.cs.graph.BreadthFirstSearch; import uk.ac.rhul.cs.graph.Directedness; import uk.ac.rhul.cs.graph.Graph; import uk.ac.rhul.cs.stats.independentsamples.MannWhitneyTest; import uk.ac.rhul.cs.stats.tests.H1; import uk.ac.rhul.cs.utils.StringUtils; import uk.ac.rhul.cs.utils.UniqueIDGenerator; import com.sosnoski.util.array.IntArray; import com.sosnoski.util.hashset.IntHashSet; /** * A subset of the nodes of a given graph. * * A subset on the given graph classifies the nodes of the graph into two groups: * internal nodes (those that are within the subset) and external nodes (those that are * outside the subset). External nodes that are adjacent to at least one internal node * are called external boundary nodes; similarly, internal nodes that are adjacent to * at least one external node are called internal boundary nodes. * * The edge classification is similar, but there are four different kinds of edges * depending on where the endpoints are. If both endpoints are internal, then the edge * is called internal as well. If at least one endpoint is external, the edge is called * external. Edges with <i>exactly</i> one external endpoint are called boundary * edges. * * @author Tamas Nepusz <tamas@cs.rhul.ac.uk> */ public class NodeSet implements Iterable<Integer>, Intersectable<NodeSet>, Sized { /** * The graph associated with this node set */ protected Graph graph = null; /** * The set of node indices in the set. * * This list is always sorted in ascending order. */ protected SortedSet<Integer> members = null; /** * Total weight of the internal edges */ public double totalInternalEdgeWeight = 0.0; /** * Total weight of the boundary edges */ public double totalBoundaryEdgeWeight = 0.0; /** * Significance of the nodeset */ protected Double significance = null; /** * Constructs a new, empty nodeset that does not belong to any graph. */ public NodeSet() { this.graph = null; } /** * Constructs a new, empty nodeset on the given graph. * * @param graph the graph on which the nodeset is created */ public NodeSet(Graph graph) { this.graph = graph; } /** * Constructs a new nodeset on the given graph. * * @param graph the graph on which the nodeset is created * @param members a collection containing the member IDs */ public NodeSet(Graph graph, Collection<Integer> members) { this(graph); this.setMembers(members); } /** * Constructs a new nodeset on the given graph. * * @param graph the graph on which the nodeset is created * @param members an array containing the member IDs */ public NodeSet(Graph graph, int[] members) { this(graph); this.setMembers(members); } /** * Compares a nodeset with another (lexicographical order). */ public int compareTo(NodeSet other) { if (this.members == null && other.members == null) return 0; if (this.members == null) return -1; if (other.members == null) return 1; Iterator<Integer> it1 = this.members.iterator(); Iterator<Integer> it2 = other.members.iterator(); while (it1.hasNext() && it2.hasNext()) { Integer i1 = it1.next(), i2 = it2.next(); if (i1 < i2) return -1; if (i1 > i2) return 1; } if (it1.hasNext()) return 1; if (it2.hasNext()) return -1; return 0; } /** * Checks whether a node is a member of the nodeset or not * @param idx index of the node being tested * @return true if the node is a member of the set, false otherwise */ public boolean contains(int idx) { return members.contains(idx); } /** * Checks whether any of the given nodes is a member of the nodeset or not * @param idxs indexes of the node being tested * @return true if any node is a member of the set, false otherwise */ public boolean containsAny(Collection<Integer> idxs) { for (Integer i: idxs) if (this.members.contains(i)) return true; return false; } /** * Checks whether all of the given nodes are a member of the nodeset or not * @param idxs indexes of the node being tested * @return true if all the nodes are a member of the set, false otherwise */ public boolean containsAll(Collection<Integer> idxs) { return this.members.containsAll(idxs); } /** * Checks whether two nodesets are equal. * * Two nodesets are equal if they are the same reference or if they * belong to the same graph and have the same members */ @Override public boolean equals(Object o) { if (o == this) return true; if (!(o instanceof NodeSet)) return false; NodeSet other = (NodeSet)o; return other.graph.equals(this.graph) && other.members.equals(this.members); } /** * Returns the commitment of a node to this nodeset * * The commitment of a node is defined as the total weight of edges leading from * this node to other members of this nodeset, divided by the total weight of edges * adjacent to the node. * * @param nodeIndex the index of the node * @return the commitment of the node */ public double getCommitment(int nodeIndex) { IntHashSet memberHashSet = this.getMemberHashSet(); double in = 0.0, out = 0.0; int[] edgeIdxs = this.graph.getAdjacentEdgeIndicesArray(nodeIndex, Directedness.ALL); for (int edgeIdx: edgeIdxs) { double weight = this.graph.getEdgeWeight(edgeIdx); int endpoint = this.graph.getEdgeEndpoint(edgeIdx, nodeIndex); if (memberHashSet.contains(endpoint)) { /* This is an internal edge */ in += weight; } else { out += weight; } } if (in + out == 0) return 0.0; return in / (in + out); } /** * Returns the graph this nodeset is associated to * @return the graph */ public Graph getGraph() { return graph; } /** * Returns an IntHashSet for efficient repeated membership checks */ protected IntHashSet getMemberHashSet() { // We use an IntHashSet for membership checks, it's more efficient IntHashSet memberSet = new IntHashSet(); for (int i: members) memberSet.add(i); return memberSet; } /** * Returns the members of this nodeset * @return the members */ public SortedSet<Integer> getMembers() { return new TreeSet<Integer>(members); } /** * Returns the names of the members of this nodeset * @return the names of the members */ public String[] getMemberNames() { String[] result = new String[this.members.size()]; int i = 0; for (Integer member: this.members) { result[i] = this.graph.getNodeName(member); i++; } return result; } /** * Returns the hash code of this nodeset * * This class is overridden to ensure that equal nodesets have equal hash codes */ public int hashCode() { return graph.hashCode() + members.hashCode(); } /** * Checks whether the nodeset is connected in the graph */ public boolean isConnected() { if (this.members.size() < 2) return true; BreadthFirstSearch bfs = new BreadthFirstSearch(this.graph, this.members.first()); Integer[] dummy = {}; bfs.restrictToSubgraph(this.members.toArray(dummy)); return bfs.toArray().length == this.members.size(); } /** * Checks whether the given node is a cut vertex of the nodeset. * * A vertex is a cut vertex of the nodeset if its removal would make the * nodeset disconnected. */ public boolean isCutVertex(int index) { if (this.members.isEmpty()) return false; IntArray newMembers = new IntArray(); for (int member: this.members) if (member != index) newMembers.add(member); if (newMembers.size() == 0) return false; BreadthFirstSearch bfs = new BreadthFirstSearch(this.graph, newMembers.get(0)); bfs.restrictToSubgraph(newMembers.toArray()); return bfs.toArray().length != newMembers.size(); } /** * Returns whether the nodeset is empty or not */ public boolean isEmpty() { return this.members.isEmpty(); } /** * Returns the number of nodes in this nodeset */ public int size() { return this.members.size(); } /** * Sets the members of this nodeset */ protected void setMembers(Iterable<Integer> members) { this.members = new TreeSet<Integer>(); if (members == null) return; for (Integer member: members) this.members.add(member); recalculate(); } /** * Sets the members of this nodeset */ protected void setMembers(int[] members) { this.members = new TreeSet<Integer>(); if (members == null) return; for (int member: members) this.members.add(member); recalculate(); } /** * Recalculate some internal variables when the member set changes */ protected void recalculate() { IntHashSet memberHashSet = this.getMemberHashSet(); this.totalBoundaryEdgeWeight = 0.0; this.totalInternalEdgeWeight = 0.0; for (int i: members) { int[] edgeIdxs = this.graph.getAdjacentEdgeIndicesArray(i, Directedness.ALL); for (int edgeIdx: edgeIdxs) { double weight = this.graph.getEdgeWeight(edgeIdx); int endpoint = this.graph.getEdgeEndpoint(edgeIdx, i); if (memberHashSet.contains(endpoint)) { /* This is an internal edge */ this.totalInternalEdgeWeight += weight; } else { /* This is a boundary edge */ this.totalBoundaryEdgeWeight += weight; } } } /* Internal edges were found twice, divide the result by two */ this.totalInternalEdgeWeight /= 2.0; } /** * Returns the density of this nodeset */ public double getDensity() { if (this.size() < 2) return 0.0; return 2.0 * this.totalInternalEdgeWeight / (this.size() * (this.size() - 1)); } /** * Returns the internal weight of a given node */ public double getInternalWeight(int nodeIndex) { IntHashSet memberHashSet = this.getMemberHashSet(); double result = 0.0; int[] edgeIdxs = this.graph.getAdjacentEdgeIndicesArray(nodeIndex, Directedness.ALL); for (int edgeIdx: edgeIdxs) { double weight = this.graph.getEdgeWeight(edgeIdx); int endpoint = this.graph.getEdgeEndpoint(edgeIdx, nodeIndex); if (memberHashSet.contains(endpoint)) { /* This is an internal edge */ result += weight; } } return result; } /** * Returns the intersection of this nodeset with another */ public NodeSet getIntersectionWith(NodeSet other) { Set<Integer> smaller; IntHashSet larger; IntArray intersection = new IntArray(); if (this.size() < other.size()) { smaller = this.members; larger = other.getMemberHashSet(); } else { smaller = other.members; larger = this.getMemberHashSet(); } for (int member: smaller) if (larger.contains(member)) intersection.add(member); return new NodeSet(this.getGraph(), intersection.toArray()); } /** * Returns the size of the intersection between this nodeset and another */ public int getIntersectionSizeWith(NodeSet other) { int isectSize = 0; Set<Integer> smaller; Set<Integer> larger; if (this.size() < other.size()) { smaller = this.members; larger = other.members; } else { smaller = other.members; larger = this.members; } for (int member: smaller) if (larger.contains(member)) isectSize++; return isectSize; } /** * Returns the statistical significance of the nodeset * * The statistical significance of the nodeset is the p-value of a one-sided * Mann-Whitney U test on the in-weights and the out-weights. It tells us * whether the mean in-weight is significantly larger than the mean * out-weight; in other words, it roughly tells us what is the probability of * the community satisfying the weak criterion of Radicchi et al purely * by chance. */ public double getSignificance() { if (significance == null) significance = this.getSignificanceReal(); return significance; } protected double getSignificanceReal() { double[] inWeights = new double[this.size()]; double[] outWeights = new double[this.size()]; IntHashSet memberHashSet = this.getMemberHashSet(); int j; Arrays.fill(inWeights, 0.0); Arrays.fill(outWeights, 0.0); j = 0; for (int i: members) { int[] edgeIdxs = this.graph.getAdjacentEdgeIndicesArray(i, Directedness.ALL); for (int edgeIdx: edgeIdxs) { double weight = this.graph.getEdgeWeight(edgeIdx); int endpoint = this.graph.getEdgeEndpoint(edgeIdx, i); if (memberHashSet.contains(endpoint)) { /* This is an internal edge */ inWeights[j] += weight; } else { /* This is a boundary edge */ outWeights[j] += weight; } } j++; } /* Internal edges were found twice, divide the result by two */ MannWhitneyTest test = new MannWhitneyTest(inWeights, outWeights, H1.GREATER_THAN); return test.getSP(); } /** * Extracts the subgraph spanned by the nodeset and returns it as a new {@link Graph} object * * @return the subgraph as a new {@link Graph} */ public Graph getSubgraph() { boolean directed = this.getGraph().isDirected(); Graph result = new Graph(directed); IntHashSet memberSet = this.getMemberHashSet(); UniqueIDGenerator<Integer> idGen = new UniqueIDGenerator<Integer>(result); for (int i: members) { int srcId = idGen.get(i); int[] edgeIdxs = this.graph.getAdjacentEdgeIndicesArray(i, Directedness.OUT); for (int edgeIdx: edgeIdxs) { int endpoint = this.graph.getEdgeEndpoint(edgeIdx, i); /* If not an internal edge, continue */ if (!memberSet.contains(endpoint)) continue; /* Avoid creating each edge twice in undirected graphs */ if (!directed && i > endpoint) continue; /* Add the edge */ result.createEdge(srcId, idGen.get(endpoint), this.graph.getEdgeWeight(edgeIdx)); } } return result; } /** * Returns the total internal edge weight in this nodeset */ public double getTotalInternalEdgeWeight() { return this.totalInternalEdgeWeight; } /** * Returns the total boundary edge weight in this nodeset */ public double getTotalBoundaryEdgeWeight() { return this.totalBoundaryEdgeWeight; } /** * Returns a set of all the external boundary nodes of this set */ public int[] getExternalBoundaryNodes() { IntHashSet seen = new IntHashSet(this.getMemberHashSet()); IntArray result = new IntArray(); for (int i: members) { int[] edgeIdxs = this.graph.getAdjacentEdgeIndicesArray(i, Directedness.ALL); for (int edgeIdx: edgeIdxs) { int endpoint = this.graph.getEdgeEndpoint(edgeIdx, i); if (!seen.contains(endpoint)) { /* This is an external boundary node that we haven't seen yet */ seen.add(endpoint); result.add(endpoint); } } } return result.toArray(); } /** * Iterates over the members of this nodeset */ public Iterator<Integer> iterator() { return this.members.iterator(); } /** * Returns the members of this nodeset as an array. */ public int[] toArray() { int i = 0; int[] result = new int[members.size()]; for (int member: members) { result[i++] = member; } return result; } /** * Prints the nodes in this set to a string */ public String toString() { return toString(" "); } /** * Prints the nodes in this set to a string using a given separator */ public String toString(String separator) { return StringUtils.join(getMemberNames(), separator); } }