package uk.ac.rhul.cs.cl1; import java.util.Arrays; import java.util.Collection; import java.util.Set; import java.util.TreeSet; import com.sosnoski.util.array.IntArray; import uk.ac.rhul.cs.collections.Multiset; import uk.ac.rhul.cs.collections.TreeMultiset; import uk.ac.rhul.cs.graph.Directedness; import uk.ac.rhul.cs.graph.Edge; import uk.ac.rhul.cs.graph.Graph; import uk.ac.rhul.cs.stats.independentsamples.MannWhitneyTest; import uk.ac.rhul.cs.stats.tests.H1; import com.sosnoski.util.hashset.IntHashSet; /** * A mutable subset of the nodes of a given graph. * * This class is used to grow cohesive subgroups on a given graph, starting from a seed * node. The node set gives facilities to query the total weight of internal and boundary * edges or to iterate over the set of external boundary nodes. * * @see NodeSet * @author Tamas Nepusz <tamas@cs.rhul.ac.uk> */ public class MutableNodeSet extends NodeSet { /** * Multiset that keeps track of the external boundary nodes of the subset. */ protected Multiset<Integer> externalBoundaryNodes = new TreeMultiset<Integer>(); /** * A hash of node indices in the set for quick membership checks */ protected IntHashSet memberHashSet = new IntHashSet(); /** * Auxiliary array used when adding/removing nodes * * For nodes within the set, this array stores the total weight of internal * edges incident on the node. For nodes outside the set, this array stores * the total weight of boundary edges incident on the node. */ public double[] inWeights = null; /** * Stores the total weight of each node, i.e. the sum of all the weights * incident on the given node. This array is useful because it holds that: * * <ul> * <li>For nodes within the set, this array stores the total weight * of <em>boundary</em> edges incident on the node.</li> * <li>For nodes outside the set, this array stores the total weight * of <em>external</em> edges incident on the node.</li> * </ul> */ public double[] totalWeights = null; /** * Constructs a new, empty mutable nodeset on the given graph. * * @param graph the graph on which the nodeset is created */ public MutableNodeSet(Graph graph) { super(graph); this.members = new TreeSet<Integer>(); initializeInAndTotalWeights(); } /** * Constructs a new nodeset on the given graph. * * @param graph the graph on which the nodeset is created * @param members a collection containing the member IDs */ public MutableNodeSet(Graph graph, Collection<Integer> members) { this(graph); this.setMembers(members); } /** * Constructs a new nodeset on the given graph. * * @param graph the graph on which the nodeset is created * @param members an array containing the member IDs */ public MutableNodeSet(Graph graph, int[] members) { this(graph); this.setMembers(members); } /** * Constructs a new mutable nodeset by cloning an existing one. * * @param nodeSet the nodeset to clone */ private MutableNodeSet(MutableNodeSet nodeSet) { super(nodeSet.graph); this.members = new TreeSet<Integer>(nodeSet.members); for (int member: members) { this.memberHashSet.add(member); } totalInternalEdgeWeight = nodeSet.totalInternalEdgeWeight; totalBoundaryEdgeWeight = nodeSet.totalBoundaryEdgeWeight; inWeights = nodeSet.inWeights.clone(); // totalWeights does not have to be cloned because the graph is the same, // therefore totalWeights is also the same totalWeights = nodeSet.totalWeights; } /** * Constructs a new mutable nodeset from the given non-mutable nodeset * * @param nodeSet the original, non-mutable nodeset */ public MutableNodeSet(NodeSet nodeSet) { this(nodeSet.getGraph(), nodeSet.getMembers()); } protected void initializeInAndTotalWeights() { int n = graph.getNodeCount(); totalInternalEdgeWeight = 0.0; totalBoundaryEdgeWeight = 0.0; if (inWeights == null) { inWeights = new double[n]; } else { // Optimization here: we can be sure that the only nonzero elements in inWeights are // for internal or boundary nodes, so it is enough to iterate over them if the graph // is large. Otherwise it is probably faster to simply fill the entire array with zeros. if (n >= 5000 && members.size() < n / 4) { for (int member: members) { inWeights[member] = 0.0; } for (int boundaryNode: externalBoundaryNodes.elementSet()) { inWeights[boundaryNode] = 0.0; } } else { Arrays.fill(inWeights, 0.0); } } if (totalWeights == null) { totalWeights = new double[n]; for (Edge e: graph) { totalWeights[e.source] += e.weight; totalWeights[e.target] += e.weight; } } } /** * Adds a node to this nodeset * * @param node the index of the node being added * @return true if the node was added, false if the node was already a member */ public boolean add(int node) { if (memberHashSet.contains(node)) return false; /* Things will change, invalidate the cached values */ invalidateCache(); /* First, increase the internal and the boundary weights with the * appropriate amounts. Here we are actually increasing totalBoundaryEdgeWeight * by outWeights[node] - inWeights[node] but make use of the fact that * outWeights[node] = totalWeights[node] - inWeights[node] */ totalInternalEdgeWeight += inWeights[node]; totalBoundaryEdgeWeight += totalWeights[node] - 2 * inWeights[node]; /* For each edge incident on the given node, make some adjustments to inWeights */ for (int adjEdge: graph.getAdjacentEdgeIndicesArray(node, Directedness.ALL)) { int adjNode = graph.getEdgeEndpoint(adjEdge, node); if (adjNode == node) continue; inWeights[adjNode] += graph.getEdgeWeight(adjEdge); if (!memberHashSet.contains(adjNode)) { externalBoundaryNodes.add(adjNode); } } /* Add the node to the nodeset */ memberHashSet.add(node); members.add(node); externalBoundaryNodes.setCount(node, 0); return true; } /** * Adds more nodes to this nodeset * * @param nodes a collection of the nodes being added * @return the number of nodes that were not members originally */ public int add(int[] nodes) { int result = 0; for (int i: nodes) if (this.add(i)) result++; return result; } /** * Clears the nodeset */ public void clear() { /* Things will change, invalidate the cached values */ invalidateCache(); /* This must be called _before_ we clear the members because it uses the old members */ initializeInAndTotalWeights(); externalBoundaryNodes.clear(); members.clear(); memberHashSet.clear(); } /** * Creates a semantically equivalent copy of this MutableNodeSet. */ public MutableNodeSet clone() { return new MutableNodeSet(this); } /** * Freezes the nodeset (i.e. converts it to a non-mutable NodeSet) */ public NodeSet freeze() { return new NodeSet(this.graph, this.members); } /** * Returns the commitment of a node to this nodeset * * The commitment of a node is defined as the total weight of edges leading from * this node to other members of this nodeset, divided by the total weight of edges * adjacent to the node. * * @param nodeIndex the index of the node * @return the commitment of the node */ @Override public double getCommitment(int nodeIndex) { double den = this.totalWeights[nodeIndex]; return den == 0 ? 0 : (this.inWeights[nodeIndex] / den); } @Override public int[] getExternalBoundaryNodes() { Set<Integer> nodes = externalBoundaryNodes.elementSet(); int[] result = new int[nodes.size()]; int i = 0; for (int member: nodes) { result[i++] = member; } return result; } /** * Returns the internal weight of a given node */ @Override public double getInternalWeight(int nodeIndex) { return this.inWeights[nodeIndex]; } /** * Returns the total weight of edges that are adjacent to the given node and another internal node. * * The query node can either be internal or external. For internal nodes, the returned weight is * equal to the amount with which the total internal edge weight of the node set would decrease * if the node is removed from the cluster. For external nodes, the returned weight is equal to * the amount with which the total internal edge weight of the node set would increase if the * node is added to the cluster. * * @param nodeIndex the index of the node */ public double getTotalAdjacentInternalWeight(int nodeIndex) { return this.inWeights[nodeIndex]; } /** * Returns an IntHashSet for efficient repeated membership checks * * MutableNodeSet maintains memberHashSet in parallel with the ordinary members * variable, so we just return it here. */ protected IntHashSet getMemberHashSet() { return memberHashSet; } protected double getSignificanceReal() { int i, n = members.size(); double[] memberInWeights = new double[n]; double[] memberOutWeights = new double[n]; i = 0; for (int member: members) { memberInWeights[i] = inWeights[member]; memberOutWeights[i] = totalWeights[member] - memberInWeights[i]; i++; } MannWhitneyTest test = new MannWhitneyTest(memberInWeights, memberOutWeights, H1.GREATER_THAN); return test.getSP(); } /** * Invalidates the cached member variables when the nodeset changes */ private void invalidateCache() { } /** * Removes a node from this nodeset * * @param node the index of the node being removed * @return true if the node was removed, false if the node was not a member */ public boolean remove(int node) { if (!memberHashSet.contains(node)) return false; /* Things will change, invalidate the cached values */ invalidateCache(); /* First, decrease the internal and the boundary weights with the * appropriate amounts. Here we are actually decreasing totalBoundaryEdgeWeight * by outWeights[node] - inWeights[node] but make use of the fact that * outWeights[node] = totalWeights[node] - inWeights[node] */ totalInternalEdgeWeight -= inWeights[node]; totalBoundaryEdgeWeight -= totalWeights[node] - 2 * inWeights[node]; /* For each edge incident on the given node, make some adjustments to inWeights */ for (int adjEdge: graph.getAdjacentEdgeIndicesArray(node, Directedness.ALL)) { int adjNode = graph.getEdgeEndpoint(adjEdge, node); if (adjNode == node) continue; inWeights[adjNode] -= graph.getEdgeWeight(adjEdge); if (memberHashSet.contains(adjNode)) { externalBoundaryNodes.add(node); } else { externalBoundaryNodes.remove(adjNode); } } /* Remove the node from the nodeset */ memberHashSet.remove(node); members.remove(node); return true; } /** * Removes more nodes from this nodeset * * @param nodes a collection of the nodes being removed */ public void remove(int[] nodes) { for (int i: nodes) this.remove(i); } /** * Sets the members of this nodeset */ @Override protected void setMembers(int[] members) { this.clear(); for (int member: members) this.add(member); } /** * Sets the members of this nodeset */ public void setMembers(Iterable<Integer> members) { this.clear(); for (int member: members) this.add(member); } }