package edu.stanford.hci.flowmap.cluster;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.TreeSet;
import edu.stanford.hci.flowmap.structure.Node;
/**
* Turns a list of structure.Node objects into structure.Node objects that are in clusters
* and attached to one another.
*
* This software is distributed under the Berkeley Software Distribution License.
* Please see http://graphics.stanford.edu/~dphan/code/bsd.license.html
*
*/
public class HierarchicalCluster {

    /**
     * Distance bookkeeping used while clustering. The same object is reused by every
     * call on this instance and is not synchronized, so one HierarchicalCluster must
     * not run several clusterings concurrently.
     */
    private final ClusterDistance clusDist;

    public HierarchicalCluster() {
        clusDist = new ClusterDistance();
    }

    /**
     * Manages distances between clusters.
     *
     * Keeps three views of the same data in step: the pairs ordered by distance, the
     * set of clusters that still take part in at least one pair, and, per cluster,
     * the pairs it appears in.
     *
     * @author dphan
     */
    private static class ClusterDistance {
        // sorted set of cluster pairs with shortest distance first
        // NOTE(review): ordering and duplicate detection use DistancePair's natural
        // ordering, which is not visible here. Confirm that two different pairs with
        // the same distance do not compare as equal, otherwise the set drops one.
        private final TreeSet<DistancePair> closestPair;

        // set of Clusters that are under consideration
        private final HashSet<Cluster> currentClusters;

        // maps a cluster to the set of DistancePairs it appears in;
        // we need this association to be able to invalidate those distances
        // when the cluster is removed
        private final HashMap<Cluster, HashSet<DistancePair>> clusterAssoc;

        public ClusterDistance() {
            clusterAssoc = new HashMap<Cluster, HashSet<DistancePair>>();
            closestPair = new TreeSet<DistancePair>();
            currentClusters = new HashSet<Cluster>();
        }

        /** Forgets every pair, cluster and association. */
        public void clear() {
            clusterAssoc.clear();
            closestPair.clear();
            currentClusters.clear();
        }

        /** @return the number of clusters currently under consideration */
        public int numClusters() {
            return currentClusters.size();
        }

        /**
         * @return the live set of clusters under consideration (not a copy); it
         *         changes whenever pairs are added or removed
         */
        public Collection<Cluster> getClusters() {
            return currentClusters;
        }

        /**
         * Adds every pair of the collection, see {@link #addDistPair(DistancePair)}.
         * @param c the pairs to add
         */
        public void addDistPairCollection(Collection<DistancePair> c) {
            for (DistancePair dp : c) {
                addDistPair(dp);
            }
        }

        /**
         * Registers a pair: it is put into the sorted set, recorded under both of its
         * clusters, and both clusters become current clusters.
         * @param dp the pair to add
         */
        public void addDistPair(DistancePair dp) {
            Cluster one = (Cluster) dp.one;
            Cluster two = (Cluster) dp.two;
            closestPair.add(dp);
            // update cluster association list
            addClusterAssoc(one, dp);
            addClusterAssoc(two, dp);
            currentClusters.add(one);
            currentClusters.add(two);
        }

        /**
         * Adds a new pair and distance to the data structure
         * @param one
         * @param two
         */
        public void addDist(Cluster one, Cluster two) {
            double dist = one.distTo(two);
            addDistPair(new DistancePair(one, two, dist));
        }

        /**
         * Returns and removes the closest pair. Both clusters of that pair stop being
         * current clusters, and every other pair that contains either of them is
         * removed as well.
         * @return the closest pair from the data structure or null if none exists
         */
        public DistancePair removeClosest() {
            if (closestPair.isEmpty()) {
                return null;
            }
            DistancePair dp = closestPair.first();
            closestPair.remove(dp);
            Cluster one = (Cluster) dp.one;
            Cluster two = (Cluster) dp.two;
            // update association info
            removeClusterAssoc(one);
            removeClusterAssoc(two);
            // update current cluster info
            currentClusters.remove(one);
            currentClusters.remove(two);
            return dp;
        }

        /**
         * Records that key appears in value, creating the association set for key
         * if it does not exist yet.
         * @param key Cluster to associate
         * @param value DistancePair to be associated
         */
        private void addClusterAssoc(Cluster key, DistancePair value) {
            HashSet<DistancePair> assocSet = clusterAssoc.get(key);
            if (assocSet == null) {
                assocSet = new HashSet<DistancePair>();
                clusterAssoc.put(key, assocSet);
            }
            assocSet.add(value);
        }

        /**
         * Removes all information associated with that cluster.
         * @param c The cluster to be removed
         */
        private void removeClusterAssoc(Cluster c) {
            // detach the association first, so the set iterated below can never be
            // the one modified inside the loop
            HashSet<DistancePair> assocSet = clusterAssoc.remove(c);
            if (assocSet == null) {
                return;
            }
            for (DistancePair dp : assocSet) {
                // now update the closestPair Set
                closestPair.remove(dp);
                // The pair is dead for the other cluster too. Dropping it there keeps
                // that set from holding stale pairs that would be removed from
                // closestPair a second time later on.
                Object partner = c.equals(dp.one) ? dp.two : dp.one;
                HashSet<DistancePair> partnerSet = clusterAssoc.get(partner);
                if (partnerSet != null) {
                    partnerSet.remove(dp);
                }
            }
        }
    }

    /**
     * Generates cross product of the two lists of Clusters,
     * but ASSUMES a and b hold the same clusters in the same iteration order.
     * Under that assumption every unordered pair is produced exactly once.
     * @return A collection of all possible DistancePair objects between a and b
     */
    private Collection<DistancePair> crossSameCluster(Collection<Cluster> a, Collection<Cluster> b) {
        LinkedList<DistancePair> list = new LinkedList<DistancePair>();
        // how many times the outer loop has completed, i.e. the position of 'one'
        int outerIndex = 0;
        for (Cluster one : a) {
            // how many non-equal clusters of b have been skipped for this 'one'
            int skipped = 0;
            for (Cluster two : b) {
                if (one.equals(two)) {
                    continue;
                }
                // the clusters in front of 'one' were already paired with it when
                // they were the outer element
                if (skipped < outerIndex) {
                    skipped++;
                    continue;
                }
                double dist = one.distTo(two);
                list.add(new DistancePair(one, two, dist));
            }
            outerIndex++;
        }
        return list;
    }

    /**
     * Pairs one cluster with every cluster of a collection.
     * @param clus the cluster that becomes the first element of every pair
     * @param a the clusters to pair with; a cluster equal to clus is skipped
     * @return one DistancePair per remaining cluster of a
     */
    private Collection<DistancePair> crossDiffCluster(Cluster clus, Collection<Cluster> a) {
        LinkedList<DistancePair> list = new LinkedList<DistancePair>();
        for (Cluster other : a) {
            if (clus.equals(other)) {
                continue;
            }
            double dist = clus.distTo(other);
            list.add(new DistancePair(clus, other, dist));
        }
        return list;
    }

    /**
     * This method performs hierarchical clustering on a collection of nodes
     * where one node is designated the root. The idea is that we keep the root cluster
     * in the collection of nodes we are clustering. Then, as we cluster, if we ever include
     * the root cluster, we consider the cluster that was paired with the root to be
     * a finished cluster. We store that finished cluster.
     *
     * We then reintroduce the root cluster into the mix and continue the process above until
     * we run out of things to cluster. At the end we return a collection of all the
     * finished clusters.
     *
     * @param rootNode the root node of the cluster
     * @param allNodes the nodes to cluster; an element that is the very same object
     *        as rootNode is skipped so the root gets only one cluster
     * @return the finished clusters, in the order in which they were paired with the root
     */
    private Collection<Cluster> rootHierarchicalCluster(Node rootNode, Collection<Node> allNodes) {
        // A previous run that ended abnormally may have left pairs behind.
        clusDist.clear();

        LinkedList<Cluster> clusterCollection = new LinkedList<Cluster>();
        LinkedList<Cluster> leafClusters = new LinkedList<Cluster>();

        // the root cluster always comes first
        Cluster rootCluster = new Cluster(rootNode);
        leafClusters.add(rootCluster);

        // create Clusters for each node except the root node
        for (Node n : allNodes) {
            if (n == rootNode) { // don't create two clusters for the root
                continue;
            }
            leafClusters.add(new Cluster(n));
        }

        // crossSameCluster only reads its arguments, so the same list serves as both
        clusDist.addDistPairCollection(crossSameCluster(leafClusters, leafClusters));

        // start of clustering method
        while (clusDist.numClusters() > 0) {
            // get the two closest clusters
            DistancePair pair = clusDist.removeClosest();
            assert pair != null : "clusters remain but no distance pair is left";
            Cluster oneClus = (Cluster) pair.one;
            Cluster twoClus = (Cluster) pair.two;

            Cluster newClus;
            if (rootCluster.equals(oneClus)) {
                // the root's partner is finished; the root itself goes back in
                clusterCollection.add(twoClus);
                newClus = rootCluster;
            } else if (rootCluster.equals(twoClus)) {
                clusterCollection.add(oneClus);
                newClus = rootCluster;
            } else {
                newClus = new Cluster(oneClus, twoClus);
            }

            // pair the new (or reintroduced root) cluster with everything still left;
            // when nothing is left this adds no pair and the loop ends
            Collection<DistancePair> crossProd = crossDiffCluster(newClus, clusDist.getClusters());
            clusDist.addDistPairCollection(crossProd);
        }
        return clusterCollection;
    }

    /**
     * Clusters the nodes around a root node.
     * @param rootN the root node
     * @param allNodes the nodes to cluster
     * @return the clusters produced by the root-based hierarchical clustering
     */
    public Collection<Cluster> doCluster(Node rootN, Collection<Node> allNodes) {
        return rootHierarchicalCluster(rootN, allNodes);
    }
}