// HierarchicalCluster.java example
//
// Explorer
// codemap-master
package edu.stanford.hci.flowmap.cluster;

import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.TreeSet;

import edu.stanford.hci.flowmap.structure.Node;
/**
 * Turns a list of structure.Node objects into structure.Node objects that are in clusters
 * and attached to one another.
 * 
 * This software is distributed under the Berkeley Software Distribution License.
 * Please see http://graphics.stanford.edu/~dphan/code/bsd.license.html
 *
 */
public class HierarchicalCluster {

	// Pairwise-distance bookkeeping shared by every run on this instance.
	// It is cleared at the start of each run, so an aborted run cannot leak state.
	// Because this state is shared, concurrent doCluster calls on one instance
	// would interfere with each other.
	private final ClusterDistance clusDist;
	
	public HierarchicalCluster() {
		clusDist = new ClusterDistance();
	}
	
	/**
	 * Manages distances between clusters.
	 * 
	 * Keeps three views in sync: the ordered set of candidate pairs, the set of
	 * clusters still under consideration, and a cluster-to-pairs index that is
	 * used to invalidate every pair a removed cluster took part in.
	 * 
	 * @author dphan
	 */
	private static class ClusterDistance {
		
		// Candidate pairs, ordered by DistancePair's natural ordering.
		// NOTE(review): the ordering is defined outside this file and is presumably
		// "shortest distance first". If it treats two different pairs with the same
		// distance as equal, TreeSet.add silently drops the second one - confirm.
		private final TreeSet<DistancePair> closestPair;
		
		// set of Clusters that are under consideration
		private final HashSet<Cluster> currentClusters;
		
		// maps a cluster to the set of DistancePairs it appears in;
		// we need this association to be able to invalidate those distances
		// when the cluster is removed
		private final HashMap<Cluster, HashSet<DistancePair>> clusterAssoc;
		
		public ClusterDistance() {
			clusterAssoc = new HashMap<Cluster, HashSet<DistancePair>>();
			closestPair = new TreeSet<DistancePair>();
			currentClusters = new HashSet<Cluster>();
		}
		
		/** Drops all pairs, clusters and associations. */
		public void clear() {
			clusterAssoc.clear();
			closestPair.clear();
			currentClusters.clear();
		}
		
		/** @return the number of clusters currently under consideration */
		public int numClusters() {
			return currentClusters.size();
		}
		
		/**
		 * @return the live (not copied) set of clusters under consideration;
		 *         callers must not add pairs while iterating over it
		 */
		public Collection<Cluster> getClusters() {
			return currentClusters;
		}
		
		/**
		 * Registers every pair of the given collection.
		 * @param pairs the pairs to add
		 */
		public void addDistPairCollection(Collection<DistancePair> pairs) {
			for (DistancePair dp : pairs) {
				addDistPair(dp);
			}
		}
		
		/**
		 * Registers one pair: stores it in the ordered set, indexes it under both
		 * of its clusters and marks both clusters as under consideration.
		 * @param dp the pair to add
		 */
		public void addDistPair(DistancePair dp) {
			closestPair.add(dp);
			
			Cluster first = (Cluster) dp.one;
			Cluster second = (Cluster) dp.two;
			
			// update cluster association list
			addClusterAssoc(first, dp);
			addClusterAssoc(second, dp);
			
			currentClusters.add(first);
			currentClusters.add(second);
		}
		
		/**
		 * Adds a new pair and distance to the data structure
		 * @param one first cluster of the pair
		 * @param two second cluster of the pair
		 */
		public void addDist(Cluster one, Cluster two) {
			double dist = one.distTo(two);
			addDistPair(new DistancePair(one, two, dist));
		}
		
		/**
		 * Returns and removes the closest pair. Both clusters of that pair stop
		 * being under consideration, and every other pair that involves either of
		 * them is discarded as well.
		 * @return the closest pair from the data structure or null if none exists
		 */
		public DistancePair removeClosest() {
			if (closestPair.isEmpty()) {
				return null;
			}
			
			DistancePair dp = closestPair.first();
			closestPair.remove(dp);
			
			// update association info
			removeClusterAssoc((Cluster) dp.one);
			removeClusterAssoc((Cluster) dp.two);
			
			// update current cluster info
			currentClusters.remove(dp.one);
			currentClusters.remove(dp.two);
			
			return dp;
		}
		
		/**
		 * Associates key with value, creating the association set on first use.
		 * @param key Cluster to associate
		 * @param value DistancePair to be associated
		 */
		private void addClusterAssoc(Cluster key, DistancePair value) {
			HashSet<DistancePair> assocSet = clusterAssoc.get(key);
			if (assocSet == null) {
				assocSet = new HashSet<DistancePair>();
				clusterAssoc.put(key, assocSet);
			}
			assocSet.add(value);
		}
		
		/**
		 * Removes all information associated with that cluster: its association
		 * entry and every pair it appeared in.
		 * @param c The cluster to be removed
		 */
		private void removeClusterAssoc(Cluster c) {
			// remove() hands back the previous mapping, or null if there was none
			HashSet<DistancePair> assocSet = clusterAssoc.remove(c);
			if (assocSet == null) {
				return;
			}
			
			// invalidate every pair this cluster took part in
			for (DistancePair dp : assocSet) {
				closestPair.remove(dp);
			}
		}
		
	}
	
	/**
	 * Generates cross product of the two lists of Clusters, 
	 * but ASSUMES a and b are the same list (same elements, same iteration order).
	 * Each unordered pair is produced once: for the i-th element of a, only the
	 * elements of b that come after position i are paired with it.
	 * @param a the clusters for the outer loop
	 * @param b the clusters for the inner loop
	 * @return A collection of all possible DistancePair objects between a and b
	 */
	private Collection<DistancePair> crossSameCluster(Collection<Cluster> a, Collection<Cluster> b) {
		LinkedList<DistancePair> list = new LinkedList<DistancePair>();
		
		// number of completed outer iterations, i.e. how many leading elements
		// of b have already been paired with the current outer element
		int count = 0;
		
		for (Cluster one : a) {
			int skipped = 0;
			for (Cluster two : b) {
				if (one.equals(two)) {
					continue; // never pair a cluster with itself
				}
				if (skipped < count) {
					skipped++; // pair was already produced by an earlier outer iteration
					continue;
				}
				list.add(new DistancePair(one, two, one.distTo(two)));
			}
			count++;
		}
		return list;
	}
	
	/**
	 * Pairs one cluster with every other cluster of a collection.
	 * @param clus the cluster to pair up
	 * @param a the candidate partners; an element equal to clus is skipped
	 * @return the DistancePair objects between clus and each element of a
	 */
	private Collection<DistancePair> crossDiffCluster(Cluster clus, Collection<Cluster> a) {
		LinkedList<DistancePair> list = new LinkedList<DistancePair>();
		for (Cluster other : a) {
			if (clus.equals(other)) {
				continue;
			}
			list.add(new DistancePair(clus, other, clus.distTo(other)));
		}
		return list;
	}

	
	/**
	 * This method performs hierarchical clustering on a collection of nodes
	 * where one node is designated the root. The idea is that we keep the root cluster
	 * in the collection of nodes we are clustering. Then, as we cluster, if we ever include
	 * the root cluster, we consider the cluster that was paired with the root to be 
	 * a finished cluster. We store that finished cluster.
	 * 
	 * We then reintroduce the root cluster into the mix and continue the process above until
	 * we run out of things to cluster. At the end we return a collection of all the 
	 * finished clusters.
	 * 
	 * @param rootNode the root node of the cluster
	 * @param allNodes a collection of nodes to cluster; the root node itself
	 *                 (compared by identity) is skipped if it is contained
	 * @return a collection of clusters that don't include the root node
	 * @throws IllegalStateException if clusters remain but no distance pair is
	 *                               left to merge them
	 */
	private Collection<Cluster> rootHierarchicalCluster(Node rootNode, Collection<Node> allNodes) {
		// start from a clean slate in case an earlier run on this instance aborted midway
		clusDist.clear();
		
		LinkedList<Cluster> clusterCollection = new LinkedList<Cluster>();
		LinkedList<Cluster> leafClusters = new LinkedList<Cluster>();
		
		// the root cluster always takes part in the clustering
		Cluster rootCluster = new Cluster(rootNode);
		leafClusters.add(rootCluster);
		
		// create Clusters for each node except the root node
		for (Node n : allNodes) {
			if (n == rootNode) { // don't create two clusters for the root
				continue;
			}
			leafClusters.add(new Cluster(n));
		}
		
		// seed the bookkeeping with every unordered pair of leaf clusters
		clusDist.addDistPairCollection(crossSameCluster(leafClusters, leafClusters));
		
		while (clusDist.numClusters() > 0) {
			
			// get the two closest clusters 
			DistancePair pair = clusDist.removeClosest();
			if (pair == null) {
				// Clusters are pending but no pair connects them. Fail with a clear
				// message instead of a NullPointerException when asserts are off.
				throw new IllegalStateException("No distance pair left although "
						+ clusDist.numClusters() + " cluster(s) are still pending");
			}
			Cluster oneClus = (Cluster) pair.one;
			Cluster twoClus = (Cluster) pair.two;
			
			// if either element of the pair is the rootCluster, the other element
			// is finished; the root itself goes back into the mix
			Cluster newClus;
			if (rootCluster.equals(oneClus)) {
				clusterCollection.add(twoClus);
				newClus = rootCluster;
			} else if (rootCluster.equals(twoClus)) {
				clusterCollection.add(oneClus);
				newClus = rootCluster;
			} else {
				newClus = new Cluster(oneClus, twoClus);
			}
			
			// The pair list is fully built before any pair is added, so the live
			// cluster set is never modified while it is being iterated.
			Collection<DistancePair> crossProd = crossDiffCluster(newClus, clusDist.getClusters());
			clusDist.addDistPairCollection(crossProd);
		}
		
		return clusterCollection;
	}
	
	/**
	 * Clusters the given nodes around the root node.
	 * 
	 * @param rootN the root node
	 * @param allNodes the nodes to cluster
	 * @return the finished clusters, none of which is the root cluster itself
	 */
	public Collection<Cluster> doCluster(Node rootN, Collection<Node> allNodes) {
		return rootHierarchicalCluster(rootN, allNodes);	
	}
	
}