ClusterONE.java example

Explorer

cl1-master
- src
  - java
    - uk
      - ac
        rhul
        cs
        cl1
        ClusterONE.java
        ClusterONEAlgorithmParameters.java
        ClusterONEException.java
        Intersectable.java
        MutableNodeSet.java
        NodeSet.java
        NullTaskMonitor.java
        Sized.java
        TaskMonitor.java
        TaskMonitorSupport.java
        ValuedNodeSet.java
        ValuedNodeSetList.java
        api
        Cluster.java
        ClusterONEResult.java
        EntityNotFoundException.java
        EntityStore.java
        InMemoryEntityStore.java
        PersistentEntityStore.java
        rest
        DatasetResource.java
        JAXBContextResolver.java
        MissingParameterException.java
        NotFoundException.java
        ResultResource.java
        StandaloneWebServer.java
        WebApplication.java
        filters
        DensityFilter.java
        DiameterFilter.java
        FilterChain.java
        FluffingFilter.java
        HaircutFilter.java
        KCoreFilter.java
        NodeSetFilter.java
        SizeFilter.java
        growth
        ClusterGrowthAction.java
        ClusterGrowthProcess.java
        ClusterGrowthWorker.java
        GreedyClusterGrowthProcess.java
        io
        AbstractClusteringWriter.java
        AbstractGraphWriter.java
        CSVClusteringWriter.java
        ClusteringWriter.java
        ClusteringWriterFactory.java
        EdgeListReader.java
        EdgeListWriter.java
        GeneProClusteringWriter.java
        GraphReader.java
        GraphReaderFactory.java
        GraphWriter.java
        PlainTextClusteringWriter.java
        SIFReader.java
        merging
        AbstractNodeSetMerger.java
        DummyNodeSetMerger.java
        MultiPassNodeSetMerger.java
        NodeSetMerger.java
        SinglePassNodeSetMerger.java
        quality
        CohesivenessFunction.java
        DummyQualityFunction.java
        LogLikelihoodFunction.java
        QualityFunction.java
        seeding
        EveryEdgeSeedGenerator.java
        EveryNodeSeedGenerator.java
        FileBasedSeedGenerator.java
        MaximalCliqueSeedGenerator.java
        NodeSetCollectionBasedSeedGenerator.java
        NodeSetCollectionBasedSeedIterator.java
        Seed.java
        SeedGenerator.java
        SeedIterator.java
        StreamBasedSeedGenerator.java
        similarity
        DiceSimilarity.java
        JaccardSimilarity.java
        MatchingScore.java
        SimilarityFunction.java
        SimpsonCoefficient.java
        support
        OrderMaintainingQueue.java
        UsedNodeSet.java
        ui
        AboutDialog.java
        ClusterONEAlgorithmParametersDialog.java
        ClusterONEAlgorithmParametersPanel.java
        CollapsiblePanel.java
        ConsoleTaskMonitor.java
        EmptyIcon.java
        ExtendedSpinnerNumberModel.java
        GraphRenderer.java
        HeightLimitedJLabelRenderer.java
        JMultiLineToolTip.java
        NodeSetDetails.java
        NodeSetPropertiesPanel.java
        NodeSetTableModel.java
        PValueRenderer.java
        PopupMenuTrigger.java
        RemoveClusterFromResultAction.java
        ResultViewerPanel.java
        ShowDetailedResultsAction.java
        SwingTaskMonitor.java
        cmdline
        CommandLineApplication.java
        cytoscape
        AboutAction.java
        AboutDialog.java
        AffinityColouringAction.java
        CloseControlPanelAction.java
        ClusterONECytoscapeTask.java
        ControlPanel.java
        CopyClusterToClipboardAction.java
        CyNetworkCache.java
        CytoscapePlugin.java
        CytoscapeResultViewerPanel.java
        CytoscapeTaskMonitorWrapper.java
        ExtractClusterAction.java
        FindAction.java
        Graph.java
        GrowClusterAction.java
        HelpAction.java
        NodeContextMenuAction.java
        NonNumericAttributeException.java
        SaveClusterAction.java
        SaveClusterAsCyGroupAction.java
        SaveClusteringAction.java
        SelectionPropertiesPanel.java
        ShowControlPanelAction.java
        StartAction.java
        VisualStyleManager.java
        cytoscape3
        AboutAction.java
        AboutDialog.java
        AbstractClusterONEAction.java
        AffinityColouringAction.java
        CloseControlPanelAction.java
        ClusterONECytoscapeApp.java
        ClusterONECytoscapeTask.java
        ClusterONECytoscapeTaskFactory.java
        ControlPanel.java
        CopyClusterToClipboardAction.java
        CyNetworkCache.java
        CyNetworkUtil.java
        CyNodeUtil.java
        CytoscapeAppActivator.java
        CytoscapeResultViewerPanel.java
        CytoscapeTaskMonitorWrapper.java
        ExtractClusterAction.java
        FindAction.java
        Graph.java
        GrowClusterAction.java
        HelpAction.java
        NodeContextMenuFactory.java
        NonNumericAttributeException.java
        SaveClusterAction.java
        SaveClusteringAction.java
        SelectionPropertiesPanel.java
        ShowControlPanelAction.java
        StartAction.java
        VisualStyleManager.java
        procope
        ProcopePlugin.java
        collections
        HashMultimap.java
        IntIntHashMap.java
        IntObjectHashMap.java
        Multimap.java
        MultimapBase.java
        Multiset.java
        TreeMultimap.java
        TreeMultiset.java
        graph
        BreadthFirstSearch.java
        BreadthFirstSearchIterator.java
        BronKerboschMaximalCliqueFinder.java
        CircularLayoutAlgorithm.java
        DepthFirstSearch.java
        DepthFirstSearchIterator.java
        Directedness.java
        Edge.java
        EdgeIterator.java
        FruchtermanReingoldLayoutAlgorithm.java
        Graph.java
        GraphAlgorithm.java
        GraphFactory.java
        GraphLayoutAlgorithm.java
        GraphTraversalAlgorithm.java
        Layout.java
        RandomLayoutAlgorithm.java
        TarjanCutVertexFinder.java
        TransitivityCalculator.java
        stats
        StatsUtils.java
        correlation
        KendallCorrelation.java
        LinearCorrelation.java
        curvefitting
        LineFit.java
        StraightLineFit.java
        datastructures
        PairedData.java
        descriptive
        MeanVar.java
        independentsamples
        MannWhitneyTest.java
        tests
        H1.java
        SignificanceTest.java
        utils
        ArrayUtils.java
        BlockingQueueAdapter.java
        DebugHelper.java
        IntegerRange.java
        IteratorUtils.java
        ObjectUtils.java
        Ordered.java
        Pair.java
        StringUtils.java
        UniqueIDGenerator.java
        UnorderedPair.java
- test
  - java
    - uk
      - ac
        rhul
        cs
        cl1
        MutableNodeSetTest.java
        NodeSetTest.java
        filters
        DensityFilterTest.java
        FilterChainTest.java
        SizeFilterTest.java
        merging
        MultiPassNodeSetMergerTest.java
        quality
        LogLikelihoodFunctionTest.java
        seeding
        SeedGeneratorTest.java
        similarity
        DiceSimilarityTest.java
        JaccardSimilarityTest.java
        MatchingScoreTest.java
        SimilarityTestBase.java
        SimpsonCoefficientTest.java
        support
        OrderMaintainingQueueTest.java
        graph
        BreadthFirstSearchTest.java
        BronKerboschMaximalCliqueFinderTest.java
        DepthFirstSearchTest.java
        LayoutTest.java
        TarjanCutVertexFinderTest.java
        TransitivityCalculatorTest.java
        stats
        KendallCorrelationTest.java
        LineFitTest.java
        LinearCorrelationTest.java
        MannWhitneyTestTest.java
        MeanVarTest.java
        StatsUtilsTest.java
        utils
        ArrayUtilsTest.java
        IntegerRangeTest.java
        StringUtilsTest.java

package uk.ac.rhul.cs.cl1;

import java.util.*;
import java.util.concurrent.*;

import uk.ac.rhul.cs.cl1.growth.ClusterGrowthWorker;
import uk.ac.rhul.cs.cl1.merging.AbstractNodeSetMerger;
import uk.ac.rhul.cs.cl1.seeding.Seed;
import uk.ac.rhul.cs.cl1.seeding.SeedGenerator;
import uk.ac.rhul.cs.cl1.seeding.SeedIterator;
import uk.ac.rhul.cs.cl1.support.OrderMaintainingQueue;
import uk.ac.rhul.cs.cl1.support.UsedNodeSet;
import uk.ac.rhul.cs.collections.IntObjectHashMap;
import uk.ac.rhul.cs.graph.Graph;
import uk.ac.rhul.cs.graph.GraphAlgorithm;
import uk.ac.rhul.cs.graph.TransitivityCalculator;
import uk.ac.rhul.cs.utils.ArrayUtils;
import uk.ac.rhul.cs.utils.Ordered;

/**
 * Main class for the ClusterONE algorithm.
 * 
 * This class represents an instance of the algorithm along with all its
 * necessary parameters. The main entry point of the algorithm is the
 * run() method which executes the clustering algorithm on the graph
 * set earlier using the setGraph() method. The class also implements
 * the Callable interface, so an instance can be submitted to an executor
 * in order to run the algorithm in a separate thread.
 * 
 * A typical usage is: construct an instance (optionally with custom
 * parameters), call runOnGraph() or setGraph() followed by run(), then
 * retrieve the clusters with getResults().
 * 
 * @author Tamas Nepusz <tamas@cs.rhul.ac.uk>
 */
public class ClusterONE extends GraphAlgorithm implements Callable<Void>, TaskMonitorSupport {
	/** The name of the application that will appear on the user interface */
	public static final String applicationName = "ClusterONE";
	
	/** The version number of the application */
	public static final String version = "1.1";

	/**
	 * A thread pool used for asynchronous operations within ClusterONE.
	 * Created lazily by {@link #getThreadPool()}.
	 */
	private static Executor threadPool = null;

	/**
	 * The clustering result as a list of {@link ValuedNodeSet} objects.
	 * Stays null until a run has finished successfully.
	 */
	protected ValuedNodeSetList result = null;
	
	/** Algorithm settings for this instance */
	protected ClusterONEAlgorithmParameters parameters = null;

	/** A task monitor where the algorithm will report its progress */
	protected TaskMonitor monitor = new NullTaskMonitor();
	
	/** Whether we are running on a Mac or not */
	protected static boolean runningOnMac = false;
	
	static {
		// Locale.ROOT keeps the lowercasing independent of the default locale of the JVM
		runningOnMac = System.getProperty("os.name").toLowerCase(Locale.ROOT).startsWith("mac os x");
	}

	/**
	 * Internal enum that stores the state of the main loop of the algorithm.
	 * 
	 * The main loop in run() moves through the states in declaration order:
	 * START, GENERATING_SEEDS, NOTIFYING_WORKERS_NO_MORE_SEEDS,
	 * WAITING_FOR_CLUSTERS and finally FINISHED. CANCELLED may be entered
	 * from any state when the algorithm is asked to stop.
	 */
	enum State {
		START,
		GENERATING_SEEDS,
		NOTIFYING_WORKERS_NO_MORE_SEEDS,
		WAITING_FOR_CLUSTERS,
		FINISHED(true),
		CANCELLED(true);

		/** Whether the main loop terminates when it reaches this state */
		final boolean isTerminal;

		/** Constructs a non-terminal state */
		State() {
			this(false);
		}

		/**
		 * Constructs a state.
		 * 
		 * @param isTerminal  whether the main loop should exit in this state
		 */
		State(boolean isTerminal) {
			this.isTerminal = isTerminal;
		}
	}

	/**
	 * Constructs an instance of the algorithm using the default algorithm parameters.
	 */
	public ClusterONE() {
		this(null);
	}

	/**
	 * Constructs an instance of the algorithm using the given algorithm parameters.
	 * 
	 * @param algorithmParameters   a {@link ClusterONEAlgorithmParameters} instance that
	 *                              controls the algorithms. If null, the defaults
	 *                              will be used.
	 */
	public ClusterONE(ClusterONEAlgorithmParameters algorithmParameters) {
		if (algorithmParameters == null)
			this.setParameters(new ClusterONEAlgorithmParameters());
		else
			this.setParameters(algorithmParameters);
	}

	/**
	 * Executes the algorithm by delegating to run().
	 * 
	 * The call is synchronous: it runs in the calling thread and blocks until
	 * run() returns. It exists so that an instance can be handed to an executor
	 * as a Callable; the clusters are then retrieved using getResults().
	 * 
	 * @return always null
	 * @throws ClusterONEException  if run() fails
	 */
	public Void call() throws ClusterONEException {
		run();
		return null;
	}
	
	/**
	 * Returns the current parameter setting of the algorithm
	 * 
	 * @return the parameters
	 */
	public ClusterONEAlgorithmParameters getParameters() {
		return parameters;
	}

	/**
	 * Returns the clustering results or null if there was no clustering executed so far
	 * 
	 * @return the clusters stored by the last run that finished without being
	 *         cancelled, or null if there was no such run
	 */
	public List<ValuedNodeSet> getResults() {
		return result;
	}
	
	/**
	 * Returns a thread pool used by ClusterONE for asynchronous operations
	 * 
	 * The pool is a single-thread executor that is created on the first call.
	 * The method is synchronized so that concurrent first calls cannot create
	 * (and leak) more than one executor.
	 * 
	 * @return the shared thread pool
	 */
	public static synchronized Executor getThreadPool() {
		if (threadPool == null)
			threadPool = Executors.newSingleThreadExecutor();
		return threadPool;
	}
	
	/**
	 * Checks whether we are running on a Mac 
	 * 
	 * @return true if the name of the operating system starts with "mac os x"
	 */
	public static boolean isRunningOnMac() {
		return runningOnMac;
	}
	
	/**
	 * Executes the algorithm on the graph set earlier by setGraph()
	 * 
	 * The method posts seeds to a pool of {@link ClusterGrowthWorker} instances
	 * through a bounded queue, collects the clusters they produce, restores the
	 * original order of the clusters using their sequence numbers and finally
	 * merges the clusters using the merger named in the parameters.
	 * 
	 * The stored result is replaced only if the run reaches the FINISHED state;
	 * a cancelled run leaves the result of the previous run untouched.
	 * 
	 * If the calling thread is interrupted while the method is waiting, the
	 * method carries on, but the interrupt status of the thread is restored
	 * before the method returns.
	 * 
	 * @throws ClusterONEException  if no graph or no parameters were set, if the
	 *                              graph has a negative edge weight or if the
	 *                              merging method cannot be instantiated
	 */
	public void run() throws ClusterONEException {
		/* Fail with a clear message instead of a NullPointerException */
		if (graph == null)
			throw new ClusterONEException("No graph has been set for the algorithm");
		if (parameters == null)
			throw new ClusterONEException("No algorithm parameters have been set");

		Double minDensity = parameters.getMinDensity();
		Seed pendingSeed = null;
		State state;

		int numGeneratedSeeds;
		int numPostedSeeds;
		int numProcessedClusters;

		// Set to true if the thread was interrupted while we were blocked
		boolean interrupted = false;

		// Clusters accepted so far; deliberately not named "result" to avoid
		// shadowing the field that holds the merged result
		ValuedNodeSetList clusters = new ValuedNodeSetList();
		IntObjectHashMap submittedSeeds = new IntObjectHashMap();
		OrderMaintainingQueue<ValuedNodeSet> receivedClusters = new OrderMaintainingQueue<ValuedNodeSet>();

		/* Simple sanity checks */
		if (ArrayUtils.min(graph.getEdgeWeights()) < 0.0)
			throw new ClusterONEException("Edge weights must all be non-negative");
		AbstractNodeSetMerger merger = createMerger();
		
		/* Set the minimum density automatically if needed */
		if (minDensity == null)
			minDensity = chooseDefaultMinDensity();

		/* Create an executor service that will run the workers */
		int numThreads = parameters.getNumThreads();
		if (numThreads <= 0) {
			numThreads = Math.max(1, Runtime.getRuntime().availableProcessors());
		}
		ExecutorService executor = Executors.newFixedThreadPool(numThreads);

		// Create the input and output queue for the workers.
		// Limit the size of the seed queue so it does not run too much "ahead" the worker
		// threads. This is useful for seed generators that depend on the clusters produced
		// by the workers.
		LinkedBlockingQueue<Ordered<Seed>> seedQueue = new LinkedBlockingQueue<Ordered<Seed>>(numThreads);
		LinkedBlockingQueue<Ordered<ValuedNodeSet>> clusterQueue = new LinkedBlockingQueue<Ordered<ValuedNodeSet>>();

		/* Create the workers and post them to the executor */
		for (int i = 0; i < numThreads; i++) {
			ClusterGrowthWorker worker = new ClusterGrowthWorker(graph, parameters, minDensity,
					seedQueue, clusterQueue);
			worker.setDebugMode(debugMode);
			executor.execute(worker);
		}

		// Get the seed generator from the parameters
		SeedGenerator seedGenerator = parameters.getSeedGenerator();
		seedGenerator.setGraph(graph);

		// Create a used node set where we will mark nodes that have been used in clusters
		UsedNodeSet usedNodes = new UsedNodeSet(graph);

		// Set up the task monitor
		if (numThreads > 1) {
			monitor.setStatus("Growing clusters from seeds using " + numThreads + " threads...");
		} else {
			monitor.setStatus("Growing clusters from seeds...");
		}
		monitor.setPercentCompleted(0);

		// Set up the seed iterator
		SeedIterator it = seedGenerator.iterator();

		numGeneratedSeeds = 0;
		numPostedSeeds = 0;
		numProcessedClusters = 0;
		state = State.START;

		try {
			// Start iterating over the seeds and collecting the clusters
			while (!state.isTerminal) {
				switch (state) {
					case START:
						state = State.GENERATING_SEEDS;
						break;

					case GENERATING_SEEDS:
						// Try to fill the seed queue with seeds
						boolean shouldEnqueue = true;
						while (shouldEnqueue) {
							// Get the next seed that is acceptable
							boolean seedAccepted = false;
							Seed seed = null;

							while (!seedAccepted) {
								if (pendingSeed != null) {
									// A seed that did not fit into the queue the last time;
									// it is counted here because the count was rolled back
									// when the seed was put aside
									seed = pendingSeed;
									numGeneratedSeeds++;
									pendingSeed = null;
								} else if (it.hasNext()) {
									seed = it.next();
									numGeneratedSeeds++;
								} else {
									seed = null;
								}
								// A null seed means that the iterator is exhausted, which also
								// ends the search for an acceptable seed
								seedAccepted = (seed == null || !parameters.shouldRejectSeedsWithOnlyUsedNodes() ||
										!usedNodes.areAllNodesUsedFromSeed(seed));
							}

							if (seed == null) {
								state = State.NOTIFYING_WORKERS_NO_MORE_SEEDS;
								shouldEnqueue = false;
							} else {
								// Offer the seed to the workers; if the queue is full, do nothing
								if (seedQueue.offer(new Ordered<Seed>(numPostedSeeds, seed))) {
									// Store the seed and increase the number of posted seeds
									submittedSeeds.add(numPostedSeeds, seed);
									numPostedSeeds++;
								} else {
									// Queue is full now. Store the seed so we can try it again in the next iteration.
									pendingSeed = seed;
									numGeneratedSeeds--;
									shouldEnqueue = false;
								}
							}
						}
						break;

					case NOTIFYING_WORKERS_NO_MORE_SEEDS:
						// Iterator has just become exhausted, so inform workers that there will
						// be no more seeds. If the queue is full, we stay in this state and
						// retry in the next iteration of the main loop.
						if (seedQueue.offer(new Ordered<Seed>(numPostedSeeds, ClusterGrowthWorker.NO_MORE_SEEDS))) {
							state = State.WAITING_FOR_CLUSTERS;
						}
						break;

					case WAITING_FOR_CLUSTERS:
						// If we have processed all the seeds, switch to the FINISHED state
						if (numPostedSeeds == numProcessedClusters) {
							state = State.FINISHED;
						}
						break;

					case FINISHED:
					case CANCELLED:
						// Nothing to do here; we should not get here anyway.
						break;
				}

				// Check for termination
				if (shouldStop) {
					state = State.CANCELLED;
				}

				// In GENERATING_SEEDS, NOTIFYING_WORKERS_NO_MORE_SEEDS and WAITING_FOR_CLUSTERS states,
				// try to read a cluster from the cluster queue if we still expect one.
				if (state == State.GENERATING_SEEDS || state == State.NOTIFYING_WORKERS_NO_MORE_SEEDS ||
						state == State.WAITING_FOR_CLUSTERS) {
					// Try to get clusters from the incoming queue if we expect them
					while (numProcessedClusters < numPostedSeeds) {
						Ordered<ValuedNodeSet> orderedCluster = null;

						try {
							orderedCluster = clusterQueue.take();
						} catch (InterruptedException ex) {
							// Do not restore the interrupt status right away: the next take()
							// would fail immediately and the main loop would spin. Remember
							// it instead and restore it when run() returns.
							interrupted = true;
						}

						if (orderedCluster == null)
							break;

						// Add the cluster to the queue that will restore the ordering
						// according to the sequence numbers
						receivedClusters.add(orderedCluster);
						numProcessedClusters++;

						// Try to retrieve a few clusters from receivedClusters; note that
						// even though we have added a cluster above, the queue might still
						// appear empty if the cluster with the _next_ sequence number we
						// are waiting for has not arrived yet
						while (!receivedClusters.isEmpty()) {
							Ordered<ValuedNodeSet> nextInOrder = receivedClusters.remove();
							ValuedNodeSet cluster = nextInOrder.object;

							// NOTE(review): seeds that yield EMPTY_CLUSTER are never removed from
							// submittedSeeds, so they are kept until run() returns - confirm
							// whether this is intended.
							if (cluster != ClusterGrowthWorker.EMPTY_CLUSTER) {
								Seed originalSeed = (Seed) submittedSeeds.remove(nextInOrder.sequenceNumber);
								// Check whether the cluster would have been generated at all if we were working
								// sequentially.
								if (!usedNodes.areAllNodesUsedFromSeed(originalSeed)) {
									// Yes, so mark the nodes in the seed and the cluster as used and store the
									// cluster.
									clusters.add(cluster);
									usedNodes.markSeedAsUsed(originalSeed);
									usedNodes.markNodeSetAsUsed(cluster);
								}
							}
						}

						// We try to keep all our workers busy so we break out of the while
						// loop here if the queue through which we feed the workers has some
						// empty slots and we are still generating seeds.
						if (state == State.GENERATING_SEEDS && seedQueue.remainingCapacity() > 0)
							break;
					}
				}

				// Report progress.
				// Progress has to be calculated from numGeneratedSeeds and not numPostedSeeds
				// because some seeds may be skipped before posting them to workers
				// NOTE(review): the value is not a valid percentage if the estimated length
				// is zero or negative - confirm what the seed iterators may return here.
				monitor.setPercentCompleted((int) (numGeneratedSeeds * 100.0 / it.getEstimatedLength()));

				// Check for termination
				if (shouldStop) {
					state = State.CANCELLED;
				}
			}

			if (state == State.FINISHED) {
				// Merge highly overlapping clusters
				merger.setTaskMonitor(monitor);
				this.result = merger.mergeOverlapping(clusters, parameters.getSimilarityFunction(),
						parameters.getOverlapThreshold());
			}

			// Wait for the workers to terminate.
			// NOTE(review): when the run is cancelled before NO_MORE_SEEDS was posted, whether
			// the workers terminate depends on ClusterGrowthWorker, which is not visible
			// here - confirm that they cannot stay blocked on the seed queue.
			executor.shutdown();
			if (awaitWorkerTermination(executor))
				interrupted = true;
		} finally {
			// Let the caller know that the thread was interrupted while we were running
			if (interrupted)
				Thread.currentThread().interrupt();
		}
	}

	/**
	 * Creates the node set merger named in the algorithm parameters.
	 * 
	 * @return the merger
	 * @throws ClusterONEException  if the merger cannot be instantiated; the
	 *                              original exception is attached as the cause
	 */
	private AbstractNodeSetMerger createMerger() throws ClusterONEException {
		try {
			return AbstractNodeSetMerger.fromString(parameters.getMergingMethodName());
		} catch (InstantiationException ex) {
			ClusterONEException wrapped = new ClusterONEException(ex.getMessage());
			// Keep the original stack trace available to whoever handles the exception
			wrapped.initCause(ex);
			throw wrapped;
		}
	}

	/**
	 * Chooses the minimum density threshold when it is not given in the parameters.
	 * 
	 * Weighted graphs get 0.3. Unweighted graphs get 0.6 if their global
	 * transitivity is less than 0.1 and 0.5 otherwise. Progress is reported to
	 * the task monitor.
	 * 
	 * @return the chosen density threshold
	 * @throws ClusterONEException  declared so that anything the calls below may
	 *                              throw propagates exactly as it did from run()
	 */
	private double chooseDefaultMinDensity() throws ClusterONEException {
		double density;

		monitor.setStatus("Choosing density threshold...");
		monitor.setPercentCompleted(0);
		if (graph.isWeighted())
			density = 0.3;
		else {
			TransitivityCalculator calc = new TransitivityCalculator(graph);
			calc.setTaskMonitor(monitor);
			if (calc.getGlobalTransitivity() < 0.1)
				density = 0.6;
			else
				density = 0.5;
		}
		monitor.setPercentCompleted(100);

		return density;
	}

	/**
	 * Waits until the given executor terminates, retrying if the wait is interrupted.
	 * 
	 * A single wait lasts at most one day, which is a reasonable upper bound on the
	 * timeout ;) The method returns after the first wait that is not interrupted,
	 * whether the executor has terminated by then or not.
	 * 
	 * @param   executor  the executor to wait for; it must have been shut down already
	 * @return  whether the calling thread was interrupted while waiting
	 */
	private static boolean awaitWorkerTermination(ExecutorService executor) {
		boolean interrupted = false;

		while (true) {
			try {
				executor.awaitTermination(1, TimeUnit.DAYS);
				return interrupted;
			} catch (InterruptedException ex) {
				// Keep waiting, but tell the caller about the interruption
				interrupted = true;
			}
		}
	}
	
	/**
	 * Executes the algorithm on the given graph.
	 * 
	 * @param   graph    the graph being clustered
	 * @throws  ClusterONEException  if run() fails
	 */
	public void runOnGraph(Graph graph) throws ClusterONEException {
		setGraph(graph);
		run();
	}

	/**
	 * Sets the current parameter settings of the algorithm
	 * @param parameters the new parameter settings
	 */
	public void setParameters(ClusterONEAlgorithmParameters parameters) {
		this.parameters = parameters;
	}

	/**
	 * Sets the task monitor where the algorithm will report its progress
	 *
	 * @param monitor    the task monitor to use
	 */
	public void setTaskMonitor(TaskMonitor monitor) {
		this.monitor = monitor;
	}
}