LogLikelihoodFunction.java example

Explorer

cl1-master
- src
  - java
    - uk
      - ac
        rhul
        cs
        cl1
        ClusterONE.java
        ClusterONEAlgorithmParameters.java
        ClusterONEException.java
        Intersectable.java
        MutableNodeSet.java
        NodeSet.java
        NullTaskMonitor.java
        Sized.java
        TaskMonitor.java
        TaskMonitorSupport.java
        ValuedNodeSet.java
        ValuedNodeSetList.java
        api
        Cluster.java
        ClusterONEResult.java
        EntityNotFoundException.java
        EntityStore.java
        InMemoryEntityStore.java
        PersistentEntityStore.java
        rest
        DatasetResource.java
        JAXBContextResolver.java
        MissingParameterException.java
        NotFoundException.java
        ResultResource.java
        StandaloneWebServer.java
        WebApplication.java
        filters
        DensityFilter.java
        DiameterFilter.java
        FilterChain.java
        FluffingFilter.java
        HaircutFilter.java
        KCoreFilter.java
        NodeSetFilter.java
        SizeFilter.java
        growth
        ClusterGrowthAction.java
        ClusterGrowthProcess.java
        ClusterGrowthWorker.java
        GreedyClusterGrowthProcess.java
        io
        AbstractClusteringWriter.java
        AbstractGraphWriter.java
        CSVClusteringWriter.java
        ClusteringWriter.java
        ClusteringWriterFactory.java
        EdgeListReader.java
        EdgeListWriter.java
        GeneProClusteringWriter.java
        GraphReader.java
        GraphReaderFactory.java
        GraphWriter.java
        PlainTextClusteringWriter.java
        SIFReader.java
        merging
        AbstractNodeSetMerger.java
        DummyNodeSetMerger.java
        MultiPassNodeSetMerger.java
        NodeSetMerger.java
        SinglePassNodeSetMerger.java
        quality
        CohesivenessFunction.java
        DummyQualityFunction.java
        LogLikelihoodFunction.java
        QualityFunction.java
        seeding
        EveryEdgeSeedGenerator.java
        EveryNodeSeedGenerator.java
        FileBasedSeedGenerator.java
        MaximalCliqueSeedGenerator.java
        NodeSetCollectionBasedSeedGenerator.java
        NodeSetCollectionBasedSeedIterator.java
        Seed.java
        SeedGenerator.java
        SeedIterator.java
        StreamBasedSeedGenerator.java
        similarity
        DiceSimilarity.java
        JaccardSimilarity.java
        MatchingScore.java
        SimilarityFunction.java
        SimpsonCoefficient.java
        support
        OrderMaintainingQueue.java
        UsedNodeSet.java
        ui
        AboutDialog.java
        ClusterONEAlgorithmParametersDialog.java
        ClusterONEAlgorithmParametersPanel.java
        CollapsiblePanel.java
        ConsoleTaskMonitor.java
        EmptyIcon.java
        ExtendedSpinnerNumberModel.java
        GraphRenderer.java
        HeightLimitedJLabelRenderer.java
        JMultiLineToolTip.java
        NodeSetDetails.java
        NodeSetPropertiesPanel.java
        NodeSetTableModel.java
        PValueRenderer.java
        PopupMenuTrigger.java
        RemoveClusterFromResultAction.java
        ResultViewerPanel.java
        ShowDetailedResultsAction.java
        SwingTaskMonitor.java
        cmdline
        CommandLineApplication.java
        cytoscape
        AboutAction.java
        AboutDialog.java
        AffinityColouringAction.java
        CloseControlPanelAction.java
        ClusterONECytoscapeTask.java
        ControlPanel.java
        CopyClusterToClipboardAction.java
        CyNetworkCache.java
        CytoscapePlugin.java
        CytoscapeResultViewerPanel.java
        CytoscapeTaskMonitorWrapper.java
        ExtractClusterAction.java
        FindAction.java
        Graph.java
        GrowClusterAction.java
        HelpAction.java
        NodeContextMenuAction.java
        NonNumericAttributeException.java
        SaveClusterAction.java
        SaveClusterAsCyGroupAction.java
        SaveClusteringAction.java
        SelectionPropertiesPanel.java
        ShowControlPanelAction.java
        StartAction.java
        VisualStyleManager.java
        cytoscape3
        AboutAction.java
        AboutDialog.java
        AbstractClusterONEAction.java
        AffinityColouringAction.java
        CloseControlPanelAction.java
        ClusterONECytoscapeApp.java
        ClusterONECytoscapeTask.java
        ClusterONECytoscapeTaskFactory.java
        ControlPanel.java
        CopyClusterToClipboardAction.java
        CyNetworkCache.java
        CyNetworkUtil.java
        CyNodeUtil.java
        CytoscapeAppActivator.java
        CytoscapeResultViewerPanel.java
        CytoscapeTaskMonitorWrapper.java
        ExtractClusterAction.java
        FindAction.java
        Graph.java
        GrowClusterAction.java
        HelpAction.java
        NodeContextMenuFactory.java
        NonNumericAttributeException.java
        SaveClusterAction.java
        SaveClusteringAction.java
        SelectionPropertiesPanel.java
        ShowControlPanelAction.java
        StartAction.java
        VisualStyleManager.java
        procope
        ProcopePlugin.java
        collections
        HashMultimap.java
        IntIntHashMap.java
        IntObjectHashMap.java
        Multimap.java
        MultimapBase.java
        Multiset.java
        TreeMultimap.java
        TreeMultiset.java
        graph
        BreadthFirstSearch.java
        BreadthFirstSearchIterator.java
        BronKerboschMaximalCliqueFinder.java
        CircularLayoutAlgorithm.java
        DepthFirstSearch.java
        DepthFirstSearchIterator.java
        Directedness.java
        Edge.java
        EdgeIterator.java
        FruchtermanReingoldLayoutAlgorithm.java
        Graph.java
        GraphAlgorithm.java
        GraphFactory.java
        GraphLayoutAlgorithm.java
        GraphTraversalAlgorithm.java
        Layout.java
        RandomLayoutAlgorithm.java
        TarjanCutVertexFinder.java
        TransitivityCalculator.java
        stats
        StatsUtils.java
        correlation
        KendallCorrelation.java
        LinearCorrelation.java
        curvefitting
        LineFit.java
        StraightLineFit.java
        datastructures
        PairedData.java
        descriptive
        MeanVar.java
        independentsamples
        MannWhitneyTest.java
        tests
        H1.java
        SignificanceTest.java
        utils
        ArrayUtils.java
        BlockingQueueAdapter.java
        DebugHelper.java
        IntegerRange.java
        IteratorUtils.java
        ObjectUtils.java
        Ordered.java
        Pair.java
        StringUtils.java
        UniqueIDGenerator.java
        UnorderedPair.java
- test
  - java
    - uk
      - ac
        rhul
        cs
        cl1
        MutableNodeSetTest.java
        NodeSetTest.java
        filters
        DensityFilterTest.java
        FilterChainTest.java
        SizeFilterTest.java
        merging
        MultiPassNodeSetMergerTest.java
        quality
        LogLikelihoodFunctionTest.java
        seeding
        SeedGeneratorTest.java
        similarity
        DiceSimilarityTest.java
        JaccardSimilarityTest.java
        MatchingScoreTest.java
        SimilarityTestBase.java
        SimpsonCoefficientTest.java
        support
        OrderMaintainingQueueTest.java
        graph
        BreadthFirstSearchTest.java
        BronKerboschMaximalCliqueFinderTest.java
        DepthFirstSearchTest.java
        LayoutTest.java
        TarjanCutVertexFinderTest.java
        TransitivityCalculatorTest.java
        stats
        KendallCorrelationTest.java
        LineFitTest.java
        LinearCorrelationTest.java
        MannWhitneyTestTest.java
        MeanVarTest.java
        StatsUtilsTest.java
        utils
        ArrayUtilsTest.java
        IntegerRangeTest.java
        StringUtilsTest.java

package uk.ac.rhul.cs.cl1.quality;

import uk.ac.rhul.cs.cl1.MutableNodeSet;
import uk.ac.rhul.cs.cl1.NodeSet;

/**
 * Calculates the log-likelihood of a nodeset according to a simple blockmodel.
 * 
 * This goal function assumes the following:
 * 
 * <ul>
 * <li>The edges in the nodeset are distributed randomly; i.e. each edge is present
 * with probability p1, where p1 is estimated from the nodeset itself according to
 * the maximum-likelihood principle.</li>
 * <li>The edges from the nodeset to other parts of the network are also distributed
 * randomly; i.e. each edge is present with probability p2, where p2 is estimated from
 * the surroundings of the nodeset according to the maximum-likelihood principle.</li>
 * <li>A randomly chosen node in the graph is included in the nodeset with probability
 * p, which is again estimated from the size of the graph and the size of the nodeset
 * using the maximum-likelihood principle.</li>
 * </ul>
 * 
 * The model goes as follows: each node in the graph is selected with probability p.
 * Internal edges between the selected nodes are generated randomly and independently
 * of each other with probability p1 for each node pair. Boundary edges from the
 * selected nodes to the rest of the network are also generated randomly and independently
 * of each other with probability p2 for each internal-external node pair. The value of the
 * goal function of our nodeset is then the log-likelihood of the event that the above model
 * generates exactly our nodeset.
 * 
 * This goal function is good to assess the quality of a cluster based on a strict,
 * model-based criterion, but it is not good to drive a greedy growth process, since
 * it is almost always "safer" to contract small communities completely. It would require
 * a fairly large seed to reach the point where the expansion of the community yields
 * better log-likelihood scores than contraction.
 * 
 * @author tamas
 *
 */
public class LogLikelihoodFunction implements QualityFunction {
	/**
	 * Calculates the per-trial log-likelihood of a binary random variable, i.e.
	 * <code>x * ln(x) + (1-x) * ln(1-x)</code>.
	 * 
	 * Despite the name of the method, this is the <em>negative</em> of the binary
	 * entropy (measured in nats), so the result is never positive. The limit value
	 * 0 is returned at both endpoints.
	 * 
	 * Probabilities outside [0, 1] are clamped to the nearest endpoint (and hence
	 * yield 0). Such values can be produced by the callers in this class because the
	 * probabilities are estimated from edge weights; without the clamp the logarithm
	 * of a negative number would turn the whole result into NaN. A NaN argument
	 * still yields NaN.
	 * 
	 * @param x  the probability of the event that the random variable is 1.
	 * @return   <code>x * ln(x) + (1-x) * ln(1-x)</code>, or 0 if <code>x</code>
	 *           is not strictly between 0 and 1.
	 */
	private static double binaryEntropy(double x) {
		if (x <= 0 || x >= 1)
			return 0.0;
		// log1p(-x) keeps full precision for small x, where 1-x would round
		return x * Math.log(x) + (1-x) * Math.log1p(-x);
	}
	
	/**
	 * Calculates the log-likelihood of the given nodeset.
	 * 
	 * The result is the sum of three terms: one for the membership of the nodes
	 * (N trials with probability n/N), one for the internal edges (n*(n-1)/2
	 * trials with probability p1) and one for the boundary edges (n*(N-n) trials
	 * with probability p2), where n is the size of the nodeset and N is the number
	 * of nodes in its graph.
	 * 
	 * @param nodeSet  the nodeset to evaluate
	 * @return  the log-likelihood, which is never positive, or
	 *          <code>Double.NEGATIVE_INFINITY</code> if the nodeset is empty
	 */
	public double calculate(NodeSet nodeSet) {
		if (nodeSet.isEmpty())
			return Double.NEGATIVE_INFINITY;
		
		double n = nodeSet.size();
		double N = nodeSet.getGraph().getNodeCount();
		double maxInternalEdges = n * (n-1) / 2;
		double maxBoundaryEdges = n * (N-n);
		double p = n / N;
		// The guards avoid 0/0 when there are no possible internal (n == 1) or
		// boundary (n == N) node pairs; the corresponding term is then zero.
		double p1 = (n == 1) ? 0 : nodeSet.getTotalInternalEdgeWeight() / maxInternalEdges;
		double p2 = (n == N) ? 0 : nodeSet.getTotalBoundaryEdgeWeight() / maxBoundaryEdges;
		double result;
		
		result  = N * binaryEntropy(p);
		result += maxInternalEdges * binaryEntropy(p1);
		result += maxBoundaryEdges * binaryEntropy(p2);
		
		return result;
	}

	/**
	 * Returns the value of {@link #calculate(NodeSet)} for the nodeset extended by
	 * the node with the given index.
	 * 
	 * If the nodeset already contains the node, the nodeset itself is evaluated.
	 * Otherwise the evaluation is done on a copy created with the copy constructor
	 * of <code>MutableNodeSet</code>; <code>add</code> is never called on the
	 * nodeset that was passed in.
	 * 
	 * @param nodeSet  the nodeset to evaluate
	 * @param index    the index of the node to be added
	 * @return  the log-likelihood after the addition
	 */
	public double getAdditionAffinity(MutableNodeSet nodeSet, int index) {
		// TODO more efficient implementation
		if (nodeSet.contains(index))
			return calculate(nodeSet);
		
		MutableNodeSet copy = new MutableNodeSet(nodeSet);
		copy.add(index);
		return calculate(copy);
	}

	/**
	 * Returns the value of {@link #calculate(NodeSet)} for the nodeset without
	 * the node with the given index.
	 * 
	 * If the nodeset does not contain the node, the nodeset itself is evaluated.
	 * Otherwise the evaluation is done on a copy created with the copy constructor
	 * of <code>MutableNodeSet</code>; <code>remove</code> is never called on the
	 * nodeset that was passed in.
	 * 
	 * @param nodeSet  the nodeset to evaluate
	 * @param index    the index of the node to be removed
	 * @return  the log-likelihood after the removal
	 */
	public double getRemovalAffinity(MutableNodeSet nodeSet, int index) {
		// TODO more efficient implementation
		if (!nodeSet.contains(index))
			return calculate(nodeSet);
		
		MutableNodeSet copy = new MutableNodeSet(nodeSet);
		copy.remove(index);
		return calculate(copy);
	}
}