package uk.ac.rhul.cs.cl1.quality;
import uk.ac.rhul.cs.cl1.MutableNodeSet;
import uk.ac.rhul.cs.cl1.NodeSet;
/**
* Calculates the log-likelihood of a nodeset according to a simple blockmodel.
*
* This goal function assumes the following:
*
* <ul>
* <li>The edges in the nodeset are distributed randomly; i.e. each edge is present
* with probability p1, where p1 is estimated from the nodeset itself according to
* the maximum-likelihood principle.</li>
* <li>The edges from the nodeset to other parts of the network are also distributed
* randomly; i.e. each edge is present with probability p2, where p2 is estimated from
* the surroundings of the nodeset according to the maximum-likelihood principle.</li>
* <li>A randomly chosen node in the graph is included in the nodeset with probability
* p, which is again estimated from the size of the graph and the size of the nodeset
* using the maximum-likelihood principle.</li>
* </ul>
*
* The model goes as follows: each node in the graph is selected with probability p.
* Internal edges between the selected nodes are generated randomly and independently
* of each other with probability p1 for each node pair. Boundary edges from the
* selected nodes to the rest of the network are also generated randomly and independently
* of each other with probability p2 for each internal-external node pair. The value of the
* goal function of our nodeset is then the log-likelihood of the event that the above model
* generates exactly our nodeset.
*
* This goal function is well suited to assessing the quality of a cluster based on a strict,
* model-based criterion, but it is not suited to driving a greedy growth process, since
* it is almost always "safer" to contract small communities completely. It would require
* a fairly large seed to reach the point where the expansion of the community yields
* better log-likelihood scores than contraction.
*
* @author tamas
*
*/
public class LogLikelihoodFunction implements QualityFunction {
    /**
     * Calculates the <em>negative</em> entropy of a binary random variable, i.e.
     * <code>x * ln(x) + (1-x) * ln(1-x)</code>.
     *
     * Multiplied by a number of independent trials, this is the log-likelihood
     * of those trials under a Bernoulli model whose success probability equals
     * the observed success ratio <code>x</code>. The result is never positive.
     *
     * Values of <code>x</code> at or outside the boundaries of [0; 1] are treated
     * as the degenerate (deterministic) case and yield zero. Without this guard,
     * ratios that leave the unit interval (for instance because they were derived
     * from edge weights, or because of floating-point rounding) would be passed
     * to {@link Math#log(double)} as a negative argument and turn the whole
     * score into NaN.
     *
     * @param  x  the probability of the event that the random variable is 1.
     * @return the negative entropy of the variable, measured in nats.
     */
    private static double binaryEntropy(double x) {
        if (x <= 0.0 || x >= 1.0) {
            return 0.0;
        }
        return x * Math.log(x) + (1.0 - x) * Math.log(1.0 - x);
    }

    /**
     * Calculates the log-likelihood of the given nodeset under the blockmodel.
     *
     * The result is the sum of three terms: one for the membership of the nodes
     * (probability <code>p</code>), one for the internal node pairs (probability
     * <code>p1</code>) and one for the internal-external node pairs (probability
     * <code>p2</code>).
     *
     * @param  nodeSet  the nodeset to be evaluated
     * @return the log-likelihood of the nodeset, or
     *         {@link Double#NEGATIVE_INFINITY} if the nodeset is empty
     */
    public double calculate(NodeSet nodeSet) {
        if (nodeSet.isEmpty()) {
            return Double.NEGATIVE_INFINITY;
        }

        // Sizes are kept as doubles so that the pair counts below cannot
        // overflow and the divisions are floating-point divisions.
        double n = nodeSet.size();
        double N = nodeSet.getGraph().getNodeCount();

        double maxInternalEdges = n * (n - 1) / 2;
        double maxBoundaryEdges = n * (N - n);

        double p = n / N;
        // A zero pair count means that there is nothing to estimate; use zero
        // instead of dividing by zero. The corresponding term vanishes anyway.
        double p1 = (maxInternalEdges > 0)
                ? nodeSet.getTotalInternalEdgeWeight() / maxInternalEdges : 0.0;
        double p2 = (maxBoundaryEdges > 0)
                ? nodeSet.getTotalBoundaryEdgeWeight() / maxBoundaryEdges : 0.0;

        double result = N * binaryEntropy(p);
        result += maxInternalEdges * binaryEntropy(p1);
        result += maxBoundaryEdges * binaryEntropy(p2);
        return result;
    }

    /**
     * Returns the value of the quality function for the given nodeset extended
     * by the node with the given index.
     *
     * The evaluation is performed on a copy created with the copy constructor
     * of {@link MutableNodeSet}; this method never calls a mutator on
     * <code>nodeSet</code> itself. If the node is already a member, the current
     * value of the quality function is returned.
     *
     * @param  nodeSet  the nodeset to be extended
     * @param  index    the index of the node to be added
     * @return the log-likelihood of the nodeset after the addition
     */
    public double getAdditionAffinity(MutableNodeSet nodeSet, int index) {
        // TODO more efficient implementation
        if (nodeSet.contains(index)) {
            return calculate(nodeSet);
        }

        MutableNodeSet copy = new MutableNodeSet(nodeSet);
        copy.add(index);
        return calculate(copy);
    }

    /**
     * Returns the value of the quality function for the given nodeset with the
     * node with the given index removed from it.
     *
     * The evaluation is performed on a copy created with the copy constructor
     * of {@link MutableNodeSet}; this method never calls a mutator on
     * <code>nodeSet</code> itself. If the node is not a member, the current
     * value of the quality function is returned.
     *
     * @param  nodeSet  the nodeset to be contracted
     * @param  index    the index of the node to be removed
     * @return the log-likelihood of the nodeset after the removal
     */
    public double getRemovalAffinity(MutableNodeSet nodeSet, int index) {
        // TODO more efficient implementation
        if (!nodeSet.contains(index)) {
            return calculate(nodeSet);
        }

        MutableNodeSet copy = new MutableNodeSet(nodeSet);
        copy.remove(index);
        return calculate(copy);
    }
}