package i5.las2peer.services.ocd.metrics;
import i5.las2peer.services.ocd.graphs.Cover;
import i5.las2peer.services.ocd.utils.NonZeroEntriesVectorProcedure;
import java.util.HashMap;
import java.util.Map;
import org.la4j.matrix.Matrix;
import org.la4j.vector.Vector;
/**
 * Knowledge-driven measure that compares a cover with a ground truth cover through
 * normalized conditional entropies of their community memberships.
 * <p>
 * The value is {@code 1 - 0.5 * (H(cover | groundTruth) + H(groundTruth | cover))}, where each
 * {@code H} is the normalized conditional entropy averaged over the communities of the
 * first-named cover.
 */
public class ExtendedNormalizedMutualInformationMetric implements KnowledgeDrivenMeasure {

    /** Natural logarithm of 2, used to convert natural logarithms into base 2 logarithms. */
    private static final double LOG_2 = Math.log(2d);

    /**
     * This metric has no parameters, the given map is ignored.
     * @param parameters Ignored.
     */
    @Override
    public void setParameters(Map<String, String> parameters) {
        // Intentionally empty: there is nothing to configure.
    }

    /**
     * @return A new empty map, as this metric has no parameters.
     */
    @Override
    public Map<String, String> getParameters() {
        return new HashMap<String, String>();
    }

    /**
     * Calculates the metric value of a cover with respect to a ground truth cover.
     * @param cover The cover to evaluate.
     * @param groundTruth The ground truth cover to compare against.
     * @return One minus the mean of the two normalized conditional entropies (cover given ground truth
     * and ground truth given cover).
     * @throws InterruptedException If the executing thread was interrupted.
     */
    @Override
    public double measure(Cover cover, Cover groundTruth) throws InterruptedException {
        double metricValue = 1;
        Map<Integer, Integer> coverCommunitySizes = determineCommunitySizes(cover);
        Map<Integer, Integer> groundTruthCommunitySizes = determineCommunitySizes(groundTruth);
        metricValue -= 0.5 * calculateArbitraryConditionalEntropy(cover, groundTruth, coverCommunitySizes, groundTruthCommunitySizes);
        metricValue -= 0.5 * calculateArbitraryConditionalEntropy(groundTruth, cover, groundTruthCommunitySizes, coverCommunitySizes);
        return metricValue;
    }

    /**
     * Determines the uncertainty about the membership of a node in an arbitrary community of cover 1,
     * when the community memberships of the node are known for cover 2.
     * @param cover1 The cover 1.
     * @param cover2 The cover 2.
     * @param cover1CommunitySizes A mapping from the community indices of cover 1 to the community sizes.
     * @param cover2CommunitySizes A mapping from the community indices of cover 2 to the community sizes.
     * @return The uncertainty calculated as the normalized conditional entropy, averaged over the
     * communities of cover 1. If cover 1 has no communities, 1 (maximum uncertainty) is returned.
     * @throws InterruptedException If the executing thread was interrupted.
     */
    private double calculateArbitraryConditionalEntropy(Cover cover1, Cover cover2,
            Map<Integer, Integer> cover1CommunitySizes, Map<Integer, Integer> cover2CommunitySizes) throws InterruptedException {
        int cover1CommunityCount = cover1.communityCount();
        if (cover1CommunityCount == 0) {
            /*
             * Without this guard the average below would be 0 / 0 = NaN. Maximum uncertainty is
             * returned instead, which is also the value assigned to every community of cover 1
             * when cover 2 has no communities.
             */
            return 1;
        }
        Matrix cover1Memberships = cover1.getMemberships();
        Matrix cover2Memberships = cover2.getMemberships();
        int cover2CommunityCount = cover2.communityCount();
        int nodeCount = cover1.getGraph().nodeCount();
        double arbitraryConditionalEntropy = 0;
        for (int i = 0; i < cover1CommunityCount; i++) {
            int cover1CommunitySize = cover1CommunitySizes.get(i);
            /*
             * Stays at positive infinity if no community of cover 2 is eligible.
             */
            double minParticularConditionalEntropy = Double.POSITIVE_INFINITY;
            for (int j = 0; j < cover2CommunityCount; j++) {
                if (Thread.interrupted()) {
                    throw new InterruptedException();
                }
                double currentParticularConditionalEntropy = calculateParticularConditionalEntropy(
                        cover1Memberships, cover2Memberships, i, j,
                        cover1CommunitySize, cover2CommunitySizes.get(j));
                if (currentParticularConditionalEntropy < minParticularConditionalEntropy) {
                    minParticularConditionalEntropy = currentParticularConditionalEntropy;
                }
            }
            arbitraryConditionalEntropy += normalizeConditionalEntropy(
                    minParticularConditionalEntropy, cover1CommunitySize, nodeCount);
        }
        return arbitraryConditionalEntropy / (double) cover1CommunityCount;
    }

    /**
     * Normalizes the minimum conditional entropy found for a community by the entropy of that community.
     * @param minConditionalEntropy The minimum conditional entropy of the community, or positive infinity
     * if no eligible community was found in the other cover.
     * @param communitySize The size of the community.
     * @param nodeCount The total number of nodes.
     * @return The conditional entropy divided by the community entropy. If no eligible community was
     * found or the community entropy is not positive, 1 is returned.
     */
    private double normalizeConditionalEntropy(double minConditionalEntropy, int communitySize, int nodeCount) {
        if (minConditionalEntropy == Double.POSITIVE_INFINITY) {
            return 1;
        }
        double probability_x0 = (double) (nodeCount - communitySize) / (double) nodeCount;
        double probability_x1 = (double) communitySize / (double) nodeCount;
        double communityEntropy = entropyTerm(probability_x0) + entropyTerm(probability_x1);
        if (communityEntropy > 0) {
            return minConditionalEntropy / communityEntropy;
        }
        return 1;
    }

    /**
     * Determines the uncertainty about the membership of a node in a particular community of cover 1, when it is known
     * that the node is a member of a particular community in cover 2.
     * @param cover1Memberships The community memberships of cover 1.
     * @param cover2Memberships The community memberships of cover 2.
     * @param cover1CommunityIndex The index of the particular community of cover 1.
     * @param cover2CommunityIndex The index of the particular community of cover 2.
     * @param cover1CommunitySize The size of the particular community of cover 1.
     * @param cover2CommunitySize The size of the particular community of cover 2.
     * @return The uncertainty calculated as the conditional entropy, if eligible according to the
     * constraints of the definition of the NMI. Else positive infinity is returned.
     */
    private double calculateParticularConditionalEntropy(Matrix cover1Memberships, Matrix cover2Memberships, int cover1CommunityIndex, int cover2CommunityIndex,
            int cover1CommunitySize, int cover2CommunitySize) {
        Vector cover1CommunityMemberships = cover1Memberships.getColumn(cover1CommunityIndex);
        Vector cover2CommunityMemberships = cover2Memberships.getColumn(cover2CommunityIndex);
        /*
         * An entry of the product is different than 0 iff the corresponding entries of both
         * communities are different than 0.
         */
        int sharedMembersCount = countNonZeroEntries(
                cover1CommunityMemberships.hadamardProduct(cover2CommunityMemberships));
        /*
         * An entry of the sum is different than 0 iff the corresponding entry of at least one of
         * the communities is different than 0. Positive memberships are assumed.
         */
        int joinedMembersCount = countNonZeroEntries(
                cover1CommunityMemberships.add(cover2CommunityMemberships));
        int nodeCount = cover1Memberships.rows();
        /*
         * Probabilities of y
         */
        double probability_y0 = (double) (nodeCount - cover2CommunitySize) / (double) nodeCount;
        double probability_y1 = (double) cover2CommunitySize / (double) nodeCount;
        /*
         * Conditional probabilities of x given y.
         * NOTE(review): if probability_y0 or probability_y1 is 0, these divisions yield NaN or
         * infinity. NaN values fail the "greater than 0" check of the entropy term and contribute 0.
         */
        double probability_x0_y0 = (double) (nodeCount - joinedMembersCount) / (double) nodeCount / probability_y0;
        double probability_x1_y0 = (double) (cover1CommunitySize - sharedMembersCount) / (double) nodeCount / probability_y0;
        double probability_x0_y1 = (double) (cover2CommunitySize - sharedMembersCount) / (double) nodeCount / probability_y1;
        double probability_x1_y1 = (double) sharedMembersCount / (double) nodeCount / probability_y1;
        double h_x0_y0 = entropyTerm(probability_x0_y0);
        double h_x1_y0 = entropyTerm(probability_x1_y0);
        double h_x0_y1 = entropyTerm(probability_x0_y1);
        double h_x1_y1 = entropyTerm(probability_x1_y1);
        /*
         * Eligibility constraint: the pair is only considered if the "agreeing" terms outweigh the
         * "disagreeing" terms.
         * NOTE(review): the terms compared here are built from the conditional probabilities;
         * confirm that this matches the intended NMI definition.
         */
        if (h_x0_y0 + h_x1_y1 < h_x1_y0 + h_x0_y1) {
            return Double.POSITIVE_INFINITY;
        }
        /*
         * A NaN on either side of the comparison above also ends up here, exactly as it passes
         * through the plain assignment path; the result is then computed from the available terms.
         */
        if (!(h_x0_y0 + h_x1_y1 >= h_x1_y0 + h_x0_y1)) {
            return Double.POSITIVE_INFINITY;
        }
        double conditionalEntropy = (h_x0_y0 + h_x1_y0) * probability_y0;
        conditionalEntropy += (h_x0_y1 + h_x1_y1) * probability_y1;
        return conditionalEntropy;
    }

    /**
     * Calculates a single entropy term for a probability.
     * @param probability The probability.
     * @return {@code -probability * log2(probability)} if the probability is greater than 0, else 0
     * (this includes NaN).
     */
    private static double entropyTerm(double probability) {
        if (probability > 0) {
            return - probability * Math.log(probability) / LOG_2;
        }
        return 0;
    }

    /**
     * Counts the entries of a vector that are visited as non-zero entries.
     * @param vector The vector.
     * @return The non-zero entry count reported by a fresh NonZeroEntriesVectorProcedure.
     */
    private static int countNonZeroEntries(Vector vector) {
        NonZeroEntriesVectorProcedure procedure = new NonZeroEntriesVectorProcedure();
        vector.eachNonZero(procedure);
        return procedure.getNonZeroEntryCount();
    }

    /**
     * Determines the community sizes of all communities of a cover.
     * @param cover The cover.
     * @return A mapping from the community indices to the community sizes.
     * @throws InterruptedException If the executing thread was interrupted.
     */
    private Map<Integer, Integer> determineCommunitySizes(Cover cover) throws InterruptedException {
        Map<Integer, Integer> communitySizes = new HashMap<Integer, Integer>();
        int communityCount = cover.communityCount();
        for (int i = 0; i < communityCount; i++) {
            if (Thread.interrupted()) {
                throw new InterruptedException();
            }
            communitySizes.put(i, cover.getCommunitySize(i));
        }
        return communitySizes;
    }
}