package cc.mallet.cluster.clustering_scorer; import cc.mallet.cluster.Clustering; import cc.mallet.cluster.iterator.AllPairsIterator; import cc.mallet.cluster.neighbor_evaluator.AgglomerativeNeighbor; import cc.mallet.cluster.neighbor_evaluator.NeighborEvaluator; import cc.mallet.cluster.util.ClusterUtils; import cc.mallet.types.Instance; /** * For each pair of Instances, if the pair is predicted to be in the same * cluster, increment the total by the evaluator's score for merging the two. * Else, increment by 1 - evaluator score. Divide by number of pairs. * * @author culotta * */ public class PairwiseScorer implements ClusteringScorer { NeighborEvaluator evaluator; public PairwiseScorer(NeighborEvaluator evaluator) { super(); this.evaluator = evaluator; } public double score(Clustering clustering) { Clustering singletons = ClusterUtils .createSingletonClustering(clustering.getInstances()); double total = 0; int count = 0; for (AllPairsIterator iter = new AllPairsIterator(singletons); iter .hasNext(); count++) { Instance instance = (Instance) iter.next(); AgglomerativeNeighbor neighbor = (AgglomerativeNeighbor) instance .getData(); double score = evaluator.evaluate(neighbor); int[][] clusters = neighbor.getOldClusters(); if (clustering.getLabel(clusters[0][0]) == clustering .getLabel(clusters[1][0])) total += score; else total += 1.0 - score; } return (double) total / count; } }