/******************************************************************************* * Copyright (c) 2010 Haifeng Li * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *******************************************************************************/ package smile.validation; import smile.math.Math; /** * Rand Index. Rand index is defined as the number of pairs of objects * that are either in the same group or in different groups in both partitions * divided by the total number of pairs of objects. The Rand index lies between * 0 and 1. When two partitions agree perfectly, the Rand index achieves the * maximum value 1. A problem with Rand index is that the expected value of * the Rand index between two random partitions is not a constant. This problem * is corrected by the adjusted Rand index that assumes the generalized * hyper-geometric distribution as the model of randomness. The adjusted Rand * index has the maximum value 1, and its expected value is 0 in the case * of random clusters. A larger adjusted Rand index means a higher agreement * between two partitions. The adjusted Rand index is recommended for measuring * agreement even when the partitions compared have different numbers of clusters. * * @author Haifeng Li */ public class RandIndex implements ClusterMeasure { @Override public double measure(int[] y1, int[] y2) { if (y1.length != y2.length) { throw new IllegalArgumentException(String.format("The vector sizes don't match: %d != %d.", y1.length, y2.length)); } // Get # of non-zero classes in each solution int n = y1.length; int[] label1 = Math.unique(y1); int n1 = label1.length; int[] label2 = Math.unique(y2); int n2 = label2.length; // Calculate N contingency matrix int[][] count = new int[n1][n2]; for (int i = 0; i < n1; i++) { for (int j = 0; j < n2; j++) { int match = 0; for (int k = 0; k < n; k++) { if (y1[k] == label1[i] && y2[k] == label2[j]) { match++; } } count[i][j] = match; } } // Marginals int[] count1 = new int[n1]; int[] count2 = new int[n2]; for (int i = 0; i < n1; i++) { for (int j = 0; j < n2; j++) { count1[i] += count[i][j]; count2[j] += count[i][j]; } } // Calculate RAND - Non-adjusted double rand_T = 0.0; for (int i = 0; i < n1; i++) { for (int j = 0; j < n2; j++) { rand_T += Math.sqr(count[i][j]); } } rand_T -= n; double rand_P = 0.0; for (int i = 0; i < n1; i++) { rand_P += Math.sqr(count1[i]); } rand_P -= n; double rand_Q = 0.0; for (int j = 0; j < n2; j++) { rand_Q += Math.sqr(count2[j]); } rand_Q -= n; double rand = (rand_T - 0.5 * rand_P - 0.5 * rand_Q + Math.choose(n, 2)) / Math.choose(n, 2); return rand; } @Override public String toString() { return "Rand Index"; } }