/*
* Copyright (c) 2003, the JUNG Project and the Regents of the University
* of California
* All rights reserved.
*
* This software is open-source under the BSD license; see either
* "license.txt" or
* http://jung.sourceforge.net/license.txt for a description.
*/
package edu.uci.ics.jung.algorithms.metrics;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.collections15.CollectionUtils;
import edu.uci.ics.jung.graph.DirectedGraph;
import edu.uci.ics.jung.graph.Graph;
/**
* TriadicCensus is a standard social network tool that counts, for each of the
* different possible configurations of three vertices, the number of times that
* that configuration occurs in the given graph. This may then be compared to
* the set of expected counts for this particular graph or to an expected
* sample. This is often used in p* modeling.
* <p>
* To use this class,
*
* <pre>
* long[] triad_counts = TriadicCensus(dg);
* </pre>
*
* where <code>dg</code> is a <code>DirectedGraph</code>. ith element of the
* array (for i in [1,16]) is the number of occurrences of the corresponding
* triad type. (The 0th element is not meaningful; this array is effectively
* 1-based.) To get the name of the ith triad (e.g. "003"), look at the global
* constant array c.TRIAD_NAMES[i]
* <p>
* Triads are named as (number of pairs that are mutually tied) (number of pairs
* that are one-way tied) (number of non-tied pairs) in the triple. Since there
* are be only three pairs, there is a finite set of these possible triads.
* <p>
* In fact, there are exactly 16, conventionally sorted by the number of
* realized edges in the triad:
* <table>
* <tr>
* <th>Number</th>
* <th>Configuration</th>
* <th>Notes</th>
* </tr>
* <tr>
* <td>1</td>
* <td>003</td>
* <td>The empty triad</td>
* </tr>
* <tr>
* <td>2</td>
* <td>012</td>
* <td></td>
* </tr>
* <tr>
* <td>3</td>
* <td>102</td>
* <td></td>
* </tr>
* <tr>
* <td>4</td>
* <td>021D</td>
* <td>"Down": the directed edges point away</td>
* </tr>
* <tr>
* <td>5</td>
* <td>021U</td>
* <td>"Up": the directed edges meet</td>
* </tr>
* <tr>
* <td>6</td>
* <td>021C</td>
* <td>"Circle": one in, one out</td>
* </tr>
* <tr>
* <td>7</td>
* <td>111D</td>
* <td>"Down": 021D but one edge is mutual</td>
* </tr>
* <tr>
* <td>8</td>
* <td>111U</td>
* <td>"Up": 021U but one edge is mutual</td>
* </tr>
* <tr>
* <td>9</td>
* <td>030T</td>
* <td>"Transitive": two point to the same vertex</td>
* </tr>
* <tr>
* <td>10</td>
* <td>030C</td>
* <td>"Circle": A->B->C->A</td>
* </tr>
* <tr>
* <td>11</td>
* <td>201</td>
* <td></td>
* </tr>
* <tr>
* <td>12</td>
* <td>120D</td>
* <td>"Down": 021D but the third edge is mutual</td>
* </tr>
* <tr>
* <td>13</td>
* <td>120U</td>
* <td>"Up": 021U but the third edge is mutual</td>
* </tr>
* <tr>
* <td>14</td>
* <td>120C</td>
* <td>"Circle": 021C but the third edge is mutual</td>
* </tr>
* <tr>
* <td>15</td>
* <td>210</td>
* <td></td>
* </tr>
* <tr>
* <td>16</td>
* <td>300</td>
* <td>The complete</td>
* </tr>
* </table>
* <p>
* This implementation takes O( m ), m is the number of edges in the graph. <br>
* It is based on
* <a href="http://vlado.fmf.uni-lj.si/pub/networks/doc/triads/triads.pdf"> A
* subquadratic triad census algorithm for large sparse networks with small
* maximum degree</a> Vladimir Batagelj and Andrej Mrvar, University of
* Ljubljana Published in Social Networks.
*
* @author Danyel Fisher
* @author Tom Nelson - converted to jung2
*
*/
public class TriadicCensus {
// NOTE THAT THIS RETURNS STANDARD 1-16 COUNT!
// and their types
public static final String[] TRIAD_NAMES = { "N/A", "003", "012", "102",
"021D", "021U", "021C", "111D", "111U", "030T", "030C", "201",
"120D", "120U", "120C", "210", "300" };
public static final int MAX_TRIADS = TRIAD_NAMES.length;
/**
* Returns an array whose ith element (for i in [1,16]) is the number of
* occurrences of the corresponding triad type in <code>g</code>. (The 0th
* element is not meaningful; this array is effectively 1-based.)
*
* @param g
*/
public static <V, E> long[] getCounts(DirectedGraph<V, E> g) {
long[] count = new long[MAX_TRIADS];
List<V> id = new ArrayList<V>(g.getVertices());
// apply algorithm to each edge, one at at time
for (int i_v = 0; i_v < g.getVertexCount(); i_v++) {
V v = id.get(i_v);
for (V u : g.getNeighbors(v)) {
int triType = -1;
if (id.indexOf(u) <= i_v) {
continue;
}
Set<V> neighbors = new HashSet<V>(CollectionUtils
.union(g.getNeighbors(u), g.getNeighbors(v)));
neighbors.remove(u);
neighbors.remove(v);
if (g.isSuccessor(v, u) && g.isSuccessor(u, v)) {
triType = 3;
} else {
triType = 2;
}
count[triType] += g.getVertexCount() - neighbors.size() - 2;
for (V w : neighbors) {
if (shouldCount(g, id, u, v, w)) {
count[triType(triCode(g, u, v, w))]++;
}
}
}
}
int sum = 0;
for (int i = 2; i <= 16; i++) {
sum += count[i];
}
int n = g.getVertexCount();
count[1] = n * (n - 1) * (n - 2) / 6 - sum;
return count;
}
/**
* This is the core of the technique in the paper. Returns an int from 0 to
* 65 based on: WU -> 32 UW -> 16 WV -> 8 VW -> 4 UV -> 2 VU -> 1
*
*/
public static <V, E> int triCode(Graph<V, E> g, V u, V v, V w) {
int i = 0;
i += link(g, v, u) ? 1 : 0;
i += link(g, u, v) ? 2 : 0;
i += link(g, v, w) ? 4 : 0;
i += link(g, w, v) ? 8 : 0;
i += link(g, u, w) ? 16 : 0;
i += link(g, w, u) ? 32 : 0;
return i;
}
protected static <V, E> boolean link(Graph<V, E> g, V a, V b) {
return g.isPredecessor(b, a);
}
/**
* Simply returns the triCode.
*
* @param triCode
* @return the string code associated with the numeric type
*/
public static int triType(int triCode) {
return codeToType[triCode];
}
/**
* For debugging purposes, this is copied straight out of the paper which
* means that they refer to triad types 1-16.
*/
protected static final int[] codeToType = { 1, 2, 2, 3, 2, 4, 6, 8, 2, 6, 5,
7, 3, 8, 7, 11, 2, 6, 4, 8, 5, 9, 9, 13, 6, 10, 9, 14, 7, 14, 12,
15, 2, 5, 6, 7, 6, 9, 10, 14, 4, 9, 9, 12, 8, 13, 14, 15, 3, 7, 8,
11, 7, 12, 14, 15, 8, 14, 13, 15, 11, 15, 15, 16 };
/**
* Make sure we have a canonical ordering: Returns true if u < w, or v < w <
* u and v doesn't link to w
*
* @param id
* @param u
* @param v
* @param w
* @return true if u < w, or if v < w < u and v doesn't link to w; false
* otherwise
*/
protected static <V, E> boolean shouldCount(Graph<V, E> g, List<V> id, V u,
V v, V w) {
int i_u = id.indexOf(u);
int i_w = id.indexOf(w);
if (i_u < i_w) {
return true;
}
int i_v = id.indexOf(v);
if ((i_v < i_w) && (i_w < i_u) && (!g.isNeighbor(w, v))) {
return true;
}
return false;
}
}