/* Copyright 2008-2010 Gephi Authors : Mathieu Bastian <mathieu.bastian@gephi.org> Website : http://www.gephi.org This file is part of Gephi. DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. Copyright 2011 Gephi Consortium. All rights reserved. The contents of this file are subject to the terms of either the GNU General Public License Version 3 only ("GPL") or the Common Development and Distribution License("CDDL") (collectively, the "License"). You may not use this file except in compliance with the License. You can obtain a copy of the License at http://gephi.org/about/legal/license-notice/ or /cddl-1.0.txt and /gpl-3.0.txt. See the License for the specific language governing permissions and limitations under the License. When distributing the software, include this License Header Notice in each file and include the License files at /cddl-1.0.txt and /gpl-3.0.txt. If applicable, add the following below the License Header, with the fields enclosed by brackets [] replaced by your own identifying information: "Portions Copyrighted [year] [name of copyright owner]" If you wish your version of this file to be governed by only the CDDL or only the GPL Version 3, indicate your decision by adding "[Contributor] elects to include this software in this distribution under the [CDDL or GPL Version 3] license." If you do not indicate a single choice of license, a recipient has the option to distribute your version of this file under either the CDDL, the GPL Version 3 or to extend the choice of license to its licensees as provided above. However, if you add GPL Version 3 code and therefore, elected the GPL Version 3 license, then the option applies only if the new code is made subject to such option by the copyright holder. Contributor(s): Portions Copyrighted 2011 Gephi Consortium. */ package org.gephi.clustering.plugin.mcl; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.gephi.clustering.api.Cluster; import org.gephi.clustering.spi.Clusterer; import org.gephi.graph.api.Edge; import org.gephi.graph.api.Graph; import org.gephi.graph.api.GraphModel; import org.gephi.graph.api.Node; import org.gephi.utils.longtask.spi.LongTask; import org.gephi.utils.progress.Progress; import org.gephi.utils.progress.ProgressTicket; /** * MarkovClustering implements the Markov clustering (MCL) algorithm for graphs, * using a HashMap-based sparse representation of a Markov matrix, i.e., an * adjacency matrix m that is normalised to one. Elements in a column / node can * be interpreted as decision probabilities of a random walker being at that * node. Note: whereas we explain the algorithms with columns, the actual * implementation works with rows because the used sparse matrix has row-major * format. * <p> * The basic idea underlying the MCL algorithm is that dense regions in sparse * graphs correspond with regions in which the number of k-length paths is * relatively large, for small k in N, which corresponds to multiplying * probabilities in the matrix appropriately. Random walks of length k have * higher probability (product) for paths with beginning and ending in the same * dense region than for other paths. * <p> * The algorithm starts by creating a Markov matrix from the graph, for which * first the adjacency matrix is added diagonal elements to include self-loops * for all nodes, i.e., probabilities that the random walker stays at a * particular node. After this initialisation, the algorithm works by * alternating two operations, expansion and inflation, iteratively recomputing * the set of transition probabilities. The expansion step corresponds to matrix * multiplication (on stochastic matrices), the inflation step corresponds with * a parametrized inflation operator Gamma_r, which acts column-wise on (column) * stochastic matrices (here, we use row-wise operation, which is analogous). * <p> * The inflation operator transforms a stochastic matrix into another one by * raising each element to a positive power p and re-normalising columns to keep * the matrix stochastic. The effect is that larger probabilities in each column * are emphasised and smaller ones deemphasised. On the other side, the matrix * multiplication in the expansion step creates new non-zero elements, i.e., * edges. The algorithm converges very fast, and the result is an idempotent * Markov matrix, M = M * M, which represents a hard clustering of the graph * into components. * <p> * Expansion and inflation have two opposing effects: While expansion flattens * the stochastic distributions in the columns and thus causes paths of a random * walker to become more evenly spread, inflation contracts them to favoured * paths. * <p> * Description is based on the introduction of Stijn van Dongen's thesis Graph * Clustering by Flow Simulation (2000); for a mathematical treatment of the * algorithm and the associated MCL process, see there. */ //Original author Gregor Heinrich public class MarkovClustering implements Clusterer, LongTask { private double maxResidual = 0.001; private double gammaExp = 2.0; private double loopGain = 0.; private double zeroMax = 0.001; private Cluster[] clusters; //LongTask private ProgressTicket progressTicket; private boolean cancelled; public void execute(GraphModel graphModel) { cancelled = false; Progress.start(progressTicket); Progress.setDisplayName(progressTicket, "MCL Clustering"); HashMap<Integer, Node> nodeMap = new HashMap<Integer, Node>(); HashMap<Node, Integer> intMap = new HashMap<Node, Integer>(); Graph graph = graphModel.getGraphVisible(); graph.readLock(); //Load matrix SparseMatrix matrix = new SparseMatrix(); int nodeId = 0; for (Edge e : graph.getEdges()) { Node source = e.getSource(); Node target = e.getTarget(); Integer sourceId; Integer targetId; if ((sourceId = intMap.get(source)) == null) { sourceId = nodeId++; intMap.put(source, sourceId); nodeMap.put(sourceId, source); } if ((targetId = intMap.get(target)) == null) { targetId = nodeId++; intMap.put(target, targetId); nodeMap.put(targetId, target); } double weight = e.getWeight(); matrix.add(sourceId, targetId, weight); if (cancelled) { graph.readUnlockAll(); return; } } graph.readUnlock(); matrix = matrix.transpose(); matrix = run(matrix, maxResidual, gammaExp, loopGain, zeroMax); if (cancelled) { return; } Map<Integer, ArrayList<Integer>> map = getClusters(matrix); if (cancelled) { return; } int clusterNumber = 1; List<Cluster> clustersList = new ArrayList<Cluster>(); Set<ArrayList<Integer>> sortedClusters = new HashSet<ArrayList<Integer>>(); for (ArrayList<Integer> c : map.values()) { if (!sortedClusters.contains(c)) { sortedClusters.add(c); Node[] nodes = new Node[c.size()]; int i = 0; for (Integer in : c) { Node node = nodeMap.get(in); nodes[i] = node; i++; } clustersList.add(new MCLCluster(nodes, clusterNumber)); clusterNumber++; } if (cancelled) { return; } } clusters = clustersList.toArray(new Cluster[0]); Progress.finish(progressTicket); } public Cluster[] getClusters() { return clusters; } public boolean cancel() { cancelled = true; return true; } public void setProgressTicket(ProgressTicket progressTicket) { this.progressTicket = progressTicket; } private static class MCLCluster implements Cluster { private Node[] nodes; private String name; private Node metaNode; public MCLCluster(Node[] nodes, int number) { this.nodes = nodes; this.name = "Cluster " + number; } public Node[] getNodes() { return nodes; } public int getNodesCount() { return nodes.length; } public String getName() { return name; } public Node getMetaNode() { return metaNode; } public void setMetaNode(Node node) { this.metaNode = node; } } /** * run the MCL process. * * @param a matrix * @param maxResidual maximum difference between row elements and row square * sum (measure of idempotence) * @param pGamma inflation exponent for Gamma operator * @param loopGain values for cycles * @param maxZero maximum value considered zero for pruning operations * @return the resulting matrix */ public SparseMatrix run(SparseMatrix a, double maxResidual, double pGamma, double loopGain, double maxZero) { //System.out.println("original matrix\n" + a.transpose().toStringDense()); // add cycles addLoops(a, loopGain); // make stochastic a.normaliseRows(); //System.out.println("normalised\n" + a.transpose().toStringDense()); double residual = 1.; int i = 0; if (cancelled) { return a; } // main iteration while (residual > maxResidual) { i++; a = expand(a); residual = inflate(a, pGamma, maxZero); System.out.println("residual energy = " + residual); if (cancelled) { return a; } } return a; } /** * inflate stochastic matrix by Hadamard (elementwise) exponentiation, * pruning and normalisation : * <p> * result = Gamma ( m, p ) = normalise ( prune ( m .^ p ) ). * <p> * By convention, normalisation is done along rows (SparseMatrix has * row-major representation) * * @param m matrix (mutable) * @param p exponent as a double * @param zeromax below which elements are pruned from the sparse matrix * @return residuum value, m is modified. */ public double inflate(SparseMatrix m, double p, double zeromax) { double res = 0.; // matlab: m = m .^ p m.hadamardPower(p); // matlab: m(find(m < threshold)) = 0 m.prune(zeromax); // matlab [for cols]: dinv = diag(1./sum(m)); m = m * dinv; return // sum(m) SparseVector rowsums = m.normalise(1.); // check if done: if the maximum element for (int i : rowsums.keySet()) { SparseVector row = m.get(i); double max = row.max(); double sumsq = row.sum(2.); res = Math.max(res, max - sumsq); if (cancelled) { return res; } } return res; } /** * expand stochastic quadratic matrix by sqaring it with itself: result = m * * m. Here normalisation is rowwise. * * @param matrix * @return new matrix (pointer != argument) */ public SparseMatrix expand(SparseMatrix m) { m = m.times(m); return m; } /** * add loops with specific energy, which corresponds to adding loopGain to * the diagonal elements. * * @param a * @param loopGain */ private void addLoops(SparseMatrix a, double loopGain) { if (loopGain <= 0) { return; } for (int i = 0; i < a.size(); i++) { a.add(i, i, loopGain); if (cancelled) { return; } } } public double getGammaExp() { return gammaExp; } /** * Set inflation exponent for Gamma operator. Default is 2.0 */ public void setGammaExp(double gammaExp) { this.gammaExp = gammaExp; } public double getLoopGain() { return loopGain; } /** * Set values for cycles. Default is 0.0 */ public void setLoopGain(double loopGain) { this.loopGain = loopGain; } public double getMaxResidual() { return maxResidual; } /** * Set the maximum difference between row elements and row square sum (measure of idempotence). Default is 0.001 */ public void setMaxResidual(double maxResidual) { this.maxResidual = maxResidual; } public double getZeroMax() { return zeroMax; } /** * Set the maximum value considered zero for pruning operations. Default is 0.001 */ public void setZeroMax(double zeroMax) { this.zeroMax = zeroMax; } private Map<Integer, ArrayList<Integer>> getClusters(SparseMatrix matrix) { Map<Integer, ArrayList<Integer>> clusters = new HashMap<Integer, ArrayList<Integer>>(); int clusterCount = 0; double[][] mat = matrix.getDense(); for (int i = 0; i < mat.length; i++) { for (int j = 0; j < mat[0].length; j++) { double value = mat[i][j]; if (value != 0.0) { if (i == j) { continue; } if (clusters.containsKey(j)) { // Already seen "column" -- get the cluster and add column ArrayList<Integer> columnCluster = clusters.get(j); if (clusters.containsKey(i)) { // We've already seen row also -- join them ArrayList<Integer> rowCluster = clusters.get(i); if (rowCluster == columnCluster) { continue; } columnCluster.addAll(rowCluster); clusterCount--; } else { // debugln("Adding "+row+" to "+columnCluster.getClusterNumber()); columnCluster.add(i); } for (Integer in : columnCluster) { clusters.put(in, columnCluster); } } else { ArrayList<Integer> rowCluster; // First time we've seen "column" -- have we already seen "row" if (clusters.containsKey(i)) { // Yes, just add column to row's cluster rowCluster = clusters.get(i); // debugln("Adding "+column+" to "+rowCluster.getClusterNumber()); rowCluster.add(j); } else { rowCluster = new ArrayList<Integer>(); clusterCount++; // debugln("Created new cluster "+rowCluster.getClusterNumber()+" with "+row+" and "+column); rowCluster.add(j); rowCluster.add(i); } for (Integer in : rowCluster) { clusters.put(in, rowCluster); } } } if (cancelled) { return clusters; } } } return clusters; } }