/*
 * Copyright 2008 Network Engine for Objects in Lund AB [neotechnology.com]
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.neo4j.graphalgo.centrality;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.Set;

import org.neo4j.graphalgo.MatrixUtil;
import org.neo4j.graphalgo.MatrixUtil.DoubleMatrix;
import org.neo4j.graphalgo.MatrixUtil.DoubleVector;
import org.neo4j.graphalgo.shortestpath.CostEvaluator;
import org.neo4j.graphdb.Direction;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;

/**
 * Computes eigenvector centrality with the "Arnoldi iteration". Convergence
 * depends on the eigenvalues of the input adjacency matrix (the network): if
 * the two largest eigenvalues are u1 and u2, a small ratio u2/u1 gives faster
 * convergence (i.e. faster computation). NOTE: Currently only works on
 * Doubles.
 * @complexity The {@link CostEvaluator} is called once for every relationship
 *             in each iteration. Assuming this is done in constant time, the
 *             total time complexity is O(j(n + m + i)) when j internal
 *             restarts are required and i iterations are done in the internal
 *             eigenvector solving of the H matrix. Typically j = the number
 *             of iterations / k, where normally k = 3.
 * @author Patrik Larsson
 */
public class EigenvectorCentralityArnoldi implements EigenvectorCentrality
{
    protected Direction relationDirection;
    protected CostEvaluator<Double> costEvaluator;
    protected Set<Node> nodeSet;
    protected Set<Relationship> relationshipSet;
    protected double precision = 0.001;
    protected boolean doneCalculation = false;
    protected Map<Node,Double> values;
    protected int totalIterations = 0;
    private int maxIterations = Integer.MAX_VALUE;

    /**
     * @param relationDirection
     *            The direction in which the relationships should be followed.
     * @param costEvaluator
     * @see CostEvaluator
     * @param nodeSet
     *            The set of nodes the calculation should be run on.
     * @param relationshipSet
     *            The set of relationships that should be processed.
     * @param precision
     *            Precision factor (e.g. 0.01 for 1% error). Note that this is
     *            not the error from the correct values, but the amount of
     *            change tolerated in one iteration.
     */
    public EigenvectorCentralityArnoldi( Direction relationDirection,
        CostEvaluator<Double> costEvaluator, Set<Node> nodeSet,
        Set<Relationship> relationshipSet, double precision )
    {
        super();
        this.relationDirection = relationDirection;
        this.costEvaluator = costEvaluator;
        this.nodeSet = nodeSet;
        this.relationshipSet = relationshipSet;
        this.precision = precision;
    }

    /**
     * This can be used to retrieve the result for every node. Will return
     * null if the node is not contained in the node set initially given, or
     * if it doesn't receive a result because no relationship points to it.
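     * <p>
     * A minimal usage sketch, assuming nodeSet, relationshipSet and someNode
     * have already been collected from the graph, and giving every
     * relationship the weight 1.0:
     *
     * <pre>
     * EigenvectorCentrality centrality = new EigenvectorCentralityArnoldi(
     *     Direction.OUTGOING, new CostEvaluator&lt;Double&gt;()
     *     {
     *         public Double getCost( Relationship relationship, boolean backwards )
     *         {
     *             return 1.0;
     *         }
     *     }, nodeSet, relationshipSet, 0.01 );
     * Double someNodeCentrality = centrality.getCentrality( someNode );
     * </pre>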
     * @param node
     * @return
     */
    public Double getCentrality( Node node )
    {
        calculate();
        return values.get( node );
    }

    /**
     * This resets the calculation if we for some reason would like to redo it.
     */
    public void reset()
    {
        doneCalculation = false;
    }

    /**
     * Internal method that performs the actual calculation. It can, however,
     * be called externally to trigger the calculation manually. The
     * calculation is done the first time this method is run. Upon successive
     * requests, the old result is returned, unless the calculation is reset
     * via {@link #reset()}.
     */
    public void calculate()
    {
        // Don't do it more than once
        if ( doneCalculation )
        {
            return;
        }
        doneCalculation = true;
        values = new HashMap<Node,Double>();
        totalIterations = 0;
        // Generate a random start vector
        Random random = new Random( System.currentTimeMillis() );
        for ( Node node : nodeSet )
        {
            values.put( node, random.nextDouble() );
        }
        normalize( values );
        runIterations( maxIterations );
    }

    /**
     * Stop condition for the iteration.
     * @return true if enough precision has been achieved.
     */
    private boolean timeToStop( Map<Node,Double> oldValues,
        Map<Node,Double> newValues )
    {
        for ( Node node : oldValues.keySet() )
        {
            if ( newValues.get( node ) == null )
            {
                return false;
            }
            if ( oldValues.get( node ) == 0.0 )
            {
                if ( Math.abs( newValues.get( node ) ) > precision )
                {
                    return false;
                }
                continue;
            }
            double factor = newValues.get( node ) / oldValues.get( node );
            factor = Math.abs( factor );
            if ( factor - precision > 1.0 || factor + precision < 1.0 )
            {
                return false;
            }
        }
        return true;
    }

    /**
     * This runs a number of iterations in the computation and stops when
     * enough precision has been reached. A maximum number of iterations to
     * perform is supplied. NOTE: For maxNrIterations > 0 at least one
     * iteration will be run, regardless of whether sufficient precision has
     * already been reached. This method also ignores the global limit defined
     * by maxIterations.
     * @param maxNrIterations
     *            The maximum number of iterations to run.
     * @return the number of iterations performed. If this is lower than the
     *         given maxNrIterations, the desired precision has been reached.
     */
    public int runIterations( int maxNrIterations )
    {
        if ( maxNrIterations <= 0 )
        {
            return 0;
        }
        int localIterations = 0;
        while ( localIterations < maxNrIterations )
        {
            Map<Node,Double> oldValues = values;
            localIterations += runInternalArnoldi( 3 );
            if ( timeToStop( oldValues, values ) )
            {
                break;
            }
        }
        // If the first value is negative (possibly the whole vector), negate
        // the whole vector
        if ( values.get( nodeSet.iterator().next() ) < 0 )
        {
            for ( Node node : nodeSet )
            {
                values.put( node, -values.get( node ) );
            }
        }
        return localIterations;
    }

    /**
     * This runs the Arnoldi decomposition in a specified number of steps.
     * @param iterations
     *            The number of steps to perform, i.e. the dimension of the H
     *            matrix.
     * @return The number of iterations actually performed. This can be less
     *         than the input argument if the starting vector does not have
     *         components along that many linearly independent eigenvectors.
     */
    protected int runInternalArnoldi( int iterations )
    {
        // Create a list of the nodes, in order to quickly translate an index
        // into a node.
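        // The remainder of this method proceeds in three stages: the main
        // loop below builds an orthonormal basis (the rows of qMatrix) for a
        // Krylov subspace of the weighted adjacency matrix, together with the
        // small Hessenberg matrix hMatrix; the power method is then run on
        // hMatrix; and the resulting eigenvector is finally mapped back
        // through qMatrix into a Ritz vector, which approximates the dominant
        // eigenvector of the full matrix.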
        ArrayList<Node> nodes = new ArrayList<Node>( nodeSet.size() );
        for ( Node node : nodeSet )
        {
            nodes.add( node );
        }
        DoubleMatrix hMatrix = new DoubleMatrix();
        DoubleMatrix qMatrix = new DoubleMatrix();
        for ( int i = 0; i < nodes.size(); ++i )
        {
            qMatrix.set( 0, i, values.get( nodes.get( i ) ) );
        }
        int localIterations = 1;
        // The main Arnoldi iteration loop
        while ( true )
        {
            ++totalIterations;
            Map<Node,Double> newValues = new HashMap<Node,Double>();
            // "matrix multiplication"
            for ( Relationship relationship : relationshipSet )
            {
                if ( relationDirection.equals( Direction.BOTH )
                    || relationDirection.equals( Direction.OUTGOING ) )
                {
                    processRelationship( newValues, relationship, false );
                }
                if ( relationDirection.equals( Direction.BOTH )
                    || relationDirection.equals( Direction.INCOMING ) )
                {
                    processRelationship( newValues, relationship, true );
                }
            }
            // Orthogonalize against the previous basis vectors
            for ( int j = 0; j < localIterations; ++j )
            {
                DoubleVector qj = qMatrix.getRow( j );
                // vector product
                double product = 0;
                for ( int i = 0; i < nodes.size(); ++i )
                {
                    Double d1 = newValues.get( nodes.get( i ) );
                    Double d2 = qj.get( i );
                    if ( d1 != null && d2 != null )
                    {
                        product += d1 * d2;
                    }
                }
                hMatrix.set( j, localIterations - 1, product );
                if ( product != 0.0 )
                {
                    // vector subtraction
                    for ( int i = 0; i < nodes.size(); ++i )
                    {
                        Node node = nodes.get( i );
                        Double value = newValues.get( node );
                        if ( value == null )
                        {
                            value = 0.0;
                        }
                        Double qValue = qj.get( i );
                        if ( qValue != null )
                        {
                            newValues.put( node, value - product * qValue );
                        }
                    }
                }
            }
            double normalizeFactor = normalize( newValues );
            values = newValues;
            DoubleVector qVector = new DoubleVector();
            for ( int i = 0; i < nodes.size(); ++i )
            {
                qVector.set( i, newValues.get( nodes.get( i ) ) );
            }
            qMatrix.setRow( localIterations, qVector );
            if ( normalizeFactor == 0.0 || localIterations >= nodeSet.size()
                || localIterations >= iterations )
            {
                break;
            }
            hMatrix.set( localIterations, localIterations - 1,
                normalizeFactor );
            ++localIterations;
        }
        // Employ the power method to find an eigenvector of h
        Random random = new Random( System.currentTimeMillis() );
        DoubleVector vector = new DoubleVector();
        for ( int i = 0; i < nodeSet.size(); ++i )
        {
            vector.set( i, random.nextDouble() );
        }
        MatrixUtil.normalize( vector );
        boolean powerDone = false;
        int its = 0;
        double powerPrecision = 0.1;
        while ( !powerDone )
        {
            DoubleVector newVector = MatrixUtil.multiply( hMatrix, vector );
            MatrixUtil.normalize( newVector );
            powerDone = true;
            for ( Integer index : vector.getIndices() )
            {
                if ( newVector.get( index ) == null )
                {
                    continue;
                }
                double factor = Math.abs( newVector.get( index )
                    / vector.get( index ) );
                if ( factor - powerPrecision > 1.0
                    || factor + powerPrecision < 1.0 )
                {
                    powerDone = false;
                    break;
                }
            }
            vector = newVector;
            ++its;
            if ( its > 100 )
            {
                break;
            }
        }
        // Multiply q and vector to get a Ritz vector
        DoubleVector ritzVector = new DoubleVector();
        for ( int r = 0; r < nodeSet.size(); ++r )
        {
            for ( int c = 0; c < localIterations; ++c )
            {
                ritzVector.incrementValue( r, vector.get( c )
                    * qMatrix.get( c, r ) );
            }
        }
        for ( int i = 0; i < nodeSet.size(); ++i )
        {
            values.put( nodes.get( i ), ritzVector.get( i ) );
        }
        normalize( values );
        return localIterations;
    }

    /**
     * Internal method used in the "matrix multiplication" in each iteration.
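     * For each relationship, the value of the source node, multiplied by the
     * relationship cost from the {@link CostEvaluator}, is added to the
     * accumulated value of the target node in newValues. When backwards is
     * true, the relationship is traversed in the reverse direction, i.e. the
     * start and end nodes swap roles.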
     */
    protected void processRelationship( Map<Node,Double> newValues,
        Relationship relationship, boolean backwards )
    {
        Node startNode = relationship.getStartNode();
        if ( backwards )
        {
            startNode = relationship.getEndNode();
        }
        Node endNode = relationship.getOtherNode( startNode );
        Double newValue = newValues.get( endNode );
        if ( newValue == null )
        {
            newValue = 0.0;
        }
        if ( values.get( startNode ) != null )
        {
            newValue += values.get( startNode )
                * costEvaluator.getCost( relationship, backwards );
        }
        newValues.put( endNode, newValue );
    }

    /**
     * Normalizes a vector represented as a Map.
     * @param vector
     * @return the initial length of the vector.
     */
    protected double normalize( Map<Node,Double> vector )
    {
        // Compute the vector length
        double sum = 0;
        for ( Node node : vector.keySet() )
        {
            Double d = vector.get( node );
            if ( d == null )
            {
                d = 0.0;
                vector.put( node, 0.0 );
            }
            sum += d * d;
        }
        sum = Math.sqrt( sum );
        // Divide all components
        if ( sum > 0.0 )
        {
            for ( Node node : vector.keySet() )
            {
                vector.put( node, vector.get( node ) / sum );
            }
        }
        return sum;
    }

    /**
     * @return the number of iterations made.
     */
    public int getTotalIterations()
    {
        return totalIterations;
    }

    /**
     * @return the maxIterations
     */
    public int getMaxIterations()
    {
        return maxIterations;
    }

    /**
     * Limits the maximum number of iterations to run. By default this is
     * Integer.MAX_VALUE; in practice it should normally be limited to
     * somewhere around 50-100.
     * @param maxIterations
     *            the maxIterations to set
     */
    public void setMaxIterations( int maxIterations )
    {
        this.maxIterations = maxIterations;
    }
}