// InfluenceMaximization.java example

/*
 * File:                InfluenceMaximization.java
 * Authors:             Tu-Thach Quach
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 * 
 * Copyright 2016, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 * license for use of this work by or on behalf of the U.S. Government. 
 * Export of this program may require a license from the United States
 * Government. See CopyrightHistory.txt for complete details.
 * 
 */

package examples;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationReferences;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.graph.DirectedNodeEdgeGraph;
import gov.sandia.cognition.graph.inference.GraphWrappingEnergyFunction;
import gov.sandia.cognition.graph.inference.SumProductDirectedPropagation;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;

/**
 * Use this to do influence maximization: find the set of seed nodes that
 * maximizes influence spread. This is an implementation of the CELF algorithm.
 * See the main method for explanation on the required parameters.
 *
 * @author Tu-Thach Quach
 *
 */
@PublicationReferences(references =
{
    @PublicationReference(author
        = "Tu-Thach Quach and Jeremy D. Wendt",
        title
        = "A diffusion model for maximizing influence spread in large networks",
        type = PublicationType.Conference,
        publication
        = "Proceedings of the International Conference on Social Informatics",
        year
        = 2016),
    @PublicationReference(author
        = "Jure Leskovec, A. Krause, C. Guestrin, C. Faloutsos, J. VanBriesen, and N. Glance",
        title = "Cost-effective outbreak detection in networks", type
        = PublicationType.Conference, publication
        = "Proceedings of the 13th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining",
        year = 2007)
})
public class InfluenceMaximization
{

    /**
     * Greedily picks up to {@code k} seed nodes using CELF-style lazy
     * re-evaluation of marginal gains.
     * <p>
     * Candidates are kept in a priority queue ordered by their last known
     * gain. A polled candidate whose gain was computed for the current seed
     * set size is accepted as the next seed; otherwise its spread is
     * recomputed with the solver and it is pushed back into the queue.
     * </p>
     * <p>
     * Side effects: every accepted seed is left labelled {@code 1} in
     * {@code f}, every re-evaluated candidate is set back to label {@code 0},
     * and one progress line is printed to standard output per polled
     * candidate.
     * </p>
     *
     * @param f The energy function to maximize.
     * @param nodeInfluenceMap The influence spread of each node alone, keyed
     * by node id.
     * @param k The number of nodes to find, e.g., seed size.
     * @param numCores The number of processing cores to use.
     * @return The accepted seeds in the order they were chosen; fewer than
     * {@code k} entries when the candidates run out first.
     */
    public List<NodeMarginalInfluence> maximize(
        GraphWrappingEnergyFunction<Integer, String> f,
        Map<String, Double> nodeInfluenceMap,
        int k,
        int numCores)
    {
        // Spread obtained with no seed node at all.
        SumProductDirectedPropagation<Integer> propagation
            = new SumProductDirectedPropagation<>(
                InfluenceSpread.MAX_NUM_ITERATIONS, 0.001, numCores);
        propagation.init(f);
        propagation.solve();
        final double spreadWithoutSeeds
            = sumBeliefsForLabelOne(f, propagation);

        // Every node starts as a candidate scored by its stand-alone spread.
        PriorityQueue<NodeMarginalInfluence> candidates
            = new PriorityQueue<>();
        for (Map.Entry<String, Double> single : nodeInfluenceMap.entrySet())
        {
            NodeMarginalInfluence candidate = new NodeMarginalInfluence();
            candidate.id = single.getKey();
            candidate.gain = single.getValue() - spreadWithoutSeeds;
            candidate.spread = single.getValue();
            candidate.iteration = 0;
            candidates.add(candidate);
        }

        List<NodeMarginalInfluence> chosen = new ArrayList<>();
        double currentSpread = 0; // Spread achieved by the seeds chosen so far.
        int polled = 0;
        while (!candidates.isEmpty() && chosen.size() < k)
        {
            NodeMarginalInfluence best = candidates.poll();
            polled++;

            boolean stale = best.iteration != chosen.size();
            if (stale)
            {
                // The gain predates the current seed set: recompute the
                // spread with this node temporarily labelled as a seed.
                f.setLabel(best.id, 1);
                propagation.solve();
                f.setLabel(best.id, 0);

                double trialSpread = sumBeliefsForLabelOne(f, propagation);
                best.gain = trialSpread - currentSpread;
                best.spread = trialSpread;
                best.iteration = chosen.size();
                candidates.add(best);
            }
            else
            {
                // The gain is current and it is the largest: accept the node.
                best.numExamined = polled;
                chosen.add(best);
                f.setLabel(best.id, 1);
                currentSpread = best.spread;
            }

            System.out.println("Seed size: " + chosen.size()
                + "; examined: " + polled);
        }

        return chosen;
    }

    /**
     * Adds up the solver's belief for label {@code 1} over every node index
     * of the energy function.
     *
     * @param f The energy function that supplies the node count.
     * @param solver The solver whose most recent beliefs are read.
     * @return The sum of {@code solver.getBelief(node, 1)} over all nodes.
     */
    private static double sumBeliefsForLabelOne(
        GraphWrappingEnergyFunction<Integer, String> f,
        SumProductDirectedPropagation<Integer> solver)
    {
        double total = 0;
        for (int node = 0; node < f.numNodes(); node++)
        {
            total += solver.getBelief(node, 1);
        }
        return total;
    }

    /**
     * Reads the per-node influence file into a map.
     * <p>
     * Each non-blank line must hold at least two whitespace-separated fields:
     * the node id followed by its spread. Leading and trailing whitespace is
     * ignored, blank lines are skipped, and any fields after the second are
     * ignored. If a node id appears more than once, the last value wins.
     * </p>
     *
     * @param filename Path of the influence file to read.
     * @return A map from node id to the spread parsed from the file.
     * @throws IOException If the file cannot be read, or a non-blank line has
     * fewer than two fields.
     * @throws NumberFormatException If a spread field is not a valid double.
     */
    public static Map<String, Double> readInitialInfluence(String filename)
        throws IOException
    {
        Map<String, Double> nodeInfluenceMap = new HashMap<>();
        try (BufferedReader reader
            = new BufferedReader(new FileReader(filename)))
        {
            int lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null)
            {
                lineNumber++;

                // Trim first: String.split keeps a leading empty token, so
                // an indented line would otherwise yield "" as the node id.
                String trimmed = line.trim();
                if (trimmed.isEmpty())
                {
                    continue;
                }

                String[] items = trimmed.split("\\s+");
                if (items.length < 2)
                {
                    throw new IOException(filename + ":" + lineNumber
                        + ": expected \"node_id spread\" but found \""
                        + trimmed + "\"");
                }

                nodeInfluenceMap.put(items[0], Double.parseDouble(items[1]));
            }
        }
        return nodeInfluenceMap;
    }

    /**
     * Hard-coded settings for this example. Every field starts at the default
     * shown in its initializer.
     */
    private static class Arguments
    {

        /**
         * The graph file. Each line is source, dest, influence_src_to_dst,
         * influence_dst_to_src.
         */
        String inputFilename = "example.txt";

        /**
         * File holding each node's probability of adopting without external
         * influence.
         */
        String unaryPotentialsFilename = "unary.txt";

        /**
         * File holding how much each node-as-a-seed results in overall
         * propagation.
         */
        String influenceFilename = "influence.txt";

        /**
         * Lower bound that main copies into
         * InfluencePotentialHandler.MINIMUM_EDGE_POTENTIAL.
         * NOTE(review): the previous comment described this as a minimum unary
         * potential that low values in unary.txt are raised to -- confirm
         * against InfluencePotentialHandler which potentials are clamped.
         */
        double minimumInfluenceProbability = 0.001;

        /**
         * The number of cores to run the parallel codes on.
         */
        int numCores = 4;

        /**
         * Where to write results. Each line is selected node, resulting
         * spread, number of nodes examined by CELF to find this one, order of
         * nodes chosen.
         */
        String outputFilename = "results.txt";

        /**
         * The number of influencers you want this to find.
         */
        int numInfluencers = 3;

        /**
         * Creates the settings with the defaults supplied by the field
         * initializers.
         */
        public Arguments()
        {
        }

    }

    /**
     * Runs the example: finds the top k seeds that maximize the spread and
     * writes them to the output file.
     * <p>
     * The command-line arguments are not read. All settings (input files,
     * minimum probability, number of cores, output file and number of seeds)
     * come from the defaults of the {@code Arguments} class in this file.
     * </p>
     * <p>
     * Inputs:
     * </p>
     * <p>
     * 1: the graph file that is exactly the same as the one in InfluenceSpread.
     * This file has the following format on each line: nodei nodej p_ij p_ji
     * </p>
     * <p>
     * 2: the unary file. Same as the one from InfluenceSpread. Each line is
     * node_id rho_id.
     * </p>
     * <p>
     * 3: the influence file that stores the influence spread of each node as a
     * seed node. The format of each line is node_id spread. This file can be
     * obtained by running InfluenceSpread on a seed file where each line is
     * node_id node_id. The result of such run is an output file that stores the
     * spread of each node as seed node. This output file can then be used as
     * this influence file.
     * </p>
     * <p>
     * Output: one tab-separated line per chosen seed holding the node id, the
     * resulting spread, the number of candidates examined when the seed was
     * accepted, and the seed's iteration index.
     * </p>
     * <p>
     * If any input file is missing, a message is printed to standard error
     * and the JVM exits with status 1.
     * </p>
     *
     * @param args Ignored.
     * @throws IOException If an input file cannot be read or the output file
     * cannot be written.
     */
    public static void main(String[] args)
        throws IOException
    {
        Arguments a = new Arguments();

        File graphFile = new File(a.inputFilename);
        File unaryFile = new File(a.unaryPotentialsFilename);
        File nodeInfluenceFile = new File(a.influenceFilename);

        exitIfMissing(graphFile);
        exitIfMissing(unaryFile);
        exitIfMissing(nodeInfluenceFile);

        InfluencePotentialHandler.MINIMUM_EDGE_POTENTIAL
            = a.minimumInfluenceProbability;

        System.out.println("Parameters:");
        System.out.println("Graph file: " + graphFile.getPath());
        System.out.println("Unary file: " + unaryFile.getPath());
        System.out.println("Influence file: " + nodeInfluenceFile.getPath());
        System.out.println("Minimum influence probability: "
            + InfluencePotentialHandler.MINIMUM_EDGE_POTENTIAL);
        System.out.println("Number of threads: " + a.numCores);
        System.out.println("Output file: " + a.outputFilename);
        System.out.println("Number of seeds: " + a.numInfluencers);

        System.out.println("Reading unary potentials...");
        Map<String, Double> unaryPotentials
            = InfluenceSpread.readUnaryPotentials(unaryFile.getPath());
        System.out.println("Number of unary potentials: "
            + unaryPotentials.size());

        System.out.println("Reading graph...");
        DirectedNodeEdgeGraph<String> graph
            = InfluenceSpread.createFromEdgeList(graphFile.getPath());
        System.out.println("Number of nodes: " + graph.getNumNodes());
        System.out.println("Number of edges: " + graph.getNumEdges());
        GraphWrappingEnergyFunction<Integer, String> f
            = new GraphWrappingEnergyFunction<>(graph,
                new InfluencePotentialHandler(graph, unaryPotentials));

        // Every node must have a unary potential (checked only with -ea).
        assert (unaryPotentials.size() == graph.getNumNodes());

        System.out.println("Reading individual node influence...");
        Map<String, Double> nodeInfluenceMap = readInitialInfluence(
            nodeInfluenceFile.getPath());

        System.out.println("Performing maximization...");
        InfluenceMaximization maximizer = new InfluenceMaximization();
        List<NodeMarginalInfluence> seeds = maximizer.maximize(f,
            nodeInfluenceMap, a.numInfluencers, a.numCores);

        // Write results to file.
        // NOTE: If you are running this with the default graph, you'll notice
        // that the best influence is found by grouping the influencers into
        // one community.  This is likely due to the very odd nature of this
        // very simple and contrived input graph.
        // Closing the writer (try-with-resources) flushes it.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(new File(
            a.outputFilename))))
        {
            for (NodeMarginalInfluence ui : seeds)
            {
                writer.write(ui.id + "\t" + ui.spread + "\t" + ui.numExamined
                    + "\t" + ui.iteration);
                writer.newLine();
            }
        }
        System.out.println("Done");
    }

    /**
     * Terminates the program with a failure status when a required input
     * file is absent.
     *
     * @param file The input file that must exist.
     */
    private static void exitIfMissing(File file)
    {
        if (!file.exists())
        {
            // Report on stderr and exit non-zero so scripts can detect the
            // failure.
            System.err.println(file.getPath() + " does not exist.");
            System.exit(1);
        }
    }

    /**
     * A candidate seed node together with its most recently computed marginal
     * gain. The natural ordering is by descending {@link #gain}, so a
     * {@link java.util.PriorityQueue} of these yields the largest gain first.
     * <p>
     * Note: the ordering is not consistent with {@code equals}; two distinct
     * instances with the same gain compare as 0.
     * </p>
     */
    public class NodeMarginalInfluence
        implements Comparable<NodeMarginalInfluence>
    {

        /** Identifier of the node. */
        public String id;

        /** Marginal gain in spread recorded for this node. */
        public double gain;

        /** Total spread recorded for this node. */
        public double spread;

        /** Size of the seed set at the time {@link #gain} was computed. */
        public int iteration;

        /** Number of candidates polled when this node was accepted. */
        public int numExamined;

        /**
         * Orders nodes by descending gain. A NaN gain sorts after every real
         * gain, and two NaN gains compare as equal, so the ordering stays
         * antisymmetric and a node with an undefined gain is never preferred
         * over a node with a real one.
         *
         * @param o The node to compare against.
         * @return A negative value if this node has the larger gain, a
         * positive value if it has the smaller (or an undefined) gain, and 0
         * if the gains are equal.
         */
        @Override
        public int compareTo(NodeMarginalInfluence o)
        {
            boolean thisUndefined = Double.isNaN(gain);
            boolean otherUndefined = Double.isNaN(o.gain);
            if (thisUndefined || otherUndefined)
            {
                // NaN fails every ==, < and > test, so rank it explicitly:
                // undefined gains go last.
                return Boolean.compare(thisUndefined, otherUndefined);
            }

            if (gain == o.gain)
            {
                return 0;
            }
            return (gain < o.gain) ? 1 : -1;
        }

    }

}