/*
 * File: InfluenceMaximization.java
 * Authors: Tu-Thach Quach
 * Company: Sandia National Laboratories
 * Project: Cognitive Foundry
 *
 * Copyright 2016, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 * license for use of this work by or on behalf of the U.S. Government.
 * Export of this program may require a license from the United States
 * Government. See CopyrightHistory.txt for complete details.
 */
package examples;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationReferences;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.graph.DirectedNodeEdgeGraph;
import gov.sandia.cognition.graph.inference.GraphWrappingEnergyFunction;
import gov.sandia.cognition.graph.inference.SumProductDirectedPropagation;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;

/**
 * Use this to do influence maximization: find the set of seed nodes that
 * maximizes influence spread. This is an implementation of the CELF algorithm.
 * See the main method for explanation on the required parameters.
 *
 * @author Tu-Thach Quach
 */
@PublicationReferences(references = {
    @PublicationReference(author = "Tu-Thach Quach and Jeremy D. Wendt",
        title = "A diffusion model for maximizing influence spread in large networks",
        type = PublicationType.Conference,
        publication = "Proceedings of the International Conference on Social Informatics",
        year = 2016),
    @PublicationReference(author = "Jure Leskovec, A. Krause, C. Guestrin, C. Faloutsos, J. VanBriesen, and N. Glance",
        title = "Cost-effective outbreak detection in networks",
        type = PublicationType.Conference,
        publication = "Proceedings of the 13th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining",
        year = 2007) })
public class InfluenceMaximization
{

    /**
     * The CELF maximization routine.
     *
     * Greedily selects up to k seed nodes using lazy-forward evaluation: a
     * node's cached marginal gain is only recomputed when it reaches the head
     * of the priority queue with a stale iteration stamp. Labels set on
     * {@code f} for chosen seeds are left in place when this returns.
     *
     * @param f The energy function to maximize.
     * @param nodeInfluenceMap The influence spread of each node alone.
     * @param k The number of nodes to find, e.g., seed size.
     * @param numCores The number of processing cores to use.
     * @return The list of computed influence, in selection order.
     */
    public List<NodeMarginalInfluence> maximize(
        GraphWrappingEnergyFunction<Integer, String> f,
        Map<String, Double> nodeInfluenceMap,
        int k,
        int numCores)
    {
        // Calculate the base spread when without any seed node.
        SumProductDirectedPropagation<Integer> solver =
            new SumProductDirectedPropagation<>(
                InfluenceSpread.MAX_NUM_ITERATIONS, 0.001, numCores);
        solver.init(f);
        solver.solve();
        double baseSpread = 0;
        for (int node = 0; node < f.numNodes(); node++)
        {
            baseSpread += solver.getBelief(node, 1);
        }

        // Add all nodes to the queue with their standalone marginal gain.
        PriorityQueue<NodeMarginalInfluence> queue = new PriorityQueue<>();
        for (Map.Entry<String, Double> entry : nodeInfluenceMap.entrySet())
        {
            NodeMarginalInfluence ui = new NodeMarginalInfluence();
            ui.id = entry.getKey();
            ui.gain = entry.getValue() - baseSpread;
            ui.spread = entry.getValue();
            ui.iteration = 0;
            queue.add(ui);
        }

        int numExamined = 0;
        double spread = 0; // Keeps track of the current spread achieved.
        List<NodeMarginalInfluence> topNodeList = new ArrayList<>();
        while (topNodeList.size() < k && !queue.isEmpty())
        {
            NodeMarginalInfluence ui = queue.poll();
            numExamined++;
            if (ui.iteration == topNodeList.size())
            {
                // The gain at the head was computed against the current seed
                // set, so by submodularity this node is the true maximizer.
                ui.numExamined = numExamined;
                topNodeList.add(ui);
                f.setLabel(ui.id, 1);
                spread = ui.spread;
            }
            else
            {
                // Stale entry: recompute its marginal gain with respect to
                // the current seed set, then reinsert it.
                f.setLabel(ui.id, 1);
                solver.solve();
                f.setLabel(ui.id, 0);
                double prop = 0;
                for (int node = 0; node < f.numNodes(); node++)
                {
                    prop += solver.getBelief(node, 1);
                }
                ui.gain = prop - spread;
                ui.spread = prop;
                ui.iteration = topNodeList.size();
                queue.add(ui);
            }
            System.out.println("Seed size: " + topNodeList.size()
                + "; examined: " + numExamined);
        }
        return topNodeList;
    }

    /**
     * Reads the per-node influence file, where each non-blank line is
     * whitespace-separated: node_id spread.
     *
     * @param filename Path to the influence file.
     * @return Map from node id to its standalone influence spread.
     * @throws IOException if the file cannot be read.
     */
    public static Map<String, Double> readInitialInfluence(String filename)
        throws IOException
    {
        Map<String, Double> nodeInfluenceMap = new HashMap<>();
        try (BufferedReader reader =
            new BufferedReader(new FileReader(filename)))
        {
            String line = reader.readLine();
            while (line != null)
            {
                // Skip blank lines (e.g., a trailing newline) instead of
                // failing on the missing second column.
                if (!line.trim().isEmpty())
                {
                    String[] items = line.split("\\s+");
                    String node = items[0].trim();
                    double influence = Double.parseDouble(items[1]);
                    nodeInfluenceMap.put(node, influence);
                }
                line = reader.readLine();
            }
        }
        return nodeInfluenceMap;
    }

    /**
     * Arguments for this example.
     */
    private static class Arguments
    {
        String inputFilename;

        String unaryPotentialsFilename;

        String influenceFilename;

        double minimumInfluenceProbability;

        int numCores;

        String outputFilename;

        int numInfluencers;

        public Arguments()
        {
            // The graph file. Each line is source, dest,
            // influence_src_to_dst, influence_dst_to_src.
            inputFilename = "example.txt";
            // Each node's probability of adopting without external influence.
            unaryPotentialsFilename = "unary.txt";
            // How much each node-as-a-seed results in overall propagation.
            influenceFilename = "influence.txt";
            // The minimum unary potential. If a line in unary.txt is lower
            // than this, it will be increased to this.
            minimumInfluenceProbability = 0.001;
            // The number of cores to run the parallel codes on.
            numCores = 4;
            // Where to write results. Each line is: selected node, resulting
            // spread, number of nodes examined by CELF to find this one,
            // order of nodes chosen.
            outputFilename = "results.txt";
            // The number of influencers you want this to find.
            numInfluencers = 3;
        }
    }

    /**
     * This is the main program that will find the top k seeds that maximizes
     * the spread. All inputs are configured through the hard-coded
     * {@link Arguments} defaults:
     * <p>
     * 1: the graph file that is exactly the same as the one in InfluenceSpread.
     * This file has the following format on each line: nodei nodej p_ij p_ji
     * </p>
     * <p>
     * 2: the unary file. Same as the one from InfluenceSpread. Each line is
     * node_id \rho_id.
     * </p>
     * <p>
     * 3: the influence file that stores the influence spread of each node as a
     * seed node. The format of each line is node_id spread. This file can be
     * obtained by running InfluenceSpread on a seed file where each line is
     * node_id node_id. The result of such run is an output file that stores the
     * spread of each node as seed node. This output file can then be used as
     * this influence file.
     * </p>
     * <p>
     * 4: the minimum pairwise term. This can be set to zero in most cases.
     * </p>
     * <p>
     * 5: the number of compute cores to use.
     * </p>
     * <p>
     * 6: the output file name, and 7: the number of seeds to find.
     * </p>
     *
     * @param args Unused; see {@link Arguments}.
     * @throws IOException if any input or output file cannot be accessed.
     */
    public static void main(String[] args)
        throws IOException
    {
        Arguments a = new Arguments();

        File graphFile = new File(a.inputFilename);
        File unaryFile = new File(a.unaryPotentialsFilename);
        File nodeInfluenceFile = new File(a.influenceFilename);
        // Exit with a failure code (not 0) when a required input is missing.
        if (!graphFile.exists())
        {
            System.out.println(graphFile.getPath() + " does not exist.");
            System.exit(1);
        }
        if (!unaryFile.exists())
        {
            System.out.println(unaryFile.getPath() + " does not exist.");
            System.exit(1);
        }
        if (!nodeInfluenceFile.exists())
        {
            System.out.println(nodeInfluenceFile.getPath()
                + " does not exist.");
            System.exit(1);
        }

        InfluencePotentialHandler.MINIMUM_EDGE_POTENTIAL =
            a.minimumInfluenceProbability;

        System.out.println("Parameters:");
        System.out.println("Graph file: " + graphFile.getPath());
        System.out.println("Unary file: " + unaryFile.getPath());
        System.out.println("Influence file: " + nodeInfluenceFile.getPath());
        System.out.println("Minimum influence probability: "
            + InfluencePotentialHandler.MINIMUM_EDGE_POTENTIAL);
        System.out.println("Number of threads: " + a.numCores);
        System.out.println("Output file: " + a.outputFilename);
        System.out.println("Number of seeds: " + a.numInfluencers);

        System.out.println("Reading unary potentials...");
        Map<String, Double> unaryPotentials =
            InfluenceSpread.readUnaryPotentials(unaryFile.getPath());
        System.out.println("Number of unary potentials: "
            + unaryPotentials.size());

        System.out.println("Reading graph...");
        DirectedNodeEdgeGraph<String> graph =
            InfluenceSpread.createFromEdgeList(graphFile.getPath());
        System.out.println("Number of nodes: " + graph.getNumNodes());
        System.out.println("Number of edges: " + graph.getNumEdges());
        GraphWrappingEnergyFunction<Integer, String> f =
            new GraphWrappingEnergyFunction<>(graph,
                new InfluencePotentialHandler(graph, unaryPotentials));

        // Every node must have a unary potential.
        assert (unaryPotentials.size() == graph.getNumNodes());

        System.out.println("Reading individual node influence...");
        Map<String, Double> nodeInfluenceMap = readInitialInfluence(
            nodeInfluenceFile.getPath());

        System.out.println("Performing maximization...");
        InfluenceMaximization maximizer = new InfluenceMaximization();
        List<NodeMarginalInfluence> seeds = maximizer.maximize(f,
            nodeInfluenceMap, a.numInfluencers, a.numCores);

        // Write results to file.
        // NOTE: If you are running this with the default graph, you'll notice
        // that the best influence is found by grouping the influencers into
        // one community. This is likely due to the very odd nature of this
        // very simple and contrived input graph.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(
            new File(a.outputFilename))))
        {
            for (NodeMarginalInfluence ui : seeds)
            {
                writer.write(ui.id + "\t" + ui.spread + "\t" + ui.numExamined
                    + "\t" + ui.iteration);
                writer.newLine();
            }
            writer.flush();
        }
        System.out.println("Done");
    }

    /**
     * Per-node CELF bookkeeping: the node's cached marginal gain, its total
     * spread, the seed-set size the gain was computed against, and how many
     * queue polls it took to select it.
     *
     * Declared static: it never reads enclosing-instance state, so a
     * non-static inner class would only carry a useless hidden reference to
     * the outer InfluenceMaximization.
     */
    public static class NodeMarginalInfluence
        implements Comparable<NodeMarginalInfluence>
    {
        public String id;

        public double gain;

        public double spread;

        public int iteration;

        public int numExamined;

        /**
         * Orders descending by gain so PriorityQueue behaves as a max-heap.
         * Uses Double.compare rather than == / < on doubles so the ordering
         * stays a consistent total order even if a gain is NaN.
         */
        @Override
        public int compareTo(NodeMarginalInfluence o)
        {
            // Reversed argument order yields descending (largest gain first).
            return Double.compare(o.gain, gain);
        }
    }
}