package edu.nd.nina.snap.cascades; import java.io.File; import java.io.IOException; import java.util.HashSet; import java.util.Hashtable; import java.util.List; import java.util.Set; import java.util.Vector; import javax.imageio.ImageIO; import com.panayotis.gnuplot.JavaPlot; import com.panayotis.gnuplot.style.Style; import com.panayotis.gnuplot.terminal.ImageTerminal; import edu.nd.nina.DirectedGraph; import edu.nd.nina.alg.BreadthFirstSearch; import edu.nd.nina.alg.ConnectivityInspector; import edu.nd.nina.graph.DefaultEdge; import edu.nd.nina.graph.DirectedSubgraph; import edu.nd.nina.math.Moment; import edu.nd.nina.math.Randoms; import edu.nd.nina.structs.Pair; import edu.nd.nina.util.Plot; /** * Structural properties of the cascades (propagation trees) * * @author weninger * */ public class CascadeStatistics { Hashtable<Float, Moment> NCascInf, NCascNet; // number of cascades Hashtable<Float, Moment> MxSzInf, MxSzNet; // size of the largest cascade Hashtable<Float, Moment> AvgSzInf, AvgSzNet; // average cascade size (number // of nodes) Hashtable<Float, Moment> NIsoInf, NIsoNet; // number of isolated nodes in // the cascade Hashtable<Float, Moment> NLfInf, NLfNet; // number of leaves in a cascade Hashtable<Float, Moment> NRtInf, NRtNet; // number of roots in a cascade Hashtable<Float, Moment> OutDegInf, OutDegNet; // average out-degree of a // cascade Hashtable<Float, Moment> InDegInf, InDegNet; // average in-degree of a // cascade // requires the root node (largest connected component) Hashtable<Float, Moment> DepthInf, DepthNet; // average depth (avg. distance // from leaves to the root) Hashtable<Float, Moment> MxWidInf, MxWidNet; // cascade width (max number of // nodes at any depth d) Hashtable<Float, Moment> MxLevInf, MxLevNet; // level of max width (depth of // max width) Hashtable<Float, Moment> IncLevInf, IncLevNet; // number of levels with // increasing width CascadeStatistics() { NCascInf = new Hashtable<Float, Moment>(); NCascNet = new Hashtable<Float, Moment>(); MxSzInf = new Hashtable<Float, Moment>(); MxSzNet = new Hashtable<Float, Moment>(); AvgSzInf = new Hashtable<Float, Moment>(); NCascNet = new Hashtable<Float, Moment>(); AvgSzNet = new Hashtable<Float, Moment>(); InDegInf = new Hashtable<Float, Moment>(); InDegNet = new Hashtable<Float, Moment>(); OutDegInf = new Hashtable<Float, Moment>(); OutDegNet = new Hashtable<Float, Moment>(); NLfInf = new Hashtable<Float, Moment>(); NLfNet = new Hashtable<Float, Moment>(); NRtInf = new Hashtable<Float, Moment>(); NRtNet = new Hashtable<Float, Moment>(); NIsoInf = new Hashtable<Float, Moment>(); NIsoNet = new Hashtable<Float, Moment>(); DepthInf = new Hashtable<Float, Moment>(); MxWidInf = new Hashtable<Float, Moment>(); MxLevInf = new Hashtable<Float, Moment>(); IncLevInf = new Hashtable<Float, Moment>(); DepthNet = new Hashtable<Float, Moment>(); MxWidNet = new Hashtable<Float, Moment>(); MxLevNet = new Hashtable<Float, Moment>(); IncLevNet = new Hashtable<Float, Moment>(); } void plotAll(final String OutFNm, final String Desc) { plotAll(OutFNm, Desc, true); } void plotAll(final String OutFNm, final String Desc, final boolean DivByM) { String MStr = DivByM ? " / M (number of observed nodes)" : ""; JavaPlot GP = new JavaPlot(); ImageTerminal png = new ImageTerminal(); File file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + OutFNm + System.getProperty("file.separator") + String.format("ncasc-%s", OutFNm) + ".png"); file.getParentFile().mkdirs(); GP.setTerminal(png); GP.setTitle(Desc); GP.getAxis("x").setLabel("Fraction of observed data (P)"); GP.getAxis("y").setLabel("Number of connected components" + MStr); Plot.addPlot(GP, NCascInf, Style.LINESPOINTS, "Influence cascade", "lw 2", true, false, false, false, false, false, false); Plot.addPlot(GP, NCascNet, Style.LINESPOINTS, "Network cascade", "lw 2", true, false, false, false, false, false, false); GP.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } GP = new JavaPlot(); png = new ImageTerminal(); file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + OutFNm + System.getProperty("file.separator") + String.format("mxSz-%s", OutFNm) + ".png"); file.getParentFile().mkdirs(); GP.setTerminal(png); GP.setTitle(Desc); GP.getAxis("x").setLabel("Fraction of observed data (P)"); GP.getAxis("y").setLabel("Size of largest connected component" + MStr); Plot.addPlot(GP, MxSzInf, Style.LINESPOINTS, "Influence cascade", "lw 2", true, false, false, false, false, false, false); Plot.addPlot(GP, MxSzNet, Style.LINESPOINTS, "Network cascade", "lw 2", true, false, false, false, false, false, false); GP.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } GP = new JavaPlot(); png = new ImageTerminal(); file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + OutFNm + System.getProperty("file.separator") + String.format("avgSz-%s", OutFNm) + ".png"); file.getParentFile().mkdirs(); GP.setTerminal(png); GP.setTitle(Desc); GP.getAxis("x").setLabel("Fraction of observed data (P)"); GP.getAxis("y").setLabel("Average connected component size" + MStr); Plot.addPlot(GP, AvgSzInf, Style.LINESPOINTS, "Influence cascade", "lw 2", true, false, false, false, false, false, false); Plot.addPlot(GP, AvgSzNet, Style.LINESPOINTS, "Network cascade", "lw 2", true, false, false, false, false, false, false); GP.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } GP = new JavaPlot(); png = new ImageTerminal(); file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + OutFNm + System.getProperty("file.separator") + String.format("nIso-%s", OutFNm) + ".png"); file.getParentFile().mkdirs(); GP.setTerminal(png); GP.setTitle(Desc); GP.getAxis("x").setLabel("Fraction of observed data (P)"); GP.getAxis("y").setLabel("Number of isolated nodes" + MStr); Plot.addPlot(GP, NIsoInf, Style.LINESPOINTS, "Influence cascade", "lw 2", true, false, false, false, false, false, false); Plot.addPlot(GP, NIsoNet, Style.LINESPOINTS, "Network cascade", "lw 2", true, false, false, false, false, false, false); GP.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } GP = new JavaPlot(); png = new ImageTerminal(); file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + OutFNm + System.getProperty("file.separator") + String.format("nRt-%s", OutFNm) + ".png"); file.getParentFile().mkdirs(); GP.setTerminal(png); GP.setTitle(Desc); GP.getAxis("x").setLabel("Fraction of observed data (P)"); GP.getAxis("y").setLabel("Number of root nodes" + MStr); Plot.addPlot(GP, NRtInf, Style.LINESPOINTS, "Influence cascade", "lw 2", true, false, false, false, false, false, false); Plot.addPlot(GP, NRtNet, Style.LINESPOINTS, "Network cascade", "lw 2", true, false, false, false, false, false, false); GP.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } GP = new JavaPlot(); png = new ImageTerminal(); file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + OutFNm + System.getProperty("file.separator") + String.format("nLf-%s", OutFNm) + ".png"); file.getParentFile().mkdirs(); GP.setTerminal(png); GP.setTitle(Desc); GP.getAxis("x").setLabel("Fraction of observed data (P)"); GP.getAxis("y").setLabel( "Number of leaves (nodes of zero out-degree)" + MStr); Plot.addPlot(GP, NLfInf, Style.LINESPOINTS, "Influence cascade", "lw 2", true, false, false, false, false, false, false); Plot.addPlot(GP, NLfNet, Style.LINESPOINTS, "Network cascade", "lw 2", true, false, false, false, false, false, false); GP.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } GP = new JavaPlot(); png = new ImageTerminal(); file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + OutFNm + System.getProperty("file.separator") + String.format("outDeg-%s", OutFNm) + ".png"); file.getParentFile().mkdirs(); GP.setTerminal(png); GP.setTitle(Desc); GP.getAxis("x").setLabel("Fraction of observed data (P)"); GP.getAxis("y").setLabel("Average Out-Degree (of a non-leaf)" + MStr); Plot.addPlot(GP, OutDegInf, Style.LINESPOINTS, "Influence cascade", "lw 2", true, false, false, false, false, false, false); Plot.addPlot(GP, OutDegNet, Style.LINESPOINTS, "Network cascade", "lw 2", true, false, false, false, false, false, false); GP.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } GP = new JavaPlot(); png = new ImageTerminal(); file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + OutFNm + System.getProperty("file.separator") + String.format("inDeg-%s", OutFNm) + ".png"); file.getParentFile().mkdirs(); GP.setTerminal(png); GP.setTitle(Desc); GP.getAxis("x").setLabel("Fraction of observed data (P)"); GP.getAxis("y").setLabel("Average In-Degree (of a non-root)" + MStr); Plot.addPlot(GP, InDegInf, Style.LINESPOINTS, "Influence cascade", "lw 2", true, false, false, false, false, false, false); Plot.addPlot(GP, InDegNet, Style.LINESPOINTS, "Network cascade", "lw 2", true, false, false, false, false, false, false); GP.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } GP = new JavaPlot(); png = new ImageTerminal(); file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + OutFNm + System.getProperty("file.separator") + String.format("levels-%s", OutFNm) + ".png"); file.getParentFile().mkdirs(); GP.setTerminal(png); GP.setTitle(Desc); GP.getAxis("x").setLabel("Fraction of observed data (P)"); GP.getAxis("y").setLabel("Average depth of largest component" + MStr); Plot.addPlot(GP, DepthInf, Style.LINESPOINTS, "Influence cascade", "lw 2", true, false, false, false, false, false, false); Plot.addPlot(GP, DepthNet, Style.LINESPOINTS, "Network cascade", "lw 2", true, false, false, false, false, false, false); GP.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } GP = new JavaPlot(); png = new ImageTerminal(); file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + OutFNm + System.getProperty("file.separator") + String.format("width-%s", OutFNm) + ".png"); file.getParentFile().mkdirs(); GP.setTerminal(png); GP.setTitle(Desc); GP.getAxis("x").setLabel("Fraction of observed data (P)"); GP.getAxis("y").setLabel( "Width of largest components (max nodes at any level)" + MStr); Plot.addPlot(GP, MxWidInf, Style.LINESPOINTS, "Influence cascade", "lw 2", true, false, false, false, false, false, false); Plot.addPlot(GP, MxWidNet, Style.LINESPOINTS, "Network cascade", "lw 2", true, false, false, false, false, false, false); GP.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } GP = new JavaPlot(); png = new ImageTerminal(); file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + OutFNm + System.getProperty("file.separator") + String.format("levWidth-%s", OutFNm) + ".png"); file.getParentFile().mkdirs(); GP.setTerminal(png); GP.setTitle(Desc); GP.getAxis("x").setLabel("Fraction of observed data (P)"); GP.getAxis("y").setLabel("Level with maximum width / Depth" + MStr); Plot.addPlot(GP, MxLevInf, Style.LINESPOINTS, "Influence cascade", "lw 2", true, false, false, false, false, false, false); Plot.addPlot(GP, MxLevNet, Style.LINESPOINTS, "Network cascade", "lw 2", true, false, false, false, false, false, false); GP.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } GP = new JavaPlot(); png = new ImageTerminal(); file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + OutFNm + System.getProperty("file.separator") + String.format("levInc-%s", OutFNm) + ".png"); file.getParentFile().mkdirs(); GP.setTerminal(png); GP.setTitle(Desc); GP.getAxis("x").setLabel("Fraction of observed data (P)"); GP.getAxis("y").setLabel( "Number of levels of increasing width / Depth" + MStr); Plot.addPlot(GP, IncLevInf, Style.LINESPOINTS, "Influence cascade", "lw 2", true, false, false, false, false, false, false); Plot.addPlot(GP, IncLevNet, Style.LINESPOINTS, "Network cascade", "lw 2", true, false, false, false, false, false, false); GP.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } } /** * randomly remove nodes from the cascade and store cascade properties as a * function of the fraction of removed nodes for more details see * "Correcting for Missing Data in Information Cascades" by E. Sadikov, M. * Medina, J. Leskovec, H. Garcia-Molina. WSDM, 2011 * * @param infCasc * @param netCasc * @param nIdInfTmH * @param PStep * @param NRuns * @param divByM * @param r */ public void sampleCascade(DirectedGraph<Integer, DefaultEdge> infCasc, DirectedGraph<Integer, DefaultEdge> netCasc, Hashtable<Integer, Integer> nIdInfTmH, double PStep, int NRuns, boolean divByM, Randoms r) { for (int Run = 0; Run < NRuns; Run++) { for (double P = PStep; P <= 1.01; P += PStep) { Set<Integer> NIdV = new HashSet<Integer>(); for (Integer v : infCasc.vertexSet()) { if (r.GetUniDev() < P) { NIdV.add(v); } } DirectedSubgraph<Integer, DefaultEdge> InfG = new DirectedSubgraph<Integer, DefaultEdge>( infCasc, NIdV, null); DirectedSubgraph<Integer, DefaultEdge> NetG = new DirectedSubgraph<Integer, DefaultEdge>( netCasc, NIdV, null); if (InfG.vertexSet().isEmpty()) { continue; } takeStat(InfG, NetG, nIdInfTmH, P, divByM); } } } /** * * @param InfG * @param NetG * @param nIdInfTmH * @param P * @param DivByM */ private void takeStat(DirectedGraph<Integer, DefaultEdge> InfG, DirectedGraph<Integer, DefaultEdge> NetG, Hashtable<Integer, Integer> nIdInfTmH, double P, boolean DivByM) { final double M = DivByM ? InfG.vertexSet().size() : 1d; assert (M >= 1); DirectedSubgraph<Integer, DefaultEdge> CcInf, CcNet; // largest // connected // component // connected components and sizes ConnectivityInspector<Integer, DefaultEdge> ci = new ConnectivityInspector<Integer, DefaultEdge>( InfG); List<Set<Integer>> CnComV = ci.connectedSets(); addMoment(NCascInf, P, CnComV.size() / M); addMoment(MxSzInf, P, CnComV.get(0).size() / M); int a = 0; for (int i = 0; i < CnComV.size(); i++) { a += CnComV.get(i).size(); } addMoment(AvgSzInf, P, a / (double) CnComV.size() * M); CcInf = new DirectedSubgraph<Integer, DefaultEdge>(InfG, CnComV.get(0), null); ci = new ConnectivityInspector<Integer, DefaultEdge>(NetG); CnComV = ci.connectedSets(); addMoment(NCascNet, P, CnComV.size() / M); addMoment(MxSzNet, P, CnComV.get(0).size() / M); a = 0; for (int i = 0; i < CnComV.size(); i++) { a += CnComV.get(i).size(); } addMoment(AvgSzNet, P, a / (double) CnComV.size() * M); CcNet = new DirectedSubgraph<Integer, DefaultEdge>(NetG, CnComV.get(0), null); // count isolated nodes and leaves; average in- and out-degree (skip // leaves) int i1 = 0, i2 = 0, l1 = 0, l2 = 0, r1 = 0, r2 = 0, ENet = 0, EInf = 0; double ci1 = 0, ci2 = 0, co1 = 0, co2 = 0; for (Integer v : InfG.vertexSet()) { if (InfG.outDegreeOf(v) == 0 && InfG.inDegreeOf(v) > 0) { l1++; } if (InfG.outDegreeOf(v) > 0 && InfG.inDegreeOf(v) == 0) { r1++; } if (InfG.edgesOf(v).size() == 0) { i1++; } if (InfG.inDegreeOf(v) > 0) { ci1 += 1; } if (InfG.outDegreeOf(v) > 0) { co1 += 1; } EInf += InfG.outDegreeOf(v); } for (Integer v : NetG.vertexSet()) { if (NetG.outDegreeOf(v) == 0 && NetG.inDegreeOf(v) > 0) { l2++; } if (NetG.outDegreeOf(v) > 0 && NetG.inDegreeOf(v) == 0) { r2++; } if (NetG.edgesOf(v).size() == 0) { i2++; } if (NetG.inDegreeOf(v) > 0) { ci2 += 1; } if (NetG.outDegreeOf(v) > 0) { co2 += 1; } ENet += NetG.outDegreeOf(v); } if (ci1 > 0) addMoment(InDegInf, P, EInf / ci1); if (ci2 > 0) addMoment(InDegNet, P, ENet / ci2); if (co1 > 0) addMoment(OutDegInf, P, EInf / co1); if (co2 > 0) addMoment(OutDegNet, P, ENet / co2); addMoment(NLfInf, P, l1 / M); addMoment(NLfNet, P, l2 / M); addMoment(NRtInf, P, r1 / M); addMoment(NRtNet, P, r2 / M); addMoment(NIsoInf, P, i1 / M); addMoment(NIsoNet, P, i2 / M); // cascade depth final double M1 = DivByM ? CcNet.vertexSet().size() : 1; assert (M1 >= 1); int Root = findCascadeRoot(CcInf, nIdInfTmH); Vector<Pair<Integer, Integer>> HopCntV = BreadthFirstSearch.getNodesAtHops(CcInf, Root); int MxN = 0, Lev = 0, IncL = 0; for (int i = 0; i < HopCntV.size(); i++) { if (MxN < HopCntV.get(i).p2) { MxN = HopCntV.get(i).p2; Lev = HopCntV.get(i).p1; } if (i > 0 && HopCntV.get(i - 1).p2 <= HopCntV.get(i).p2) { IncL++; } } double D = 0; int c = 0; D = HopCntV.firstElement().p1; // maximum depth c = 1; if (c != 0 && D != 0) { D = D / c; addMoment(DepthInf, P, D / M1); addMoment(MxWidInf, P, MxN / M1); addMoment(MxLevInf, P, Lev / D); addMoment(IncLevInf, P, IncL / D); } Root = findCascadeRoot(CcNet, nIdInfTmH); HopCntV = BreadthFirstSearch.getNodesAtHops(CcNet, Root); MxN = 0; Lev = 0; IncL = 0; D = 0; c = 0; for (int i = 0; i < HopCntV.size(); i++) { if (MxN < HopCntV.get(i).p2) { MxN = HopCntV.get(i).p2; Lev = HopCntV.get(i).p1; } if (i > 0 && HopCntV.get(i - 1).p2 <= HopCntV.get(i).p2) { IncL++; } } D = HopCntV.firstElement().p1; c = 1; // maximum depth if (c != 0 && D != 0) { D = D / c; addMoment(DepthNet, P, D / M1); addMoment(MxWidNet, P, MxN / M1); addMoment(MxLevNet, P, Lev / D); addMoment(IncLevNet, P, IncL / D); } } /** * * @param G * @param nIdInfTmH * @return */ private int findCascadeRoot(DirectedSubgraph<Integer, DefaultEdge> G, Hashtable<Integer, Integer> nIdInfTmH) { // earliest infected node int Min = Integer.MAX_VALUE; for (Integer v : G.vertexSet()) { if (v < Min && G.inDegreeOf(v) == 0) { Min = v; } } assert (Min != Integer.MAX_VALUE); return Min; } /** * * @param x * @param p * @param d */ private void addMoment(Hashtable<Float, Moment> x, double p, double d) { if (!x.containsKey(p)) { Moment m = new Moment(); m.add((float) d); x.put((float) p, m); } else { x.get(p).add((float) d); } } }