package edu.nd.nina.snap.agm; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintWriter; import java.util.HashSet; import java.util.Hashtable; import java.util.Set; import java.util.Vector; import javax.imageio.ImageIO; import com.panayotis.gnuplot.JavaPlot; import com.panayotis.gnuplot.plot.AbstractPlot; import com.panayotis.gnuplot.style.PlotStyle; import com.panayotis.gnuplot.style.Style; import com.panayotis.gnuplot.terminal.ImageTerminal; import com.panayotis.gnuplot.terminal.PostscriptTerminal; import edu.nd.nina.UndirectedGraph; import edu.nd.nina.graph.DefaultEdge; import edu.nd.nina.math.LogisticRegressionFit; import edu.nd.nina.math.LogisticRegressionPrediction; import edu.nd.nina.math.Randoms; import edu.nd.nina.structs.Pair; import edu.nd.nina.structs.Triple; public class AGMUtil { /** * estimate number of communities using AGM * * @param g * @param initComs * @param maxIter * @param randomSeed * @param regGap * @param pNumCom * @param outfileprefix * @return */ public static Integer findComsByAGM( final UndirectedGraph<Integer, DefaultEdge> g, final Integer initComs, final Integer maxIter, final Integer randomSeed, final Double regGap, final Float pNumCom, final String outfileprefix) { Randoms rnd = new Randoms(randomSeed); int lambdaIter = 100; if (g.vertexSet().size() < 200) { lambdaIter = 1; } if (g.vertexSet().size() < 200 && g.edgeSet().size() > 2000) { lambdaIter = 100; } // Find coms with large C AGMFit aGMFitM = new AGMFit(g, initComs, randomSeed); if (pNumCom > 0.0) { aGMFitM.setPNumCom(pNumCom); } aGMFitM.runMCMC(maxIter, lambdaIter, outfileprefix); int TE = g.edgeSet().size(); Vector<Float> RegV = new Vector<Float>(); RegV.add((float) (0.3 * TE)); for (int r = 0; r < 25; r++) { RegV.add((float) (RegV.lastElement() * regGap)); } Vector<Pair<Float, Float>> RegComsV = new Vector<Pair<Float, Float>>(); Vector<Pair<Float, Float>> RegLV = new Vector<Pair<Float, Float>>(); Vector<Pair<Float, Float>> RegBICV = new Vector<Pair<Float, Float>>(); Vector<Float> LV = new Vector<Float>(); Vector<Float> BICV = new Vector<Float>(); // record likelihood and number of communities with nonzero P_c for (int r = 0; r < RegV.size(); r++) { double RegCoef = RegV.get(r); aGMFitM.setRegCoef(RegCoef); aGMFitM.MLEGradAscentGivenCAG(0.01, 1000); aGMFitM.setRegCoef(0.0); Vector<Vector<Integer>> EstCmtyVV = aGMFitM.getCmtyVV(0.99); int NumLowQ = EstCmtyVV.size(); RegComsV.add(new Pair<Float, Float>((float) RegCoef, (float) NumLowQ)); if (EstCmtyVV.size() > 0) { AGMFit aFTemp = new AGMFit(g, EstCmtyVV, rnd); aFTemp.MLEGradAscentGivenCAG(0.001, 1000); double CurL = aFTemp.likelihood(); LV.add((float) CurL); BICV.add((float) (-2.0 * CurL + (double) EstCmtyVV.size() * Math.log((double) g.vertexSet().size() * (g.vertexSet().size() - 1) / 2.0))); } else { break; } System.out.print("."); } System.out.println(); // if likelihood does not exist or does not change at all, report the // smallest number of communities or 2 if (LV.size() == 0) { return 2; } else if (LV.get(0) == LV.lastElement()) { return (int) Math.max(2.0, RegComsV.get(LV.size() - 1).p2); } // normalize likelihood and BIC to 0~100 int MaxL = 100; { Vector<Float> ValueV = LV; Vector<Pair<Float, Float>> RegValueV = RegLV; double MinValue = Float.MAX_VALUE, MaxValue = Float.MIN_VALUE; for (int l = 0; l < ValueV.size(); l++) { if (ValueV.get(l) < MinValue) { MinValue = ValueV.get(l); } if (ValueV.get(l) > MaxValue) { MaxValue = ValueV.get(l); } } while (ValueV.size() < RegV.size()) { ValueV.add((float) MinValue); } double RangeVal = MaxValue - MinValue; for (int l = 0; l < ValueV.size(); l++) { RegValueV .add(new Pair<Float, Float>( RegV.get(l), (float) (((double) MaxL) * (ValueV.get(l) - MinValue) / RangeVal))); } } { Vector<Float> ValueV = BICV; Vector<Pair<Float, Float>> RegValueV = RegBICV; double MinValue = Float.MAX_VALUE, MaxValue = Float.MIN_VALUE; for (int l = 0; l < ValueV.size(); l++) { if (ValueV.get(l) < MinValue) { MinValue = ValueV.get(l); } if (ValueV.get(l) > MaxValue) { MaxValue = ValueV.get(l); } } while (ValueV.size() < RegV.size()) { ValueV.add((float) MinValue); } double RangeVal = MaxValue - MinValue; for (int l = 0; l < ValueV.size(); l++) { RegValueV .add(new Pair<Float, Float>( RegV.get(l), (float) (((double) MaxL) * (ValueV.get(l) - MinValue) / RangeVal))); } } // fit logistic regression to normalized likelihood. Vector<Vector<Float>> XV = new Vector<Vector<Float>>(RegLV.size()); Vector<Float> YV = new Vector<Float>(RegLV.size()); for (int l = 0; l < RegLV.size(); l++) { Vector<Float> x = new Vector<Float>(); x.add((float) Math.log(RegLV.get(l).p1)); XV.add(x); YV.add((float) (RegLV.get(l).p2 / (double) MaxL)); } Vector<Pair<Float, Float>> LRVScaled = new Vector<Pair<Float, Float>>(); Vector<Pair<Float, Float>> LRV = new Vector<Pair<Float, Float>>(); LogisticRegressionFit LRFit = new LogisticRegressionFit(); LogisticRegressionPrediction LRMd = LRFit.CalcLogRegNewton(XV, YV, outfileprefix); for (int l = 0; l < RegLV.size(); l++) { LRV.add(new Pair<Float, Float>(RegV.get(l), (float) LRMd.GetCfy(XV .get(l)))); LRVScaled.add(new Pair<Float, Float>(RegV.get(l), ((float) MaxL) * LRV.lastElement().p2)); } // estimate # communities from fitted logistic regression int NumComs = 0, IdxRegDrop = 0; double LRThres = 1.1, RegDrop; // 1 / (1 + exp(1.1)) = 0.25 double LeftReg = 0.0, RightReg = 0.0; Vector<Float> Theta = LRMd.GetTheta(); RegDrop = (-Theta.get(1) - LRThres) / Theta.get(0); if (RegDrop <= XV.get(0).get(0)) { NumComs = RegComsV.get(0).p2.intValue(); } else if (RegDrop >= XV.lastElement().get(0)) { NumComs = RegComsV.lastElement().p2.intValue(); } else { // interpolate for RegDrop for (int i = 0; i < XV.size(); i++) { if (XV.get(i).get(0) > RegDrop) { IdxRegDrop = i; break; } } if (IdxRegDrop == 0) { System.err.printf( "Error!! RegDrop:%f, Theta[0]:%f, Theta[1]:%f\n", RegDrop, Theta.get(0), Theta.get(1)); for (int l = 0; l < RegLV.size(); l++) { System.out.printf("X[%d]:%f, Y[%d]:%f\n", l, XV.get(l).get(0), l, YV.get(l)); } } assert (IdxRegDrop > 0); LeftReg = RegDrop - XV.get(IdxRegDrop - 1).get(0); RightReg = XV.get(IdxRegDrop).get(0) - RegDrop; NumComs = (int) Math.round((RightReg * RegComsV.get(IdxRegDrop - 1).p2 + LeftReg * RegComsV.get(IdxRegDrop).p2) / (LeftReg + RightReg)); } // printf("Interpolation coeff: %f, %f, index at drop:%d (%f), Left-Right Vals: %f, %f\n", // LeftReg, RightReg, IdxRegDrop, RegDrop, RegComsV[IdxRegDrop - // 1].Val2, RegComsV[IdxRegDrop].Val2); System.out.printf("Num Coms:%d\n", NumComs); if (NumComs < 2) { NumComs = 2; } if (outfileprefix.length() > 0) { ImageTerminal png = new ImageTerminal(); File file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + outfileprefix + System.getProperty("file.separator") + outfileprefix + "_l.png"); file.getParentFile().mkdirs(); JavaPlot GP1 = new JavaPlot(); GP1.setTerminal(png); double[][] zz = new double[RegComsV.size()][2]; for (int i = 0; i<RegComsV.size(); i++){ zz[i][0] = RegComsV.get(i).p1; zz[i][1] = RegComsV.get(i).p2; } GP1.addPlot(zz); zz = new double[RegLV.size()][2]; for (int i = 0; i<RegLV.size(); i++){ zz[i][0] = RegLV.get(i).p1; zz[i][1] = RegLV.get(i).p2; } GP1.addPlot(zz); zz = new double[RegBICV.size()][2]; for (int i = 0; i<RegBICV.size(); i++){ zz[i][0] = RegBICV.get(i).p1; zz[i][1] = RegBICV.get(i).p2; } GP1.addPlot(zz); zz = new double[LRVScaled.size()][2]; for (int i = 0; i<LRVScaled.size(); i++){ zz[i][0] = LRVScaled.get(i).p1; zz[i][1] = LRVScaled.get(i).p2; } GP1.addPlot(zz); GP1.getAxis("x").setLogScale(true); ((AbstractPlot) GP1.getPlots().get(0)).setTitle("C"); ((AbstractPlot) GP1.getPlots().get(1)).setTitle("likelihood"); ((AbstractPlot) GP1.getPlots().get(2)).setTitle("BIC"); ((AbstractPlot) GP1.getPlots().get(3)).setTitle("Sigmoid (scaled)"); ((AbstractPlot) GP1.getPlots().get(0)).getPlotStyle().setStyle( Style.LINESPOINTS); ((AbstractPlot) GP1.getPlots().get(1)).getPlotStyle().setStyle( Style.LINESPOINTS); ((AbstractPlot) GP1.getPlots().get(2)).getPlotStyle().setStyle( Style.LINESPOINTS); ((AbstractPlot) GP1.getPlots().get(3)).getPlotStyle().setStyle( Style.LINESPOINTS); String titleStr = String.format("N:%d, E:%d ", g.vertexSet().size(), TE); GP1.setTitle(titleStr); GP1.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } } return NumComs; } public static Set<Integer> getNbhCom( UndirectedGraph<Integer, DefaultEdge> g, Integer n) { Set<Integer> nBCmty = new HashSet<Integer>(g.degreeOf(n) + 1); nBCmty.add(n); for (DefaultEdge e : g.edgesOf(n)) { if (g.getEdgeSource(e) == n) { nBCmty.add(g.getEdgeTarget(e)); } else { nBCmty.add(g.getEdgeSource(e)); } } return nBCmty; } public static double getConductance( UndirectedGraph<Integer, DefaultEdge> g, Set<Integer> cmtyS, int edges) { final int edges2 = edges >= 0 ? 2 * edges : g.edgeSet().size(); int vol = 0, cut = 0; double phi = 0.0; for (Integer n : cmtyS) { if (!g.containsVertex(n)) { continue; } for (DefaultEdge e : g.edgesOf(n)) { if (g.getEdgeSource(e) == n) { if (!cmtyS.contains(g.getEdgeTarget(e))) { cut += 1; } } else { if (!cmtyS.contains(g.getEdgeSource(e))) { cut += 1; } } } vol += g.degreeOf(n); } // get conductance if (vol != edges2) { if (2 * vol > edges2) { phi = cut / ((double) (edges2 - vol)); } else if (vol == 0) { phi = 0.0; } else { phi = cut / ((double) vol); } } else { if (vol == edges2) { phi = 1.0; } } return phi; } public static Hashtable<Integer, Set<Integer>> getNodeMembership( Hashtable<Integer, Set<Integer>> nIDComVH, final Vector<Set<Integer>> cmtyVV) { for (int i = 0; i < cmtyVV.size(); i++) { int CID = i; for (Integer NID : cmtyVV.get(i)) { if (nIDComVH.containsKey(NID)) { Set<Integer> x = nIDComVH.get(NID); x.add(CID); } else { Set<Integer> x = new HashSet<Integer>(); x.add(CID); nIDComVH.put(NID, x); } } } return nIDComVH; } public static Hashtable<Integer, Vector<Integer>> getNodeMembership(final Vector<Vector<Integer>> CmtyVV) { Hashtable<Integer, Vector<Integer>> NIDComVH = new Hashtable<Integer, Vector<Integer>>(); for (int CID=0; CID<CmtyVV.size(); CID++) { for (Integer NID : CmtyVV.get(CID)) { if (!NIDComVH.contains(NID)) { Vector<Integer> v = new Vector<Integer>(); v.add(CID); NIDComVH.put(NID, v); } else { Vector<Integer> v = NIDComVH.get(NID); v.add(CID); NIDComVH.put(NID, v); } } } return NIDComVH; } public static Set<Integer> getIntersection(Set<Integer> set1, Set<Integer> set2) { Set<Integer> ret = new HashSet<Integer>(); for (Integer i : set1) { if (set2.contains(i)) { ret.add(i); } } return ret; } /** * dump bipartite community affiliation into a text file with node names * * @param OutFNm * @param CmtyVV * @param NIDNmH */ static void dumpCmtyVV(final String OutFNm, Vector<Vector<Integer>> CmtyVV, Hashtable<Integer, String> NIDNmH) { PrintWriter f; try { f = new PrintWriter(OutFNm); for (int c = 0; c < CmtyVV.size(); c++) { for (int u = 0; u < CmtyVV.get(c).size(); u++) { if (NIDNmH.containsKey(CmtyVV.get(c).get(u))) { f.printf("%s\t", NIDNmH.get(CmtyVV.get(c).get(u))); } else { f.printf("%d\t", (int) CmtyVV.get(c).get(u)); } } f.printf("\n"); } f.close(); } catch (FileNotFoundException e) { e.printStackTrace(); } } /** * save graph into a gexf file which Gephi can read * * @param OutFNm * @param g * @param CmtyVVAtr * @param MaxSz * @param MinSz * @param NIDNameH * @param NIDColorH */ public static void saveGephi( final String OutFNm, final UndirectedGraph<Integer, DefaultEdge> g, final Vector<Vector<Integer>> CmtyVVAtr, final double MaxSz, final double MinSz, final Hashtable<Integer, String> NIDNameH, final Hashtable<Integer, Triple<Integer, Integer, Integer>> NIDColorH) { Hashtable<Integer, Vector<Integer>> NIDComVHAtr = AGMUtil.getNodeMembership(CmtyVVAtr); PrintWriter f = null; try { f = new PrintWriter(OutFNm); } catch (FileNotFoundException e) { e.printStackTrace(); return; } f.printf("<?xml version='1.0' encoding='UTF-8'?>\n"); f.printf("<gexf xmlns='http://www.gexf.net/1.2draft' xmlns:viz='http://www.gexf.net/1.1draft/viz' xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd' version='1.2'>\n"); f.printf("\t<graph mode='static' defaultedgetype='undirected'>\n"); if (CmtyVVAtr.size() > 0) { f.printf("\t<attributes class='node'>\n"); for (int c = 0; c < CmtyVVAtr.size(); c++) { f.printf("\t\t<attribute id='%d' title='c%d' type='boolean'>", c, c); f.printf("\t\t<default>false</default>\n"); f.printf("\t\t</attribute>\n"); } f.printf("\t</attributes>\n"); } f.printf("\t\t<nodes>\n"); for (Integer NID : g.vertexSet()) { String Label = NIDNameH.containsKey(NID) ? NIDNameH.get(NID) : ""; Triple<Integer, Integer, Integer> Color = NIDColorH.containsKey(NID) ? NIDColorH .get(NID) : new Triple<Integer, Integer, Integer>(120, 120, 120); double Size = MinSz; double SizeStep = (MaxSz - MinSz) / (double) CmtyVVAtr.size(); if (NIDComVHAtr.containsKey(NID)) { Size = MinSz + SizeStep * (double) NIDComVHAtr.get(NID).size(); } double Alpha = 1.0; f.printf("\t\t\t<node id='%d' label='%s'>\n", NID, Label); f.printf("\t\t\t\t<viz:color r='%d' g='%d' b='%d' a='%.1f'/>\n", Color.v1, Color.v2, Color.v3, Alpha); f.printf("\t\t\t\t<viz:size value='%.3f'/>\n", Size); // specify attributes if (NIDComVHAtr.containsKey(NID)) { f.printf("\t\t\t\t<attvalues>\n"); for (int c = 0; c < NIDComVHAtr.get(NID).size(); c++) { int CID = NIDComVHAtr.get(NID).get(c); f.printf("\t\t\t\t\t<attvalue for='%d' value='true'/>\n", CID); } f.printf("\t\t\t\t</attvalues>\n"); } f.printf("\t\t\t</node>\n"); } f.printf("\t\t</nodes>\n"); // plot edges int EID = 0; f.printf("\t\t<edges>\n"); for (DefaultEdge EI : g.edgeSet()) { f.printf("\t\t\t<edge id='%d' source='%d' target='%d'/>\n", EID++, g.getEdgeSource(EI), g.getEdgeTarget(EI)); } f.printf("\t\t</edges>\n"); f.printf("\t</graph>\n"); f.printf("</gexf>\n"); f.close(); } public static void saveGephi(final String OutFNm, final UndirectedGraph<Integer, DefaultEdge> g, final Vector<Vector<Integer>> CmtyVVAtr, final double MaxSz, final double MinSz) { Hashtable<Integer, String> TmpH = new Hashtable<Integer, String>(); saveGephi(OutFNm, g, CmtyVVAtr, MaxSz, MinSz, TmpH); } static void saveGephi(final String OutFNm, final UndirectedGraph<Integer, DefaultEdge> g, final Vector<Vector<Integer>> CmtyVVAtr, final double MaxSz, final double MinSz, final Hashtable<Integer, String> NIDNameH) { Hashtable<Integer, Triple<Integer, Integer, Integer>> TmpH = new Hashtable<Integer, Triple<Integer, Integer, Integer>>(); saveGephi(OutFNm, g, CmtyVVAtr, MaxSz, MinSz, NIDNameH, TmpH); } }