package edu.nd.nina.snap.agm; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.Hashtable; import java.util.Map; import java.util.Set; import java.util.Vector; import javax.imageio.ImageIO; import com.panayotis.gnuplot.JavaPlot; import com.panayotis.gnuplot.plot.AbstractPlot; import com.panayotis.gnuplot.style.PlotStyle; import com.panayotis.gnuplot.style.Style; import com.panayotis.gnuplot.terminal.ImageTerminal; import com.panayotis.gnuplot.terminal.PostscriptTerminal; import edu.nd.nina.UndirectedGraph; import edu.nd.nina.graph.DefaultEdge; import edu.nd.nina.math.LinearAlgebra; import edu.nd.nina.math.Randoms; import edu.nd.nina.structs.Pair; public class AGMFit { /** Graph to fit. */ UndirectedGraph<Integer, DefaultEdge> g; /** Community ID -> Member Node ID Sets. */ Vector<Set<Integer>> cIDNSetV; /** Edge -> Shared Community ID Set. */ Hashtable<Pair<Integer, Integer>, Set<Integer>> edgeComVH; /** Node ID -> Communities IDs the node belongs to. */ Hashtable<Integer, Set<Integer>> nIDComVH; /** The number of edges in each community. */ Vector<Integer> comEdgesV; /** * Probability of edge when two nodes share no community (epsilon in the * paper). */ Float pNoCom; /** * Parametrization of P_c (edge probability in community c), P_c = 1 - * exp(-lambda). */ Vector<Float> lambdaV; /***/ Randoms rnd; /** <Node ID, Community ID> pairs (for sampling MCMC moves). */ Hashtable<Pair<Integer, Integer>, Float> nIDCIDPrH; /** <Node ID, Community ID> pairs (for sampling MCMC moves). */ Hashtable<Pair<Integer, Integer>, Integer> nIDCIDPrS; /** Minimum value of regularization parameter lambda (default = 1e-5). */ Float minLambda; /** Maximum value of regularization parameter lambda (default = 10). */ Float maxLambda; /** * Regularization parameter when we fit for P_c (for finding # communities). */ Float regCoef; /** * ID of the Epsilon-community (in case we fit P_c of the epsilon * community). We do not fit */ Integer baseCID; AGMFit() { } AGMFit(final UndirectedGraph<Integer, DefaultEdge> _g, final Integer _initComs, final Integer _rndSeed){ g = _g; pNoCom = 0.0f; rnd = new Randoms(_rndSeed); minLambda = 0.00001f; maxLambda = 10.0f; regCoef = 0f; baseCID = -1; neighborComInit(_initComs); //RandomInitCmtyVV(InitComs); } public AGMFit(UndirectedGraph<Integer, DefaultEdge> _g, Vector<Vector<Integer>> _cmtyVVPt, Randoms _rnd) { g = _g; pNoCom = 0.0f; rnd = _rnd; minLambda = 0.00001f; maxLambda = 10.0f; regCoef = 0f; baseCID = -1; setCmtyVV(_cmtyVVPt); } private void setCmtyVV(Vector<Vector<Integer>> cmtyVV) { cIDNSetV = new Vector<Set<Integer>>(cmtyVV.size()); for (int i = 0; i < cmtyVV.size(); i++) cIDNSetV.add(new HashSet<Integer>()); for (int c = 0; c < cIDNSetV.size(); c++) { cIDNSetV.get(c).addAll(cmtyVV.get(c)); // check whether the member nodes exist in the graph for (int j = 0; j < cmtyVV.get(c).size(); j++) { assert (g.containsVertex(cmtyVV.get(c).get(j))); } } initNodeData(); setDefaultPNoCom(); } private void neighborComInit(Integer _initComs) { cIDNSetV = new Vector<Set<Integer>>(_initComs); for(int i=0; i<_initComs; i++) cIDNSetV.add(new HashSet<Integer>()); final int edges = g.edgeSet().size(); Vector<Pair<Float, Integer>> nIdPhiV = new Vector<Pair<Float, Integer>>( g.vertexSet().size()); Set<Integer> invalidNIDS = new HashSet<Integer>(g.vertexSet().size()); Vector<Integer> chosenNIDV = new Vector<Integer>(_initComs); // FOR // DEBUG Long exeTime = System.currentTimeMillis(); // compute conductance of neighborhood community for (Integer n : g.vertexSet()) { Set<Integer> nBCmty; double phi; if (g.degreeOf(n) < 5) { // do not include nodes with too few degree phi = 1.0; } else { nBCmty = AGMUtil.getNbhCom(g, n); assert (nBCmty.size() == g.degreeOf(n) + 1); phi = AGMUtil.getConductance(g, nBCmty, edges); } nIdPhiV.add(new Pair<Float, Integer>((float) phi, n)); } Collections.sort(nIdPhiV); Collections.reverse(nIdPhiV); System.out.println("conductance computation completed "+(System.currentTimeMillis() - exeTime)); System.out.flush(); //choose nodes with local minimum in conductance int curCID = 0; for (int ui = 0; ui < nIdPhiV.size(); ui++) { int UID = nIdPhiV.get(ui).p2; if (invalidNIDS.contains(UID)) { continue; } chosenNIDV.add(UID); //FOR DEBUG //add the node and its neighbors to the current community cIDNSetV.get(curCID).add(UID); for (DefaultEdge e : g.edgesOf(UID)) { if (g.getEdgeSource(e) == UID) { cIDNSetV.get(curCID).add(g.getEdgeTarget(e)); invalidNIDS.add(g.getEdgeTarget(e)); } else { cIDNSetV.get(curCID).add(g.getEdgeSource(e)); invalidNIDS.add(g.getEdgeTarget(e)); } } curCID++; if (curCID >= _initComs) { break; } } if (_initComs > curCID) { System.out.println((_initComs - curCID) + " communities needed to fill randomly"); } //assign a member to zero-member community (if any) for (int c = 0; c < cIDNSetV.size(); c++) { if (cIDNSetV.get(c).size() == 0) { int comSz = 10; for (int u = 0; u < comSz; u++) { int UID = g.randomVertex(rnd); cIDNSetV.get(c).add(UID); } } } initNodeData(); setDefaultPNoCom(); } //AGMFit(final UndirectedGraph<Integer, DefaultEdge> g, const TVec<TIntV>& CmtyVVPt, const TRnd& RndPt): G(GraphPt), PNoCom(0.0), Rnd(RndPt), MinLambda(0.00001), MaxLambda(10.0), RegCoef(0), BaseCID(-1) { SetCmtyVV(CmtyVVPt); } /** Set epsilon by the default value.*/ private void setDefaultPNoCom() { pNoCom = (float) (1.0d / (double) g.vertexSet().size() / (double) g.vertexSet().size()); } private void initNodeData() { nIDComVH = new Hashtable<Integer, Set<Integer>>(); for(int i=0; i<g.vertexSet().size(); i++){ Set<Integer> x = new HashSet<Integer>(); nIDComVH.put(i, x); } AGMUtil.getNodeMembership(nIDComVH, cIDNSetV); getEdgeJointCom(); lambdaV = new Vector<Float>(cIDNSetV.size()); for(int i=0; i<cIDNSetV.size(); i++){ lambdaV.add(0f); } for (int c = 0; c < cIDNSetV.size(); c++) { int MaxE = (cIDNSetV.get(c).size()) * (cIDNSetV.get(c).size() - 1) / 2; if (MaxE < 2) { lambdaV.set(c, maxLambda); } else { lambdaV.set( c, (float) -Math.log((double) (MaxE - comEdgesV.get(c)) / (float) MaxE)); } if (lambdaV.get(c) > maxLambda) { lambdaV.set(c, maxLambda); } if (lambdaV.get(c) < minLambda) { lambdaV.set(c, minLambda); } } nIDCIDPrS = new Hashtable<Pair<Integer, Integer>, Integer>(); for (int c = 0; c < cIDNSetV.size(); c++) { for (Integer SI : cIDNSetV.get(c)) { nIDCIDPrS.put(new Pair<Integer, Integer>(SI, c), 0); } } } /** * For each (u, v) in edges, precompute C_uv (the set of communities u and v * share). */ private void getEdgeJointCom() { comEdgesV = new Vector<Integer>(); for (int i = 0; i < cIDNSetV.size(); i++) { comEdgesV.add(0); } edgeComVH = new Hashtable<Pair<Integer, Integer>, Set<Integer>>(); for (DefaultEdge e : g.edgeSet()) { Integer srcNID = g.getEdgeSource(e); Integer dstNID = g.getEdgeTarget(e); if (srcNID >= dstNID) { continue; } assert (nIDComVH.containsKey(srcNID)); assert (nIDComVH.containsKey(dstNID)); Set<Integer> jointCom = AGMUtil.getIntersection( nIDComVH.get(srcNID), nIDComVH.get(dstNID)); //assert(!jointCom.isEmpty()); edgeComVH.put(new Pair<Integer, Integer>(srcNID, dstNID), jointCom); for (Integer k : jointCom) { comEdgesV.set(k, comEdgesV.get(k) + 1); } } assert (edgeComVH.size() == g.edgeSet().size()); } public void setPNumCom(final Float epsilon) { if (baseCID == -1 && epsilon > 0.0) { pNoCom = epsilon; } } private void runMCMC(int maxIter, int i) { runMCMC(maxIter, i, ""); } public void runMCMC(Integer maxIter, int evalLambdaIter, String plotFPrf) { Long IterTm = 0l, TotalTm = 0l; double prevL = likelihood(); double[] deltaL = { 0d }; double bestL = prevL; System.out.printf("initial likelihood = %f\n", prevL); Vector<Pair<Integer, Float>> IterTrueLV = new Vector<Pair<Integer, Float>>(); Vector<Pair<Integer, Float>> IterJoinV = new Vector<Pair<Integer, Float>>(); Vector<Pair<Integer, Float>> IterLeaveV = new Vector<Pair<Integer, Float>>(); Vector<Pair<Integer, Float>> IterAcceptV = new Vector<Pair<Integer, Float>>(); Vector<Pair<Integer, Float>> IterSwitchV = new Vector<Pair<Integer, Float>>(); Vector<Pair<Integer, Float>> IterLBV = new Vector<Pair<Integer, Float>>(); Vector<Pair<Integer, Integer>> IterTotMemV; Vector<Integer> IterV; Vector<Float> BestLV = new Vector<Float>(); Vector<Set<Integer>> BestCmtySetV = new Vector<Set<Integer>>(); int SwitchCnt = 0, LeaveCnt = 0, JoinCnt = 0, AcceptCnt = 0, ProbBinSz; Long PlotTm = System.currentTimeMillis(); ProbBinSz = Math.max(1000, g.vertexSet().size() / 10); // bin to compute // probabilities IterLBV.add(new Pair<Integer, Float>(1, (float) bestL)); for (int iter = 0; iter < maxIter; iter++) { IterTm++; int[] NID = { -1 }; int[] JoinCID = { -1 }, LeaveCID = { -1 }; sampleTransition(NID, JoinCID, LeaveCID, deltaL); // sample a move double OptL = prevL; // if it is accepted if (deltaL[0] > 0 || rnd.GetUniDev() < Math.exp(deltaL[0])) { IterTm++; if (LeaveCID[0] > -1 && LeaveCID[0] != baseCID) { leaveCom(NID[0], LeaveCID[0]); } if (JoinCID[0] > -1 && JoinCID[0] != baseCID) { joinCom(NID[0], JoinCID[0]); } if (LeaveCID[0] > -1 && JoinCID[0] > -1 && JoinCID[0] != baseCID && LeaveCID[0] != baseCID) { SwitchCnt++; } else if (LeaveCID[0] > -1 && LeaveCID[0] != baseCID) { LeaveCnt++; } else if (JoinCID[0] > -1 && JoinCID[0] != baseCID) { JoinCnt++; } AcceptCnt++; if ((iter + 1) % evalLambdaIter == 0) { IterTm++; MLEGradAscentGivenCAG(0.01, 3); OptL = likelihood(); } else { OptL = prevL + deltaL[0]; } if (bestL <= OptL && cIDNSetV.size() > 0) { BestCmtySetV = cIDNSetV; BestLV = lambdaV; bestL = OptL; } } if (iter > 0 && (iter % ProbBinSz == 0) && plotFPrf.length() > 0) { IterLBV.add(new Pair<Integer, Float>(iter, (float) OptL)); IterSwitchV.add(new Pair<Integer, Float>(iter, (float) SwitchCnt / (float) AcceptCnt)); IterLeaveV.add(new Pair<Integer, Float>(iter, (float) LeaveCnt / (float) AcceptCnt)); IterJoinV.add(new Pair<Integer, Float>(iter, (float) JoinCnt / (float) AcceptCnt)); IterAcceptV.add(new Pair<Integer, Float>(iter, (float) AcceptCnt / (float) ProbBinSz)); SwitchCnt = JoinCnt = LeaveCnt = AcceptCnt = 0; } prevL = OptL; if ((iter + 1) % 10000 == 0) { System.out.printf("\r%d iterations completed [%.2f]", iter, (double) iter / (double) maxIter); } } // plot the likelihood and acceptance probabilities if the plot file // name is given if (plotFPrf.length() > 0) { ImageTerminal png = new ImageTerminal(); File file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + plotFPrf + System.getProperty("file.separator") + plotFPrf + "_likelihood.png"); file.getParentFile().mkdirs(); JavaPlot GP1 = new JavaPlot(); GP1.setTerminal(png); double[][] zz = new double[IterLBV.size()][2]; for(int i = 0; i<IterLBV.size(); i++){ zz[i][0] = IterLBV.get(i).p1; zz[i][1] = IterLBV.get(i).p2; } GP1.addPlot(zz); GP1.getAxis("x").setLabel("iterations"); // GP1.setGNUPlotPath(plotFPrf + ".likelihood.tab"); ((AbstractPlot) GP1.getPlots().get(0)).getPlotStyle().setStyle( Style.LINESPOINTS); ((AbstractPlot) GP1.getPlots().get(0)).setTitle("likelihood"); String titleStr = String.format(" N:%d E:%d", g.vertexSet().size(), g.edgeSet().size()); GP1.setTitle(plotFPrf + ".likelihood" + titleStr); GP1.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } JavaPlot GP2 = new JavaPlot(); png = new ImageTerminal(); file = new File("." + System.getProperty("file.separator") + "data" + System.getProperty("file.separator") + plotFPrf + System.getProperty("file.separator") + plotFPrf + "_transition.png"); file.getParentFile().mkdirs(); GP2.setTerminal(png); zz = new double[IterSwitchV.size()][2]; for (int i = 0; i < IterSwitchV.size(); i++) { zz[i][0] = IterSwitchV.get(i).p1; zz[i][1] = IterSwitchV.get(i).p2; } GP2.addPlot(zz); zz = new double[IterLeaveV.size()][2]; for(int i = 0; i<IterLeaveV.size(); i++){ zz[i][0] = IterLeaveV.get(i).p1; zz[i][1] = IterLeaveV.get(i).p2; } GP2.addPlot(zz); zz = new double[IterJoinV.size()][2]; for(int i = 0; i<IterJoinV.size(); i++){ zz[i][0] = IterJoinV.get(i).p1; zz[i][1] = IterJoinV.get(i).p2; } GP2.addPlot(zz); zz = new double[IterAcceptV.size()][2]; for(int i = 0; i<IterAcceptV.size(); i++){ zz[i][0] = IterAcceptV.get(i).p1; zz[i][1] = IterAcceptV.get(i).p2; } GP2.addPlot(zz); GP2.getAxis("x").setLabel("Iterations"); ((AbstractPlot) GP2.getPlots().get(0)).setTitle("Switch"); ((AbstractPlot) GP2.getPlots().get(1)).setTitle("Leave"); ((AbstractPlot) GP2.getPlots().get(2)).setTitle("Join"); ((AbstractPlot) GP2.getPlots().get(3)).setTitle("Accept"); ((AbstractPlot) GP2.getPlots().get(0)).getPlotStyle().setStyle( Style.LINESPOINTS); ((AbstractPlot) GP2.getPlots().get(1)).getPlotStyle().setStyle( Style.LINESPOINTS); ((AbstractPlot) GP2.getPlots().get(2)).getPlotStyle().setStyle( Style.LINESPOINTS); ((AbstractPlot) GP2.getPlots().get(3)).getPlotStyle().setStyle( Style.LINESPOINTS); GP2.setTitle(plotFPrf + ".transition"); // GP2.setGNUPlotPath(plotFPrf + "transition_prob.tab"); GP2.plot(); try { ImageIO.write(png.getImage(), "png", file); } catch (IOException ex) { System.err.print(ex); } } cIDNSetV = BestCmtySetV; lambdaV = BestLV; initNodeData(); MLEGradAscentGivenCAG(0.001, 100); System.out.printf("\nMCMC completed (best likelihood: %.2f) [%s]\n", bestL, System.currentTimeMillis() - TotalTm); } /** * Gradient descent for p_c while fixing community affiliation graph (CAG). * * @param Thres * @param MaxIter * @return */ int MLEGradAscentGivenCAG(final double Thres, final int MaxIter) { return MLEGradAscentGivenCAG(Thres, MaxIter, new String()); } private int MLEGradAscentGivenCAG(final double Thres, final int MaxIter, final String PlotNm) { int Edges = g.edgeSet().size(); Long ExeTm = System.currentTimeMillis(); Vector<Float>[] GradV = new Vector[1]; int iter = 0; Vector<Pair<Integer, Float>> IterLV = new Vector<Pair<Integer, Float>>(), IterGradNormV = new Vector<Pair<Integer, Float>>(); double GradCutOff = 1000; for (iter = 0; iter < MaxIter; iter++) { gradLogLForLambda(GradV); // if gradient is going out of the // boundary, cut off for (int i = 0; i < lambdaV.size(); i++) { if (GradV[0].get(i) < -GradCutOff) { GradV[0].set(i, (float) -GradCutOff); } if (GradV[0].get(i) > GradCutOff) { GradV[0].set(i, (float) GradCutOff); } if (lambdaV.get(i) <= minLambda && GradV[0].get(i) < 0) { GradV[0].set(i, 0.0f); } if (lambdaV.get(i) >= maxLambda && GradV[0].get(i) > 0) { GradV[0].set(i, 0.0f); } } double Alpha = 0.15, Beta = 0.2; if (Edges > 1024 * 100) { Alpha = 0.00015; Beta = 0.3; } double LearnRate = getStepSizeByLineSearchForLambda(GradV[0], GradV[0], Alpha, Beta); if (LinearAlgebra.norm(GradV[0]) < Thres) { break; } for (int i = 0; i < lambdaV.size(); i++) { double Change = LearnRate * GradV[0].get(i); lambdaV.set(i, (float) (lambdaV.get(i) + Change)); if (lambdaV.get(i) < minLambda) { lambdaV.set(i, minLambda); } if (lambdaV.get(i) > maxLambda) { lambdaV.set(i, maxLambda); } } if (!PlotNm.isEmpty()) { double L = likelihood(); IterLV.add(new Pair<Integer, Float>(iter, (float) L)); IterGradNormV.add(new Pair<Integer, Float>(iter, (float) LinearAlgebra.norm(GradV[0]))); } } if (!PlotNm.isEmpty()) { // TGnuPlot::PlotValV(IterLV, PlotNm + ".likelihood_Q"); // TGnuPlot::PlotValV(IterGradNormV, PlotNm + ".gradnorm_Q"); System.out.printf( "MLE for Lambda completed with %d iterations(%s)\n", iter, System.currentTimeMillis() - ExeTm); } return iter; } /** * Step size search for updating P_c (which is parametarized by lambda). * * @param DeltaV * @param GradV * @param Alpha * @param Beta * @return */ private double getStepSizeByLineSearchForLambda(final Vector<Float> DeltaV, final Vector<Float> GradV, final double Alpha, final double Beta) { double StepSize = 1.0; double InitLikelihood = likelihood(); assert (lambdaV.size() == DeltaV.size()); Vector<Float> NewLambdaV = new Vector<Float>(lambdaV.size()); for(int i=0; i<lambdaV.size(); i++) NewLambdaV.add(0f); for (int iter = 0;; iter++) { for (int i = 0; i < lambdaV.size(); i++) { NewLambdaV.set(i, (float) (lambdaV.get(i) + StepSize * DeltaV.get(i))); if (NewLambdaV.get(i) < minLambda) { NewLambdaV.set(i, minLambda); } if (NewLambdaV.get(i) > maxLambda) { NewLambdaV.set(i, maxLambda); } } if (likelihood(NewLambdaV) < InitLikelihood + Alpha * StepSize * LinearAlgebra.dotProduct(GradV, DeltaV)) { StepSize *= Beta; } else { break; } } return StepSize; } /** * Gradient of likelihood for P_c. * * @param gradV */ private void gradLogLForLambda(Vector<Float>[] gradV) { gradV[0] = new Vector<Float>(lambdaV.size()); for(int i=0; i<lambdaV.size(); i++){ gradV[0].add(0f); } Vector<Float> SumEdgeProbsV = new Vector<Float>(lambdaV.size()); for(int i=0; i<lambdaV.size(); i++){ SumEdgeProbsV.add(0f); } for (Set<Integer> JointCom : edgeComVH.values()) { double LambdaSum = selectLambdaSum(JointCom); double Puv = 1 - Math.exp(-LambdaSum); if (JointCom.size() == 0) { Puv = pNoCom; } for (Integer si : JointCom) { SumEdgeProbsV.set(si, (float) (SumEdgeProbsV.get(si) + (1 - Puv) / Puv)); } } for (int k = 0; k < lambdaV.size(); k++) { int MaxEk = cIDNSetV.get(k).size() * (cIDNSetV.get(k).size() - 1) / 2; int NotEdgesInCom = MaxEk - comEdgesV.get(k); gradV[0].set(k, (float) (SumEdgeProbsV.get(k) - (double) NotEdgesInCom)); if (lambdaV.get(k) > 0.0 && regCoef > 0.0) { // if regularization // exists gradV[0].set(k, gradV[0].get(k) - regCoef); } } } /** * After MCMC, NID leaves community CID. * * @param NID * @param CID */ void leaveCom(final int NID, final int CID) { for (DefaultEdge e : g.edgesOf(NID)) { int VID; if (g.getEdgeSource(e) == NID) { VID = g.getEdgeTarget(e); } else { VID = g.getEdgeSource(e); } if (nIDComVH.get(VID).contains(CID)) { Pair<Integer, Integer> SrcDstNIDPr = new Pair<Integer, Integer>( Math.min(NID, VID), Math.max(NID, VID)); edgeComVH.get(SrcDstNIDPr).remove(CID); comEdgesV.set(CID, comEdgesV.get(CID) - 1); } } cIDNSetV.get(CID).remove(NID); nIDComVH.get(NID).remove(CID); nIDCIDPrS.remove(new Pair<Integer, Integer>(NID, CID)); } /** * After MCMC, NID joins community CID. * * @param NID * @param JoinCID */ void joinCom(final int NID, final int JoinCID) { for (DefaultEdge e : g.edgesOf(NID)) { int VID; if (g.getEdgeSource(e) == NID) { VID = g.getEdgeTarget(e); } else { VID = g.getEdgeSource(e); } if (nIDComVH.get(VID).contains(JoinCID)) { Pair<Integer, Integer> SrcDstNIDPr = new Pair<Integer, Integer>( Math.min(NID, VID), Math.max(NID, VID)); edgeComVH.get(SrcDstNIDPr).add(JoinCID); comEdgesV.set(JoinCID, comEdgesV.get(JoinCID) + 1); } } cIDNSetV.get(JoinCID).add(NID); nIDComVH.get(NID).add(JoinCID); nIDCIDPrS.put(new Pair<Integer, Integer>(NID, JoinCID), 0); } /** * Sample transition: Choose among (join, leave, switch), and then sample * (NID, CID). * * @param nID * @param joinCID * @param leaveCID * @param deltaL */ private void sampleTransition(int[] nID, int[] joinCID, int[] leaveCID, double[] deltaL) { int Option = rnd.GetUniDevInt(3); // 0:Join 1:Leave 2:Switch // if there is only one node membership, only join is possible. if (nIDCIDPrS.size() <= 1) { Option = 0; } int TryCnt = 0; int MaxTryCnt = g.vertexSet().size(); deltaL[0] = Float.MIN_VALUE; if (Option == 0) { do { joinCID[0] = rnd.GetUniDevInt(cIDNSetV.size()); nID[0] = g.randomVertex(rnd); } while (TryCnt++ < MaxTryCnt && nIDCIDPrS.containsKey(new Pair<Integer, Integer>(nID[0], joinCID[0]))); if (TryCnt < MaxTryCnt) { // if successfully find a move deltaL[0] = seekJoin(nID[0], joinCID[0]); } } else if (Option == 1) { do { Pair<Integer, Integer> NIDCIDPr = null; int stop = rnd.next(nIDCIDPrS.size()), i = 0; for (Pair<Integer, Integer> x : nIDCIDPrS.keySet()) { NIDCIDPr = x; i++; if (i >= stop) break; } nID[0] = NIDCIDPr.p1; leaveCID[0] = NIDCIDPr.p2; } while (TryCnt++ < MaxTryCnt && leaveCID[0] == baseCID); if (TryCnt < MaxTryCnt) {// if successfully find a move deltaL[0] = seekLeave(nID[0], leaveCID[0]); } } else { do { Pair<Integer, Integer> NIDCIDPr = null; int stop = rnd.next(nIDCIDPrS.size()), i = 0; for (Pair<Integer, Integer> x : nIDCIDPrS.keySet()) { NIDCIDPr = x; i++; if (i >= stop) break; } nID[0] = NIDCIDPr.p1; leaveCID[0] = NIDCIDPr.p2; } while (TryCnt++ < MaxTryCnt && (nIDComVH.get(nID[0]).size() == cIDNSetV.size() || leaveCID[0] == baseCID)); do { joinCID[0] = rnd.GetUniDevInt(cIDNSetV.size()); } while (TryCnt++ < g.vertexSet().size() && nIDCIDPrS.containsKey(new Pair<Integer, Integer>(nID[0], joinCID[0]))); if (TryCnt < MaxTryCnt) {// if successfully find a move deltaL[0] = seekSwitch(nID[0], leaveCID[0], joinCID[0]); } } } /** * Compute the change in likelihood (Delta) if node UID switches from CurCID * to NewCID. * * @param UID * @param curCID * @param newCID * @return */ private double seekSwitch(int UID, int curCID, int newCID) { assert (!cIDNSetV.get(newCID).contains(UID)); assert (cIDNSetV.get(curCID).contains(UID)); double Delta = seekJoin(UID, newCID) + seekLeave(UID, curCID); assert(!Double.isNaN(Delta)); // correct only for intersection between new com and current com for (DefaultEdge e : g.edgesOf(UID)) { int VID; if (g.getEdgeSource(e) == UID) { VID = g.getEdgeTarget(e); } else { VID = g.getEdgeSource(e); } if (!nIDComVH.get(VID).contains(curCID) || !nIDComVH.get(VID).contains(newCID)) { continue; } Pair<Integer, Integer> SrcDstNIDPr = new Pair<Integer, Integer>( Math.min(UID, VID), Math.max(UID, VID)); Set<Integer> JointCom = edgeComVH.get(SrcDstNIDPr); if(JointCom.isEmpty()) { System.out.println(SrcDstNIDPr); } double CurPuv, NewPuvAfterJoin, NewPuvAfterLeave, NewPuvAfterSwitch; double LambdaSum = selectLambdaSum(JointCom); CurPuv = 1 - Math.exp(-LambdaSum); NewPuvAfterLeave = 1 - Math.exp(-LambdaSum + lambdaV.get(curCID)); NewPuvAfterJoin = 1 - Math.exp(-LambdaSum - lambdaV.get(newCID)); NewPuvAfterSwitch = 1 - Math.exp(-LambdaSum - lambdaV.get(newCID) + lambdaV.get(curCID)); if (JointCom.size() == 1 || NewPuvAfterLeave == 0.0) { NewPuvAfterLeave = pNoCom; } assert(!Double.isNaN(Delta)); Delta += (Math.log(NewPuvAfterSwitch) + Math.log(CurPuv) - Math.log(NewPuvAfterLeave) - Math.log(NewPuvAfterJoin)); if (Double.isNaN(Delta)) { System.out.printf("NS:%f C:%f NL:%f NJ:%f pNoCom:%f", NewPuvAfterSwitch, CurPuv, NewPuvAfterLeave, NewPuvAfterJoin, pNoCom); } assert (!Double.isNaN(Delta)); } return Delta; } /** * Compute the change in likelihood (Delta) if node UID leaves community * CID. * * @param UID * @param CID * @return */ double seekLeave(int UID, int CID) { assert (cIDNSetV.get(CID).contains(UID)); assert (g.containsVertex(UID)); double Delta = 0.0; int NbhsInC = 0; for (DefaultEdge e : g.edgesOf(UID)) { int VID; if (g.getEdgeSource(e) == UID) { VID = g.getEdgeTarget(e); } else { VID = g.getEdgeSource(e); } if (!nIDComVH.get(VID).contains(CID)) { continue; } Pair<Integer, Integer> SrcDstNIDPr = new Pair<Integer, Integer>( Math.min(UID, VID), Math.max(UID, VID)); Set<Integer> JointCom = edgeComVH.get(SrcDstNIDPr); double CurPuv, NewPuv, LambdaSum = selectLambdaSum(JointCom); CurPuv = 1 - Math.exp(-LambdaSum); NewPuv = 1 - Math.exp(-LambdaSum + lambdaV.get(CID)); assert (JointCom.size() > 0); if (JointCom.size() == 1) { NewPuv = pNoCom; } Delta += (Math.log(NewPuv) - Math.log(CurPuv)); assert (!Double.isNaN(Delta)); NbhsInC++; } Delta += lambdaV.get(CID) * (cIDNSetV.get(CID).size() - 1 - NbhsInC); return Delta; } /** * // Compute the change in likelihood (Delta) if node UID joins community * CID. * * @param UID * @param CID * @return */ private double seekJoin(int UID, int CID) { assert (!cIDNSetV.get(CID).contains(UID)); double Delta = 0.0; int NbhsInC = 0; for (DefaultEdge e : g.edgesOf(UID)) { int VID; if (g.getEdgeSource(e) == UID) { VID = g.getEdgeTarget(e); } else { VID = g.getEdgeSource(e); } if (!nIDComVH.get(VID).contains(CID)) { continue; } Pair<Integer, Integer> SrcDstNIDPr = new Pair<Integer, Integer>( Math.min(UID, VID), Math.max(UID, VID)); Set<Integer> jointCom = edgeComVH.get(SrcDstNIDPr); double CurPuv, NewPuv, LambdaSum = selectLambdaSum(jointCom); CurPuv = 1 - Math.exp(-LambdaSum); if (jointCom.size() == 0) { CurPuv = pNoCom; } NewPuv = 1 - Math.exp(-LambdaSum - lambdaV.get(CID)); Delta += (Math.log(NewPuv) - Math.log(CurPuv)); assert (!Double.isNaN(Delta)); NbhsInC++; } Delta -= lambdaV.get(CID) * (cIDNSetV.get(CID).size() - NbhsInC); assert(!Double.isNaN(Delta)); return Delta; } double likelihood() { return likelihood(lambdaV); } private double likelihood(final Vector<Float> newLambdaV) { double tmp1 = 0, tmp2 = 0; return likelihood(newLambdaV, tmp1, tmp2); } private double likelihood(Vector<Float> newLambdaV, double lEdges, double lNoEdges) { assert (cIDNSetV.size() == newLambdaV.size()); assert (comEdgesV.size() == cIDNSetV.size()); lEdges = 0.0; lNoEdges = 0.0; for (Set<Integer> jointCom : edgeComVH.values()) { double lambdaSum = selectLambdaSum(newLambdaV, jointCom); double puv = 1 - Math.exp(-lambdaSum); if (jointCom.size() == 0) { puv = pNoCom; } assert (!Double.isNaN(Math.log(puv))); lEdges += Math.log(puv); } for (int k = 0; k < newLambdaV.size(); k++) { int maxEk = cIDNSetV.get(k).size() * (cIDNSetV.get(k).size() - 1) / 2; int notEdgesInCom = maxEk - comEdgesV.get(k); if (notEdgesInCom > 0) { if (lNoEdges >= -Double.MAX_VALUE + (double) notEdgesInCom * newLambdaV.get(k)) { lNoEdges -= (double) notEdgesInCom * newLambdaV.get(k); } } } double lReg = 0.0; if (regCoef > 0.0) { lReg = -regCoef * LinearAlgebra.sumVec(newLambdaV); } return lEdges + lNoEdges + lReg; } /** * Compute sum of \c lambda_c (which is log (1 - \c p_c)) over \c C_uv ( * <tt>ComK</tt>). The function is used to compute edge probability \c P_uv. * * @param newLambdaV * @param jointCom * @return */ private double selectLambdaSum(Vector<Float> newLambdaV, Set<Integer> comK) { double result = 0.0; for (Integer si : comK) { assert (newLambdaV.get(si) >= 0); result += newLambdaV.get(si); } return result; } private double selectLambdaSum(Set<Integer> jointCom) { return selectLambdaSum(lambdaV, jointCom); } public void setRegCoef(double _regCoef) { regCoef = (float) _regCoef; } /** * Get communities whose p_c is higher than 1 - QMax. * * @param CmtyVV * @param QMax */ public Vector<Vector<Integer>> getCmtyVV(final double QMax) { Vector<Float> TmpQV = new Vector<Float>(); return getCmtyVV(TmpQV, QMax); } public Vector<Vector<Integer>> getCmtyVV(Vector<Float> QV, final double QMax) { Vector<Vector<Integer>> CmtyVV = new Vector<Vector<Integer>>(); for (int i = 0; i < cIDNSetV.size(); i++){ QV.add(0f); CmtyVV.add(new Vector<Integer>()); } Hashtable<Integer, Float> CIDLambdaH = new Hashtable<Integer, Float>( cIDNSetV.size()); for (int c = 0; c < cIDNSetV.size(); c++) { CIDLambdaH.put(c, lambdaV.get(c)); } sortByValue(CIDLambdaH); for (Integer CID : CIDLambdaH.keySet()) { assert (lambdaV.get(CID) >= minLambda); double Q = Math.exp(-(double) lambdaV.get(CID)); if (Q > QMax) { continue; } Vector<Integer> CmtyV = new Vector<Integer>(); CmtyV.addAll(cIDNSetV.get(CID)); if (CmtyV.size() == 0) { continue; } if (CID == baseCID) { // if the community is the base // community(epsilon community), discard assert (CmtyV.size() == g.vertexSet().size()); } else { CmtyVV.add(CmtyV); QV.add((float) Q); } } return CmtyVV; } public static void sortByValue(Hashtable<?, Float> t) { // Transfer as List and sort it ArrayList<Map.Entry<?, Float>> l = new ArrayList<Map.Entry<?, Float>>( t.entrySet()); Collections.sort(l, new Comparator<Map.Entry<?, Float>>() { public int compare(Map.Entry<?, Float> o1, Map.Entry<?, Float> o2) { return o1.getValue().compareTo(o2.getValue()); } }); } private void printSummary() { Hashtable<Integer, Float> CIDLambdaH = new Hashtable<Integer, Float>( cIDNSetV.size()); for (int c = 0; c < cIDNSetV.size(); c++) { CIDLambdaH.put(c, lambdaV.get(c)); } sortByValue(CIDLambdaH); int Coms = 0; for (int i = 0; i < lambdaV.size(); i++) { int CID = CIDLambdaH.get(i).intValue(); if (lambdaV.get(CID) <= 0.0001) { continue; } System.out.printf( "P_c : %.3f Com Sz: %d, Total Edges inside: %d \n", 1.0 - Math.exp(-lambdaV.get(CID)), cIDNSetV.get(CID).size(), (int) comEdgesV.get(CID)); Coms++; } System.out .printf("%d Communities, Total Memberships = %d, Likelihood = %.2f, Epsilon = %f\n", Coms, nIDCIDPrS.size(), likelihood(), pNoCom); } public static void main(String args[]){ /**Output file name prefix*/ final String outFilePrefix = "demo"; /**Input edgelist file name. DEMO: AGM with 2 communities*/ //private static final String inFileName = "football.edgelist"; final String inFileName = "DEMO"; /**Input file name for node names (Node ID, Node label)*/ final String labelFileName = "football.labels"; /**Random seed for AGM*/ final Integer randomSeed = 1; /**Edge probability between the nodes that do not share any community (default (0.0): set it to be 1 / N^2)*/ final Float epsilon = 0f; /**Number of communities (0: determine it by AGM)*/ final Integer numCommunites = 0; Long exeTime = System.currentTimeMillis(); UndirectedGraph<Integer, DefaultEdge> g = null; Vector<Vector<Integer>> cmtyVV = new Vector<Vector<Integer>>(); Hashtable<Integer, String> nIDName = new Hashtable<Integer, String>(); if(inFileName.equals("DEMO")){ Vector<Vector<Integer>> trueCmtyVV = new Vector<Vector<Integer>>(); Randoms rnd = new Randoms(randomSeed); //generate community bipartite affiliation final Integer aBegin = 0, aEnd = 64, bBegin = 25, bEnd = 100; trueCmtyVV.add(new Vector<Integer>()); trueCmtyVV.add(new Vector<Integer>()); for (int u = aBegin; u < aEnd; u++) { trueCmtyVV.get(0).add(u); } for (int u = bBegin; u < bEnd; u++) { trueCmtyVV.get(1).add(u); } g = AGM.generateAGM(trueCmtyVV, 0.0, 0.5, rnd); }else if(!labelFileName.isEmpty()){ // G = TSnap::LoadEdgeList<PUNGraph>(InFNm); // TSsParser Ss(LabelFNm, ssfTabSep); // while (Ss.Next()) { // if (Ss.Len() > 0) { NIDNameH.AddDat(Ss.GetInt(0), Ss.GetFld(1)); } // } }else{ // G = TAGMUtil::LoadEdgeListStr<PUNGraph>(InFNm, NIDNameH); } System.out.println("Graph: "+g.vertexSet().size()+" Nodes "+g.edgeSet().size()+" Edges"); int maxIter = 10 * g.vertexSet().size() *g.vertexSet().size(); if (maxIter < 0) { maxIter = Integer.MAX_VALUE; } int numComs = numCommunites; if (numComs < 2) { int initComs; if (g.vertexSet().size() > 1000) { initComs = g.vertexSet().size() / 5; numComs = AGMUtil.findComsByAGM(g, initComs, maxIter, randomSeed, 1.5, epsilon, outFilePrefix); } else { initComs = g.vertexSet().size() / 5; numComs = AGMUtil.findComsByAGM(g, initComs, maxIter, randomSeed, 1.2, epsilon, outFilePrefix); } } AGMFit aGMFit = new AGMFit(g, numComs, randomSeed); if (epsilon > 0) { aGMFit.setPNumCom(epsilon); } aGMFit.runMCMC(maxIter, 10, outFilePrefix); cmtyVV = aGMFit.getCmtyVV(0.9999); AGMUtil.dumpCmtyVV(outFilePrefix + "cmtyvv.txt", cmtyVV, nIDName); AGMUtil.saveGephi(outFilePrefix + "graph.gexf", g, cmtyVV, 1.5, 1.5, nIDName); aGMFit.printSummary(); System.out.printf("\nrun time: %s (%s)\n", System.currentTimeMillis() - exeTime, System.currentTimeMillis()); } }