package edu.cmu.graphchi.apps.randomwalks; import edu.cmu.graphchi.*; import edu.cmu.graphchi.preprocessing.FastSharder; import edu.cmu.graphchi.preprocessing.VertexIdTranslate; import edu.cmu.graphchi.util.IdCount; import edu.cmu.graphchi.walks.DrunkardContext; import edu.cmu.graphchi.walks.DrunkardJob; import edu.cmu.graphchi.walks.DrunkardMobEngine; import edu.cmu.graphchi.walks.IntDrunkardContext; import edu.cmu.graphchi.walks.IntDrunkardFactory; import edu.cmu.graphchi.walks.IntWalkArray; import edu.cmu.graphchi.walks.WalkUpdateFunction; import edu.cmu.graphchi.walks.WalkArray; import edu.cmu.graphchi.walks.WeightedHopper; import edu.cmu.graphchi.walks.distributions.IntDrunkardCompanion; import edu.cmu.graphchi.walks.distributions.DrunkardCompanion; import edu.cmu.graphchi.walks.distributions.RemoteDrunkardCompanion; import org.apache.commons.cli.*; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.rmi.Naming; import java.util.Random; import java.util.logging.Logger; /** * Computes estimate of personalized pagerank using the DrunkardMobEngine. * <b>Note:</b> this version omits walks to adjacent vertices, and thus could be a * basis for recommendation engine. To remove that functionality, modify method * getNotTrackedVertices() * @author Aapo Kyrola */ public class PersonalizedPageRank implements WalkUpdateFunction<EmptyType, EmptyType> { private static double RESET_PROBABILITY = 0.15; private static Logger logger = ChiLogger.getLogger("personalized-pagerank"); private DrunkardMobEngine<EmptyType, EmptyType> drunkardMobEngine; private String baseFilename; private int firstSource; private int numSources; private int numWalksPerSource; private String companionUrl; public PersonalizedPageRank(String companionUrl, String baseFilename, int nShards, int firstSource, int numSources, int walksPerSource) throws Exception{ this.baseFilename = baseFilename; this.drunkardMobEngine = new DrunkardMobEngine<EmptyType, EmptyType>(baseFilename, nShards, new IntDrunkardFactory()); this.companionUrl = companionUrl; this.firstSource = firstSource; this.numSources = numSources; this.numWalksPerSource = walksPerSource; } private void execute(int numIters) throws Exception { File graphFile = new File(baseFilename); /** Use local drunkard mob companion. You can also pass a remote reference * by using Naming.lookup("rmi://my-companion") */ RemoteDrunkardCompanion companion; if (companionUrl.equals("local")) { companion = new IntDrunkardCompanion(4, Runtime.getRuntime().maxMemory() / 3); } else { companion = (RemoteDrunkardCompanion) Naming.lookup(companionUrl); } /* Configure walk sources. Note, GraphChi's internal ids are used. */ DrunkardJob drunkardJob = this.drunkardMobEngine.addJob("personalizedPageRank", EdgeDirection.OUT_EDGES, this, companion); drunkardJob.configureSourceRangeInternalIds(firstSource, numSources, numWalksPerSource); drunkardMobEngine.run(numIters); /* Ask companion to dump the results to file */ int nTop = 100; companion.outputDistributions(baseFilename + "_ppr_" + firstSource + "_" + (firstSource + numSources - 1) + ".top" + nTop, nTop); /* For debug */ VertexIdTranslate vertexIdTranslate = this.drunkardMobEngine.getVertexIdTranslate(); IdCount[] topForFirst = companion.getTop(firstSource, 10); System.out.println("Top visits from source vertex " + vertexIdTranslate.forward(firstSource) + " (internal id=" + firstSource + ")"); for(IdCount idc : topForFirst) { System.out.println(vertexIdTranslate.backward(idc.id) + ": " + idc.count); } /* If local, shutdown the companion */ if (companion instanceof DrunkardCompanion) { ((DrunkardCompanion) companion).close(); } } /** * WalkUpdateFunction interface implementations */ @Override public void processWalksAtVertex(WalkArray walkArray, ChiVertex<EmptyType, EmptyType> vertex, DrunkardContext drunkardContext_, Random randomGenerator) { int[] walks = ((IntWalkArray)walkArray).getArray(); IntDrunkardContext drunkardContext = (IntDrunkardContext) drunkardContext_; int numWalks = walks.length; int numOutEdges = vertex.numOutEdges(); // Advance each walk to a random out-edge (if any) if (numOutEdges > 0) { for(int i=0; i < numWalks; i++) { int walk = walks[i]; // Reset? if (randomGenerator.nextDouble() < RESET_PROBABILITY) { drunkardContext.resetWalk(walk, false); } else { int nextHop = vertex.getOutEdgeId(randomGenerator.nextInt(numOutEdges)); // Optimization to tell the manager that walks that have just been started // need not to be tracked. boolean shouldTrack = !drunkardContext.isWalkStartedFromVertex(walk); drunkardContext.forwardWalkTo(walk, nextHop, shouldTrack); } } } else { // Reset all walks -- no where to go from here for(int i=0; i < numWalks; i++) { drunkardContext.resetWalk(walks[i], false); } } } @Override /** * Instruct drunkardMob not to track visits to this vertex's immediate out-neighbors. */ public int[] getNotTrackedVertices(ChiVertex<EmptyType, EmptyType> vertex) { int[] notCounted = new int[1 + vertex.numOutEdges()]; for(int i=0; i < vertex.numOutEdges(); i++) { notCounted[i + 1] = vertex.getOutEdgeId(i); } notCounted[0] = vertex.getId(); return notCounted; } protected static FastSharder createSharder(String graphName, int numShards) throws IOException { return new FastSharder<EmptyType, EmptyType>(graphName, numShards, null, null, null, null); } public static void main(String[] args) throws Exception { /* Configure command line */ Options cmdLineOptions = new Options(); cmdLineOptions.addOption("g", "graph", true, "graph file name"); cmdLineOptions.addOption("n", "nshards", true, "number of shards"); cmdLineOptions.addOption("t", "filetype", true, "filetype (edgelist|adjlist)"); cmdLineOptions.addOption("f", "firstsource", true, "id of the first source vertex (internal id)"); cmdLineOptions.addOption("s", "nsources", true, "number of sources"); cmdLineOptions.addOption("w", "walkspersource", true, "number of walks to start from each source"); cmdLineOptions.addOption("i", "niters", true, "number of iterations"); cmdLineOptions.addOption("u", "companion", true, "RMI url to the DrunkardCompanion or 'local' (default)"); try { /* Parse command line */ CommandLineParser parser = new PosixParser(); CommandLine cmdLine = parser.parse(cmdLineOptions, args); /** * Preprocess graph if needed */ String baseFilename = cmdLine.getOptionValue("graph"); int nShards = Integer.parseInt(cmdLine.getOptionValue("nshards")); String fileType = (cmdLine.hasOption("filetype") ? cmdLine.getOptionValue("filetype") : null); /* Create shards */ if (baseFilename.equals("pipein")) { // Allow piping graph in FastSharder sharder = createSharder(baseFilename, nShards); sharder.shard(System.in, fileType); } else { FastSharder sharder = createSharder(baseFilename, nShards); if (!new File(ChiFilenames.getFilenameIntervals(baseFilename, nShards)).exists()) { sharder.shard(new FileInputStream(new File(baseFilename)), fileType); } else { logger.info("Found shards -- no need to pre-process"); } } // Run int firstSource = Integer.parseInt(cmdLine.getOptionValue("firstsource")); int numSources = Integer.parseInt(cmdLine.getOptionValue("nsources")); int walksPerSource = Integer.parseInt(cmdLine.getOptionValue("walkspersource")); int nIters = Integer.parseInt(cmdLine.getOptionValue("niters")); String companionUrl = cmdLine.hasOption("companion") ? cmdLine.getOptionValue("companion") : "local"; PersonalizedPageRank pp = new PersonalizedPageRank(companionUrl, baseFilename, nShards, firstSource, numSources, walksPerSource); pp.execute(nIters); } catch (Exception err) { err.printStackTrace(); // automatically generate the help statement HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("PersonalizedPageRank", cmdLineOptions); } } }