/*
 * - Is it worthwhile to send the Master the Worker numbers, thereby allowing
 *   the Master to avoid sending start super step commands to workers that
 *   have no active parts? Probably not worth the effort.
 */
// TODO: Worker: Batch the sending of AddVertexToWorker commands?
// Currently, when vertices are added during graph construction, a message
// is sent for each vertex added (as opposed to batching all such requests
// destined for the same worker). Is this best?
// TODO FIX jicosfoundation Processor thread invokes start() in its constructor
package edu.ucsb.jpregel.system;

import api.Aggregator;
import edu.ucsb.jpregel.system.commands.*;
import java.io.IOException;
import static java.lang.System.out;
import java.rmi.Naming;
import java.rmi.RemoteException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;
import jicosfoundation.*;

/**
 * A Worker holds a set of graph Parts, executes super steps on them using a
 * fixed-size thread pool, and exchanges Commands with the Master and with the
 * other Workers.
 *
 * @author Peter Cappello
 */
public abstract class Worker extends ServiceImpl
{
    // ServiceImpl attributes
    // NOTE(review): the Worker's service name is "Master"; this looks like a
    // copy/paste leftover - confirm against how serviceName() is used.
    static public String SERVICE_NAME = "Master";
    static private final Department[] departments = { ServiceImpl.ASAP_DEPARTMENT };
    static private final Class[][] command2DepartmentArray =
    {
        // ASAP Commands
        {
            AddVertexToWorker.class,
            AddVertexToPartComplete.class,
            CollectGarbage.class,
            DoNextSuperStep.class,
            MasterCommandCompleted.class,
            ReadWorkerInputFile.class,
            SendMessage.class,
            SendVertexIdToMessageQMap.class,
            SetJob.class,
            SetWorkerMap.class,
            ShutdownWorker.class,
            WorkerCommandCompleted.class,
            WriteWorkerOutputFile.class,
        }
    };

    private final static RemoteExceptionHandler REMOTE_EXCEPTION_HANDLER = new DefaultRemoteExceptionHandler();

    /**
     * Reports that the Master is not reachable yet and sleeps 5 seconds
     * before the caller retries.
     *
     * @param i unused by this method
     */
    private static void tryAgain( int i )
    {
        System.out.println( "Master not up yet. Trying again in 5 seconds ... " );
        try
        {
            Thread.sleep( 5000 );
        }
        catch ( InterruptedException exception )
        {
            // The interrupt status is deliberately not restored: the caller
            // loops on tryAgain, and a set interrupt flag would make every
            // subsequent sleep return immediately.
            System.out.println( "Waiting interrupted, trying again immediately" );
        }
    }

    private final Proxy masterProxy;
    private int myWorkerNum = 0;
    private Map<Integer, Service> workerNumToWorkerMap;
    private Job job;
    private ConcurrentMap<Integer, Part> partIdToPartMap;
    private Collection<Part> partSet;
    private FileSystem fileSystem;

    // super step attributes
    private ComputeInput computeInput;
    private Map<Integer, Map<Object, MessageQ>> workerNumToVertexIdToMessageQMapMap;
    private long superStep;
    private Aggregator stepAggregator;
    private Aggregator problemAggregator;
    private AtomicInteger deltaNumVertices;

    // coordination variables
    private AtomicBoolean thereIsANextStep;
    private AtomicInteger numUnacknowledgedAddVertexCommands;
    private ExecutorService executorService = Executors.newFixedThreadPool( Runtime.getRuntime().availableProcessors() );
    private CountDownLatch countDownLatch;

    // constants
    private final Command AddVertexToPartCompleteCommand = new AddVertexToPartComplete();
    private final Command masterCommandCompleted = new MasterCommandCompleted();
    private final Command workerCommandCompleted = new WorkerCommandCompleted();

    private final Service master;

    /**
     * Registers with the given Master service and installs a proxy for it.
     *
     * @param master the Master service this Worker reports to
     * @throws RemoteException declared for the remote calls made by the superclass
     */
    public Worker( Service master ) throws RemoteException
    {
        // set Jicos Service attributes
        super( command2DepartmentArray );
        super.setDepartments( departments );
        super.register( master );
        this.master = master;

        masterProxy = new ProxyMaster( master, this, REMOTE_EXCEPTION_HANDLER );
        addProxy( master, masterProxy );

        // log numAvailableProcessors
        int numAvailableProcessors = Runtime.getRuntime().availableProcessors();
        System.out.println( "Worker.constructor: Available processors: " + numAvailableProcessors );
    }

    /**
     * Sends a RegisterWorker command to the Master and stores the Integer it
     * returns as this Worker's number.
     *
     * @throws RemoteException if the synchronous command to the Master fails
     */
    public void init() throws RemoteException
    {
        super.setService( this );
        CommandSynchronous command = new RegisterWorker( serviceName(), Runtime.getRuntime().availableProcessors() );
        myWorkerNum = ( (Integer) master.executeCommand( this, command ) );
        super.register( master );
    }

    /**
     * Adds a vertex to the Part with the given id, creating the Part on first use.
     *
     * @param partId id of the Part that receives the vertex
     * @param vertex the vertex to add
     */
    public void addVertexToPart( int partId, VertexImpl vertex )
    {
        Part part = partIdToPartMap.get( partId );
        if ( null == part )
        {
            part = new Part( partId, job, this );
            // another thread may have won the race; putIfAbsent keeps its Part
            partIdToPartMap.putIfAbsent( partId, part );
        }
        // always re-read so the vertex lands in the Part that is actually mapped
        partIdToPartMap.get( partId ).add( vertex );
    }

    /** @return the Part mapped to partId, or null if there is none */
    Part getPart( int partId ) { return partIdToPartMap.get( partId ); }

    /** @return the values view of the part map */
    synchronized public Collection<Part> getParts() { return partIdToPartMap.values(); }

    /** @return this Worker's number, as assigned in {@link #init()} */
    public int getWorkerNum() { return myWorkerNum; }

    /** @return the part collection set up by {@link #setJob(Job)} */
    Collection<Part> getPartSet() { return partSet; }

    /** @return the current Job */
    synchronized public Job getJob() { return job; }

    /**
     * @param partId a part id
     * @return the worker number that the Job's worker graph maker assigns to
     *         partId, given the current number of workers
     */
    public int getWorkerNum( int partId )
    {
        return job.getWorkerGraphMaker().getWorkerNum( partId, workerNumToWorkerMap.size() );
    }

    // TODO omit this method by converting all worker graph makers
    /**
     * Adds a vertex to the local Part if it belongs to this Worker; otherwise
     * forwards its String form to the owning Worker.
     *
     * @param vertex the vertex object (used when the vertex is local)
     * @param stringVertex the String form of the vertex (sent when it is remote)
     */
    synchronized public void addVertex( VertexImpl vertex, String stringVertex )
    {
        int partId = job.getPartId( vertex.getVertexId() );
        int workerNum = getWorkerNum( partId );
        if ( myWorkerNum == workerNum )
        {
            // vertex is local to this worker
            addVertexToPart( partId, vertex );
        }
        else
        {
            // vertex belongs to another worker
            addRemoteVertex( workerNum, partId, stringVertex );
        }
    }

    /**
     * Sends an AddVertexToWorker command to another Worker and counts it as
     * unacknowledged until that Worker replies.
     *
     * @param workerNum number of the destination Worker
     * @param partId id of the destination Part
     * @param stringVertex the String form of the vertex
     */
    public void addRemoteVertex( int workerNum, int partId, String stringVertex )
    {
        Service workerService = workerNumToWorkerMap.get( workerNum );
        // count before sending so the acknowledgment cannot arrive first
        numUnacknowledgedAddVertexCommands.getAndIncrement();
        Command command = new AddVertexToWorker( partId, stringVertex, getWorkerNum() );
        sendCommand( workerService, command );
    }

    // TODO do not synchronize at this level;
    /**
     * Merges a Part's outgoing message queues into this Worker's per-super-step
     * map, keyed first by destination worker number and then by vertex id.
     *
     * @param workerNumToVertexIdToMessageQMapMap the map to merge in; if no
     *        map has been accumulated yet, this object itself is adopted
     */
    synchronized void mergeMap( Map<Integer, Map<Object, MessageQ>> workerNumToVertexIdToMessageQMapMap )
    {
        if ( this.workerNumToVertexIdToMessageQMapMap == null )
        {
            this.workerNumToVertexIdToMessageQMapMap = workerNumToVertexIdToMessageQMapMap;
            return;
        }
        for ( Map.Entry<Integer, Map<Object, MessageQ>> workerEntry : workerNumToVertexIdToMessageQMapMap.entrySet() )
        {
            Integer workerNum = workerEntry.getKey();
            Map<Object, MessageQ> vertexIdToMessageQMap = workerEntry.getValue();
            Map<Object, MessageQ> workerVertexIdToMessageQMap = this.workerNumToVertexIdToMessageQMapMap.get( workerNum );
            if ( workerVertexIdToMessageQMap == null )
            {
                // first messages for this destination worker: adopt the map
                this.workerNumToVertexIdToMessageQMapMap.put( workerNum, vertexIdToMessageQMap );
                continue;
            }
            for ( Map.Entry<Object, MessageQ> vertexEntry : vertexIdToMessageQMap.entrySet() )
            {
                Object vertexId = vertexEntry.getKey();
                MessageQ newMessageQ = vertexEntry.getValue();
                MessageQ workerMessageQ = workerVertexIdToMessageQMap.get( vertexId );
                if ( workerMessageQ == null )
                {
                    workerVertexIdToMessageQMap.put( vertexId, newMessageQ );
                }
                else
                {
                    workerMessageQ.addAll( newMessageQ );
                }
            }
        }
    }

    /**
     * @param jobDirectoryName the job directory name taken from the Job
     * @return the FileSystem this Worker hands to the Job
     */
    public abstract FileSystem makeFileSystem( String jobDirectoryName );

    /** Prints the stack trace and terminates the JVM with exit status 1. */
    @Override
    public void exceptionHandler( Exception exception )
    {
        exception.printStackTrace();
        System.exit( 1 );
    }

    /* _____________________________
     *
     * Command implementations
     * _____________________________
     */

    // Command: AddVertexToWorker
    /**
     * Builds a vertex from its String form, adds it to the given Part, and
     * acknowledges the sending Worker.
     */
    public void addVertexToWorker( int partId, String stringVertex, int sendingWorkerNum )
    {
        VertexImpl vertexFactory = job.getVertexFactory();
        VertexImpl vertex = vertexFactory.make( stringVertex );
        addVertexToPart( partId, vertex );
        sendCommand( workerNumToWorkerMap.get( sendingWorkerNum ), AddVertexToPartCompleteCommand );
    }

    // Command: AddVertexToPartComplete
    /**
     * Records one acknowledgment; when the count of unacknowledged commands
     * reaches zero, wakes a thread waiting on this Worker's monitor.
     */
    public void addVertexToPartComplete()
    {
        if ( numUnacknowledgedAddVertexCommands.decrementAndGet() == 0 )
        {
            synchronized ( this )
            {
                notify();
            }
        }
    }

    // Command: CollectGarbage
    /** Optionally requests a garbage collection, then acknowledges the Master. */
    public void collectGarbage()
    {
        if ( this.collectingGarbage() )
        {
            System.gc();
        }
        sendCommand( master, masterCommandCompleted );
    }

    // Command: ReadWorkerInputFile
    /**
     * Has the Job build this Worker's portion of the graph, waits until every
     * AddVertexToWorker command it sent has been acknowledged, then reports
     * the vertex count to the Master.
     *
     * @throws InterruptedException if interrupted while waiting for acknowledgments
     */
    synchronized public void processInputFile() throws InterruptedException
    {
        int numVertices = job.makeGraph( this );

        // Ensure completion of all AddVertexToWorker commands before notifying
        // the master. Must be a loop: wait() may return spuriously, and a
        // notify() issued when the counter momentarily reached zero during
        // graph construction can be delivered after the counter has risen again.
        while ( numUnacknowledgedAddVertexCommands.get() > 0 )
        {
            wait();
        }
        Command command = new InputFileProcessingComplete( myWorkerNum, numVertices );
        sendCommand( master, command );

        // LOG LEVEL = DEBUG: output part sizes to see how PartId for vertices are distributed
        for ( Part part : partSet )
        {
            out.println( "Worker.processInputFile: worker: " + myWorkerNum
                    + " PartId: " + part.getPartId()
                    + " size: " + part.getVertexIdToVertexMap().size() );
        }
    }

    // Command: SendMessage
    /**
     * Delivers a single message to the local Part and replies to the sending Worker.
     */
    public void receiveMessage( int sendingWorkerNum, int partId, Object vertexId, Message message, long superStep )
    {
        Part receivingPart = partIdToPartMap.get( partId );
        receivingPart.receiveMessage( vertexId, message, superStep );
        // NOTE(review): replies to a Worker with masterCommandCompleted, whereas
        // receiveVertexIdToMessageQMap uses workerCommandCompleted - confirm intended.
        sendCommand( workerNumToWorkerMap.get( sendingWorkerNum ), masterCommandCompleted );
    }

    // Command: SendVertexIdToMessageQMap
    /**
     * Delivers each message queue to the Part that owns its vertex, then
     * acknowledges the sending Worker.
     */
    public void receiveVertexIdToMessageQMap( Service sendingWorker, Map<Object, MessageQ> vertexIdToMessageQMap, Long superStep )
    {
        for ( Map.Entry<Object, MessageQ> entry : vertexIdToMessageQMap.entrySet() )
        {
            Object vertexId = entry.getKey();
            int partId = job.getPartId( vertexId );
            Part receivingPart = partIdToPartMap.get( partId );
            MessageQ messageQ = entry.getValue();
            receivingPart.receiveMessageQ( vertexId, messageQ, superStep );
        }
        sendCommand( sendingWorker, workerCommandCompleted );
    }

    // Command: SetJob: initialize Job data structures
    /** Installs the Job, resets per-job state, and acknowledges the Master. */
    synchronized public void setJob( Job job )
    {
        this.job = job;
        partIdToPartMap = new ConcurrentHashMap<Integer, Part>();

        // TODO: Worker: partSet: should be just parts that have > 0 active vertices
        partSet = partIdToPartMap.values();
        numUnacknowledgedAddVertexCommands = new AtomicInteger();
        superStep = -1L; // doNextSuperStep pre-increments, so the first step is 0
        problemAggregator = job.makeProblemAggregator();
        fileSystem = makeFileSystem( job.getJobDirectoryName() );
        job.setFileSystem( fileSystem );
        sendCommand( master, masterCommandCompleted );
    }

    // Command: SetWorkerMap
    /**
     * Stores the worker-number-to-service map, registers and proxies every
     * Worker service in it, and acknowledges the Master.
     */
    synchronized public void setWorkerMap( Map<Integer, Service> integerToWorkerMap )
    {
        this.workerNumToWorkerMap = integerToWorkerMap;
        Collection<Service> workerServiceCollection = integerToWorkerMap.values();
        for ( Service workerService : workerServiceCollection )
        {
            super.register( workerService );
            Proxy workerProxy = new ProxyWorker( workerService, this, REMOTE_EXCEPTION_HANDLER );
            addProxy( workerService, workerProxy );
        }
        sendCommand( master, masterCommandCompleted );
    }

    // Command: ShutdownWorker
    /** Logs the shutdown and terminates the JVM with exit status 0. */
    @Override
    public void shutdown()
    {
        out.println( "Worker.shutdown: shutting down." );
        System.exit( 0 );
    }

    // Command: DoNextSuperStep
    /**
     * Runs one super step: computes every Part on the thread pool, ships the
     * accumulated message queues to their destination Workers, waits for the
     * acknowledgments, and reports the result to the Master.
     *
     * @param computeInput the input handed to every Part's super step
     * @throws InterruptedException if interrupted while awaiting a latch
     */
    public void doNextSuperStep( ComputeInput computeInput ) throws InterruptedException
    {
        // super step initialization
        this.computeInput = computeInput;
        workerNumToVertexIdToMessageQMapMap = new HashMap<Integer, Map<Object, MessageQ>>();
        thereIsANextStep = new AtomicBoolean();
        superStep++;
        stepAggregator = job.makeStepAggregator();
        deltaNumVertices = new AtomicInteger();

        // for each part: do super step
        countDownLatch = new CountDownLatch( partIdToPartMap.size() );
        for ( Part part : partSet )
        {
            executorService.execute( new RunSuperStep( part ) );
        }
        countDownLatch.await();

        // one acknowledgment (workerCommandCompleted) is expected per destination worker
        countDownLatch = new CountDownLatch( workerNumToVertexIdToMessageQMapMap.size() );
        for ( Map.Entry<Integer, Map<Object, MessageQ>> entry : workerNumToVertexIdToMessageQMapMap.entrySet() )
        {
            Service worker = workerNumToWorkerMap.get( entry.getKey() );
            Map<Object, MessageQ> vertexIdToMessageQMap = entry.getValue();
            Command command = new SendVertexIdToMessageQMap( this, vertexIdToMessageQMap, superStep + 1 );
            sendCommand( worker, command );
        }
        workerNumToVertexIdToMessageQMapMap = null; // make available for gc
        countDownLatch.await();

        ComputeOutput computeOutput = new ComputeOutput( thereIsANextStep.get(), stepAggregator, problemAggregator, deltaNumVertices );
        Command command = new SuperStepComplete( computeOutput );
        sendCommand( master, command );
    }

    /** Runs the current super step on one Part and folds its output into the Worker's state. */
    private class RunSuperStep implements Runnable
    {
        private Part part;

        RunSuperStep( Part part ) { this.part = part; }

        public void run()
        {
            // perform super step on part
            ComputeOutput computeOutput = part.doSuperStep( superStep, computeInput );

            // composition of part super step attributes with Worker super step attributes
            // The flag only ever moves false -> true within a super step, so a
            // plain set(true) suffices. weakCompareAndSet must not be used
            // here: it may fail spuriously and drop a part's vote.
            if ( computeOutput.getThereIsANextStep() )
            {
                thereIsANextStep.set( true );
            }
            stepAggregator.aggregate( computeOutput.getStepAggregator() );
            problemAggregator.aggregate( computeOutput.getProblemAggregator() );
            deltaNumVertices.addAndGet( computeOutput.deltaNumVertices() );
            mergeMap( computeOutput.getWorkerNumToVertexIdToMessageQMapMap() );
            countDownLatch.countDown();
        }
    }

    // Command: WorkerCommandCompleted
    /** Counts down the current latch; doNextSuperStep awaits it after sending message maps. */
    public void workerCommandCompleted() { countDownLatch.countDown(); }

    // Command: WriteWorkerOutputFile
    /** Has the Job write this Worker's output, logs any IOException, and acknowledges the Master. */
    public void writeWorkerOutputFile()
    {
        try
        {
            job.makeOutputFile( this );
        }
        catch ( IOException exception )
        {
            Logger.getLogger( Worker.class.getName() ).log( Level.SEVERE, null, exception );
        }
        sendCommand( master, masterCommandCompleted );
    }

    /**
     * Blocks on this Worker's monitor until the given counter is no longer positive.
     * NOTE(review): not called from anywhere in this file.
     */
    synchronized private void sync( AtomicInteger numUnacknowledgedSendVertexIdToMessageQMaps )
    {
        while ( numUnacknowledgedSendVertexIdToMessageQMaps.get() > 0 )
        {
            try
            {
                wait(); // notified when all acknowledgments have been received
            }
            catch ( InterruptedException ignore ) {}
        }
    }

    /**
     * Looks up the Master service via RMI, retrying indefinitely until the
     * lookup succeeds.
     *
     * @param masterDomainName host name of the Master
     * @return the Master service
     */
    public static Service getMaster( String masterDomainName )
    {
        String url = "//" + masterDomainName + ":" + Master.PORT + "/" + Master.SERVICE_NAME;
        Service master = null;
        for ( int i = 0;; i += 5000 )
        {
            try
            {
                master = (Service) Naming.lookup( url );
            }
            catch ( Exception ex )
            {
                tryAgain( i );
                continue;
            }
            break;
        }
        return master;
    }

    /** Decrements the current super step's vertex-count delta. */
    void removeVertex() { deltaNumVertices.decrementAndGet(); }

    /**
     * Delivers a message locally if this Worker holds the destination Part;
     * otherwise sends a SendMessage command to the owning Worker.
     */
    void sendMessage( int partId, Object vertexId, Message message, long superStep )
    {
        Part receivingPart = partIdToPartMap.get( partId );
        if ( receivingPart != null )
        {
            // part is local to this Worker
            receivingPart.receiveMessage( vertexId, message, superStep );
        }
        else
        {
            int destinationWorkerNum = getWorkerNum( partId );
            Service workerService = workerNumToWorkerMap.get( destinationWorkerNum );
            assert workerService != null;
            Command command = new SendMessage( myWorkerNum, partId, vertexId, message, superStep );
            sendCommand( workerService, command );
        }
    }

    /** Has the Job write this Worker's output, logs any IOException, and acknowledges the Master. */
    public void output()
    {
        try
        {
            job.makeOutputFile( this );
        }
        catch ( IOException ex )
        {
            Logger.getLogger( Worker.class.getName() ).log( Level.SEVERE, null, ex );
        }
        sendCommand( master, masterCommandCompleted );
    }

    /** @return true; subclasses may override to disable the explicit gc in collectGarbage */
    protected boolean collectingGarbage() { return true; }
}