package edu.ucsb.jpregel.system;

import api.Aggregator;
import edu.ucsb.jpregel.system.commands.*;

import java.rmi.RemoteException;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import jicosfoundation.*;
import pheme.api.Component;
import pheme.api.ComponentType;
import pheme.api.Pheme;
import pheme.api.Server;

import static java.lang.System.out;

/**
 * The coordinator of a jPregel computation.
 *
 * <p>The Master registers Workers, partitions a {@link Job} across them, and
 * drives the Bulk Synchronous Parallel loop: broadcast a command to every
 * Worker, then block on a {@link CountDownLatch} until each Worker reports
 * completion (a barrier), repeating super steps until no Worker votes for a
 * next step.
 *
 * <p>Thread-safety: Worker callbacks ({@code commandCompleted},
 * {@code superStepComplete}, {@code registerWorker}, ...) arrive on proxy
 * threads concurrently with the master thread running {@link #run}. Shared
 * counters use atomics; aggregator mutation is guarded by this object's
 * monitor (see {@link #superStepComplete}).
 *
 * @author Pete Cappello
 */
abstract public class Master extends ServiceImpl implements ClientToMaster
{
    // constants
    public static final RemoteExceptionHandler REMOTE_EXCEPTION_HANDLER = new DefaultRemoteExceptionHandler();
    /** Step timing/memory is reported every this many super steps. */
    private static final int NUM_STEPS_PER_MEASUREMENT = 1;

    // ServiceImpl attributes
    final static public String SERVICE_NAME        = "Master";
    public static final String CLIENT_SERVICE_NAME = "ClientToMaster";
    final static public int    PORT                = 2048;

    /** Graph parts per Worker processor; >1 enables load balancing across parts. */
    private static final int NUM_PARTS_PER_PROCESSOR = 2;

    static private final Department[] departments = { ServiceImpl.ASAP_DEPARTMENT };
    static private Class[][] command2DepartmentArray =
    {
        // ASAP Commands
        {
            InputFileProcessingComplete.class,
            SuperStepComplete.class,
            MasterCommandCompleted.class
        }
    };

    // Master attributes
    /** Worker number (1-based, assigned at registration) -> Worker Service. */
    private Map<Integer, Service> integerToWorkerMap = new HashMap<Integer, Service>();
    protected AtomicInteger numRegisteredWorkers = new AtomicInteger();
    /** Max processors reported by any single Worker; used to size the part count. */
    private volatile int numProcessorsPerWorker;

    // flow control attributes
    protected int numUnfinishedWorkers;          // TODO use CountDownLatch
    // NOTE: name kept as-is (typo and all) — it is protected, so subclasses may reference it.
    protected boolean commandExeutionIsComplete; // TODO use CountDownLatch
    /** Barrier for the current broadcast: one count per Worker. */
    private CountDownLatch countDownLatch;

    // Worker synchronization doWorkerPhase controller
    /** True iff at least one Worker voted for another super step; replaced each step. */
    protected AtomicBoolean thereIsANextStep;

    // graph state
    protected Aggregator stepAggregator;
    protected Aggregator problemAggregator;
    protected AtomicInteger numVertices;

    private long maxMemory = Runtime.getRuntime().maxMemory();

    // job attributes
    private Job job;
    private JobRunData jobRunData;

    // Pheme monitoring
    Pheme pheme = new Pheme( null );
    Component masterComponent = pheme.register( "Master", ComponentType.COMPUTER );

    /**
     * Constructs the Master as a Jicos Service and starts its Pheme
     * monitoring server.
     *
     * @throws RemoteException if Service initialization fails.
     */
    public Master() throws RemoteException
    {
        super( command2DepartmentArray );
        pheme.startServer(); // the returned Server handle is not used here
    }

    /**
     * Blocks until numWorkers Workers have registered, then distributes the
     * Worker map to all Workers.
     *
     * @param numWorkers number of Workers expected to register.
     */
    @Override
    public synchronized void init( int numWorkers ) throws RemoteException, InterruptedException
    {
        super.setService( this );
        super.setDepartments( departments );

        // Registrations that arrived before init may already have decremented
        // numUnfinishedWorkers, so add rather than assign.
        numUnfinishedWorkers += numWorkers;

        // Object.wait is subject to spurious wakeups: the guard must be a
        // loop, not an if, or init can proceed before all Workers registered.
        while ( numUnfinishedWorkers > 0 && ! commandExeutionIsComplete )
        {
            wait(); // until numUnfinishedWorkers == 0
        }
        setWorkerMap();
    }

    /** Broadcast to Workers: set your integerToWorkerMap (barrier). */
    @Override
    public synchronized void setWorkerMap() throws InterruptedException
    {
        doWorkerStep( new SetWorkerMap( integerToWorkerMap ) );
    }

    /** Fail fast: any unhandled exception aborts the Master process. */
    @Override
    public void exceptionHandler( Exception exception )
    {
        exception.printStackTrace();
        System.exit( 1 );
    }

    /**
     * Runs a Job to completion: distribute the Job, read input, iterate super
     * steps until quiescent, write output, and gather statistics.
     *
     * @param clientJob problem &amp; instance parameters.
     * @return run statistics, or null if a RuntimeException aborted the run.
     */
    @Override
    public JobRunData run( Job clientJob ) throws InterruptedException
    {
        try
        {
            // all Workers have registered with Master
            assert integerToWorkerMap.size() == numRegisteredWorkers.get();

            // initialize job & statistics
            initJob( clientJob );
            FileSystem fileSystem = makeFileSystem( job.getJobDirectoryName() );

            // phase: workers set Job & FileSystem.
            // This barrier is inlined (not doWorkerStep) so the Master can
            // split the input file concurrently with the Workers' SetJob.
            countDownLatch = new CountDownLatch( integerToWorkerMap.size() );
            broadcast( new SetJob( job ), this );

            // while workers SetJob: read Master input file, write Worker input files
            job.readJobInputFile( fileSystem, integerToWorkerMap.size() );

            // wait for all workers to complete SetJob & FileSystem
            countDownLatch.await();
            jobRunData.logPhaseEndTime();

            doWorkerPhase( new ReadWorkerInputFile() ); // phase: workers: read your input file

            // phase: workers: collect your garbage
            collectWorkerGarbage();
            jobRunData.logPhaseEndTime();

            // phase: computation
            problemAggregator = job.makeProblemAggregator();
            long superStep = 0;
            long startStepTime = System.currentTimeMillis(); // monitor step time
            thereIsANextStep = new AtomicBoolean( true );
            for ( ; thereIsANextStep.get(); superStep++ )
            {
                // super step initialization: false, until a Worker says otherwise
                thereIsANextStep = new AtomicBoolean();
                ComputeInput computeInput = new ComputeInput( problemAggregator, numVertices.get() );
                stepAggregator = job.makeStepAggregator(); // initialize stepAggregator

                // broadcast to workers: do next super step (barrier)
                doWorkerStep( new DoNextSuperStep( computeInput ) );
                startStepTime = monitorStepProgress( startStepTime, superStep );
                masterComponent.log( "INFO", "SuperStep " + superStep );
            }
            jobRunData.logPhaseEndTime();
            jobRunData.setNumSuperSteps( superStep );

            doWorkerPhase( new WriteWorkerOutputFile() ); // phase: workers: write your output file

            // phase: process worker output files
            job.processWorkerOutputFiles( fileSystem, integerToWorkerMap.size() );
            jobRunData.logPhaseEndTime();
            return jobRunData;
        }
        catch ( RuntimeException runTimeException )
        {
            System.out.println( runTimeException.getLocalizedMessage() );
            runTimeException.printStackTrace( System.out );
        }
        return null;
    }

    /** Initialize master Job data structures (job copy, statistics, vertex count). */
    private void initJob( Job clientJob )
    {
        job = new Job( clientJob,
                numRegisteredWorkers.get() * numProcessorsPerWorker * NUM_PARTS_PER_PROCESSOR );
        jobRunData = new JobRunData( job, integerToWorkerMap.size() );
        numVertices = new AtomicInteger();
    }

    /**
     * Every NUM_STEPS_PER_MEASUREMENT steps, report elapsed time and free heap.
     *
     * @return the new measurement start time (now), or the unchanged
     *         startStepTime on skipped steps.
     */
    private long monitorStepProgress( long startStepTime, long superStep )
    {
        if ( superStep % NUM_STEPS_PER_MEASUREMENT != 0 )
        {
            return startStepTime;
        }
        long endStepTime  = System.currentTimeMillis();
        long elapsedTime  = endStepTime - startStepTime;
        long freeMemory   = Runtime.getRuntime().freeMemory();
        int percentFreeMemory = (int) ( ( (float) freeMemory / maxMemory ) * 100 );
        out.println( "SuperStep: " + superStep
                + " requiring " + ( elapsedTime / 1000 ) + " seconds "
                + " Maximum memory that is free: " + percentFreeMemory + "%"
                + " : " + ( freeMemory / 1000 ) + "KB" );
        masterComponent.gauge( "Heap % free", percentFreeMemory );
        return endStepTime;
    }

    /** Master deployment and shutdown is handled at the machine level. */
    @Override
    public void shutdown() {}

    /* _____________________________
     *
     *  Command implementations: Synchronize or explain why it is not needed!
     * _____________________________
     */

    /**
     * Command: InputFileProcessingComplete.
     * Not synchronized: only touches atomics and the CountDownLatch.
     */
    public void inputFileProcessingComplete( int workerNum, int numVertices )
    {
        this.numVertices.addAndGet( numVertices );
        commandCompleted();
    }

    /**
     * Command: RegisterWorker — assigns the Worker its 1-based number.
     * Synchronized: mutates integerToWorkerMap and numProcessorsPerWorker.
     */
    synchronized public int registerWorker( ServiceName serviceName, int numWorkerProcessors )
    {
        assert serviceName != null;
        // !! currently not storing/using ServiceName data apart from Service
        // !! Ensure that no service with this ID is registered already.
        // !! If there is, unregister it.
        this.numProcessorsPerWorker = Math.max( this.numProcessorsPerWorker, numWorkerProcessors );
        Service workerService = serviceName.service();
        super.register( workerService );
        ProxyWorker workerProxy = new ProxyWorker( workerService, this, REMOTE_EXCEPTION_HANDLER );
        addProxy( workerService, workerProxy );
        int workerNum = numRegisteredWorkers.incrementAndGet();
        integerToWorkerMap.put( workerNum, workerService );
        processAcknowledgement();
        System.out.println( "Master.registerWorker: workerNum: " + workerNum );
        return workerNum;
    }

    /**
     * Command: SuperStepComplete — invoked concurrently by Worker proxies.
     * Synchronized: Aggregator implementations are not known to be
     * thread-safe, so concurrent aggregate() calls must be serialized.
     */
    synchronized public void superStepComplete( ComputeOutput computeOutput )
    {
        // One Worker voting "there is a next step" forces another super step.
        // (The former weakCompareAndSet was wrong here: per its contract it
        // may fail spuriously, silently dropping a Worker's vote.)
        if ( computeOutput.getThereIsANextStep() )
        {
            thereIsANextStep.set( true );
        }
        numVertices.addAndGet( computeOutput.deltaNumVertices() );
        stepAggregator.aggregate( computeOutput.getStepAggregator() );
        problemAggregator.aggregate( computeOutput.getProblemAggregator() );
        commandCompleted();
    }

    /**
     * Command: MasterCommandCompleted.
     * Not synchronized: CountDownLatch.countDown is thread-safe.
     */
    public void commandCompleted()
    {
        countDownLatch.countDown();
    }

    /** Phase: Workers collect their garbage (barrier). */
    protected void collectWorkerGarbage() throws InterruptedException
    {
        doWorkerPhase( new CollectGarbage() );
    }

    /** Run one barrier step and log it as a completed phase. */
    private void doWorkerPhase( Command command ) throws InterruptedException
    {
        doWorkerStep( command );
        jobRunData.logPhaseEndTime();
    }

    /**
     * Do a Worker barrier computation: broadcast command, then block until
     * every Worker has invoked commandCompleted.
     */
    private void doWorkerStep( Command command ) throws InterruptedException
    {
        countDownLatch = new CountDownLatch( integerToWorkerMap.size() );
        broadcast( command, this );
        countDownLatch.await();
    }

    /** Count a Worker registration; wake init() when all have arrived. */
    synchronized private void processAcknowledgement()
    {
        if ( --numUnfinishedWorkers == 0 )
        {
            commandExeutionIsComplete = true;
            notify();
        }
    }

    /**
     * @param jobDirectoryName directory that holds this Job's files.
     * @return the file system (local or distributed) for this deployment.
     */
    public abstract FileSystem makeFileSystem( String jobDirectoryName );
}