/** * Copyright 2008 - CommonCrawl Foundation * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * **/ package org.commoncrawl.service.pagerank.slave; import java.io.File; import java.io.IOException; import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.UnknownHostException; import java.text.NumberFormat; import java.util.Vector; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.commoncrawl.async.Callback; import org.commoncrawl.async.CallbackWithResult; import org.commoncrawl.crawl.common.internal.CrawlEnvironment; import org.commoncrawl.rpc.base.internal.AsyncClientChannel; import org.commoncrawl.rpc.base.internal.AsyncContext; import org.commoncrawl.rpc.base.internal.AsyncRequest; import org.commoncrawl.rpc.base.internal.AsyncServerChannel; import org.commoncrawl.rpc.base.internal.NullMessage; import org.commoncrawl.rpc.base.shared.RPCException; import org.commoncrawl.rpc.base.shared.RPCStruct; import org.commoncrawl.server.CommonCrawlServer; import org.commoncrawl.service.pagerank.BaseConfig; import org.commoncrawl.service.pagerank.BeginPageRankInfo; import org.commoncrawl.service.pagerank.BlockTransfer; import org.commoncrawl.service.pagerank.BlockTransferAck; import org.commoncrawl.service.pagerank.CheckpointInfo; import org.commoncrawl.service.pagerank.FileInfo; import org.commoncrawl.service.pagerank.IterationInfo; import org.commoncrawl.service.pagerank.PageRankJobConfig; import org.commoncrawl.service.pagerank.PageRankSlave; import org.commoncrawl.service.pagerank.SlaveStatus; import org.commoncrawl.service.pagerank.slave.BeginPageRankTask.BeginPageRankTaskResult; import org.commoncrawl.util.CCStringUtils; import org.commoncrawl.util.JVMStats; /** * * @author rana * */ public class PageRankSlaveServer extends CommonCrawlServer implements PageRankSlave ,AsyncServerChannel.ConnectionCallback{ private static final int MIN_INSTANCE_ID = 0; private static final int MAX_INSTANCE_ID = 9; private static final int DEFAULT_THREAD_POOL_SIZE=20; private InetAddress _directoryServiceAddress; private int _instanceId = -1; private int _threadPoolSize = DEFAULT_THREAD_POOL_SIZE; private BaseConfig _baseConfig; private PageRankJobConfig _activeJobConfig; private PageRankTask _activeTask; private PageRankUtils.PRValueMap _valueMap = null; private TaskInstantiationCallback _queuedTaskInstantiator; private SlaveStatus _slaveStatus = new SlaveStatus(); private FileSystem _fileSystem = null; private static final NumberFormat NUMBER_FORMAT = NumberFormat.getInstance(); static { NUMBER_FORMAT.setMinimumIntegerDigits(5); NUMBER_FORMAT.setGroupingUsed(false); } private String _partId = null; private Vector<InetSocketAddress> _slaveAddresses = new Vector<InetSocketAddress>(); public FileSystem getFileSystem() { return _fileSystem; } public BaseConfig getBaseConfig() { return _baseConfig; } public PageRankJobConfig getActiveJobConfig() { return _activeJobConfig; } public String getPartId() { return _partId; } public File getJobLocalPath() { return new File(getDataDirectory(),"jobLocal"); } public File getActiveJobLocalPath() { if (getActiveJobConfig() == null) throw new RuntimeException("getActiveJobLocalDir called in Invalid State."); else return new File(getJobLocalPath(),"job-" + getActiveJobConfig().getJobId()); } public PageRankUtils.PRValueMap getValueMap() { return _valueMap; } public int getNodeIndex() { if (_baseConfig != null) { return _baseConfig.getSlaveId(); } else { LOG.error("Invalid call to getNodeIndex. No baseConfig!"); return -1; } } @Override protected String getDefaultHttpInterface() { return CrawlEnvironment.DEFAULT_HTTP_INTERFACE; } @Override protected int getDefaultHttpPort() { return CrawlEnvironment.DEFAULT_PAGERANK_SLAVE_HTTP_PORT + (_instanceId * 2); } @Override protected String getDefaultLogFileName() { return "prslave.log"; } @Override protected String getDefaultRPCInterface() { return CrawlEnvironment.DEFAULT_RPC_INTERFACE; } @Override protected int getDefaultRPCPort() { return CrawlEnvironment.DEFAULT_PAGERANK_SLAVE_RPC_PORT + (_instanceId * 2); } @Override protected String getWebAppName() { return CrawlEnvironment.PAGERANK_SLAVE_WEBAPP_NAME; } @Override protected boolean initServer() { LOG.info("PageRankSlave Initializing. AvailableMemory:" + JVMStats.getHeapUtilizationRatio()); JVMStats.dumpMemoryStats(); // create server channel ... AsyncServerChannel channel = new AsyncServerChannel(this, this.getEventLoop(), this.getServerAddress(),this); // register RPC services it supports ... registerService(channel,PageRankSlave.spec); // make job local directory getJobLocalPath().mkdirs(); return true; } @Override protected boolean parseArguements(String[] argv) { for(int i=0; i < argv.length;++i) { if (argv[i].equalsIgnoreCase("--instance")) { if (i+1 < argv.length) { _instanceId = Integer.parseInt(argv[++i]); if (_instanceId < MIN_INSTANCE_ID || _instanceId > MAX_INSTANCE_ID) { System.err.println("Invalid Instance Id specified. Instance Id must be between " + MIN_INSTANCE_ID + " and " + MAX_INSTANCE_ID); return false; } } } else if (argv[i].equalsIgnoreCase("--threadPoolSize")) { if (i+1 < argv.length) { _threadPoolSize = Integer.parseInt(argv[++i]); } } else if (argv[i].equalsIgnoreCase("--directoryserver")) { if (i+1 < argv.length) { try { _directoryServiceAddress = InetAddress.getByName(argv[++i]); } catch (UnknownHostException e) { LOG.error(CCStringUtils.stringifyException(e)); } } } } if (_instanceId == -1) { System.err.println("Instance Id (--instance) and (optional) Thread Pool Size (--threadPoolSize) are required parameters."); return false; } return true; } @Override protected void overrideConfig(Configuration conf) { conf.setInt("org.commoncrawl.threadpool.max.threads", _threadPoolSize); } @Override protected void printUsage() { // TODO Auto-generated method stub } @Override protected boolean startDaemons() { return true; } @Override protected void stopDaemons() { } @Override public void initialize(final AsyncContext<BaseConfig, SlaveStatus> rpcContext)throws RPCException { // clear out state ... _slaveStatus.clear(); _slaveStatus.setState(SlaveStatus.State.INITIALIZING); // if there is an active task on the queue ... cancel it asyncrhonously if (_activeTask != null) { _activeTask.cancel(new Callback() { // stop complete ... public void execute() { _activeTask = null; finishInitialize(rpcContext); } }); } // otherwise call directly ... else { finishInitialize(rpcContext); } } /** get directory service address **/ public InetAddress getDirectoryServiceAddress() { return _directoryServiceAddress; } // get the list of slaves public Vector<InetSocketAddress> getSlavesList() { return _slaveAddresses; } private void parseSlavesList(String slavesList) { String slaves[] = slavesList.split(","); for (String slaveName : slaves) { String nameParts[] = slaveName.split(":"); _slaveAddresses.add(new InetSocketAddress(nameParts[0],Integer.parseInt(nameParts[1]))); LOG.info("Slave At Index:"+ (_slaveAddresses.size() - 1) + " is:" + _slaveAddresses.get(_slaveAddresses.size() - 1).toString()); } } private void finishInitialize(AsyncContext<BaseConfig, SlaveStatus> rpcContext) { // set up base config ... try { _baseConfig = (BaseConfig) rpcContext.getInput().clone(); _partId = "part-" + NUMBER_FORMAT.format(_baseConfig.getSlaveId()); // parse slaves list parseSlavesList(_baseConfig.getSlavesList()); } catch (CloneNotSupportedException e) { } _activeJobConfig = null; _activeTask = null; // zero out the value array _valueMap = null; // initialize the file system ... try { _fileSystem = CrawlEnvironment.getDefaultFileSystem(); } catch (Exception e) { // log the error LOG.error(CCStringUtils.stringifyException(e)); // and fail the request ... failRequest(rpcContext, "Unable to Initialize FileSystem.\n" + CCStringUtils.stringifyException(e)); return; } // and update slave status state _slaveStatus.setState(SlaveStatus.State.IDLE); sendStatusResponse(rpcContext); } private void sendStatusResponse(AsyncContext<? extends RPCStruct,SlaveStatus> context) { try { context.setOutput((SlaveStatus) _slaveStatus.clone()); if (_activeTask != null) { _slaveStatus.setPercentComplete(_activeTask._percentComplete); } } catch (CloneNotSupportedException e) { } try { context.completeRequest(); } catch (RPCException e) { LOG.error("fail to send StatusResponse to incoming RPC. CLOSING RPC Channel"); try { context.getClientChannel().close(); } catch (IOException e1) { LOG.error(e1); } } } public static interface TaskInstantiationCallback { PageRankTask instantiateTask(); } @Override public void beginPageRank(final AsyncContext<BeginPageRankInfo, SlaveStatus> rpcContext)throws RPCException { final PageRankJobConfig jobConfig = rpcContext.getInput().getJobConfig(); LOG.info("GOT Begin Page Rank Command. Job Id Is:" + jobConfig.getJobId()); activateTask(new TaskInstantiationCallback() { @Override public PageRankTask instantiateTask() { // intialze the page rank config try { _activeJobConfig = (PageRankJobConfig) jobConfig.clone(); LOG.info("BeginPageRank starting. FreeMemory:" + Runtime.getRuntime().freeMemory()); // construct the begin page rank task BeginPageRankTask beginPageRankTask = new BeginPageRankTask(_activeJobConfig,rpcContext.getInput().getServerStatus(),PageRankSlaveServer.this,new CallbackWithResult<BeginPageRankTaskResult>() { @Override public void execute(BeginPageRankTaskResult result) { if (result.succeeded()) { LOG.error("BeginPageRankTask succeeded"); _valueMap = result._valueMap; _slaveStatus.setActiveJobId(_activeJobConfig.getJobId()); _slaveStatus.setCurrentIteration(0); LOG.info("Setting State to STARTED_ILDE"); _slaveStatus.setState(SlaveStatus.State.STARTED_IDLE); } else { LOG.error("BeginPageRankTask failed with Exception:" + result.getErrorDesc()); _valueMap = null; _slaveStatus.setState(SlaveStatus.State.ERROR); } LOG.info("Sending Response to Master"); sendStatusResponse(rpcContext); } }); return beginPageRankTask; } catch (CloneNotSupportedException e) { } return null; } }); } @Override public void doIteration(final AsyncContext<IterationInfo, SlaveStatus> rpcContext)throws RPCException { LOG.info("GOT doIteration Command. Phase:" + IterationInfo.Phase.toString(rpcContext.getInput().getPhase()) + " Iteration:" + rpcContext.getInput().getIterationNumber()); if (_activeTask != null) { LOG.error("doIteration called while Task still active:" + _activeTask.getDescription()); LOG.info("Setting State to: ERROR"); _slaveStatus.setState(SlaveStatus.State.ERROR); rpcContext.completeRequest(); return; } if (rpcContext.getInput().getJobId() == _activeJobConfig.getJobId()) { switch (rpcContext.getInput().getPhase()) { case IterationInfo.Phase.DISTRIBUTE: { _activeJobConfig.setIterationNumber(rpcContext.getInput().getIterationNumber()); _slaveStatus.setCurrentIteration(rpcContext.getInput().getIterationNumber()); LOG.info("Setting State to: DISTRIBUTING"); _slaveStatus.setState(SlaveStatus.State.DISTRIBUTING); activateTask(new TaskInstantiationCallback() { @Override public PageRankTask instantiateTask() { return new DistributeRankTask(PageRankSlaveServer.this,new CallbackWithResult<DistributeRankTask.DistributeRankTaskResult>() { @Override public void execute(DistributeRankTask.DistributeRankTaskResult result) { LOG.info("Done with Iteration:" + rpcContext.getInput().getIterationNumber() + " for Phase:" + IterationInfo.Phase.toString(rpcContext.getInput().getPhase()) + "Result:" + result.isDone()); if (result.isDone()) { LOG.info("Setting State to: DONE_DISTRIBUTING"); _slaveStatus.setState(SlaveStatus.State.DONE_DISTRIBUTING); } else { LOG.info("Distribution Failed with Result:" +result.getErrorDesc() + ".Setting State to: ERROR"); _slaveStatus.setState(SlaveStatus.State.ERROR); } } }); } }); } break; case IterationInfo.Phase.CALCULATE: { _activeJobConfig.setIterationNumber(rpcContext.getInput().getIterationNumber()); _slaveStatus.setCurrentIteration(rpcContext.getInput().getIterationNumber()); LOG.info("Setting State to: CALCULATING"); _slaveStatus.setState(SlaveStatus.State.CALCULATING); activateTask(new TaskInstantiationCallback() { @Override public PageRankTask instantiateTask() { return new CalculateRankTask(PageRankSlaveServer.this,new CallbackWithResult<CalculateRankTask.CalculateRankTaskResult>() { @Override public void execute(CalculateRankTask.CalculateRankTaskResult result) { LOG.info("Done with Iteration:" + rpcContext.getInput().getIterationNumber() + " for Phase:" + IterationInfo.Phase.toString(rpcContext.getInput().getPhase()) + "Result:" + result.isDone()); if (result.isDone()) { switch (rpcContext.getInput().getPhase()) { case IterationInfo.Phase.DISTRIBUTE: { LOG.info("Setting State to: DONE_DISTRIBUTING"); _slaveStatus.setState(SlaveStatus.State.DONE_DISTRIBUTING); } break; case IterationInfo.Phase.CALCULATE: { LOG.info("Setting State to: DONE_CALCULATING"); _slaveStatus.setState(SlaveStatus.State.DONE_CALCULATING); } break; } } else { LOG.info("Setting State to: ERROR"); _slaveStatus.setState(SlaveStatus.State.ERROR); } } }); } }); } break; } } else { LOG.error("Incoming Job Id:" + rpcContext.getInput().getJobId()+ " Different from Active Job Id:" + rpcContext.getInput().getJobId()); LOG.info("Setting State to: ERROR"); _slaveStatus.setState(SlaveStatus.State.ERROR); } LOG.info("Sending Response to Master"); sendStatusResponse(rpcContext); } @Override public void endPageRank(final AsyncContext<NullMessage, SlaveStatus> rpcContext)throws RPCException { LOG.info("GOT endPageRank Command"); if (_slaveStatus.getState() == SlaveStatus.State.STARTED_IDLE || _slaveStatus.getState() == SlaveStatus.State.DONE_CALCULATING) { activateTask(new TaskInstantiationCallback() { @Override public PageRankTask instantiateTask() { return new TestTask(PageRankSlaveServer.this,new CallbackWithResult<TestTask.TestTaskResult>() { @Override public void execute(TestTask.TestTaskResult result) { LOG.info("ended Page Rank for Job:" + _activeJobConfig.getJobId() + " with Result:" + result.isDone()); if (result.isDone()) { _activeJobConfig.clear(); _slaveStatus.setState(SlaveStatus.State.IDLE); } else { _activeJobConfig.clear(); _slaveStatus.setState(SlaveStatus.State.ERROR); } LOG.info("Sending Response to Master"); sendStatusResponse(rpcContext); } },"END PAGE RANK",15000); } }); } else { LOG.info("Setting State to: ERROR"); _slaveStatus.setState(SlaveStatus.State.ERROR); } } @Override public void checkpoint(final AsyncContext<CheckpointInfo, SlaveStatus> rpcContext)throws RPCException { try { if (_slaveStatus.getState() != SlaveStatus.State.DISTRIBUTING && _slaveStatus.getState() != SlaveStatus.State.CALCULATING) { LOG.info("Recevied Checkpoint Cmd. TxnId:" + rpcContext.getInput().getTxnId() + " Phase:" + rpcContext.getInput().getCurrentPhase() + " Iteration:" + rpcContext.getInput().getCurrentIterationNumber()); final CheckpointInfo checkpointInfo = rpcContext.getInput(); // check to see iteration number matches if (checkpointInfo.getCurrentIterationNumber() == _slaveStatus.getCurrentIteration()) { // do a phase to current state match if (checkpointInfo.getCurrentPhase() == IterationInfo.Phase.CALCULATE && _slaveStatus.getState() == SlaveStatus.State.DONE_CALCULATING) { // ok this is a valid txn id ... set it as active _slaveStatus.setCurrentCheckpointId(checkpointInfo.getTxnId()); // activate appropriate task activateTask(new TaskInstantiationCallback() { @Override public PageRankTask instantiateTask() { return new CalculateRankCommitTask(PageRankSlaveServer.this,checkpointInfo,new CallbackWithResult<CalculateRankCommitTask.TaskResult>() { @Override public void execute(CalculateRankCommitTask.TaskResult result) { LOG.info("Finished CalculateRank Commit Task. TxnId:" + rpcContext.getInput().getTxnId() + " Phase:" + rpcContext.getInput().getCurrentPhase() + " Iteration:" + rpcContext.getInput().getCurrentIterationNumber() + " Result:" + result.isDone()); if (result.isDone()) { _slaveStatus.setCommittedCheckpointId(checkpointInfo.getTxnId()); } else { LOG.info("Commit Failed with Result:" +result.getErrorDesc() + ".Setting State to: ERROR"); _slaveStatus.setState(SlaveStatus.State.ERROR); } } }); } }); } else if (checkpointInfo.getCurrentPhase() == IterationInfo.Phase.DISTRIBUTE && _slaveStatus.getState() == SlaveStatus.State.DONE_DISTRIBUTING) { // ok this is a valid txn id ... set it as active _slaveStatus.setCurrentCheckpointId(checkpointInfo.getTxnId()); // activate appropriate task activateTask(new TaskInstantiationCallback() { @Override public PageRankTask instantiateTask() { return new DistributeRankCommitTask(PageRankSlaveServer.this,checkpointInfo,new CallbackWithResult<DistributeRankCommitTask.TaskResult>() { @Override public void execute(DistributeRankCommitTask.TaskResult result) { LOG.info("Finished DistributeRankCommitTask. TxnId:" + rpcContext.getInput().getTxnId() + " Phase:" + rpcContext.getInput().getCurrentPhase() + " Iteration:" + rpcContext.getInput().getCurrentIterationNumber() + " Result:" + result.isDone()); if (result.isDone()) { _slaveStatus.setCommittedCheckpointId(checkpointInfo.getTxnId()); } else { LOG.info("Commit Failed with Result:" +result.getErrorDesc() + ".Setting State to: ERROR"); _slaveStatus.setState(SlaveStatus.State.ERROR); } } }); } }); } } } } finally { // now matter what, send a status response ... sendStatusResponse(rpcContext); } } @Override public void heartbeat(AsyncContext<NullMessage, SlaveStatus> rpcContext)throws RPCException { //LOG.info("Got Heartbeat from Master - Sending Status to Master"); sendStatusResponse(rpcContext); } private final void failRequest(AsyncContext<? extends RPCStruct,? extends RPCStruct> rpcContext,String reason) { // not good... time to fail the request ... rpcContext.setStatus(AsyncRequest.Status.Error_RequestFailed); rpcContext.setErrorDesc(reason); try { rpcContext.completeRequest(); } catch (RPCException e) { LOG.error(CCStringUtils.stringifyException(e)); try { rpcContext.getClientChannel().close(); } catch (IOException e2) { } } } /** activate the specified task*/ private void activateTask(final TaskInstantiationCallback callback) { _queuedTaskInstantiator = callback; if (_activeTask != null) { _activeTask.cancel(new Callback() { @Override public void execute() { _activeTask = null; instantiateQueuedTask(); } }); } else { _activeTask = null; instantiateQueuedTask(); } } /** instantiate the queued task **/ private void instantiateQueuedTask() { if (_queuedTaskInstantiator != null) { _activeTask = _queuedTaskInstantiator.instantiateTask(); if (_activeTask != null) { _activeTask.start(); } } } /** task starting callback **/ void taskStarting(PageRankTask task) { LOG.info("Task:" + task.getDescription() + " Starting"); } /** task complete callback **/ void taskComplete(PageRankTask task) { LOG.info("Task:" + task.getDescription() + " Complete"); if (_activeTask == task) { _activeTask = null; } } @Override protected String getDefaultDataDir() { return "data"; } @Override public void IncomingClientConnected(AsyncClientChannel channel) { LOG.info("Incoming Channel Connected"); } @Override public void IncomingClientDisconnected(AsyncClientChannel channel) { LOG.info("Channel Disconnected"); } @Override public void deleteFile(AsyncContext<FileInfo, NullMessage> rpcContext) throws RPCException { } @Override public void commitFile(AsyncContext<FileInfo, NullMessage> rpcContext) throws RPCException { // TODO Auto-generated method stub } @Override public void createJobFile(AsyncContext<FileInfo, FileInfo> rpcContext) throws RPCException { // TODO Auto-generated method stub } @Override public void transferBlock( AsyncContext<BlockTransfer, BlockTransferAck> rpcContext) throws RPCException { // TODO Auto-generated method stub } }