package org.commoncrawl.service.crawlmaster;

import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Date;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.commoncrawl.protocol.CrawlerAction;
import org.commoncrawl.protocol.CrawlerService;
import org.commoncrawl.protocol.CrawlerStatus;
import org.commoncrawl.rpc.base.internal.AsyncClientChannel;
import org.commoncrawl.rpc.base.internal.AsyncRequest;
import org.commoncrawl.rpc.base.internal.AsyncRequest.Callback;
import org.commoncrawl.rpc.base.internal.AsyncRequest.Status;
import org.commoncrawl.rpc.base.internal.NullMessage;
import org.commoncrawl.rpc.base.shared.RPCException;
import org.commoncrawl.util.CCStringUtils;

/** helper object used to encapsulate an online crawler's state information **/
class OnlineCrawlerState implements AsyncClientChannel.ConnectionCallback {

  public static final Log LOG = LogFactory.getLog(OnlineCrawlerState.class);

  private String _hostName;
  private InetSocketAddress _crawlerIpAddressAndPort;
  private Date _lastUpdateTime = new Date();
  private CrawlerStatus _lastKnownStatus = new CrawlerStatus();
  private int _desiredState = CrawlerStatus.CrawlerState.IDLE;
  private int _activeCrawlNumber = -1;
  private CrawlDBServer _server;
  private boolean _crawlerOnline = false;
  private boolean _commandActive = false;
  private boolean _heartbeatActive = false;
  private AsyncClientChannel _channel;
  private CrawlerService.AsyncStub _crawlerService;

  public OnlineCrawlerState(CrawlDBServer server, String hostName, InetSocketAddress ipAndPort)
      throws IOException, RPCException {

    _server = server;
    _hostName = hostName;
    _crawlerIpAddressAndPort = ipAndPort;

    CrawlDBServer.LOG.info("OnlineCrawlerState - Opening Channel to Host:" + _hostName);

    // initialize channel ...
    _channel = new AsyncClientChannel(_server.getEventLoop(), _server.getServerAddress(), _crawlerIpAddressAndPort, this);
    _channel.open();
    _crawlerService = new CrawlerService.AsyncStub(_channel);
  }

  boolean isOnline() {
    return _crawlerOnline;
  }

  @Override
  public String toString() {
    return "CrawlerState for (" + _hostName + ") IPAddress:" + _crawlerIpAddressAndPort.getAddress().getHostAddress()
        + " Port:" + _crawlerIpAddressAndPort.getPort();
  }

  public String getHostname() {
    return _hostName;
  }

  public Date getLastUpdateTime() {
    return _lastUpdateTime;
  }

  public void setLastUpdateTime(Date time) {
    _lastUpdateTime = time;
  }

  public CrawlerStatus getLastKnownStatus() {
    return _lastKnownStatus;
  }

  /** record the state (and active crawl list) this crawler should converge to **/
  public void transitionToState(int crawlerState, int activeCrawlNumber) {
    // LOG.info("Transitioning Crawler:" + getHostname() + " to State:"
    //     + CrawlerStatus.CrawlerState.toString(crawlerState) + " crawlNumber:" + activeCrawlNumber);
    _desiredState = crawlerState;
    _activeCrawlNumber = activeCrawlNumber;
  }

  /** issue a single action RPC to the crawler; its status response is fed back into the reconciliation loop **/
  private void sendCrawlerCommand(int desiredAction, int activeListNumber) {
    LOG.info("Sending Command:" + desiredAction + " ListId:" + activeListNumber + " to Crawler:" + getHostname());

    _commandActive = true;

    CrawlerAction action = new CrawlerAction();
    action.setActionType(desiredAction);
    action.setActiveListNumber(activeListNumber);

    try {
      _crawlerService.doAction(action, new Callback<CrawlerAction, CrawlerStatus>() {

        @Override
        public void requestComplete(final AsyncRequest<CrawlerAction, CrawlerStatus> request) {
          _commandActive = false;
          if (request.getStatus() == Status.Success) {
            processStatusResponse(request.getOutput());
          }
        }
      });
    } catch (RPCException e) {
      LOG.error(CCStringUtils.stringifyException(e));
      _commandActive = false;
    }
  }

  /** compare the crawler's reported status against the desired state and send the next command needed to converge **/
  private void processStatusResponse(CrawlerStatus newStatus) {
    // update the last update time ...
    setLastUpdateTime(new Date());

    // clone the status ...
    try {
      _lastKnownStatus = (CrawlerStatus) newStatus.clone();
    } catch (CloneNotSupportedException e) {
      // ignore - keep the previously known status
    }

    // now validate against the desired state ...
    switch (_desiredState) {

      case CrawlerStatus.CrawlerState.IDLE: {
        if (_lastKnownStatus.getCrawlerState() == CrawlerStatus.CrawlerState.ACTIVE) {
          sendCrawlerCommand(CrawlerAction.ActionType.FLUSH, _activeCrawlNumber);
        } else if (_lastKnownStatus.getCrawlerState() == CrawlerStatus.CrawlerState.FLUSHED) {
          sendCrawlerCommand(CrawlerAction.ActionType.PURGE, _activeCrawlNumber);
        }
      }
        break;

      case CrawlerStatus.CrawlerState.ACTIVE: {
        if (_lastKnownStatus.getCrawlerState() == CrawlerStatus.CrawlerState.ACTIVE) {
          if (_lastKnownStatus.getActiveListNumber() != _activeCrawlNumber) {
            sendCrawlerCommand(CrawlerAction.ActionType.FLUSH, _activeCrawlNumber);
          } else {
            // LOG.info("Crawler:" + getHostname() + " is active processing crawl no:" + _activeCrawlNumber);
          }
        } else if (_lastKnownStatus.getCrawlerState() == CrawlerStatus.CrawlerState.FLUSHED) {
          if (_lastKnownStatus.getActiveListNumber() != _activeCrawlNumber) {
            LOG.info("Crawler:" + getHostname()
                + " desired state active, current state flushed but active crawl != current crawl. sending PURGE");
            sendCrawlerCommand(CrawlerAction.ActionType.PURGE, _activeCrawlNumber);
          } else {
            LOG.info("Crawler:" + getHostname() + " desired state active, current state flushed. sending RESUME");
            sendCrawlerCommand(CrawlerAction.ActionType.RESUME_CRAWL, _activeCrawlNumber);
          }
        } else if (_lastKnownStatus.getCrawlerState() == CrawlerStatus.CrawlerState.PURGED
            || _lastKnownStatus.getCrawlerState() == CrawlerStatus.CrawlerState.IDLE) {
          LOG.info("Crawler:" + getHostname() + " desired state active, current state purged or idle. sending RESUME");
          sendCrawlerCommand(CrawlerAction.ActionType.RESUME_CRAWL, _activeCrawlNumber);
        }
      }
        break;

      case CrawlerStatus.CrawlerState.FLUSHED: {
        if (_lastKnownStatus.getCrawlerState() == CrawlerStatus.CrawlerState.ACTIVE
            || _lastKnownStatus.getCrawlerState() == CrawlerStatus.CrawlerState.IDLE
            || _lastKnownStatus.getCrawlerState() == CrawlerStatus.CrawlerState.PURGED) {
          LOG.info("Crawler:" + getHostname() + " desired state flushed, current state active, purged or idle. sending FLUSH");
          sendCrawlerCommand(CrawlerAction.ActionType.FLUSH, _activeCrawlNumber);
        }
      }
        break;

      case CrawlerStatus.CrawlerState.PURGED: {
        if (_lastKnownStatus.getCrawlerState() == CrawlerStatus.CrawlerState.ACTIVE
            || _lastKnownStatus.getCrawlerState() == CrawlerStatus.CrawlerState.IDLE) {
          LOG.info("Crawler:" + getHostname() + " desired state purged, current state active, sending FLUSH");
          sendCrawlerCommand(CrawlerAction.ActionType.FLUSH, _activeCrawlNumber);
        } else if (_lastKnownStatus.getCrawlerState() == CrawlerStatus.CrawlerState.FLUSHED) {
          LOG.info("Crawler:" + getHostname() + " desired state purged, current state flushed, sending PURGE");
          sendCrawlerCommand(CrawlerAction.ActionType.PURGE, _activeCrawlNumber);
        }
      }
        break;
    }
  }

  /** poll the crawler's status; skipped while a command or a previous heartbeat is still in flight **/
  public void sendHeartbeat() {
    if (!_heartbeatActive && !_commandActive) {
      _heartbeatActive = true;

      try {
        _crawlerService.queryStatus(new Callback<NullMessage, CrawlerStatus>() {

          public void requestComplete(AsyncRequest<NullMessage, CrawlerStatus> request) {
            boolean forceDisconnect = false;

            if (request.getStatus() == Status.Success) {
              processStatusResponse(request.getOutput());
            } else {
              CrawlDBServer.LOG.error("Heartbeat request to crawler: " + getHostname() + " failed with Status: "
                  + request.getStatus().toString());
              forceDisconnect = true;
            }

            if (forceDisconnect) {
              try {
                _channel.close();
              } catch (IOException e) {
                e.printStackTrace();
              }
            }
            _heartbeatActive = false;
          }
        });
      } catch (RPCException e) {
        _heartbeatActive = false;
        CrawlDBServer.LOG.error(e);
        // force disconnect
        try {
          _channel.close();
        } catch (IOException e1) {
          e1.printStackTrace();
        }
      }
    }
  }

  public void OutgoingChannelConnected(AsyncClientChannel channel) {
    CrawlDBServer.LOG.info("OnlineCrawlerState - Connected to Host:" + _hostName);
    crawlerOnline();
  }

  /** crawler online callback - triggered when a crawler comes online **/
  private void crawlerOnline() {
    _crawlerOnline = true;
  }

  /** crawler offline callback - triggered when crawler has gone offline (socket disconnect etc.) **/
  private void crawlerOffline() {
    _commandActive = false;
    _crawlerOnline = false;
    _heartbeatActive = false;
  }

  public boolean OutgoingChannelDisconnected(AsyncClientChannel channel) {
    // CrawlDBServer.LOG.info("Disconnect detected on OUTGOING Connection to Crawler: " + _hostName);
    crawlerOffline();
    return true;
  }
}