/**
* Copyright 2008 - CommonCrawl Foundation
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
**/
package org.commoncrawl.service.pagerank.master;
import java.io.IOException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.commoncrawl.async.Timer;
import org.commoncrawl.crawl.common.internal.CrawlEnvironment;
import org.commoncrawl.rpc.base.internal.AsyncClientChannel;
import org.commoncrawl.rpc.base.internal.AsyncRequest;
import org.commoncrawl.rpc.base.internal.NullMessage;
import org.commoncrawl.rpc.base.internal.AsyncRequest.Callback;
import org.commoncrawl.rpc.base.internal.AsyncRequest.Status;
import org.commoncrawl.service.pagerank.BaseConfig;
import org.commoncrawl.service.pagerank.BeginPageRankInfo;
import org.commoncrawl.service.pagerank.CheckpointInfo;
import org.commoncrawl.service.pagerank.IterationInfo;
import org.commoncrawl.service.pagerank.PageRankJobConfig;
import org.commoncrawl.service.pagerank.PageRankSlave;
import org.commoncrawl.service.pagerank.SlaveStatus;
import org.commoncrawl.util.CCStringUtils;
///////////////////////////////////////////////////////
/* PageRank Remote Slave State Object */
///////////////////////////////////////////////////////
/**
* helper object used to encapsulate an online crawler's state information
*
* @author rana
*
*/
public class PageRankRemoteSlave implements AsyncClientChannel.ConnectionCallback, Comparable<PageRankRemoteSlave> {
private static final int HEARTBEAT_TIMER_INTERVAL = 10000;
private static final Log LOG = LogFactory.getLog(PageRankRemoteSlave.class);
private int _slaveId;
private int _instanceId;
private String _hostName;
private InetSocketAddress _hostAddress;
private long _lastUpdateTime = -1;
private PageRankMaster _master;
private Timer _heartbeatTimer = null;
private SlaveStatus _lastKnownStatus = new SlaveStatus();
private boolean _ignoreHeartbeats = false;
private boolean _online = false;
private AsyncClientChannel _channel;
private PageRankSlave.AsyncStub _slaveService;
public PageRankRemoteSlave(PageRankMaster master,int slaveId,String hostName,int instanceId){
_master = master;
_slaveId = slaveId;
_hostName = hostName;
_instanceId = instanceId;
InetAddress slaveAddress = null;
try {
LOG.info("Resolving Slave Address for Slave:" + hostName);
slaveAddress = InetAddress.getByName(hostName);
LOG.info("Resolving Slave Address for Slave:" + hostName + " to:" + slaveAddress.getHostAddress());
} catch (UnknownHostException e) {
LOG.error("Unable to Resolve Slave HostName:" + hostName + " Exception:" + CCStringUtils.stringifyException(e));
throw new RuntimeException("Unable to Resolve Slave HostName:" + hostName + " Exception:" + CCStringUtils.stringifyException(e));
}
_hostAddress = new InetSocketAddress(slaveAddress.getHostAddress(),CrawlEnvironment.DEFAULT_PAGERANK_SLAVE_RPC_PORT + (_instanceId * 2));
if (_hostAddress == null) {
throw new RuntimeException("Invalid HostName String in PageRank Slave Registration: " + _hostName);
}
else {
LOG.info("Host Address for Slave:" + hostName +" is:" + _hostAddress);
}
}
/**
* connect to remote
*
* @throws IOException
*/
public void connect() throws IOException {
LOG.info("Opening Channel to Host:" + _hostName);
// initialize channel ...
_channel = new AsyncClientChannel(_master.getEventLoop(),_master.getServerAddress(),_hostAddress,this);
_channel.open();
_slaveService = new PageRankSlave.AsyncStub(_channel);
}
public int getSlaveId() { return _slaveId; }
public String getHostName() { return _hostName; }
public int getPort() { return CrawlEnvironment.DEFAULT_PAGERANK_SLAVE_RPC_PORT + (_instanceId * 2); }
public String getFullyQualifiedName() { return getHostName() + ":" + getPort(); }
public InetSocketAddress getHostAddress() { return _hostAddress; }
public long getLastUpdateTime() { return _lastUpdateTime; }
public String getStatusText() {
if (_online) {
String statusText = SlaveStatus.State.toString(_lastKnownStatus.getState());
if (_lastKnownStatus.isFieldDirty(SlaveStatus.Field_PERCENTCOMPLETE)) {
statusText += " (" + _lastKnownStatus.getPercentComplete() + "% Complete)";
}
return statusText;
}
else {
return "Offline";
}
}
public SlaveStatus getLastKnowStatus() { return _lastKnownStatus; }
private void enableHeartbeats() { _ignoreHeartbeats = false; }
private void disableHeartbeats() { _ignoreHeartbeats = true; }
private boolean areHeartbeatsDisabled() { return _ignoreHeartbeats; }
public void OutgoingChannelConnected(AsyncClientChannel channel) {
LOG.info("Connected to PageRank Slave:" + _hostName);
slaveOnline();
}
public boolean OutgoingChannelDisconnected(AsyncClientChannel channel) {
//LOG.info("Disconnect detected for Slave : "+ _hostName);
slaveOffline();
return false;
}
private void slaveOnline() {
try {
// initialize the slave ...
_slaveService.initialize(_master.getBaseConfigForSlave(this), new Callback<BaseConfig,SlaveStatus> () {
@Override
public void requestComplete(AsyncRequest<BaseConfig, SlaveStatus> request) {
if (request.getStatus() != Status.Success) {
LOG.error("resetState failed on Slave:" + getFullyQualifiedName());
slaveOffline();
}
else {
_online = true;
// notify master of status change ...
updateSlaveStatus(request.getOutput());
// start the heartbeat timer ...
startHeartbeatTimer();
}
}
});
}
catch (IOException e) {
LOG.error(CCStringUtils.stringifyException(e));
slaveOffline();
}
}
private void slaveOffline() {
boolean wasOnline = _online;
_online = false;
// kill heartbeats...
killHeartbeatTimer();
// clear out last know status
_lastKnownStatus.clear();
if (wasOnline) {
// inform master ...
_master.slaveStatusChanged(this);
}
// reconnect channel
if (_channel != null) {
try {
_channel.reconnect();
} catch (IOException e) {
LOG.error(CCStringUtils.stringifyException(e));
}
}
}
void sendStartPageRankCmd(int serverStatus) {
if (_channel != null && _channel.isOpen()) {
try {
// disable heartbeats during this async call ...
disableHeartbeats();
BeginPageRankInfo pageRankInfo = new BeginPageRankInfo();
try {
pageRankInfo.setJobConfig((PageRankJobConfig) _master.getActiveJobConfig().clone());
} catch (CloneNotSupportedException e) {
}
pageRankInfo.setServerStatus(serverStatus);
_slaveService.beginPageRank(pageRankInfo,new Callback<BeginPageRankInfo,SlaveStatus>() {
@Override
public void requestComplete(AsyncRequest<BeginPageRankInfo, SlaveStatus> request) {
if (request.getStatus() == Status.Success) {
// update cached status
updateSlaveStatus(request.getOutput());
}
else {
LOG.error("beginPageRank to slave:" + getFullyQualifiedName() + " Failed with status:" + request.getStatus());
// reset connection
slaveOffline();
}
// enable heartbeats here ...
enableHeartbeats();
}
});
}
catch (IOException e) {
// we have to renable heartbeats here ...
enableHeartbeats();
LOG.error(CCStringUtils.stringifyException(e));
// restart connection ...
slaveOffline();
}
}
else {
LOG.error("sendStartPageRank called on Slave with Invalid State. Slave:" + getFullyQualifiedName());
}
}
void sendEndPageRankCmd() {
if (_channel != null && _channel.isOpen()) {
// disable heartbeats during this async call ...
disableHeartbeats();
try {
_slaveService.endPageRank(new Callback<NullMessage,SlaveStatus>() {
@Override
public void requestComplete(AsyncRequest<NullMessage, SlaveStatus> request) {
if (request.getStatus() == Status.Success) {
// update cached status
updateSlaveStatus(request.getOutput());
}
else {
LOG.error("RPC Failed during sendEndPageRank call to Host:" + getFullyQualifiedName());
slaveOffline();
}
// no matter what - renable heartbeats
enableHeartbeats();
}
});
}
catch (IOException e) {
// we have to renable heartbeats here ...
enableHeartbeats();
LOG.error(CCStringUtils.stringifyException(e));
}
}
else {
LOG.error("sendEndPageRank called on Slave with Invalid State. Slave:" + getFullyQualifiedName());
}
}
void sendCheckpointCommand(long txnId, int currentPhase,int currentIterationNumber) {
if (_channel != null && _channel.isOpen()) {
this._lastKnownStatus.setCurrentCheckpointId(txnId);
// disable heartbeats during this async call ...
disableHeartbeats();
CheckpointInfo checkpointInfo = new CheckpointInfo();
checkpointInfo.setTxnId(txnId);
checkpointInfo.setCurrentPhase(currentPhase);
checkpointInfo.setCurrentIterationNumber(currentIterationNumber);
try {
_slaveService.checkpoint(checkpointInfo, new Callback<CheckpointInfo,SlaveStatus>() {
@Override
public void requestComplete(AsyncRequest<CheckpointInfo, SlaveStatus> request) {
try {
if (request.getStatus() == Status.Success) {
// update cached status
updateSlaveStatus(request.getOutput());
}
else {
LOG.error("RPC Failed during sendCheckpoint call to Host:" + getFullyQualifiedName());
slaveOffline();
}
}
finally {
// we have to renable heartbeats here ...
enableHeartbeats();
}
}
});
}
catch (IOException e) {
// we have to renable heartbeats here ...
enableHeartbeats();
LOG.error(CCStringUtils.stringifyException(e));
// reboot the connection...
slaveOffline();
}
}
else {
LOG.error("sendCheckpoint called on Slave with Invalid State. Slave:" + getFullyQualifiedName());
}
}
void sendDoIterationCmd() {
if (_channel != null && _channel.isOpen()) {
// disable heartbeats during this async call ...
disableHeartbeats();
IterationInfo iterationInfo = new IterationInfo();
iterationInfo.setJobId(_master.getCurrentJobNumber());
iterationInfo.setIterationNumber(_master.getCurrentIterationNumber());
iterationInfo.setPhase(_master.getSlaveIterationPhase());
try {
_slaveService.doIteration(iterationInfo, new Callback<IterationInfo,SlaveStatus>() {
@Override
public void requestComplete(AsyncRequest<IterationInfo, SlaveStatus> request) {
if (request.getStatus() == Status.Success) {
// update cached status
updateSlaveStatus(request.getOutput());
}
else {
LOG.error("RPC Failed during sendEndPageRank call to Host:" + getFullyQualifiedName());
slaveOffline();
}
// we have to renable heartbeats here ...
enableHeartbeats();
}
});
}
catch (IOException e) {
// we have to renable heartbeats here ...
enableHeartbeats();
LOG.error(CCStringUtils.stringifyException(e));
// reboot the connection...
slaveOffline();
}
}
else {
LOG.error("sendDoIteration called on Slave with Invalid State. Slave:" + getFullyQualifiedName());
}
}
void sendResetCmd() {
if (_channel != null && _channel.isOpen()) {
try {
// disable heartbeats during this async call ...
disableHeartbeats();
// re-initialize the slave ...
_slaveService.initialize(_master.getBaseConfigForSlave(this), new Callback<BaseConfig,SlaveStatus> () {
@Override
public void requestComplete(AsyncRequest<BaseConfig, SlaveStatus> request) {
if (request.getStatus() != Status.Success) {
LOG.error("resetState failed on Slave:" + getFullyQualifiedName());
slaveOffline();
}
else {
// notify master of status change ...
updateSlaveStatus(request.getOutput());
}
// we have to re-enable heartbeats here ...
enableHeartbeats();
}
});
}
catch (IOException e) {
// we have to re-enable heartbeats here ...
enableHeartbeats();
LOG.error(CCStringUtils.stringifyException(e));
slaveOffline();
}
}
}
private void updateSlaveStatus(SlaveStatus status) {
_lastUpdateTime = System.currentTimeMillis();
_lastKnownStatus.clear();
try {
_lastKnownStatus.merge(status);
} catch (CloneNotSupportedException e) {
}
// and inform master ...
_master.slaveStatusChanged(this);
}
private void startHeartbeatTimer() {
_heartbeatTimer = new Timer(HEARTBEAT_TIMER_INTERVAL,false,new Timer.Callback() {
@Override
public void timerFired(final Timer timer) {
LOG.info("Heartbeat Timer Fired. Seconding heartbeat message to slave:" + getFullyQualifiedName());
try {
_slaveService.heartbeat(new Callback<NullMessage,SlaveStatus>() {
public void requestComplete(AsyncRequest<NullMessage, SlaveStatus> request) {
LOG.info("Received Heartbeat message Response from Slave:"+ getFullyQualifiedName());
boolean forceDisconnect = false;
if (request.getStatus() == AsyncRequest.Status.Success) {
if (!areHeartbeatsDisabled()) {
LOG.info("updating SlaveStatus from heartbeat response for Slave:"+ getFullyQualifiedName());
// update slave status ...
updateSlaveStatus(request.getOutput());
}
else {
LOG.info("heartbeats are disabled. Skipping response for Slave:"+ getFullyQualifiedName());
}
// need to SET timer because we are not in timerFired context anymore
_master.getEventLoop().setTimer(timer);
}
else {
LOG.error("Heartbeat request to slave: " + getFullyQualifiedName() +" failed with Status: " + request.getStatus().toString());
forceDisconnect = true;
}
if (forceDisconnect) {
slaveOffline();
}
}
});
}
catch (IOException e ){
slaveOffline();
LOG.error(CCStringUtils.stringifyException(e));
}
}
});
_master.getEventLoop().setTimer(_heartbeatTimer);
}
private void killHeartbeatTimer() {
if (_heartbeatTimer != null) {
_master.getEventLoop().cancelTimer(_heartbeatTimer);
_heartbeatTimer = null;
}
}
@Override
public int compareTo(PageRankRemoteSlave other) {
return (_slaveId < other._slaveId) ? -1 : (_slaveId > other._slaveId) ? 1 : 0;
}
}