/**
* Copyright 2008 - CommonCrawl Foundation
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
**/
package org.commoncrawl.service.pagerank.slave;
import java.io.File;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.commoncrawl.async.CallbackWithResult;
import org.commoncrawl.service.pagerank.Constants;
import org.commoncrawl.service.pagerank.IterationInfo;
import org.commoncrawl.util.CCStringUtils;
public class DistributeRankTask extends PageRankTask<DistributeRankTask.DistributeRankTaskResult>{
private static final Log LOG = LogFactory.getLog(DistributeRankTask.class);
public DistributeRankTask(PageRankSlaveServer server,CallbackWithResult<DistributeRankTaskResult> completionCallback) {
super(server,DistributeRankTask.DistributeRankTaskResult.class, completionCallback);
}
public static class DistributeRankTaskResult extends PageRankTask.PageRankTaskResult {
public boolean done = false;
public DistributeRankTaskResult() {
}
public DistributeRankTaskResult(boolean done) {
this.done = done;
}
public boolean isDone() { return done; }
}
@Override
protected void cancelTask() {
}
@Override
public String getDescription() {
return "Distribute Rank Task";
}
@Override
protected DistributeRankTaskResult runTask() throws IOException {
// check to see if checkpoint already exists ...
Path checkpointFilePath = PageRankUtils.getCheckpointFilePath(new Path(_server.getActiveJobConfig().getJobWorkPath()),
IterationInfo.Phase.DISTRIBUTE,
_server.getActiveJobConfig().getIterationNumber(),
_server.getNodeIndex());
// only run distribute task if checkpoint file is not present
if (!_server.getFileSystem().exists(checkpointFilePath)) {
LOG.info("Starting Distribute Task - Iteration Number:" + _server.getActiveJobConfig().getIterationNumber());
// take value map and distribute it
if (_server.getValueMap() == null) {
throw new IOException("Value Map NULL! Operation Failed");
}
File localOutlinksFilePath = new File(_server.getActiveJobLocalPath(),PageRankUtils.makeUniqueFileName(Constants.PR_OUTLINKS_FILE_PREFIX,0,_server.getNodeIndex()));
LOG.info("Local Outlinks Path is:" + localOutlinksFilePath);
try {
PageRankUtils.distributeRank(_server.getValueMap(),new Path(localOutlinksFilePath.getAbsolutePath()),false,_server.getActiveJobLocalPath(),
_server.getActiveJobConfig().getJobWorkPath(), _server.getNodeIndex(), _server.getBaseConfig().getSlaveCount(),
_server.getActiveJobConfig().getIterationNumber(),
new PageRankUtils.ProgressAndCancelCheckCallback() {
@Override
public boolean updateProgress(float percentComplete) {
synchronized (DistributeRankTask.this) {
_percentComplete = percentComplete;
}
return false;
}
});
// ok write out the checkpoint file ...
return new DistributeRankTaskResult(_server.getFileSystem().createNewFile(checkpointFilePath));
}
catch (IOException e) {
LOG.error("Distribute Rank Failed with Error:" +CCStringUtils.stringifyException(e));
DistributeRankTaskResult result = new DistributeRankTaskResult(false);
result.setFailed(CCStringUtils.stringifyException(e));
return result;
}
}
// otherwise ... task already complete. skip ...
else {
LOG.info("Checkpoint File:" + checkpointFilePath + " exists. Skipping DistributeRankTask");
return new DistributeRankTaskResult(true);
}
}
}