/** * Copyright 2008 - CommonCrawl Foundation * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * **/ package org.commoncrawl.service.pagerank.slave; import java.io.IOException; import java.io.OutputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; import org.commoncrawl.async.CallbackWithResult; import org.commoncrawl.crawl.common.internal.CrawlEnvironment; import org.commoncrawl.service.crawler.filters.SuperDomainFilter; import org.commoncrawl.service.pagerank.Constants; import org.commoncrawl.service.pagerank.IterationInfo; import org.commoncrawl.util.CCStringUtils; /** * * @author rana * */ public class CalculateRankTask extends PageRankTask<CalculateRankTask.CalculateRankTaskResult>{ private static final Log LOG = LogFactory.getLog(CalculateRankTask.class); public CalculateRankTask(PageRankSlaveServer server,CallbackWithResult<CalculateRankTaskResult> completionCallback) { super(server,CalculateRankTask.CalculateRankTaskResult.class, completionCallback); } public static class CalculateRankTaskResult extends PageRankTask.PageRankTaskResult { public boolean done = false; public CalculateRankTaskResult() { } public CalculateRankTaskResult(boolean done) { this.done = done; } public boolean isDone() { return done; } } @Override protected void cancelTask() { } @Override public String getDescription() { return "Calculate Rank Task"; } @Override protected CalculateRankTaskResult runTask() throws IOException { // construct checkpoint filename Path checkpointFilePath = PageRankUtils.getCheckpointFilePath(new Path(_server.getActiveJobConfig().getJobWorkPath()), IterationInfo.Phase.CALCULATE, _server.getActiveJobConfig().getIterationNumber(), _server.getNodeIndex()); // check to see if checkpoint file exists ... if (_server.getFileSystem().exists(checkpointFilePath)) { LOG.info("Checkpoint File Found.Skipping Task."); return new CalculateRankTaskResult(true); } else { // load super domain filter LOG.info("Initializing SuperDomain Filter"); SuperDomainFilter superDomainFilter = new SuperDomainFilter(); superDomainFilter.loadFromPath(_server.getDirectoryServiceAddress(), CrawlEnvironment.ROOT_SUPER_DOMAIN_PATH, false); LOG.info("Starting Calculate Task - Iteration Number:" + _server.getActiveJobConfig().getIterationNumber()); // take value map and distribute it if (_server.getValueMap() == null) { throw new IOException("Value Map NULL! Operation Failed"); } // first zero value map values ... _server.getValueMap().zeroValues(); try { PageRankUtils.calculateRank( _server.getConfig(), _server.getFileSystem(), _server.getValueMap(),_server.getActiveJobLocalPath(), _server.getActiveJobConfig().getJobWorkPath(), _server.getNodeIndex(), _server.getBaseConfig().getSlaveCount(), _server.getActiveJobConfig().getIterationNumber(), superDomainFilter, new PageRankUtils.ProgressAndCancelCheckCallback() { @Override public boolean updateProgress(float percentComplete) { synchronized (CalculateRankTask.this) { _percentComplete = percentComplete; } return false; } }); // and write out value map back to disk ... Path valuesPath = new Path(_server.getActiveJobConfig().getJobWorkPath(),PageRankUtils.makeUniqueFileName(Constants.PR_VALUE_FILE_PREFIX, _server.getActiveJobConfig().getIterationNumber(),_server.getNodeIndex())); LOG.info("Serializing Values to Path:" + valuesPath + " for Iteration:" + _server.getActiveJobConfig().getIterationNumber()); _server.getFileSystem().delete(valuesPath,true); OutputStream valueStream = null; // create new stream .. valueStream = _server.getFileSystem().create(valuesPath); try { _server.getValueMap().flush(valueStream); } catch (IOException e) { LOG.error("Failed to Flush Value Map to OutputStream:" + valuesPath); _server.getFileSystem().delete(valuesPath,false); throw e; } finally { if (valueStream != null) valueStream.close(); } // construct checkpoint filename LOG.info("Creating Checkpoint File:" + checkpointFilePath); return new CalculateRankTaskResult(_server.getFileSystem().createNewFile(checkpointFilePath)); } catch (IOException e) { // calculate failed LOG.error("Calculate Rank Failed with Error:" + CCStringUtils.stringifyException(e)); throw e; } } } }