/**
* Copyright 2008 - 2009 Pro-Netics S.P.A.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.pronetics.madstore.crawler.impl.local;
import com.googlecode.actorom.Actor;
import com.googlecode.actorom.Address;
import com.googlecode.actorom.KillActorException;
import com.googlecode.actorom.Topology;
import com.googlecode.actorom.annotation.OnMessage;
import com.googlecode.actorom.annotation.TopologyInstance;
import it.pronetics.madstore.crawler.model.Link;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * {@link Actor} implementing the main crawling process.
 * <p>
 * The crawl is started by a {@link StartCrawlingMessage}: its link is handed to the downloader actor,
 * and every {@link OutgoingLinksMessage} received afterwards may cause further links to be handed over,
 * until {@code maxVisitedLinks} links have been dispatched. When no dispatched link is still waiting for
 * its outgoing links to be reported, the finish latch is counted down and a {@link KillActorException}
 * is thrown.
 *
 * @author Sergio Bossa
 */
public class CrawlerActor {

    // Static fields are never serialized, so no "transient" modifier is needed here.
    private static final Logger LOG = LoggerFactory.getLogger(CrawlerActor.class);

    // The start link is dispatched without touching the counter, hence counting begins at one.
    private static final int INITIAL_VISITED_LINKS_COUNT = 1;

    //
    @TopologyInstance
    private Topology actorsTopology;
    private Address downloaderAddress;
    //
    private CountDownLatch finishLatch;
    //
    // Links already dispatched for download, keyed by Link#getLink().
    private final Map<String, Link> visitedLinks = new HashMap<String, Link>();
    // Links dispatched for download whose outgoing links have not been reported back yet.
    private final Map<String, Link> toParseLinks = new HashMap<String, Link>();
    private final int maxVisitedLinks;
    private int visitedLinksCounter = INITIAL_VISITED_LINKS_COUNT;

    /**
     * Creates a crawler actor.
     *
     * @param maxVisitedLinks upper bound compared against the dispatched-links counter; the start link
     * is always dispatched and counts as the first one.
     */
    public CrawlerActor(int maxVisitedLinks) {
        this.maxVisitedLinks = maxVisitedLinks;
    }

    /**
     * Starts a crawl: stores the downloader address and finish latch carried by the message, resets all
     * per-crawl state and dispatches the start link to the downloader.
     *
     * @param message the message carrying downloader address, finish latch and start link.
     */
    @OnMessage(type=StartCrawlingMessage.class)
    public void startCrawling(StartCrawlingMessage message) {
        downloaderAddress = message.getDownloaderAddress();
        finishLatch = message.getFinishLatch();
        visitedLinks.clear();
        toParseLinks.clear();
        // Reset the counter together with the maps, so that a new crawl on the same instance
        // gets the full maxVisitedLinks budget rather than the leftovers of a previous one.
        visitedLinksCounter = INITIAL_VISITED_LINKS_COUNT;
        sendDownloadLinkMessage(message.getLink());
    }

    /**
     * Handles the outgoing links reported for a previously dispatched source link: dispatches every
     * link not dispatched before, as long as the limit has not been reached, then marks the source
     * link as parsed and finishes the crawl if nothing else is pending.
     *
     * @param message the message carrying the source link and its outgoing links.
     */
    @OnMessage(type=OutgoingLinksMessage.class)
    public void crawlLinks(OutgoingLinksMessage message) {
        Collection<Link> outgoingLinks = message.getOutgoingLinks();
        for (Link outgoingLink : outgoingLinks) {
            if (visitedLinksCounter >= maxVisitedLinks) {
                // The counter only grows inside this loop, so no further link can be accepted.
                break;
            }
            if (!visitedLinks.containsKey(outgoingLink.getLink())) {
                LOG.info("Crawling link-{}: {}", visitedLinksCounter, outgoingLink);
                sendDownloadLinkMessage(outgoingLink);
                ++visitedLinksCounter;
            }
        }
        // The source link has now been processed; links dispatched above are already registered as
        // pending, so an empty map really means there is nothing left to wait for.
        toParseLinks.remove(message.getSourceLink().getLink());
        if (toParseLinks.isEmpty()) {
            visitedLinks.clear();
            toParseLinks.clear();
            finishCrawling();
        }
    }

    /**
     * Registers the link as both visited and pending, then sends a {@link DownloadLinkMessage} for it
     * to the actor found at the stored downloader address.
     */
    private void sendDownloadLinkMessage(Link link) {
        Actor downloader = actorsTopology.getActor(downloaderAddress);
        DownloadLinkMessage message = new DownloadLinkMessage(link);
        visitedLinks.put(link.getLink(), link);
        toParseLinks.put(link.getLink(), link);
        downloader.send(message);
    }

    /**
     * Counts down the finish latch and throws {@link KillActorException}.
     * NOTE(review): the exception presumably tells actorom to stop this actor - confirm against the
     * library documentation.
     */
    private void finishCrawling() {
        finishLatch.countDown();
        throw new KillActorException();
    }
}