package org.infoobject.core.crawl;
import org.infoobject.core.crawl.CrawlJobResultHandler;
import org.infoobject.core.rdf.vocabulary.InformationObjectVoc;
import org.infoobject.core.rdf.RdfContainer;
import org.openrdf.model.Statement;
import org.openrdf.model.Resource;
import org.openrdf.model.vocabulary.RDF;
import java.util.Iterator;
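/**
* <p>
* Class CrawlerManager: crawls a URI with a {@link MetadataExtractor} and reports the
* outcome to a {@link CrawlJobResultHandler}.
* </p>
* <p>
* Subjects typed as {@code InformationObjectVoc.HardLink} in the extracted metadata are
* followed recursively until the depth of the crawl job is used up.
* </p>
*
* @author Jan Friderici
* Date: 10.08.2008
* Time: 01:34:23
*/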
public class CrawlerManager {

    /** Extractor used to obtain the metadata graph of every crawled URI. */
    private final MetadataExtractor extractor;

    /**
     * Creates a manager that crawls with the given extractor.
     *
     * @param extractor the extractor invoked for every crawled URI
     */
    public CrawlerManager(MetadataExtractor extractor) {
        this.extractor = extractor;
    }

    /**
     * Crawls the URI of the job and, up to the depth of the job, the pages it links to.
     * <p>
     * Every successfully extracted page is reported through
     * {@code handler.urlCrawled(...)}, every failed one through
     * {@code handler.crawlFailed(...)}. When the start URI could be extracted,
     * {@code handler.crawlFinished()} is called exactly once, after the whole
     * traversal is done. When the start URI itself fails, only
     * {@code crawlFailed} is reported.
     * </p>
     *
     * @param job     supplies the start URI and the maximum link depth
     * @param handler receives the crawl results
     */
    public void crawl(CrawlJob job, CrawlJobResultHandler handler) {
        boolean rootExtracted = extractUrl(job.getUri(), job.getDepth(), handler);
        if (rootExtracted) {
            // Signalled here, not inside the recursion, so that it fires once per job
            // regardless of how many leaf pages were reached (or whether any was).
            handler.crawlFinished();
        }
    }

    /**
     * Extracts the metadata of one URI, reports it to the handler and recurses into
     * every subject typed as {@code InformationObjectVoc.HardLink} while
     * {@code depth} is greater than zero.
     * <p>
     * NOTE(review): already crawled URIs are not remembered, so pages that link to
     * each other are crawled again until the depth is exhausted.
     * </p>
     *
     * @param uri     the URI to extract
     * @param depth   number of link levels that may still be followed from this URI
     * @param handler receives the crawl results
     * @return {@code true} if the extractor reported no error for {@code uri},
     *         {@code false} if the failure was passed to {@code handler.crawlFailed}
     */
    private boolean extractUrl(String uri, int depth, CrawlJobResultHandler handler) {
        System.out.println("Crawling uri " + uri + " with " + extractor);
        MetadataExtractorResult extractorResult = extractor.extract(uri);
        if (extractorResult.getError() != null) {
            handler.crawlFailed(extractorResult.getError());
            return false;
        }

        RdfContainer metadataGraph = extractorResult.getMetadataGraph();
        handler.urlCrawled(metadataGraph, depth);

        if (depth > 0) {
            Iterator<Statement> hardLinks =
                    metadataGraph.match(null, RDF.TYPE, InformationObjectVoc.HardLink);
            while (hardLinks.hasNext()) {
                Resource linkedUri = hardLinks.next().getSubject();
                // A failing linked page is reported via crawlFailed by the recursive
                // call; the remaining links are still crawled.
                extractUrl(linkedUri.toString(), depth - 1, handler);
            }
        }
        return true;
    }
}