package org.opens.slurpmanager.handler;
import java.util.Date;
import java.util.List;
import org.opens.slurpmanager.entity.factory.subject.WebarchiveFactory;
import org.opens.slurpmanager.entity.factory.subject.WebresourceFactory;
import org.opens.slurpmanager.crawler.Crawler;
import org.opens.slurpmanager.crawler.CrawlerImpl;
import org.opens.slurpmanager.entity.service.subject.WebarchiveDataService;
import org.opens.slurpmanager.entity.service.subject.WebresourceDataService;
import org.opens.slurpmanager.entity.subject.Webarchive;
import org.opens.slurpmanager.entity.subject.Webresource;
import org.opens.slurpmanager.exception.WebarchiveCreationException;
import org.opens.slurpmanager.scope.CrawlScope;
/**
 * {@link WebarchiveHandler} implementation that creates a web archive by
 * running the injected {@link Crawler} and recording the outcome through the
 * injected data services.
 *
 * <p>All collaborators are supplied through setters. The retrieve, update and
 * delete operations are not implemented and throw
 * {@link UnsupportedOperationException}.
 */
public class WebarchiveHandlerImpl implements WebarchiveHandler {

    private static final String HTTP_PREFIX = "http://";
    private static final String HTTPS_PREFIX = "https://";

    /** Factory for {@link Webarchive} entities; not used by the methods of this class. */
    protected WebarchiveFactory webarchiveFactory;

    public WebarchiveFactory getWebarchiveFactory() {
        return webarchiveFactory;
    }

    public void setWebarchiveFactory(WebarchiveFactory webarchiveFactory) {
        this.webarchiveFactory = webarchiveFactory;
    }

    /** Factory for {@link Webresource} entities; not used by the methods of this class. */
    protected WebresourceFactory webresourceFactory;

    public WebresourceFactory getWebresourceFactory() {
        return webresourceFactory;
    }

    public void setWebresourceFactory(WebresourceFactory webresourceFactory) {
        this.webresourceFactory = webresourceFactory;
    }

    /** Data service used to look up, create and save {@link Webresource} entities. */
    protected WebresourceDataService webresourceDataService;

    public WebresourceDataService getWebresourceDataService() {
        return webresourceDataService;
    }

    public void setWebresourceDataService(WebresourceDataService webresourceDataService) {
        this.webresourceDataService = webresourceDataService;
    }

    /** Data service used to create {@link Webarchive} entities. */
    protected WebarchiveDataService webarchiveDataService;

    public WebarchiveDataService getWebarchiveDataService() {
        return webarchiveDataService;
    }

    public void setWebarchiveDataService(WebarchiveDataService webarchiveDataService) {
        this.webarchiveDataService = webarchiveDataService;
    }

    /** Crawler that performs the archiving; it is reconfigured on every call to {@code create}. */
    protected Crawler crawler;

    public Crawler getCrawler() {
        return crawler;
    }

    public void setCrawler(Crawler crawler) {
        this.crawler = crawler;
    }

    /**
     * Creates an archive from a url, a scope (page or site) and a description.
     *
     * <p>The method is synchronized because the single injected crawler is
     * mutated (url and scope) before each run, so concurrent calls on this
     * handler must not interleave.
     *
     * @param url the address to archive; {@code http://} is prepended for the
     *            crawler when the address carries no http/https scheme
     * @param scope the crawl scope handed to the crawler; its type is stored
     *              on the archive
     * @param description free text stored on the archive
     * @return the url set on the new archive, taken from the crawler result
     * @throws WebarchiveCreationException if the crawler run reports failure
     */
    @Override
    public synchronized String create(String url, CrawlScope scope, String description)
            throws WebarchiveCreationException {
        crawler.setUrl(addProtocolToUrl(url));
        crawler.setScope(scope);

        if (!crawler.run()) {
            throw new WebarchiveCreationException(resolveCrawlerErrorMessage(url));
        }

        // NOTE(review): the resource is looked up and stored under the url as
        // supplied by the caller, not the protocol-prefixed form given to the
        // crawler -- confirm this is intended.
        // Look the resource up once and reuse the result.
        Webresource webresource = webresourceDataService.findByUrl(url);
        if (webresource == null) {
            webresource = webresourceDataService.create();
            webresource.setUrl(url);
            webresource.setDate(new Date());
        }

        Webarchive webarchive = webarchiveDataService.create();
        webresource.addWebarchive(webarchive);
        webarchive.setWebresourceParent(webresource);
        webarchive.setScope(scope.getType());
        webarchive.setDate(crawler.getResultDate());
        webarchive.setUrl(crawler.getResult());
        webarchive.setDescription(description);

        // NOTE(review): only the resource is saved explicitly; presumably the
        // archive is persisted by cascade from its parent -- confirm.
        webresourceDataService.saveOrUpdate(webresource);
        return webarchive.getUrl();
    }

    /** Not implemented; always throws {@link UnsupportedOperationException}. */
    public List<String> retrieveAll() {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /** Not implemented; always throws {@link UnsupportedOperationException}. */
    public String retrieve(String url) {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /** Not implemented; always throws {@link UnsupportedOperationException}. */
    public void update(String url) {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /** Not implemented; always throws {@link UnsupportedOperationException}. */
    public void delete(String url) {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /**
     * Returns the url unchanged when it already starts with an http or https
     * scheme (compared case-insensitively), otherwise prepends {@code http://}.
     *
     * @param url the address supplied by the caller
     * @return the address with an http/https scheme
     */
    private String addProtocolToUrl(String url) {
        if (hasPrefixIgnoreCase(url, HTTP_PREFIX) || hasPrefixIgnoreCase(url, HTTPS_PREFIX)) {
            return url;
        }
        return HTTP_PREFIX + url;
    }

    /** Tells whether {@code value} starts with {@code prefix}, ignoring case. */
    private static boolean hasPrefixIgnoreCase(String value, String prefix) {
        return value.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /**
     * Builds the failure message for an unsuccessful crawl. The detailed
     * message is only reachable through {@link CrawlerImpl}, so any other
     * injected crawler gets a generic message instead of a ClassCastException.
     */
    private String resolveCrawlerErrorMessage(String url) {
        if (crawler instanceof CrawlerImpl) {
            return ((CrawlerImpl) crawler).getErrorMessage();
        }
        return "Crawl failed for url: " + url;
    }
}