package at.chille.crawler;
import java.util.Date;
import java.util.HashMap;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import javax.inject.Inject;
import javax.inject.Named;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Component;
import at.chille.crawler.database.model.CrawlingSession;
import at.chille.crawler.database.model.HostInfo;
import at.chille.crawler.database.repository.CertificateRepository;
import at.chille.crawler.database.repository.CrawlingSessionRepository;
import at.chille.crawler.database.repository.HeaderRepository;
import at.chille.crawler.database.repository.HostInfoRepository;
import at.chille.crawler.database.repository.PageInfoRepository;
import edu.uci.ics.crawler4j.url.WebURL;
@Component
public class DatabaseManager
{
private static ClassPathXmlApplicationContext context = null;
private static DatabaseManager _instance;
protected CrawlingSession currentCrawlingSession;
public static DatabaseManager getInstance()
{
if (_instance == null)
{
_instance = DatabaseManager.getContext().getBean(
DatabaseManager.class);
}
return _instance;
}
/**
* This method is deprecated because it is too slow for huge datasets. (5000 HostInfo --> 35
* seconds) Use saveHostInfo(..) instead! Use it only if you generate the session!
*/
@Deprecated
public synchronized void saveSession()
{
if (currentCrawlingSession == null)
{
throw new NullPointerException();
}
// "Saves a given entity. Use the returned instance for further
// operations as the save operation might have changed the entity
// instance completely."
currentCrawlingSession = crawlingSessionRepository
.save(currentCrawlingSession);
}
public void tryAddingSomething()
{
// not synchronized on purpose: not necessary
this.setNewCrawlingSession("Dummy Crawling Session - no real content.");
HostInfo h = new HostInfo();
h.setHostName("dummy host");
this.addHostInfo(h);
this.saveSession();
}
public synchronized HostInfo saveHostInfo(HostInfo hi)
{
// Reminder: Double store, because by saving the object changes.
// if it is not restored, it is saved again, and all certificates
// occur twice in the database everytime the hostInfo is saved.
currentCrawlingSession.addHostInfo(hi);
hi = hostInfoRepository.save(hi);
currentCrawlingSession.addHostInfo(hi);
return hi;
}
public synchronized void setNewCrawlingSession(String description)
{
currentCrawlingSession = new CrawlingSession();
currentCrawlingSession.setDescription(description);
currentCrawlingSession.setTimeStarted(new Date().getTime());
// crawlingSessionRepository.save(currentCrawlingSession);
}
public void loadLastCrawlingSession()
{
long timeStartedMax = 0;
for (CrawlingSession cs : crawlingSessionRepository.findAll())
{
if (cs.getTimeStarted().longValue() > timeStartedMax)
{
timeStartedMax = cs.getTimeStarted().longValue();
this.currentCrawlingSession = cs;
}
}
}
public CrawlingSession getCurrentCrawlingSession()
{
// not synchronized on purpose: not necessary
return this.currentCrawlingSession;
}
public HashMap<String, Lock> lockedHosts = new HashMap<String, Lock>();
public Lock getHostLock(String hostName)
{
if (!lockedHosts.containsKey(hostName))
lockedHosts.put(hostName, new ReentrantLock());
return lockedHosts.get(hostName);
}
public at.chille.crawler.database.model.HostInfo getHostInfo(String hostName)
{
// not synchronized on purpose: not necessary
HostInfo toReturn = currentCrawlingSession.getHosts().get(hostName);
return toReturn;
}
public static String getFullDomain(WebURL webUrl)
{
String fullDomain;
if (webUrl.getSubDomain().length() > 0)
fullDomain = webUrl.getSubDomain().toLowerCase() + "."
+ webUrl.getDomain().toLowerCase();
else
fullDomain = webUrl.getDomain().toLowerCase();
return fullDomain;
}
public synchronized void addHostInfo(
at.chille.crawler.database.model.HostInfo hostInfo)
{
currentCrawlingSession.addHostInfo(hostInfo);
}
protected static synchronized ApplicationContext getContext()
{
if (context == null)
{
context = new ClassPathXmlApplicationContext();
String[] locations =
{ "classpath*:resthubContext.xml",
"classpath*:application-context-democlient.xml" };
context.getEnvironment().setActiveProfiles("resthub-jpa");
context.setConfigLocations(locations);
context.refresh();
}
return context;
}
@Autowired
HostInfoRepository hostInfoRepository;
@Autowired
CertificateRepository certificateRepository;
@Autowired
PageInfoRepository pageInfoRepository;
@Autowired
CrawlingSessionRepository crawlingSessionRepository;
@Autowired
HeaderRepository headerRepository;
@Inject
@Named("hostInfoRepository")
public void setHostInfoRepository(HostInfoRepository t)
{
this.hostInfoRepository = t;
}
@Inject
@Named("certificateRepository")
public void setCertificateRepository(CertificateRepository t)
{
this.certificateRepository = t;
}
@Inject
@Named("pageInfoRepository")
public void setPageInfoRepository(PageInfoRepository t)
{
this.pageInfoRepository = t;
}
@Inject
@Named("crawlingSessionRepository")
public void setCrawlingSessionRepository(CrawlingSessionRepository t)
{
this.crawlingSessionRepository = t;
}
@Inject
@Named("headerRepository")
public void setHeaderRepository(HeaderRepository t)
{
this.headerRepository = t;
}
}