/** * */ package ecologylab.bigsemantics.collecting; import java.io.File; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import ecologylab.bigsemantics.documentcache.DocumentCache; import ecologylab.bigsemantics.documentcache.HashMapDocumentCache; import ecologylab.bigsemantics.documentcache.PersistentDocumentCache; import ecologylab.bigsemantics.documentparsers.XPathAmender; import ecologylab.bigsemantics.downloadcontrollers.DefaultDownloadController; import ecologylab.bigsemantics.downloadcontrollers.DownloadController; import ecologylab.bigsemantics.gui.InteractiveSpace; import ecologylab.bigsemantics.html.dom.IDOMProvider; import ecologylab.bigsemantics.logging.CacheError; import ecologylab.bigsemantics.logging.CachedHtmlStale; import ecologylab.bigsemantics.logging.CachedMmdStale; import ecologylab.bigsemantics.logging.ChangeLocation; import ecologylab.bigsemantics.logging.DocumentLogRecord; import ecologylab.bigsemantics.logging.ErrorEvent; import ecologylab.bigsemantics.logging.MemoryCacheHit; import ecologylab.bigsemantics.logging.MemoryCacheMiss; import ecologylab.bigsemantics.logging.PersistenceCacheDocHit; import ecologylab.bigsemantics.logging.PersistenceCacheHtmlHit; import ecologylab.bigsemantics.logging.PersistenceCacheMiss; import ecologylab.bigsemantics.metadata.builtins.Document; import ecologylab.bigsemantics.metadata.builtins.DocumentClosure; import ecologylab.bigsemantics.metadata.builtins.Image; import ecologylab.bigsemantics.metametadata.MetaMetadataRepository; import ecologylab.bigsemantics.metametadata.fieldparsers.FieldParserRegistry; import ecologylab.generic.ReflectionTools; import ecologylab.logging.LogEventTypeScope; import ecologylab.net.ParsedURL; import ecologylab.serialization.SimplTypesScope; import ecologylab.serialization.formatenums.Format; /** * The SemanticsScope (also known as Crossroads) contains references to all of the big global * singleton object of S.IM.PL Semantics: * * (1) GlobalCollection that maps ParsedURL keys to <? extends Document> values. * * (2) MetaMetadataRespository * * (3) SemanticsDownloadMonitors -- a set of DownloadMonitors, with different priority levels and * media assignments. * * The SemanticsSessionScope will include references to Crossroads and to Crawler, if there is one. * I believe it is also where we store state related to Seeding. * * @author andruid */ public class SemanticsGlobalScope extends MetaMetadataRepositoryInit { static Logger logger; static { logger = LoggerFactory.getLogger(SemanticsGlobalScope.class); LogEventTypeScope.addEventClass(CachedHtmlStale.class); LogEventTypeScope.addEventClass(CachedMmdStale.class); LogEventTypeScope.addEventClass(CacheError.class); LogEventTypeScope.addEventClass(ChangeLocation.class); LogEventTypeScope.addEventClass(ErrorEvent.class); LogEventTypeScope.addEventClass(MemoryCacheHit.class); LogEventTypeScope.addEventClass(MemoryCacheMiss.class); LogEventTypeScope.addEventClass(PersistenceCacheDocHit.class); LogEventTypeScope.addEventClass(PersistenceCacheHtmlHit.class); LogEventTypeScope.addEventClass(PersistenceCacheMiss.class); } /** * Used to construct a DOM provider. */ final private Class<? extends IDOMProvider> domProviderClass; /** * Used to construct field parsers. */ final private FieldParserRegistry fieldParserRegistry; /** * Maps locations to Document Metadata subclasses. Constructs these Document instances as needed * using the MetaMetadataRepository. */ final private LocalDocumentCollections localDocumentCollection; /** * Pool of DownloadMonitors used for parsing Documents of various types. */ final private SemanticsDownloadMonitors downloadMonitors; /** * Monitoring document downloading, in order to link related metadata. */ final private DocumentDownloadingMonitor documentDownloadingMonitor; private XPathAmender xpathAmender; public SemanticsGlobalScope(SimplTypesScope metadataTScope, Class<? extends IDOMProvider> domProviderClass) { this(null, metadataTScope, domProviderClass); } public SemanticsGlobalScope(File repositoryLocation, SimplTypesScope metadataTScope, Class<? extends IDOMProvider> domProviderClass) { this(repositoryLocation, MetaMetadataRepositoryInit.DEFAULT_REPOSITORY_FORMAT, metadataTScope, domProviderClass); } public SemanticsGlobalScope(File repositoryLocation, Format repositoryFormat, SimplTypesScope metadataTypesScope, Class<? extends IDOMProvider> domProviderClass) { super(repositoryLocation, repositoryFormat, metadataTypesScope); this.domProviderClass = domProviderClass; this.fieldParserRegistry = new FieldParserRegistry(); MetaMetadataRepository repository = this.getMetaMetadataRepository(); DefaultDocumentMapHelper documentMapHelper = new DefaultDocumentMapHelper(repository); DocumentCache<ParsedURL, Document> documentCache = getDocumentCache(); localDocumentCollection = new LocalDocumentCollections(documentMapHelper, documentCache); downloadMonitors = new SemanticsDownloadMonitors(); documentDownloadingMonitor = new DocumentDownloadingMonitor(this); xpathAmender = createXPathAmender(); } public IDOMProvider constructDOMProvider() { return ReflectionTools.getInstance(domProviderClass); } public FieldParserRegistry getFieldParserRegistry() { return fieldParserRegistry; } public LocalDocumentCollections getLocalDocumentCollection() { return localDocumentCollection; } /** * Pool of DownloadMonitors used for parsing Documents of various types. * * @return the downloadMonitors */ public SemanticsDownloadMonitors getDownloadMonitors() { return downloadMonitors; } public DocumentDownloadingMonitor getDocumentDownloadingMonitor() { return documentDownloadingMonitor; } public Document lookupDocument(ParsedURL location) { return location == null ? null : localDocumentCollection.lookupDocument(location); } public Document getOrConstructDocument(ParsedURL location) { if (location == null) return null; Document result = localDocumentCollection.getOrConstruct(location, false); result.setSemanticsSessionScope(this); if (result.getLogRecord() == null) { result.setLogRecord(createLogRecord()); } return result; } public void putDocumentIfAbsent(Document document) { localDocumentCollection.putIfAbsent(document); } public Image getOrConstructImage(ParsedURL location) { if (location == null) return null; Document constructDocument = localDocumentCollection.getOrConstruct(location, true); Image result = null; if (constructDocument.isImage()) { result = (Image) constructDocument; result.setSemanticsSessionScope(this); } return result; } /** * Create a DownloadController using the given DocumentClosure. * * @param closure * @return */ public DownloadController createDownloadController(DocumentClosure closure) { return new DefaultDownloadController(); } /** * Tells if this scope is a web service scope. * * @return */ public boolean isService() { return false; } /** * Unlike the session scope, the global scope is not discriminating. * * @param purl * @return true */ public boolean accept(ParsedURL purl) { return true; } /** * @param location * @return always true in this base class. */ public boolean isLocationNew(ParsedURL location) { return true; } /** * @return If this scope will automatically update document references when referred documents are * downloaded. */ public boolean ifAutoUpdateDocRefs() { return false; } /** * @return If the extraction module should try to find the favicon for downloaded documents (more * specifically, their sites). */ public boolean ifLookForFavicon() { return false; } /** * Does nothing in this base class. */ public Seeding getSeeding() { return null; } public XPathAmender getXPathAmender() { return xpathAmender; } protected XPathAmender createXPathAmender() { return new XPathAmender(); } /** * Does nothing in this base class. */ public int getAppropriateFontIndex() { return -1; } /** * Does nothing in this base class. */ public boolean hasCrawler() { return false; } /** * Does nothing in this base class. */ public Crawler getCrawler() { return null; } /** * Does nothing in this base class. */ public InteractiveSpace getInteractiveSpace() { return null; } /** * @return A DocumentCache for caching extracted Document objects. Subclasses can override this * method to use different caches. */ protected DocumentCache<ParsedURL, Document> getDocumentCache() { return new HashMapDocumentCache(); } /** * @return Null in this base class. Subclasses can override this to use different persistent * caches. */ public PersistentDocumentCache getPersistentDocumentCache() { return null; } public DocumentLogRecord createLogRecord() { return new DocumentLogRecord(); } public void displayStatus(String message) { logger.info(message); } public void displayStatus(String message, int ticks) { logger.info(message); } }