/** * */ package ecologylab.bigsemantics.metametadata; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import ecologylab.appframework.types.prefs.Pref; import ecologylab.bigsemantics.collecting.CookieProcessing; import ecologylab.bigsemantics.collecting.LinkedMetadataMonitor; import ecologylab.bigsemantics.collecting.SemanticsGlobalScope; import ecologylab.bigsemantics.collecting.SemanticsSite; import ecologylab.bigsemantics.collecting.SemanticsSiteMap; import ecologylab.bigsemantics.metadata.Metadata; import ecologylab.bigsemantics.metadata.MetadataClassDescriptor; import ecologylab.bigsemantics.metadata.builtins.Document; import ecologylab.bigsemantics.metadata.builtins.Image; import ecologylab.bigsemantics.metadata.scalar.types.MetadataScalarType; import ecologylab.bigsemantics.metametadata.exceptions.MetaMetadataException; import ecologylab.bigsemantics.namesandnums.DocumentParserTagNames; import ecologylab.collections.PrefixCollection; import ecologylab.collections.PrefixPhrase; import ecologylab.generic.Debug; import ecologylab.generic.HashMapArrayList; import ecologylab.net.ParsedURL; import ecologylab.net.UserAgent; import ecologylab.serialization.ElementState; import ecologylab.serialization.SimplTypesScope; import ecologylab.serialization.TranslationContext; import ecologylab.serialization.annotations.simpl_collection; import ecologylab.serialization.annotations.simpl_composite; import ecologylab.serialization.annotations.simpl_inherit; import ecologylab.serialization.annotations.simpl_map; import ecologylab.serialization.annotations.simpl_nowrap; import ecologylab.serialization.annotations.simpl_scalar; import ecologylab.serialization.annotations.simpl_tag; import ecologylab.textformat.NamedStyle; /** * The 
repository of meta-metadata wrappers.
 *
 * Wrapper definitions can scatter over multiple files, while loadFromXXX() methods will collect
 * information and assemble a unified representation.
 *
 * @author damaraju
 *
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@simpl_inherit
public class MetaMetadataRepository extends ElementState implements PackageSpecifier,
    DocumentParserTagNames
{

  /** SLF4J logger for this class. */
  static final Logger logger = LoggerFactory.getLogger(MetaMetadataRepository.class);

  /** Name under which the default style is looked up by getDefaultStyle(). */
  private static final String DEFAULT_STYLE_NAME = "default";

  /**
   * Exposed through getBaseDocumentMM(). Not assigned anywhere in this class; presumably set by
   * the loading code elsewhere in the package -- confirm.
   */
  static MetaMetadata baseDocumentMM;

  /**
   * Exposed through getBaseImageMM(). Not assigned anywhere in this class; presumably set by the
   * loading code elsewhere in the package -- confirm.
   */
  static MetaMetadata baseImageMM;

  // [region] de/serializable data fields.

  /**
   * The name of the repository.
   */
  @simpl_scalar
  private String name;

  /**
   * Versioning hash of the repository, read and written through getHash() / setHash(). It carries
   * no simpl annotation, unlike the neighbouring fields.
   */
  private String hash;

  /**
   * Build information. build() creates an empty instance on first access when this is null.
   */
  @simpl_composite
  private Build build;

  /**
   * The package in which the class files have to be generated.
   */
  @simpl_tag("package")
  @simpl_scalar
  private String packageAttribute;

  /**
   * user agent strings.
   */
  @simpl_map("user_agent")
  private Map<String, UserAgent> userAgents;

  /**
   * default user agent string name. When null, getDefaultUserAgentString() fills it in from the
   * first user agent that reports isDefaultAgent().
   */
  @simpl_scalar
  private String defaultUserAgentName;

  /**
   * Default cache life, kept as the raw string; its format is not interpreted in this class.
   */
  @simpl_scalar
  private String defaultCacheLife;

  /**
   * definition of search engines.
   */
  @simpl_composite
  private SearchEngines searchEngines;

  /**
   * definition of sites.
   */
  @simpl_map("site")
  private SemanticsSiteMap sites;

  /**
   * definition of name styles.
   */
  @simpl_map("named_style")
  private Map<String, NamedStyle> namedStyles;

  /**
   * definition of cookie processors.
   */
  @simpl_nowrap
  @simpl_collection("cookie_processing")
  private ArrayList<CookieProcessing> cookieProcessors;

  /**
   * definition of selectors. selectors are used to select a meta-metadata based on URL pattern or
   * MIME type / suffix.
   */
  @simpl_map("selector")
  @simpl_nowrap
  private HashMapArrayList<String, MetaMetadataSelector> selectorsByName;

  /**
   * The map from meta-metadata name (currently simple name, but might be extended to fully
   * qualified name in the future) to meta-metadata objects.
This collection is filled during the loading process.
   *
   * @see {@code MetaMetadata}, {@code Mappable}
   */
  @simpl_map("meta_metadata")
  @simpl_nowrap
  HashMapArrayList<String, MetaMetadata> repositoryByName;

  /**
   * The map from alternative meta-metadata names to meta-metadata type objects.
   *
   * Call initAltNames() to fill in this collection after loading the repository.
   *
   * When looking up meta-metadata by name, canonical names should be looked up before this map.
   */
  @simpl_map("entry")
  HashMapArrayList<String, MetaMetadataAltNameEntry> altNames;

  // [endregion]

  /**
   * package mmd scopes. Stays null unless set by deserialization; nothing in this class
   * initializes it.
   */
  @simpl_map("mmd_scope")
  Map<String, MmdScope> packageMmdScopes;

  // [region] repository maps generated from repositoryByName. used for look-up.

  /**
   * meta-metadata keyed by the name of the described metadata class (the value of
   * Class.getName() on the descriptor's described class).
   */
  private HashMap<String, MetaMetadata> repositoryByClassName = new HashMap<String, MetaMetadata>();

  /**
   * Repository by MIME type. The first meta-metadata registering a MIME type keeps it.
   */
  private HashMap<String, MetaMetadata> repositoryByMime = new HashMap<String, MetaMetadata>();

  /**
   * Repository by suffix. The first meta-metadata registering a suffix keeps it.
   */
  private HashMap<String, MetaMetadata> repositoryBySuffix = new HashMap<String, MetaMetadata>();

  /**
   * Collection of URL prefixes. Each added prefix phrase carries its meta-metadata as the mapped
   * object. The '/' argument is presumably the path separator -- confirm against PrefixCollection.
   */
  private PrefixCollection urlPrefixCollection = new PrefixCollection('/');

  /**
   * Repository of documents with domain as key. Filled from selectors that specify a domain but
   * no URL pattern.
   */
  private HashMap<String, MetaMetadata> documentRepositoryByDomain = new HashMap<String, MetaMetadata>();

  /**
   * Repository of documents with noAnchorNoQuery URL string as key. Several selectors may share
   * one stripped URL, hence the list of entries per key.
   */
  private HashMap<String, ArrayList<StrippedUrlEntry>> documentRepositoryByUrlStripped = new HashMap<String, ArrayList<StrippedUrlEntry>>();

  /**
   * Repository of documents selected by URL pattern. The key is the selector's domain; each
   * value lists the pattern entries registered for that domain.
   */
  private HashMap<String, ArrayList<RepositoryPatternEntry>> documentRepositoryByPattern = new HashMap<String, ArrayList<RepositoryPatternEntry>>();

  /**
   * Repository of images with noAnchorNoQuery URL string as key.
*/ // private HashMap<String, MetaMetadata> imageRepositoryByUrlStripped = new HashMap<String, // MetaMetadata>(); /** * Repository of images with URL pattern as key. */ // private HashMap<String, ArrayList<RepositoryPatternEntry>> imageRepositoryByPattern = new // HashMap<String, ArrayList<RepositoryPatternEntry>>(); // [endregion] /** * The metadata translation scope used by this repository. */ private SimplTypesScope metadataTScope; private LinkedMetadataMonitor linkedMetadataMonitor = new LinkedMetadataMonitor(); // static Logger log4j = Logger.getLogger(BaseLogger.baseLogger); private static boolean initializedTypes; static { initializeTypes(); } public static synchronized void initializeTypes() { if (!initializedTypes) { initializedTypes = true; MetadataScalarType.init(); // register metadata-specific scalar types ecologylab.bigsemantics.metadata.builtins.MetadataBuiltinsTypesScope.get(); } } // [region] basic methods (getter/setters, etc.). public String getName() { return name; } public void setName(String name) { this.name = name; } public Build build() { if (this.build == null) { this.build = new Build(); } return this.build; } @Override public String packageName() { return packageAttribute; } public NamedStyle getDefaultStyle() { return lookupStyle(DEFAULT_STYLE_NAME); } public NamedStyle lookupStyle(String styleName) { return namedStyles == null ? 
null : namedStyles.get(styleName); } public String getDefaultCacheLife() { return defaultCacheLife; } public String getDefaultUserAgentString() { if (defaultUserAgentName == null) { for (UserAgent userAgent : getUserAgents().values()) { if (userAgent.isDefaultAgent()) { defaultUserAgentName = userAgent.name(); break; } } } return getUserAgentString(defaultUserAgentName); } public String getUserAgentString(String name) { return getUserAgents().get(name).userAgentString(); } private Map<String, UserAgent> getUserAgents() { if (userAgents == null) userAgents = new HashMap<String, UserAgent>(); return userAgents; } public String getDefaultSearchEngine() { return searchEngines != null ? searchEngines.getDefaultEngine() : "bing"; } public SearchEngine getSearchEngine(String engineName) { if (searchEngines != null) { return searchEngines.getEngine(engineName); } return null; } public SemanticsSite getSite(String domain) { return sites.get(domain); } public SemanticsSite getSite(Document document, SemanticsGlobalScope semanticsSessionScope) { return sites.getOrConstruct(document, semanticsSessionScope); } public SemanticsSiteMap getSemanticsSiteMap() { return sites; } public SimplTypesScope metadataTranslationScope() { return metadataTScope; } public Set<String> keySet() { return (repositoryByName == null) ? null : repositoryByName.keySet(); } public Collection<MetaMetadata> values() { return (repositoryByName == null) ? null : repositoryByName.values(); } public Map<String, MmdScope> getPackageMmdScopes() { return this.packageMmdScopes; } /** * * @return the monitor used for linking metadata. */ public LinkedMetadataMonitor getLinkedMetadataMonitor() { return linkedMetadataMonitor; } /** * Convenience method for allowing a repository to be used as a wrapper, for deserialization of * MMD into the repository directory. 
* * @param additionalMMD */ public void addMetaMetadata(MetaMetadata additionalMMD) { if (additionalMMD == null || additionalMMD.getName() == null || additionalMMD.getName().length() == 0) { System.err.println("Invalid mmd, not adding to the repository!"); return; } if (repositoryByName == null) // initialize only if we need to. repositoryByName = new HashMapArrayList<String, MetaMetadata>(); repositoryByName.put(additionalMMD.getName(), additionalMMD); } // [endregion] /** * Combines the data stored in the parameter repository into this repository, except for * repositoryByName. * * @param theOtherRepository * @return */ void integrateRepositoryWithThis(MetaMetadataRepository theOtherRepository) { this.userAgents = combineMap(this.userAgents, theOtherRepository.userAgents); if (this.searchEngines != null && theOtherRepository.searchEngines != null) { String theOtherDefaultEngine = theOtherRepository.searchEngines.getDefaultEngine(); if (theOtherDefaultEngine != null) { if (this.searchEngines.getDefaultEngine() == null) this.searchEngines.setDefaultEngine(theOtherDefaultEngine); else warning("default engine already defined, ignoring the one defined in " + theOtherRepository); } this.searchEngines.setSearchEngines(combineMap(this.searchEngines.getSearchEngines(), theOtherRepository.searchEngines .getSearchEngines())); } this.namedStyles = combineMap(this.namedStyles, theOtherRepository.namedStyles); this.sites = combineMap(this.sites, theOtherRepository.sites); if (this.defaultCacheLife == null) { this.defaultCacheLife = theOtherRepository.defaultCacheLife; } if (this.build == null) { this.build = theOtherRepository.build; } } /** * copy all items in srcMap to destMap, and report duplicate elements. * * @param destMap * @param srcMap * @param controller * the filter used to tune the combining process. * @return the combined map. if destMap != null, this is destMap; or this is srcMap. 
*/ private <KT, VT, MT extends Map<KT, VT>> MT combineMap(MT destMap, MT srcMap) { if (destMap == null) return srcMap; if (srcMap != null) { for (KT key : srcMap.keySet()) { VT value = srcMap.get(key); if (destMap.containsKey(key)) { error(value.getClass().getSimpleName() + " DEFINED TWICE: " + key); } else { destMap.put(key, value); } } } return destMap; } /** * traverse the repository and generate a translation scope from it. note that the graph switch * should be turned on because there probably will be type graphs in the meta-metadata type * system. * * @param TSName * the name of the resulted translation scope. * @return */ public SimplTypesScope traverseAndGenerateTranslationScope(String TSName) { // init the TScope with built-ins, and bind descriptors for built-ins. // SimplTypesScope metadataBuiltInTScope = MetadataBuiltinsTranslationScope.get(); // SimplTypesScope ts = SimplTypesScope.get(TSName, new SimplTypesScope[] // {metadataBuiltInTScope}); // for (MetaMetadata metaMetadata : repositoryByName) // { // if (metaMetadata.isBuiltIn()) // metaMetadata.bindMetadataClassDescriptor(ts); // } SimplTypesScope ts = SimplTypesScope.get(TSName, new Class[] {}); // inheritance. traverseAndInheritMetaMetadata(); // generate translation scopes. for (MetaMetadata metaMetadata : new ArrayList<MetaMetadata>(repositoryByName.values())) metaMetadata.findOrGenerateMetadataClassDescriptor(ts); return ts; } /** * Recursively bind MetadataClassDescriptors to all MetaMetadata. Perform other initialization. * * @param metadataTScope * the (global) metadata translation scope used for binding. 
*/ public void bindMetadataClassDescriptorsToMetaMetadata(SimplTypesScope metadataTScope) { this.metadataTScope = metadataTScope; traverseAndInheritMetaMetadata(); // global metadata classes // use another copy because we may modify the scope during the process ArrayList<MetaMetadata> mmds = new ArrayList<MetaMetadata>(this.repositoryByName.values()); for (MetaMetadata mmd : mmds) { MetadataClassDescriptor mcd = mmd.bindMetadataClassDescriptor(metadataTScope); if (mcd == null) { warning("Cannot bind metadata class descriptor for " + mmd); this.repositoryByName.remove(mmd.getName()); } } // other initialization stuffs for (MetaMetadata mmd : repositoryByName.values()) { addToRepositoryByClassName(mmd); mmd.setUpLinkWith(this); } for (MmdScope scope : packageMmdScopes.values()) { for (MetaMetadata mmd : scope.valuesOfType(MetaMetadata.class)) { addToRepositoryByClassName(mmd); mmd.setUpLinkWith(this); } } initializeLocationBasedMaps(); } private void addToRepositoryByClassName(MetaMetadata mmd) { if (mmd.getExtendsAttribute() != null || mmd.isNewMetadataClass()) { MetadataClassDescriptor mcd = mmd.getMetadataClassDescriptor(); if (mcd != null) { // debug("Adding mapping from " + mcd + " to type " + mmd.getName()); repositoryByClassName.put(mcd.getDescribedClass().getName(), mmd); } for (MetaMetadata localMmd : mmd.getScope().valuesOfType(MetaMetadata.class)) { addToRepositoryByClassName(localMmd); } } } public RepositoryOrdering ordering; /** * traverse the repository and do inheritance on each meta-metadata. 
*/
  public void traverseAndInheritMetaMetadata()
  {
    InheritanceHandler handler = getInheritanceHandler();
    handler.handleMmdRepository(this);
    traverseAndResolveGenericTypeVars();
  }

  /**
   * Resolves generic type variables for every meta-metadata in the repository, sharing one
   * visited set so each nested field is processed once.
   */
  private void traverseAndResolveGenericTypeVars()
  {
    HashSet<MetaMetadataNestedField> seen = new HashSet<MetaMetadataNestedField>();
    for (MetaMetadata root : repositoryByName)
    {
      resolveGenericTypeVars(root, seen);
    }
  }

  /**
   * Resolves generic type variables in the scope of the given field, then recurses into its
   * nested child fields.
   *
   * @param nestedField
   *          the field to process.
   * @param visited
   *          fields already processed; used to stop on revisits.
   */
  private void resolveGenericTypeVars(MetaMetadataNestedField nestedField,
      HashSet<MetaMetadataNestedField> visited)
  {
    // add() reports false for an element that is already present
    if (!visited.add(nestedField))
    {
      return;
    }

    MmdScope fieldScope = nestedField.getScope();
    if (fieldScope != null)
    {
      fieldScope.resolveGenericTypeVars();
    }

    Collection<MetaMetadataField> kids = nestedField.getChildren();
    if (kids == null)
    {
      return;
    }
    for (MetaMetadataField kid : kids)
    {
      if (kid instanceof MetaMetadataNestedField)
      {
        resolveGenericTypeVars((MetaMetadataNestedField) kid, visited);
      }
    }
  }

  /** Handler that performs inheritance; created on demand by getInheritanceHandler(). */
  private InheritanceHandler inheritanceHandler;

  /**
   * @return the inheritance handler, creating a default one on first use.
   */
  public InheritanceHandler getInheritanceHandler()
  {
    if (this.inheritanceHandler != null)
    {
      return this.inheritanceHandler;
    }
    this.inheritanceHandler = new InheritanceHandler();
    return this.inheritanceHandler;
  }

  /**
   * @param handler
   *          the inheritance handler to use from now on.
   */
  public void setInheritanceHandler(InheritanceHandler handler)
  {
    this.inheritanceHandler = handler;
  }

  /**
   * @return the values of the by-name repository map.
   */
  public Collection<MetaMetadata> getMetaMetadataCollection()
  {
    return repositoryByName.values();
  }

  /**
   * Finds a meta-metadata by canonical name first, then by alternative name.
   *
   * @param name
   *          the name to look up; null yields null.
   * @return the matching meta-metadata, or null.
   */
  public MetaMetadata getMMByName(String name)
  {
    if (name == null)
    {
      return null;
    }

    MetaMetadata canonical = repositoryByName.get(name);
    if (canonical != null)
    {
      return canonical;
    }
    return getMMByAltName(name);
  }

  /**
   * @param altName
   *          an alternative name.
   * @return the meta-metadata of the matching alternative-name entry, or null when the map is
   *         missing or holds no such entry.
   */
  private MetaMetadata getMMByAltName(String altName)
  {
    if (altNames == null)
    {
      return null;
    }
    MetaMetadataAltNameEntry found = altNames.get(altName);
    if (found == null)
    {
      return null;
    }
    return found.mmd;
  }

  /**
   *
   * @return A <b>new</b> ArrayList containing the names of the MMDs in the repo map.
*/ public ArrayList<String> getMMNameList() { if (repositoryByName == null || repositoryByName.isEmpty()) return null; return new ArrayList<String>(repositoryByName.keySet()); } public MetaMetadata getMMByClass(Class<? extends Metadata> metadataClass) { if (metadataClass == null) return null; return repositoryByClassName.get(metadataClass.getName()); } /** * Get MetaMetadata. First, try matching by url_base. If this fails, including if the attribute is * null, then try by url_prefix. If this fails, including if the attribute is null, then try by * url_pattern (regular expression). * <p/> * If that lookup fails, then lookup by tag name, to acquire the default. * * @param purl * @param tagName * @return */ public MetaMetadata getDocumentMM(final ParsedURL purl, final String tagName) { // long millis = System.currentTimeMillis(); MetaMetadata result = null; if (purl != null) { if (!purl.isFile()) { String noAnchorNoQueryPageString = purl.noAnchorNoQueryPageString(); ArrayList<StrippedUrlEntry> strippedUrlEntries = documentRepositoryByUrlStripped .get(noAnchorNoQueryPageString); if (strippedUrlEntries != null) { for (StrippedUrlEntry strippedUrlEntry : strippedUrlEntries) { if (strippedUrlEntry.getSelector().checkForParams(purl)) { result = strippedUrlEntry.getMetaMetadata(); break; } } } if (result == null) { PrefixPhrase matchingPrefix = urlPrefixCollection.getMatchingPrefix(purl); if (matchingPrefix != null) { result = (MetaMetadata) matchingPrefix.getMappedObject(); } } if (result == null) { String domain = purl.domain(); if (domain != null) { ArrayList<RepositoryPatternEntry> entries = documentRepositoryByPattern.get(domain); if (entries != null) { final String purlString = purl.toString(); for (RepositoryPatternEntry entry : entries) { Matcher matcher = entry.getPattern().matcher(purlString); boolean matched = entry.isPatternFragment() ? 
matcher.find() : matcher.matches(); if (matched && entry.getSelector().checkForParams(purl)) { // debug(entry.isPatternFragment() ? "matched URL fragment by regex." : // "matched whole URL by regex."); result = entry.getMetaMetadata(); break; } } } if (result == null) { result = documentRepositoryByDomain.get(domain); if (result != null) debug("Matched by domain = " + domain + "\t" + result); } } } } if (result == null) { String suffix = purl.suffix(); if (suffix != null) result = getMMBySuffix(suffix); } } if (result == null) result = getMMByName(tagName); // log4j.debug(this.getClassSimpleName() + " getDocumentMM - Time taken (ms): " + // (System.currentTimeMillis() - millis)); return result; } // TODO implement get by domain too /** * Find the best matching MetaMetadata for the ParsedURL. Otherwise, return the default Document * metadata. * * @param purl * @return appropriate MetaMetadata. */ public MetaMetadata getDocumentMM(ParsedURL purl) { return getDocumentMM(purl, RICH_DOCUMENT_TAG); } public MetaMetadata getDocumentMM(Document metadata) { return getDocumentMM(metadata.getLocation(), metadataTScope.getTag(metadata.getClass())); } public MetaMetadata getRichDocumentMM(ParsedURL purl) { return getDocumentMM(purl, RICH_DOCUMENT_TAG); } public MetaMetadata getClippableDocumentMM(ParsedURL purl, String tagName) { return getDocumentMM(purl, tagName); } /* * public MetaMetadata getOldClippableDocumentMM(ParsedURL purl, String tagName) { MetaMetadata * result = null; if (purl != null && !purl.isFile()) { result = * imageRepositoryByUrlStripped.get(purl.noAnchorNoQueryPageString()); * * if (result == null) { String protocolStrippedURL = purl.toString().split("://")[1]; * * String key = purl.url().getProtocol() + "://" + * urlPrefixCollection.getMatchingPhrase(protocolStrippedURL, '/'); * * result = imageRepositoryByUrlStripped.get(key); * * if (result == null) { String domain = purl.domain(); if (domain != null) { * ArrayList<RepositoryPatternEntry> entries = 
imageRepositoryByPattern.get(domain); if (entries * != null) { for (RepositoryPatternEntry entry : entries) { Matcher matcher = * entry.getPattern().matcher(purl.toString()); if (matcher.find()) { result = * entry.getMetaMetadata(); } } } } } } } return (result != null) ? result : getMMByName(tagName); * } */ public MetaMetadata getImageMM(ParsedURL purl) { return getClippableDocumentMM(purl, IMAGE_TAG); } public MetaMetadata getMMBySuffix(String suffix) { return repositoryBySuffix.get(suffix); } public MetaMetadata getMMByMime(String mimeType) { return repositoryByMime.get(mimeType); } public Metadata constructByName(String name) { Metadata result = null; MetaMetadata metaMetadata = getMMByName(name); if (metaMetadata != null) { result = metaMetadata.constructMetadata(metadataTScope); } return result; } public Metadata constructBySuffix(String suffix) { MetaMetadata metaMetadata = this.getMMBySuffix(suffix); return metaMetadata == null ? null : metaMetadata.constructMetadata(metadataTScope); } /** * Look-up MetaMetadata for this purl. If there is no special MetaMetadata, use Document. * Construct Metadata of the correct subtype, base on the MetaMetadata. Set its location field to * purl. * * @param purl * @return */ public Document constructDocument(ParsedURL purl) { MetaMetadata metaMetadata = getDocumentMM(purl); Document result = (Document) metaMetadata.constructMetadata(metadataTScope); result.setLocation(purl); return result; } public Document constructDocumentBySuffix(String suffix) { Metadata result = constructBySuffix(suffix); return result instanceof Document ? (Document) result : null; } public Document constructDocumentByMime(String mimeType) { MetaMetadata metaMetadata = this.getMMByMime(mimeType); return metaMetadata == null ? null : (Document) metaMetadata.constructMetadata(metadataTScope); } /** * Construct a document by location. If nothing particular turns up, make it either a * CompoundDocument, or, if isImage, an Image. 
* * @param purl * @param isImage * @return */ public Document constructDocument(ParsedURL purl, boolean isImage) { if (isImage) { return constructImage(purl); } if (purl.isImg()) { try { Image img = constructImage(purl); if (img != null) { return img; } } catch (Exception e) { // ignore } } MetaMetadata metaMetadata = getRichDocumentMM(purl); FilterLocation rewriteLocation = metaMetadata.getRewriteLocation(); List<ParsedURL> additionalLocations = null; if (rewriteLocation != null) { try { additionalLocations = new ArrayList<ParsedURL>(); ParsedURL newPurl = rewriteLocation.filter(purl, additionalLocations); if (!purl.equals(newPurl)) { // if the purl has been changed by the filter, add the old one as additional locations, // and use the new one as the primary location. additionalLocations.add(purl); purl = newPurl; } } catch (Exception e) { logger.error("Exception filtering location " + purl, e); } } Document result = (Document) metaMetadata.constructMetadata(metadataTScope); result.setLocation(purl); if (additionalLocations != null) { for (ParsedURL additionalLocation : additionalLocations) { result.addAdditionalLocation(additionalLocation); } } return result; } /** * Look-up MetaMetadata for this purl. If there is no special MetaMetadata, use Image. Construct * Metadata of the correct subtype, base on the MetaMetadata. * * @param purl * @return A Metadata object, either of type Image, or a subclass. Never null! 
*/
  public Image constructImage(ParsedURL purl)
  {
    MetaMetadata metaMetadata = getImageMM(purl);
    // stays null when no meta-metadata is found for the location
    Image result = null;
    if (metaMetadata != null)
    {
      Metadata constructed = metaMetadata.constructMetadata(metadataTScope);
      // a selector matched this URL but its type does not construct an Image: report it loudly
      if (!(constructed instanceof Image))
      {
        throw new MetaMetadataException("Cannot convert " + constructed + " to an Image object.\n"
            + "This is usually caused by inaccurate URL patterns in "
            + "meta-metadata repository, that match a non-Image URL "
            + "with an Image or Image-derived meta-metadata type.\n" + "The accessed URL: "
            + purl);
      }
      result = (Image) constructed;
      result.setLocation(purl);
    }
    return result;
  }

  /**
   * Fills the location based look-up structures from the selectors of every meta-metadata in
   * the repository. Each selector is registered in exactly one place, checked in this order:
   * <ol>
   * <li>a reselect entry on the named meta-metadata, when the selector names one;</li>
   * <li>the stripped-URL map, when the selector has a stripped URL;</li>
   * <li>the URL prefix collection, when it has a URL path tree;</li>
   * <li>the per-domain pattern list, when it has a domain and a URL regex (or regex
   * fragment);</li>
   * <li>the domain map, when it has a domain only.</li>
   * </ol>
   * A regex without a domain is reported as an error on the meta-metadata. Image types are no
   * longer kept in separate maps (see the commented-out code); everything goes into the document
   * maps.
   */
  private void initializeLocationBasedMaps()
  {
    for (MetaMetadata metaMetadata : repositoryByName)
    {
      // metaMetadata.inheritMetaMetadata(this);
      // Class<? extends Metadata> metadataClass = metaMetadata.getMetadataClass(metadataTScope);
      // NOTE(review): the descriptor is dereferenced without a null check; this appears to rely
      // on the binding step having removed types without a descriptor -- confirm.
      Class<? extends Metadata> metadataClass = (Class<? extends Metadata>) metaMetadata
          .getMetadataClassDescriptor().getDescribedClass();
      if (metadataClass == null)
      {
        continue;
      }

      HashMap<String, ArrayList<StrippedUrlEntry>> repositoryByUrlStripped;
      HashMap<String, ArrayList<RepositoryPatternEntry>> repositoryByPattern;

      // if (Image.class.isAssignableFrom(metadataClass))
      // {
      // repositoryByUrlStripped = imageRepositoryByUrlStripped;
      // repositoryByPattern = imageRepositoryByPattern;
      // }
      // else if (Document.class.isAssignableFrom(metadataClass))
      // {
      // repositoryByUrlStripped = documentRepositoryByUrlStripped;
      // repositoryByPattern = documentRepositoryByPattern;
      // }
      // else
      // continue;
      repositoryByUrlStripped = documentRepositoryByUrlStripped;
      repositoryByPattern = documentRepositoryByPattern;

      // We need to check if something is there already
      // if something is there, then we need to check to see if it has its cf pref set
      // if not, then if I am null then I win

      ArrayList<MetaMetadataSelector> selectors = metaMetadata.getSelectors();
      for (MetaMetadataSelector selector : selectors)
      {
        // a reselect selector is attached to the named meta-metadata and takes no part in the
        // location maps; an unknown name is skipped silently
        String reselectMetaMetadataName = selector.getReselectMetaMetadataName();
        if (reselectMetaMetadataName != null)
        {
          MetaMetadata reselectMetaMetadata = repositoryByName.get(reselectMetaMetadataName);
          if (reselectMetaMetadata != null)
          {
            reselectMetaMetadata.addReselectEntry(selector, metaMetadata);
          }
          continue;
        }

        ParsedURL strippedPurl = selector.getUrlStripped();
        if (strippedPurl != null)
        {
          // several selectors may share one stripped URL, so entries are collected in a list
          String noAnchorNoQueryPageString = strippedPurl.noAnchorNoQueryPageString();
          ArrayList<StrippedUrlEntry> strippedUrlEntries = repositoryByUrlStripped
              .get(noAnchorNoQueryPageString);
          if (strippedUrlEntries == null)
          {
            strippedUrlEntries = new ArrayList<StrippedUrlEntry>();
            repositoryByUrlStripped.put(noAnchorNoQueryPageString, strippedUrlEntries);
          }
          strippedUrlEntries.add(new StrippedUrlEntry(metaMetadata, selector));
          metaMetadata.setMmSelectorType(MMSelectorType.LOCATION);
        }
        else
        {
          ParsedURL urlPathTree = selector.getUrlPathTree();
          if (urlPathTree != null)
          {
            // the prefix phrase itself carries the meta-metadata as its mapped object
            PrefixPhrase pp = urlPrefixCollection.add(urlPathTree);
            pp.setMappedObject(metaMetadata);

            // TODO is this next line correct??? it looks wrong!
            // repositoryByUrlStripped.put(urlPathTree.toString(), metaMetadata);
            metaMetadata.setMmSelectorType(MMSelectorType.LOCATION);
          }
          else
          {
            // use .pattern() for comparison
            String domain = selector.getDomain();

            // prefer the whole-URL regex; fall back to the fragment regex when it is missing or
            // empty
            boolean isPatternFragment = false;
            Pattern urlPattern = selector.getUrlRegex();
            if (urlPattern == null || urlPattern.pattern().length() <= 0)
            {
              urlPattern = selector.getUrlRegexFragment();
              isPatternFragment = true;
            }

            if (domain != null)
            {
              if (urlPattern != null)
              {
                // pattern entries are grouped by domain, so a lookup only tests the patterns of
                // the URL's own domain
                ArrayList<RepositoryPatternEntry> bucket = repositoryByPattern.get(domain);
                if (bucket == null)
                {
                  bucket = new ArrayList<RepositoryPatternEntry>(2);
                  repositoryByPattern.put(domain, bucket);
                }
                bucket.add(new RepositoryPatternEntry(urlPattern, metaMetadata, selector,
                    isPatternFragment));
                metaMetadata.setMmSelectorType(MMSelectorType.LOCATION);
              }
              else
              {
                // domain only -- no pattern
                // a later meta-metadata with the same domain replaces an earlier one
                documentRepositoryByDomain.put(domain, metaMetadata);
                metaMetadata.setMmSelectorType(MMSelectorType.DOMAIN);
              }
            }
            else if (urlPattern != null)
            {
              // a regex cannot be registered without the domain that keys the pattern map
              metaMetadata.error("<selector with url_regex=\"" + urlPattern
                  + "\" but domain is not specified :(");
            }
          }
        }
      }
    }
  }

  /**
   * This initializes the map based on mime type and suffix.
*/ void initializeSuffixAndMimeBasedMaps() { if (repositoryByName == null) return; for (MetaMetadata metaMetadata : repositoryByName) { ArrayList<String> suffixes = metaMetadata.getSuffixes(); if (suffixes != null) { for (String suffix : suffixes) { // FIXME-- Ask whether the suffix and mime should be inherited or not if (!repositoryBySuffix.containsKey(suffix)) { repositoryBySuffix.put(suffix, metaMetadata); metaMetadata.setMmSelectorType(MMSelectorType.SUFFIX_OR_MIME); } } } ArrayList<String> mimeTypes = metaMetadata.getMimeTypes(); if (mimeTypes != null) { for (String mimeType : mimeTypes) { // FIXME -- Ask whether the suffix and mime should be inherited or not if (!repositoryByMime.containsKey(mimeType)) { repositoryByMime.put(mimeType, metaMetadata); metaMetadata.setMmSelectorType(MMSelectorType.SUFFIX_OR_MIME); } } } } } /** * This method initializes the mappings from selectors in the repository to selectors in meta * metadata. */ private void initializeSelectors() { if (selectorsByName != null) { for (MetaMetadataSelector selector : selectorsByName) { String prefName = selector.getPrefName(); prefName = Pref.lookupString(prefName); if (prefName == null) { prefName = selector.getDefaultPref(); } int numberOfMetametaData = 0; if (repositoryByName != null) { MetaMetadata onlyCandidate = null; boolean prefNameMetaMetadataFound = false; for (MetaMetadata metaMetadata : repositoryByName) { onlyCandidate = metaMetadata; if (metaMetadata != null) { ArrayList<MetaMetadataSelector> mSelectors = metaMetadata.getSelectors(); for (int i = 0; i < mSelectors.size(); i++) { MetaMetadataSelector mSelector = mSelectors.get(i); if (mSelector != null) { String mSelectorName = mSelector.getName(); if (mSelectorName != null) { if (mSelectorName.equals(selector.getName())) { numberOfMetametaData += 1; if (metaMetadata.getName().equals(prefName)) { mSelectors.set(i, selector); prefNameMetaMetadataFound = true; } } } } } } } if (prefNameMetaMetadataFound == false) { if 
(numberOfMetametaData == 0) { Debug.warning(this, "Selector " + selector.getName() + " does not appear to be used in any MetaMetadata."); } else if (numberOfMetametaData == 1) { onlyCandidate.addSelector(selector); } else if (numberOfMetametaData > 1) { Debug.error(this, "Selector " + selector.getName() + " is ambiguous. Set the pref_name or use a default_pref."); } } } } } } public void initAltNames() { if (altNames == null) { altNames = new HashMapArrayList<String, MetaMetadataAltNameEntry>(); for (MetaMetadata mmd : repositoryByName.values()) { String otherTags = mmd.getOtherTags(); if (otherTags != null && otherTags.length() > 0) { String[] tags = otherTags.split(","); for (String tag : tags) { tag = tag.trim(); if (tag.length() > 0) { altNames.put(tag, new MetaMetadataAltNameEntry(tag, mmd)); } } } } } } public void deserializationPostHook(TranslationContext translationContext, Object object, Object parent) { initializeSelectors(); } @Override public String toString() { String result = "MetaMetadataRepository"; if (name != null) result += "[" + name + "]"; return result; } /** * @return the baseDocumentMM */ public static MetaMetadata getBaseDocumentMM() { return baseDocumentMM; } /** * @return the baseImageMM */ public static MetaMetadata getBaseImageMM() { return baseImageMM; } /** * This is not Object.getHashCode()! This is for repository versioning. * * @return The hash of the whole repository. */ public String getHash() { return hash; } public void setHash(String hash) { this.hash = hash; } }