/**
*
*/
package ecologylab.bigsemantics.metametadata;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import ecologylab.appframework.types.prefs.Pref;
import ecologylab.bigsemantics.collecting.CookieProcessing;
import ecologylab.bigsemantics.collecting.LinkedMetadataMonitor;
import ecologylab.bigsemantics.collecting.SemanticsGlobalScope;
import ecologylab.bigsemantics.collecting.SemanticsSite;
import ecologylab.bigsemantics.collecting.SemanticsSiteMap;
import ecologylab.bigsemantics.metadata.Metadata;
import ecologylab.bigsemantics.metadata.MetadataClassDescriptor;
import ecologylab.bigsemantics.metadata.builtins.Document;
import ecologylab.bigsemantics.metadata.builtins.Image;
import ecologylab.bigsemantics.metadata.scalar.types.MetadataScalarType;
import ecologylab.bigsemantics.metametadata.exceptions.MetaMetadataException;
import ecologylab.bigsemantics.namesandnums.DocumentParserTagNames;
import ecologylab.collections.PrefixCollection;
import ecologylab.collections.PrefixPhrase;
import ecologylab.generic.Debug;
import ecologylab.generic.HashMapArrayList;
import ecologylab.net.ParsedURL;
import ecologylab.net.UserAgent;
import ecologylab.serialization.ElementState;
import ecologylab.serialization.SimplTypesScope;
import ecologylab.serialization.TranslationContext;
import ecologylab.serialization.annotations.simpl_collection;
import ecologylab.serialization.annotations.simpl_composite;
import ecologylab.serialization.annotations.simpl_inherit;
import ecologylab.serialization.annotations.simpl_map;
import ecologylab.serialization.annotations.simpl_nowrap;
import ecologylab.serialization.annotations.simpl_scalar;
import ecologylab.serialization.annotations.simpl_tag;
import ecologylab.textformat.NamedStyle;
/**
* The repository of meta-metadata wrappers.
*
* Wrapper definitions can scatter over multiple files, while loadFromXXX() methods will collect
* information and assemble a unified representation.
*
* @author damaraju
*
*/
@SuppressWarnings({ "rawtypes", "unchecked" })
@simpl_inherit
public class MetaMetadataRepository extends ElementState
implements PackageSpecifier, DocumentParserTagNames
{
/** SLF4J logger for this class. */
static final Logger logger = LoggerFactory.getLogger(MetaMetadataRepository.class);
/** Name of the style looked up by {@link #getDefaultStyle()}. */
private static final String DEFAULT_STYLE_NAME = "default";
/** Returned by {@link #getBaseDocumentMM()}; never assigned inside this class. */
static MetaMetadata baseDocumentMM;
/** Returned by {@link #getBaseImageMM()}; never assigned inside this class. */
static MetaMetadata baseImageMM;
// [region] de/serializable data fields.
/**
* The name of the repository.
*/
@simpl_scalar
private String name;
/**
* Repository version hash, see {@link #getHash()}. Carries no simpl annotation.
*/
private String hash;
/**
* Build information; created lazily by {@link #build()} when absent.
*/
@simpl_composite
private Build build;
/**
* The package in which the class files have to be generated.
*/
@simpl_tag("package")
@simpl_scalar
private String packageAttribute;
/**
* User agents, keyed by the name used in {@link #getUserAgentString(String)}.
*/
@simpl_map("user_agent")
private Map<String, UserAgent> userAgents;
/**
* Name of the default user agent. When null, it is resolved lazily from the agent flagged as
* default in {@link #getDefaultUserAgentString()}.
*/
@simpl_scalar
private String defaultUserAgentName;
/**
* Default cache life, kept as the raw string; see {@link #getDefaultCacheLife()}.
*/
@simpl_scalar
private String defaultCacheLife;
/**
* Definition of search engines.
*/
@simpl_composite
private SearchEngines searchEngines;
/**
* Definition of sites.
*/
@simpl_map("site")
private SemanticsSiteMap sites;
/**
* Definition of named styles, keyed by style name.
*/
@simpl_map("named_style")
private Map<String, NamedStyle> namedStyles;
/**
* Definition of cookie processors.
*/
@simpl_nowrap
@simpl_collection("cookie_processing")
private ArrayList<CookieProcessing> cookieProcessors;
/**
* Definition of selectors. Selectors are used to select a meta-metadata based on URL pattern or
* MIME type / suffix.
*/
@simpl_map("selector")
@simpl_nowrap
private HashMapArrayList<String, MetaMetadataSelector> selectorsByName;
/**
* The map from meta-metadata name (currently simple name, but might be extended to fully
* qualified name in the future) to meta-metadata objects. This collection is filled during the
* loading process.
*
* @see MetaMetadata
*/
@simpl_map("meta_metadata")
@simpl_nowrap
HashMapArrayList<String, MetaMetadata> repositoryByName;
/**
* The map from alternative meta-metadata names to meta-metadata type objects.
*
* Call initAltNames() to fill in this collection after loading the repository.
*
* When looking up meta-metadata by name, canonical names should be looked up before this map.
*/
@simpl_map("entry")
HashMapArrayList<String, MetaMetadataAltNameEntry> altNames;
// [endregion]
/**
* Package-level mmd scopes. May be null when none have been loaded.
*/
@simpl_map("mmd_scope")
Map<String, MmdScope> packageMmdScopes;
// [region] repository maps generated from repositoryByName. used for look-up.
/**
* Meta-metadata keyed by the name ({@code Class.getName()}) of the described metadata class.
*/
private HashMap<String, MetaMetadata> repositoryByClassName = new HashMap<String, MetaMetadata>();
/**
* Repository by MIME type.
*/
private HashMap<String, MetaMetadata> repositoryByMime = new HashMap<String, MetaMetadata>();
/**
* Repository by suffix.
*/
private HashMap<String, MetaMetadata> repositoryBySuffix = new HashMap<String, MetaMetadata>();
/**
* Collection of URL prefixes, using '/' as the separator. Each registered prefix phrase carries
* its meta-metadata as the mapped object.
*/
private PrefixCollection urlPrefixCollection = new PrefixCollection('/');
/**
* Repository of documents with domain as key; filled from selectors that give a domain but no
* URL pattern.
*/
private HashMap<String, MetaMetadata> documentRepositoryByDomain = new HashMap<String, MetaMetadata>();
/**
* Repository of documents with noAnchorNoQuery URL string as key.
*/
private HashMap<String, ArrayList<StrippedUrlEntry>> documentRepositoryByUrlStripped = new HashMap<String, ArrayList<StrippedUrlEntry>>();
/**
* Repository of documents selected by URL pattern. The key is the selector's domain; each
* bucket lists the pattern entries registered for that domain.
*/
private HashMap<String, ArrayList<RepositoryPatternEntry>> documentRepositoryByPattern = new HashMap<String, ArrayList<RepositoryPatternEntry>>();
/**
* (Disabled) Repository of images with noAnchorNoQuery URL string as key.
*/
// private HashMap<String, MetaMetadata> imageRepositoryByUrlStripped = new HashMap<String,
// MetaMetadata>();
/**
* (Disabled) Repository of images with URL pattern as key.
*/
// private HashMap<String, ArrayList<RepositoryPatternEntry>> imageRepositoryByPattern = new
// HashMap<String, ArrayList<RepositoryPatternEntry>>();
// [endregion]
/**
* The metadata translation scope used by this repository. Assigned in
* bindMetadataClassDescriptorsToMetaMetadata().
*/
private SimplTypesScope metadataTScope;
/** Monitor used for linking metadata; see getLinkedMetadataMonitor(). */
private LinkedMetadataMonitor linkedMetadataMonitor = new LinkedMetadataMonitor();
// static Logger log4j = Logger.getLogger(BaseLogger.baseLogger);
/** Guards {@link #initializeTypes()} so that type registration runs only once. */
private static boolean initializedTypes;
// Register metadata types as soon as this class is initialized.
static
{
initializeTypes();
}
/**
 * Registers metadata-specific scalar types and touches the built-in metadata types scope. Only
 * the first call does any work; later calls return immediately.
 */
public static synchronized void initializeTypes()
{
  if (initializedTypes)
  {
    return;
  }
  initializedTypes = true;
  // register metadata-specific scalar types
  MetadataScalarType.init();
  ecologylab.bigsemantics.metadata.builtins.MetadataBuiltinsTypesScope.get();
}
// [region] basic methods (getter/setters, etc.).
/**
 * @return the name of this repository.
 */
public String getName()
{
  return this.name;
}
/**
 * Sets the name of this repository.
 *
 * @param name
 *          the new repository name.
 */
public void setName(String name)
{
  this.name = name;
}
/**
 * @return the build information of this repository, creating an empty one on first access.
 */
public Build build()
{
  Build current = this.build;
  if (current == null)
  {
    current = new Build();
    this.build = current;
  }
  return current;
}
/**
 * @return the value of the {@code package} attribute of this repository.
 */
@Override
public String packageName()
{
  return this.packageAttribute;
}
/**
 * @return the style registered under the default style name, or null if there is none.
 */
public NamedStyle getDefaultStyle()
{
  NamedStyle defaultStyle = lookupStyle(DEFAULT_STYLE_NAME);
  return defaultStyle;
}
/**
 * Looks up a named style.
 *
 * @param styleName
 *          the name of the style.
 * @return the matching style, or null when no styles are defined or the name is unknown.
 */
public NamedStyle lookupStyle(String styleName)
{
  if (namedStyles == null)
  {
    return null;
  }
  return namedStyles.get(styleName);
}
/**
 * @return the default cache life as the raw string stored in the repository; may be null.
 */
public String getDefaultCacheLife()
{
  return this.defaultCacheLife;
}
/**
 * Returns the user agent string of the default user agent. When no default name is set, the
 * first agent flagged as default is used and its name is remembered for later calls.
 *
 * @return the default user agent string, or null when no default user agent can be determined.
 */
public String getDefaultUserAgentString()
{
  if (defaultUserAgentName == null)
  {
    for (UserAgent userAgent : getUserAgents().values())
    {
      if (userAgent.isDefaultAgent())
      {
        defaultUserAgentName = userAgent.name();
        break;
      }
    }
  }
  if (defaultUserAgentName == null)
  {
    // no agent is flagged as default; looking up a null name would only end in an NPE.
    return null;
  }
  return getUserAgentString(defaultUserAgentName);
}
/**
 * Looks up a user agent string by user agent name.
 *
 * @param name
 *          the name of the user agent.
 * @return the user agent string, or null when no user agent is registered under that name.
 */
public String getUserAgentString(String name)
{
  UserAgent userAgent = getUserAgents().get(name);
  // an unknown name used to raise a NullPointerException here.
  return userAgent == null ? null : userAgent.userAgentString();
}
/**
 * @return the user agent map, creating an empty one on first access so the result is never null.
 */
private Map<String, UserAgent> getUserAgents()
{
  Map<String, UserAgent> agents = userAgents;
  if (agents == null)
  {
    agents = new HashMap<String, UserAgent>();
    userAgents = agents;
  }
  return agents;
}
/**
 * @return the default engine declared by the search engine definitions, or "bing" when the
 *         repository defines no search engines at all.
 */
public String getDefaultSearchEngine()
{
  if (searchEngines == null)
  {
    return "bing";
  }
  return searchEngines.getDefaultEngine();
}
/**
 * Looks up a search engine definition.
 *
 * @param engineName
 *          the name of the engine.
 * @return the engine found by the search engine definitions, or null when the repository
 *         defines no search engines.
 */
public SearchEngine getSearchEngine(String engineName)
{
  return searchEngines == null ? null : searchEngines.getEngine(engineName);
}
/**
 * Looks up a site definition by domain.
 *
 * @param domain
 *          the domain of the site.
 * @return the site registered for the domain, or null when there is none (including when the
 *         repository defines no sites at all).
 */
public SemanticsSite getSite(String domain)
{
  // the site map is only populated by deserialization / integration, so it may be absent.
  return sites == null ? null : sites.get(domain);
}
/**
 * Delegates to the site map's {@code getOrConstruct} for the given document.
 *
 * @param document
 *          the document whose site is requested.
 * @param semanticsSessionScope
 *          the scope handed through to the site map.
 * @return whatever the site map returns for the document.
 */
public SemanticsSite getSite(Document document, SemanticsGlobalScope semanticsSessionScope)
{
  SemanticsSite site = sites.getOrConstruct(document, semanticsSessionScope);
  return site;
}
/**
 * @return the site map of this repository; may be null.
 */
public SemanticsSiteMap getSemanticsSiteMap()
{
  return this.sites;
}
/**
 * @return the metadata translation scope bound to this repository; null until
 *         {@code bindMetadataClassDescriptorsToMetaMetadata} has been called.
 */
public SimplTypesScope metadataTranslationScope()
{
  return this.metadataTScope;
}
/**
 * @return the names of all meta-metadata in this repository, or null when nothing is loaded.
 */
public Set<String> keySet()
{
  if (repositoryByName == null)
  {
    return null;
  }
  return repositoryByName.keySet();
}
/**
 * @return all meta-metadata in this repository, or null when nothing is loaded.
 */
public Collection<MetaMetadata> values()
{
  if (repositoryByName == null)
  {
    return null;
  }
  return repositoryByName.values();
}
/**
 * @return the package mmd scopes of this repository; may be null.
 */
public Map<String, MmdScope> getPackageMmdScopes()
{
  return packageMmdScopes;
}
/**
 * @return the monitor used for linking metadata.
 */
public LinkedMetadataMonitor getLinkedMetadataMonitor()
{
  return this.linkedMetadataMonitor;
}
/**
 * Registers a meta-metadata under its name, so that a repository can serve as a wrapper when
 * deserializing MMD into the repository directory. A null meta-metadata, or one without a
 * non-empty name, is rejected with a message on standard error.
 *
 * @param additionalMMD
 *          the meta-metadata to add.
 */
public void addMetaMetadata(MetaMetadata additionalMMD)
{
  String mmdName = additionalMMD == null ? null : additionalMMD.getName();
  boolean invalid = mmdName == null || mmdName.length() == 0;
  if (invalid)
  {
    System.err.println("Invalid mmd, not adding to the repository!");
    return;
  }
  // create the map only when it is actually needed.
  if (repositoryByName == null)
  {
    repositoryByName = new HashMapArrayList<String, MetaMetadata>();
  }
  repositoryByName.put(mmdName, additionalMMD);
}
// [endregion]
/**
 * Combines the data stored in the parameter repository into this repository, except for
 * repositoryByName. Merged here: user agents, search engines, named styles, sites, default
 * cache life and build info. For maps, entries already present in this repository win and
 * duplicates are reported; scalar / composite values are only adopted when this repository has
 * none.
 *
 * @param theOtherRepository
 *          the repository to merge into this one; null is ignored.
 */
void integrateRepositoryWithThis(MetaMetadataRepository theOtherRepository)
{
  if (theOtherRepository == null)
  {
    return;
  }

  this.userAgents = combineMap(this.userAgents, theOtherRepository.userAgents);

  if (this.searchEngines == null)
  {
    // previously the other repository's search engines were silently dropped in this case;
    // adopt them, the same way combineMap() adopts a source map when the destination is null.
    this.searchEngines = theOtherRepository.searchEngines;
  }
  else if (theOtherRepository.searchEngines != null)
  {
    String theOtherDefaultEngine = theOtherRepository.searchEngines.getDefaultEngine();
    if (theOtherDefaultEngine != null)
    {
      if (this.searchEngines.getDefaultEngine() == null)
        this.searchEngines.setDefaultEngine(theOtherDefaultEngine);
      else
        warning("default engine already defined, ignoring the one defined in "
                + theOtherRepository);
    }
    this.searchEngines.setSearchEngines(combineMap(this.searchEngines.getSearchEngines(),
                                                   theOtherRepository.searchEngines
                                                       .getSearchEngines()));
  }

  this.namedStyles = combineMap(this.namedStyles, theOtherRepository.namedStyles);
  this.sites = combineMap(this.sites, theOtherRepository.sites);

  if (this.defaultCacheLife == null)
  {
    this.defaultCacheLife = theOtherRepository.defaultCacheLife;
  }
  if (this.build == null)
  {
    this.build = theOtherRepository.build;
  }
}
/**
 * Copies all entries of srcMap into destMap. A key that already exists in destMap is not
 * overwritten; it is reported through error() instead.
 *
 * @param destMap
 *          the map receiving the entries; may be null.
 * @param srcMap
 *          the map providing the entries; may be null.
 * @return destMap when it is not null, otherwise srcMap (which may itself be null).
 */
private <KT, VT, MT extends Map<KT, VT>> MT combineMap(MT destMap, MT srcMap)
{
  if (destMap == null)
    return srcMap;
  if (srcMap == null)
    return destMap;

  // iterate entries so that each key needs only one lookup in srcMap.
  for (Map.Entry<KT, VT> entry : srcMap.entrySet())
  {
    KT key = entry.getKey();
    VT value = entry.getValue();
    if (destMap.containsKey(key))
    {
      // a null value must not turn the duplicate report into a NullPointerException.
      String typeName = value == null ? "null" : value.getClass().getSimpleName();
      error(typeName + " DEFINED TWICE: " + key);
    }
    else
    {
      destMap.put(key, value);
    }
  }
  return destMap;
}
/**
 * Runs inheritance over the repository and then lets every meta-metadata find or generate its
 * metadata class descriptor in a translation scope obtained under the given name. Note that the
 * graph switch should be turned on because there probably will be type graphs in the
 * meta-metadata type system.
 *
 * @param TSName
 *          the name of the resulting translation scope.
 * @return the translation scope obtained for TSName.
 */
public SimplTypesScope traverseAndGenerateTranslationScope(String TSName)
{
  // the scope starts out with no classes; built-ins are not pre-registered here.
  SimplTypesScope generatedScope = SimplTypesScope.get(TSName, new Class[] {});

  // inheritance.
  traverseAndInheritMetaMetadata();

  // generate descriptors, iterating over a snapshot of the current values.
  List<MetaMetadata> snapshot = new ArrayList<MetaMetadata>(repositoryByName.values());
  for (int i = 0; i < snapshot.size(); i++)
  {
    snapshot.get(i).findOrGenerateMetadataClassDescriptor(generatedScope);
  }
  return generatedScope;
}
/**
 * Recursively bind MetadataClassDescriptors to all MetaMetadata, then perform the remaining
 * initialization: meta-metadata that cannot be bound are removed from the repository, the
 * class-name map is filled, each meta-metadata sets up its links with this repository, and the
 * location based look-up maps are built.
 *
 * @param metadataTScope
 *          the (global) metadata translation scope used for binding.
 */
public void bindMetadataClassDescriptorsToMetaMetadata(SimplTypesScope metadataTScope)
{
  this.metadataTScope = metadataTScope;

  traverseAndInheritMetaMetadata();

  // global metadata classes
  // use another copy because we may modify the scope during the process
  ArrayList<MetaMetadata> mmds = new ArrayList<MetaMetadata>(this.repositoryByName.values());
  for (MetaMetadata mmd : mmds)
  {
    MetadataClassDescriptor mcd = mmd.bindMetadataClassDescriptor(metadataTScope);
    if (mcd == null)
    {
      warning("Cannot bind metadata class descriptor for " + mmd);
      this.repositoryByName.remove(mmd.getName());
    }
  }

  // other initialization stuffs
  for (MetaMetadata mmd : repositoryByName.values())
  {
    addToRepositoryByClassName(mmd);
    mmd.setUpLinkWith(this);
  }

  // package scopes are optional: the map has no initializer and is only filled by loading.
  if (packageMmdScopes != null)
  {
    for (MmdScope scope : packageMmdScopes.values())
    {
      for (MetaMetadata mmd : scope.valuesOfType(MetaMetadata.class))
      {
        addToRepositoryByClassName(mmd);
        mmd.setUpLinkWith(this);
      }
    }
  }

  initializeLocationBasedMaps();
}
/**
 * Registers a meta-metadata, and recursively the meta-metadata defined in its local scope,
 * under the name of the metadata class it describes. Only meta-metadata that extend another
 * type or define a new metadata class are considered.
 *
 * @param mmd
 *          the meta-metadata to register.
 */
private void addToRepositoryByClassName(MetaMetadata mmd)
{
  if (mmd.getExtendsAttribute() == null && !mmd.isNewMetadataClass())
  {
    return;
  }

  MetadataClassDescriptor mcd = mmd.getMetadataClassDescriptor();
  if (mcd != null)
  {
    repositoryByClassName.put(mcd.getDescribedClass().getName(), mmd);
  }

  // the local scope may be absent (resolveGenericTypeVars() treats it as nullable, too).
  MmdScope localScope = mmd.getScope();
  if (localScope != null)
  {
    for (MetaMetadata localMmd : localScope.valuesOfType(MetaMetadata.class))
    {
      addToRepositoryByClassName(localMmd);
    }
  }
}
// Not read or written anywhere in this class. NOTE(review): presumably maintained by external
// code (e.g. the inheritance handling) -- confirm before relying on it.
public RepositoryOrdering ordering;
/**
 * Hands this repository to the inheritance handler, then resolves generic type variables on all
 * meta-metadata.
 */
public void traverseAndInheritMetaMetadata()
{
  InheritanceHandler handler = getInheritanceHandler();
  handler.handleMmdRepository(this);
  traverseAndResolveGenericTypeVars();
}
/**
 * Resolves generic type variables starting from every meta-metadata in the repository. A shared
 * visited set makes sure each nested field is processed only once.
 */
private void traverseAndResolveGenericTypeVars()
{
  final HashSet<MetaMetadataNestedField> seen = new HashSet<MetaMetadataNestedField>();
  for (MetaMetadata topLevelMmd : repositoryByName)
  {
    resolveGenericTypeVars(topLevelMmd, seen);
  }
}
/**
 * Resolves generic type variables in the scope of a nested field, then recurses into its nested
 * child fields.
 *
 * @param nestedField
 *          the field to process.
 * @param visited
 *          fields that have been processed already; used to stop on repeated fields.
 */
private void resolveGenericTypeVars(MetaMetadataNestedField nestedField,
                                    HashSet<MetaMetadataNestedField> visited)
{
  // add() reports false when the field was already in the set.
  if (!visited.add(nestedField))
  {
    return;
  }

  MmdScope localScope = nestedField.getScope();
  if (localScope != null)
  {
    localScope.resolveGenericTypeVars();
  }

  Collection<MetaMetadataField> childFields = nestedField.getChildren();
  if (childFields == null)
  {
    return;
  }
  for (MetaMetadataField child : childFields)
  {
    if (child instanceof MetaMetadataNestedField)
    {
      resolveGenericTypeVars((MetaMetadataNestedField) child, visited);
    }
  }
}
private InheritanceHandler inheritanceHandler;

/**
 * @return the inheritance handler of this repository, creating a default one on first access.
 */
public InheritanceHandler getInheritanceHandler()
{
  InheritanceHandler handler = this.inheritanceHandler;
  if (handler == null)
  {
    handler = new InheritanceHandler();
    this.inheritanceHandler = handler;
  }
  return handler;
}
/**
 * Replaces the inheritance handler used by {@code traverseAndInheritMetaMetadata()}.
 *
 * @param handler
 *          the handler to use; if null, a default one is created on next access.
 */
public void setInheritanceHandler(InheritanceHandler handler)
{
  inheritanceHandler = handler;
}
/**
 * @return all meta-metadata in this repository. Unlike {@code values()}, this does not guard
 *         against a repository with nothing loaded.
 */
public Collection<MetaMetadata> getMetaMetadataCollection()
{
  Collection<MetaMetadata> all = repositoryByName.values();
  return all;
}
/**
 * Looks up a meta-metadata by name. Canonical names take precedence over alternative names.
 *
 * @param name
 *          the canonical or alternative name of the meta-metadata.
 * @return the matching meta-metadata, or null when the name is null or unknown.
 */
public MetaMetadata getMMByName(String name)
{
  if (name == null)
    return null;
  // nothing may be loaded yet; keySet() / values() / getMMNameList() guard this case as well.
  MetaMetadata result = repositoryByName == null ? null : repositoryByName.get(name);
  return result == null ? getMMByAltName(name) : result;
}
/**
 * Looks up a meta-metadata by alternative name.
 *
 * @param altName
 *          the alternative name.
 * @return the meta-metadata of the matching entry, or null when alternative names are not
 *         initialized or the name is unknown.
 */
private MetaMetadata getMMByAltName(String altName)
{
  if (altNames == null)
  {
    return null;
  }
  MetaMetadataAltNameEntry entry = altNames.get(altName);
  if (entry == null)
  {
    return null;
  }
  return entry.mmd;
}
/**
 * @return A <b>new</b> ArrayList containing the names of the MMDs in the repo map, or null when
 *         the map is missing or empty.
 */
public ArrayList<String> getMMNameList()
{
  boolean nothingLoaded = repositoryByName == null || repositoryByName.isEmpty();
  if (nothingLoaded)
  {
    return null;
  }
  ArrayList<String> names = new ArrayList<String>(repositoryByName.keySet());
  return names;
}
/**
 * Looks up the meta-metadata registered for a metadata class.
 *
 * @param metadataClass
 *          the metadata class.
 * @return the meta-metadata registered under the class name, or null when the class is null or
 *         unknown.
 */
public MetaMetadata getMMByClass(Class<? extends Metadata> metadataClass)
{
  return metadataClass == null ? null : repositoryByClassName.get(metadataClass.getName());
}
/**
 * Selects the MetaMetadata for a location. For non-file URLs the look-up order is: stripped URL
 * (url_base), URL prefix, URL pattern (regular expression) within the URL's domain, then the
 * domain alone. If none of these match, or the URL is a file, the suffix of the URL is tried.
 * <p/>
 * If everything fails, or purl is null, the meta-metadata named by tagName is returned as the
 * default.
 *
 * @param purl
 *          the location to select for; may be null.
 * @param tagName
 *          the name of the fallback meta-metadata.
 * @return the selected meta-metadata, or null when even the fallback is unknown.
 */
public MetaMetadata getDocumentMM(final ParsedURL purl, final String tagName)
{
  MetaMetadata found = null;

  if (purl != null)
  {
    if (!purl.isFile())
    {
      // (1) entries registered for exactly this stripped URL.
      final String strippedKey = purl.noAnchorNoQueryPageString();
      final ArrayList<StrippedUrlEntry> exactEntries = documentRepositoryByUrlStripped
          .get(strippedKey);
      if (exactEntries != null)
      {
        for (StrippedUrlEntry exactEntry : exactEntries)
        {
          if (!exactEntry.getSelector().checkForParams(purl))
          {
            continue;
          }
          found = exactEntry.getMetaMetadata();
          break;
        }
      }

      // (2) URL prefix.
      if (found == null)
      {
        final PrefixPhrase prefix = urlPrefixCollection.getMatchingPrefix(purl);
        if (prefix != null)
        {
          found = (MetaMetadata) prefix.getMappedObject();
        }
      }

      // (3) regex patterns of the domain, then (4) the domain itself.
      final String domain = found == null ? purl.domain() : null;
      if (domain != null)
      {
        final ArrayList<RepositoryPatternEntry> patternEntries = documentRepositoryByPattern
            .get(domain);
        if (patternEntries != null)
        {
          final String purlString = purl.toString();
          for (RepositoryPatternEntry patternEntry : patternEntries)
          {
            final Matcher matcher = patternEntry.getPattern().matcher(purlString);
            final boolean matched;
            if (patternEntry.isPatternFragment())
            {
              matched = matcher.find();
            }
            else
            {
              matched = matcher.matches();
            }
            if (matched && patternEntry.getSelector().checkForParams(purl))
            {
              found = patternEntry.getMetaMetadata();
              break;
            }
          }
        }

        if (found == null)
        {
          found = documentRepositoryByDomain.get(domain);
          if (found != null)
          {
            debug("Matched by domain = " + domain + "\t" + found);
          }
        }
      }
    }

    // (5) suffix; this is the only location based look-up applied to file URLs.
    if (found == null)
    {
      final String suffix = purl.suffix();
      if (suffix != null)
      {
        found = getMMBySuffix(suffix);
      }
    }
  }

  return found != null ? found : getMMByName(tagName);
}
// TODO implement get by domain too
/**
 * Find the best matching MetaMetadata for the ParsedURL, falling back to the meta-metadata
 * named by {@code RICH_DOCUMENT_TAG}.
 *
 * @param purl
 *          the location to select for.
 * @return appropriate MetaMetadata.
 */
public MetaMetadata getDocumentMM(ParsedURL purl)
{
  MetaMetadata selected = getDocumentMM(purl, RICH_DOCUMENT_TAG);
  return selected;
}
/**
 * Selects the MetaMetadata for an existing document, using its location and, as fallback name,
 * the tag its class has in the metadata translation scope.
 *
 * @param metadata
 *          the document to select for.
 * @return appropriate MetaMetadata.
 */
public MetaMetadata getDocumentMM(Document metadata)
{
  ParsedURL location = metadata.getLocation();
  String fallbackTag = metadataTScope.getTag(metadata.getClass());
  return getDocumentMM(location, fallbackTag);
}
/**
 * Selects the MetaMetadata for a location, falling back to the one named by
 * {@code RICH_DOCUMENT_TAG}.
 *
 * @param purl
 *          the location to select for.
 * @return appropriate MetaMetadata.
 */
public MetaMetadata getRichDocumentMM(ParsedURL purl)
{
  MetaMetadata selected = getDocumentMM(purl, RICH_DOCUMENT_TAG);
  return selected;
}
/**
 * Currently a plain alias of {@code getDocumentMM(purl, tagName)}.
 *
 * @param purl
 *          the location to select for.
 * @param tagName
 *          the name of the fallback meta-metadata.
 * @return appropriate MetaMetadata.
 */
public MetaMetadata getClippableDocumentMM(ParsedURL purl, String tagName)
{
  MetaMetadata selected = getDocumentMM(purl, tagName);
  return selected;
}
/*
* public MetaMetadata getOldClippableDocumentMM(ParsedURL purl, String tagName) { MetaMetadata
* result = null; if (purl != null && !purl.isFile()) { result =
* imageRepositoryByUrlStripped.get(purl.noAnchorNoQueryPageString());
*
* if (result == null) { String protocolStrippedURL = purl.toString().split("://")[1];
*
* String key = purl.url().getProtocol() + "://" +
* urlPrefixCollection.getMatchingPhrase(protocolStrippedURL, '/');
*
* result = imageRepositoryByUrlStripped.get(key);
*
* if (result == null) { String domain = purl.domain(); if (domain != null) {
* ArrayList<RepositoryPatternEntry> entries = imageRepositoryByPattern.get(domain); if (entries
* != null) { for (RepositoryPatternEntry entry : entries) { Matcher matcher =
* entry.getPattern().matcher(purl.toString()); if (matcher.find()) { result =
* entry.getMetaMetadata(); } } } } } } } return (result != null) ? result : getMMByName(tagName);
* }
*/
/**
 * Selects the MetaMetadata for an image location, falling back to the one named by
 * {@code IMAGE_TAG}.
 *
 * @param purl
 *          the location of the image.
 * @return appropriate MetaMetadata.
 */
public MetaMetadata getImageMM(ParsedURL purl)
{
  MetaMetadata selected = getClippableDocumentMM(purl, IMAGE_TAG);
  return selected;
}
/**
 * @param suffix
 *          a file suffix.
 * @return the meta-metadata registered for the suffix, or null.
 */
public MetaMetadata getMMBySuffix(String suffix)
{
  MetaMetadata bySuffix = repositoryBySuffix.get(suffix);
  return bySuffix;
}
/**
 * @param mimeType
 *          a MIME type.
 * @return the meta-metadata registered for the MIME type, or null.
 */
public MetaMetadata getMMByMime(String mimeType)
{
  MetaMetadata byMime = repositoryByMime.get(mimeType);
  return byMime;
}
/**
 * Constructs a metadata object from the meta-metadata with the given name.
 *
 * @param name
 *          the canonical or alternative name of the meta-metadata.
 * @return the constructed metadata, or null when the name is unknown.
 */
public Metadata constructByName(String name)
{
  MetaMetadata metaMetadata = getMMByName(name);
  if (metaMetadata == null)
  {
    return null;
  }
  return metaMetadata.constructMetadata(metadataTScope);
}
/**
 * Constructs a metadata object from the meta-metadata registered for a suffix.
 *
 * @param suffix
 *          a file suffix.
 * @return the constructed metadata, or null when nothing is registered for the suffix.
 */
public Metadata constructBySuffix(String suffix)
{
  MetaMetadata metaMetadata = this.getMMBySuffix(suffix);
  if (metaMetadata == null)
  {
    return null;
  }
  return metaMetadata.constructMetadata(metadataTScope);
}
/**
 * Look-up MetaMetadata for this purl, falling back to the one named by
 * {@code RICH_DOCUMENT_TAG}. Construct Metadata of the correct subtype, based on the
 * MetaMetadata. Set its location field to purl.
 *
 * @param purl
 *          the location of the document.
 * @return the constructed document, or null when no meta-metadata can be found at all (the
 *         other construct* methods of this class behave the same way).
 */
public Document constructDocument(ParsedURL purl)
{
  MetaMetadata metaMetadata = getDocumentMM(purl);
  if (metaMetadata == null)
  {
    // even the fallback type is unknown to this repository.
    return null;
  }
  Document result = (Document) metaMetadata.constructMetadata(metadataTScope);
  result.setLocation(purl);
  return result;
}
/**
 * Constructs a document from the meta-metadata registered for a suffix.
 *
 * @param suffix
 *          a file suffix.
 * @return the constructed document, or null when nothing is registered for the suffix or the
 *         constructed metadata is not a Document.
 */
public Document constructDocumentBySuffix(String suffix)
{
  Metadata constructed = constructBySuffix(suffix);
  if (constructed instanceof Document)
  {
    return (Document) constructed;
  }
  return null;
}
/**
 * Constructs a document from the meta-metadata registered for a MIME type.
 *
 * @param mimeType
 *          a MIME type.
 * @return the constructed document, or null when nothing is registered for the MIME type.
 */
public Document constructDocumentByMime(String mimeType)
{
  MetaMetadata metaMetadata = this.getMMByMime(mimeType);
  if (metaMetadata == null)
  {
    return null;
  }
  return (Document) metaMetadata.constructMetadata(metadataTScope);
}
/**
 * Construct a document by location. If isImage is set, or the location looks like an image and
 * an Image can be constructed for it, an Image is returned. Otherwise the document is built
 * from the meta-metadata selected for the location, after applying that meta-metadata's
 * location rewriting (if any).
 *
 * @param purl
 *          the location of the document.
 * @param isImage
 *          true to force construction of an Image.
 * @return the constructed document, or null when no meta-metadata can be found.
 */
public Document constructDocument(ParsedURL purl, boolean isImage)
{
  if (isImage)
  {
    return constructImage(purl);
  }

  if (purl.isImg())
  {
    try
    {
      Image img = constructImage(purl);
      if (img != null)
      {
        return img;
      }
    }
    catch (Exception e)
    {
      // deliberate best effort: fall through and treat the location as a regular document,
      // but leave a trace instead of swallowing the failure silently.
      logger.debug("Cannot construct an Image for " + purl
                   + ", treating it as a regular document", e);
    }
  }

  MetaMetadata metaMetadata = getRichDocumentMM(purl);
  if (metaMetadata == null)
  {
    logger.warn("No meta-metadata found for " + purl + ", cannot construct a document");
    return null;
  }

  FilterLocation rewriteLocation = metaMetadata.getRewriteLocation();
  List<ParsedURL> additionalLocations = null;
  if (rewriteLocation != null)
  {
    try
    {
      additionalLocations = new ArrayList<ParsedURL>();
      ParsedURL newPurl = rewriteLocation.filter(purl, additionalLocations);
      if (!purl.equals(newPurl))
      {
        // if the purl has been changed by the filter, add the old one as additional locations,
        // and use the new one as the primary location.
        additionalLocations.add(purl);
        purl = newPurl;
      }
    }
    catch (Exception e)
    {
      logger.error("Exception filtering location " + purl, e);
    }
  }

  Document result = (Document) metaMetadata.constructMetadata(metadataTScope);
  result.setLocation(purl);
  if (additionalLocations != null)
  {
    for (ParsedURL additionalLocation : additionalLocations)
    {
      result.addAdditionalLocation(additionalLocation);
    }
  }
  return result;
}
/**
 * Look-up MetaMetadata for this purl, falling back to the one named by {@code IMAGE_TAG}.
 * Construct Metadata of the correct subtype, based on the MetaMetadata, and set its location.
 *
 * @param purl
 *          the location of the image.
 * @return A Metadata object, either of type Image, or a subclass; null when no meta-metadata
 *         can be found.
 * @throws MetaMetadataException
 *           when the selected meta-metadata does not construct an Image.
 */
public Image constructImage(ParsedURL purl)
{
  MetaMetadata imageMmd = getImageMM(purl);
  if (imageMmd == null)
  {
    return null;
  }

  Metadata constructed = imageMmd.constructMetadata(metadataTScope);
  if (constructed instanceof Image)
  {
    Image image = (Image) constructed;
    image.setLocation(purl);
    return image;
  }

  throw new MetaMetadataException("Cannot convert " + constructed + " to an Image object.\n"
                                  + "This is usually caused by inaccurate URL patterns in "
                                  + "meta-metadata repository, that match a non-Image URL "
                                  + "with an Image or Image-derived meta-metadata type.\n"
                                  + "The accessed URL: " + purl);
}
/**
* Fills the location based look-up structures from the selectors of every meta-metadata in
* repositoryByName. Each selector is handled by the first case that applies:
* <ol>
* <li>it names a reselect meta-metadata: it is registered as a reselect entry on that
* meta-metadata and takes no further part in location matching;</li>
* <li>it has a stripped URL: an entry is added to documentRepositoryByUrlStripped;</li>
* <li>it has a URL path tree: the prefix is added to urlPrefixCollection;</li>
* <li>it has a domain and a regex (whole-URL regex first, fragment regex otherwise): an entry is
* added to the domain's bucket in documentRepositoryByPattern;</li>
* <li>it has a domain only: the meta-metadata is put into documentRepositoryByDomain.</li>
* </ol>
* A regex without a domain is reported as an error on the meta-metadata.
*/
private void initializeLocationBasedMaps()
{
for (MetaMetadata metaMetadata : repositoryByName)
{
// metaMetadata.inheritMetaMetadata(this);
// Class<? extends Metadata> metadataClass = metaMetadata.getMetadataClass(metadataTScope);
Class<? extends Metadata> metadataClass = (Class<? extends Metadata>) metaMetadata
.getMetadataClassDescriptor().getDescribedClass();
// skip meta-metadata whose descriptor does not describe a class.
if (metadataClass == null)
{
continue;
}
HashMap<String, ArrayList<StrippedUrlEntry>> repositoryByUrlStripped;
HashMap<String, ArrayList<RepositoryPatternEntry>> repositoryByPattern;
// Images used to have their own maps (see the disabled image repository fields); today every
// metadata class shares the document maps:
// if (Image.class.isAssignableFrom(metadataClass))
// {
// repositoryByUrlStripped = imageRepositoryByUrlStripped;
// repositoryByPattern = imageRepositoryByPattern;
// }
// else if (Document.class.isAssignableFrom(metadataClass))
// {
// repositoryByUrlStripped = documentRepositoryByUrlStripped;
// repositoryByPattern = documentRepositoryByPattern;
// }
// else
// continue;
repositoryByUrlStripped = documentRepositoryByUrlStripped;
repositoryByPattern = documentRepositoryByPattern;
// We need to check if something is there already
// if something is there, then we need to check to see if it has its cf pref set
// if not, then if I am null then I win
ArrayList<MetaMetadataSelector> selectors = metaMetadata.getSelectors();
for (MetaMetadataSelector selector : selectors)
{
// case 1: reselect selectors are attached to the named meta-metadata (when it exists).
String reselectMetaMetadataName = selector.getReselectMetaMetadataName();
if (reselectMetaMetadataName != null)
{
MetaMetadata reselectMetaMetadata = repositoryByName.get(reselectMetaMetadataName);
if (reselectMetaMetadata != null)
{
reselectMetaMetadata.addReselectEntry(selector, metaMetadata);
}
continue;
}
// case 2: stripped URL; several entries may share the same stripped URL.
ParsedURL strippedPurl = selector.getUrlStripped();
if (strippedPurl != null)
{
String noAnchorNoQueryPageString = strippedPurl.noAnchorNoQueryPageString();
ArrayList<StrippedUrlEntry> strippedUrlEntries = repositoryByUrlStripped
.get(noAnchorNoQueryPageString);
if (strippedUrlEntries == null)
{
strippedUrlEntries = new ArrayList<StrippedUrlEntry>();
repositoryByUrlStripped.put(noAnchorNoQueryPageString, strippedUrlEntries);
}
strippedUrlEntries.add(new StrippedUrlEntry(metaMetadata, selector));
metaMetadata.setMmSelectorType(MMSelectorType.LOCATION);
}
else
{
// case 3: URL path tree (prefix).
ParsedURL urlPathTree = selector.getUrlPathTree();
if (urlPathTree != null)
{
PrefixPhrase pp = urlPrefixCollection.add(urlPathTree);
pp.setMappedObject(metaMetadata);
// TODO is this next line correct??? it looks wrong!
// repositoryByUrlStripped.put(urlPathTree.toString(), metaMetadata);
metaMetadata.setMmSelectorType(MMSelectorType.LOCATION);
}
else
{
// cases 4 and 5: domain, with or without a regex.
// use .pattern() for comparison
String domain = selector.getDomain();
boolean isPatternFragment = false;
Pattern urlPattern = selector.getUrlRegex();
// no (or an empty) whole-URL regex: fall back to the fragment regex, which may be null too.
if (urlPattern == null || urlPattern.pattern().length() <= 0)
{
urlPattern = selector.getUrlRegexFragment();
isPatternFragment = true;
}
if (domain != null)
{
if (urlPattern != null)
{
ArrayList<RepositoryPatternEntry> bucket = repositoryByPattern.get(domain);
if (bucket == null)
{
bucket = new ArrayList<RepositoryPatternEntry>(2);
repositoryByPattern.put(domain, bucket);
}
bucket.add(new RepositoryPatternEntry(urlPattern,
metaMetadata,
selector,
isPatternFragment));
metaMetadata.setMmSelectorType(MMSelectorType.LOCATION);
}
else
{
// domain only -- no pattern
documentRepositoryByDomain.put(domain, metaMetadata);
metaMetadata.setMmSelectorType(MMSelectorType.DOMAIN);
}
}
else if (urlPattern != null)
{
metaMetadata.error("<selector with url_regex=\""
+ urlPattern
+ "\" but domain is not specified :(");
}
}
}
}
}
}
/**
 * Fills the look-up maps keyed by suffix and by MIME type. When several meta-metadata declare
 * the same suffix or MIME type, the first one in repository order wins.
 */
void initializeSuffixAndMimeBasedMaps()
{
  if (repositoryByName == null)
  {
    return;
  }

  for (MetaMetadata mmd : repositoryByName)
  {
    ArrayList<String> declaredSuffixes = mmd.getSuffixes();
    if (declaredSuffixes != null)
    {
      for (String suffix : declaredSuffixes)
      {
        // FIXME-- Ask whether the suffix and mime should be inherited or not
        if (repositoryBySuffix.containsKey(suffix))
        {
          continue;
        }
        repositoryBySuffix.put(suffix, mmd);
        mmd.setMmSelectorType(MMSelectorType.SUFFIX_OR_MIME);
      }
    }

    ArrayList<String> declaredMimeTypes = mmd.getMimeTypes();
    if (declaredMimeTypes != null)
    {
      for (String mimeType : declaredMimeTypes)
      {
        // FIXME -- Ask whether the suffix and mime should be inherited or not
        if (repositoryByMime.containsKey(mimeType))
        {
          continue;
        }
        repositoryByMime.put(mimeType, mmd);
        mmd.setMmSelectorType(MMSelectorType.SUFFIX_OR_MIME);
      }
    }
  }
}
/**
 * This method initializes the mappings from selectors in the repository to selectors in meta
 * metadata.
 * <p/>
 * For each repository-level selector, the meta-metadata referring to a selector of the same
 * name are collected. If one of them is the preferred meta-metadata (the pref looked up under
 * the selector's pref name, or else the selector's default pref), its selector is replaced by
 * the repository-level one. Otherwise, if exactly one reference exists, the selector is added
 * to the meta-metadata holding that reference; no reference is reported as a warning, several
 * references as an error.
 */
private void initializeSelectors()
{
  if (selectorsByName == null)
  {
    return;
  }

  for (MetaMetadataSelector selector : selectorsByName)
  {
    String prefName = selector.getPrefName();
    prefName = Pref.lookupString(prefName);
    if (prefName == null)
    {
      prefName = selector.getDefaultPref();
    }

    if (repositoryByName == null)
    {
      continue;
    }

    int numberOfMetametaData = 0;
    // the meta-metadata that most recently matched the selector name. The original code
    // assigned this on every iteration, so it always ended up as the last meta-metadata in the
    // repository, whether or not it referred to the selector.
    MetaMetadata onlyCandidate = null;
    boolean prefNameMetaMetadataFound = false;

    for (MetaMetadata metaMetadata : repositoryByName)
    {
      if (metaMetadata == null)
      {
        continue;
      }
      ArrayList<MetaMetadataSelector> mSelectors = metaMetadata.getSelectors();
      for (int i = 0; i < mSelectors.size(); i++)
      {
        MetaMetadataSelector mSelector = mSelectors.get(i);
        if (mSelector == null)
        {
          continue;
        }
        String mSelectorName = mSelector.getName();
        if (mSelectorName == null || !mSelectorName.equals(selector.getName()))
        {
          continue;
        }

        numberOfMetametaData += 1;
        onlyCandidate = metaMetadata;
        if (metaMetadata.getName().equals(prefName))
        {
          mSelectors.set(i, selector);
          prefNameMetaMetadataFound = true;
        }
      }
    }

    if (!prefNameMetaMetadataFound)
    {
      if (numberOfMetametaData == 0)
      {
        Debug.warning(this, "Selector "
                            + selector.getName()
                            + " does not appear to be used in any MetaMetadata.");
      }
      else if (numberOfMetametaData == 1)
      {
        onlyCandidate.addSelector(selector);
      }
      else
      {
        Debug.error(this, "Selector "
                          + selector.getName()
                          + " is ambiguous. Set the pref_name or use a default_pref.");
      }
    }
  }
}
/**
 * Fills the alternative name map from the comma separated "other tags" of every meta-metadata.
 * Does nothing when the map already exists. When no meta-metadata are loaded yet, the map is
 * left untouched so that a later call can still initialize it.
 */
public void initAltNames()
{
  if (altNames != null || repositoryByName == null)
  {
    return;
  }

  altNames = new HashMapArrayList<String, MetaMetadataAltNameEntry>();
  for (MetaMetadata mmd : repositoryByName.values())
  {
    String otherTags = mmd.getOtherTags();
    if (otherTags == null || otherTags.length() == 0)
    {
      continue;
    }
    for (String tag : otherTags.split(","))
    {
      tag = tag.trim();
      if (tag.length() > 0)
      {
        altNames.put(tag, new MetaMetadataAltNameEntry(tag, mmd));
      }
    }
  }
}
/**
 * Hook run after deserialization: wires repository-level selectors into the meta-metadata.
 * None of the parameters is used.
 *
 * @param translationContext
 *          ignored.
 * @param object
 *          ignored.
 * @param parent
 *          ignored.
 */
public void deserializationPostHook(TranslationContext translationContext,
                                    Object object,
                                    Object parent)
{
  this.initializeSelectors();
}
/**
 * @return "MetaMetadataRepository", followed by the repository name in brackets when a name is
 *         set.
 */
@Override
public String toString()
{
  String label = "MetaMetadataRepository";
  if (name == null)
  {
    return label;
  }
  return label + "[" + name + "]";
}
/**
 * @return the value of the static baseDocumentMM field; may be null.
 */
public static MetaMetadata getBaseDocumentMM()
{
  return MetaMetadataRepository.baseDocumentMM;
}
/**
 * @return the value of the static baseImageMM field; may be null.
 */
public static MetaMetadata getBaseImageMM()
{
  return MetaMetadataRepository.baseImageMM;
}
/**
 * This is not Object.hashCode()! This is for repository versioning.
 *
 * @return The hash of the whole repository, as set through {@code setHash}.
 */
public String getHash()
{
  return this.hash;
}
/**
 * Sets the versioning hash of the repository.
 *
 * @param hash
 *          the hash of the whole repository.
 */
public void setHash(String hash)
{
  this.hash = hash;
}
}