package ecologylab.bigsemantics.metametadata;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import com.google.common.base.Charsets;
import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;
import ecologylab.bigsemantics.Utils;
import ecologylab.bigsemantics.collecting.CookieProcessing;
import ecologylab.bigsemantics.collecting.MetaMetadataRepositoryLocator;
import ecologylab.bigsemantics.metadata.Metadata;
import ecologylab.bigsemantics.metametadata.exceptions.MetaMetadataException;
import ecologylab.bigsemantics.namesandnums.DocumentParserTagNames;
import ecologylab.generic.Debug;
import ecologylab.generic.HashMapArrayList;
import ecologylab.io.NamedInputStream;
import ecologylab.net.ParsedURL;
import ecologylab.serialization.SIMPLTranslationException;
import ecologylab.serialization.SimplTypesScope;
import ecologylab.serialization.formatenums.Format;
/**
* Take charge in loading the repository.
*
* @author quyin
*
*/
public class MetaMetadataRepositoryLoader extends Debug implements DocumentParserTagNames
{
static final SimplTypesScope mmdTScope = MetaMetadataTranslationScope.get();
private String repositoryHash;
/**
* @return The hash code of the current repository if available, otherwise null.
*/
public String getRepositoryHash()
{
return repositoryHash;
}
/**
* Load meta-metadata from repository files from a directory.
* <p />
* Order: base level, then repositorySources, then powerUser.
*
* @see loadFromFiles()
*
* @param dir
* The repository directory.
* @param format
* The format of the repository.
* @return An integrated representation of the repository.
* @throws FileNotFoundException
* @throws SIMPLTranslationException
*/
public MetaMetadataRepository loadFromDir(File dir, Format format)
throws IOException, SIMPLTranslationException
{
if (!dir.exists())
{
throw new MetaMetadataException("MetaMetadataRepository directory does not exist : "
+ dir.getAbsolutePath());
}
println("MetaMetadataRepository directory : " + dir + "\n");
List<File> allFiles = MetaMetadataRepositoryLocator.listRepositoryFiles(dir, format);
return loadFromFiles(allFiles, format);
}
/**
* Load the repository from a set of files, in a specified format.
* <p />
* Meta-metadata types will be placed into corresponding packages, each with a package-wide type
* scope. The local type scope for each meta-metadata is initialized.
* <p />
* MIME type and suffix based selectors are processed. Location based selectors will not be
* processed here because that requires the metadata translation scope, which is not yet
* determined at this stage.
*
* @param files
* The list of files storing the repository.
* @param format
* The format of the repository.
* @return An integrated representation of the repository.
* @throws FileNotFoundException
* @throws SIMPLTranslationException
*/
public MetaMetadataRepository loadFromFiles(List<File> files, Format format)
throws IOException, SIMPLTranslationException
{
List<NamedInputStream> nistreams = new ArrayList<NamedInputStream>();
for (File file : files)
{
if (file == null || !file.exists())
{
warning("Ignoring " + file);
continue;
}
println("Opening MetaMetadataRepository:\t" + file.getPath());
NamedInputStream nistream = new NamedInputStream(file);
nistreams.add(nistream);
}
return loadFromInputStreams(nistreams, format);
}
/**
* Load the repository from a list of InputStreams. This is useful for loading the repository from
* jar'ed resources.
*
* @param nistreams
* @param format
* @return
* @throws IOException
* @throws SIMPLTranslationException
*/
public MetaMetadataRepository loadFromInputStreams(List<NamedInputStream> nistreams, Format format)
throws IOException
{
List<MetaMetadataRepository> repositories = deserializeRepositories(nistreams, format);
MetaMetadataRepository result = mergeRepositories(repositories);
initializeRepository(result);
return result;
}
List<MetaMetadataRepository> deserializeRepositories(List<NamedInputStream> streams,
Format format) throws IOException
{
List<MetaMetadataRepository> result = new ArrayList<MetaMetadataRepository>(streams.size());
List<HashCode> fileHashes = new ArrayList<HashCode>();
for (NamedInputStream nistream : streams)
{
InputStream istream = nistream.getInputStream();
String content = Utils.readInputStream(istream);
HashCode fileHash = Hashing.md5().hashString(content, Charsets.UTF_8);
fileHashes.add(fileHash);
InputStream newStream = new ByteArrayInputStream(content.getBytes(Charsets.UTF_8));
MetaMetadataRepository repo = null;
boolean error = true;
try
{
repo = (MetaMetadataRepository) mmdTScope.deserialize(newStream, format);
error = false;
}
catch (SIMPLTranslationException e)
{
e.printStackTrace();
}
if (!error)
{
result.add(repo);
println("Deserialized " + repo);
}
else
{
error("Could not deserialize repository from InputStream: " + nistream.getName());
}
}
HashCode repoHash = Hashing.combineUnordered(fileHashes);
repositoryHash = Utils.base64urlEncode(repoHash.asBytes());
return result;
}
MetaMetadataRepository mergeRepositories(List<MetaMetadataRepository> repositories)
{
MetaMetadataRepository result = new MetaMetadataRepository();
result.repositoryByName = new HashMapArrayList<String, MetaMetadata>();
result.packageMmdScopes = new HashMap<String, MmdScope>();
for (MetaMetadataRepository repo : repositories)
{
mergeOneRepositoryIntoAnother(result, repo);
}
return result;
}
void mergeOneRepositoryIntoAnother(MetaMetadataRepository toRepository,
MetaMetadataRepository fromRepository)
{
if (fromRepository != null)
{
// sort meta-metadata into toRepository.repositoryByName and mmdScope for that package.
if (fromRepository.repositoryByName != null)
{
for (String mmdName : fromRepository.repositoryByName.keySet())
{
MetaMetadata mmd = fromRepository.repositoryByName.get(mmdName);
mmd.setParent(toRepository);
mmd.setRepository(toRepository);
String packageName = mmd.packageName();
if (packageName == null)
{
packageName = fromRepository.packageName();
if (packageName == null)
throw new MetaMetadataException("no package name specified for " + mmd);
mmd.setPackageName(packageName);
}
MmdScope packageMmdScope = toRepository.packageMmdScopes.get(packageName);
if (packageMmdScope == null)
{
packageMmdScope = new MmdScope("package:" + packageName);
packageMmdScope.putAll(toRepository.repositoryByName);
toRepository.packageMmdScopes.put(packageName, packageMmdScope);
}
switch (mmd.visibility)
{
case GLOBAL:
{
MetaMetadata existingMmd = toRepository.repositoryByName.get(mmdName);
if (existingMmd != null && existingMmd != mmd)
throw new MetaMetadataException("meta-metadata already exists: " + mmdName + " in "
+ fromRepository);
toRepository.repositoryByName.put(mmdName, mmd);
break;
}
case PACKAGE:
{
MetaMetadata existingMmd = (MetaMetadata) packageMmdScope.get(mmdName);
if (existingMmd != null && existingMmd != mmd)
throw new MetaMetadataException("meta-metadata already exists: " + mmdName + " in "
+ fromRepository);
packageMmdScope.put(mmdName, mmd);
break;
}
}
}
for (MetaMetadata mmd : fromRepository.repositoryByName.values())
{
MmdScope packageMmdScope = toRepository.packageMmdScopes.get(mmd.packageName());
mmd.scope().addAncestor(packageMmdScope);
}
}
// combine other parts
toRepository.integrateRepositoryWithThis(fromRepository);
}
}
void initializeRepository(MetaMetadataRepository result)
{
result.initAltNames();
// initialize meta-metadata look-up maps
// result.initializeLocationBasedMaps(); // cannot do this since it needs the metadata TScope.
result.initializeSuffixAndMimeBasedMaps();
// We might want to do this only if we have some policies worth enforcing.
ParsedURL.cookieManager.setCookiePolicy(CookieProcessing.semanticsCookiePolicy);
// FIXME -- get rid of this?!
Metadata.setRepository(result);
MetaMetadataRepository.baseDocumentMM = result.getMMByName(DOCUMENT_TAG);
MetaMetadataRepository.baseImageMM = result.getMMByName(IMAGE_TAG);
}
}