/*
* OREDisseminationCrosswalk.java
*
* Version: $Revision: 2108 $
*
* Date: $Date: 2007-07-30 12:26:50 -0500 (Mon, 30 Jul 2007) $
*
* Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
* Institute of Technology. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the Hewlett-Packard Company nor the name of the
* Massachusetts Institute of Technology nor the names of their
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package org.dspace.content.crosswalk;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URL;
import java.sql.SQLException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.Bundle;
import org.dspace.content.DCValue;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.MetadataSchema;
import org.dspace.content.packager.PackageDisseminator;
import org.dspace.content.packager.PackageException;
import org.dspace.content.packager.PackageParameters;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.PluginManager;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Utils;
//import org.dspace.core.Utils;
import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.input.SAXBuilder;
import org.jdom.output.Format;
import org.jdom.output.XMLOutputter;
/**
* ORE dissemination crosswalk
* <p>
* Produces an Atom-encoded ORE aggregation of a DSpace item.
*
* @author Alexey Maslov
* @version $Revision: 1 $
*/
public class OREDisseminationCrosswalk
implements DisseminationCrosswalk
{
/** log4j category */
private static Logger log = Logger.getLogger(OREDisseminationCrosswalk.class);
/* Schema for Atom only available in Relax NG format */
public static final String ATOM_RNG = "http://tweety.lanl.gov/public/schemas/2008-06/atom-tron.sch";
/* Namespaces */
public static final Namespace ATOM_NS =
Namespace.getNamespace("atom", "http://www.w3.org/2005/Atom");
private static final Namespace ORE_NS =
Namespace.getNamespace("ore", "http://www.openarchives.org/ore/terms/");
private static final Namespace ORE_ATOM =
Namespace.getNamespace("oreatom", "http://www.openarchives.org/ore/atom/");
private static final Namespace RDF_NS =
Namespace.getNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
private static final Namespace DCTERMS_NS =
Namespace.getNamespace("dcterms", "http://purl.org/dc/terms/");
private static final Namespace DS_NS =
Namespace.getNamespace("ds","http://www.dspace.org/objectModel/");
private static final Namespace namespaces[] = { ATOM_NS, ORE_NS, ORE_ATOM, RDF_NS, DCTERMS_NS, DS_NS };
public Namespace[] getNamespaces()
{
return namespaces;
}
/* There is (and currently can be) no XSD schema that validates Atom feeds, only RNG */
public String getSchemaLocation()
{
return ATOM_NS.getURI() + " " + ATOM_RNG;
}
/**
* Disseminate an Atom-encoded ORE ReM mapped from a DSpace Item
* @param item
* @return
* @throws CrosswalkException
* @throws IOException
* @throws SQLException
* @throws AuthorizeException
*/
private Element disseminateItem(Item item) throws CrosswalkException, IOException, SQLException, AuthorizeException
{
String oaiUrl = null;
String dsUrl = ConfigurationManager.getProperty("dspace.url");
String remSource = ConfigurationManager.getProperty("ore.authoritative.source");
if (remSource == null || remSource.equalsIgnoreCase("oai"))
oaiUrl = ConfigurationManager.getProperty("dspace.oai.url");
else if (remSource.equalsIgnoreCase("xmlui") || remSource.equalsIgnoreCase("manakin"))
oaiUrl = dsUrl;
if (oaiUrl == null)
throw new CrosswalkInternalException("Base uri for the ore generator has not been set. Check the ore.authoritative.source setting.");
String uriA = oaiUrl + "/metadata/handle/" + item.getHandle() + "/ore.xml";
// Top level atom feed element
Element aggregation = new Element("entry",ATOM_NS);
aggregation.addNamespaceDeclaration(ATOM_NS);
aggregation.addNamespaceDeclaration(ORE_NS);
aggregation.addNamespaceDeclaration(ORE_ATOM);
aggregation.addNamespaceDeclaration(DCTERMS_NS);
// Atom-entry specific info
Element atomId = new Element("id",ATOM_NS);
atomId.addContent(uriA);
aggregation.addContent(atomId);
Element aggLink;
DCValue[] uris = item.getMetadata(MetadataSchema.DC_SCHEMA,"identifier","uri",Item.ANY);
for (DCValue uri : uris) {
aggLink = new Element("link",ATOM_NS);
aggLink.setAttribute("rel", "alternate");
aggLink.setAttribute("href", uri.value);
aggregation.addContent(aggLink);
}
// Information about the resource map, as separate entity from the aggregation it describes
Element uriALink = new Element("link",ATOM_NS);
uriALink.setAttribute("rel", "http://www.openarchives.org/ore/terms/describes");
uriALink.setAttribute("href", uriA);
Element uriRLink = new Element("link",ATOM_NS);
uriRLink.setAttribute("rel","self");
uriRLink.setAttribute("href", uriA + "#atom");
uriRLink.setAttribute("type","application/atom+xml");
Element remPublished = new Element("published",ATOM_NS);
remPublished.addContent(Utils.formatISO8601Date(new Date()));
Element remUpdated = new Element("updated",ATOM_NS);
remUpdated.addContent(Utils.formatISO8601Date(new Date()));
Element remCreator = new Element("source",ATOM_NS);
Element remGenerator = new Element("generator",ATOM_NS);
remGenerator.addContent(ConfigurationManager.getProperty("dspace.name"));
remGenerator.setAttribute("uri", oaiUrl);
remCreator.addContent(remGenerator);
aggregation.addContent(uriALink);
aggregation.addContent(uriRLink);
aggregation.addContent(remPublished);
aggregation.addContent(remUpdated);
aggregation.addContent(remCreator);
// Information about the aggregation (item) itself
Element aggTitle = new Element("title",ATOM_NS);
DCValue[] titles = item.getMetadata(MetadataSchema.DC_SCHEMA, "title", null, Item.ANY);
if (titles != null && titles.length>0)
aggTitle.addContent(titles[0].value);
else
aggTitle.addContent("");
aggregation.addContent(aggTitle);
Element aggAuthor;
Element aggAuthorName;
DCValue[] authors = item.getMetadata(MetadataSchema.DC_SCHEMA,"contributor","author",Item.ANY);
for (DCValue author : authors) {
aggAuthor = new Element("author",ATOM_NS);
aggAuthorName = new Element("name",ATOM_NS);
aggAuthorName.addContent(author.value);
aggAuthor.addContent(aggAuthorName);
aggregation.addContent(aggAuthor);
}
Element oreCategory = new Element("category",ATOM_NS);
oreCategory.setAttribute("scheme", ORE_NS.getURI());
oreCategory.setAttribute("term", ORE_NS.getURI()+"Aggregation");
oreCategory.setAttribute("label","Aggregation");
Element updateCategory = new Element("category",ATOM_NS);
updateCategory.setAttribute("scheme", ORE_ATOM.getURI()+"modified");
updateCategory.setAttribute("term", Utils.formatISO8601Date(item.getLastModified()));
Element dsCategory = new Element("category",ATOM_NS);
dsCategory.setAttribute("scheme", DS_NS.getURI());
dsCategory.setAttribute("term", "DSpaceItem");
dsCategory.setAttribute("label", "DSpace Item");
aggregation.addContent(oreCategory);
aggregation.addContent(updateCategory);
aggregation.addContent(dsCategory);
// metadata section
Element arLink;
Element rdfDescription, rdfType, dcModified, dcDesc;
Element triples = new Element("triples", ORE_ATOM);
// metadata about the item
rdfDescription = new Element("Description", RDF_NS);
rdfDescription.setAttribute("about", uriA, RDF_NS);
rdfType = new Element("type", RDF_NS);
rdfType.setAttribute("resource", DS_NS.getURI()+"DSpaceItem", RDF_NS);
dcModified = new Element("modified", DCTERMS_NS);
dcModified.addContent(Utils.formatISO8601Date(item.getLastModified()));
rdfDescription.addContent(rdfType);
rdfDescription.addContent(dcModified);
triples.addContent(rdfDescription);
// Add a link and an oreatom metadata entry for each bitstream in the item
Bundle[] bundles = item.getBundles();
Bitstream[] bitstreams;
for (Bundle bundle : bundles)
{
// Omit the special "ORE" bitstream
if (bundle.getName().equals("ORE"))
continue;
bitstreams = bundle.getBitstreams();
for (Bitstream bs : bitstreams)
{
arLink = new Element("link",ATOM_NS);
arLink.setAttribute("rel", ORE_NS.getURI()+"aggregates");
arLink.setAttribute("href",dsUrl + "/bitstream/handle/" + item.getHandle() + "/" + URLencode(bs.getName()) + "?sequence=" + bs.getSequenceID());
arLink.setAttribute("title",bs.getName());
arLink.setAttribute("type",bs.getFormat().getMIMEType());
arLink.setAttribute("length",Long.toString(bs.getSize()));
aggregation.addContent(arLink);
// metadata about the bitstream
rdfDescription = new Element("Description", RDF_NS);
rdfDescription.setAttribute("about", dsUrl + "/bitstream/handle/" + item.getHandle() + "/" + URLencode(bs.getName()) + "?sequence=" + bs.getSequenceID(), RDF_NS);
rdfType = new Element("type", RDF_NS);
rdfType.setAttribute("resource", DS_NS.getURI()+"DSpaceBitstream", RDF_NS);
dcDesc = new Element("description", DCTERMS_NS);
dcDesc.addContent(bundle.getName());
rdfDescription.addContent(rdfType);
rdfDescription.addContent(dcDesc);
triples.addContent(rdfDescription);
}
}
aggregation.addContent(triples);
// Add a link to the OAI-PMH served metadata (oai_dc is always on)
/*
Element pmhMeta = new Element("entry",ATOM_NS);
pUri = new Element("id",ATOM_NS);
String oaiId = new String("oai:" + ConfigurationManager.getProperty("dspace.hostname") + ":" + item.getHandle());
pUri.addContent(oaiId + "#oai_dc");
pmhMeta.addContent(pUri);
Element pmhAuthor = new Element("author",ATOM_NS);
Element pmhAuthorName = new Element("name",ATOM_NS);
Element pmhAuthorUri = new Element("uri",ATOM_NS);
pmhAuthorName.addContent(ConfigurationManager.getProperty("dspace.name"));
pmhAuthorUri.addContent(oaiUrl);
pmhAuthor.addContent(pmhAuthorName);
pmhAuthor.addContent(pmhAuthorUri);
pmhMeta.addContent(pmhAuthor);
arUri = new Element("link",ATOM_NS);
arUri.setAttribute("rel","alternate");
arUri.setAttribute("href",oaiUrl + "/request?verb=GetRecord&identifier=" + oaiId + "&metadataprefix=oai_dc");
pmhMeta.addContent(arUri);
Element rdfDesc = new Element("Description",RDF_NS);
rdfDesc.setAttribute("about",oaiUrl + "/request?verb=GetRecord&identifier=" + oaiId + "&metadataprefix=oai_dc",RDF_NS);
Element dcTerms = new Element("dcterms",DCTERMS_NS);
dcTerms.setAttribute("resource","http://www.openarchives.org/OAI/2.0/oai_dc/",RDF_NS);
rdfDesc.addContent(dcTerms);
pmhMeta.addContent(rdfDesc);
arUpdated = new Element("updated",ATOM_NS);
arUpdated.addContent(Utils.formatISO8601Date(item.getLastModified()));
pmhMeta.addContent(arUpdated);
arTitle = new Element("title",ATOM_NS);
arTitle.addContent("");
pmhMeta.addContent(arTitle);
aggregation.addContent(pmhMeta);*/
return aggregation;
}
public Element disseminateElement(DSpaceObject dso) throws CrosswalkException, IOException, SQLException, AuthorizeException
{
switch(dso.getType()) {
case Constants.ITEM: return disseminateItem((Item)dso);
case Constants.COLLECTION: break;
case Constants.COMMUNITY: break;
default: throw new CrosswalkObjectNotSupported("ORE implementation unable to disseminate unknown DSpace object.");
}
return null;
}
/**
* Helper method to escape all chaacters that are not part of the canon set
* @param sourceString source unescaped string
*/
private String URLencode(String sourceString) {
Character lowalpha[] = {'a' , 'b' , 'c' , 'd' , 'e' , 'f' , 'g' , 'h' , 'i' ,
'j' , 'k' , 'l' , 'm' , 'n' , 'o' , 'p' , 'q' , 'r' ,
's' , 't' , 'u' , 'v' , 'w' , 'x' , 'y' , 'z'};
Character upalpha[] = {'A' , 'B' , 'C' , 'D' , 'E' , 'F' , 'G' , 'H' , 'I' ,
'J' , 'K' , 'L' , 'M' , 'N' , 'O' , 'P' , 'Q' , 'R' ,
'S' , 'T' , 'U' , 'V' , 'W' , 'X' , 'Y' , 'Z'};
Character digit[] = {'0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9'};
Character mark[] = {'-' , '_' , '.' , '!' , '~' , '*' , '\'' , '(' , ')'};
// reserved
//Character reserved[] = {';' , '/' , '?' , ':' , '@' , '&' , '=' , '+' , '$' , ',' ,'%', '#'};
Set<Character> URLcharsSet = new HashSet<Character>();
URLcharsSet.addAll(Arrays.asList(lowalpha));
URLcharsSet.addAll(Arrays.asList(upalpha));
URLcharsSet.addAll(Arrays.asList(digit));
URLcharsSet.addAll(Arrays.asList(mark));
//URLcharsSet.addAll(Arrays.asList(reserved));
String processedString = new String();
for (int i=0; i<sourceString.length(); i++) {
char ch = sourceString.charAt(i);
if (URLcharsSet.contains(ch)) {
processedString += ch;
}
else {
processedString += "%" + Integer.toHexString((int)ch);
}
}
return processedString;
}
public List disseminateList(DSpaceObject dso) throws CrosswalkException, IOException, SQLException, AuthorizeException
{
List result = new ArrayList(1);
result.add(disseminateElement(dso));
return result;
}
/* Only interested in disseminating items at this time */
public boolean canDisseminate(DSpaceObject dso)
{
if (dso.getType() == Constants.ITEM || dso.getType() == Constants.COLLECTION || dso.getType() == Constants.COMMUNITY)
return true;
else
return false;
}
public boolean preferList()
{
return false;
}
}