/**
* Copyright 2008 The University of North Carolina at Chapel Hill
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.unc.lib.dl.util;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
import javax.xml.namespace.QName;
import org.apache.abdera.model.Element;
import org.apache.abdera.model.Entry;
import org.apache.log4j.Logger;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;
import edu.unc.lib.dl.fedora.DatastreamPID;
import edu.unc.lib.dl.fedora.PID;
import edu.unc.lib.dl.xml.JDOMNamespaceUtil;
/**
 * Static helpers for pulling metadata datastreams out of an Abdera Atom Pub
 * entry and converting them into JDOM elements.
 */
public class AtomPubMetadataParserUtil {
    private static final Logger log = Logger
            .getLogger(AtomPubMetadataParserUtil.class);

    /** Datastream id used for dublin core terms found at the root of an entry. */
    public static final String ATOM_DC_DATASTREAM = "ATOM_DC";

    /** Key under which access control content is stored in the result map. */
    private static final String ACL_DATASTREAM = "ACL";

    private static final QName DATASTREAM_QNAME = new QName(
            "http://cdr.lib.unc.edu/", "datastream");
    private static final QName MODS_QNAME = new QName(
            "http://www.loc.gov/mods/v3", "mods");
    private static final String DC_NAMESPACE = "http://purl.org/dc/terms/";
    private static final String ATOM_PUB_NAMESPACE = "http://www.w3.org/2005/Atom";

    /** Synthetic wrapper that gives the collected dublin core terms a single root. */
    private static final String DC_WRAPPER_OPEN = "<dcterms:dc xmlns:dcterms=\"http://purl.org/dc/terms/\">";
    private static final String DC_WRAPPER_CLOSE = "</dcterms:dc>";

    /**
     * Extracts datastreams from the entry in multi document mode, that is,
     * without a targeted default datastream.
     *
     * @param entry
     *            abdera Atom Pub entry containing metadata for extraction.
     * @return map of datastream id to content, or null if the entry is null
     *         or has no child elements
     * @throws IOException
     *             if an element cannot be serialized or re-read
     * @throws JDOMException
     *             if serialized content cannot be parsed by JDOM
     * @see #extractDatastreams(Entry, String)
     */
    public static Map<String, org.jdom2.Element> extractDatastreams(Entry entry)
            throws IOException, JDOMException {
        return extractDatastreams(entry, (String) null);
    }

    /**
     * Extracts datastreams from the entry, targeting the datastream named by
     * the pid when it is a {@link DatastreamPID}. Any other pid (including
     * null) results in multi document mode.
     *
     * @param entry
     *            abdera Atom Pub entry containing metadata for extraction.
     * @param pid
     *            identifier the request was made against, may be null
     * @return map of datastream id to content, or null if the entry is null
     *         or has no child elements
     * @throws IOException
     *             if an element cannot be serialized or re-read
     * @throws JDOMException
     *             if serialized content cannot be parsed by JDOM
     * @see #extractDatastreams(Entry, String)
     */
    public static Map<String, org.jdom2.Element> extractDatastreams(
            Entry entry, PID pid) throws IOException, JDOMException {
        String defaultDatastream = null;
        // If the request was for a specific datastream, add it in.
        // instanceof is already false for null, so no separate null check.
        if (pid instanceof DatastreamPID) {
            defaultDatastream = ((DatastreamPID) pid).getDatastream();
        }
        return extractDatastreams(entry, defaultDatastream);
    }

    /**
     * Returns a map containing the metadata content as jdom elements
     * associated with their datastream id. The content is extracted from an
     * Atom Pub abdera entry.
     *
     * In multi document mode (defaultDatastream is null) the entry may
     * contain root level qualified dublin core tags or a MODS entry, as well
     * as any number of cdr:datastream tags containing specific metadata
     * streams to extract, and access control elements:
     * <ul>
     * <li>root level dublin core terms are wrapped in a single dcterms:dc
     * element and stored under {@link #ATOM_DC_DATASTREAM}</li>
     * <li>each cdr:datastream tag with an id attribute contributes its first
     * child element under that id. If a datastream tag contains more than one
     * root element, only the first element will be retained</li>
     * <li>a root MODS element is stored as the descriptive metadata
     * datastream</li>
     * <li>elements in the CDR access control namespace are stored under
     * "ACL"</li>
     * </ul>
     *
     * In specific datastream mode (defaultDatastream is not null) the root
     * level dublin core terms, if any were seen first, are stored under
     * defaultDatastream. Otherwise the first element outside of the dublin
     * core and Atom namespaces is stored under defaultDatastream; if there is
     * no such element the key is mapped to null.
     *
     * Keys mapped to null are stubs for implied datastreams: RELS-EXT when
     * ACL content is present, and the descriptive metadata datastream when
     * only root dublin core terms were supplied in multi document mode.
     *
     * @param entry
     *            abdera Atom Pub entry containing metadata for extraction.
     * @param defaultDatastream
     *            id of the single datastream being targeted, or null for
     *            multi document mode
     * @return map of datastream id to content (values may be null for
     *         stubs), or null if the entry is null or has no child elements
     * @throws IOException
     *             if an element cannot be serialized or re-read
     * @throws JDOMException
     *             if serialized content cannot be parsed by JDOM
     */
    public static Map<String, org.jdom2.Element> extractDatastreams(
            Entry entry, String defaultDatastream) throws IOException,
            JDOMException {
        if (entry == null || entry.getElements().isEmpty()) {
            return null;
        }

        SAXBuilder saxBuilder = new SAXBuilder();
        Map<String, org.jdom2.Element> datastreamMap = new HashMap<String, org.jdom2.Element>();
        boolean multiDocumentMode = defaultDatastream == null;

        // Buffer holding the compiled root level dublin core tags. It stays
        // null until the first dublin core element is encountered. Closing a
        // ByteArrayOutputStream has no effect, so it is not explicitly closed.
        ByteArrayOutputStream dcOutStream = null;
        Element defaultDatastreamElement = null;

        for (Element element : entry.getElements()) {
            QName qName = element.getQName();
            if (DC_NAMESPACE.equals(qName.getNamespaceURI())) {
                // Populate dublin core properties from the default entry
                // metadata, opening the wrapper on first use
                if (dcOutStream == null) {
                    dcOutStream = new ByteArrayOutputStream();
                    dcOutStream.write(DC_WRAPPER_OPEN
                            .getBytes(StandardCharsets.UTF_8));
                }
                element.writeTo(dcOutStream);
            } else if (multiDocumentMode) {
                addMultiDocumentElement(element, qName, saxBuilder,
                        datastreamMap);
            } else if (dcOutStream == null
                    && !ATOM_PUB_NAMESPACE.equals(qName.getNamespaceURI())) {
                // Specific datastream mode, use the first non-atompub tag
                // since we can't have multiple roots. Can't have one of
                // these if we're already in dublin core mode.
                defaultDatastreamElement = element;
                break;
            }
        }

        if (dcOutStream != null) {
            // Create the atom dublin core default datastream
            dcOutStream.write(DC_WRAPPER_CLOSE
                    .getBytes(StandardCharsets.UTF_8));
            try (ByteArrayInputStream inStream = new ByteArrayInputStream(
                    dcOutStream.toByteArray())) {
                org.jdom2.Element rootNode = saxBuilder.build(inStream)
                        .getRootElement();
                datastreamMap.put(multiDocumentMode ? ATOM_DC_DATASTREAM
                        : defaultDatastream, rootNode);
            }
        } else if (!multiDocumentMode) {
            // Add in the targeted datastream. The value is null when no
            // suitable element was found in the entry.
            datastreamMap.put(defaultDatastream,
                    abderaToJDOM(defaultDatastreamElement, saxBuilder));
        }

        // Implied datastreams
        // Add RELS-EXT datastream stub if the ACL datastream is specified
        // and there isn't currently a RELS-EXT
        if (datastreamMap.containsKey(ACL_DATASTREAM)) {
            String relsExt = ContentModelHelper.Datastream.RELS_EXT.getName();
            if (!datastreamMap.containsKey(relsExt)) {
                datastreamMap.put(relsExt, null);
            }
        }
        // Add in a stub for MD_DESCRIPTIVE if a root dc entry was generated
        // and no MODS have been added yet.
        if (multiDocumentMode
                && datastreamMap.containsKey(ATOM_DC_DATASTREAM)) {
            String mdDescriptive = ContentModelHelper.Datastream.MD_DESCRIPTIVE
                    .getName();
            if (!datastreamMap.containsKey(mdDescriptive)) {
                datastreamMap.put(mdDescriptive, null);
            }
        }

        return datastreamMap;
    }

    /**
     * Handles a single non dublin core element while in multi document mode,
     * adding its content to the map if it is a cdr:datastream wrapper, a MODS
     * root or an access control element. Other elements are ignored.
     */
    private static void addMultiDocumentElement(Element element, QName qName,
            SAXBuilder saxBuilder, Map<String, org.jdom2.Element> datastreamMap)
            throws JDOMException, IOException {
        if (DATASTREAM_QNAME.equals(qName)) {
            // Datastream wrapper tag, only usable if it names its datastream
            String id = element.getAttributeValue("id");
            if (id != null) {
                org.jdom2.Element wrapper = abderaToJDOM(element, saxBuilder);
                // Store the first child of the datastream tag as the content
                // for this DS, detached so it no longer belongs to the wrapper
                if (!wrapper.getChildren().isEmpty()) {
                    datastreamMap.put(id, wrapper.getChildren().get(0)
                            .detach());
                }
            }
        } else if (MODS_QNAME.equals(qName)) {
            // MODS root tag. Create the default mods datastream, taking
            // precedence over the stub from DC terms
            org.jdom2.Element modsElement = abderaToJDOM(element, saxBuilder);
            datastreamMap.put(
                    ContentModelHelper.Datastream.MD_DESCRIPTIVE.getName(),
                    modsElement);
        } else if (JDOMNamespaceUtil.CDR_ACL_NS.getURI().equals(
                qName.getNamespaceURI())) {
            log.debug("Extracting access control virtual datastream info");
            org.jdom2.Element aclElement = abderaToJDOM(element, saxBuilder);
            datastreamMap.put(ACL_DATASTREAM, aclElement);
        }
    }

    /**
     * Converts an abdera element to a jdom element by converting it back to
     * raw xml and parsing that with the given builder.
     *
     * @param element
     *            abdera element to convert, may be null
     * @param saxBuilder
     *            builder used to parse the serialized element
     * @return the root element of the parsed document, detached from that
     *         document, or null if element is null
     * @throws JDOMException
     *             if the serialized element cannot be parsed
     * @throws IOException
     *             if the element cannot be serialized or re-read
     */
    public static org.jdom2.Element abderaToJDOM(Element element,
            SAXBuilder saxBuilder) throws JDOMException, IOException {
        if (element == null) {
            return null;
        }
        // In-memory buffer; closing a ByteArrayOutputStream has no effect
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        element.writeTo(outStream);
        try (ByteArrayInputStream inStream = new ByteArrayInputStream(
                outStream.toByteArray())) {
            return saxBuilder.build(inStream).detachRootElement();
        }
    }
}