/*
* Copyright (C) 2014 Robert Simonovsky
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package cz.cas.lib.proarc.common.export.mets;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringWriter;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import javax.xml.XMLConstants;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import javax.xml.bind.Unmarshaller;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.XMLGregorianCalendar;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.codec.binary.Hex;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import com.yourmediashelf.fedora.client.FedoraClient;
import com.yourmediashelf.fedora.client.response.FedoraResponse;
import com.yourmediashelf.fedora.generated.foxml.DatastreamType;
import com.yourmediashelf.fedora.generated.foxml.DatastreamVersionType;
import com.yourmediashelf.fedora.generated.foxml.DigitalObject;
import com.yourmediashelf.fedora.generated.foxml.PropertyType;
import com.yourmediashelf.fedora.generated.foxml.XmlContentType;
import cz.cas.lib.proarc.common.export.mets.structure.IMetsElement;
import cz.cas.lib.proarc.common.export.mets.structure.MetsElement;
import cz.cas.lib.proarc.common.fedora.FoxmlUtils;
import cz.cas.lib.proarc.common.fedora.RemoteStorage;
import cz.cas.lib.proarc.common.fedora.SearchView.Item;
import cz.cas.lib.proarc.mets.DivType;
import cz.cas.lib.proarc.mets.Mets;
import cz.cas.lib.proarc.mets.MetsType.FileSec.FileGrp;
import cz.cas.lib.proarc.mets.StructMapType;
import cz.cas.lib.proarc.mets.info.Info;
import cz.cas.lib.proarc.mets.info.Info.Checksum;
import cz.cas.lib.proarc.mets.info.Info.Itemlist;
import cz.cas.lib.proarc.mets.info.Info.Titleid;
import cz.cas.lib.proarc.mets.info.Info.Validation;
/**
* @author Robert Simonovsky
*
* Utility class
*
*/
public class MetsUtils {
private static Logger LOG = Logger.getLogger(MetsUtils.class.getName());
private static Properties mimeToExtension = new Properties();
/**
* Retuns an XMLGregorianCalendar representation of current date
*
* @return
* @throws MetsExportException
*/
public static XMLGregorianCalendar getCurrentDate() throws MetsExportException {
GregorianCalendar gregory = new GregorianCalendar();
gregory.setTime(new Date());
XMLGregorianCalendar calendar;
try {
calendar = DatatypeFactory.newInstance()
.newXMLGregorianCalendar(
gregory);
} catch (DatatypeConfigurationException e1) {
throw new MetsExportException("Unable to create XMLGregorianDate", false, e1);
}
return calendar;
}
/**
* Returns the properties for mapping Mime type to file extension
*
* @return
* @throws MetsExportException
*/
public static Properties getMimeToExtension() throws MetsExportException {
if (mimeToExtension.isEmpty()) {
try {
mimeToExtension.loadFromXML(MetsUtils.class.getResourceAsStream("mimeToExt.xml"));
} catch (Exception e) {
throw new MetsExportException("Unable to read mime type mapping", false, e);
}
}
return mimeToExtension;
}
private static void findChildPSPs(DigitalObject dObj, MetsContext ctx, List<String> psps, String parentType) throws MetsExportException {
List<Element> relsExt = FoxmlUtils.findDatastream(dObj, "RELS-EXT").getDatastreamVersion().get(0).getXmlContent().getAny();
Node node = MetsUtils.xPathEvaluateNode(relsExt, "*[local-name()='RDF']/*[local-name()='Description']");
NodeList hasPageNodes = node.getChildNodes();
for (int a = 0; a < hasPageNodes.getLength(); a++) {
if (MetsUtils.hasReferenceXML(hasPageNodes.item(a).getNodeName())) {
Node rdfResourceNode = hasPageNodes.item(a).getAttributes().getNamedItem("rdf:resource");
String fileName = rdfResourceNode.getNodeValue();
DigitalObject object = null;
if (ctx.getFedoraClient() != null) {
object = MetsUtils.readRelatedFoXML(fileName, ctx.getFedoraClient());
} else {
object = MetsUtils.readRelatedFoXML(ctx.getPath(), fileName);
}
relsExt = FoxmlUtils.findDatastream(object, "RELS-EXT").getDatastreamVersion().get(0).getXmlContent().getAny();
String model = MetsUtils.getModel(relsExt);
String elementType = Const.typeMap.get(model);
if (Const.PSPElements.contains(elementType)) {
if (((Const.MONOGRAPH_UNIT.equals(parentType) || (Const.ISSUE.equals(parentType)))) && (Const.SUPPLEMENT.equals(elementType))) {
// do not add
} else {
psps.add(object.getPID());
}
} else {
findChildPSPs(object, ctx, psps, elementType);
}
}
}
}
public static List<String> findPSPPIDs(String pid, MetsContext ctx, boolean fillChildren) throws MetsExportException {
List<String> result = new ArrayList<String>();
DigitalObject dObj;
if (ctx.getFedoraClient() != null) {
dObj = readFoXML(pid, ctx.getFedoraClient());
} else {
dObj = readFoXML(ctx.getPath() + File.separator + pid + ".xml");
}
// List<Element> relsExt = FoxmlUtils.findDatastream(dObj,
// "RELS-EXT").getDatastreamVersion().get(0).getXmlContent().getAny();
// String model = MetsUtils.getModel(relsExt);
// String elementType = Const.typeMap.get(model);
String parentId = pid;
String parentModel = null;
String parentType = null;
List<Element> parentRels = null;
DigitalObject parentdbObj = null;
String firstParentType = null;
// if (ctx.getFedoraClient() != null) {
// parentId = MetsUtils.getParent(pid, ctx.getRemoteStorage());
// } else {
// parentId = MetsUtils.getParent(pid, ctx.getFsParentMap());
// }
//
// if (Const.PSPElements.contains(parentType)) {
// result.add(e)
// }
while (parentId != null) {
if (ctx.getFedoraClient() != null) {
parentdbObj = readFoXML(parentId, ctx.getFedoraClient());
} else {
parentdbObj = readFoXML(ctx.getPath() + File.separator + parentId + ".xml");
}
parentRels = FoxmlUtils.findDatastream(parentdbObj, "RELS-EXT").getDatastreamVersion().get(0).getXmlContent().getAny();
parentModel = MetsUtils.getModel(parentRels);
parentType = Const.typeMap.get(parentModel);
if ((parentId.equals(pid)) && (firstParentType == null)) {
firstParentType = parentType;
}
String oldParentId = parentId;
if (ctx.getFedoraClient() != null) {
parentId = MetsUtils.getParent(parentId, ctx.getRemoteStorage());
} else {
parentId = MetsUtils.getParent(parentId, ctx.getFsParentMap());
}
if (Const.PSPElements.contains(parentType)) {
if (Const.SUPPLEMENT.equals(parentType)) {
if (parentId != null) {
DigitalObject parentdbObjSupp;
if (ctx.getFedoraClient() != null) {
parentdbObjSupp = readFoXML(parentId, ctx.getFedoraClient());
} else {
parentdbObjSupp = readFoXML(ctx.getPath() + File.separator + parentId + ".xml");
}
List<Element> parentRelsSupp = FoxmlUtils.findDatastream(parentdbObjSupp, "RELS-EXT").getDatastreamVersion().get(0).getXmlContent().getAny();
String parentTypeSupp = Const.typeMap.get(MetsUtils.getModel(parentRelsSupp));
if (Const.MONOGRAPH_UNIT.equals(parentTypeSupp) || (Const.ISSUE.equals(parentTypeSupp))) {
// do not add an PSP for Supplement under monograph
// unit or issue
} else {
result.add(oldParentId);
}
}
} else {
result.add(oldParentId);
}
}
}
if (fillChildren) {
findChildPSPs(dObj, ctx, result, firstParentType);
}
return result;
}
/**
*
* Converts byte array to hex string
*
* @param byteArray
* @return
*/
public static String byteToHex(byte[] byteArray) {
StringBuffer result = new StringBuffer();
for (byte b : byteArray) {
result.append(String.format("%02X", b));
}
return result.toString();
}
/**
*
* Returns a file name (content location) from the datastream
*
* @param elements
* @return
*/
public static String getFileNameFromStream(List<Element> elements) throws MetsExportException {
if (elements == null) {
return null;
}
return MetsUtils.xPathEvaluateString(elements, "*[local-name()='datastreamVersion']/*[local-name()='contentLocation'/@REF");
}
/**
*
* Returns a mime type attribute from datastream
*
* @param elements
* @return
*/
public static String getMimeFromStream(List<Element> elements) throws MetsExportException {
if (elements == null) {
return null;
}
return MetsUtils.xPathEvaluateString(elements, "*[local-name()='datastreamVersion']/@MIMETYPE");
}
/**
*
* Returns a property value from a list of properties
*
* @param name
* @param properties
* @return
*/
public static String getProperty(String name, java.util.List<PropertyType> properties) throws MetsExportException {
if (name == null) {
throw new MetsExportException("Name is null");
}
if (properties == null) {
throw new MetsExportException("Properties is null");
}
for (PropertyType property : properties) {
if (name.equalsIgnoreCase(property.getNAME())) {
return property.getVALUE();
}
}
throw new MetsExportException("Property " + name + " not found");
}
/**
* Removes the schemaLocation attribute
*
* @param elements
* @return
*/
public static List<Element> removeSchemaLocation(List<Element> elements) {
if (elements.size() > 0) {
Element element = elements.get(0);
element.removeAttribute("xsi:schemaLocation");
element.removeAttribute("schemaLocation");
}
return elements;
}
/**
*
* Removes the top element "modsCollection" from the xml
*
* @param elements
* @return
*/
public static List<Element> removeModsCollection(List<Element> elements) {
if (elements.size() > 0) {
if ("mods:modsCollection".equalsIgnoreCase(elements.get(0).getNodeName())) {
NodeList nl = elements.get(0).getChildNodes();
List<Element> result = new ArrayList<Element>();
result.add((Element) nl.item(0));
return result;
} else {
return elements;
}
}
return null;
}
/**
*
* Returns a datastream of given type
*
* @param datastreams
* @param type
* @return
*/
public static List<Element> getDataStreams(List<DatastreamType> datastreams, String type) {
for (DatastreamType streamType : datastreams) {
if (streamType.getID().startsWith(type)) {
List<DatastreamVersionType> dsVersions = streamType.getDatastreamVersion();
for (DatastreamVersionType dsVersion : dsVersions) {
XmlContentType dcContent = dsVersion.getXmlContent();
List<Element> elements = dcContent.getAny();
return elements;
}
}
}
return null;
}
/**
*
* Returns a datastream of given type from binary representation
*
* @param datastreams
* @param type
* @return
*/
public static byte[] getBinaryDataStreams(List<DatastreamType> datastreams, String type) {
for (DatastreamType streamType : datastreams) {
if (streamType.getID().startsWith(type)) {
List<DatastreamVersionType> dsVersions = streamType.getDatastreamVersion();
for (DatastreamVersionType dsVersion : dsVersions) {
return dsVersion.getBinaryContent();
}
}
}
return null;
}
/**
* Method for identifying dataStream name
*
* @param dataStream
* @param streamName
* @return
*/
public static boolean equalDataStreams(String dataStream, String streamName) {
if (dataStream.equalsIgnoreCase(streamName)) {
return true;
}
if (dataStream.startsWith(streamName + ".")) {
return true;
}
String datastreamIMG = "IMG_" + streamName;
if (dataStream.equalsIgnoreCase(datastreamIMG)) {
return true;
}
if (dataStream.startsWith(datastreamIMG + ".")) {
return true;
}
return false;
}
/**
*
* Generates an XML document from list of elements
*
* @param elements
* @return
*/
public static Document getDocumentFromList(List<Element> elements) throws MetsExportException {
Document document = null;
try {
DocumentBuilderFactory builder = DocumentBuilderFactory.newInstance();
builder.setValidating(true);
builder.setNamespaceAware(true);
document = builder.newDocumentBuilder().newDocument();
} catch (ParserConfigurationException e1) {
throw new MetsExportException("Error while getting document from list", false, e1);
}
for (Element element : elements) {
Node newNode = element.cloneNode(true);
document.adoptNode(newNode);
document.appendChild(newNode);
}
return document;
}
/**
*
* Returns a string from the xml document defined by the Xpath
*
* @param elements
* @param xPath
* @return
*/
public static String xPathEvaluateString(List<Element> elements, String xPath) throws MetsExportException {
XPath xpathObject = XPathFactory.newInstance().newXPath();
Document document = getDocumentFromList(elements);
try {
return xpathObject.compile(xPath).evaluate(document);
} catch (XPathExpressionException e) {
throw new MetsExportException("Error while evaluating xPath:" + xPath, false, e);
}
}
/**
*
* Returns a node from the xml document defined by the Xpath
*
* @param elements
* @param xPath
* @return
*/
public static Node xPathEvaluateNode(List<Element> elements, String xPath) throws MetsExportException {
Document document = null;
try {
document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
} catch (ParserConfigurationException e1) {
throw new MetsExportException("Error while evaluating xPath " + xPath, false, e1);
}
for (Element element : elements) {
Node newNode = element.cloneNode(true);
document.adoptNode(newNode);
document.appendChild(newNode);
}
XPath xpathObject = XPathFactory.newInstance().newXPath();
try {
return (Node) xpathObject.compile(xPath).evaluate(document, XPathConstants.NODE);
} catch (XPathExpressionException e) {
throw new MetsExportException("Error while evaluating xPath " + xPath, false, e);
}
}
/**
*
* Returns a model of the document
*
* @param relExtStream
* @return
*/
public static String getModel(List<Element> relExtStream) throws MetsExportException {
Node hasPageNodes = MetsUtils.xPathEvaluateNode(relExtStream, "*[local-name()='RDF']/*[local-name()='Description']/*[local-name()='hasModel']");
String model = hasPageNodes.getAttributes().getNamedItem("rdf:resource").getNodeValue();
return model;
}
/**
*
* Returns a dataStream from Fedora for given pid
*
* @param fedoraClient
* @param pid
* @param streamName
* @return
* @throws MetsExportException
*/
public static List<Element> getDataStreams(FedoraClient fedoraClient, String pid, String streamName) throws MetsExportException {
try {
FedoraResponse response = FedoraClient.getDatastreamDissemination(pid, streamName).execute(fedoraClient);
InputStream is = response.getEntityInputStream();
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
Document doc = db.parse(is);
List<Element> elements = new ArrayList<Element>();
elements.add(doc.getDocumentElement());
return elements;
} catch (Exception ex) {
throw new MetsExportException("Error while getting stream " + streamName + " from " + pid, false, ex);
}
}
/**
*
* Copies inputStream to outputStream
*
* @param is
* @param os
* @throws IOException
*/
public static void copyStream(InputStream is, OutputStream os) throws IOException {
byte[] buffer = new byte[1024];
int len;
while ((len = is.read(buffer)) != -1) {
os.write(buffer, 0, len);
}
is.close();
}
/**
*
* Returns the byteArray of the specified datastream from fedora
*
* @param fedoraClient
* @param pid
* @param streamName
* @return
* @throws MetsExportException
*/
public static byte[] getBinaryDataStreams(FedoraClient fedoraClient, IMetsElement metsElement, String streamName) throws MetsExportException {
try {
DatastreamType rawDS = FoxmlUtils.findDatastream(metsElement.getSourceObject(), streamName);
if (rawDS != null) {
FedoraResponse response = FedoraClient.getDatastreamDissemination(metsElement.getOriginalPid(), streamName).execute(fedoraClient);
InputStream is = response.getEntityInputStream();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
copyStream(is, bos);
bos.close();
return bos.toByteArray();
} else {
return null;
}
} catch (Exception ex) {
throw new MetsExportException(metsElement.getOriginalPid(), "Error while getting stream " + streamName + " from " + metsElement.getElementType(), false, ex);
}
}
/**
*
* Prepares a logical/physical structure divs in mets
*
* @param mets
* @param label
* @param type
* @return
*/
public static DivType createStructureDiv(Mets mets, String label, String type) {
StructMapType structType = new StructMapType();
mets.getStructMap().add(structType);
structType.setLabel2(label);
structType.setTYPE(type);
DivType divType = new DivType();
structType.setDiv(divType);
divType.setLabel(mets.getLabel1());
return divType;
}
/**
*
* Inits the file groups in mets
*
* @param mets
* @return
*/
public static HashMap<String, FileGrp> initFileGroups() {
FileGrp MCimagesGRP = new FileGrp();
MCimagesGRP.setID("MC_IMGGRP");
MCimagesGRP.setUSE("Images");
// mets.getFileSec().getFileGrp().add(MCimagesGRP);
FileGrp UCimageGrp = new FileGrp();
UCimageGrp.setID("UC_IMGGRP");
UCimageGrp.setUSE("Images");
// mets.getFileSec().getFileGrp().add(UCimageGrp);
FileGrp AltoGRP = new FileGrp();
AltoGRP.setID("ALTOGRP");
AltoGRP.setUSE("Layout");
// mets.getFileSec().getFileGrp().add(AltoGRP);
FileGrp TxtGRP = new FileGrp();
TxtGRP.setID("TXTGRP");
TxtGRP.setUSE("Text");
// mets.getFileSec().getFileGrp().add(TxtGRP);
FileGrp TechMDGrp = new FileGrp();
TechMDGrp.setID("TECHMDGRP");
TechMDGrp.setUSE("Technical Metadata");
// mets.getFileSec().getFileGrp().add(TechMDGrp);
HashMap<String, FileGrp> fileGrpMap = new HashMap<String, FileGrp>();
fileGrpMap.put("UC_IMGGRP", UCimageGrp);
fileGrpMap.put("MC_IMGGRP", MCimagesGRP);
fileGrpMap.put("ALTOGRP", AltoGRP);
fileGrpMap.put("TXTGRP", TxtGRP);
fileGrpMap.put("TECHMDGRP", TechMDGrp);
return fileGrpMap;
}
/**
*
* Reads and unmarshalls Digital Object
*
* @param path
* @return
*/
public static DigitalObject readFoXML(String path) throws MetsExportException {
DigitalObject foXMLObject;
File file = new File(path);
try {
JAXBContext jaxbContext = JAXBContext.newInstance(DigitalObject.class);
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
foXMLObject = (DigitalObject) unmarshaller.unmarshal(file);
return foXMLObject;
} catch (JAXBException e) {
throw new MetsExportException("Unable to read FoXML document " + path, false, e);
}
}
/**
*
* Reads and unmarshalls Digital Object from Fedora
*
* @param path
* @return
*/
public static DigitalObject readFoXML(String uuid, FedoraClient client) throws MetsExportException {
DigitalObject foXMLObject = null;
if (uuid.startsWith("info:fedora/")) {
uuid = uuid.substring(uuid.indexOf("/") + 1);
}
LOG.log(Level.FINE, "Reading document from Fedora:" + uuid);
try {
FedoraResponse response = FedoraClient.getObjectXML(uuid).execute(client);
JAXBContext jaxbContext = JAXBContext.newInstance(DigitalObject.class);
Unmarshaller unmarshaller = jaxbContext.createUnmarshaller();
foXMLObject = (DigitalObject) unmarshaller.unmarshal(response.getEntityInputStream());
} catch (Exception e) {
throw new MetsExportException("Unable to get " + uuid + " from Fedora", false, e);
}
return foXMLObject;
}
/**
*
* Transforms the xml document to a string
*
* @param doc
* @return
*/
public static String documentToString(Document doc) throws MetsExportException {
try {
StringWriter sw = new StringWriter();
TransformerFactory tf = TransformerFactory.newInstance();
Transformer transformer = tf.newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
transformer.setOutputProperty(OutputKeys.METHOD, "xml");
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
transformer.transform(new DOMSource(doc), new StreamResult(sw));
return sw.toString();
} catch (TransformerException ex) {
throw new MetsExportException("Error converting Document to String", false, ex);
}
}
/**
*
* Validates given document agains an XSD schema
*
* @param document
* @param xsd
* @return
*/
public static List<String> validateAgainstXSD(Document document, InputStream xsd) throws Exception {
SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
factory.setResourceResolver(MetsLSResolver.getInstance());
Schema schema = factory.newSchema(new StreamSource(xsd));
TransformerFactory tFactory = TransformerFactory.newInstance();
Transformer transformer = tFactory.newTransformer();
DOMSource domSource = new DOMSource(document);
StreamResult sResult = new StreamResult();
ByteArrayOutputStream bos = new ByteArrayOutputStream();
sResult.setOutputStream(bos);
transformer.transform(domSource, sResult);
InputStream is = new ByteArrayInputStream(bos.toByteArray());
DocumentBuilderFactory dbfactory = DocumentBuilderFactory.newInstance();
dbfactory.setValidating(false);
dbfactory.setNamespaceAware(true);
dbfactory.setSchema(schema);
DocumentBuilder documentBuilder = dbfactory.newDocumentBuilder();
ValidationErrorHandler errorHandler = new ValidationErrorHandler();
documentBuilder.setErrorHandler(errorHandler);
documentBuilder.parse(is);
return errorHandler.getValidationErrors();
}
/**
*
* Validates given XML file against an XSD schema
*
* @param file
* @param xsd
* @return
*/
public static List<String> validateAgainstXSD(File file, InputStream xsd) throws Exception {
SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
factory.setResourceResolver(MetsLSResolver.getInstance());
Schema schema = factory.newSchema(new StreamSource(xsd));
DocumentBuilderFactory dbfactory = DocumentBuilderFactory.newInstance();
dbfactory.setValidating(false);
dbfactory.setNamespaceAware(true);
dbfactory.setSchema(schema);
DocumentBuilder documentBuilder = dbfactory.newDocumentBuilder();
ValidationErrorHandler errorHandler = new ValidationErrorHandler();
documentBuilder.setErrorHandler(errorHandler);
documentBuilder.parse(file);
return errorHandler.getValidationErrors();
}
/**
*
* Indicates if the "has..." is used for defining children
*
* @param name
* @return
*/
public static boolean hasReferenceXML(String name) {
if (Const.HASINTCOMPPART.equalsIgnoreCase(name)) {
return true;
}
if (Const.HASISSUE.equalsIgnoreCase(name)) {
return true;
}
if (Const.HASMEMBER.equalsIgnoreCase(name)) {
return true;
}
if (Const.HASPAGE.equalsIgnoreCase(name)) {
return true;
}
if (Const.HASUNIT.equalsIgnoreCase(name)) {
return true;
}
if (Const.HASVOLUME.equalsIgnoreCase(name)) {
return true;
}
return false;
}
/* Return a valid identifier for mets document removes whitespaces and if an
* identifier does not start with a letter it adds a prefix
*
* @param identifier
* @return
*/
public static String validateIdentifier(String identifier) {
identifier = removeNonAlpabetChars(identifier);
if (!(identifier.toUpperCase().substring(0, 1).matches("[A-Z]"))) {
return "FID_" + identifier;
} else {
return identifier;
}
}
/**
* Returns a string with alphabetical characters only
*
* @param inputString
* @return
*/
public static String removeNonAlpabetChars(String inputString) {
String validChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890abcdefghijklmnopqrstuvwxyz_-.";
String output = "";
for (int a = 0; a < inputString.length(); a++) {
if (validChars.contains(inputString.substring(a, a + 1))) {
output = output + inputString.substring(a, a + 1);
}
}
return output;
}
public static void addModsIdentifiersRecursive(MetsElement element, Info infoJaxb) throws MetsExportException {
Map<String, String> identifiers = element.getModsIdentifiers();
for (String type : identifiers.keySet()) {
if (Const.allowedIdentifiers.contains(type)) {
boolean alreadyAdded = false;
for (Titleid titleId : infoJaxb.getTitleid()) {
if ((titleId.getType().equals(type)) && (titleId.getValue().equals(identifiers.get(type)))) {
alreadyAdded = true;
break;
}
}
if (!alreadyAdded) {
Titleid titleId = new Titleid();
titleId.setType(type);
titleId.setValue(identifiers.get(type));
infoJaxb.getTitleid().add(titleId);
}
}
}
for (MetsElement child : element.getChildren()) {
addModsIdentifiersRecursive(child, infoJaxb);
}
}
/**
*
* Generates and saves info.xml
*
* @param path
* @param mets
*/
public static void saveInfoFile(String path, MetsContext metsContext, String md5, String fileMd5Name, File metsFile) throws MetsExportException {
File infoFile = new File(path + File.separator + metsContext.getPackageID() + File.separator + "info_" + metsContext.getPackageID() + ".xml");
GregorianCalendar c = new GregorianCalendar();
c.setTime(new Date());
XMLGregorianCalendar date2;
try {
date2 = DatatypeFactory.newInstance().newXMLGregorianCalendar(c);
} catch (DatatypeConfigurationException e1) {
throw new MetsExportException("Error while generating info.xml file", false, e1);
}
Info infoJaxb = new Info();
infoJaxb.setCreated(date2);
infoJaxb.setMainmets("./" + metsFile.getName());
Checksum checkSum = new Checksum();
checkSum.setChecksum(md5);
checkSum.setType("MD5");
addModsIdentifiersRecursive(metsContext.getRootElement(), infoJaxb);
checkSum.setValue(fileMd5Name);
infoJaxb.setChecksum(checkSum);
Validation validation = new Validation();
validation.setValue("W3C-XML");
validation.setVersion(Float.valueOf("0.0"));
infoJaxb.setValidation(validation);
infoJaxb.setCreator(metsContext.getCreatorOrganization());
infoJaxb.setPackageid(metsContext.getPackageID());
if (Const.PERIODICAL_TITLE.equalsIgnoreCase(metsContext.getRootElement().getElementType())) {
infoJaxb.setMetadataversion((float) 1.5);
} else {
infoJaxb.setMetadataversion((float) 1.1);
}
Itemlist itemList = new Itemlist();
infoJaxb.setItemlist(itemList);
itemList.setItemtotal(BigInteger.valueOf(metsContext.getFileList().size()));
List<FileMD5Info> fileList = metsContext.getFileList();
long size = 0;
for (FileMD5Info fileName : fileList) {
itemList.getItem().add(fileName.getFileName().replaceAll(Matcher.quoteReplacement(File.separator), "/"));
size += fileName.getSize();
}
int infoTotalSize = (int) (size/1024);
infoJaxb.setSize(infoTotalSize);
try {
JAXBContext jaxbContext = JAXBContext.newInstance(Info.class);
Marshaller marshaller = jaxbContext.createMarshaller();
// SchemaFactory factory =
// SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
// factory.setResourceResolver(MetsLSResolver.getInstance());
// Schema schema = factory.newSchema(new
// StreamSource(Info.class.getResourceAsStream("info.xsd")));
// marshaller.setSchema(schema);
marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
marshaller.setProperty(Marshaller.JAXB_ENCODING, "utf-8");
marshaller.marshal(infoJaxb, infoFile);
} catch (Exception ex) {
throw new MetsExportException("Error while generating info.xml", false, ex);
}
List<String> validationErrors;
try {
validationErrors = MetsUtils.validateAgainstXSD(infoFile, Info.class.getResourceAsStream("info.xsd"));
} catch (Exception e) {
throw new MetsExportException("Error while validating info.xml", false, e);
}
if (validationErrors.size() > 0) {
MetsExportException metsException = new MetsExportException("Invalid info file:" + infoFile.getAbsolutePath(), false, null);
metsException.getExceptions().get(0).setValidationErrors(validationErrors);
for (String error : validationErrors) {
LOG.fine(error);
}
throw metsException;
}
}
/**
*
* Returns an ObjectID from the rels-ext stream
*
* @param relExtElements
* @return
*/
public static String getObjectId(List<Element> relExtElements) throws MetsExportException {
String XPATH = "*[local-name()='RDF']/*[local-name()='Description']";
Node descNode = xPathEvaluateNode(relExtElements, XPATH);
String ID = descNode.getAttributes().getNamedItem("rdf:about").getNodeValue();
return ID.substring(ID.indexOf("/") + 1);
}
/**
*
* Reads referenced object from Fedora
*
* @param uuid
* @param client
* @return
*/
public static DigitalObject readRelatedFoXML(String uuid, FedoraClient client) throws MetsExportException {
DigitalObject object = readFoXML(uuid, client);
return object;
}
/**
*
* Reads referenced object from file
*
* @param path
* @param fileName
* @return
*/
public static DigitalObject readRelatedFoXML(String path, String fileName) throws MetsExportException {
String fileNameInternal = path + fileName.substring(fileName.lastIndexOf(":") + 1) + ".xml";
DigitalObject object = readFoXML(fileNameInternal);
return object;
}
/**
*
* Generates an MD5 checksum and copies a file (image) to defined
* OutputStream
*
* @param is
* @param os
* @return
* @throws NoSuchAlgorithmException
* @throws IOException
*/
public static FileMD5Info getDigestAndCopy(InputStream is, OutputStream os) throws NoSuchAlgorithmException, IOException {
MessageDigest md = MessageDigest.getInstance("MD5");
md.reset();
byte[] bytes = new byte[2048];
int numBytes;
long totalBytes = 0;
while ((numBytes = is.read(bytes)) > 0) {
totalBytes += numBytes;
md.update(bytes, 0, numBytes);
os.write(bytes, 0, numBytes);
}
byte[] digest = md.digest();
os.close();
is.close();
String result = new String(Hex.encodeHex(digest));
return new FileMD5Info(result, totalBytes);
}
/**
*
* Generates an MD5 checksum OutputStream
*
* @param is
* @param os
* @return
* @throws NoSuchAlgorithmException
* @throws IOException
*/
public static FileMD5Info getDigest(InputStream is) throws NoSuchAlgorithmException, IOException {
MessageDigest md = MessageDigest.getInstance("MD5");
md.reset();
byte[] bytes = new byte[2048];
int numBytes;
long totalBytes = 0;
while ((numBytes = is.read(bytes)) > 0) {
totalBytes += numBytes;
md.update(bytes, 0, numBytes);
}
byte[] digest = md.digest();
String result = new String(Hex.encodeHex(digest));
return new FileMD5Info(result, totalBytes);
}
/**
* Returns parent pid from Resource index
*
* @param uuid
* @param remoteStorage
* @return
*/
public static String getParent(String uuid, RemoteStorage remoteStorage) throws MetsExportException {
List<Item> referrers;
try {
referrers = remoteStorage.getSearch().findReferrers(uuid);
} catch (Exception e) {
throw new MetsExportException("Error while finding parent for:" + uuid, false, e);
}
if (referrers.size() > 1) {
throw new MetsExportException("More referrers for pid:" + uuid, false);
}
if (referrers.size() == 0) {
return null;
}
return referrers.get(0).getPid();
}
/**
*
* Mock method for simulation of resource index
*
* @param uuid
* @return
*/
public static String getParent(String uuid, Map<String, String> fileSystemParents) {
String result = fileSystemParents.get(uuid);
LOG.log(Level.FINE, "Parent from FS for :" + uuid + " found:" + result);
return result;
}
/**
*
* Checks if a monograph is MultiUnit
*
* @param monograph
* @return
*/
public static boolean isMultiUnitMonograph(MetsElement monograph) {
if (Const.VOLUME.equals(monograph.getElementType())) {
for (MetsElement element : monograph.getChildren()) {
if (Const.MONOGRAPH_UNIT.equalsIgnoreCase(element.getElementType())) {
return true;
}
}
}
return false;
}
/**
*
* Generates a document from a byte array
*
* @param bytes
* @return
*/
public static Document getDocumentFromBytes(byte[] bytes) throws MetsExportException {
if (bytes == null) {
return null;
}
DocumentBuilder builder;
try {
builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
} catch (ParserConfigurationException e) {
throw new MetsExportException("Error while creating DocumentBuilder", false, e);
}
Document document;
try {
document = builder.parse(new ByteArrayInputStream(bytes));
} catch (Exception e) {
throw new MetsExportException("Error while parsing document", false, e);
}
return document;
}
/**
* Deletes a folder
*
* @param folder
*/
public static void deleteFolder(File folder) {
File[] files = folder.listFiles();
if (files != null) {
for (File f : files) {
if (f.isDirectory()) {
deleteFolder(f);
} else {
f.delete();
}
}
}
folder.delete();
}
}