/*
* Copyright (C) 2014 Robert Simonovsky
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package cz.cas.lib.proarc.common.export.mets.structure;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.math.BigInteger;
import java.net.URI;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.Marshaller;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.XMLGregorianCalendar;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.codec.binary.Hex;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.yourmediashelf.fedora.client.FedoraClient;
import com.yourmediashelf.fedora.client.FedoraClientException;
import com.yourmediashelf.fedora.client.request.GetDatastreamDissemination;
import com.yourmediashelf.fedora.generated.foxml.DatastreamType;
import com.yourmediashelf.fedora.generated.foxml.DatastreamVersionType;
import cz.cas.lib.proarc.common.device.Device;
import cz.cas.lib.proarc.common.device.DeviceException;
import cz.cas.lib.proarc.common.device.DeviceRepository;
import cz.cas.lib.proarc.common.export.mets.Const;
import cz.cas.lib.proarc.common.export.mets.JHoveOutput;
import cz.cas.lib.proarc.common.export.mets.JhoveUtility;
import cz.cas.lib.proarc.common.export.mets.MetsContext;
import cz.cas.lib.proarc.common.export.mets.FileMD5Info;
import cz.cas.lib.proarc.common.export.mets.MetsExportException;
import cz.cas.lib.proarc.common.export.mets.MetsUtils;
import cz.cas.lib.proarc.common.export.mets.MimeType;
import cz.cas.lib.proarc.common.fedora.FoxmlUtils;
import cz.cas.lib.proarc.common.fedora.MixEditor;
import cz.cas.lib.proarc.common.ocr.AltoDatastream;
import cz.cas.lib.proarc.mets.AmdSecType;
import cz.cas.lib.proarc.mets.AreaType;
import cz.cas.lib.proarc.mets.DivType;
import cz.cas.lib.proarc.mets.MetsType;
import cz.cas.lib.proarc.mets.DivType.Fptr;
import cz.cas.lib.proarc.mets.FileType;
import cz.cas.lib.proarc.mets.FileType.FLocat;
import cz.cas.lib.proarc.mets.MdSecType;
import cz.cas.lib.proarc.mets.MdSecType.MdWrap;
import cz.cas.lib.proarc.mets.MdSecType.MdWrap.XmlData;
import cz.cas.lib.proarc.mets.Mets;
import cz.cas.lib.proarc.mets.MetsType.FileSec;
import cz.cas.lib.proarc.mets.MetsType.MetsHdr;
import cz.cas.lib.proarc.mets.MetsType.FileSec.FileGrp;
import cz.cas.lib.proarc.mets.MetsType.MetsHdr.Agent;
import cz.cas.lib.proarc.mets.MetsType.StructLink;
import cz.cas.lib.proarc.mets.StructLinkType.SmLink;
import cz.cas.lib.proarc.mets.StructMapType;
import cz.cas.lib.proarc.mix.BasicImageInformationType.BasicImageCharacteristics.PhotometricInterpretation;
import cz.cas.lib.proarc.mix.Mix;
import cz.cas.lib.proarc.mods.ModsDefinition;
import cz.cas.lib.proarc.oaidublincore.OaiDcType;
import cz.cas.lib.proarc.premis.AgentComplexType;
import cz.cas.lib.proarc.premis.AgentIdentifierComplexType;
import cz.cas.lib.proarc.premis.CreatingApplicationComplexType;
import cz.cas.lib.proarc.premis.EventComplexType;
import cz.cas.lib.proarc.premis.EventIdentifierComplexType;
import cz.cas.lib.proarc.premis.EventOutcomeInformationComplexType;
import cz.cas.lib.proarc.premis.FixityComplexType;
import cz.cas.lib.proarc.premis.FormatComplexType;
import cz.cas.lib.proarc.premis.FormatDesignationComplexType;
import cz.cas.lib.proarc.premis.FormatRegistryComplexType;
import cz.cas.lib.proarc.premis.LinkingAgentIdentifierComplexType;
import cz.cas.lib.proarc.premis.LinkingEventIdentifierComplexType;
import cz.cas.lib.proarc.premis.LinkingObjectIdentifierComplexType;
import cz.cas.lib.proarc.premis.ObjectCharacteristicsComplexType;
import cz.cas.lib.proarc.premis.ObjectFactory;
import cz.cas.lib.proarc.premis.ObjectIdentifierComplexType;
import cz.cas.lib.proarc.premis.OriginalNameComplexType;
import cz.cas.lib.proarc.premis.PremisComplexType;
import cz.cas.lib.proarc.premis.PreservationLevelComplexType;
import cz.cas.lib.proarc.premis.RelatedEventIdentificationComplexType;
import cz.cas.lib.proarc.premis.RelatedObjectIdentificationComplexType;
import cz.cas.lib.proarc.premis.RelationshipComplexType;
/**
* Visitor class for creating mets document out of Mets objects
*
* @author Robert Simonovsky
*
*/
public class MetsElementVisitor implements IMetsElementVisitor {
// Logger named after this visitor class.
private final Logger LOG = Logger.getLogger(MetsElementVisitor.class.getName());
// METS document that the helper methods of this visitor read and extend.
private Mets mets;
// Struct map of TYPE "LOGICAL"; assigned in prepareMets().
private StructMapType logicalStruct;
// Struct map of TYPE "PHYSICAL"; assigned in prepareMets().
private StructMapType physicalStruct;
// File groups by String key; assigned in initHeader() from MetsUtils.initFileGroups().
private HashMap<String, FileGrp> fileGrpMap;
// NOTE(review): the two maps below are not touched in the visible part of the class;
// presumably they collect the data for the struct link section - confirm against addStructLink().
private final Map<StructLinkMapping, String> pageOrderToDivMap = new HashMap<StructLinkMapping, String>();
private final Map<String, List<StructLinkMapping>> structToPageMap = new HashMap<String, List<StructLinkMapping>>();
// Package-private counters starting at zero.
// NOTE(review): their use is outside the visible excerpt; presumably they number generated divs - confirm.
int pageCounter = 0;
int articleCounter = 0;
int chapterCounter = 0;
/**
 * Creates a fresh directory for every entry of {@code Const.streamMappingFile}
 * under {@code <outputPath>/<packageID>}.
 * <p>
 * A directory that already exists is deleted (including its content) and
 * created again. A failure to create a directory is logged as a warning
 * instead of being silently ignored.
 *
 * @param metsContext
 *            context providing the output path and the package id
 */
private void createDirectoryStructure(MetsContext metsContext) {
    String packageRoot = metsContext.getOutputPath() + File.separator + metsContext.getPackageID();
    for (String directory : Const.streamMappingFile.values()) {
        File file = new File(packageRoot + File.separator + directory);
        if (file.exists()) {
            deleteFolder(file);
        }
        // mkdir() reports failure only through its return value
        if (!file.mkdir()) {
            LOG.log(Level.WARNING, "Unable to create directory:" + file.getAbsolutePath());
        }
    }
}
/**
 * Recursively removes a folder together with everything stored in it.
 *
 * @param folder
 *            the folder (or plain file) to delete
 */
private static void deleteFolder(File folder) {
    File[] children = folder.listFiles();
    if (children == null) {
        // not a directory or not listable - only the entry itself can be removed
        folder.delete();
        return;
    }
    for (int i = 0; i < children.length; i++) {
        File child = children[i];
        if (!child.isDirectory()) {
            child.delete();
            continue;
        }
        deleteFolder(child);
    }
    folder.delete();
}
/**
 * Fills the label and the header of the mets document from the given
 * element and (re)initializes the file group map.
 *
 * @param metsElement
 *            element supplying the label, the dates and the creator
 *            organization
 */
protected void initHeader(IMetsElement metsElement) {
    mets.setLabel1(metsElement.getLabel());

    MetsHdr header = new MetsHdr();
    header.setCREATEDATE(metsElement.getCreateDate());
    header.setLASTMODDATE(metsElement.getLastUpdateDate());

    // the creating organization is the only agent of the header
    Agent creator = new Agent();
    creator.setName(metsElement.getMetsContext().getCreatorOrganization());
    creator.setTYPE("ORGANIZATION");
    creator.setROLE("CREATOR");
    header.getAgent().add(creator);

    mets.setMetsHdr(header);
    fileGrpMap = MetsUtils.initFileGroups();
}
/**
 * Creates a new mets document holding an empty LOGICAL and an empty
 * PHYSICAL struct map (in this order). Both struct maps are also stored
 * in the corresponding fields of this visitor.
 *
 * @param metsElement
 *            not used by this method
 * @return the newly created mets document
 * @throws MetsExportException
 */
private Mets prepareMets(IMetsElement metsElement) throws MetsExportException {
    StructMapType logical = new StructMapType();
    logical.setTYPE("LOGICAL");
    logical.setLabel2("Logical_Structure");

    StructMapType physical = new StructMapType();
    physical.setTYPE("PHYSICAL");
    physical.setLabel2("Physical_Structure");

    Mets document = new Mets();
    document.getStructMap().add(logical);
    document.getStructMap().add(physical);

    this.logicalStruct = logical;
    this.physicalStruct = physical;
    return document;
}
/**
 * Saves the mets document into a file, writes the MD5 list of the package
 * and the info file, and validates the saved document against mets.xsd.
 * <p>
 * All streams opened here are closed in {@code finally} blocks so that no
 * file handle stays open when a step fails. The Jhove config files are
 * destroyed at the end regardless of the outcome.
 *
 * @param mets
 *            document to marshal
 * @param outputFile
 *            target file of the mets document
 * @param metsElement
 *            element supplying the context (output path, package id, file
 *            list) and the pid used in error messages
 * @throws MetsExportException
 *             when saving, hashing or validation fails
 */
private void saveMets(Mets mets, File outputFile, IMetsElement metsElement) throws MetsExportException {
    try {
        addFileGrpToMets(fileGrpMap);
        addStructLink();
        try {
            JAXBContext jaxbContext = JAXBContext.newInstance(Mets.class, OaiDcType.class, ModsDefinition.class);
            Marshaller marshaller = jaxbContext.createMarshaller();
            marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
            marshaller.setProperty(Marshaller.JAXB_ENCODING, "utf-8");
            // marshaller.setProperty(Marshaller.JAXB_SCHEMA_LOCATION,
            // "http://www.w3.org/2001/XMLSchema-instance http://www.w3.org/2001/XMLSchema.xsd http://www.loc.gov/METS/ http://www.loc.gov/standards/mets/mets.xsd http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/mods.xsd http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd");
            marshaller.marshal(mets, outputFile);

            // MD5 and size of the mets file that was just written
            MessageDigest md;
            try {
                md = MessageDigest.getInstance("MD5");
            } catch (NoSuchAlgorithmException e) {
                throw new MetsExportException("Unable to create MD5 hash", false, e);
            }
            md.reset();
            InputStream is;
            try {
                is = new FileInputStream(outputFile);
            } catch (FileNotFoundException e) {
                throw new MetsExportException("Unable to open file:" + outputFile.getAbsolutePath(), false, e);
            }
            long totalBytes = 0;
            try {
                byte[] bytes = new byte[2048];
                int numBytes;
                try {
                    while ((numBytes = is.read(bytes)) != -1) {
                        totalBytes = totalBytes + numBytes;
                        md.update(bytes, 0, numBytes);
                    }
                } catch (IOException e) {
                    throw new MetsExportException("Unable to generate MD5 hash", false, e);
                }
            } finally {
                is.close();
            }
            String result = new String(Hex.encodeHex(md.digest()));
            // the mets file has to be in the list before the MD5 file is written
            metsElement.getMetsContext().getFileList().add(new FileMD5Info("." + File.separator + outputFile.getName(), result, totalBytes));

            String fileMd5Name = "MD5_" + MetsUtils.removeNonAlpabetChars(metsElement.getMetsContext().getPackageID()) + ".md5";
            File fileMd5 = new File(metsElement.getMetsContext().getOutputPath() + File.separator + metsElement.getMetsContext().getPackageID() + File.separator + fileMd5Name);
            OutputStreamWriter osw = new OutputStreamWriter(new FileOutputStream(fileMd5));
            try {
                for (FileMD5Info info : metsElement.getMetsContext().getFileList()) {
                    osw.write(info.getMd5() + " " + info.getFileName() + "\n");
                }
            } finally {
                osw.close();
            }

            // calculate md5 for md5file - it's inserted into info.xml
            FileMD5Info md5InfoMd5File;
            InputStream md5Stream = new FileInputStream(fileMd5);
            try {
                md5InfoMd5File = MetsUtils.getDigest(md5Stream);
            } finally {
                md5Stream.close();
            }
            metsElement.getMetsContext().getFileList().add(new FileMD5Info("." + File.separator + fileMd5Name, null, fileMd5.length()));
            MetsUtils.saveInfoFile(metsElement.getMetsContext().getOutputPath(), metsElement.getMetsContext(), md5InfoMd5File.getMd5(), fileMd5Name, outputFile);
        } catch (Exception ex) {
            // every failure of the save phase is reported with the pid of the element
            throw new MetsExportException(metsElement.getOriginalPid(), "Unable to save mets file:" + outputFile.getAbsolutePath(), false, ex);
        }

        List<String> validationErrors;
        try {
            validationErrors = MetsUtils.validateAgainstXSD(outputFile, Mets.class.getResourceAsStream("mets.xsd"));
        } catch (Exception ex) {
            throw new MetsExportException("Error while validation document:" + outputFile, false, ex);
        }
        if (validationErrors.size() > 0) {
            MetsExportException metsException = new MetsExportException("Invalid mets file:" + outputFile, false, null);
            metsException.getExceptions().get(0).setValidationErrors(validationErrors);
            for (String error : validationErrors) {
                LOG.fine(error);
            }
            throw metsException;
        }
        LOG.log(Level.FINE, "Element validated:" + metsElement.getOriginalPid() + "(" + metsElement.getElementType() + ")");
    } finally {
        JhoveUtility.destroyConfigFiles(metsElement.getMetsContext().getJhoveContext());
    }
    metsElement.getMetsContext().getGeneratedPSP().add(metsElement.getMetsContext().getPackageID());
}
/**
 * Adds every file group that contains at least one file to the file
 * section of the mets document. The file section is created on demand,
 * i.e. only when there is a non-empty group to add.
 *
 * @param fileGrpMap
 *            file groups to inspect
 */
private void addFileGrpToMets(Map<String, FileGrp> fileGrpMap) {
    for (FileGrp group : fileGrpMap.values()) {
        if (group.getFile().isEmpty()) {
            continue;
        }
        if (mets.getFileSec() == null) {
            mets.setFileSec(new FileSec());
        }
        mets.getFileSec().getFileGrp().add(group);
    }
}
/**
 * Adds the descriptive metadata sections of an element to the mets
 * document: a MODS section (id prefix MODSMD_) when the element has a
 * MODS stream and a DC section (id prefix DCMD_) when it has a
 * descriptor. The MODS section is also registered on the element itself
 * and the first node of the MODS stream gets an ID attribute.
 *
 * @param metsElement
 *            element whose descriptors are added
 */
private void addDmdSec(IMetsElement metsElement) {
    // MODS
    if (metsElement.getModsStream() != null) {
        MdSecType modsSection = new MdSecType();
        metsElement.setModsMetsElement(modsSection);
        mets.getDmdSec().add(modsSection);
        modsSection.setID("MODSMD_" + metsElement.getModsElementID());

        metsElement.getModsStream().get(0).setAttribute("ID", "MODS_" + metsElement.getModsElementID());
        XmlData modsContent = new XmlData();
        modsContent.getAny().addAll(metsElement.getModsStream());

        MdWrap modsWrap = new MdWrap();
        modsWrap.setMIMETYPE("text/xml");
        modsWrap.setMDTYPE("MODS");
        modsWrap.setXmlData(modsContent);
        modsSection.setMdWrap(modsWrap);
    }
    // DC
    if (metsElement.getDescriptor() != null) {
        MdSecType dcSection = new MdSecType();
        mets.getDmdSec().add(dcSection);
        dcSection.setID("DCMD_" + metsElement.getModsElementID());

        XmlData dcContent = new XmlData();
        dcContent.getAny().addAll(metsElement.getDescriptor());

        MdWrap dcWrap = new MdWrap();
        dcWrap.setMIMETYPE("text/xml");
        dcWrap.setMDTYPE("DC");
        dcWrap.setXmlData(dcContent);
        dcSection.setMdWrap(dcWrap);
    }
}
/**
 * Sets the TYPE, ORDERLABEL and ORDER attributes of a page div from the
 * MODS {@code part} element of the page.
 * <p>
 * The {@code part} element is looked up under {@code modsCollection/mods}
 * first and directly under {@code mods} as a fallback. TYPE is taken from
 * the {@code type} attribute of the part ("NormalPage" when absent). Each
 * {@code detail} child of type {@code pageNumber} supplies ORDERLABEL,
 * each of type {@code pageIndex} supplies ORDER. A detail without a type
 * attribute or without a value is skipped.
 *
 * @param metsElement
 *            page element providing the MODS stream
 * @param pageDiv
 *            div to fill
 * @throws MetsExportException
 *             when the MODS has no part element or the page index is not
 *             an integer
 */
private void fillPageIndexOrder(IMetsElement metsElement, DivType pageDiv) throws MetsExportException {
    Node partNode = MetsUtils.xPathEvaluateNode(metsElement.getModsStream(), "*[local-name()='modsCollection']/*[local-name()='mods']/*[local-name()='part']");
    if (partNode == null) {
        partNode = MetsUtils.xPathEvaluateNode(metsElement.getModsStream(), "*[local-name()='mods']/*[local-name()='part']");
    }
    if (partNode == null) {
        // report the problem with the pid instead of failing with a NullPointerException
        throw new MetsExportException(metsElement.getOriginalPid(), "Unable to find the part element in MODS of the page", false, null);
    }
    Node pageType = (partNode.getAttributes() == null) ? null : partNode.getAttributes().getNamedItem("type");
    if (pageType != null) {
        pageDiv.setTYPE(pageType.getNodeValue());
    } else {
        pageDiv.setTYPE("NormalPage");
    }
    NodeList nodeList = partNode.getChildNodes();
    for (int a = 0; a < nodeList.getLength(); a++) {
        Node detail = nodeList.item(a);
        if ((detail.getLocalName() == null) || (!detail.getLocalName().equalsIgnoreCase("detail"))) {
            continue;
        }
        Node detailType = (detail.getAttributes() == null) ? null : detail.getAttributes().getNamedItem("type");
        if (detailType == null) {
            continue;
        }
        // take the first element child so that whitespace or comments
        // preceding the value element do not break the lookup
        NodeList detailChildren = detail.getChildNodes();
        Node valueElement = null;
        for (int b = 0; b < detailChildren.getLength(); b++) {
            if (detailChildren.item(b).getNodeType() == Node.ELEMENT_NODE) {
                valueElement = detailChildren.item(b);
                break;
            }
        }
        Node numberNode = (valueElement == null) ? null : valueElement.getFirstChild();
        if (numberNode == null) {
            continue;
        }
        String detailKind = detailType.getNodeValue();
        if (detailKind.equalsIgnoreCase("pageNumber")) {
            pageDiv.setORDERLABEL(numberNode.getNodeValue());
        }
        if (detailKind.equalsIgnoreCase("pageIndex")) {
            try {
                pageDiv.setORDER(new BigInteger(numberNode.getNodeValue()));
            } catch (NumberFormatException e) {
                throw new MetsExportException(metsElement.getOriginalPid(), "Invalid pageIndex value:" + numberNode.getNodeValue(), false, e);
            }
        }
    }
}
/**
 * Reads an XML document from the stream, sets the LABEL and TYPE
 * attributes of its root element to the label and type of the mets
 * document and returns the result as a new UTF-8 encoded stream.
 * <p>
 * The input stream is always closed, also when reading fails. The root
 * element is obtained through {@code getDocumentElement()}, so a comment
 * or processing instruction preceding the root does not break the cast.
 *
 * @param is
 *            stream with the XML document; closed by this method
 * @param metsContext
 *            not used by this method
 * @return stream with the modified document
 * @throws MetsExportException
 *             when the stream can not be read or the document can not be
 *             serialized
 */
private InputStream addLabelToAmdSec(InputStream is, MetsContext metsContext) throws MetsExportException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    try {
        try {
            MetsUtils.copyStream(is, bos);
            bos.close();
        } finally {
            if (is != null) {
                is.close();
            }
        }
    } catch (IOException ex) {
        throw new MetsExportException("Unable to copy stream", false, ex);
    }
    Document techDoc = MetsUtils.getDocumentFromBytes(bos.toByteArray());
    Element element = techDoc.getDocumentElement();
    element.setAttribute("LABEL", mets.getLabel1());
    element.setAttribute("TYPE", mets.getTYPE());
    DOMSource domSource = new DOMSource(techDoc);
    StringWriter xmlAsWriter = new StringWriter();
    StreamResult result = new StreamResult(xmlAsWriter);
    try {
        TransformerFactory.newInstance().newTransformer().transform(domSource, result);
        return new ByteArrayInputStream(xmlAsWriter.toString().getBytes("UTF-8"));
    } catch (Exception ex) {
        throw new MetsExportException("Unable to transform Tech metadata to XML", false, ex);
    }
}
/**
 * Prepares a mets FileType element for one stream of a page and copies
 * the content of the stream into the package directory.
 * <p>
 * The content source is taken from {@code fileNames}: a String is treated
 * as a file reference resolved against the context path, a byte array and
 * an InputStream are used directly. For the TECHMDGRP stream the content
 * is passed through {@link #addLabelToAmdSec} first. The MD5 and size
 * computed while copying are stored in the FileMD5Info of the stream
 * (created when missing), which is also appended to the file list of the
 * context.
 * <p>
 * Both the source stream and the target file stream are closed here, also
 * when copying fails.
 *
 * @param seq
 *            sequence number of the file, also used in the generated id
 * @param metsStreamName
 *            name of the mets stream (file group)
 * @param fileNames
 *            content source per stream name
 * @param mimeTypes
 *            mime type per stream name
 * @param metsContext
 *            context providing package id, path, package dir and file list
 * @param outputFileNames
 *            receives the full name of the written file per stream name
 * @param md5InfosMap
 *            MD5 info per stream name; updated by this method
 * @return the prepared FileType
 * @throws MetsExportException
 *             when the source can not be opened or the file can not be
 *             written
 */
private FileType prepareFileType(int seq, String metsStreamName, HashMap<String, Object> fileNames, HashMap<String, String> mimeTypes, MetsContext metsContext, HashMap<String, String> outputFileNames, HashMap<String, FileMD5Info> md5InfosMap) throws MetsExportException {
    FileType fileType = new FileType();
    fileType.setCHECKSUMTYPE("MD5");
    GregorianCalendar gregory = new GregorianCalendar();
    gregory.setTime(new Date());
    XMLGregorianCalendar calendar;
    try {
        calendar = DatatypeFactory.newInstance().newXMLGregorianCalendar(gregory);
    } catch (DatatypeConfigurationException e1) {
        throw new MetsExportException("Unable to create XMLGregorianDate", false, e1);
    }
    fileType.setCREATED(calendar);
    fileType.setSEQ(seq);
    fileType.setMIMETYPE(mimeTypes.get(metsStreamName));
    fileType.setID(Const.streamMappingPrefix.get(metsStreamName) + "_" + MetsUtils.removeNonAlpabetChars(metsContext.getPackageID()) + "_" + String.format("%04d", seq));

    InputStream is = null;
    Object source = fileNames.get(metsStreamName);
    if (source instanceof String) {
        // keep only the last two path segments of the reference and
        // resolve them against the path of the context
        String fileNameOriginal = (String) source;
        int lastIndex = fileNameOriginal.lastIndexOf('/');
        int preLastIndex = fileNameOriginal.substring(1, lastIndex).lastIndexOf('/');
        String fileName = metsContext.getPath() + fileNameOriginal.substring(preLastIndex + 2);
        try {
            is = new FileInputStream(new File(fileName));
        } catch (FileNotFoundException e) {
            throw new MetsExportException("File not found:" + fileName, false, e);
        }
    } else if (source instanceof byte[]) {
        is = new ByteArrayInputStream((byte[]) source);
    } else if (source instanceof InputStream) {
        is = (InputStream) source;
    }
    if (metsStreamName.equalsIgnoreCase("TECHMDGRP")) {
        is = addLabelToAmdSec(is, metsContext);
    }

    String outputFileName = fileType.getID() + "." + MimeType.getExtension(mimeTypes.get(metsStreamName));
    String fullOutputFileName = metsContext.getPackageDir().getAbsolutePath() + File.separator + Const.streamMappingFile.get(metsStreamName) + File.separator + outputFileName;
    outputFileNames.put(metsStreamName, fullOutputFileName);
    try {
        FileMD5Info computed;
        FileOutputStream os = new FileOutputStream(fullOutputFileName);
        try {
            computed = MetsUtils.getDigestAndCopy(is, os);
        } finally {
            // NOTE(review): getDigestAndCopy may close the stream itself on
            // success; a repeated close is harmless
            os.close();
        }
        FileMD5Info fileMD5Info = md5InfosMap.get(metsStreamName);
        if (fileMD5Info == null) {
            fileMD5Info = computed;
            md5InfosMap.put(metsStreamName, fileMD5Info);
        } else {
            // keep the existing info and only refresh size and checksum
            fileMD5Info.setSize(computed.getSize());
            fileMD5Info.setMd5(computed.getMd5());
        }
        fileType.setSIZE(Long.valueOf(fileMD5Info.getSize()));
        fileMD5Info.setFileName("." + File.separator + Const.streamMappingFile.get(metsStreamName) + File.separator + outputFileName);
        fileMD5Info.setMimeType(fileType.getMIMETYPE());
        fileType.setCHECKSUM(fileMD5Info.getMd5());
        metsContext.getFileList().add(fileMD5Info);
    } catch (Exception e) {
        throw new MetsExportException("Unable to process file " + fullOutputFileName, false, e);
    } finally {
        if (is != null) {
            try {
                is.close();
            } catch (IOException ignored) {
                // the source was either fully consumed or the export failed already
            }
        }
    }

    FLocat flocat = new FLocat();
    flocat.setLOCTYPE("URL");
    String href = "." + "/" + Const.streamMappingFile.get(metsStreamName) + "/" + outputFileName;
    URI uri = URI.create(href);
    flocat.setHref(uri.toASCIIString());
    fileType.getFLocat().add(flocat);
    return fileType;
}
/**
 * Collects, for every stream name of Const.streamMapping, the content
 * source, the mime type and the creation date of the matching datastream
 * of the element and stores them in the supplied maps.
 * <p>
 * When the context has a Fedora client, the first datastream id of the
 * mapping that is found in the source object wins and its dissemination
 * InputStream is stored in fileNames. Without a Fedora client the
 * datastreams of the source object are searched and either the content
 * location reference or the binary content of the datastream versions is
 * stored.
 *
 * @param metsElement
 *            element whose datastreams are read
 * @param seq
 *            not used by this method
 * @param fileNames
 *            receives the content source (InputStream, content location
 *            reference or binary content) per stream name
 * @param mimeTypes
 *            receives the mime type per stream name
 * @param createDates
 *            receives the creation date per stream name (filled only in
 *            the Fedora branch)
 * @param md5InfosMap
 *            FileMD5Info per stream name; an entry is created when missing
 *            and its creation date is set
 * @throws MetsExportException
 *             when a datastream can not be read from Fedora
 */
private void processPageFiles(IMetsElement metsElement, int seq, HashMap<String, Object> fileNames, HashMap<String, String> mimeTypes, HashMap<String, XMLGregorianCalendar> createDates, HashMap<String, FileMD5Info> md5InfosMap) throws MetsExportException {
for (String streamName : Const.streamMapping.keySet()) {
// Fedora branch: the content is fetched through the Fedora client
if (metsElement.getMetsContext().getFedoraClient() != null) {
try {
// GetDatastreamsResponse streams =
// FedoraClient.getDatastreams(metsElement.getOriginalPid()).execute(metsElement.getMetsContext().getFedoraClient());
// List<DatastreamProfile> profiles =
// streams.getDatastreamProfiles();
// for (DatastreamProfile profile : profiles) {
// if (profile.getDsID().contains(streamName)) {
for (String dataStream : Const.streamMapping.get(streamName)) {
DatastreamType rawDS = FoxmlUtils.findDatastream(metsElement.getSourceObject(), dataStream);
if (rawDS != null) {
// reuse the MD5 info of the stream when there is one already
FileMD5Info fileMd5Info;
if (md5InfosMap.get(streamName) == null) {
fileMd5Info = new FileMD5Info();
md5InfosMap.put(streamName, fileMd5Info);
} else {
fileMd5Info = md5InfosMap.get(streamName);
}
// creation date and mime type come from the first datastream version
fileMd5Info.setCreated(rawDS.getDatastreamVersion().get(0).getCREATED());
GetDatastreamDissemination dsRaw = FedoraClient.getDatastreamDissemination(metsElement.getOriginalPid(), dataStream);
createDates.put(streamName, rawDS.getDatastreamVersion().get(0).getCREATED());
try {
InputStream is = dsRaw.execute(metsElement.getMetsContext().getFedoraClient()).getEntityInputStream();
fileNames.put(streamName, is);
} catch (FedoraClientException e) {
throw new MetsExportException(metsElement.getOriginalPid(), "Unable to read raw datastream content", false, e);
}
// mimeTypes.put(streamName, profile.getDsMIME());
mimeTypes.put(streamName, rawDS.getDatastreamVersion().get(0).getMIMETYPE());
// the first datastream id found wins, the remaining ids are not tried
break;
//
}
}
} catch (Exception ex) {
// also wraps the MetsExportException thrown above for an unreadable datastream
throw new MetsExportException(metsElement.getOriginalPid(), "Error while getting file datastreams for " + metsElement.getOriginalPid(), false, ex);
}
} else {
// no Fedora client: the content is taken from the datastreams of the source object
List<DatastreamType> datastreams = metsElement.getSourceObject().getDatastream();
for (String dataStream : Const.streamMapping.get(streamName)) {
// stop trying further datastream ids once a source was stored for the stream
if (fileNames.get(streamName) != null) {
break;
}
for (DatastreamType ds : datastreams) {
if (MetsUtils.equalDataStreams(ds.getID(), dataStream)) {
// every version is visited, so the values of the last version remain in the maps
Iterator<DatastreamVersionType> dvIter = ds.getDatastreamVersion().iterator();
while (dvIter.hasNext()) {
DatastreamVersionType dv = dvIter.next();
mimeTypes.put(streamName, dv.getMIMETYPE());
if (dv.getContentLocation() != null) {
fileNames.put(streamName, dv.getContentLocation().getREF());
FileMD5Info fileMd5Info;
if (md5InfosMap.get(streamName) == null) {
fileMd5Info = new FileMD5Info();
md5InfosMap.put(streamName, fileMd5Info);
} else {
fileMd5Info = md5InfosMap.get(streamName);
}
fileMd5Info.setCreated(dv.getCREATED());
}
if (dv.getBinaryContent() != null) {
fileNames.put(streamName, dv.getBinaryContent());
FileMD5Info fileMd5Info;
if (md5InfosMap.get(streamName) == null) {
fileMd5Info = new FileMD5Info();
md5InfosMap.put(streamName, fileMd5Info);
} else {
fileMd5Info = md5InfosMap.get(streamName);
}
fileMd5Info.setCreated(dv.getCREATED());
}
}
// only the first datastream with a matching id is processed
break;
}
}
}
}
}
}
/**
 * Returns the MIX description of the scanner device referenced by the
 * hasDevice relation in RELS-EXT of the element.
 *
 * @param metsElement
 *            element whose device is looked up
 * @return the description of the device, or null when there is no remote
 *         storage, no hasDevice relation or no rdf:resource attribute
 * @throws MetsExportException
 *             when the device can not be loaded, is not unique or lacks
 *             the description / image capture metadata
 */
private Mix getScannerMix(IMetsElement metsElement) throws MetsExportException {
    if (metsElement.getMetsContext().getRemoteStorage() == null) {
        return null;
    }
    Node hasDevice = MetsUtils.xPathEvaluateNode(metsElement.getRelsExt(), "*[local-name()='RDF']/*[local-name()='Description']/*[local-name()='hasDevice']");
    if (hasDevice == null) {
        return null;
    }
    Node resource = hasDevice.getAttributes().getNamedItem("rdf:resource");
    if (resource == null) {
        return null;
    }
    DeviceRepository repository = new DeviceRepository(metsElement.getMetsContext().getRemoteStorage());
    // the relation holds a fedora URI, the repository expects the bare id
    String deviceId = resource.getNodeValue().replaceAll("info:fedora/", "");
    List<Device> devices;
    try {
        devices = repository.find(deviceId, true);
    } catch (DeviceException e) {
        throw new MetsExportException(metsElement.getOriginalPid(), "Unable to get scanner info", false, e);
    }
    if (devices.size() != 1) {
        throw new MetsExportException(metsElement.getOriginalPid(), "Unable to get scanner info - expected 1 device, got:" + devices.size(), false, null);
    }
    Device scanner = devices.get(0);
    boolean described = (scanner.getDescription() != null) && (scanner.getDescription().getImageCaptureMetadata() != null);
    if (!described) {
        throw new MetsExportException(metsElement.getOriginalPid(), "Scanner device does not have the description/imageCaptureMetadata set", false, null);
    }
    return scanner.getDescription();
}
/**
 * Builds the premis agent describing the ProArc software and returns it
 * as a DOM node.
 *
 * @param metsElement
 *            element whose pid is used in the error message
 * @return the agent node of the marshalled document
 * @throws MetsExportException
 *             when the agent can not be marshalled
 */
private Node getAgent(IMetsElement metsElement) throws MetsExportException {
    AgentComplexType proarcAgent = new AgentComplexType();
    JAXBElement<AgentComplexType> agentElement = new ObjectFactory().createAgent(proarcAgent);

    AgentIdentifierComplexType identifier = new AgentIdentifierComplexType();
    identifier.setAgentIdentifierType("ProArc_AgentID");
    identifier.setAgentIdentifierValue("ProArc");
    proarcAgent.getAgentIdentifier().add(identifier);
    proarcAgent.getAgentName().add("ProArc");
    proarcAgent.setAgentType("software");

    try {
        JAXBContext context = JAXBContext.newInstance(AgentComplexType.class);
        Document target = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
        // Marshal the Object to a Document
        context.createMarshaller().marshal(agentElement, target);
        XPath xpath = XPathFactory.newInstance().newXPath();
        return (Node) xpath.compile("*[local-name()='agent']").evaluate(target, XPathConstants.NODE);
    } catch (Exception e) {
        throw new MetsExportException(metsElement.getOriginalPid(), "Error while generating premis data", false, e);
    }
}
/**
 * Builds the premis "derivation" event of a datastream and returns it as
 * a DOM node. The event links the ProArc software agent and the object
 * identified by the pid of the element and the model of the datastream.
 *
 * @param metsElement
 *            element supplying the pid
 * @param datastream
 *            datastream name used to look up the event id and the model
 * @param md5Info
 *            supplies the creation date used as the event date
 * @param eventDetail
 *            detail text of the event
 * @return the event node of the marshalled document
 * @throws MetsExportException
 *             when the event can not be marshalled
 */
private Node getPremisEvent(IMetsElement metsElement, String datastream, FileMD5Info md5Info, String eventDetail) throws MetsExportException {
    ObjectFactory premisFactory = new ObjectFactory();
    PremisComplexType premisRoot = new PremisComplexType();
    JAXBElement<PremisComplexType> premisElement = premisFactory.createPremis(premisRoot);

    EventComplexType event = premisFactory.createEventComplexType();
    premisRoot.getEvent().add(event);
    event.setEventDateTime(md5Info.getCreated().toXMLFormat());
    event.setEventDetail(eventDetail);
    event.setEventType("derivation");

    EventIdentifierComplexType eventId = new EventIdentifierComplexType();
    eventId.setEventIdentifierType("ProArc_EventID");
    eventId.setEventIdentifierValue(Const.dataStreamToEvent.get(datastream));
    event.setEventIdentifier(eventId);

    EventOutcomeInformationComplexType outcome = new EventOutcomeInformationComplexType();
    outcome.getContent().add(premisFactory.createEventOutcome("successful"));
    event.getEventOutcomeInformation().add(outcome);

    LinkingAgentIdentifierComplexType agentLink = new LinkingAgentIdentifierComplexType();
    agentLink.setLinkingAgentIdentifierType("ProArc_AgentID");
    agentLink.setLinkingAgentIdentifierValue("ProArc");
    agentLink.getLinkingAgentRole().add("software");

    LinkingObjectIdentifierComplexType objectLink = new LinkingObjectIdentifierComplexType();
    objectLink.setLinkingObjectIdentifierType("ProArc_URI");
    objectLink.setLinkingObjectIdentifierValue(Const.FEDORAPREFIX + metsElement.getOriginalPid() + "/" + Const.dataStreamToModel.get(datastream));

    event.getLinkingObjectIdentifier().add(objectLink);
    event.getLinkingAgentIdentifier().add(agentLink);

    try {
        JAXBContext context = JAXBContext.newInstance(PremisComplexType.class);
        Document target = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
        // Marshal the Object to a Document
        context.createMarshaller().marshal(premisElement, target);
        XPath xpath = XPathFactory.newInstance().newXPath();
        return (Node) xpath.compile("*[local-name()='premis']/*[local-name()='event']").evaluate(target, XPathConstants.NODE);
    } catch (Exception e) {
        throw new MetsExportException(metsElement.getOriginalPid(), "Error while generating premis data", false, e);
    }
}
/**
 * Generates the premis object (file) description of a datastream for
 * amdSec and returns it as a DOM node.
 * <p>
 * The object carries its identifier, preservation level ("deleted" for
 * the RAW datastream, "preservation" otherwise), fixity, size, format,
 * creating application and original name. A datastream other than RAW
 * gets a derivation relationship to the RAW object; the RAW datastream
 * gets a linking event identifier instead.
 *
 * @param metsElement
 *            element supplying the pid, RELS-EXT and the context
 * @param datastream
 *            name of the described datastream
 * @param md5Info
 *            checksum, size, mime type and format version of the file
 * @return the object node of the marshalled document
 * @throws MetsExportException
 *             when the description can not be generated
 */
private Node getPremisFile(IMetsElement metsElement, String datastream, FileMD5Info md5Info) throws MetsExportException {
    ObjectFactory premisFactory = new ObjectFactory();
    PremisComplexType premisRoot = new PremisComplexType();
    JAXBElement<PremisComplexType> premisElement = premisFactory.createPremis(premisRoot);
    cz.cas.lib.proarc.premis.File premisFile = premisFactory.createFile();
    premisRoot.getObject().add(premisFile);

    // identifier
    ObjectIdentifierComplexType identifier = new ObjectIdentifierComplexType();
    identifier.setObjectIdentifierType("ProArc_URI");
    identifier.setObjectIdentifierValue(Const.FEDORAPREFIX + metsElement.getOriginalPid() + "/" + Const.dataStreamToModel.get(datastream));
    premisFile.getObjectIdentifier().add(identifier);

    // preservation level
    PreservationLevelComplexType level = new PreservationLevelComplexType();
    level.setPreservationLevelValue("RAW".equals(datastream) ? "deleted" : "preservation");
    premisFile.getPreservationLevel().add(level);

    // characteristics: fixity, size, format, creating application
    ObjectCharacteristicsComplexType traits = new ObjectCharacteristicsComplexType();
    traits.setCompositionLevel(BigInteger.ZERO);
    premisFile.getObjectCharacteristics().add(traits);

    FixityComplexType checksum = new FixityComplexType();
    checksum.setMessageDigest(md5Info.getMd5());
    checksum.setMessageDigestAlgorithm("MD5");
    checksum.setMessageDigestOriginator("ProArc");
    traits.getFixity().add(checksum);
    traits.setSize(md5Info.getSize());

    FormatComplexType format = new FormatComplexType();
    traits.getFormat().add(format);
    FormatDesignationComplexType designation = new FormatDesignationComplexType();
    designation.setFormatName(md5Info.getMimeType());
    designation.setFormatVersion(md5Info.getFormatVersion());
    format.getContent().add(premisFactory.createFormatDesignation(designation));
    FormatRegistryComplexType registry = new FormatRegistryComplexType();
    registry.setFormatRegistryName("PRONOM");
    registry.setFormatRegistryKey(Const.mimeToFmtMap.get(md5Info.getMimeType()));
    format.getContent().add(premisFactory.createFormatRegistry(registry));

    CreatingApplicationComplexType application = new CreatingApplicationComplexType();
    traits.getCreatingApplication().add(application);
    application.getContent().add(premisFactory.createCreatingApplicationName("ProArc"));
    application.getContent().add(premisFactory.createCreatingApplicationVersion(metsElement.getMetsContext().getProarcVersion()));
    application.getContent().add(premisFactory.createDateCreatedByApplication(MetsUtils.getCurrentDate().toXMLFormat()));

    // relationship / linking event
    RelationshipComplexType relation = new RelationshipComplexType();
    if ("RAW".equals(datastream)) {
        // the relationship is only filled here, it is not attached to the file
        relation.setRelationshipType("creation");
        relation.setRelationshipSubType("created from");
        LinkingEventIdentifierComplexType linkedEvent = new LinkingEventIdentifierComplexType();
        premisFile.getLinkingEventIdentifier().add(linkedEvent);
        linkedEvent.setLinkingEventIdentifierType("ProArc_EventID");
        linkedEvent.setLinkingEventIdentifierValue(Const.dataStreamToEvent.get(datastream));
    } else {
        relation.setRelationshipType("derivation");
        relation.setRelationshipSubType("created from");
        RelatedObjectIdentificationComplexType rawObject = new RelatedObjectIdentificationComplexType();
        relation.getRelatedObjectIdentification().add(rawObject);
        rawObject.setRelatedObjectIdentifierType("ProArc_URI");
        rawObject.setRelatedObjectIdentifierValue(Const.FEDORAPREFIX + metsElement.getOriginalPid() + "/" + Const.dataStreamToModel.get("RAW"));
        RelatedEventIdentificationComplexType relatedEvent = new RelatedEventIdentificationComplexType();
        relation.getRelatedEventIdentification().add(relatedEvent);
        relatedEvent.setRelatedEventIdentifierType("ProArc_EventID");
        relatedEvent.setRelatedEventIdentifierValue(Const.dataStreamToEvent.get(datastream));
        relatedEvent.setRelatedEventSequence(BigInteger.ONE);
        premisFile.getRelationship().add(relation);
    }

    // original name taken from the importFile relation of RELS-EXT
    String importFile = MetsUtils.xPathEvaluateString(metsElement.getRelsExt(), "*[local-name()='RDF']/*[local-name()='Description']/*[local-name()='importFile']");
    OriginalNameComplexType originalName = premisFactory.createOriginalNameComplexType();
    originalName.setValue(importFile);
    premisFile.setOriginalName(originalName);

    try {
        JAXBContext context = JAXBContext.newInstance(PremisComplexType.class);
        Document target = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
        // Marshal the Object to a Document
        context.createMarshaller().marshal(premisElement, target);
        XPath xpath = XPathFactory.newInstance().newXPath();
        return (Node) xpath.compile("*[local-name()='premis']/*[local-name()='object']").evaluate(target, XPathConstants.NODE);
    } catch (Exception e) {
        throw new MetsExportException(metsElement.getOriginalPid(), "Error while generating premis data", false, e);
    }
}
/**
 * Wraps a PREMIS node into a new metadata section and registers it in the
 * given amdSec.
 *
 * The section is added to the digiprovMD list when {@code isDigiprov} is
 * set, otherwise to the techMD list. For the IDs OBJ_002/EVT_002 and
 * OBJ_003/EVT_003 the section is additionally referenced (ADMID) from the
 * first file of the "MC_IMGGRP" resp. "ALTOGRP" file group, when such a file
 * exists.
 *
 * @param premisNode
 *            DOM node placed into the xmlData of the section
 * @param amdSec
 *            administrative section receiving the new metadata section
 * @param Id
 *            ID of the new metadata section
 * @param isDigiprov
 *            true for digiprovMD, false for techMD
 * @param amdSecFileGrpMap
 *            file groups of the amdSec document keyed by group name; may be
 *            null when no file has to be linked
 */
private void addPremisNodeToMets(Node premisNode, AmdSecType amdSec, String Id, boolean isDigiprov, HashMap<String, FileGrp> amdSecFileGrpMap) {
    MdSecType mdSec = new MdSecType();
    mdSec.setID(Id);
    MdWrap mdWrap = new MdWrap();
    mdWrap.setMIMETYPE("text/xml");
    mdWrap.setMDTYPE("PREMIS");
    XmlData xmlData = new XmlData();
    xmlData.getAny().add(premisNode);
    mdWrap.setXmlData(xmlData);
    mdSec.setMdWrap(mdWrap);
    if (isDigiprov) {
        amdSec.getDigiprovMD().add(mdSec);
    } else {
        amdSec.getTechMD().add(mdSec);
    }

    // Only the *_002 and *_003 sections are referenced from a file
    String fileGrpName = null;
    if ("OBJ_002".equals(Id) || "EVT_002".equals(Id)) {
        fileGrpName = "MC_IMGGRP";
    } else if ("OBJ_003".equals(Id) || "EVT_003".equals(Id)) {
        fileGrpName = "ALTOGRP";
    }
    if ((fileGrpName == null) || (amdSecFileGrpMap == null)) {
        return;
    }
    FileGrp fileGrp = amdSecFileGrpMap.get(fileGrpName);
    // An empty group must not fail with IndexOutOfBoundsException
    if ((fileGrp != null) && !fileGrp.getFile().isEmpty() && (fileGrp.getFile().get(0) != null)) {
        fileGrp.getFile().get(0).getADMID().add(mdSec);
    }
}
/**
 * Adds the PREMIS object, event and agent sections to the amdSec.
 *
 * An object section (OBJ_00x) and an event section (EVT_00x) are written for
 * each of the streams RAW, MC_IMGGRP and ALTOGRP that has an entry in
 * {@code md5InfosMap}; the agent section AGENT_001 is always written.
 *
 * @param amdSec
 *            administrative section receiving the PREMIS sections
 * @param md5InfosMap
 *            file information keyed by stream name
 * @param metsElement
 *            element the metadata is generated for
 * @param amdSecFileGrpMap
 *            file groups of the amdSec document keyed by group name
 * @throws MetsExportException
 */
private void addPremisToAmdSec(AmdSecType amdSec, HashMap<String, FileMD5Info> md5InfosMap, IMetsElement metsElement, HashMap<String, FileGrp> amdSecFileGrpMap) throws MetsExportException {
    HashMap<String, String> objectStreams = new HashMap<String, String>();
    objectStreams.put("OBJ_001", "RAW");
    objectStreams.put("OBJ_002", "MC_IMGGRP");
    objectStreams.put("OBJ_003", "ALTOGRP");
    for (Map.Entry<String, String> objectStream : objectStreams.entrySet()) {
        FileMD5Info streamInfo = md5InfosMap.get(objectStream.getValue());
        if (streamInfo != null) {
            Node premisObject = getPremisFile(metsElement, objectStream.getValue(), streamInfo);
            addPremisNodeToMets(premisObject, amdSec, objectStream.getKey(), false, amdSecFileGrpMap);
        }
    }

    FileMD5Info rawInfo = md5InfosMap.get("RAW");
    if (rawInfo != null) {
        Node rawEvent = getPremisEvent(metsElement, "RAW", rawInfo, "capture/digitization");
        addPremisNodeToMets(rawEvent, amdSec, "EVT_001", true, null);
    }
    FileMD5Info mcInfo = md5InfosMap.get("MC_IMGGRP");
    if (mcInfo != null) {
        Node mcEvent = getPremisEvent(metsElement, "MC_IMGGRP", mcInfo, "migration/MC_creation");
        addPremisNodeToMets(mcEvent, amdSec, "EVT_002", true, amdSecFileGrpMap);
    }
    FileMD5Info altoInfo = md5InfosMap.get("ALTOGRP");
    if (altoInfo != null) {
        Node altoEvent = getPremisEvent(metsElement, "ALTOGRP", altoInfo, "capture/XML_creation");
        addPremisNodeToMets(altoEvent, amdSec, "EVT_003", true, amdSecFileGrpMap);
    }

    Node agent = getAgent(metsElement);
    addPremisNodeToMets(agent, amdSec, "AGENT_001", true, null);
}
/**
 * Post-processes the MIX metadata of the RAW image by delegating to
 * JhoveUtility, in this order: inserts the object identifier for the "RAW"
 * stream, adds the denominator, adds the orientation and inserts the creation
 * date.
 *
 * NOTE(review): "PS" presumably stands for the primary scan - confirm.
 *
 * @param jHoveOutputRaw
 *            JHOVE output of the RAW image; its MIX is passed to the helpers
 * @param originalPid
 *            PID handed to JhoveUtility.insertObjectIdentifier
 * @param rawCreated
 *            creation date handed to JhoveUtility.insertDateCreated
 */
public static void fixPSMix(JHoveOutput jHoveOutputRaw, String originalPid, XMLGregorianCalendar rawCreated) {
JhoveUtility.insertObjectIdentifier(jHoveOutputRaw.getMix(), originalPid, "RAW");
JhoveUtility.addDenominator(jHoveOutputRaw);
JhoveUtility.addOrientation(jHoveOutputRaw);
JhoveUtility.insertDateCreated(jHoveOutputRaw.getMix(), rawCreated);
}
/**
 * Post-processes the MIX metadata of the master copy (MC) image by delegating
 * to JhoveUtility, in this order: inserts the change history, inserts the
 * object identifier for the "MC_IMGGRP" stream, adds the photometric
 * information, adds the denominator, adds the orientation and inserts the
 * creation date.
 *
 * @param jHoveOutputMC
 *            JHOVE output of the MC image; its MIX is passed to the helpers
 * @param originalPid
 *            PID handed to JhoveUtility.insertObjectIdentifier
 * @param mcCreated
 *            date handed to JhoveUtility.insertChangeHistory and
 *            JhoveUtility.insertDateCreated
 * @param originalFile
 *            value handed to JhoveUtility.insertChangeHistory
 * @param photometricInterpretation
 *            value handed to JhoveUtility.addPhotometricInformation
 */
public static void fixMCMix(JHoveOutput jHoveOutputMC, String originalPid, XMLGregorianCalendar mcCreated, String originalFile, PhotometricInterpretation photometricInterpretation) {
JhoveUtility.insertChangeHistory(jHoveOutputMC.getMix(), mcCreated, originalFile);
JhoveUtility.insertObjectIdentifier(jHoveOutputMC.getMix(), originalPid, "MC_IMGGRP");
JhoveUtility.addPhotometricInformation(jHoveOutputMC, photometricInterpretation);
JhoveUtility.addDenominator(jHoveOutputMC);
JhoveUtility.addOrientation(jHoveOutputMC);
JhoveUtility.insertDateCreated(jHoveOutputMC.getMix(), mcCreated);
}
/**
 * Generates the technical metadata (amdSec document) of one page.
 *
 * Does nothing when {@code fileNames} already contains "TECHMDGRP".
 * Otherwise it builds a separate METS document with a copy of the page file
 * section, adds MIX metadata for the RAW datastream (only when a Fedora
 * client is available) and for the MC_IMGGRP file, validates the ALTO file
 * against its schema, adds the PREMIS sections, serializes the document via
 * saveAmdSec and registers the result as the TECHMDGRP file of the page.
 *
 * @param metsElement
 *            page element the metadata is generated for
 * @param fileNames
 *            page files keyed by stream name; receives the TECHMDGRP content
 * @param seq
 *            sequence number passed to prepareFileType
 * @param fileGrpPage
 *            file groups of the page keyed by group name
 * @param mimeTypes
 *            mime types keyed by stream name
 * @param pageDiv
 *            physical div of the page; receives the pointer to the TECHMDGRP
 *            file
 * @param outputFileNames
 *            paths of the already written files keyed by stream name
 * @param md5InfosMap
 *            file information keyed by stream name; receives the RAW entry
 *            and the detected format versions
 * @throws MetsExportException
 */
private void generateTechMetadata(IMetsElement metsElement, HashMap<String, Object> fileNames, int seq, HashMap<String, FileGrp> fileGrpPage, HashMap<String, String> mimeTypes, DivType pageDiv, HashMap<String, String> outputFileNames, HashMap<String, FileMD5Info> md5InfosMap) throws MetsExportException {
    if (fileNames.get("TECHMDGRP") != null) {
        // Technical metadata are already present
        return;
    }
    LOG.log(Level.FINE, "Generating tech");
    Mets amdSecMets = new Mets();
    amdSecMets.setLabel1(mets.getLabel1());
    amdSecMets.setTYPE(mets.getTYPE());
    StructMapType mapType = new StructMapType();
    mapType.setTYPE(Const.DIV_PHYSICAL_ID);
    amdSecMets.getStructMap().add(mapType);
    AmdSecType amdSec = new AmdSecType();
    amdSec.setID(metsElement.getElementID());
    amdSecMets.getAmdSec().add(amdSec);
    DivType divType = new DivType();
    if (Const.PERIODICAL_TITLE.equalsIgnoreCase(metsElement.getMetsContext().getRootElement().getElementType())) {
        divType.setTYPE("PERIODICAL_PAGE");
    } else {
        divType.setTYPE("MONOGRAPH_PAGE");
    }
    FileSec fileSec = new FileSec();
    amdSecMets.setFileSec(fileSec);

    // Copy the non-empty file groups of the page into the amdSec document
    HashMap<String, FileGrp> amdSecFileGrpMap = new HashMap<String, MetsType.FileSec.FileGrp>();
    for (String fileMap : fileGrpPage.keySet()) {
        FileGrp fileGrp = fileGrpPage.get(fileMap);
        if (fileGrp.getFile().size() > 0) {
            FileGrp fileGrpAmd = new FileGrp();
            amdSecFileGrpMap.put(fileMap, fileGrpAmd);
            fileGrpAmd.setID(fileGrp.getID());
            fileGrpAmd.setUSE(fileGrp.getUSE());
            fileSec.getFileGrp().add(fileGrpAmd);
            for (FileType fileTypePage : fileGrp.getFile()) {
                FileType fileTypeAmdSec = new FileType();
                fileTypeAmdSec.setCHECKSUM(fileTypePage.getCHECKSUM());
                fileTypeAmdSec.setCHECKSUMTYPE(fileTypePage.getCHECKSUMTYPE());
                fileTypeAmdSec.setCREATED(fileTypePage.getCREATED());
                fileTypeAmdSec.setID(fileTypePage.getID());
                fileTypeAmdSec.setMIMETYPE(fileTypePage.getMIMETYPE());
                fileTypeAmdSec.setSEQ(fileTypePage.getSEQ());
                fileTypeAmdSec.setSIZE(fileTypePage.getSIZE());
                fileGrpAmd.getFile().add(fileTypeAmdSec);
                // A file without any location must not fail with IndexOutOfBoundsException
                if (!fileTypePage.getFLocat().isEmpty() && (fileTypePage.getFLocat().get(0) != null)) {
                    FLocat flocatAmd = new FLocat();
                    FLocat pageFlocat = fileTypePage.getFLocat().get(0);
                    if (pageFlocat.getHref() != null) {
                        // The first character of the href is replaced by ".."
                        flocatAmd.setHref(".." + pageFlocat.getHref().substring(1));
                    }
                    flocatAmd.setLOCTYPE(pageFlocat.getLOCTYPE());
                    fileTypeAmdSec.getFLocat().add(flocatAmd);
                }
                Fptr fptr = new Fptr();
                fptr.setFILEID(fileTypeAmdSec);
                divType.getFptr().add(fptr);
            }
        }
    }

    HashMap<String, String> toGenerate = new HashMap<String, String>();
    File rawFile = null;
    XMLGregorianCalendar rawCreated = null;
    Mix mixDevice = getScannerMix(metsElement);
    // RAW datastream for MIX_001 - only for Fedora
    PhotometricInterpretation photometricInterpretation = null;
    JHoveOutput jHoveOutputRaw = null;
    JHoveOutput jHoveOutputMC = null;
    if (metsElement.getMetsContext().getFedoraClient() != null) {
        try {
            DatastreamType rawDS = FoxmlUtils.findDatastream(metsElement.getSourceObject(), "RAW");
            if (rawDS != null) {
                GetDatastreamDissemination dsRaw = FedoraClient.getDatastreamDissemination(metsElement.getOriginalPid(), "RAW");
                try {
                    rawCreated = rawDS.getDatastreamVersion().get(0).getCREATED();
                    InputStream is = dsRaw.execute(metsElement.getMetsContext().getFedoraClient()).getEntityInputStream();
                    String rawExtension = MimeType.getExtension(rawDS.getDatastreamVersion().get(0).getMIMETYPE());
                    // Temporary copy of the RAW image, removed again below
                    rawFile = new File(metsElement.getMetsContext().getOutputPath() + File.separator + metsElement.getMetsContext().getPackageID() + File.separator + "raw" + "." + rawExtension);
                    FileMD5Info rawinfo;
                    try {
                        rawinfo = MetsUtils.getDigestAndCopy(is, new FileOutputStream(rawFile));
                    } catch (NoSuchAlgorithmException e) {
                        throw new MetsExportException(metsElement.getOriginalPid(), "Unable to copy RAW image and get digest", false, e);
                    }
                    rawinfo.setMimeType(rawDS.getDatastreamVersion().get(0).getMIMETYPE());
                    rawinfo.setCreated(rawDS.getDatastreamVersion().get(0).getCREATED());
                    md5InfosMap.put("RAW", rawinfo);
                    outputFileNames.put("RAW", rawFile.getAbsolutePath());
                    toGenerate.put("MIX_001", "RAW");
                    // If mix is present in fedora, then use this one
                    jHoveOutputRaw = JhoveUtility.getMixFromFedora(metsElement, MixEditor.RAW_ID);
                    // If not present, then generate new
                    if (jHoveOutputRaw == null) {
                        jHoveOutputRaw = JhoveUtility.getMix(new File(rawFile.getAbsolutePath()), metsElement.getMetsContext(), mixDevice, rawCreated, null);
                        if (jHoveOutputRaw.getMix() == null) {
                            throw new MetsExportException(metsElement.getOriginalPid(), "Unable to generate Mix information for RAW image", false, null);
                        }
                    } else {
                        // Merges the information from the device mix
                        JhoveUtility.mergeMix(jHoveOutputRaw.getMix(), mixDevice);
                    }
                    // Remember the photometric interpretation of the RAW image for the MC mix
                    if ((jHoveOutputRaw.getMix() != null) && (jHoveOutputRaw.getMix().getBasicImageInformation() != null) && (jHoveOutputRaw.getMix().getBasicImageInformation().getBasicImageCharacteristics() != null) && (jHoveOutputRaw.getMix().getBasicImageInformation().getBasicImageCharacteristics().getPhotometricInterpretation() != null)) {
                        photometricInterpretation = jHoveOutputRaw.getMix().getBasicImageInformation().getBasicImageCharacteristics().getPhotometricInterpretation();
                    }
                    fixPSMix(jHoveOutputRaw, metsElement.getOriginalPid(), rawCreated);
                } catch (FedoraClientException e) {
                    throw new MetsExportException(metsElement.getOriginalPid(), "Unable to read raw datastream content", false, e);
                }
            }
        } catch (IOException ex) {
            throw new MetsExportException(metsElement.getOriginalPid(), "Error while getting RAW datastream " + metsElement.getOriginalPid(), false, ex);
        }
    }

    // MC image for MIX_002
    if (fileNames.get("MC_IMGGRP") != null) {
        toGenerate.put("MIX_002", "MC_IMGGRP");
        String outputFileName = outputFileNames.get("MC_IMGGRP");
        if (outputFileName != null) {
            String originalFile = MetsUtils.xPathEvaluateString(metsElement.getRelsExt(), "*[local-name()='RDF']/*[local-name()='Description']/*[local-name()='importFile']");
            if (metsElement.getMetsContext().getFedoraClient() != null) {
                jHoveOutputMC = JhoveUtility.getMixFromFedora(metsElement, MixEditor.NDK_ARCHIVAL_ID);
            }
            if (jHoveOutputMC == null) {
                jHoveOutputMC = JhoveUtility.getMix(new File(outputFileName), metsElement.getMetsContext(), null, md5InfosMap.get("MC_IMGGRP").getCreated(), originalFile);
                if (jHoveOutputMC.getMix() == null) {
                    throw new MetsExportException(metsElement.getOriginalPid(), "Unable to generate Mix information for MC image", false, null);
                }
            }
            fixMCMix(jHoveOutputMC, metsElement.getOriginalPid(), md5InfosMap.get("MC_IMGGRP").getCreated(), originalFile, photometricInterpretation);
        }
    }

    // Wrap the collected MIX nodes into techMD sections
    for (Map.Entry<String, String> mixEntry : toGenerate.entrySet()) {
        String name = mixEntry.getKey();
        String streamName = mixEntry.getValue();
        if (streamName != null) {
            MdSecType mdSec = new MdSecType();
            mdSec.setID(name);
            MdWrap mdWrap = new MdWrap();
            mdWrap.setMIMETYPE("text/xml");
            mdWrap.setMDTYPE("NISOIMG");
            XmlData xmlData = new XmlData();
            Node mixNode = null;
            if ("RAW".equals(streamName)) {
                if (jHoveOutputRaw != null) {
                    mixNode = jHoveOutputRaw.getMixNode();
                    if (md5InfosMap.get(streamName) != null) {
                        md5InfosMap.get(streamName).setFormatVersion(jHoveOutputRaw.getFormatVersion());
                    }
                }
            } else if (("MC_IMGGRP".equals(streamName)) && (md5InfosMap.get("MC_IMGGRP") != null)) {
                if (jHoveOutputMC != null) {
                    mixNode = jHoveOutputMC.getMixNode();
                    if (md5InfosMap.get(streamName) != null) {
                        md5InfosMap.get(streamName).setFormatVersion(jHoveOutputMC.getFormatVersion());
                    }
                    if (mixNode != null) {
                        if ((amdSecFileGrpMap.get("MC_IMGGRP") != null) && (amdSecFileGrpMap.get("MC_IMGGRP").getFile().get(0) != null)) {
                            amdSecFileGrpMap.get("MC_IMGGRP").getFile().get(0).getADMID().add(mdSec);
                        }
                    }
                }
            }
            if (mixNode != null) {
                xmlData.getAny().add(mixNode);
            } else {
                throw new MetsExportException(metsElement.getOriginalPid(), "Unable to generate image metadata (MIX) for " + streamName, false, null);
            }
            mdWrap.setXmlData(xmlData);
            mdSec.setMdWrap(mdWrap);
            amdSec.getTechMD().add(mdSec);
        }
    }

    // The temporary RAW copy is not part of the package
    if (rawFile != null) {
        outputFileNames.remove("RAW");
        if (!rawFile.delete()) {
            LOG.log(Level.WARNING, "Unable to delete temporary RAW file " + rawFile.getAbsolutePath());
        }
    }

    if (outputFileNames.get("ALTOGRP") != null) {
        File altoFile = new File(outputFileNames.get("ALTOGRP"));
        if (altoFile.exists()) {
            Schema altoSchema;
            try {
                altoSchema = AltoDatastream.getSchema();
            } catch (SAXException e) {
                // Keep the PID and the cause so the failure can be traced
                throw new MetsExportException(metsElement.getOriginalPid(), "Unable to get ALTO schema", false, e);
            }
            try {
                altoSchema.newValidator().validate(new StreamSource(altoFile));
            } catch (Exception exSax) {
                throw new MetsExportException(metsElement.getOriginalPid(), "Invalid ALTO", false, exSax);
            }
            if (md5InfosMap.get("ALTOGRP") != null) {
                md5InfosMap.get("ALTOGRP").setFormatVersion("2.0");
            }
        }
    }

    addPremisToAmdSec(amdSec, md5InfosMap, metsElement, amdSecFileGrpMap);
    mapType.setDiv(divType);
    saveAmdSec(metsElement, amdSecMets, fileNames, mimeTypes);
    FileType fileType = prepareFileType(seq, "TECHMDGRP", fileNames, mimeTypes, metsElement.getMetsContext(), outputFileNames, md5InfosMap);
    this.fileGrpMap.get("TECHMDGRP").getFile().add(fileType);
    Fptr fptr = new Fptr();
    fptr.setFILEID(fileType);
    pageDiv.getFptr().add(fptr);
}
/**
 * Parses a structure document and returns the list of internal elements
 * described by the children of its root element.
 *
 * Every child element that carries attributes is expected to define the
 * attributes "type", "alto" and "begin"; "order" is optional. The part of
 * the "alto" value before the first "/" is used as the identifier of the
 * referenced page (the whole value when it contains no "/").
 *
 * @param document
 *            parsed structure document
 * @return list of the internal elements, empty when the document has no root
 *         element
 * @throws IllegalArgumentException
 *             when a mandatory attribute is missing
 */
private List<IntPartInfo> parseAltoInfo(Document document) {
    List<IntPartInfo> intPartInfoList = new ArrayList<IntPartInfo>();
    // getFirstChild() could return a comment or a processing instruction
    Element root = document.getDocumentElement();
    if (root == null) {
        return intPartInfoList;
    }
    NodeList partsList = root.getChildNodes();
    for (int a = 0; a < partsList.getLength(); a++) {
        Node node = partsList.item(a);
        if (!(node instanceof Element) || !node.hasAttributes()) {
            continue;
        }
        Element part = (Element) node;
        String type = requireStructMapAttribute(part, "type");
        String alto = requireStructMapAttribute(part, "alto");
        String begin = requireStructMapAttribute(part, "begin");
        String order = part.hasAttribute("order") ? part.getAttribute("order") : null;
        int slash = alto.indexOf("/");
        String altoPid = (slash < 0) ? alto : alto.substring(0, slash);
        intPartInfoList.add(new IntPartInfo(type, altoPid, begin, order));
    }
    return intPartInfoList;
}

/**
 * Returns the value of a mandatory attribute of a structure element.
 */
private String requireStructMapAttribute(Element part, String attributeName) {
    if (!part.hasAttribute(attributeName)) {
        throw new IllegalArgumentException("Element " + part.getNodeName() + " is missing the attribute " + attributeName);
    }
    return part.getAttribute(attributeName);
}
/**
 *
 * Fills the "isOnPage" structure - currently not used, prepared for future
 * release.
 *
 * For every child of the RELS-EXT description whose node name matches
 * Const.ISONPAGE a struct link from the given element to the referenced page
 * is added to the mets.
 *
 * @param metsElement
 *            element whose RELS-EXT is inspected
 * @throws MetsExportException
 *             when a referenced page is not known to the context
 */
@SuppressWarnings("unused")
private void fillIsOnPage(IMetsElement metsElement) throws MetsExportException {
    Node node = MetsUtils.xPathEvaluateNode(metsElement.getRelsExt(), "*[local-name()='RDF']/*[local-name()='Description']");
    if (node == null) {
        // No description, nothing to link
        return;
    }
    NodeList hasPageNodes = node.getChildNodes();
    for (int a = 0; a < hasPageNodes.getLength(); a++) {
        Node relation = hasPageNodes.item(a);
        if (!relation.getNodeName().equalsIgnoreCase(Const.ISONPAGE)) {
            continue;
        }
        String fileName = relation.getAttributes().getNamedItem("rdf:resource").getNodeValue();
        // rdf:resource is a URI, so it is split by "/" on every platform
        // and not by the platform dependent file separator
        String pagePid = fileName.substring(fileName.indexOf("/") + 1);
        IMetsElement page = metsElement.getMetsContext().getPidElements().get(pagePid);
        if (page == null) {
            throw new MetsExportException(metsElement.getOriginalPid(), "Unable to find page " + pagePid + " referenced by " + Const.ISONPAGE, false, null);
        }
        SmLink smLink = new SmLink();
        smLink.setFrom(metsElement.getElementID());
        smLink.setTo(page.getElementID());
        if (mets.getStructLink() == null) {
            mets.setStructLink(new MetsType.StructLink());
        }
        mets.getStructLink().getSmLinkOrSmLinkGrp().add(smLink);
    }
}
/**
 * Serializes the technical metadata document and validates the result.
 *
 * The formatted utf-8 XML is stored as a byte array under the key
 * "TECHMDGRP" in {@code fileNames}, its mime type "text/xml" under the same
 * key in {@code mimeTypes}. Afterwards the bytes are parsed again and
 * validated against the "mets.xsd" resource.
 *
 * @param metsElement
 *            element whose PID is reported on failure
 * @param amdSecMets
 *            document to serialize
 * @param fileNames
 *            receives the serialized document
 * @param mimeTypes
 *            receives the mime type of the serialized document
 * @throws MetsExportException
 *             when the serialization or the validation fails
 */
private void saveAmdSec(IMetsElement metsElement, Mets amdSecMets, HashMap<String, Object> fileNames, HashMap<String, String> mimeTypes) throws MetsExportException {
    try {
        Marshaller metsMarshaller = JAXBContext.newInstance(Mets.class).createMarshaller();
        metsMarshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
        metsMarshaller.setProperty(Marshaller.JAXB_ENCODING, "utf-8");
        // No schema location is written to the output

        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        metsMarshaller.marshal(amdSecMets, buffer);
        byte[] serialized = buffer.toByteArray();

        // Registered before the validation, as the validation works on the same bytes
        fileNames.put("TECHMDGRP", serialized);
        mimeTypes.put("TECHMDGRP", "text/xml");

        MetsUtils.validateAgainstXSD(MetsUtils.getDocumentFromBytes(serialized), Mets.class.getResourceAsStream("mets.xsd"));
    } catch (Exception ex) {
        throw new MetsExportException(metsElement.getOriginalPid(), "Error while saving AMDSec file", false, ex);
    }
}
/**
 * Inserts Issue structure to the mets
 *
 * The physical div is initialized from the issue and a new logical div of
 * the issue is appended to {@code logicalDiv}. Pages, supplements and
 * pictures are inserted in the first pass over the children, articles in a
 * second pass after all other children were processed.
 *
 * @param physicalDiv
 *            physical div receiving the pages
 * @param logicalDiv
 *            parent logical div receiving the div of the issue
 * @param metsElement
 *            the issue
 * @throws MetsExportException
 *             when a child of an unsupported type is found
 */
private void insertIssue(DivType physicalDiv, DivType logicalDiv, IMetsElement metsElement) throws MetsExportException {
    addDmdSec(metsElement);
    physicalDiv.setID("DIV_P_0000");
    physicalDiv.setLabel3(metsElement.getLabel());
    physicalDiv.getDMDID().add(metsElement.getModsMetsElement());
    physicalDiv.setTYPE(metsElement.getElementType());
    DivType divType = new DivType();
    divType.setID(metsElement.getElementID());
    divType.setLabel3(metsElement.getMetsContext().getRootElement().getLabel());
    divType.setTYPE(metsElement.getElementType());
    divType.getDMDID().add(metsElement.getModsMetsElement());
    logicalDiv.getDiv().add(divType);

    // First pass: everything except articles
    for (IMetsElement element : metsElement.getChildren()) {
        String childType = element.getElementType();
        if (Const.PAGE.equals(childType)) {
            insertPage(physicalDiv, element, pageCounter, metsElement);
            pageCounter++;
        } else if (Const.SUPPLEMENT.equals(childType)) {
            insertSupplement(divType, physicalDiv, element);
        } else if (Const.PICTURE.equals(childType)) {
            insertPicture(divType, physicalDiv, element);
        } else if (!Const.ARTICLE.equals(childType)) {
            // Report the type of the rejected child, not the type of the issue
            throw new MetsExportException(element.getOriginalPid(),
                    "This type is not accepted in Issue:" + childType, false, null);
        }
    }

    // Second pass: articles
    for (IMetsElement element : metsElement.getChildren()) {
        if (Const.ARTICLE.equals(element.getElementType())) {
            insertArticle(divType, physicalDiv, element, articleCounter);
            articleCounter++;
        }
    }
}
/**
 * Walks up the parent chain, starting with the given element itself, and
 * returns the first element whose type is listed in Const.canContainPage.
 *
 * @param metsElement
 *            element to start with
 * @return the given element or its nearest ancestor that can contain pages
 * @throws MetsExportException
 *             when the element is null or the top of the hierarchy is
 *             reached without a match
 */
private IMetsElement findFirstParentWithPage(IMetsElement metsElement) throws MetsExportException {
    if (metsElement == null) {
        throw new MetsExportException("Unable to find parent with pages", false, null);
    }
    IMetsElement candidate = metsElement;
    while (!Const.canContainPage.contains(candidate.getElementType())) {
        IMetsElement parent = candidate.getParent();
        if (parent == null) {
            throw new MetsExportException(candidate.getOriginalPid(), "Unable to find parent with pages", false, null);
        }
        candidate = parent;
    }
    return candidate;
}
/**
 * Inserts Page structure to the mets for a page that belongs to exactly one
 * source element. Delegates to the variant taking a list of source elements.
 *
 * @param physicalDiv
 *            physical div receiving the page
 * @param metsElement
 *            the page
 * @param pageCounter
 *            sequence number of the page
 * @param sourceElement
 *            the element the page is linked from
 * @throws MetsExportException
 */
private void insertPage(DivType physicalDiv, IMetsElement metsElement, int pageCounter, IMetsElement sourceElement) throws MetsExportException {
    final List<IMetsElement> singleSource = new ArrayList<IMetsElement>();
    singleSource.add(sourceElement);
    insertPage(physicalDiv, metsElement, pageCounter, singleSource);
}
/**
 * Inserts Page structure to the mets.
 *
 * Creates the package directory on first use, adds a new page div to the
 * physical div, registers every available stream of the page in the file
 * groups, generates the technical metadata and records the struct-link
 * mapping from each source element to the page.
 *
 * @param physicalDiv
 *            physical div receiving the page div
 * @param metsElement
 *            the page (or a monograph unit)
 * @param pageCounter
 *            sequence number of the page
 * @param sourceElements
 *            elements the page is linked from
 * @throws MetsExportException
 *             when the element is of an unexpected type or a mandatory
 *             stream is missing
 */
private void insertPage(DivType physicalDiv, IMetsElement metsElement, int pageCounter, List<IMetsElement> sourceElements) throws MetsExportException {
    if (metsElement.getMetsContext().getPackageDir() == null) {
        File createdDir = createPackageDir(metsElement);
        metsElement.getMetsContext().setPackageDir(createdDir);
    }

    String elementType = metsElement.getElementType();
    boolean pageLike = Const.PAGE.equals(elementType) || Const.MONOGRAPH_UNIT.equals(elementType);
    if (!pageLike) {
        throw new MetsExportException(metsElement.getOriginalPid(), "Expected page, got " + metsElement.getElementType(), false, null);
    }

    HashMap<String, FileGrp> fileGrpPage = MetsUtils.initFileGroups();
    DivType pageDiv = new DivType();
    physicalDiv.getDiv().add(pageDiv);
    fillPageIndexOrder(metsElement, pageDiv);
    String pageDivId = "DIV_P_PAGE_" + metsElement.getElementID().replace("PAGE_", "");
    pageDiv.setID(pageDivId);

    // Working maps filled while the streams of the page are processed
    HashMap<String, String> outputFileNames = new HashMap<String, String>();
    HashMap<String, Object> fileNames = new HashMap<String, Object>();
    HashMap<String, String> mimeTypes = new HashMap<String, String>();
    HashMap<String, XMLGregorianCalendar> createDates = new HashMap<String, XMLGregorianCalendar>();
    HashMap<String, FileMD5Info> md5InfosMap = new HashMap<String, FileMD5Info>();
    processPageFiles(metsElement, pageCounter, fileNames, mimeTypes, createDates, md5InfosMap);

    for (String streamName : Const.streamMapping.keySet()) {
        if (!fileNames.containsKey(streamName)) {
            // A missing stream is tolerated unless it is mandatory
            if (Const.mandatoryStreams.contains(streamName) && !metsElement.getMetsContext().isAllowNonCompleteStreams()) {
                throw new MetsExportException(metsElement.getOriginalPid(), "Stream:" + streamName + " is missing", false, null);
            }
            continue;
        }
        FileType streamFile = prepareFileType(pageCounter, streamName, fileNames, mimeTypes, metsElement.getMetsContext(), outputFileNames, md5InfosMap);
        fileGrpPage.get(streamName).getFile().add(streamFile);
        fileGrpMap.get(streamName).getFile().add(streamFile);
        Fptr streamPointer = new Fptr();
        streamPointer.setFILEID(streamFile);
        pageDiv.getFptr().add(streamPointer);
        if ("ALTOGRP".equals(streamName)) {
            metsElement.setAltoFile(streamFile);
        }
    }

    generateTechMetadata(metsElement, fileNames, pageCounter, fileGrpPage, mimeTypes, pageDiv, outputFileNames, md5InfosMap);

    // Remember the div of the page and link it from every source element
    StructLinkMapping pageMapping = new StructLinkMapping();
    pageMapping.pageDiv = metsElement.getParent().getModsElementID();
    pageMapping.pageOrder = pageDiv.getORDER();
    pageOrderToDivMap.put(pageMapping, pageDivId);
    for (IMetsElement source : sourceElements) {
        addMappingPageStruct(pageMapping, source.getModsElementID());
    }
}
class StructLinkMapping {
String pageDiv;
BigInteger pageOrder;
@Override
public int hashCode() {
return pageDiv.hashCode() * 1000 + pageOrder.hashCode();
};
@Override
public boolean equals(Object obj) {
StructLinkMapping structLinkMapping = (StructLinkMapping) obj;
if (structLinkMapping.pageDiv.equals(this.pageDiv) && (structLinkMapping.pageOrder.equals(this.pageOrder))) {
return true;
}
return false;
}
}
/**
 * Records that the structure element with the given ID links to the given
 * page; the list of pages of the element is created on first use.
 *
 * @param structLinkMapping
 *            the page
 * @param fromDiv
 *            ID of the structure element the link starts from
 */
private void addMappingPageStruct(StructLinkMapping structLinkMapping, String fromDiv) {
    boolean firstPageOfDiv = (structToPageMap.get(fromDiv) == null);
    if (firstPageOfDiv) {
        structToPageMap.put(fromDiv, new ArrayList<StructLinkMapping>());
    }
    structToPageMap.get(fromDiv).add(structLinkMapping);
}
/**
 * Adds the struct-link to the mets: one smLink per recorded pair of a
 * structure element and a page. The structLink element of the mets is
 * created when it does not exist yet; nothing happens when no mapping was
 * recorded.
 *
 * @throws MetsExportException
 *             when no div is known for a recorded page
 */
private void addStructLink() throws MetsExportException {
    if (structToPageMap.isEmpty()) {
        return;
    }
    StructLink structLink = mets.getStructLink();
    if (structLink == null) {
        structLink = new StructLink();
        mets.setStructLink(structLink);
    }
    for (String structFrom : structToPageMap.keySet()) {
        if (structToPageMap.get(structFrom) == null) {
            continue;
        }
        for (StructLinkMapping pageMapping : structToPageMap.get(structFrom)) {
            if (pageOrderToDivMap.get(pageMapping) == null) {
                throw new MetsExportException("Unable to find DIV for page order:" + pageMapping.pageDiv + " " + pageMapping.pageOrder, false, null);
            }
            SmLink link = new SmLink();
            link.setFrom(structFrom);
            link.setTo(pageOrderToDivMap.get(pageMapping));
            structLink.getSmLinkOrSmLinkGrp().add(link);
        }
    }
}
/**
 * Generates PackageID from the identifiers of the element.
 *
 * The part of the URNNBN identifier after its last ":" is preferred. When
 * the URNNBN is missing and the context allows that, a warning is recorded
 * in the context and the UUID identifier is returned instead.
 *
 * @param element
 *            element whose MODS identifiers are used
 * @return the package ID
 * @throws MetsExportException
 *             when no usable identifier is available
 */
private String getPackageID(IMetsElement element) throws MetsExportException {
    Map<String, String> identifiers = element.getModsIdentifiers();

    if (identifiers.containsKey(Const.URNNBN)) {
        String urnnbn = identifiers.get(Const.URNNBN);
        int lastColon = urnnbn.lastIndexOf(":");
        return urnnbn.substring(lastColon + 1);
    }

    if (!element.getMetsContext().isAllowMissingURNNBN()) {
        // URNNBN is mandatory
        throw new MetsExportException(element.getOriginalPid(), "URNNBN identifier is missing", true, null);
    }

    // Missing URNNBN is allowed: record it and fall back to the UUID
    element.getMetsContext().getMetsExportException().addException(element.getOriginalPid(), "URNNBN identifier is missing", true, null);
    if (!identifiers.containsKey(Const.UUID)) {
        throw new MetsExportException(element.getOriginalPid(), "Unable to find identifier URNNBN and UUID is missing", false, null);
    }
    return identifiers.get(Const.UUID);
}
/**
 * Creates a directory for package
 *
 * The directory is located in the output path of the context and named by
 * the package ID. An existing directory is reused. In both cases the
 * directory structure of the package is created afterwards.
 *
 * @param metsElement
 *            element whose context defines the output path and package ID
 * @return the package directory
 * @throws MetsExportException
 *             when the package ID is not set, the path is occupied by a
 *             regular file or the directory cannot be created
 */
private File createPackageDir(IMetsElement metsElement) throws MetsExportException {
    if (metsElement.getMetsContext().getPackageID() == null) {
        throw new MetsExportException(metsElement.getOriginalPid(), "Package ID is null", false, null);
    }
    File file = new File(metsElement.getMetsContext().getOutputPath() + File.separator + metsElement.getMetsContext().getPackageID());
    if (file.exists()) {
        if (!file.isDirectory()) {
            throw new MetsExportException(metsElement.getOriginalPid(), "File:" + file.getAbsolutePath() + " exists, but is not directory", false, null);
        }
    } else if (!file.mkdirs() && !file.isDirectory()) {
        // A failed creation must not pass unnoticed
        throw new MetsExportException(metsElement.getOriginalPid(), "Unable to create directory:" + file.getAbsolutePath(), false, null);
    }
    createDirectoryStructure(metsElement.getMetsContext());
    return file;
}
/**
 * Inserts Supplement structure to the mets.
 *
 * The physical div is initialized from the supplement only when it has no ID
 * yet. A new logical div of the supplement is appended to
 * {@code logicalDiv}; its children may be pages and pictures only.
 *
 * @param logicalDiv
 *            parent logical div receiving the div of the supplement
 * @param physicalDiv
 *            physical div receiving the pages
 * @param metsElement
 *            the supplement
 * @throws MetsExportException
 *             when a child is neither a page nor a picture
 */
private void insertSupplement(DivType logicalDiv, DivType physicalDiv, IMetsElement metsElement) throws MetsExportException {
    addDmdSec(metsElement);

    boolean physicalDivUntouched = (physicalDiv.getID() == null);
    if (physicalDivUntouched) {
        physicalDiv.setID("DIV_P_0000");
        physicalDiv.setLabel3(metsElement.getLabel());
        physicalDiv.getDMDID().add(metsElement.getModsMetsElement());
        physicalDiv.setTYPE(metsElement.getElementType());
    }

    DivType supplementDiv = new DivType();
    supplementDiv.setID(metsElement.getElementID());
    supplementDiv.setLabel3(metsElement.getMetsContext().getRootElement().getLabel());
    supplementDiv.setTYPE(Const.typeNameMap.get(metsElement.getElementType()));
    supplementDiv.getDMDID().add(metsElement.getModsMetsElement());
    logicalDiv.getDiv().add(supplementDiv);

    for (IMetsElement child : metsElement.getChildren()) {
        if (Const.PAGE.equals(child.getElementType())) {
            insertPage(physicalDiv, child, pageCounter, metsElement);
            pageCounter++;
            continue;
        }
        if (Const.PICTURE.equals(child.getElementType())) {
            insertPicture(supplementDiv, physicalDiv, child);
            continue;
        }
        throw new MetsExportException(child.getOriginalPid(), "Expected Page or Picture, got:" + child.getElementType(), false, null);
    }
}
/**
 * Inserts Volume structure to the mets.
 *
 * A new logical div of the volume is appended to {@code logicalDiv}; its
 * type depends on the type of the element. The children may be issues,
 * supplements, pages, chapters and pictures.
 *
 * @param logicalDiv
 *            parent logical div receiving the div of the volume
 * @param physicalDiv
 *            physical div receiving the pages
 * @param metsElement
 *            the volume
 * @param isMultiPartMonograph
 *            not evaluated by this method
 * @throws MetsExportException
 *             when a child of an unsupported type is found
 */
private void insertVolume(DivType logicalDiv, DivType physicalDiv, IMetsElement metsElement, boolean isMultiPartMonograph) throws MetsExportException {
    addDmdSec(metsElement);

    DivType volumeDiv = new DivType();
    volumeDiv.setID(metsElement.getElementID());
    // Label for volume is inherited from the parent monograph
    volumeDiv.setLabel3(metsElement.getMetsContext().getRootElement().getLabel());

    if (Const.PERIODICAL_VOLUME.equals(metsElement.getElementType())) {
        volumeDiv.setTYPE("PERIODICAL_VOLUME");
    } else if (Const.MONOGRAPH_MULTIPART.equals(metsElement.getElementType())) {
        volumeDiv.setTYPE(Const.MONOGRAPH);
    } else if (Const.MONOGRAPH_UNIT.equals(metsElement.getElementType())) {
        // A monograph unit is exported as a volume, including its ID
        volumeDiv.setTYPE("VOLUME");
        volumeDiv.setID(metsElement.getElementID().replaceAll(Const.MONOGRAPH_UNIT, Const.VOLUME));
        physicalDiv.getDMDID().add(metsElement.getModsMetsElement());
    } else {
        volumeDiv.setTYPE(Const.VOLUME);
    }

    volumeDiv.getDMDID().add(metsElement.getModsMetsElement());
    logicalDiv.getDiv().add(volumeDiv);

    for (IMetsElement child : metsElement.getChildren()) {
        if (Const.ISSUE.equals(child.getElementType())) {
            child.getMetsContext().setPackageID(getPackageID(child));
            insertIssue(physicalDiv, volumeDiv, child);
        } else if (Const.SUPPLEMENT.equals(child.getElementType())) {
            // Inside a monograph unit the package ID is left untouched
            if (!Const.MONOGRAPH_UNIT.equals(metsElement.getElementType())) {
                child.getMetsContext().setPackageID(getPackageID(child));
            }
            insertSupplement(volumeDiv, physicalDiv, child);
        } else if (Const.PAGE.equals(child.getElementType())) {
            insertPage(physicalDiv, child, pageCounter, metsElement);
            pageCounter++;
        } else if (Const.CHAPTER.equals(child.getElementType())) {
            insertChapter(volumeDiv, physicalDiv, child, chapterCounter);
            chapterCounter++;
        } else if (Const.PICTURE.equals(child.getElementType())) {
            insertPicture(volumeDiv, physicalDiv, child);
        } else {
            throw new MetsExportException(child.getOriginalPid(), "Expected Issue, Supplement, Picture or Page, got:" + child.getElementType(), false, null);
        }
    }
}
/**
 * Inserts Monograph structure to the mets.
 *
 * A monograph that is neither multipart nor contains a monograph unit is
 * exported as a single volume. Otherwise the element becomes the title, its
 * supplements, pages and chapters are inserted first and the monograph units
 * are inserted as volumes in a second pass.
 *
 * @param metsElement
 *            the monograph
 * @throws MetsExportException
 *             when a child of an unsupported type is found
 */
private void insertMonograph(IMetsElement metsElement) throws MetsExportException {
    mets.setTYPE("Monograph");
    DivType logicalDiv = new DivType();
    logicalStruct.setDiv(logicalDiv);
    DivType physicalDiv = new DivType();
    physicalDiv.setLabel3(metsElement.getLabel());
    physicalDiv.setID("DIV_P_0000");
    physicalDiv.setTYPE("MONOGRAPH");
    physicalStruct.setDiv(physicalDiv);

    boolean containsUnit = Const.MONOGRAPH_MULTIPART.equalsIgnoreCase(metsElement.getElementType());
    for (IMetsElement child : metsElement.getChildren()) {
        if (Const.MONOGRAPH_UNIT.equals(child.getElementType())) {
            containsUnit = true;
            break;
        }
    }

    logicalDiv.setLabel3(metsElement.getLabel());
    logicalDiv.setTYPE("MONOGRAPH");
    logicalDiv.setID("MONOGRAPH_0001");

    if (!containsUnit) {
        // Simple monograph: the element itself is the only volume
        metsElement.getMetsContext().setPackageID(getPackageID(metsElement));
        insertVolume(logicalDiv, physicalDiv, metsElement, false);
        return;
    }

    metsElement.setModsElementID("TITLE_0001");
    addDmdSec(metsElement);
    logicalDiv.getDMDID().add(metsElement.getModsMetsElement());
    physicalDiv.getDMDID().add(metsElement.getModsMetsElement());

    // First pass: everything except the monograph units
    for (IMetsElement child : metsElement.getChildren()) {
        if (Const.MONOGRAPH_UNIT.equals(child.getElementType())) {
            continue;
        }
        if (Const.SUPPLEMENT.equals(child.getElementType())) {
            child.getMetsContext().setPackageID(getPackageID(child));
            insertSupplement(logicalDiv, physicalDiv, child);
            continue;
        }
        if (Const.PAGE.equals(child.getElementType())) {
            // NOTE(review): the counter is incremented before the page is
            // inserted here, while insertVolume increments it afterwards -
            // confirm that the difference is intended
            pageCounter++;
            insertPage(physicalDiv, child, pageCounter, metsElement);
            continue;
        }
        if (Const.CHAPTER.equals(child.getElementType())) {
            insertChapter(logicalDiv, physicalDiv, child, chapterCounter);
            chapterCounter++;
            continue;
        }
        throw new MetsExportException(child.getOriginalPid(), "Expected Supplement, Monograph unit, Chapter or Page, got:" + child.getElementType(), false, null);
    }

    // Second pass: the monograph units
    for (IMetsElement child : metsElement.getChildren()) {
        if (Const.MONOGRAPH_UNIT.equals(child.getElementType())) {
            child.getMetsContext().setPackageID(getPackageID(child));
            insertVolume(logicalDiv, physicalDiv, child, true);
        }
    }
}
/**
 *
 * Adds the internal elements into the mets div.
 *
 * The internal elements are read from the "STRUCT_MAP" datastream of the
 * element; nothing is added when the datastream is not available. Each
 * internal element becomes a child div that points into the ALTO file of
 * the referenced page.
 *
 * @param parentType
 *            div receiving the divs of the internal elements
 * @param metsElement
 *            element owning the "STRUCT_MAP" datastream
 * @throws MetsExportException
 *             when an internal element references an unknown page
 */
private void addInternalElements(DivType parentType, IMetsElement metsElement) throws MetsExportException {
    byte[] structStream;
    if (metsElement.getMetsContext().getFedoraClient() != null) {
        structStream = MetsUtils.getBinaryDataStreams(metsElement.getMetsContext().getFedoraClient(), metsElement, "STRUCT_MAP");
    } else {
        structStream = MetsUtils.getBinaryDataStreams(metsElement.getSourceObject().getDatastream(), "STRUCT_MAP");
    }
    if (structStream == null) {
        return;
    }
    // An element without a parent is handled as an element outside of an article
    IMetsElement parent = metsElement.getParent();
    String parentElementType = (parent == null) ? null : parent.getElementType();
    List<IntPartInfo> partInfoList = parseAltoInfo(MetsUtils.getDocumentFromBytes(structStream));
    for (IntPartInfo partInfo : partInfoList) {
        DivType divType = new DivType();
        divType.setTYPE(partInfo.getType());
        if ((partInfo.getOrder() != null) && (!("null".equalsIgnoreCase(partInfo.getOrder())))) {
            try {
                divType.setORDER(new BigInteger(partInfo.getOrder()));
            } catch (NumberFormatException ex) {
                LOG.log(Level.WARNING, partInfo.getOrder() + " is not a number in object " + metsElement.getOriginalPid(), ex);
            }
        }
        String number;
        if (Const.ARTICLE.equals(parentElementType)) {
            number = String.format("%04d", metsElement.getMetsContext().addElementId(parent.getElementID()));
        } else {
            number = String.format("%04d", metsElement.getMetsContext().addElementId(metsElement.getElementID()));
        }
        // If an internal element is part of article, then the ID is inherited.
        // NOTE(review): this test ignores the case, the one above does not -
        // confirm that the difference is intended
        if ("ARTICLE".equalsIgnoreCase(parentElementType)) {
            divType.setID(parent.getElementID() + "_" + number);
        } else {
            divType.setID(metsElement.getElementID() + "_" + number);
        }
        Fptr fptr = new Fptr();
        AreaType area = new AreaType();
        IMetsElement refPage = metsElement.getMetsContext().getPidElements().get(partInfo.getAltoPID());
        if (refPage == null) {
            // Report the broken reference instead of failing with a NullPointerException
            throw new MetsExportException(metsElement.getOriginalPid(), "Unable to find page " + partInfo.getAltoPID() + " referenced from STRUCT_MAP", false, null);
        }
        area.setFILEID(refPage.getAltoFile());
        area.setBEGIN(partInfo.getBegin());
        area.setBETYPE("IDREF");
        fptr.setArea(area);
        divType.getFptr().add(fptr);
        parentType.getDiv().add(divType);
    }
}
/**
 * Builds the Periodical structure of the mets document.
 * <p>
 * Creates the root divs of the logical and the physical structure and
 * then dispatches every child of the element to the matching insert
 * method.
 *
 * @param metsElement
 *            the periodical element
 * @throws MetsExportException
 *             if a child is neither a Volume, an Issue nor a Supplement
 */
private void insertPeriodical(IMetsElement metsElement) throws MetsExportException {
    mets.setTYPE("Periodical");
    addDmdSec(metsElement);

    DivType logicalDiv = new DivType();
    logicalStruct.setDiv(logicalDiv);
    DivType physicalDiv = new DivType();
    physicalStruct.setDiv(physicalDiv);

    logicalDiv.setID(metsElement.getElementID());
    // the label is taken from the root element of the export context
    logicalDiv.setLabel3(metsElement.getMetsContext().getRootElement().getLabel());
    logicalDiv.setTYPE(metsElement.getElementType());
    logicalDiv.getDMDID().add(metsElement.getModsMetsElement());

    for (IMetsElement child : metsElement.getChildren()) {
        String childType = child.getElementType();
        if (Const.PERIODICAL_VOLUME.equals(childType)) {
            insertVolume(logicalDiv, physicalDiv, child, false);
            continue;
        }
        if (Const.ISSUE.equals(childType)) {
            // the package ID is set from the issue before it is inserted
            child.getMetsContext().setPackageID(getPackageID(child));
            insertIssue(physicalDiv, logicalDiv, child);
            continue;
        }
        if (Const.SUPPLEMENT.equals(childType)) {
            // the package ID is set from the supplement before it is inserted
            child.getMetsContext().setPackageID(getPackageID(child));
            insertSupplement(logicalDiv, physicalDiv, child);
            continue;
        }
        throw new MetsExportException(child.getOriginalPid(), "Expected Supplement, Volume or Issue, got:" + childType, false, null);
    }
}
/**
 * Inserts the structure of a Picture element into the mets.
 * <p>
 * A picture placed under an article gets an ID derived from the ID of the
 * article and a sequence number, any other picture keeps its own ID.
 *
 * @param logicalDiv
 *            logical div which receives the div of the picture
 * @param physicalDiv
 *            physical div; not used by this method
 * @param metsElement
 *            the picture element
 * @throws MetsExportException
 *             if the element is not a picture
 */
private void insertPicture(DivType logicalDiv, DivType physicalDiv, IMetsElement metsElement) throws MetsExportException {
    String elementType = metsElement.getElementType();
    if (!Const.PICTURE.equals(elementType)) {
        throw new MetsExportException(metsElement.getOriginalPid(), "Expected picture got " + elementType, false, null);
    }
    addDmdSec(metsElement);

    String divId;
    if (Const.ARTICLE.equalsIgnoreCase(metsElement.getParent().getElementType())) {
        // sequence number is taken from the counter kept for the article
        String articleId = metsElement.getParent().getElementID();
        int sequence = metsElement.getMetsContext().addElementId(articleId);
        divId = metsElement.getParent().getElementID() + "_" + String.format("%04d", sequence);
    } else {
        divId = metsElement.getElementID();
    }

    DivType pictureDiv = new DivType();
    pictureDiv.setID(divId);
    pictureDiv.setLabel3(metsElement.getLabel());
    pictureDiv.setTYPE(metsElement.getElementType());
    pictureDiv.getDMDID().add(metsElement.getModsMetsElement());
    logicalDiv.getDiv().add(pictureDiv);

    addInternalElements(pictureDiv, metsElement);
    addStructLinkFromMods(metsElement);
}
/**
 * Adds the info about the linkage between an element and its pages into
 * the struct-link.
 * <p>
 * One mapping is registered for every page order in the inclusive range
 * given by the mods start and the mods end of the element. Nothing is done
 * when either of the two values is missing.
 *
 * @param metsElement
 *            element whose page range is linked
 * @throws MetsExportException
 *             if the mods end is lower than the mods start, or if no parent
 *             element with pages can be found
 */
private void addStructLinkFromMods(IMetsElement metsElement) throws MetsExportException {
    if ((metsElement.getModsStart() == null) || (metsElement.getModsEnd() == null)) {
        return;
    }
    // the range and the page parent do not change while the pages are
    // iterated, so they are resolved only once
    long firstPage = metsElement.getModsStart().longValue();
    long lastPage = metsElement.getModsEnd().longValue();
    if (lastPage < firstPage) {
        throw new MetsExportException(metsElement.getOriginalPid(), "Mods start is bigger than mods end", false, null);
    }
    IMetsElement pageParent = findFirstParentWithPage(metsElement);
    if (pageParent == null) {
        // fail with the PID of the element instead of a bare NullPointerException
        throw new MetsExportException(metsElement.getOriginalPid(), "Unable to find a parent element with pages", false, null);
    }
    for (long page = firstPage; page <= lastPage; page++) {
        StructLinkMapping structLinkMapping = new StructLinkMapping();
        structLinkMapping.pageDiv = pageParent.getModsElementID();
        structLinkMapping.pageOrder = BigInteger.valueOf(page);
        addMappingPageStruct(structLinkMapping, metsElement.getModsElementID());
    }
}
/**
 * Inserts the structure of an Article element into the mets.
 * <p>
 * The div of the article is added to the logical div, followed by its
 * internal elements and its pictures; the struct-link info is added last.
 *
 * @param logicalDiv
 *            logical div which receives the div of the article
 * @param physicalDiv
 *            physical div, handed over to the inserted pictures
 * @param metsElement
 *            the article element
 * @param counterIntPart
 *            value used as the ORDER of the article div
 * @throws MetsExportException
 *             if the element is not an article or if one of its children is
 *             not a picture
 */
private void insertArticle(DivType logicalDiv, DivType physicalDiv, IMetsElement metsElement, int counterIntPart) throws MetsExportException {
    if (!Const.ARTICLE.equals(metsElement.getElementType())) {
        throw new MetsExportException(metsElement.getOriginalPid(), "Expected article got " + metsElement.getElementType(), false, null);
    }
    addDmdSec(metsElement);

    DivType articleDiv = new DivType();
    articleDiv.setID(metsElement.getElementID());
    articleDiv.setORDER(BigInteger.valueOf(counterIntPart));
    articleDiv.setLabel3(metsElement.getLabel());
    articleDiv.setTYPE(metsElement.getElementType());
    articleDiv.getDMDID().add(metsElement.getModsMetsElement());
    logicalDiv.getDiv().add(articleDiv);

    addInternalElements(articleDiv, metsElement);

    // only pictures are accepted as children of an article
    for (MetsElement child : metsElement.getChildren()) {
        if (!Const.PICTURE.equals(child.getElementType())) {
            throw new MetsExportException(child.getOriginalPid(), "Expected Picture got:" + child.getElementType(), false, null);
        }
        insertPicture(articleDiv, physicalDiv, child);
    }

    addStructLinkFromMods(metsElement);
}
/**
 * Inserts the structure of a Chapter element into the mets.
 * <p>
 * The div of the chapter is added to the logical div, followed by its
 * internal elements and the struct-link info; the pictures of the chapter
 * are inserted last.
 *
 * @param logicalDiv
 *            logical div which receives the div of the chapter
 * @param physicalDiv
 *            physical div, handed over to the inserted pictures
 * @param metsElement
 *            the chapter element
 * @param counterIntPart
 *            value used as the ORDER of the chapter div
 * @throws MetsExportException
 *             if the element is not a chapter or if one of its children is
 *             not a picture
 */
private void insertChapter(DivType logicalDiv, DivType physicalDiv, IMetsElement metsElement, int counterIntPart) throws MetsExportException {
    if (!Const.CHAPTER.equals(metsElement.getElementType())) {
        throw new MetsExportException(metsElement.getOriginalPid(), "Expected chapter got " + metsElement.getElementType(), false, null);
    }
    addDmdSec(metsElement);

    DivType chapterDiv = new DivType();
    chapterDiv.setID(metsElement.getElementID());
    chapterDiv.setORDER(BigInteger.valueOf(counterIntPart));
    chapterDiv.setLabel3(metsElement.getLabel());
    chapterDiv.setTYPE(metsElement.getElementType());
    chapterDiv.getDMDID().add(metsElement.getModsMetsElement());
    logicalDiv.getDiv().add(chapterDiv);

    addInternalElements(chapterDiv, metsElement);
    addStructLinkFromMods(metsElement);

    // only pictures are accepted as children of a chapter; any other child,
    // pages included, is rejected with the PID of the chapter
    for (MetsElement child : metsElement.getChildren()) {
        if (!Const.PICTURE.equals(child.getElementType())) {
            throw new MetsExportException(metsElement.getOriginalPid(), "Unexpected element under Chapter:" + child.getElementType(), false, null);
        }
        insertPicture(chapterDiv, physicalDiv, child);
    }
}
/*
 * (non-Javadoc)
 *
 * Generates the mets document for the root element of the context of the
 * given element and saves it as METS_<package id>.xml into the package
 * directory. The Jhove config files are destroyed in any case.
 *
 * @see cz.cas.lib.proarc.common.export.mets.structure.IMetsElementVisitor#
 * insertIntoMets
 * (cz.cas.lib.proarc.common.export.mets.structure.IMetsElement)
 */
@Override
public void insertIntoMets(IMetsElement metsElement) throws MetsExportException {
    try {
        // the output file list has to be empty before the generation starts
        metsElement.getMetsContext().getFileList().clear();
        mets = prepareMets(metsElement);
        initHeader(metsElement);
        LOG.log(Level.FINE, "Inserting into Mets:" + metsElement.getOriginalPid() + "(" + metsElement.getElementType() + ")");

        // the structure is always generated from the root element
        IMetsElement rootElement = metsElement.getMetsContext().getRootElement();
        if (rootElement == null) {
            throw new MetsExportException("Element does not have a root set:" + metsElement.getModel() + " - " + metsElement.getOriginalPid(), false);
        }

        String rootType = rootElement.getElementType();
        if (Const.PERIODICAL_TITLE.equalsIgnoreCase(rootType)) {
            insertPeriodical(rootElement);
        } else if (Const.MONOGRAPH_UNIT.equalsIgnoreCase(rootType) || Const.MONOGRAPH_MULTIPART.equalsIgnoreCase(rootType)) {
            // both monograph kinds share the same insert method
            insertMonograph(rootElement);
        } else {
            throw new MetsExportException(rootElement.getOriginalPid(), "Unknown type:" + rootType + " model:" + rootElement.getModel(), false, null);
        }

        if (metsElement.getMetsContext().getPackageID() == null) {
            throw new MetsExportException(metsElement.getOriginalPid(), "Package ID is null", false, null);
        }
        // the package directory is created only when the context has none yet
        if (metsElement.getMetsContext().getPackageDir() == null) {
            metsElement.getMetsContext().setPackageDir(createPackageDir(metsElement));
        }

        String packageDirPath = metsElement.getMetsContext().getPackageDir().getAbsolutePath();
        String metsFileName = "METS_" + MetsUtils.removeNonAlpabetChars(metsElement.getMetsContext().getPackageID()) + ".xml";
        saveMets(mets, new File(packageDirPath + File.separator + metsFileName), metsElement);
    } finally {
        JhoveUtility.destroyConfigFiles(metsElement.getMetsContext().getJhoveContext());
    }
}
}