/* * Copyright (C) 2015 Jan Pokorsky * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package cz.cas.lib.proarc.common.export.archive; import com.yourmediashelf.fedora.generated.foxml.DatastreamType; import com.yourmediashelf.fedora.generated.foxml.DatastreamVersionType; import com.yourmediashelf.fedora.generated.foxml.DigitalObject; import com.yourmediashelf.fedora.generated.foxml.PropertyType; import cz.cas.lib.proarc.common.device.DeviceRepository; import cz.cas.lib.proarc.common.export.mets.FileMD5Info; import cz.cas.lib.proarc.common.export.mets.MetsExportException; import cz.cas.lib.proarc.common.export.mets.MetsUtils; import cz.cas.lib.proarc.common.fedora.DigitalObjectException; import cz.cas.lib.proarc.common.fedora.FoxmlUtils; import cz.cas.lib.proarc.common.fedora.FoxmlUtils.ControlGroup; import cz.cas.lib.proarc.common.fedora.LocalStorage.LocalObject; import cz.cas.lib.proarc.common.object.DigitalObjectElement; import cz.cas.lib.proarc.common.object.DisseminationHandler; import cz.cas.lib.proarc.mets.DivType; import cz.cas.lib.proarc.mets.DivType.Fptr; import cz.cas.lib.proarc.mets.FileType; import cz.cas.lib.proarc.mets.FileType.FLocat; import cz.cas.lib.proarc.mets.MdSecType; import cz.cas.lib.proarc.mets.MdSecType.MdWrap; import cz.cas.lib.proarc.mets.MdSecType.MdWrap.XmlData; import cz.cas.lib.proarc.mets.Mets; import cz.cas.lib.proarc.mets.MetsType.FileSec; import cz.cas.lib.proarc.mets.MetsType.FileSec.FileGrp; import cz.cas.lib.proarc.mets.MetsType.MetsHdr; import cz.cas.lib.proarc.mets.MetsType.MetsHdr.Agent; import cz.cas.lib.proarc.mets.StructMapType; import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.security.NoSuchAlgorithmException; import java.util.HashMap; import java.util.List; import javax.ws.rs.core.Response; import javax.xml.bind.JAXB; import javax.xml.datatype.DatatypeConfigurationException; import javax.xml.datatype.DatatypeFactory; import javax.xml.datatype.XMLGregorianCalendar; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import org.apache.commons.io.IOUtils; /** * Builds resulting METS package and a corresponding folder layout. * * @author Jan Pokorsky */ public class PackageBuilder { /** * A {@link MdWrap#setMDTYPE(java.lang.String) } helper. * @see <a href='http://www.loc.gov/standards/mets/docs/mets.v1-9.html#mdWrap'>mdWrap</a> */ public enum MdType { DC, MODS } public static final String METS_FILENAME = "mets.xml"; /** The type of the structural map of other objects like devices. */ public static final String STRUCTMAP_OTHERS_TYPE = "OTHERS"; /** The type of the structural map of digital objects. */ public static final String STRUCTMAP_PHYSICAL_TYPE = "PHYSICAL"; /** The ID of the {@code div} containing a list of devices. */ public static final String DIV_DEVICE_LIST_ID = "DIV_DEVICES"; private File pkgFolder; private URI pkgFolderUri; private final File parentFolder; private final DatatypeFactory xmlTypes; private Mets mets; private StructMapType othersStructMap; private final Transformer domTransformer; private final HashMap<String, DivType> pid2PhysicalDiv; public PackageBuilder(File targetFolder) { this.parentFolder = targetFolder; this.pid2PhysicalDiv = new HashMap<String, DivType>(); try { this.xmlTypes = DatatypeFactory.newInstance(); this.domTransformer = TransformerFactory.newInstance().newTransformer(); this.domTransformer.setOutputProperty(OutputKeys.INDENT, "yes"); this.domTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4"); } catch (DatatypeConfigurationException ex) { throw new IllegalStateException(ex); } catch (TransformerConfigurationException ex) { throw new IllegalStateException(ex); } } public void prepare(List<DigitalObjectElement> objectPath, LocalObject lobj) { DigitalObjectElement entry = objectPath.get(0); // create package folder pkgFolder = new File(parentFolder, FoxmlUtils.pidAsUuid(entry.getPid())); if (!pkgFolder.mkdir()) { throw new IllegalStateException("The package folder already exists: " + pkgFolder); } pkgFolderUri = pkgFolder.toURI(); DigitalObject digitalObject = lobj.getDigitalObject(); MetsHdr metsHdr = new MetsHdr(); // XXX should we use rather actual date? // for now use modified date as create day to later decide whether fedora contains same or updated object // metsHdr.setCREATEDATE(xmlTypes.newXMLGregorianCalendar()); metsHdr.setCREATEDATE(getXmlDate(digitalObject, FoxmlUtils.PROPERTY_LASTMODIFIED)); // metsHdr.setCREATEDATE(getDate(digitalObject, FoxmlUtils.PROPERTY_CREATEDATE)); // metsHdr.setLASTMODDATE(getDate(digitalObject, FoxmlUtils.PROPERTY_LASTMODIFIED)); Agent agent = new Agent(); agent.setName("ProArc"); agent.setROLE("CREATOR"); agent.setTYPE("OTHER"); // agent.setTYPE("ORGANIZATION"); metsHdr.getAgent().add(agent); mets = new Mets(); // mets.setID(null); mets.setLabel1(getPackageLabel(objectPath)); mets.setMetsHdr(metsHdr); mets.setTYPE(entry.getModelId()); mets.setFileSec(new FileSec()); } public void build() { JAXB.marshal(mets, new File(pkgFolder, METS_FILENAME)); } public DivType addObject(int index, DigitalObjectElement elm, DigitalObjectElement parentElm) { DivType div = new DivType(); String modelId = elm.getModelId(); String type = getObjectId(modelId); div.setID(String.format("div_%s_%04d", type, index)); div.getCONTENTIDS().add(elm.getPid()); div.setLabel3(elm.getItem().getLabel()); div.setORDER(null); div.setTYPE(modelId); DivType parent = parentElm == null ? null : pid2PhysicalDiv.get(parentElm.getPid()); if (parent == null) { StructMapType structMap = new StructMapType(); structMap.setDiv(div); structMap.setTYPE(STRUCTMAP_PHYSICAL_TYPE); structMap.setLabel2("Physical Structure"); mets.getStructMap().add(structMap); } else { parent.getDiv().add(div); } pid2PhysicalDiv.put(elm.getPid(), div); return div; } public DivType addDevice(LocalObject cache) { String pid = cache.getPid(); DivType div = pid2PhysicalDiv.get(pid); if (div != null) { return div; } div = new DivType(); String modelId = DeviceRepository.METAMODEL_ID; String type = getObjectId(modelId); div.getCONTENTIDS().add(pid); div.setLabel3(cache.getLabel()); div.setORDER(null); div.setTYPE(modelId); DivType devicesDiv; if (othersStructMap == null) { othersStructMap = new StructMapType(); devicesDiv = new DivType(); devicesDiv.setID(DIV_DEVICE_LIST_ID); devicesDiv.setLabel3("List of devices"); othersStructMap.setDiv(devicesDiv); othersStructMap.setTYPE(STRUCTMAP_OTHERS_TYPE); othersStructMap.setLabel2("Other objects"); mets.getStructMap().add(othersStructMap); } else { devicesDiv = othersStructMap.getDiv(); } div.setID(String.format("div_%s_%04d", type, devicesDiv.getDiv().size() + 1)); devicesDiv.getDiv().add(div); pid2PhysicalDiv.put(pid, div); return div; } public void addFoxmlAsFile(int index, DigitalObjectElement elm, LocalObject obj) throws DigitalObjectException { addFoxmlAsFile(index, elm.getModelId(), obj); } public void addFoxmlAsFile(int index, String modelId, LocalObject obj) throws DigitalObjectException { try { String uuid = getObjectId(obj.getPid()); String dsId = "FOXML"; String modelName = getObjectId(modelId); File grpFile = getGroupFile(pkgFolder, dsId, getFilename(index, modelName, uuid, "xml")); DigitalObject dObj = obj.getDigitalObject(); FoxmlUtils.marshal(new StreamResult(grpFile), dObj, true); FileMD5Info fileInfo = getDigest(new BufferedInputStream(new FileInputStream(grpFile))); FileGrp fileGrp = getMetsFileGrp(dsId); FileType fileType = new FileType(); fileType.setCHECKSUM(fileInfo.getMd5()); fileType.setCHECKSUMTYPE("MD5"); fileType.setCREATED(getXmlDate(dObj, FoxmlUtils.PROPERTY_LASTMODIFIED)); fileType.setID(String.format("%s_%s_%04d_%s", dsId, modelName, index, uuid)); fileType.setMIMETYPE("text/xml"); // fileType.setSEQ(index); fileType.setSIZE(fileInfo.getSize()); fileType.getFLocat().add(createFLocat(grpFile)); fileGrp.getFile().add(fileType); DivType div = pid2PhysicalDiv.get(obj.getPid()); Fptr fptr = new Fptr(); fptr.setFILEID(fileType); div.getFptr().add(fptr); } catch (NoSuchAlgorithmException ex) { throw new DigitalObjectException(obj.getPid(), null, ex); } catch (IOException ex) { throw new DigitalObjectException(obj.getPid(), null, ex); } } public void addStreamAsMdSec( int index, DatastreamType dt, String pid, String modelId, MdType mdType ) throws DigitalObjectException { String uuid = getObjectId(pid); String mimetype = dt.getDatastreamVersion().get(0).getMIMETYPE(); String modelName = getObjectId(modelId); DatastreamVersionType ds = dt.getDatastreamVersion().get(0); MdSecType mdSec = new MdSecType(); mdSec.setCREATED(ds.getCREATED()); mdSec.setID(String.format("DMD_%s_%s_%04d_%s", mdType.name(), modelName, index, uuid)); MdWrap mdWrap = new MdWrap(); mdWrap.setMIMETYPE(mimetype); mdWrap.setMDTYPE(mdType.name()); XmlData xmlData = new XmlData(); xmlData.getAny().addAll(ds.getXmlContent().getAny()); mdWrap.setXmlData(xmlData); mdSec.setMdWrap(mdWrap); mets.getDmdSec().add(mdSec); DivType div = pid2PhysicalDiv.get(pid); div.getDMDID().add(mdSec); } public void addStreamAsFile( int index, DatastreamType dt, String pid, String modelId, DisseminationHandler dHandler ) throws DigitalObjectException { String dsId = dt.getID(); String uuid = getObjectId(pid); DatastreamVersionType ds = dt.getDatastreamVersion().get(0); String mimetype = ds.getMIMETYPE(); String ext = getMimeFileExtension(mimetype); String modelName = getObjectId(modelId); File dsFile = getGroupFile(pkgFolder, dsId, getFilename(index, modelName, uuid, ext)); FileMD5Info fileInfo = copyStream(pid, dt, ds, dHandler, dsFile); // add to fileGrp FileGrp fileGrp = getMetsFileGrp(dsId); FileType fileType = new FileType(); fileType.setCHECKSUM(fileInfo.getMd5()); fileType.setCHECKSUMTYPE("MD5"); fileType.setCREATED(ds.getCREATED()); fileType.setID(String.format("%s_%s_%04d_%s", dsId, modelName, index, uuid)); fileType.setMIMETYPE(mimetype); // fileType.setSEQ(index); fileType.setSIZE(fileInfo.getSize()); fileType.getFLocat().add(createFLocat(dsFile)); fileGrp.getFile().add(fileType); DivType div = pid2PhysicalDiv.get(pid); Fptr fptr = new Fptr(); fptr.setFILEID(fileType); div.getFptr().add(fptr); } private FileMD5Info copyStream(String pid, DatastreamType dt, DatastreamVersionType ds, DisseminationHandler dHandler, File dsFile ) throws DigitalObjectException { String dsId = dt.getID(); ControlGroup ctrlGroup = ControlGroup.fromExternal(dt.getCONTROLGROUP()); FileMD5Info fileInfo; try { if (ctrlGroup == ControlGroup.INLINE) { DOMSource domSource = new DOMSource(ds.getXmlContent().getAny().get(0)); domTransformer.transform(domSource, new StreamResult(dsFile)); fileInfo = getDigest(new BufferedInputStream(new FileInputStream(dsFile))); } else { Response resp = dHandler.getDissemination(null); Object entity = resp.getEntity(); if (entity instanceof InputStream) { fileInfo = MetsUtils.getDigestAndCopy((InputStream) entity, new FileOutputStream(dsFile)); } else { String msg = "Unsupported entity " + (entity == null ? null : entity.getClass().getName()); throw new DigitalObjectException(pid, null, dsId, msg, null); } } return fileInfo; } catch (TransformerException ex) { throw new DigitalObjectException(pid, null, dsId, null, ex); } catch (NoSuchAlgorithmException ex) { throw new DigitalObjectException(pid, null, dsId, null, ex); } catch (IOException ex) { throw new DigitalObjectException(pid, null, dsId, null, ex); } } private FLocat createFLocat(File dsFile) { FLocat fLocat = new FLocat(); fLocat.setLOCTYPE("URL"); fLocat.setHref("./" + pkgFolderUri.relativize(dsFile.toURI()).toASCIIString()); return fLocat; } private FileGrp getMetsFileGrp(String dsId) { List<FileGrp> fileGrps = mets.getFileSec().getFileGrp(); for (FileGrp fileGrp : fileGrps) { if (dsId.equals(fileGrp.getID())) { return fileGrp; } } FileGrp fileGrp = new FileGrp(); fileGrp.setID(dsId); fileGrps.add(fileGrp); return fileGrp; } private File getGroupFile(File parent, String grpId, String filename) { File dsFolder = new File(parent, grpId); dsFolder.mkdirs(); File dsFile = new File(dsFolder, filename); if (dsFile.exists()) { throw new IllegalStateException("File exists: " + dsFile); } return dsFile; } static String getFilename(int index, String model, String name, String ext) { return String.format("%s_%04d_%s.%s", model, index, name, ext); } private static String getMimeFileExtension(String mime) { try { return MetsUtils.getMimeToExtension().getProperty(mime); } catch (MetsExportException ex) { throw new IllegalStateException(ex.getCause().getMessage(), ex); } } static String getObjectId(String pid) { return pid.substring(pid.indexOf(':') + 1); } private XMLGregorianCalendar getXmlDate(DigitalObject dobj, String name) { PropertyType createProp = FoxmlUtils.findProperty(dobj, name); if (createProp != null) { String value = createProp.getVALUE(); if (value != null && !value.isEmpty()) { return xmlTypes.newXMLGregorianCalendar(value); } } return xmlTypes.newXMLGregorianCalendar(); } private static String getPackageLabel(List<DigitalObjectElement> objectPath) { StringBuilder sb = new StringBuilder(); for (DigitalObjectElement elm : objectPath) { if (sb.length() > 0) { sb.append(", "); } sb.append(elm.getItem().getLabel()); } return sb.toString(); } private static FileMD5Info getDigest(InputStream is) throws NoSuchAlgorithmException, IOException { try { FileMD5Info fileInfo = MetsUtils.getDigest(is); is.close(); is = null; return fileInfo; } finally { IOUtils.closeQuietly(is); } } }