/*
 * Copyright (C) 2014 Robert Simonovsky
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package cz.cas.lib.proarc.common.export.mets.structure;

import java.math.BigInteger;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.datatype.DatatypeConfigurationException;
import javax.xml.datatype.DatatypeFactory;
import javax.xml.datatype.XMLGregorianCalendar;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import com.yourmediashelf.fedora.generated.foxml.DigitalObject;
import cz.cas.lib.proarc.common.dublincore.DcUtils;
import cz.cas.lib.proarc.common.export.Kramerius4Export;
import cz.cas.lib.proarc.common.export.mets.Const;
import cz.cas.lib.proarc.common.export.mets.MetsContext;
import cz.cas.lib.proarc.common.export.mets.MetsExportException;
import cz.cas.lib.proarc.common.export.mets.MetsUtils;
import cz.cas.lib.proarc.common.fedora.FoxmlUtils;
import cz.cas.lib.proarc.common.mods.ModsUtils;
import cz.cas.lib.proarc.common.mods.ndk.NdkMapper;
import cz.cas.lib.proarc.mets.FileType;
import cz.cas.lib.proarc.mets.MdSecType;
import cz.cas.lib.proarc.mods.ModsDefinition;
import cz.cas.lib.proarc.oaidublincore.OaiDcType;

/**
 * Class that represents the element of Mets export.
 *
 * <p>An element is built from a FOXML {@link DigitalObject}: its RELS-EXT and
 * BIBLIO_MODS datastreams are read, a DC descriptor is derived from MODS for
 * every element type except {@code Const.PAGE}, and both are validated
 * against their XSD schemas. The constructor also registers the element in
 * the {@link MetsContext} and, depending on its arguments, resolves children
 * and the parent element.
 *
 * @author Robert Simonovsky
 */
public class MetsElement implements IMetsElement {

    private static final Logger LOG = Logger.getLogger(MetsElement.class.getName());

    /** Identifier of the datastream holding the RDF relations. */
    private static final String RELS_EXT_DS = "RELS-EXT";

    /** Identifier of the datastream holding the MODS description. */
    private static final String BIBLIO_MODS_DS = "BIBLIO_MODS";

    /** DC generated from MODS; {@code null} for {@code Const.PAGE} elements. */
    public final List<Element> descriptor;
    /** Model read from RELS-EXT by {@code MetsUtils.getModel}. */
    public final String model;
    private final MetsContext metsContext;
    private final String originalPid;
    private MetsElement parent;
    private final List<MetsElement> children = new ArrayList<MetsElement>();
    private final List<Element> relsExt;
    private final DigitalObject sourceObject;
    /** Content of BIBLIO_MODS after removing the mods collection wrapper and schema location. */
    public final List<Element> modsStream;
    public final XMLGregorianCalendar createDate;
    public final XMLGregorianCalendar lastUpdateDate;
    public final String label;
    /** Start of the first extent of the first MODS part; only read for non-page elements. */
    public BigInteger modsStart;
    /** End of the first extent of the first MODS part; only read for non-page elements. */
    public BigInteger modsEnd;
    public MdSecType modsMetsElement;
    private FileType altoFile;
    private final String elementType;
    private String elementID;
    private String modsElementID;

    /**
     * Constructor
     *
     * <p>Order of the side effects matters: the element is put into
     * {@code metsContext.getPidElements()} first, children are filled before
     * the parent is looked up, and an element that ends up without a parent
     * is set as the root element of the context.
     *
     * @param digitalObject
     *            FOXML object this element is created from
     * @param parent
     *            parent element; only used when it is a {@link MetsElement}.
     *            When {@code null}, the parent is resolved by
     *            {@code initParent()}
     * @param metsContext
     *            context of the export
     * @param fillChildren
     *            whether {@link #fillChildren()} is called
     * @throws MetsExportException
     *             when a required datastream is missing, the metadata cannot
     *             be read or parsed, validation fails or no mods name is
     *             known for the element type
     */
    public MetsElement(DigitalObject digitalObject, Object parent, MetsContext metsContext, boolean fillChildren) throws MetsExportException {
        this.metsContext = metsContext;
        this.sourceObject = digitalObject;
        this.originalPid = digitalObject.getPID();
        metsContext.getPidElements().put(this.originalPid, this);

        try {
            DatatypeFactory datatypeFactory = DatatypeFactory.newInstance();
            this.createDate = datatypeFactory.newXMLGregorianCalendar(
                    MetsUtils.getProperty(Const.FEDORA_CREATEDATE, digitalObject.getObjectProperties().getProperty()));
            this.lastUpdateDate = datatypeFactory.newXMLGregorianCalendar(
                    MetsUtils.getProperty(Const.FEDORA_LASTMODIFIED, digitalObject.getObjectProperties().getProperty()));
        } catch (DatatypeConfigurationException ex) {
            throw new MetsExportException(this.getOriginalPid(), "Unable to set create/lastModDate", false, ex);
        }
        this.label = MetsUtils.getProperty(Const.FEDORA_LABEL, digitalObject.getObjectProperties().getProperty());

        this.relsExt = requireDatastreamContent(digitalObject, RELS_EXT_DS);

        // Everything below reads the first MODS element, so a missing or empty
        // BIBLIO_MODS is reported here instead of failing later with a
        // NullPointerException / IndexOutOfBoundsException.
        this.modsStream = MetsUtils.removeSchemaLocation(
                MetsUtils.removeModsCollection(requireDatastreamContent(digitalObject, BIBLIO_MODS_DS)));
        if (this.modsStream == null || this.modsStream.isEmpty()) {
            throw new MetsExportException(this.originalPid, "Datastream " + BIBLIO_MODS_DS + " is empty for:" + this.originalPid, false, null);
        }
        Kramerius4Export.removeNils(this.modsStream.get(0));

        this.model = MetsUtils.getModel(this.relsExt);
        this.elementType = Const.typeMap.get(this.model);

        if (Const.PAGE.equals(this.elementType)) {
            this.descriptor = null;
        } else {
            // literal replace - the prefix is not meant as a regular expression
            NdkMapper mapper = NdkMapper.get(this.model.replace("info:fedora/", ""));
            Document modsDocument = MetsUtils.getDocumentFromList(this.modsStream);
            ModsDefinition modsDefinition = ModsUtils.unmarshalModsType(new DOMSource(modsDocument));
            readPartExtent(modsDefinition);
            OaiDcType dcType = mapper.toDc(modsDefinition, null);
            DOMResult dcDOMResult = new DOMResult();
            DcUtils.marshal(dcDOMResult, dcType, true);
            List<Element> dcElements = new ArrayList<Element>();
            dcElements.add((Element) dcDOMResult.getNode().getFirstChild());
            this.descriptor = dcElements;
        }

        validateDCMODS();

        String modsName = Const.typeNameMap.get(this.elementType);
        if (modsName == null) {
            throw new MetsExportException(this.originalPid, "Unable to find mods name for:" + this.elementType, false, null);
        }
        // IDs are built by concatenation rather than String.replaceAll, which
        // would treat the element type as a regular expression and the mods
        // name as a replacement pattern.
        String sequence = String.format("%04d", metsContext.addElementId(this.elementType));
        this.modsElementID = modsName + "_" + sequence;
        if (Const.ARTICLE.equals(this.elementType) || Const.SUPPLEMENT.equals(this.elementType)) {
            // articles and supplements use the mods name in their element ID as well
            this.elementID = this.modsElementID;
        } else {
            this.elementID = this.elementType + "_" + sequence;
        }

        if (parent instanceof MetsElement) {
            this.parent = (MetsElement) parent;
        }
        if (fillChildren) {
            fillChildren();
        }
        if (parent == null) {
            this.parent = initParent();
        }
        if (this.parent == null) {
            metsContext.setRootElement(this);
            LOG.log(Level.FINE, "Root element found:" + getOriginalPid() + "(" + getElementType() + ")");
        }
    }

    /**
     * Static method for instantiating an Element
     *
     * <p>Unlike the constructor, this method rejects objects whose model has
     * no entry in {@code Const.typeMap}.
     *
     * @param object
     *            FOXML object the element is created from
     * @param parent
     *            parent element, may be {@code null}
     * @param metsContext
     *            context of the export
     * @param withChildren
     *            whether children of the element are filled
     * @return the new element
     * @throws MetsExportException
     *             when RELS-EXT is missing, the model is unknown or the
     *             constructor fails
     */
    public static MetsElement getElement(DigitalObject object, MetsElement parent, MetsContext metsContext, boolean withChildren) throws MetsExportException {
        String model = MetsUtils.getModel(requireDatastreamContent(object, RELS_EXT_DS));
        if (Const.typeMap.get(model) == null) {
            throw new MetsExportException(object.getPID(), "Unknown model:" + model, false, null);
        }
        return new MetsElement(object, parent, metsContext, withChildren);
    }

    /**
     * Returns the XML content of the first version of a datastream.
     *
     * @param object
     *            FOXML object to read from
     * @param dsId
     *            identifier of the datastream
     * @return XML elements of the datastream
     * @throws MetsExportException
     *             when the object has no such datastream
     */
    private static List<Element> requireDatastreamContent(DigitalObject object, String dsId) throws MetsExportException {
        if (FoxmlUtils.findDatastream(object, dsId) == null) {
            throw new MetsExportException(object.getPID(), "Datastream " + dsId + " is missing for:" + object.getPID(), false, null);
        }
        return FoxmlUtils.findDatastream(object, dsId).getDatastreamVersion().get(0).getXmlContent().getAny();
    }

    /**
     * Reads start and end of the first extent of the first MODS part into
     * {@link #modsStart} and {@link #modsEnd}. Leaves them untouched when the
     * MODS has no part, the part has no extent, or the value is not present.
     *
     * @throws MetsExportException
     *             when a present value is not a number
     */
    private void readPartExtent(ModsDefinition modsDefinition) throws MetsExportException {
        if (modsDefinition.getPart().isEmpty() || modsDefinition.getPart().get(0).getExtent().isEmpty()) {
            return;
        }
        try {
            if (modsDefinition.getPart().get(0).getExtent().get(0).getStart() != null) {
                this.modsStart = new BigInteger(modsDefinition.getPart().get(0).getExtent().get(0).getStart().getValue());
            }
            if (modsDefinition.getPart().get(0).getExtent().get(0).getEnd() != null) {
                this.modsEnd = new BigInteger(modsDefinition.getPart().get(0).getExtent().get(0).getEnd().getValue());
            }
        } catch (NumberFormatException ex) {
            throw new MetsExportException(this.originalPid, "Unable to parse start-end info from mods", false, ex);
        }
    }

    /**
     * Validates Mods and Dc against xsd schema. DC is only validated when a
     * descriptor exists; MODS is validated against the 3.5 schema when the
     * first MODS element declares {@code version="3.5"} and against
     * {@code mods.xsd} otherwise.
     *
     * @throws MetsExportException
     *             when validation cannot be performed or reports errors; in
     *             the latter case the errors are attached to the exception
     */
    private void validateDCMODS() throws MetsExportException {
        String self = this.getOriginalPid() + "(" + this.getElementType() + ")";
        List<String> validationErrors;

        if (this.descriptor != null) {
            Document dcDoc = MetsUtils.getDocumentFromList(this.descriptor);
            try {
                validationErrors = MetsUtils.validateAgainstXSD(dcDoc, OaiDcType.class.getResourceAsStream("dc_oai.xsd"));
            } catch (Exception ex) {
                throw new MetsExportException(this.getOriginalPid(), "Error while validating DC for:" + self, false, ex);
            }
            if (!validationErrors.isEmpty()) {
                throw validationFailure("Invalid DC in BIBLIO_MODS for:" + self, validationErrors);
            }
        }

        Document modsDoc = MetsUtils.getDocumentFromList(this.modsStream);
        try {
            String schema = "3.5".equals(this.modsStream.get(0).getAttribute("version")) ? "mods-3-5.xsd" : "mods.xsd";
            validationErrors = MetsUtils.validateAgainstXSD(modsDoc, ModsDefinition.class.getResourceAsStream(schema));
        } catch (Exception ex) {
            throw new MetsExportException(this.getOriginalPid(), "Error while validating MODS for:" + self, false, ex);
        }
        if (!validationErrors.isEmpty()) {
            throw validationFailure("Invalid MODS for:" + self, validationErrors);
        }
    }

    /**
     * Creates an exception for this element carrying the given validation
     * errors.
     */
    private MetsExportException validationFailure(String message, List<String> validationErrors) {
        MetsExportException failure = new MetsExportException(this.getOriginalPid(), message, false, null);
        failure.getExceptions().get(0).setValidationErrors(validationErrors);
        return failure;
    }

    /**
     * Reads a related FOXML object through the Fedora client when the context
     * has one, otherwise from the path of the context.
     */
    private DigitalObject readRelatedObject(String pid) throws MetsExportException {
        if (metsContext.getFedoraClient() != null) {
            return MetsUtils.readRelatedFoXML(pid, metsContext.getFedoraClient());
        }
        return MetsUtils.readRelatedFoXML(metsContext.getPath(), pid);
    }

    /**
     * Inits the parent element of current element
     *
     * <p>The created parent is built without filling its children; only this
     * element is added to its child list.
     *
     * @return the parent element or {@code null} when no parent was found
     * @throws MetsExportException
     *             when the parent cannot be found out or created
     */
    private MetsElement initParent() throws MetsExportException {
        String parentId;
        if (metsContext.getFedoraClient() != null) {
            parentId = MetsUtils.getParent(originalPid, metsContext.getRemoteStorage());
            LOG.fine("Parent found from Fedora:" + parentId);
        } else {
            parentId = MetsUtils.getParent(originalPid, metsContext.getFsParentMap());
            LOG.fine("Parent found from Local:" + parentId);
        }
        if (parentId == null) {
            LOG.fine("Parent not found - returning null");
            return null;
        }
        MetsElement parentInit = new MetsElement(readRelatedObject(parentId), null, metsContext, false);
        parentInit.children.add(this);
        return parentInit;
    }

    /**
     * Generates children of this element
     *
     * <p>Every child node of the RDF description in RELS-EXT whose name is
     * accepted by {@code MetsUtils.hasReferenceXML} is read as a related
     * object and turned into a child element, including its own children.
     *
     * @throws MetsExportException
     *             when RELS-EXT has no RDF description, a relation has no
     *             {@code rdf:resource} attribute or a child cannot be created
     */
    @Override
    public void fillChildren() throws MetsExportException {
        Node description = MetsUtils.xPathEvaluateNode(relsExt, "*[local-name()='RDF']/*[local-name()='Description']");
        if (description == null) {
            throw new MetsExportException(originalPid, "Unable to find RDF description in RELS-EXT for:" + originalPid, false, null);
        }
        NodeList relations = description.getChildNodes();
        for (int i = 0; i < relations.getLength(); i++) {
            Node relation = relations.item(i);
            if (!MetsUtils.hasReferenceXML(relation.getNodeName())) {
                continue;
            }
            Node rdfResourceNode = relation.getAttributes().getNamedItem("rdf:resource");
            if (rdfResourceNode == null) {
                throw new MetsExportException(originalPid, "Missing rdf:resource attribute of " + relation.getNodeName() + " in RELS-EXT for:" + originalPid, false, null);
            }
            MetsElement child = new MetsElement(readRelatedObject(rdfResourceNode.getNodeValue()), this, metsContext, true);
            this.children.add(child);
            LOG.log(Level.FINE, "Child found for:" + getOriginalPid() + "(" + getElementType() + ") - " + child.getOriginalPid() + "(" + child.getElementType() + ")");
        }
    }

    /**
     * Collects identifiers from the direct children of the MODS element.
     * Identifiers without a {@code type} attribute cannot be keyed and are
     * skipped.
     *
     * @return map of identifier type to identifier text; empty when there is
     *         no MODS
     * @throws MetsExportException
     *             when the MODS cannot be evaluated
     */
    @Override
    public Map<String, String> getModsIdentifiers() throws MetsExportException {
        Map<String, String> identifiers = new HashMap<String, String>();
        if (this.modsStream == null) {
            return identifiers;
        }
        Node modsNode = MetsUtils.xPathEvaluateNode(MetsUtils.removeModsCollection(this.modsStream), "*[local-name()='mods']");
        if (modsNode == null) {
            return identifiers;
        }
        NodeList modsChildren = modsNode.getChildNodes();
        for (int i = 0; i < modsChildren.getLength(); i++) {
            Node candidate = modsChildren.item(i);
            if (!"identifier".equalsIgnoreCase(candidate.getLocalName())) {
                continue;
            }
            Node typeAttribute = candidate.getAttributes().getNamedItem("type");
            if (typeAttribute == null) {
                LOG.log(Level.FINE, "Skipping identifier without type for:" + getOriginalPid());
                continue;
            }
            identifiers.put(typeAttribute.getNodeValue(), candidate.getTextContent());
        }
        return identifiers;
    }

    /**
     * Passes this element to the visitor.
     *
     * @throws MetsExportException
     *             when the visitor fails
     */
    @Override
    public void accept(IMetsElementVisitor metsVisitor) throws MetsExportException {
        metsVisitor.insertIntoMets(this);
    }

    /** Returns the ALTO file entry set by {@link #setAltoFile(FileType)}. */
    @Override
    public FileType getAltoFile() {
        return altoFile;
    }

    /** Sets the ALTO file entry of this element. */
    @Override
    public void setAltoFile(FileType altoFile) {
        this.altoFile = altoFile;
    }

    /** Returns the METS section set by {@link #setModsMetsElement(MdSecType)}. */
    @Override
    public MdSecType getModsMetsElement() {
        return modsMetsElement;
    }

    /** Sets the METS section holding MODS of this element. */
    @Override
    public void setModsMetsElement(MdSecType modsMetsElement) {
        this.modsMetsElement = modsMetsElement;
    }

    /** Returns the label property of the source object. */
    @Override
    public String getLabel() {
        return label;
    }

    /** Returns the create date property of the source object. */
    @Override
    public XMLGregorianCalendar getCreateDate() {
        return createDate;
    }

    /** Returns the last modified date property of the source object. */
    @Override
    public XMLGregorianCalendar getLastUpdateDate() {
        return lastUpdateDate;
    }

    /** Overrides the MODS element ID generated by the constructor. */
    @Override
    public void setModsElementID(String modsElementID) {
        this.modsElementID = modsElementID;
    }

    /** Returns the ID made of the mods name of the element type and the element sequence number. */
    @Override
    public String getModsElementID() {
        return modsElementID;
    }

    /** Returns the context this element was created in. */
    @Override
    public MetsContext getMetsContext() {
        return metsContext;
    }

    /** Returns the element type that {@code Const.typeMap} holds for the model. */
    @Override
    public String getElementType() {
        return elementType;
    }

    /** Returns the start read from MODS, or {@code null} when none was read. */
    @Override
    public BigInteger getModsStart() {
        return modsStart;
    }

    /** Returns the end read from MODS, or {@code null} when none was read. */
    @Override
    public BigInteger getModsEnd() {
        return modsEnd;
    }

    /** Returns the ID of this element generated by the constructor. */
    @Override
    public String getElementID() {
        return elementID;
    }

    /** Returns the parent element, or {@code null} for an element without parent. */
    @Override
    public MetsElement getParent() {
        return parent;
    }

    /** Returns the live list of child elements. */
    @Override
    public List<MetsElement> getChildren() {
        return children;
    }

    /** Returns the DC descriptor, or {@code null} for page elements. */
    @Override
    public List<Element> getDescriptor() {
        return descriptor;
    }

    /** Returns the model read from RELS-EXT. */
    @Override
    public String getModel() {
        return model;
    }

    /** Returns the PID of the source object. */
    @Override
    public String getOriginalPid() {
        return originalPid;
    }

    /** Returns the XML content of the RELS-EXT datastream. */
    @Override
    public List<Element> getRelsExt() {
        return relsExt;
    }

    /** Returns the FOXML object this element was created from. */
    @Override
    public DigitalObject getSourceObject() {
        return sourceObject;
    }

    /** Returns the processed content of the BIBLIO_MODS datastream. */
    @Override
    public List<Element> getModsStream() {
        return this.modsStream;
    }
}