/*
 * Copyright (C) 2016 Jan Pokorsky
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package cz.cas.lib.proarc.common.export.crossref;

import cz.cas.lib.proarc.common.export.ExportException;
import cz.cas.lib.proarc.common.export.ExportUtils;
import cz.cas.lib.proarc.common.export.mets.ValidationErrorHandler;
import cz.cas.lib.proarc.common.fedora.DigitalObjectException;
import cz.cas.lib.proarc.common.fedora.FoxmlUtils;
import cz.cas.lib.proarc.common.mods.custom.ModsConstants;
import cz.cas.lib.proarc.common.object.DescriptionMetadata;
import cz.cas.lib.proarc.common.object.DigitalObjectElement;
import cz.cas.lib.proarc.common.object.MetadataHandler;
import cz.cas.lib.proarc.common.object.ndk.NdkPlugin;
import cz.cas.lib.proarc.common.xml.ProarcXmlUtils;
import cz.cas.lib.proarc.common.xml.SimpleLSResourceResolver;
import cz.cas.lib.proarc.common.xml.SimpleNamespaceContext;
import cz.cas.lib.proarc.common.xml.TransformErrorListener;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * Builds CrossRef XML files inside the output folder.
 *
 * <p>A builder instance keeps mutable state between calls (the XSL
 * transformer and its parameters, the list of collected articles, the MODS
 * cache and the running package index) and performs no synchronization.
 *
 * @author Jan Pokorsky
 */
class CrossrefBuilder {

    /** Lazily created schema shared by all builders; see {@link #getCrossrefSchema()}. */
    private static Schema SCHEMA_CROSSREF;
    private static final String XSD_FILENAME = "crossref4.3.6.xsd";
    private static final String XSL_FILENAME = "mods_crossref.xsl";

    private final File outputFolder;
    private final XPathExpression issnPath;
    private final XPathExpression partNumberPath;
    private final XPathExpression dateIssuedPath;
    private final XPathExpression physicalFormPath;
    private final XPathExpression abbrevTitlePath;
    private final DocumentBuilder db;
    private final Transformer crossrefXsl;
    private final TransformErrorListener transformationErrorHandler;
    /** MODS documents of the articles collected for the package being built. */
    private final List<Document> articles = new ArrayList<Document>();
    /** Parsed MODS of parent objects (issue, volume, title) reused across packages. */
    private final Map<DigitalObjectElement, Document> docCache = new HashMap<DigitalObjectElement, Document>();
    private final XPathExpression titlePath;
    private final SimpleDateFormat exportDateFormat;
    private Validator crossrefValidator;
    /** Sequence number used to name the next package file. */
    private int pkgIndex;

    /**
     * Creates a builder writing packages to the given folder.
     *
     * @param outputFolder folder where package files are created
     * @throws XPathExpressionException if a MODS XPath expression cannot be compiled
     * @throws ParserConfigurationException if the DOM builder cannot be created
     * @throws TransformerConfigurationException if the MODS to Crossref stylesheet
     *         is missing or cannot be compiled
     */
    public CrossrefBuilder(File outputFolder)
            throws XPathExpressionException, ParserConfigurationException, TransformerConfigurationException {

        this.outputFolder = outputFolder;
        this.exportDateFormat = new SimpleDateFormat("yyyyMMddHHmm");

        XPathFactory xPathFactory = ProarcXmlUtils.defaultXPathFactory();
        XPath xpath = xPathFactory.newXPath();
        xpath.setNamespaceContext(new SimpleNamespaceContext().add("m", ModsConstants.NS));
        issnPath = xpath.compile("m:mods/m:identifier[@type='issn' and not(@invalid)]");
        partNumberPath = xpath.compile("m:mods/m:titleInfo/m:partNumber");
        // NOTE(review): in the MODS schema the "type" attribute is declared on
        // titleInfo, not on title; this expression may never match. It looks like
        // "m:mods/m:titleInfo[@type='abbreviated']/m:title" was intended - confirm
        // against the stylesheet and real data before changing.
        abbrevTitlePath = xpath.compile("m:mods/m:titleInfo/m:title[@type='abbreviated']");
        // NOTE(review): evaluates to the first title in document order regardless
        // of the titleInfo type - confirm that this is the wanted full title.
        titlePath = xpath.compile("m:mods/m:titleInfo/m:title");
        dateIssuedPath = xpath.compile("m:mods/m:originInfo/m:dateIssued");
        physicalFormPath = xpath.compile("m:mods/m:physicalDescription/m:form");

        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setNamespaceAware(true);
        db = dbf.newDocumentBuilder();

        TransformerFactory xslFactory = TransformerFactory.newInstance();
        transformationErrorHandler = new TransformErrorListener();
        // Check the resource first; dereferencing a missing resource would
        // otherwise fail with a NullPointerException instead of the declared
        // configuration exception.
        URL xslUrl = CrossrefBuilder.class.getResource(XSL_FILENAME);
        if (xslUrl == null) {
            throw new TransformerConfigurationException("Cannot load XSL: " + XSL_FILENAME);
        }
        crossrefXsl = xslFactory.newTransformer(new StreamSource(xslUrl.toExternalForm()));
        if (crossrefXsl == null) {
            throw new TransformerConfigurationException("Cannot load XSL: " + XSL_FILENAME);
        }
        crossrefXsl.setOutputProperty(OutputKeys.INDENT, "yes");
        crossrefXsl.setErrorListener(transformationErrorHandler);
    }

    /**
     * Builds and validates a Crossref file for the given package.
     *
     * <p>The parent path of the package supplies the stylesheet parameters,
     * the articles are merged to a single MODS collection, transformed to
     * Crossref XML and the result is validated against the Crossref schema.
     *
     * @param pkg package describing the parent path and its articles
     * @return the created file, or {@code null} if the MODS of an article
     *         could not be obtained
     * @throws ExportException in case of a missing parent, invalid metadata,
     *         transformation errors, validation errors or any unexpected failure
     */
    public File createPackage(CrossrefPackage pkg) throws ExportException {
        // Drop parameters of the previous package. Transformer.reset() is not
        // used here as it would also discard the output properties and the
        // error listener installed by the constructor.
        crossrefXsl.clearParameters();
        articles.clear();
        File packageFile = null;
        // An empty path is reported by processPath as an ExportException;
        // do not fail earlier with an IndexOutOfBoundsException.
        List<DigitalObjectElement> pkgPath = pkg.getPath();
        DigitalObjectElement pkgElm = pkgPath.isEmpty() ? null : pkgPath.get(0);
        try {
            String pkgName = createPackageName(pkg);
            packageFile = new File(outputFolder, pkgName);
            processPath(pkg);
            List<DigitalObjectElement> articleElms = pkg.getArticles();
            for (DigitalObjectElement articleElm : articleElms) {
                // Articles bypass the cache; mergeArticles() moves their root
                // elements to another document.
                Document articleMods = getModsDom(articleElm, false);
                if (articleMods != null) {
                    addArticle(articleMods);
                } else {
                    return null;
                }
            }
            Document mergedArticles = mergeArticles();
            TransformErrorListener errors = createCrossrefXml(
                    new DOMSource(mergedArticles), new StreamResult(packageFile));
            if (!errors.getErrors().isEmpty()) {
                throw new ExportException(pkgElm, "Transformation errors!",
                        errors.getErrors().toString(), null);
            }

            List<String> validationErrors = validateCrossref(new StreamSource(packageFile));
            if (!validationErrors.isEmpty()) {
                throw new ExportException(pkgElm, "Validation errors!",
                        ExportUtils.toString(validationErrors), null);
            }
        } catch (ExportException ex) {
            throw ex;
        } catch (Exception ex) {
            throw new ExportException(pkgElm, "Unexpected error!", null, ex);
        }
        return packageFile;
    }

    /**
     * Transforms a collection of articles to the Crossref document.
     *
     * <p>The stylesheet parameter {@code export_time} is set to the current
     * time formatted as {@code yyyyMMddHHmm}. A thrown
     * {@link TransformerException} is not propagated; its message is added to
     * the returned handler unless the handler already holds some errors.
     *
     * @param src modsCollection in MODS format
     * @param dst Crossref document
     * @return the error handler
     */
    TransformErrorListener createCrossrefXml(Source src, Result dst) {
        try {
            transformationErrorHandler.reset();
            crossrefXsl.setParameter("export_time", exportDateFormat.format(new Date()));
            crossrefXsl.transform(src, dst);
        } catch (TransformerException ex) {
            if (transformationErrorHandler.getErrors().isEmpty()) {
                transformationErrorHandler.getErrors().add(ex.getMessageAndLocation());
            }
        }
        return transformationErrorHandler;
    }

    /**
     * Reads the parent path of the package and sets the stylesheet parameters.
     *
     * @throws ExportException if the path is empty or has an unexpected structure
     */
    private void processPath(CrossrefPackage pkg) throws XPathExpressionException, ExportException {
        List<DigitalObjectElement> path = pkg.getPath();
        Iterator<DigitalObjectElement> it = path.iterator();
        if (!it.hasNext()) {
            throw new ExportException(null, "No parent!", null, new IllegalStateException());
        }
        processIssue(it, pkg);
    }

    /**
     * Gets the next element of the path.
     *
     * @param path remaining path elements
     * @param last the element read most recently; reported when the path ends
     * @throws ExportException if there is no further element
     */
    private static DigitalObjectElement readNext(
            Iterator<DigitalObjectElement> path, DigitalObjectElement last
    ) throws ExportException {
        if (path.hasNext()) {
            return path.next();
        }
        throw new ExportException(last, "No parent!", null, null);
    }

    /**
     * Walks the path starting at its first element. Accepted sequences of
     * models are: issue, volume, title; issue, title; volume, title.
     *
     * @throws ExportException if the path does not match an accepted sequence
     */
    private void processIssue(
            Iterator<DigitalObjectElement> path, CrossrefPackage pkg
    ) throws XPathExpressionException, ExportException {
        DigitalObjectElement elm = path.next();
        if (NdkPlugin.MODEL_PERIODICALISSUE.equals(elm.getModelId())) {
            addIssue(getModsDom(elm), elm.getPid());
            DigitalObjectElement parent = readNext(path, elm);
            if (NdkPlugin.MODEL_PERIODICALVOLUME.equals(parent.getModelId())) {
                addVolume(getModsDom(parent));
                parent = readNext(path, parent);
                if (NdkPlugin.MODEL_PERIODICAL.equals(parent.getModelId())) {
                    addPeriodicalTitle(getModsDom(parent));
                } else {
                    throw new ExportException(parent, "Expected a periodical title!", null, null);
                }
            } else if (NdkPlugin.MODEL_PERIODICAL.equals(parent.getModelId())) {
                // the issue is attached directly to the title
                addPeriodicalTitle(getModsDom(parent));
            } else {
                throw new ExportException(parent, "Expected a periodical volume!", null, null);
            }
        } else if (NdkPlugin.MODEL_PERIODICALVOLUME.equals(elm.getModelId())) {
            addVolumeWithoutIssue(getModsDom(elm), elm.getPid());
            DigitalObjectElement parent = readNext(path, elm);
            if (NdkPlugin.MODEL_PERIODICAL.equals(parent.getModelId())) {
                addPeriodicalTitle(getModsDom(parent));
            } else {
                throw new ExportException(parent, "Expected a periodical title!", null, null);
            }
        } else {
            throw new ExportException(elm, "Expected a periodical issue or volume!", null, null);
        }
    }

    /**
     * Sets the stylesheet parameters {@code issn}, {@code abbrev_title},
     * {@code full_title} and {@code media_type}.
     *
     * @return always {@code true}
     */
    boolean addPeriodicalTitle(String issn, String title, String abbrevTitle, String media)
            throws XPathExpressionException {
        crossrefXsl.setParameter("issn", issn);
        crossrefXsl.setParameter("abbrev_title", abbrevTitle);
        crossrefXsl.setParameter("full_title", title);
        crossrefXsl.setParameter("media_type", media);
        return true;
    }

    /**
     * Reads the periodical title properties from MODS and passes them to
     * {@link #addPeriodicalTitle(String, String, String, String)}.
     */
    private boolean addPeriodicalTitle(Document d) throws XPathExpressionException {
        // title issn - mods/identifier[type="issn"][0]
        // title - mods/titleInfo/title
        // abbreviated title - mods/titleInfo/title/@type="abbreviated"
        // physical form - mods/physicalDescription/form
        String issn = issnPath.evaluate(d);
        String abbrevTitle = abbrevTitlePath.evaluate(d);
        String title = titlePath.evaluate(d);
        String form = physicalFormPath.evaluate(d);
        return addPeriodicalTitle(issn, title, abbrevTitle, form);
    }

    /**
     * Sets the stylesheet parameter {@code volume} and optionally
     * {@code publication_date} and {@code export_uuid}.
     *
     * @param partNumber volume number
     * @param dateIssued publication date; not set if {@code null}
     * @param uuid export UUID; not set if {@code null}
     * @return always {@code true}
     */
    boolean addVolume(String partNumber, String dateIssued, String uuid) throws XPathExpressionException {
        crossrefXsl.setParameter("volume", partNumber);
        if (dateIssued != null) {
            crossrefXsl.setParameter("publication_date", dateIssued);
        }
        if (uuid != null) {
            crossrefXsl.setParameter("export_uuid", uuid);
        }
        return true;
    }

    /**
     * Adds a volume that is the parent of an issue. Only the volume number is
     * set; the date and the UUID are supplied by the issue.
     */
    private void addVolume(Document d) throws XPathExpressionException, ExportException {
        // volume number - mods/titleInfo/partNumber
        String partNumber = partNumberPath.evaluate(d);
        addVolume(partNumber, null, null);
    }

    /**
     * Adds a volume that is the first element of the path, so it supplies
     * also the publication date and the export UUID.
     */
    private void addVolumeWithoutIssue(Document d, String pid) throws XPathExpressionException, ExportException {
        // volume number - mods/titleInfo/partNumber
        // issue date - mods/originInfo/dateIssued
        String partNumber = partNumberPath.evaluate(d);
        String dateIssued = dateIssuedPath.evaluate(d);
        addVolume(partNumber, dateIssued, FoxmlUtils.pidAsUuid(pid));
    }

    /**
     * Sets the stylesheet parameters {@code issue}, {@code publication_date}
     * and {@code export_uuid}.
     *
     * @return always {@code true}
     */
    boolean addIssue(String partNumber, String dateIssued, String issueUuid) throws XPathExpressionException {
        crossrefXsl.setParameter("issue", partNumber);
        crossrefXsl.setParameter("publication_date", dateIssued);
        crossrefXsl.setParameter("export_uuid", issueUuid);
        return true;
    }

    /**
     * Reads the issue properties from MODS and passes them to
     * {@link #addIssue(String, String, String)}.
     */
    private void addIssue(Document d, String pid) throws XPathExpressionException {
        // issue number - mods/titleInfo/partNumber
        // issue date - mods/originInfo/dateIssued
        String partNumber = partNumberPath.evaluate(d);
        String dateIssued = dateIssuedPath.evaluate(d);
        addIssue(partNumber, dateIssued, FoxmlUtils.pidAsUuid(pid));
    }

    /**
     * Registers the MODS of an article for {@link #mergeArticles()}.
     */
    void addArticle(Document d) {
        articles.add(d);
    }

    /**
     * Creates a new document with the root element {@code modsCollection}
     * holding the root elements of all registered articles.
     *
     * <p>The elements are adopted, not copied; the registered article
     * documents lose their root elements.
     *
     * @return the merged document
     */
    Document mergeArticles() throws DOMException {
        Document doc = db.newDocument();
        Element root = doc.createElementNS(ModsConstants.NS, "modsCollection");
        for (Document article : articles) {
            Element modsElm = article.getDocumentElement();
            Node n = doc.adoptNode(modsElm);
            root.appendChild(n);
        }
        doc.appendChild(root);
        return doc;
    }

    /**
     * Creates the file name of the next package, {@code batch_NNNN.xml},
     * and increments the package index.
     *
     * @param pkg not used by the current naming scheme
     */
    String createPackageName(CrossrefPackage pkg) {
        String pkgName = String.format("batch_%04d.xml", pkgIndex++);
        return pkgName;
    }

    /**
     * Gets the parsed MODS of the element using the cache.
     */
    private Document getModsDom(DigitalObjectElement elm) throws ExportException {
        return getModsDom(elm, true);
    }

    /**
     * Gets the parsed MODS of the element.
     *
     * @param elm element to read the description metadata from
     * @param useCache {@code true} to look up the cache first and to store
     *        a newly parsed document in it
     * @return the MODS document
     * @throws ExportException if the metadata is missing, is not well-formed
     *         or cannot be read
     */
    private Document getModsDom(DigitalObjectElement elm, boolean useCache) throws ExportException {
        Document d = useCache ? docCache.get(elm) : null;
        if (d != null) {
            return d;
        }
        try {
            MetadataHandler<?> metadataHandler = elm.getHandler().metadata();
            DescriptionMetadata<String> dm = metadataHandler.getMetadataAsXml();
            String mods = dm.getData();
            Document modsDom = db.parse(new InputSource(new StringReader(mods)));
            if (useCache) {
                // Store the parsed document; "d" is always null at this point.
                docCache.put(elm, modsDom);
            }
            return modsDom;
        } catch (DigitalObjectException ex) {
            throw new ExportException(elm, "Missing MODS!", null, ex);
        } catch (SAXException ex) {
            throw new ExportException(elm, "Invalid MODS!", null, ex);
        } catch (IOException ex) {
            throw new ExportException(elm, "IO error!", null, ex);
        }
    }

    /**
     * Gets the DOM builder used by this instance.
     */
    DocumentBuilder getDocumentBuilder() {
        return db;
    }

    /**
     * Validates a package against CrossRef schemas.
     *
     * <p>The validator is created on first use and reused by later calls.
     * The returned list belongs to the validator's error handler and is
     * cleared at the beginning of each call.
     *
     * @param crossref document to validate
     * @return validation errors; empty if the document is valid
     */
    List<String> validateCrossref(Source crossref) throws SAXException, IOException {
        if (crossrefValidator == null) {
            crossrefValidator = getCrossrefSchema().newValidator();
            crossrefValidator.setErrorHandler(new ValidationErrorHandler());
        }
        List<String> errors = ((ValidationErrorHandler) crossrefValidator.getErrorHandler()).getValidationErrors();
        try {
            errors.clear();
            crossrefValidator.validate(crossref);
        } catch (SAXException ex) {
            // a fatal error stops the validation; report it as the first item
            errors.add(0, ex.getMessage());
        }
        return errors;
    }

    /**
     * Gets the Crossref schema; it is loaded from the class resources on
     * first use.
     *
     * <p>NOTE(review): the lazy initialization is not synchronized; concurrent
     * first calls may load the schema more than once - confirm whether that
     * matters for the callers.
     */
    static Schema getCrossrefSchema() throws SAXException {
        if (SCHEMA_CROSSREF == null) {
            SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
            factory.setResourceResolver(new SimpleLSResourceResolver()
                    .base(CrossrefBuilder.class)
            );
            SCHEMA_CROSSREF = factory.newSchema(CrossrefBuilder.class.getResource(XSD_FILENAME));
        }
        return SCHEMA_CROSSREF;
    }

}