/*
 * Copyright (C) 2015 Jan Pokorsky
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package cz.cas.lib.proarc.common.export.cejsh;

import cz.cas.lib.proarc.common.export.ExportUtils;
import cz.cas.lib.proarc.common.export.cejsh.CejshBuilder.Article;
import cz.cas.lib.proarc.common.fedora.DigitalObjectException;
import cz.cas.lib.proarc.common.fedora.DigitalObjectNotFoundException;
import cz.cas.lib.proarc.common.fedora.FoxmlUtils;
import cz.cas.lib.proarc.common.fedora.RemoteStorage;
import cz.cas.lib.proarc.common.object.DigitalObjectCrawler;
import cz.cas.lib.proarc.common.object.DigitalObjectElement;
import cz.cas.lib.proarc.common.object.DigitalObjectManager;
import cz.cas.lib.proarc.common.object.VisitorException;
import cz.cas.lib.proarc.common.object.emods.BornDigitalModsPlugin;
import cz.cas.lib.proarc.common.object.ndk.DefaultNdkVisitor;
import cz.cas.lib.proarc.common.object.ndk.NdkPlugin;
import java.io.File;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;

/**
 * Exports born-digital articles in CEJSH format.
 *
 * @see <a href='https://github.com/proarc/proarc/issues/286'>issue 286</a>
 * @author Jan Pokorsky
 */
public class CejshExport {

    private final DigitalObjectManager dom;
    private final RemoteStorage remotes;
    private final CejshConfig config;

    /**
     * Creates the export.
     *
     * @param dom the digital object manager handed to the crawler
     * @param remotes the remote storage whose search is handed to the crawler
     * @param config the CEJSH configuration; it also supplies the log level
     *          of the default status handler
     */
    public CejshExport(DigitalObjectManager dom, RemoteStorage remotes, CejshConfig config) {
        this.dom = dom;
        this.remotes = remotes;
        this.config = config;
    }

    /**
     * Runs the export with a new status handler that uses the configured log level.
     *
     * @param outputFolder a folder where to create the export folder
     * @param pids PIDs of digital objects to include in the export
     * @return the export status
     */
    public CejshStatusHandler export(File outputFolder, List<String> pids) {
        return export(outputFolder, pids, new CejshStatusHandler(config.getLogLevel()));
    }

    /**
     * Runs the export and writes the export log.
     * The log is written only if the export got far enough to set the target
     * folder of the status.
     *
     * @param outputFolder a folder where to create the export folder
     * @param pids PIDs of digital objects to include in the export. NDK and born-digital
     *          objects are expected
     * @param status an export status
     * @return the export status including the target folder and errors
     */
    public CejshStatusHandler export(File outputFolder, List<String> pids, CejshStatusHandler status) {
        try {
            return exportImpl(outputFolder, pids, status);
        } finally {
            // write the log even if the export failed with an exception
            File targetFolder = status.getTargetFolder();
            if (targetFolder != null) {
                ExportUtils.writeExportResult(targetFolder, status.getReslog());
            }
        }
    }

    /**
     * Validates the input, creates the target folder and visits each input object.
     * The first unexpected error stops the export; remaining inputs are skipped.
     *
     * @param output an existing folder where to create the export folder
     * @param pids input PIDs; the first one names the export folder
     * @param status the status collecting errors
     * @return the passed status
     */
    private CejshStatusHandler exportImpl(File output, List<String> pids, CejshStatusHandler status) {
        // XXX write export to RELS-EXT
        if (!output.exists() || !output.isDirectory()) {
            status.error((String) null, "Invalid output: " + output, null);
            return status;
        }
        if (pids == null || pids.isEmpty()) {
            status.error((String) null, "Nothing to export. Missing input PID!", null);
            return status;
        }

        output = ExportUtils.createFolder(output, "cejsh_" + FoxmlUtils.pidAsUuid(pids.get(0)));
        status.setTargetFolder(output);

        final DigitalObjectCrawler crawler = new DigitalObjectCrawler(dom, remotes.getSearch());
        CejshContext ctx;
        try {
            ctx = new CejshContext(output, status, config);
        } catch (Exception ex) {
            status.error(pids.get(0), "Broken context!", ex);
            return status;
        }

        CejshHierarchy hierarchy = new CejshHierarchy(crawler);
        LinkedHashMap<DigitalObjectElement, Set<DigitalObjectElement>> inputs =
                prepareInputQueue(pids, crawler, ctx);
        for (Entry<DigitalObjectElement, Set<DigitalObjectElement>> entry : inputs.entrySet()) {
            DigitalObjectElement dobj = entry.getKey();
            try {
                status.startInput(dobj);
                ctx.reset();
                ctx.setFilter(dobj, entry.getValue());
                hierarchy.visit(dobj, ctx);
            } catch (Throwable ex) {
                // export boundary: record whatever went wrong instead of propagating it
                status.error(dobj, "Unexpected error", null, ex);
                // XXX continue in case of validation or always?
                return status;
            } finally {
                status.finishInput(dobj);
            }
        }
        return status;
    }

    /**
     * Transforms a list of PIDs to the list of digital objects to be exported.
     * When an object is the article then its parent is listed instead and
     * the article is included in the attached set.
     * Other children of the parent are ignored during the export.
     *
     * @param pids input PIDs
     * @param crawler the search index
     * @param ctx the context
     * @return the list of unique digital objects and their articles to include.
     *          The {@code null} Set means include all children.
     */
    private LinkedHashMap<DigitalObjectElement, Set<DigitalObjectElement>> prepareInputQueue(
            List<String> pids, final DigitalObjectCrawler crawler, CejshContext ctx) {

        LinkedHashMap<DigitalObjectElement, Set<DigitalObjectElement>> dobjs =
                new LinkedHashMap<DigitalObjectElement, Set<DigitalObjectElement>>(pids.size());
        for (String pid : pids) {
            try {
                DigitalObjectElement elm = crawler.getEntry(pid);
                if (BornDigitalModsPlugin.MODEL_ARTICLE.equals(elm.getModelId())) {
                    // add article as include filter
                    DigitalObjectElement parent = crawler.getParent(pid);
                    if (parent == DigitalObjectElement.NULL) {
                        ctx.getStatus().error(elm, "No parent!", null, null);
                        // NOTE(review): this stops processing of all remaining PIDs,
                        // whereas the exception path below goes on with the next PID
                        // -- confirm which one is intended
                        break;
                    }
                    Set<DigitalObjectElement> children = dobjs.get(parent);
                    if (children == null) {
                        children = new HashSet<DigitalObjectElement>();
                        dobjs.put(parent, children);
                    }
                    children.add(elm);
                } else if (!dobjs.containsKey(elm)) {
                    dobjs.put(elm, null);
                }
            } catch (DigitalObjectException ex) {
                // NOTE(review): the message is reused for any lookup failure,
                // not only for a missing parent
                ctx.getStatus().error(pid, "No parent!", ex);
            }
        }
        return dobjs;
    }

    /**
     * Walks the hierarchy of digital objects and builds CEJSH packages
     * for periodical issues and volumes. It keeps the path from the start
     * object to the currently visited object.
     */
    static final class CejshHierarchy extends DefaultNdkVisitor<Void, CejshContext> {

        /** The visited objects; the current object comes first, the start object last. */
        private final Deque<DigitalObjectElement> traversePath = new ArrayDeque<DigitalObjectElement>();

        public CejshHierarchy(DigitalObjectCrawler crawler) {
            super(crawler);
        }

        /**
         * Gets the parent of the currently visited object.
         *
         * @return the parent or {@code null} if the current object is the start
         *          object or nothing is being visited
         */
        protected DigitalObjectElement getParent() {
            Iterator<DigitalObjectElement> it = getParentPath();
            return it.hasNext() ? it.next() : null;
        }

        /**
         * Gets the traversal path without the currently visited object.
         *
         * @return the iterator over ancestors, the nearest one first; it is
         *          empty when there is no ancestor on the path
         */
        protected Iterator<DigitalObjectElement> getParentPath() {
            Iterator<DigitalObjectElement> it = getPath().iterator();
            // skip the current object; tolerate the empty path instead of
            // failing with NoSuchElementException
            if (it.hasNext()) {
                it.next();
            }
            return it;
        }

        /**
         * Gets the traversal path including the currently visited object.
         */
        protected Deque<DigitalObjectElement> getPath() {
            return traversePath;
        }

        /**
         * Records the object on the traversal path and dispatches it.
         * Born-digital articles are handled by {@link #visitBdmArticle};
         * everything else is delegated to the super class.
         */
        @Override
        public Void visit(DigitalObjectElement elm, CejshContext p) throws VisitorException {
            try {
                traversePath.addFirst(elm);
                String modelId = elm.getModelId();
                if (BornDigitalModsPlugin.MODEL_ARTICLE.equals(modelId)) {
                    return visitBdmArticle(elm, p);
                } else {
                    return super.visit(elm, p);
                }
            } finally {
                traversePath.removeFirst();
            }
        }

        /**
         * Visits children of the object. If the context declares a filter
         * for the object then only children contained in the filter are visited.
         * A missing ({@code null}) or empty filter means all children.
         */
        @Override
        public Void visitChildren(DigitalObjectElement elm, CejshContext p) throws VisitorException {
            Set<DigitalObjectElement> filter = p.getFilter(elm);
            // the input queue uses null for "include all children"; do not fail
            // if the context hands it over as is
            if (filter == null || filter.isEmpty()) {
                return super.visitChildren(elm, p);
            }
            try {
                List<DigitalObjectElement> children = getCrawler().getChildren(elm.getPid());
                for (DigitalObjectElement child : children) {
                    if (filter.contains(child)) {
                        child.accept(this, p);
                    }
                }
                return null;
            } catch (DigitalObjectException ex) {
                throw new VisitorException(ex);
            }
        }

        /** NDK articles are not exported. */
        @Override
        public Void visitNdkArticle(DigitalObjectElement elm, CejshContext p) throws VisitorException {
            return null;
        }

        /**
         * Adds the born-digital article to the list of articles of the context.
         * Nothing happens when the context holds no article list or when
         * the context does not accept the article. Only reviewed articles are listed.
         */
        public Void visitBdmArticle(DigitalObjectElement elm, CejshContext p) throws VisitorException {
            CejshBuilder builder = p.getBuilder();
            List<Article> articles = p.getArticles();
            if (articles != null && p.acceptArticle(getParent(), elm)) {
                Article article = builder.addArticle(elm, p);
                if (article == null) {
                    // broken package, discard articles and ignore others
                    p.setArticles(null);
                } else if (article.isReviewed()) {
                    articles.add(article);
                }
            }
            return null;
        }

        /**
         * Builds the package of articles linked by the issue.
         * If the issue is the start object then its volume and title are looked
         * up among its ancestors first. A nested issue is reported as an error.
         */
        @Override
        public Void visitNdkPeriodicalIssue(DigitalObjectElement elm, CejshContext p) throws VisitorException {
            if (p.getIssue() != null) {
                p.getStatus().error(elm, "Issue inside issue: " + p.getIssue().toLog(), null, null);
                return null;
            }
            CejshBuilder builder = p.getBuilder();
            if (getPath().size() == 1) {
                // start point
                try {
                    p.setVolume(findParent(elm, p, NdkPlugin.MODEL_PERIODICALVOLUME));
                    if (!builder.addVolume(elm, p.getVolume(), p)) {
                        return null;
                    }
                    p.setTitle(findParent(p.getVolume(), p, NdkPlugin.MODEL_PERIODICAL));
                    if (!builder.addTitle(elm, p.getTitle(), p)) {
                        return null;
                    }
                } catch (DigitalObjectNotFoundException ex) {
                    p.getStatus().error(elm, "Parent not found!", null, ex);
                    return null;
                }
            }
            // process only articles linked by this issue; do not mix with articles from the volume level
            List<Article> parentArticles = p.getArticles();
            p.setArticles(new ArrayList<Article>());
            try {
                p.setIssue(elm);
                Void result = null;
                if (builder.addIssue(elm, elm, p)) {
                    result = super.visitNdkPeriodicalIssue(elm, p);
                    builder.writePackage(elm, p.getArticles(), p);
                }
                return result;
            } finally {
                // restore the article list of the parent level
                p.setArticles(parentArticles);
                p.setIssue(null);
                builder.setIssue(null);
            }
        }

        /**
         * Builds the package of articles collected on the volume level.
         * If the volume is the start object then its title is looked up among
         * its ancestors first. A nested volume is reported as an error.
         */
        @Override
        public Void visitNdkPeriodicalVolume(DigitalObjectElement elm, CejshContext p) throws VisitorException {
            if (p.getVolume() != null) {
                p.getStatus().error(elm, "Volume inside volume: " + p.getVolume().toLog(), null, null);
                return null;
            }
            CejshBuilder builder = p.getBuilder();
            if (getPath().size() == 1) {
                // start point
                try {
                    p.setTitle(findParent(elm, p, NdkPlugin.MODEL_PERIODICAL));
                    if (!builder.addTitle(elm, p.getTitle(), p)) {
                        return null;
                    }
                } catch (DigitalObjectNotFoundException ex) {
                    p.getStatus().error(elm, "Parent not found!", null, ex);
                    return null;
                }
            }
            try {
                p.setVolume(elm);
                p.setArticles(new ArrayList<Article>());
                Void result = null;
                if (builder.addVolume(elm, elm, p)) {
                    result = super.visitNdkPeriodicalVolume(elm, p);
                    builder.writePackage(elm, p.getArticles(), p);
                }
                return result;
            } finally {
                p.setArticles(null);
                p.setVolume(null);
                builder.setVolume(null);
            }
        }

        /**
         * Registers the title and visits the periodical.
         * A nested title is reported as an error.
         */
        @Override
        public Void visitNdkPeriodical(DigitalObjectElement elm, CejshContext p) throws VisitorException {
            if (p.getTitle() != null) {
                p.getStatus().error(elm, "Title inside title: " + p.getTitle().toLog(), null, null);
                return null;
            }
            p.setTitle(elm);
            try {
                Void result = null;
                if (p.getBuilder().addTitle(elm, elm, p)) {
                    result = super.visitNdkPeriodical(elm, p);
                }
                return result;
            } finally {
                p.setTitle(null);
                p.getBuilder().setTitle(null);
            }
        }

        /** Not exported. */
        @Override
        public Void visitNdkCartographic(DigitalObjectElement elm, CejshContext p) throws VisitorException {
            return null;
        }

        /** Not exported. */
        @Override
        public Void visitNdkChapter(DigitalObjectElement elm, CejshContext p) throws VisitorException {
            return null;
        }

        /** Not exported. */
        @Override
        public Void visitNdkMonographSupplement(DigitalObjectElement elm, CejshContext p) throws VisitorException {
            return null;
        }

        /** Not exported. */
        @Override
        public Void visitNdkPage(DigitalObjectElement elm, CejshContext p) throws VisitorException {
            return null;
        }

        /** Not exported. */
        @Override
        public Void visitNdkPicture(DigitalObjectElement elm, CejshContext p) throws VisitorException {
            return null;
        }

        /** Not exported. */
        @Override
        public Void visitNdkSheetMusic(DigitalObjectElement elm, CejshContext p) throws VisitorException {
            return null;
        }

        /**
         * Finds the nearest ancestor with one of the given models.
         *
         * @param elm the object whose ancestors are searched; may be {@code null}
         * @param p the context
         * @param modelId model IDs to search for
         * @return the ancestor or {@code null} if there is none
         * @throws DigitalObjectNotFoundException if the parent lookup fails
         */
        private DigitalObjectElement findParent(DigitalObjectElement elm,
                CejshContext p, String... modelId
                ) throws DigitalObjectNotFoundException {

            HashSet<String> set = new HashSet<String>();
            for (String id : modelId) {
                set.add(id);
            }
            return findParent(elm, set, p);
        }

        /**
         * Finds the nearest ancestor with one of the given models by walking
         * up the parents until a match or the top of the hierarchy.
         *
         * @param elm the object whose ancestors are searched; {@code null}
         *          yields {@code null} so that results of a previous lookup
         *          can be passed in without a check
         * @param modelId model IDs to search for
         * @param p the context
         * @return the ancestor or {@code null} if there is none
         * @throws DigitalObjectNotFoundException if the parent lookup fails
         */
        private DigitalObjectElement findParent(DigitalObjectElement elm,
                Set<String> modelId, CejshContext p
                ) throws DigitalObjectNotFoundException {

            DigitalObjectElement current = elm;
            while (current != null) {
                DigitalObjectElement parent = getCrawler().getParent(current.getPid());
                if (parent == DigitalObjectElement.NULL) {
                    // top of the hierarchy reached without a match
                    return null;
                }
                if (modelId.contains(parent.getModelId())) {
                    return parent;
                }
                current = parent;
            }
            return null;
        }

    }

}