/** * Copyright 2008 The University of North Carolina at Chapel Hill * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package edu.unc.lib.deposit.normalize; import static edu.unc.lib.deposit.work.DepositGraphUtils.dprop; import static edu.unc.lib.deposit.work.DepositGraphUtils.fprop; import static edu.unc.lib.dl.util.ContentModelHelper.Model.CONTAINER; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.nio.file.Paths; import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.UUID; import org.apache.commons.lang.StringUtils; import org.codehaus.jackson.JsonNode; import org.codehaus.jackson.map.ObjectMapper; import org.jdom2.Document; import org.jdom2.Element; import org.jdom2.output.XMLOutputter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import com.hp.hpl.jena.rdf.model.Bag; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.Property; import com.hp.hpl.jena.rdf.model.Resource; import edu.unc.lib.deposit.work.AbstractDepositJob; import edu.unc.lib.dl.fedora.PID; import edu.unc.lib.dl.util.ContentModelHelper.DepositRelationship; import edu.unc.lib.dl.util.ContentModelHelper.FedoraProperty; import edu.unc.lib.dl.util.RedisWorkerConstants.DepositField; import edu.unc.lib.dl.xml.JDOMNamespaceUtil; import edu.unc.lib.staging.Stages; /** * Abstract deposit normalization job which processes walks file system paths to interpret them into n3 and MODS for * deposit * * @author lfarrell */ public abstract class AbstractFileServerToBagJob extends AbstractDepositJob { private static final Logger log = LoggerFactory .getLogger(AbstractFileServerToBagJob.class); @Autowired public Stages stages; private Map<String, Bag> pathToFolderBagCache; public AbstractFileServerToBagJob() { pathToFolderBagCache = new HashMap<>(); } public AbstractFileServerToBagJob(String uuid, String depositUUID) { super(uuid, depositUUID); pathToFolderBagCache = new HashMap<>(); } @Override public abstract void runJob(); protected Bag getSourceBag(Bag depositBag, File sourceFile) { Model model = depositBag.getModel(); Map<String, String> status = getDepositStatus(); PID containerPID = new PID("uuid:" + UUID.randomUUID()); Bag bagFolder = model.createBag(containerPID.getURI()); model.add(bagFolder, dprop(model, DepositRelationship.label), status.get(DepositField.fileName.name())); model.add(bagFolder, fprop(model, FedoraProperty.hasModel), model.createResource(CONTAINER.getURI().toString())); depositBag.add(bagFolder); // Cache the source bag folder pathToFolderBagCache.put(sourceFile.getName(), bagFolder); // Add extra descriptive information addDescription(containerPID, status); return bagFolder; } /** * Creates and returns a Jena Resource for the given path representing a file, * adding it to the hierarchy for the deposit * * @param sourceBag * @param filepath * @return */ protected Resource getFileResource(Bag sourceBag, String filepath) { Bag parentBag = getParentBag(sourceBag, filepath); PID pid = createPID(); Resource fileResource = sourceBag.getModel().createResource(pid.getURI()); parentBag.add(fileResource); return fileResource; } /** * Creates and returns a Jena Bag for the given filepath representing a folder, and adds * it to the hierarchy for the deposit * * @param sourceBag * @param filepath * @param model * @return */ protected Bag getFolderBag(Bag sourceBag, String filepath) { Bag parentBag = getParentBag(sourceBag, filepath); PID pid = createPID(); Bag bagFolder = sourceBag.getModel().createBag(pid.getURI()); parentBag.add(bagFolder); pathToFolderBagCache.put(filepath, bagFolder); return bagFolder; } private PID createPID() { UUID uuid = UUID.randomUUID(); PID pid = new PID("uuid:" + uuid.toString()); return pid; } /** * Returns a Jena Bag object for the parent folder of the given filepath, creating the parent if it is not present. * * @param sourceBag * @param filepath * @return */ protected Bag getParentBag(Bag sourceBag, String filepath) { // Retrieve the bag from the cache by base filepath if available. String basePath = Paths.get(filepath).getParent().toString(); if (pathToFolderBagCache.containsKey(basePath)) { return pathToFolderBagCache.get(basePath); } Model model = sourceBag.getModel(); // find or create a folder resource for the filepath String[] pathSegments = filepath.split("/"); // Nothing to do with paths that only have data if (pathSegments.length <= 2) { return sourceBag; } Property labelProp = dprop(model, DepositRelationship.label); Property hasModelProp = model.createProperty(FedoraProperty.hasModel.getURI().toString()); Resource containerResource = model.createResource(CONTAINER.getURI().toString()); Bag currentNode = sourceBag; for (int i = 1; i < pathSegments.length - 1; i++) { String segment = pathSegments[i]; String folderPath = StringUtils.join(Arrays.copyOfRange(pathSegments, 0, i + 1), "/"); if (pathToFolderBagCache.containsKey(folderPath)) { currentNode = pathToFolderBagCache.get(folderPath); continue; } log.debug("No cached folder bag for {}, creating new one", folderPath); // No existing folder was found, create one PID pid = new PID("uuid:" + UUID.randomUUID().toString()); Bag childBag = model.createBag(pid.getURI()); currentNode.add(childBag); model.add(childBag, labelProp, segment); model.add(childBag, hasModelProp, containerResource); pathToFolderBagCache.put(folderPath, childBag); currentNode = childBag; } return currentNode; } /** * Adds additional metadata fields for the root bag container if they are provided * * @param containerPID * @param status */ public void addDescription(PID containerPID, Map<String, String> status) { Document doc = new Document(); Element mods = new Element("mods", JDOMNamespaceUtil.MODS_V3_NS); doc.addContent(mods); if (status.containsKey(DepositField.extras.name())) { ObjectMapper mapper = new ObjectMapper(); try { JsonNode node = mapper.readTree(status.get(DepositField.extras.name())); JsonNode accessionNode = node.get("accessionNumber"); if (accessionNode != null) { Element identifier = new Element("identifier", JDOMNamespaceUtil.MODS_V3_NS); identifier.setText(accessionNode.asText()); identifier.setAttribute("type", "local"); identifier.setAttribute("displayLabel", "Accession Identifier"); mods.addContent(identifier); } JsonNode mediaNode = node.get("mediaId"); if (mediaNode != null) { Element identifier = new Element("identifier", JDOMNamespaceUtil.MODS_V3_NS); identifier.setText(mediaNode.asText()); identifier.setAttribute("type", "local"); identifier.setAttribute("displayLabel", "Source Identifier"); mods.addContent(identifier); } } catch (IOException e) { failJob(e, "Failed to parse extras data for {}", getDepositPID()); log.error("Failed to parse extras data for {}", this.getDepositPID(), e); } } // Persist the MODS file to disk if there were any fields added if (mods.getChildren().size() > 0) { final File modsFolder = getDescriptionDir(); modsFolder.mkdirs(); File modsFile = new File(modsFolder, containerPID.getUUID() + ".xml"); try (FileOutputStream fos = new FileOutputStream(modsFile)) { new XMLOutputter(org.jdom2.output.Format.getPrettyFormat()).output(mods.getDocument(), fos); } catch (IOException e) { failJob(e, "Unable to write descriptive metadata for bag deposit {}", getDepositPID()); } } } public void setStages(Stages stages) { this.stages = stages; } }