/** * Copyright 2008 The University of North Carolina at Chapel Hill * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package edu.unc.lib.deposit.normalize; import static edu.unc.lib.deposit.work.DepositGraphUtils.dprop; import static edu.unc.lib.deposit.work.DepositGraphUtils.fprop; import static edu.unc.lib.dl.util.ContentModelHelper.DepositRelationship.md5sum; import static edu.unc.lib.dl.util.ContentModelHelper.Model.SIMPLE; import java.io.File; import java.net.URI; import java.nio.file.Path; import java.nio.file.Paths; import java.util.Collection; import java.util.Map; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.Property; import com.hp.hpl.jena.rdf.model.Resource; import edu.unc.lib.dl.util.ContentModelHelper.DepositRelationship; import edu.unc.lib.dl.util.ContentModelHelper.FedoraProperty; import edu.unc.lib.dl.util.RedisWorkerConstants.DepositField; import edu.unc.lib.staging.StagingException; import gov.loc.repository.bagit.Bag; import gov.loc.repository.bagit.Bag.Format; import gov.loc.repository.bagit.BagFactory; import gov.loc.repository.bagit.BagFile; import gov.loc.repository.bagit.BagHelper; import gov.loc.repository.bagit.Manifest; import gov.loc.repository.bagit.utilities.SimpleResult; /** * Transforms bagit bags stored in a staging location into n3 for deposit * * @author bbpennel * @author daines * @date Nov 9, 2015 */ public class BagIt2N3BagJob extends AbstractFileServerToBagJob { private static final Logger log = LoggerFactory.getLogger(BagIt2N3BagJob.class); public BagIt2N3BagJob() { super(); } public BagIt2N3BagJob(String uuid, String depositUUID) { super(uuid, depositUUID); } @Override public void runJob() { Model model = getWritableModel(); com.hp.hpl.jena.rdf.model.Bag depositBag = model.createBag(getDepositPID().getURI().toString()); Map<String, String> status = getDepositStatus(); String sourcePath = status.get(DepositField.sourcePath.name()); File sourceFile = new File(sourcePath); if (BagHelper.getVersion(sourceFile) == null) { failJob("Can't find BagIt bag", "A BagIt bag could not be found at the source path."); } BagFactory bagFactory = new BagFactory(); Bag bag = bagFactory.createBag(sourceFile); if (bag.getFormat() != Format.FILESYSTEM) { failJob("Unsupported BagIt bag format", "Only filesystem bags are supported."); } // Verify that the bag has all the required parts SimpleResult completeResult = bag.verifyComplete(); if (!bag.verifyComplete().isSuccess()) { // Bag did not validate, generate error report and throw exception StringBuilder msg = new StringBuilder(); for (String error : completeResult.getErrorMessages()) { msg.append(error).append("\n"); } failJob("Unable to normalize bag " + sourcePath + ", it was not complete according to bagit specifications", msg.toString()); } Collection<BagFile> payload = bag.getPayload(); Property labelProp = dprop(model, DepositRelationship.label); Property hasModelProp = fprop(model, FedoraProperty.hasModel); Property md5sumProp = dprop(model, md5sum); Property locationProp = dprop(model, DepositRelationship.stagingLocation); Resource simpleResource = model.createResource(SIMPLE.getURI().toString()); // Turn the bag itself into the top level folder for this deposit com.hp.hpl.jena.rdf.model.Bag sourceBag = getSourceBag(depositBag, sourceFile); int i = 0; // Add all of the payload objects into the bag folder for (BagFile file : payload) { log.debug("Adding object {}: {}", i++, file.getFilepath()); String filePath = file.getFilepath(); Map<Manifest.Algorithm, String> checksums = bag.getChecksums(filePath); Resource fileResource = getFileResource(sourceBag, filePath); // add checksum, size, label String filename = filePath.substring(filePath.lastIndexOf("/") + 1); model.add(fileResource, labelProp, filename); model.add(fileResource, hasModelProp, simpleResource); if (checksums.containsKey(Manifest.Algorithm.MD5)) { model.add(fileResource, md5sumProp, checksums.get(Manifest.Algorithm.MD5)); } // Find staged path for the file Path storedPath = Paths.get(sourceFile.getAbsolutePath(), filePath); try { URI stagedURI = stages.getStagedURI(storedPath.toUri()); model.add(fileResource, locationProp, stagedURI.toString()); } catch (StagingException e) { failJob(e, "Unable to get staged path for file {}", storedPath); } } String sourceAbsPath = sourceFile.getAbsolutePath(); // Register tag file as deposit manifests, then register them for cleanup laterĀ  for (BagFile tag : bag.getTags()) { Path path = Paths.get(sourceAbsPath, tag.getFilepath()); try { URI stagedURI = stages.getStagedURI(path.toUri()); if (stagedURI != null) { getDepositStatusFactory().addManifest(getDepositUUID(), stagedURI.toString()); model.add(depositBag, dprop(model, DepositRelationship.cleanupLocation), stagedURI.toString()); } } catch (StagingException e) { failJob(e, "Unable to get staged path for file {}", path); } } // Register the bag itself for cleanup Path storedPath = sourceFile.toPath(); try { URI stagedURI = stages.getStagedURI(storedPath.toUri()); if (stagedURI != null) { model.add(depositBag, dprop(model, DepositRelationship.cleanupLocation), stagedURI.toString()); } } catch (StagingException e) { failJob(e, "Unable to get staged path for file {}", storedPath); } } }