/**
 * Copyright 2008 The University of North Carolina at Chapel Hill
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.unc.lib.dl.update;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.mail.MessagingException;
import javax.mail.internet.MimeMessage;
import javax.xml.stream.XMLStreamException;

import org.apache.commons.codec.digest.DigestUtils;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.output.Format;
import org.jdom2.output.XMLOutputter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.mail.javamail.JavaMailSender;
import org.springframework.mail.javamail.MimeMessageHelper;

import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;

import com.samskivert.mustache.Template;

import edu.unc.lib.dl.fedora.FedoraException;
import edu.unc.lib.dl.fedora.ManagementClient;
import edu.unc.lib.dl.fedora.OptimisticLockException;
import edu.unc.lib.dl.fedora.PID;
import edu.unc.lib.dl.services.DigitalObjectManager;
import edu.unc.lib.dl.util.ContentModelHelper.Datastream;
import edu.unc.lib.dl.util.RedisWorkerConstants;

/**
 * Update processor which performs bulk metadata imports from a CDR metadata package.
 *
 * <p>Processing happens in two passes over the package: every update is first run through the
 * validation pipeline, and only if no problems were recorded are the updates applied to the
 * repository. Progress is recorded in Redis after each datastream so that an interrupted update
 * can be resumed, and the submitter is emailed the outcome.
 *
 * @author bbpennel
 * @date Jul 21, 2015
 */
public class BulkMetadataUIPProcessor implements UIPProcessor {
    private static final Logger log = LoggerFactory.getLogger(BulkMetadataUIPProcessor.class);

    /** Delay between checks while waiting for the repository to become available. */
    private static final long REPOSITORY_POLL_INTERVAL_MS = 10000L;

    private DigitalObjectManager digitalObjectManager;
    private ManagementClient managementClient;
    private UIPUpdatePipeline validationPipeline;
    private UIPUpdatePipeline transformPipeline;
    private JedisPool jedisPool;
    private JavaMailSender mailSender;
    private Template completeTemplate;
    private Template failedTemplate;
    private String fromAddress;

    /**
     * Validates and then applies every datastream update contained in a bulk metadata package.
     *
     * <p>If validation records any problem, a failure email is sent, the update is cleaned up and
     * nothing is applied. Otherwise the method waits for the repository to be available, rewinds
     * the package and applies each incoming datastream: a datastream that does not exist yet is
     * created, an existing one is modified only when its checksum differs from the incoming
     * content, and a modification rejected with an {@link OptimisticLockException} is reported as
     * outdated. A completion email summarizing the results is sent at the end.
     *
     * <p>If the thread is interrupted while waiting for the repository, the interrupt status is
     * restored and the method returns without cleaning up, leaving the stored update information
     * in place.
     *
     * @param uip the update package; must be a {@link BulkMetadataUIP}
     * @throws UpdateException if the package cannot be read or parsed, or the update cannot be resumed
     * @throws UIPException if {@code uip} is not a {@link BulkMetadataUIP}
     */
    @Override
    public void process(UpdateInformationPackage uip) throws UpdateException, UIPException {
        if (!(uip instanceof BulkMetadataUIP)) {
            throw new UIPException("Incorrect UIP class, found " + uip.getClass().getName() + ", expected "
                    + BulkMetadataUIP.class.getName());
        }

        BulkMetadataUIP bulkUIP = (BulkMetadataUIP) uip;
        long start = System.currentTimeMillis();

        try {
            if (bulkUIP.isExistingUpdate()) {
                resume(bulkUIP);
                log.info("Resuming metadata update {} by {}", bulkUIP.getOriginalFilename(), bulkUIP.getUser());
            } else {
                // Store data related to this update in case it is interrupted
                storeUpdateInformation(bulkUIP);
                log.info("Starting metadata update {} by {}", bulkUIP.getOriginalFilename(), bulkUIP.getUser());
            }

            // First pass: validate every update, collecting problems keyed by object pid
            Map<String, String> failed = new HashMap<>();
            BulkMetadataDatastreamUIP singleUIP;
            do {
                try {
                    singleUIP = bulkUIP.getNextUpdate();
                    if (singleUIP == null) {
                        break;
                    }

                    validationPipeline.processUIP(singleUIP);
                } catch (XMLStreamException | JDOMException e) {
                    // The package itself is unreadable from here on, so stop validating
                    failed.put(bulkUIP.getCurrentPid().getPid(), "Could not parse XML: " + e.getMessage());
                    break;
                } catch (UIPException e) {
                    // A single invalid update; keep going so all problems are reported together
                    failed.put(bulkUIP.getCurrentPid().getPid(), "Invalid update: " + e.getMessage());
                }
            } while (true);

            // If there were any validation problems, inform the user and end the update
            if (failed.size() > 0) {
                log.info("Metadata update {} by user {} failed due to {} problems",
                        new Object[] { bulkUIP.getOriginalFilename(), bulkUIP.getUser(), failed.size() });
                sendValidationFailureEmail(bulkUIP, failed);
                cleanup(bulkUIP);
                return;
            }

            // Wait for the repository to become available before proceeding with updates
            while (!managementClient.isRepositoryAvailable()) {
                try {
                    Thread.sleep(REPOSITORY_POLL_INTERVAL_MS);
                } catch (InterruptedException e) {
                    // Restore the interrupt status so callers can observe the interruption
                    Thread.currentThread().interrupt();
                    log.warn("Interrupted while waiting for the repository, halting metadata update {}",
                            bulkUIP.getOriginalFilename());
                    return;
                }
            }

            // Reset the state of the package so as to start updating from the beginning
            bulkUIP.reset();

            List<String> skipped = new ArrayList<>();
            List<String> updated = new ArrayList<>();
            List<String> outdated = new ArrayList<>();

            // Second pass: apply each incoming datastream
            while ((singleUIP = bulkUIP.getNextUpdate()) != null) {
                for (java.util.Map.Entry<String, Element> entry : singleUIP.getIncomingData().entrySet()) {
                    try {
                        // Retrieve the existing datastream, if any, so its checksum can be compared
                        edu.unc.lib.dl.fedora.types.Datastream datastream
                                = managementClient.getDatastream(singleUIP.getPID(), entry.getKey());

                        transformPipeline.processUIP(singleUIP);

                        if (datastream == null) {
                            // New datastream, create it
                            File contentFile = File.createTempFile("content", null);
                            try {
                                XMLOutputter xmlOutput = new XMLOutputter(Format.getRawFormat());
                                try (FileOutputStream outStream = new FileOutputStream(contentFile)) {
                                    xmlOutput.output(entry.getValue(), outStream);
                                }

                                digitalObjectManager.addOrReplaceDatastream(singleUIP.getPID(),
                                        Datastream.getDatastream(entry.getKey()), contentFile, "text/xml",
                                        uip.getUser(), uip.getMessage());

                                updated.add(singleUIP.getPID().getPid());
                            } finally {
                                contentFile.delete();
                            }
                        } else {
                            // Updating existing, so check if the update is necessary
                            String incomingChecksum = computeIncomingChecksum(entry.getValue());

                            if (!incomingChecksum.equals(datastream.getChecksum())) {
                                // The checksums don't match, so update
                                XMLOutputter rawOutputter = new XMLOutputter(Format.getRawFormat());
                                try {
                                    managementClient.modifyDatastream(singleUIP.getPID(), entry.getKey(), null,
                                            singleUIP.getLastModified(),
                                            rawOutputter.outputString(entry.getValue()).getBytes("UTF-8"));
                                    updated.add(singleUIP.getPID().getPid());
                                    // Only report success when the modification actually went through
                                    log.info("Updated {} for object {} during bulk update",
                                            new Object[] { entry.getKey(), singleUIP.getPID().getPid() });
                                } catch (OptimisticLockException e) {
                                    // Datastream on the server more recent than submitted copy, reject it
                                    outdated.add(singleUIP.getPID().getPid());
                                    log.info("Rejected outdated update of {} for object {} during bulk update",
                                            entry.getKey(), singleUIP.getPID().getPid());
                                }
                            } else {
                                log.debug("Skipping update of {} for object {} because the content has not changed.",
                                        entry.getKey(), singleUIP.getPID().getPid());
                                skipped.add(singleUIP.getPID().getPid());
                            }
                        }
                    } catch (UIPException | FedoraException e) {
                        log.error("Failed to perform update to {} as part of bulk update", singleUIP.getPID(), e);
                        failed.put(singleUIP.getPID().getPid(), e.getMessage());
                    }

                    // Store information about the last update completed so we can resume if interrupted
                    updateResumptionPoint(bulkUIP.getPID(), singleUIP);
                }
            }

            sendCompletedEmail(bulkUIP, updated, skipped, outdated, failed);

            // Finalize the update and clean up the trash
            cleanup(bulkUIP);

            log.info("Completed metadata update {} by {} containing {} updates after {}ms",
                    new Object[] { bulkUIP.getOriginalFilename(), bulkUIP.getUser(),
                            bulkUIP.getUpdateCount(), (System.currentTimeMillis() - start) });
        } catch (XMLStreamException | IOException | JDOMException e) {
            throw new UpdateException("Failed to perform metadata update for user " + uip.getUser(), e);
        } finally {
            bulkUIP.close();
        }
    }

    /**
     * Computes the MD5 hex digest of the document containing the incoming element, serialized in
     * compact format with the XML declaration included, trimmed, and with CRLF sequences removed.
     * The result is compared against the checksum reported for the existing datastream.
     *
     * @param incoming element holding the incoming datastream content
     * @return MD5 hex digest of the normalized serialization
     */
    private static String computeIncomingChecksum(Element incoming) {
        Format formatForChecksum = Format.getCompactFormat();
        formatForChecksum.setOmitDeclaration(false);
        XMLOutputter checksumOutputter = new XMLOutputter(formatForChecksum);

        String serialized = checksumOutputter.outputString(incoming.getDocument());
        return DigestUtils.md5Hex(serialized.trim().replace("\r\n", ""));
    }

    /**
     * Records the pid and datastream of the most recently processed update in Redis, under the
     * resume key for this bulk update.
     *
     * @param uipPID pid of the bulk update package
     * @param singleUIP the update which was just processed
     */
    private void updateResumptionPoint(PID uipPID, BulkMetadataDatastreamUIP singleUIP) {
        try (Jedis jedis = jedisPool.getResource()) {
            Map<String, String> values = new HashMap<>();
            values.put("lastPid", singleUIP.getPID().getPid());
            values.put("lastDatastream", singleUIP.getDatastream());
            jedis.hmset(RedisWorkerConstants.BULK_RESUME_PREFIX + uipPID.getPid(), values);
        }
    }

    /**
     * Stores the submitter details and import file location for this update in Redis, under the
     * update key for this bulk update.
     *
     * @param uip the bulk update being started
     */
    private void storeUpdateInformation(BulkMetadataUIP uip) {
        try (Jedis jedis = jedisPool.getResource()) {
            Map<String, String> values = new HashMap<>();
            values.put("email", uip.getEmailAddress());
            values.put("user", uip.getUser());
            values.put("groups", uip.getGroups().joinAccessGroups(" ", null, false));
            values.put("filePath", uip.getImportFile().getAbsolutePath());
            values.put("originalFilename", uip.getOriginalFilename());
            jedis.hmset(RedisWorkerConstants.BULK_UPDATE_PREFIX + uip.getPID().getPid(), values);
        }
    }

    /**
     * Resumes an interrupted update using the last resumption point stored, moving the update cursor up to the point
     * where the next getNextUpdate call will return the information for the next datastream after where the previous
     * run left off.
     *
     * <p>When no resumption point has been stored, the update information is stored again and the
     * cursor is left where it is.
     *
     * @param uip the bulk update to resume
     * @throws UpdateException if the import file no longer exists or seeking to the resumption point fails; in
     *             both cases the update is cleaned up first
     */
    private void resume(BulkMetadataUIP uip) throws UpdateException {
        // Read the resumption point and release the connection before calling helpers that
        // borrow their own connection from the pool
        Map<String, String> resumeValues;
        try (Jedis jedis = jedisPool.getResource()) {
            resumeValues = jedis.hgetAll(RedisWorkerConstants.BULK_RESUME_PREFIX + uip.getPID().getPid());
        }

        // A missing hash is reported as an empty map rather than null
        if (resumeValues == null || resumeValues.isEmpty()) {
            // No resumption info, so store update info just in case
            storeUpdateInformation(uip);
            return;
        }

        // If the update file doesn't exist anymore, clear this update out so it doesn't stick around forever
        if (!uip.getImportFile().exists()) {
            cleanup(uip);
            throw new UpdateException("Unable to resume update " + uip.getPID() + ", could not find update file");
        }

        // Move the update cursor past the last updated object
        try {
            uip.seekNextUpdate(new PID(resumeValues.get("lastPid")), resumeValues.get("lastDatastream"));
        } catch (Exception e) {
            cleanup(uip);
            throw new UpdateException("Failed to parse update package while resuming", e);
        }
    }

    /**
     * Cleans up resumption information and files related to the update
     *
     * @param uip the bulk update to clean up
     */
    private void cleanup(BulkMetadataUIP uip) {
        String pid = uip.getPID().getPid();
        try (Jedis jedis = jedisPool.getResource()) {
            jedis.del(RedisWorkerConstants.BULK_UPDATE_PREFIX + pid);
            jedis.del(RedisWorkerConstants.BULK_RESUME_PREFIX + pid);
        }

        uip.getImportFile().delete();
    }

    /**
     * Adds the main recipient to a message: the submitter's address when one was provided,
     * otherwise the configured from address.
     *
     * @param msg message being composed
     * @param toEmail address provided with the update, may be null or blank
     * @return true if the from address was used because no address was provided
     * @throws MessagingException if the address cannot be added
     */
    private boolean addPrimaryRecipient(MimeMessageHelper msg, String toEmail) throws MessagingException {
        if (toEmail == null || toEmail.trim().length() == 0) {
            // No email provided, send to admins instead
            msg.addTo(fromAddress);
            return true;
        }

        msg.addTo(toEmail);
        return false;
    }

    /**
     * Sends the email summarizing a finished update, rendered with the completion template.
     * When any update was outdated or failed, the subject notes the issues and the from address
     * is added as a recipient as well. Messaging failures are logged rather than thrown.
     *
     * @param uip the bulk update which finished
     * @param updated pids of objects which were updated
     * @param skipped pids of objects whose content had not changed
     * @param outdated pids of objects whose update was rejected as outdated
     * @param failed failure messages keyed by pid
     */
    public void sendCompletedEmail(BulkMetadataUIP uip, List<String> updated, List<String> skipped,
            List<String> outdated, Map<String, String> failed) {
        MimeMessage mimeMsg = mailSender.createMimeMessage();

        try {
            MimeMessageHelper msg = new MimeMessageHelper(mimeMsg, MimeMessageHelper.MULTIPART_MODE_MIXED);
            msg.setFrom(fromAddress);

            String toEmail = uip.getEmailAddress();
            log.debug("Sending email to '{}'", toEmail);
            boolean adminsAddressed = addPrimaryRecipient(msg, toEmail);

            Map<String, Object> data = new HashMap<>();
            data.put("fileName", uip.getOriginalFilename());

            data.put("updated", updated);
            // The reported count covers both updated and skipped (unchanged) objects
            int updatedCount = updated.size();
            if (skipped.size() > 0) {
                data.put("skippedCount", skipped.size());
                data.put("skipped", skipped);
                updatedCount += skipped.size();
            }
            data.put("updatedCount", updatedCount);

            if (outdated.size() > 0) {
                data.put("outdatedCount", outdated.size());
                data.put("outdated", outdated);
            }

            if (failed.size() > 0) {
                data.put("failedCount", failed.size());
                data.put("failed", failed.entrySet());
            }

            if (outdated.size() > 0 || failed.size() > 0) {
                data.put("issues", true);
                msg.setSubject("CDR Metadata update completed with issues:" + uip.getOriginalFilename());
                // Copy the admins on problems, unless they are already the recipient
                if (!adminsAddressed) {
                    msg.addTo(fromAddress);
                }
            } else {
                msg.setSubject("CDR Metadata update completed:" + uip.getOriginalFilename());
            }

            String html = completeTemplate.execute(data);
            msg.setText(html, true);

            mailSender.send(mimeMsg);
        } catch (MessagingException e) {
            log.error("Failed to send email to " + uip.getEmailAddress()
                    + " for update " + uip.getOriginalFilename(), e);
        }
    }

    /**
     * Sends the email reporting that an update was not performed because of validation problems,
     * rendered with the failure template. Messaging failures are logged rather than thrown.
     *
     * @param uip the bulk update which failed validation
     * @param problems problem descriptions keyed by pid
     */
    public void sendValidationFailureEmail(BulkMetadataUIP uip, Map<String, String> problems) {
        MimeMessage mimeMsg = mailSender.createMimeMessage();

        try {
            MimeMessageHelper msg = new MimeMessageHelper(mimeMsg, MimeMessageHelper.MULTIPART_MODE_MIXED);
            msg.setFrom(fromAddress);

            addPrimaryRecipient(msg, uip.getEmailAddress());

            msg.setSubject("CDR Metadata update failed");

            Map<String, Object> data = new HashMap<>();
            data.put("fileName", uip.getOriginalFilename());
            data.put("problems", problems.entrySet());
            data.put("problemCount", problems.size());

            String html = failedTemplate.execute(data);
            msg.setText(html, true);

            mailSender.send(mimeMsg);
        } catch (MessagingException e) {
            log.error("Failed to send email to " + uip.getEmailAddress()
                    + " for update " + uip.getOriginalFilename(), e);
        }
    }

    public DigitalObjectManager getDigitalObjectManager() {
        return digitalObjectManager;
    }

    public void setDigitalObjectManager(DigitalObjectManager digitalObjectManager) {
        this.digitalObjectManager = digitalObjectManager;
    }

    public ManagementClient getManagementClient() {
        return managementClient;
    }

    public void setManagementClient(ManagementClient managementClient) {
        this.managementClient = managementClient;
    }

    public void setValidationPipeline(UIPUpdatePipeline validationPipeline) {
        this.validationPipeline = validationPipeline;
    }

    public void setTransformPipeline(UIPUpdatePipeline transformPipeline) {
        this.transformPipeline = transformPipeline;
    }

    public void setCompleteTemplate(Template completeTemplate) {
        this.completeTemplate = completeTemplate;
    }

    public void setFailedTemplate(Template failedTemplate) {
        this.failedTemplate = failedTemplate;
    }

    public JedisPool getJedisPool() {
        return jedisPool;
    }

    public void setJedisPool(JedisPool jedisPool) {
        this.jedisPool = jedisPool;
    }

    public void setMailSender(JavaMailSender mailSender) {
        this.mailSender = mailSender;
    }

    public void setFromAddress(String fromAddress) {
        this.fromAddress = fromAddress;
    }
}