/** * Copyright 2008 The University of North Carolina at Chapel Hill * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package edu.unc.lib.dl.update; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.StringWriter; import java.util.UUID; import javax.xml.namespace.QName; import javax.xml.stream.XMLEventReader; import javax.xml.stream.XMLEventWriter; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLOutputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.events.Attribute; import javax.xml.stream.events.StartElement; import javax.xml.stream.events.XMLEvent; import org.jdom2.Document; import org.jdom2.JDOMException; import org.jdom2.input.SAXBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import edu.unc.lib.dl.acl.util.AccessGroupSet; import edu.unc.lib.dl.fedora.PID; import edu.unc.lib.dl.util.ContentModelHelper.Datastream; import edu.unc.lib.dl.util.PremisEventLogger; /** * Bulk metadata update information package, stores information related to the entire import and retrieves * information for individual object datastream updates. * * @author bbpennel * @date Jul 30, 2015 */ public class BulkMetadataUIP extends UIPImpl { private static final Logger log = LoggerFactory.getLogger(BulkMetadataUIP.class); private PID currentPid; private int updateCount = 0; private int objectCount = 0; private final XMLOutputFactory xmlOutput; private XMLEventReader xmlReader; private DocumentState state = DocumentState.ROOT; private final String emailAddress; private final AccessGroupSet groups; private final File importFile; private final String originalFilename; private final boolean existingUpdate; private final static String BULK_MD_TAG = "bulkMetadata"; private final static String OBJECT_TAG = "object"; private final static String UPDATE_TAG = "update"; private final static String MODS_TYPE = "MODS"; private final static QName pidAttribute = new QName("pid"); private final static QName lastModifiedAttribute = new QName("lastModified"); private final static QName typeAttribute = new QName("type"); private enum DocumentState { ROOT, IN_BULK, IN_OBJECT, IN_CONTENT; } public BulkMetadataUIP(String pid, String emailAddress, String user, AccessGroupSet groups, File importFile, String originalFilename) throws UIPException { super(new PID(pid == null? UUID.randomUUID().toString() : pid), user, UpdateOperation.REPLACE); existingUpdate = pid != null; this.emailAddress = emailAddress; this.groups = groups; this.importFile = importFile; this.originalFilename = originalFilename; xmlOutput = XMLOutputFactory.newInstance(); initializeXMLReader(); } private void initializeXMLReader() throws UIPException { XMLInputFactory xmlFactory = XMLInputFactory.newInstance(); try { xmlReader = xmlFactory.createXMLEventReader(new FileInputStream(importFile)); } catch (FileNotFoundException | XMLStreamException e) { throw new UIPException("Failed to read metadata update package for " + user, e); } } public BulkMetadataDatastreamUIP getNextUpdate() throws UpdateException, JDOMException, XMLStreamException { return seekNextUpdate(null, null); } public BulkMetadataDatastreamUIP seekNextUpdate(PID resumePid, String resumeDs) throws UpdateException, JDOMException, XMLStreamException { QName contentOpening = null; long countOpenings = 0; XMLEventWriter xmlWriter = null; StringWriter contentWriter = null; String currentDs = null; String lastModified = null; boolean resumeMode = resumePid != null; boolean foundResumptionPoint = false; try { while (xmlReader.hasNext()) { XMLEvent e = xmlReader.nextEvent(); switch (state) { case ROOT: if (e.isStartElement()) { StartElement element = e.asStartElement(); // Make sure that this document begins with bulk md tag if (element.getName().getLocalPart().equals(BULK_MD_TAG)) { state = DocumentState.IN_BULK; } } break; case IN_BULK: if (e.isStartElement()) { StartElement element = e.asStartElement(); // Found an opening object tag, capture its PID if (element.getName().getLocalPart().equals(OBJECT_TAG)) { Attribute pid = element.getAttributeByName(pidAttribute); if (pid != null) { currentPid = new PID(pid.getValue()); objectCount++; } state = DocumentState.IN_OBJECT; } } break; case IN_OBJECT: if (e.isStartElement()) { StartElement element = e.asStartElement(); // Found start of update, extract the datastream if (element.getName().getLocalPart().equals(UPDATE_TAG)) { // Get last modified date if available Attribute lastModifiedAttr = element.getAttributeByName(lastModifiedAttribute); lastModified = lastModifiedAttr == null? null : lastModifiedAttr.getValue(); Attribute typeAttr = element.getAttributeByName(typeAttribute); if (typeAttr == null) { throw new UpdateException("Invalid import data, missing type attribute on update of " + currentPid); } if (MODS_TYPE.equals(typeAttr.getValue())) { currentDs = Datastream.MD_DESCRIPTIVE.toString(); } else { throw new UpdateException("Invalid import data, unsupport type in update tag " + currentPid); } foundResumptionPoint = resumeMode && currentPid.equals(resumePid) && currentDs.equals(resumeDs); state = DocumentState.IN_CONTENT; if (!resumeMode) { contentWriter = new StringWriter(); xmlWriter = xmlOutput.createXMLEventWriter(contentWriter); } } } else if (e.isEndElement()) { // Closing object tag state = DocumentState.IN_BULK; } break; case IN_CONTENT: // Record the name of the content opening tag so we can tell when it closes if (e.isStartElement()) { if (contentOpening == null) { contentOpening = e.asStartElement().getName(); countOpenings = 1; } else if (contentOpening.equals(e.asStartElement().getName())) { // Count the number of openings just in case there are nested records countOpenings++; } } // Subtract from count of opening tags for root element if (e.isEndElement() && contentOpening.equals(e.asEndElement().getName())) { countOpenings--; } // Finished with opening tags and the update tag is ending, done with content. if (countOpenings == 0 && e.isEndElement() && UPDATE_TAG.equals(e.asEndElement().getName().getLocalPart())) { state = DocumentState.IN_OBJECT; // Increment the number of updates retrieved updateCount++; if (!resumeMode) { xmlWriter.close(); xmlWriter = null; Document dsDoc = new SAXBuilder() .build(new ByteArrayInputStream(contentWriter.toString().getBytes())); return new BulkMetadataDatastreamUIP(currentPid, user, UpdateOperation.REPLACE, currentDs, lastModified, dsDoc); } else if (foundResumptionPoint) { return null; } } else { if (!resumeMode) { // Store all of the content from the incoming document xmlWriter.add(e); } } break; } } } catch (IOException e) { throw new UpdateException("Could not parse content for " + currentPid, e); } finally { if (xmlWriter != null) { xmlWriter.close(); } } return null; } public void close() { try { xmlReader.close(); } catch (XMLStreamException e) { log.error("Failed to close XML Reader during CDR metadata update", e); } } public void reset() throws UIPException { close(); initializeXMLReader(); updateCount = 0; objectCount = 0; } /* (non-Javadoc) * @see edu.unc.lib.dl.update.UpdateInformationPackage#getEventLogger() */ @Override public PremisEventLogger getEventLogger() { // TODO Auto-generated method stub return null; } public String getEmailAddress() { return emailAddress; } public AccessGroupSet getGroups() { return groups; } public File getImportFile() { return importFile; } public boolean isExistingUpdate() { return existingUpdate; } public int getUpdateCount() { return updateCount; } public int getObjectCount() { return objectCount; } public String getOriginalFilename() { return originalFilename; } public PID getCurrentPid() { return currentPid; } }