package edu.unc.lib.dl.util;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import edu.unc.lib.dl.fedora.PID;
import edu.unc.lib.dl.xml.NamespaceConstants;

/**
 * A quick SAX extractor of METS header information.
 *
 * <p>
 * Call {@link #scan(File, String)} and then read the collected values through
 * the getters. Only elements in the METS namespace are inspected. Values are
 * stored in instance fields, so an instance accumulates state across scans
 * (in particular {@link #getNames()} keeps growing).
 *
 * @author count0
 *
 */
public class MetsHeaderScanner extends DefaultHandler {
    Logger log = LoggerFactory.getLogger(MetsHeaderScanner.class);

    // Attributes captured from the root <mets> element.
    PID objID = null;
    String label = null;
    // Attributes captured from the <metsHdr> element.
    String createDate = null;
    String lastModDate = null;
    String profile = null;
    String type = null;
    String id = null;
    // Text content of every METS <name> element, in document order.
    List<String> names = new ArrayList<String>();
    // Non-null only while the parser is inside a METS <name> element.
    StringBuilder nameBuffer = null;

    String metsURI = NamespaceConstants.METS_URI;

    /**
     * @return the text content of each METS <code>name</code> element seen so
     *         far (the internal list, not a copy)
     */
    public List<String> getNames() {
        return names;
    }

    /**
     * @return PID built from the <code>OBJID</code> attribute of the
     *         <code>mets</code> element, or null if none was seen
     */
    public PID getObjID() {
        return objID;
    }

    /**
     * @return the <code>LABEL</code> attribute of the <code>mets</code>
     *         element, or null if none was seen
     */
    public String getLabel() {
        return label;
    }

    /**
     * @return the <code>CREATEDATE</code> attribute of the
     *         <code>metsHdr</code> element, or null if none was seen
     */
    public String getCreateDate() {
        return createDate;
    }

    /**
     * @return the <code>LASTMODDATE</code> attribute of the
     *         <code>metsHdr</code> element, or null if none was seen
     */
    public String getLastModDate() {
        return lastModDate;
    }

    /**
     * @return the <code>PROFILE</code> attribute of the <code>mets</code>
     *         element, or null if none was seen
     */
    public String getProfile() {
        return profile;
    }

    /**
     * @return the <code>TYPE</code> attribute of the <code>mets</code>
     *         element, or null if none was seen
     */
    public String getType() {
        return type;
    }

    /**
     * @return the <code>ID</code> attribute of the <code>mets</code> element,
     *         or null if none was seen
     */
    public String getId() {
        return id;
    }

    /**
     * Captures attributes of the METS <code>mets</code> and
     * <code>metsHdr</code> elements and starts buffering text when a METS
     * <code>name</code> element opens. Elements outside the METS namespace are
     * ignored.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attr) throws SAXException {
        if (!NamespaceConstants.METS_URI.equals(uri)) {
            return;
        }
        if (localName.equals("mets")) {
            for (int i = 0; i < attr.getLength(); i++) {
                String n = attr.getLocalName(i);
                if (n.equals("OBJID")) {
                    objID = new PID(attr.getValue(i));
                } else if (n.equals("ID")) {
                    id = attr.getValue(i);
                } else if (n.equals("LABEL")) {
                    label = attr.getValue(i);
                } else if (n.equals("PROFILE")) {
                    profile = attr.getValue(i);
                } else if (n.equals("TYPE")) {
                    type = attr.getValue(i);
                }
            }
        } else if (localName.equals("metsHdr")) {
            for (int i = 0; i < attr.getLength(); i++) {
                String n = attr.getLocalName(i);
                if (n.equals("CREATEDATE")) {
                    createDate = attr.getValue(i);
                } else if (n.equals("LASTMODDATE")) {
                    lastModDate = attr.getValue(i);
                }
            }
        } else if (localName.equals("name")) {
            nameBuffer = new StringBuilder();
        }
        super.startElement(uri, localName, qName, attr);
    }

    /**
     * Appends character data to the current name buffer, if a METS
     * <code>name</code> element is open. The parser may deliver one text node
     * in several chunks, hence the buffer.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (nameBuffer != null) {
            nameBuffer.append(ch, start, length);
        }
        super.characters(ch, start, length);
    }

    /**
     * Stores the buffered text when a METS <code>name</code> element closes.
     * Elements outside the METS namespace are ignored.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (!NamespaceConstants.METS_URI.equals(uri)) {
            return;
        }
        if (localName.equals("name")) {
            if (nameBuffer != null) {
                names.add(nameBuffer.toString());
                nameBuffer = null;
            }
        }
        super.endElement(uri, localName, qName);
    }

    /**
     * Parses the METS document in the given file and records its header
     * information in this scanner.
     *
     * <p>
     * If <code>filename</code> ends with <code>.zip</code> the file is read as
     * a ZIP archive and the first non-directory entry named exactly
     * <code>METS.xml</code> or <code>mets.xml</code> is parsed; otherwise the
     * file itself is parsed as XML. Every stream opened here is closed before
     * the method returns, including when no METS entry is found.
     *
     * @param f
     *            the file to read
     * @param filename
     *            the name used to decide whether <code>f</code> is a ZIP
     *            archive
     * @throws IllegalArgumentException
     *             if the file is a ZIP archive without a METS entry
     * @throws Exception
     *             on I/O failure, parser configuration failure or malformed
     *             XML
     */
    public void scan(File f, String filename) throws Exception {
        InputStream toParse = null;
        try {
            // Track the stream from the moment it is opened so that the
            // finally block closes it on every path.
            toParse = new FileInputStream(f);
            if (filename.endsWith(".zip")) {
                log.debug("scanning for METS within a zip file");
                ZipArchiveInputStream zis = new ZipArchiveInputStream(toParse);
                toParse = zis;
                if (!seekToMetsEntry(zis)) {
                    // Same exception type the parser raised for a null
                    // stream, but with a message that names the real problem.
                    throw new IllegalArgumentException(
                            "No METS.xml or mets.xml entry found in zip file: " + filename);
                }
            } else {
                log.debug("scanning METS file");
            }
            newParser().parse(toParse, this);
        } finally {
            closeQuietly(toParse);
        }
    }

    /**
     * Advances the zip stream to the first METS entry.
     *
     * @return true if the stream is now positioned at the METS entry, false if
     *         the archive was exhausted without finding one
     */
    private boolean seekToMetsEntry(ZipArchiveInputStream zis) throws IOException {
        ArchiveEntry entry = null;
        while ((entry = zis.getNextZipEntry()) != null) {
            if (entry.isDirectory()) {
                continue;
            }
            String entryName = entry.getName();
            if (entryName.equals("METS.xml") || entryName.equals("mets.xml")) {
                log.debug("Found METS entry in ZIP: {}", entryName);
                return true;
            }
        }
        return false;
    }

    /**
     * Creates a namespace-aware SAX parser with external entity resolution
     * turned off where the parser implementation allows it.
     */
    private SAXParser newParser() throws ParserConfigurationException, SAXException {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        factory.setFeature("http://xml.org/sax/features/namespaces", true);
        // Scanned files come from outside; do not let them pull in external
        // entities or DTDs (XXE).
        disableFeature(factory, "http://xml.org/sax/features/external-general-entities");
        disableFeature(factory, "http://xml.org/sax/features/external-parameter-entities");
        disableFeature(factory, "http://apache.org/xml/features/nonvalidating/load-external-dtd");
        return factory.newSAXParser();
    }

    /**
     * Turns a parser feature off, logging instead of failing when the parser
     * implementation does not support it.
     */
    private void disableFeature(SAXParserFactory factory, String feature) {
        try {
            factory.setFeature(feature, false);
        } catch (ParserConfigurationException e) {
            log.warn("XML parser cannot disable feature {}", feature, e);
        } catch (SAXException e) {
            log.warn("XML parser cannot disable feature {}", feature, e);
        }
    }

    /**
     * Closes the stream, if any. A failure to close is deliberately not
     * propagated so that it cannot mask an exception from parsing.
     */
    private void closeQuietly(InputStream in) {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (IOException ignored) {
            log.debug("Failed to close METS input stream", ignored);
        }
    }
}