package edu.unc.lib.deposit;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.text.MessageFormat;
import java.util.UUID;

import javanet.staxutils.IndentingXMLStreamWriter;

import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamWriter;

import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.lang3.ArrayUtils;

import de.svenjacobs.loremipsum.LoremIpsum;
import edu.unc.lib.dl.xml.NamespaceConstants;

/**
 * Writes a synthetic METS document describing a tree of folders and files,
 * optionally with a MODS record per node. Every generated file entry points at
 * the same data URI and carries the checksum and size of one sample file.
 *
 * <p>
 * The generator keeps its XML writer in a static field, so concurrent calls to
 * {@link #generate} would interfere with each other.
 * </p>
 */
public class DepositGenerator {
    /** Destination of the generated METS document. */
    private static final File output = new File("/tmp/generated.xml");
    /** Sample data file used by {@link #main} for checksum and size. */
    private static final File testPatternTif = new File("src/test/resources/test_pattern.tif");
    private static final String NS = NamespaceConstants.METS_URI;
    private static final LoremIpsum li = new LoremIpsum();
    /** Number of structMap divs written; incremented but not read in this file. */
    private static int divCount = 0;
    /** Number of dmdSec elements written; incremented but not read in this file. */
    private static int modsCount = 0;
    /** Writer for the document currently being generated; set by {@link #generate}. */
    private static XMLStreamWriter w;

    /**
     * Walks the generated tree depth-first. Each node is identified by the path
     * of 1-based child indexes leading to it. Nodes whose path length equals
     * {@code levels} are files; every other node is a folder.
     *
     * <p>
     * Subclasses override {@link #visit} to emit output for a node and must
     * call {@link #recurse} themselves to descend into its children.
     * </p>
     */
    private static class TreeVisitor {
        final int levels;
        final int folderBranches;
        final int filesPerFolder;

        TreeVisitor(int levels, int folderBranches, int filesPerFolder) {
            this.levels = levels;
            this.folderBranches = folderBranches;
            this.filesPerFolder = filesPerFolder;
        }

        /** Visits each of the {@code folderBranches} top-level nodes in order. */
        void start() throws XMLStreamException {
            for (int b = 1; b <= folderBranches; b++) {
                int[] id = new int[] { b };
                visit(id, id.length == levels);
            }
        }

        /**
         * Visits the children of the given node. Nodes at the deepest level
         * have no children; nodes one level above it get
         * {@code filesPerFolder} children, all others {@code folderBranches}.
         */
        void recurse(int[] id) throws XMLStreamException {
            if (id.length == levels) {
                return;
            }
            int children = (id.length == levels - 1) ? filesPerFolder : folderBranches;
            for (int b = 1; b <= children; b++) {
                int[] cid = ArrayUtils.add(id, b);
                visit(cid, cid.length == levels);
            }
        }

        /**
         * Called once per node. The default implementation only descends.
         *
         * @param id   path of child indexes identifying the node
         * @param file true when the node is at the deepest level
         */
        void visit(int[] id, boolean file) throws XMLStreamException {
            recurse(id);
        }
    }

    public DepositGenerator() {
    }

    /**
     * Generates a small sample document: 2 levels, 3 branches, 5 files per
     * folder, with MODS.
     */
    public static void main(String[] args) throws IOException {
        // TODO make mintPIDs optional
        // TODO map arguments to an instance, then run that
        generate(2, 3, 5, "tag:vagrant@localhost,2014:/vagrant/" + testPatternTif.getName(), testPatternTif,
                true);
    }

    /**
     * Generates the METS document into the fixed output file.
     *
     * @param levels         depth of the tree; nodes at this depth are files
     *                       (must be at least 1)
     * @param folderBranches number of top-level nodes and of sub-folders per
     *                       folder
     * @param filesPerFolder number of files in each deepest folder
     * @param dataURI        value written as the xlink:href of every file
     * @param dataCopy       local file whose MD5 checksum and length are
     *                       recorded for every file entry
     * @param addMODS        whether to write a dmdSec with MODS for every node
     * @return length in bytes of the generated file
     * @throws IOException              if the data file cannot be read or the
     *                                  document cannot be written
     * @throws IllegalArgumentException if {@code levels} is less than 1
     */
    public static long generate(int levels, int folderBranches, int filesPerFolder, final String dataURI,
            File dataCopy, boolean addMODS) throws IOException {
        if (levels < 1) {
            // The tree walk only terminates when a path reaches length `levels`.
            throw new IllegalArgumentException("levels must be at least 1, got " + levels);
        }

        // Read the data file first so a missing or unreadable file fails
        // before the output file is created or truncated.
        final String checksum;
        try (FileInputStream fis = new FileInputStream(dataCopy)) {
            checksum = DigestUtils.md5Hex(fis);
        }
        final long dataLength = dataCopy.length();

        long totalRecords = countFiles(levels, folderBranches, filesPerFolder);
        String label = MessageFormat.format(
                "Generating METS into {4} with {0} files, {1} depth, {2} branching, MODS is {3}",
                totalRecords, levels, folderBranches, addMODS, output.getAbsolutePath());

        XMLOutputFactory fact = XMLOutputFactory.newInstance();
        try (OutputStream os = new FileOutputStream(output)) {
            XMLStreamWriter writer = new IndentingXMLStreamWriter(fact.createXMLStreamWriter(os));
            w = writer;
            try {
                w.writeStartDocument();
                w.writeStartElement("mets");
                w.writeDefaultNamespace(NS);
                w.writeNamespace(NamespaceConstants.XLINK_PREFIX, NamespaceConstants.XLINK_URI);
                w.writeNamespace(NamespaceConstants.MODS_V3_PREFIX, NamespaceConstants.MODS_V3_URI);
                UUID uuid = UUID.randomUUID();
                w.writeAttribute("ID", "uuid_" + uuid.toString());
                w.writeAttribute("LABEL", label);
                w.writeAttribute("OBJID", "info:fedora/uuid:" + uuid.toString());
                w.writeAttribute("PROFILE", "http://cdr.unc.edu/METS/profiles/Simple");
                w.writeAttribute("TYPE", "WORKBENCH");
                header();
                if (addMODS) {
                    dmdSecs(levels, folderBranches, filesPerFolder);
                }
                fileSecs(levels, folderBranches, filesPerFolder, dataURI, checksum, dataLength);
                structMap(levels, folderBranches, filesPerFolder, addMODS);
                w.writeEndElement();
                w.writeEndDocument();
                // The writer may buffer; push everything to the stream before
                // try-with-resources closes it.
                w.flush();
            } finally {
                // XMLStreamWriter.close() releases the writer but leaves the
                // underlying stream to try-with-resources.
                writer.close();
            }
        } catch (XMLStreamException e) {
            throw new IOException("Failed to write METS document to " + output.getAbsolutePath(), e);
        }
        return output.length();
    }

    /**
     * Counts the file nodes the tree walk produces: with a single level the
     * top-level nodes are themselves files; otherwise each of the
     * {@code folderBranches^(levels-1)} deepest folders holds
     * {@code filesPerFolder} files.
     */
    private static long countFiles(int levels, int folderBranches, int filesPerFolder) {
        if (folderBranches <= 0) {
            return 0;
        }
        if (levels == 1) {
            return folderBranches;
        }
        if (filesPerFolder <= 0) {
            return 0;
        }
        long count = filesPerFolder;
        for (int depth = 1; depth < levels; depth++) {
            count *= folderBranches;
        }
        return count;
    }

    /** Writes one dmdSec wrapping a MODS record for every node of the tree. */
    private static void dmdSecs(int levels, int folderBranches, int filesPerFolder) throws XMLStreamException {
        new TreeVisitor(levels, folderBranches, filesPerFolder) {
            @Override
            public void visit(int[] id, boolean file) throws XMLStreamException {
                w.writeStartElement("dmdSec");
                w.writeAttribute("ID", makeID("dmdid", id));
                w.writeAttribute("GROUPID", "foo");
                w.writeAttribute("CREATED", "2014-01-08T16:54:55.850Z");
                w.writeAttribute("STATUS", "USER_EDITED");
                w.writeStartElement("mdWrap");
                w.writeAttribute("LABEL", getRandomLIWords(2));
                w.writeAttribute("MDTYPE", "MODS");
                w.writeStartElement("xmlData");
                mods();
                w.writeEndElement(); // xmlData
                w.writeEndElement(); // mdWrap
                w.writeEndElement(); // dmdSec
                modsCount++;
                recurse(id);
            }
        }.start();
    }

    /** Writes the fileSec with a single OBJECTS fileGrp holding one file entry per file node. */
    private static void fileSecs(int levels, int folderBranches, int filesPerFolder, final String dataURI,
            final String checksum, final long dataLength) throws XMLStreamException {
        w.writeStartElement("fileSec");
        w.writeStartElement("fileGrp");
        w.writeAttribute("ID", "OBJECTS");
        new TreeVisitor(levels, folderBranches, filesPerFolder) {
            @Override
            public void visit(int[] id, boolean file) throws XMLStreamException {
                if (file) {
                    fileSec(id, dataURI, checksum, dataLength);
                }
                recurse(id);
            }
        }.start();
        w.writeEndElement(); // fileGrp
        w.writeEndElement(); // fileSec
    }

    /** Writes a MODS record with a random title, subtitle and abstract. */
    private static void mods() throws XMLStreamException {
        w.writeStartElement(NamespaceConstants.MODS_V3_URI, "mods");
        w.writeStartElement(NamespaceConstants.MODS_V3_URI, "titleInfo");
        w.writeStartElement(NamespaceConstants.MODS_V3_URI, "title");
        w.writeCharacters(getRandomLIWords(6));
        w.writeEndElement();
        w.writeStartElement(NamespaceConstants.MODS_V3_URI, "subTitle");
        w.writeCharacters(getRandomLIWords(5));
        w.writeEndElement();
        w.writeEndElement(); // titleInfo
        w.writeStartElement(NamespaceConstants.MODS_V3_URI, "abstract");
        w.writeCharacters(li.getParagraphs());
        w.writeEndElement();
        w.writeEndElement(); // mods
    }

    /** Placeholder; not implemented and not called in this file. */
    private static void rights(XMLStreamWriter w, int depth, int branching) {
        // TODO
    }

    /**
     * Writes the structMap: a Bag div containing a nested div per node, typed
     * Folder or File. File divs carry an fptr to the matching file entry; when
     * {@code addMODS} is set every div references its dmdSec.
     */
    private static void structMap(int levels, int branching, final int filesPerFolder, final boolean addMODS)
            throws XMLStreamException {
        w.writeStartElement("structMap");
        w.writeStartElement("div");
        w.writeAttribute("TYPE", "Bag");
        new TreeVisitor(levels, branching, filesPerFolder) {
            @Override
            public void visit(int[] id, boolean file) throws XMLStreamException {
                UUID uuid = UUID.randomUUID();
                w.writeStartElement("div");
                w.writeAttribute("CONTENTIDS", "info:fedora/uuid:" + uuid.toString());
                w.writeAttribute("ID", "uuid_" + uuid.toString());
                if (addMODS) {
                    w.writeAttribute("DMDID", makeID("dmdid", id));
                }
                if (file) {
                    w.writeAttribute("TYPE", "File");
                    w.writeAttribute("LABEL", getRandomLIWords(1) + ".tif");
                    w.writeStartElement("fptr");
                    w.writeAttribute("FILEID", makeID("file_", id));
                    w.writeEndElement();
                } else {
                    w.writeAttribute("TYPE", "Folder");
                    w.writeAttribute("LABEL", getRandomLIWords(4));
                }
                divCount++;
                recurse(id);
                w.writeEndElement(); // div
            }
        }.start();
        w.writeEndElement(); // Bag div
        w.writeEndElement(); // structMap
    }

    /**
     * Returns {@code i} lorem-ipsum words starting at a random offset.
     */
    private static String getRandomLIWords(int i) {
        // The original bound was (50 - i); clamp it so the offset can never go
        // negative for large i. NOTE(review): 50 presumably matches the size of
        // the LoremIpsum word list - confirm against the library.
        int offset = (int) (Math.random() * Math.max(1, 50 - i));
        return li.getWords(i, offset);
    }

    /**
     * Writes one file element with its FLocat for the given file node.
     *
     * @param id         path identifying the file node
     * @param dataURI    value of the xlink:href attribute
     * @param checksum   MD5 checksum written as CHECKSUM
     * @param dataLength value written as SIZE
     */
    private static void fileSec(int[] id, String dataURI, String checksum, long dataLength)
            throws XMLStreamException {
        w.writeStartElement("file");
        w.writeAttribute("CHECKSUM", checksum);
        w.writeAttribute("ID", makeID("file_", id));
        w.writeAttribute("CHECKSUMTYPE", "MD5");
        w.writeAttribute("CREATED", "2013-01-09T10:12:21.000-05:00");
        w.writeAttribute("MIMETYPE", "application/octet-stream");
        w.writeAttribute("SIZE", String.valueOf(dataLength));
        w.writeStartElement("FLocat");
        w.writeAttribute(NamespaceConstants.XLINK_URI, "href", dataURI);
        w.writeAttribute("LOCTYPE", "OTHER");
        w.writeAttribute("OTHERLOCTYPE", "tag");
        w.writeAttribute("USE", "STAGE");
        w.writeEndElement(); // FLocat
        w.writeEndElement(); // file
    }

    /** Writes the metsHdr with fixed dates and two CREATOR agents. */
    private static void header() throws XMLStreamException {
        w.writeStartElement("metsHdr");
        w.writeAttribute("CREATEDATE", "2014-04-03T13:26:41.509Z");
        w.writeAttribute("LASTMODDATE", "2014-04-03T13:26:41.509Z");
        w.writeStartElement("agent");
        w.writeAttribute("ROLE", "CREATOR");
        w.writeAttribute("TYPE", "OTHER");
        w.writeStartElement("name");
        w.writeCharacters("CDR Workbench");
        w.writeEndElement();
        w.writeEndElement();
        w.writeStartElement("agent");
        w.writeAttribute("ROLE", "CREATOR");
        w.writeAttribute("TYPE", "INDIVIDUAL");
        w.writeStartElement("name");
        w.writeCharacters("count0");
        w.writeEndElement();
        w.writeEndElement();
        w.writeEndElement(); // metsHdr
    }

    /**
     * Builds an XML ID from a prefix and a node path, appending
     * {@code "_" + index} for every path element (e.g. prefix {@code "dmdid"}
     * and path {1, 2} give {@code "dmdid_1_2"}).
     */
    private static String makeID(String prefix, int[] id) {
        StringBuilder result = new StringBuilder();
        result.append(prefix);
        for (int i : id) {
            result.append("_").append(i);
        }
        return result.toString();
    }
}