/* * This is eMonocot, a global online biodiversity information resource. * * Copyright © 2011–2015 The Board of Trustees of the Royal Botanic Gardens, Kew and The University of Oxford * * eMonocot is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * eMonocot is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * The complete text of the GNU Affero General Public License is in the source repository as the file * ‘COPYING’. It is also available from <http://www.gnu.org/licenses/>. */ package org.emonocot.job.dwc.write; import java.io.File; import java.io.IOException; import java.util.Date; import java.util.HashMap; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.emonocot.api.job.TermFactory; import org.gbif.dwc.terms.Term; import org.gbif.dwc.terms.DwcTerm; import org.gbif.dwc.terms.GbifTerm; import org.gbif.dwc.text.Archive; import org.gbif.dwc.text.ArchiveField; import org.gbif.dwc.text.ArchiveFile; import org.gbif.dwc.text.MetaDescriptorWriter; import org.gbif.metadata.eml.Agent; import org.gbif.metadata.eml.Eml; import org.gbif.metadata.eml.EmlWriter; import org.joda.time.DateTime; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; import org.springframework.batch.core.StepContribution; import org.springframework.batch.core.scope.context.ChunkContext; import org.springframework.batch.core.step.tasklet.Tasklet; import org.springframework.batch.repeat.RepeatStatus; import org.springframework.core.io.FileSystemResource; import freemarker.template.TemplateException; public class ArchiveMetadataWriter implements Tasklet { private DateTimeFormatter dateTimeFormatter = DateTimeFormat.forPattern("dd/MM/YYYY"); private TermFactory termFactory = new TermFactory(); private Pattern defaultValuesPattern = Pattern.compile("((?:[^\\\\,]|\\\\.)*)(?:,|$)"); private String archiveFile; private String[] taxonFields; private Map<String,String> taxonDefaultValues = new HashMap<String,String>(); private String[] descriptionFields; private Map<String,String> descriptionDefaultValues = new HashMap<String,String>(); private String[] distributionFields; private Map<String,String> distributionDefaultValues = new HashMap<String,String>(); private String[] referenceFields; private Map<String,String> referenceDefaultValues = new HashMap<String,String>(); private String[] imageFields; private Map<String,String> imageDefaultValues = new HashMap<String,String>(); private String[] typeAndSpecimenFields; private Map<String,String> typeAndSpecimenDefaultValues = new HashMap<String,String>(); private String[] measurementOrFactFields; private Map<String,String> measurementOrFactDefaultValues = new HashMap<String,String>(); private String[] vernacularNameFields; private Map<String,String> vernacularNameDefaultValues = new HashMap<String,String>(); private String[] identifierFields; private Map<String,String> identifierDefaultValues = new HashMap<String,String>(); private Character quoteCharacter; private String delimiter; private FileSystemResource outputDirectory; private String citationString; private String creatorEmail; private String creatorName; private String description; private String homepageUrl; private String identifier; private String logoUrl; private String publisherEmail; private String publisherName; private String rights; private String subject; private String title; private int ignoreHeaderLines = 0; public void setIgnoreHeaderLines(int ignoreHeaderLines) { this.ignoreHeaderLines = ignoreHeaderLines; } public void setCitationString(String citationString) { this.citationString = citationString; } public void setCreatorEmail(String creatorEmail) { this.creatorEmail = creatorEmail; } public void setCreatorName(String creatorName) { this.creatorName = creatorName; } public void setDescription(String description) { this.description = description; } public void setHomepageUrl(String homepageUrl) { this.homepageUrl = homepageUrl; } public void setIdentifier(String identifier) { this.identifier = identifier; } public void setLogoUrl(String logoUrl) { this.logoUrl = logoUrl; } public void setPublisherEmail(String publisherEmail) { this.publisherEmail = publisherEmail; } public void setPublisherName(String publisherName) { this.publisherName = publisherName; } public void setRights(String rights) { this.rights = rights; } public void setSubject(String subject) { this.subject = subject; } public void setTitle(String title) { this.title = title; } public void setQuoteCharacter(Character quoteCharacter) { this.quoteCharacter = quoteCharacter; } public void setDelimiter(String delimiter) { this.delimiter = delimiter; } public void setArchiveFile(String archiveFile) { this.archiveFile = archiveFile; } public void setTaxonDefaultValues(String taxonDefaultValues) { this.taxonDefaultValues = toDefaultValues(taxonDefaultValues); } public void setDescriptionDefaultValues(String descriptionDefaultValues) { this.descriptionDefaultValues = toDefaultValues(descriptionDefaultValues); } public void setDistributionDefaultValues(String distributionDefaultValues) { this.distributionDefaultValues = toDefaultValues(distributionDefaultValues); } public void setReferenceDefaultValuesFields(String referenceDefaultValues) { this.referenceDefaultValues = toDefaultValues(referenceDefaultValues); } public void setImageDefaultValues(String imageDefaultValues) { this.imageDefaultValues = toDefaultValues(imageDefaultValues); } public void setTypeAndSpecimenDefaultValues(String typeAndSpecimenDefaultValues) { this.typeAndSpecimenDefaultValues = toDefaultValues(typeAndSpecimenDefaultValues); } public void setMeasurementOrFactDefaultValues(String measurementOrFactDefaultValues) { this.measurementOrFactDefaultValues = toDefaultValues(measurementOrFactDefaultValues); } public void setVernacularNameDefaultValues(String vernacularNameDefaultValues) { this.vernacularNameDefaultValues = toDefaultValues(vernacularNameDefaultValues); } public void setIdentifierDefaultValues(String identifierDefaultValues) { this.identifierDefaultValues = toDefaultValues(identifierDefaultValues); } public void setReferenceDefaultValues(String referenceDefaultValues) { this.referenceDefaultValues = toDefaultValues(referenceDefaultValues); } public void setTaxonFields(String[] taxonFields) { this.taxonFields = taxonFields; } public void setDescriptionFields(String[] descriptionFields) { this.descriptionFields = descriptionFields; } public void setDistributionFields(String[] distributionFields) { this.distributionFields = distributionFields; } public void setReferenceFields(String[] referenceFields) { this.referenceFields = referenceFields; } public void setImageFields(String[] imageFields) { this.imageFields = imageFields; } public void setTypeAndSpecimenFields(String[] typeAndSpecimenFields) { this.typeAndSpecimenFields = typeAndSpecimenFields; } public void setMeasurementOrFactFields(String[] measurementOrFactFields) { this.measurementOrFactFields = measurementOrFactFields; } public void setVernacularNameFields(String[] vernacularNameFields) { this.vernacularNameFields = vernacularNameFields; } public void setIdentifierFields(String[] identifierFields) { this.identifierFields = identifierFields; } public void setOutputDirectory(FileSystemResource outputDirectory) { this.outputDirectory = outputDirectory; } Map<String,String> toDefaultValues(String defaultValueList) { Map<String,String> defaultValues = new HashMap<String,String>(); if (defaultValueList != null && !defaultValueList.isEmpty()) { Matcher matcher = defaultValuesPattern.matcher(defaultValueList); while (matcher.find()) { String defaultValue = matcher.group(1); if (defaultValue.indexOf("=") != -1) { int i = defaultValue.indexOf("="); String key = defaultValue.substring(0, i); String value = defaultValue.substring(i + 1, defaultValue.length()); value = value.replace("\\", ""); defaultValues.put(key, value); } } } return defaultValues; } public RepeatStatus execute(StepContribution stepContribution, final ChunkContext chunkContext) throws Exception { Archive archive = new Archive(); archive.setCore(buildArchiveFile(taxonFields,taxonDefaultValues,DwcTerm.Taxon, DwcTerm.taxonID,"taxon.txt",ignoreHeaderLines ,"UTF-8",quoteCharacter,delimiter)); if(descriptionFields != null) { archive.addExtension(buildArchiveFile(descriptionFields,descriptionDefaultValues,GbifTerm.Description, DwcTerm.taxonID,"description.txt",ignoreHeaderLines,"UTF-8",quoteCharacter,delimiter)); } if(distributionFields != null) { archive.addExtension(buildArchiveFile(distributionFields,distributionDefaultValues,GbifTerm.Distribution, DwcTerm.taxonID,"distribution.txt",ignoreHeaderLines,"UTF-8",quoteCharacter,delimiter)); } if(referenceFields != null) { archive.addExtension(buildArchiveFile(referenceFields,referenceDefaultValues,GbifTerm.Reference, DwcTerm.taxonID,"reference.txt",ignoreHeaderLines,"UTF-8",quoteCharacter,delimiter)); } if(imageFields != null) { archive.addExtension(buildArchiveFile(imageFields,imageDefaultValues,GbifTerm.Image, DwcTerm.taxonID,"image.txt",ignoreHeaderLines,"UTF-8",quoteCharacter,delimiter)); } if(typeAndSpecimenFields != null) { archive.addExtension(buildArchiveFile(typeAndSpecimenFields,typeAndSpecimenDefaultValues,GbifTerm.TypesAndSpecimen, DwcTerm.taxonID,"typeAndSpecimen.txt",ignoreHeaderLines,"UTF-8",quoteCharacter,delimiter)); } if(measurementOrFactFields != null) { archive.addExtension(buildArchiveFile(measurementOrFactFields,measurementOrFactDefaultValues,DwcTerm.MeasurementOrFact, DwcTerm.taxonID,"measurementOrFact.txt",ignoreHeaderLines,"UTF-8",quoteCharacter,delimiter)); } if(vernacularNameFields != null) { archive.addExtension(buildArchiveFile(vernacularNameFields,vernacularNameDefaultValues,GbifTerm.VernacularName, DwcTerm.taxonID,"vernacularName.txt",ignoreHeaderLines,"UTF-8",quoteCharacter,delimiter)); } if(identifierFields != null) { archive.addExtension(buildArchiveFile(identifierFields,identifierDefaultValues,GbifTerm.Identifier, DwcTerm.taxonID,"identifier.txt",ignoreHeaderLines,"UTF-8",quoteCharacter,delimiter)); } archive.setMetadataLocation("eml.xml"); File workDirectory = new File(outputDirectory.getFile(),archiveFile); if(!workDirectory.exists()) { workDirectory.mkdir(); } File metaFile = new File(workDirectory,"meta.xml"); try { MetaDescriptorWriter.writeMetaFile(metaFile, archive); } catch (TemplateException te) { throw new IOException("Exception writing meta.xml", te); } File emlFile = new File(workDirectory,"eml.xml"); Eml eml = getEml(); try { EmlWriter.writeEmlFile(emlFile, eml); } catch (TemplateException te) { throw new IOException("Exception writing eml.xml", te); } return RepeatStatus.FINISHED; } private Eml getEml() { Eml eml = new Eml(); if(citationString != null) { DateTime now = new DateTime(); Integer year = new Integer(now.getYear()); citationString = citationString.replace("{0}", year.toString()).replace("{1}", dateTimeFormatter.print(now)); } eml.setCitation(citationString,identifier); Agent resourceCreator = new Agent(); resourceCreator.setEmail(creatorEmail); resourceCreator.setFirstName(creatorName); eml.setResourceCreator(resourceCreator); eml.setDescription(description); eml.setHomepageUrl(homepageUrl); eml.setLogoUrl(logoUrl); eml.setTitle(title); eml.setSubject(subject); eml.setPublished(new Date()); Agent metadataProvider = new Agent(); metadataProvider.setEmail(publisherEmail); metadataProvider.setFirstName(publisherName); eml.setMetadataProvider(metadataProvider); eml.setIntellectualRights(rights); return eml; } private ArchiveFile buildArchiveFile(String[] fieldNames, Map<String,String> defaultValues, Term rowType, Term idTerm, String location, Integer ignoreHeaderLines, String encoding, Character fieldsEnclosedBy, String fieldsTerminatedBy) { ArchiveFile archiveFile = new ArchiveFile(); ArchiveField idField = new ArchiveField(); idField.setIndex(0); idField.setTerm(idTerm); archiveFile.setId(idField); for(int i = 1; i < fieldNames.length; i++) { Term term = termFactory.findTerm(fieldNames[i]); ArchiveField archiveField = new ArchiveField(); archiveField.setTerm(term); archiveField.setIndex(i); if(defaultValues.containsKey(fieldNames[i])) { archiveField.setDefaultValue(defaultValues.get(fieldNames[i])); defaultValues.remove(fieldNames[i]); } archiveFile.addField(archiveField); } for(String fieldName : defaultValues.keySet()) { Term term = termFactory.findTerm(fieldName); ArchiveField archiveField = new ArchiveField(); archiveField.setTerm(term); archiveField.setDefaultValue(defaultValues.get(fieldName)); archiveFile.addField(archiveField); } archiveFile.setRowType(rowType.qualifiedName()); archiveFile.setIgnoreHeaderLines(ignoreHeaderLines); archiveFile.setEncoding(encoding); archiveFile.setFieldsEnclosedBy(fieldsEnclosedBy); archiveFile.setFieldsTerminatedBy(fieldsTerminatedBy); archiveFile.addLocation(location); return archiveFile; } }