package org.gbif.occurrence.download.file.dwca; import org.gbif.dwc.terms.DwcTerm; import org.gbif.dwc.terms.GbifTerm; import org.gbif.dwc.terms.Term; import org.gbif.dwca.io.Archive; import org.gbif.dwca.io.ArchiveField; import org.gbif.dwca.io.ArchiveFile; import org.gbif.dwca.io.MetaDescriptorWriter; import org.gbif.occurrence.common.HiveColumnsUtils; import org.gbif.occurrence.common.TermUtils; import java.io.File; import java.io.IOException; import com.google.common.base.Charsets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static org.gbif.occurrence.download.file.dwca.DwcDownloadsConstants.DESCRIPTOR_FILENAME; import static org.gbif.occurrence.download.file.dwca.DwcDownloadsConstants.INTERPRETED_FILENAME; import static org.gbif.occurrence.download.file.dwca.DwcDownloadsConstants.METADATA_FILENAME; import static org.gbif.occurrence.download.file.dwca.DwcDownloadsConstants.MULTIMEDIA_FILENAME; import static org.gbif.occurrence.download.file.dwca.DwcDownloadsConstants.VERBATIM_FILENAME; /** * Utility class for Dwc archive handling during the download file creation. */ public class DwcArchiveUtils { private static final Logger LOG = LoggerFactory.getLogger(DwcArchiveUtils.class); private static final String DEFAULT_DELIMITER = ";"; /** * Creates a new archive file description for a dwc archive and sets the id field to the column of gbifID. * Used to generate the meta.xml with the help of the dwca-writer */ public static ArchiveFile createArchiveFile(String filename, Term rowType, Iterable<? extends Term> columns) { ArchiveFile af = buildBaseArchive(filename, rowType); int index = 0; for (Term term : columns) { ArchiveField field = new ArchiveField(); field.setIndex(index); field.setTerm(term); if (HiveColumnsUtils.isHiveArray(term)) { field.setDelimitedBy(DEFAULT_DELIMITER); } af.addField(field); index++; } ArchiveField coreId = af.getField(GbifTerm.gbifID); if (coreId == null) { throw new IllegalArgumentException("Archive columns MUST include the gbif:gbifID term"); } af.setId(coreId); return af; } /** * Utility function that creates an archive with common/default settings. */ private static ArchiveFile buildBaseArchive(String filename, Term rowType) { ArchiveFile af = new ArchiveFile(); af.addLocation(filename); af.setRowType(rowType); af.setEncoding(Charsets.UTF_8.displayName()); af.setIgnoreHeaderLines(1); af.setFieldsEnclosedBy(null); af.setFieldsTerminatedBy("\t"); af.setLinesTerminatedBy("\n"); return af; } /** * Creates an meta.xml descriptor file in the directory parameter. */ public static void createArchiveDescriptor(File directory) { LOG.info("Creating archive meta.xml descriptor"); Archive downloadArchive = new Archive(); downloadArchive.setMetadataLocation(METADATA_FILENAME); ArchiveFile occurrence = createArchiveFile(INTERPRETED_FILENAME, DwcTerm.Occurrence, TermUtils.interpretedTerms()); downloadArchive.setCore(occurrence); ArchiveFile verbatim = createArchiveFile(VERBATIM_FILENAME, DwcTerm.Occurrence, TermUtils.verbatimTerms()); downloadArchive.addExtension(verbatim); ArchiveFile multimedia = createArchiveFile(MULTIMEDIA_FILENAME, GbifTerm.Multimedia, TermUtils.multimediaTerms()); downloadArchive.addExtension(multimedia); try { File metaFile = new File(directory, DESCRIPTOR_FILENAME); MetaDescriptorWriter.writeMetaFile(metaFile, downloadArchive); } catch (IOException e) { LOG.error("Error creating meta.xml file", e); } } /** * Hidden constructor. */ private DwcArchiveUtils() { // private empty constructor } }