package org.gbif.checklistbank.service.mybatis.export;

import com.google.common.io.Files;
import com.google.inject.Guice;
import com.google.inject.Injector;
import org.apache.commons.io.FileUtils;
import org.gbif.api.model.registry.Dataset;
import org.gbif.api.service.registry.DatasetService;
import org.gbif.checklistbank.config.ClbConfiguration;
import org.gbif.checklistbank.config.RegistryServiceConfiguration;
import org.gbif.checklistbank.service.mybatis.guice.InternalChecklistBankServiceMyBatisModule;
import org.gbif.checklistbank.service.mybatis.mapper.*;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwca.io.DwcaStreamWriter;
import org.gbif.utils.file.CompressionUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.util.UUID;

public class Exporter {
  private static final Logger LOG = LoggerFactory.getLogger(Exporter.class);

  private final File repository;
  private final NameUsageMapper usageMapper;
  private final VernacularNameMapper vernacularMapper;
  private final DescriptionMapper descriptionMapper;
  private final DistributionMapper distributionMapper;
  private final MultimediaMapper mediaMapper;
  private final ReferenceMapper referenceMapper;
  private final TypeSpecimenMapper typeSpecimenMapper;
  private final DatasetService datasetService;

  private Exporter(File repository, ClbConfiguration cfg, DatasetService datasetService) {
    this.repository = repository;
    // init postgres mappers
    Injector inj = Guice.createInjector(InternalChecklistBankServiceMyBatisModule.create(cfg));
    usageMapper = inj.getInstance(NameUsageMapper.class);
    descriptionMapper = inj.getInstance(DescriptionMapper.class);
    distributionMapper = inj.getInstance(DistributionMapper.class);
    mediaMapper = inj.getInstance(MultimediaMapper.class);
    vernacularMapper = inj.getInstance(VernacularNameMapper.class);
    referenceMapper = inj.getInstance(ReferenceMapper.class);
    typeSpecimenMapper = inj.getInstance(TypeSpecimenMapper.class);
    this.datasetService = datasetService;
  }

  /**
   * @param registryWs base URL of the registry API, e.g. http://api.gbif.org/v1
   */
  public static Exporter create(File repository, ClbConfiguration cfg, String registryWs) {
    RegistryServiceConfiguration regCfg = new RegistryServiceConfiguration();
    regCfg.wsUrl = registryWs;
    Injector inj = regCfg.createRegistryInjector();
    return new Exporter(repository, cfg, inj.getInstance(DatasetService.class));
  }

  /**
   * Synchronously generates a new dwca export file for a given dataset.
   *
   * @return the newly generated dwca file
   */
  public File export(Dataset dataset) {
    DwcaExport exporter = new DwcaExport(dataset);
    exporter.run();
    return exporter.dwca;
  }

  public File export(UUID datasetKey) {
    return export(datasetService.get(datasetKey));
  }

  private class DwcaExport implements Runnable {
    private final Dataset dataset;
    private final File dwca;
    private DwcaStreamWriter writer;
    private int counter;
    private int extCounter;

    public DwcaExport(Dataset dataset) {
      this.dataset = dataset;
      this.dwca = new File(repository, dataset.getKey().toString() + ".zip");
    }

    public void run() {
      LOG.info("Start exporting checklist {} into DwC-A at {}", dataset.getKey(), dwca.getAbsolutePath());
      File tmp = Files.createTempDir();
      try {
        writer = new DwcaStreamWriter(tmp, DwcTerm.Taxon, DwcTerm.taxonID, true);
        // add EML
        writer.setMetadata(dataset);

        // write core taxa
        try (RowHandler.TaxonHandler coreHandler = new RowHandler.TaxonHandler(writer, dataset.getKey())) {
          usageMapper.processDataset(dataset.getKey(), coreHandler);
          counter = coreHandler.getCounter();
          LOG.info("Written {} core taxa", counter);
          // add constituents
          LOG.info("Adding {} constituents metadata", coreHandler.getConstituents().size());
          for (UUID dkey : coreHandler.getConstituents()) {
            Dataset constituent = datasetService.get(dkey);
            if (constituent != null) {
              writer.addConstituent(constituent);
            }
          }
        }

        // descriptions
        try (RowHandler.DescriptionHandler handler = new RowHandler.DescriptionHandler(writer)) {
          descriptionMapper.processDataset(dataset.getKey(), handler);
          LOG.info("Written {} description records", handler.getCounter());
          extCounter = handler.getCounter();
        }

        // distributions
        try (RowHandler.DistributionHandler handler = new RowHandler.DistributionHandler(writer)) {
          distributionMapper.processDataset(dataset.getKey(), handler);
          LOG.info("Written {} distribution records", handler.getCounter());
          extCounter += handler.getCounter();
        }

        // media
        try (RowHandler.NameUsageMediaObjectHandler handler = new RowHandler.NameUsageMediaObjectHandler(writer)) {
          mediaMapper.processDataset(dataset.getKey(), handler);
          LOG.info("Written {} media records", handler.getCounter());
          extCounter += handler.getCounter();
        }

        // references
        try (RowHandler.ReferenceHandler handler = new RowHandler.ReferenceHandler(writer)) {
          referenceMapper.processDataset(dataset.getKey(), handler);
          LOG.info("Written {} reference records", handler.getCounter());
          extCounter += handler.getCounter();
        }

        // types
        try (RowHandler.TypeSpecimenHandler handler = new RowHandler.TypeSpecimenHandler(writer)) {
          typeSpecimenMapper.processDataset(dataset.getKey(), handler);
          LOG.info("Written {} typification records", handler.getCounter());
          extCounter += handler.getCounter();
        }

        // vernacular names
        try (RowHandler.VernacularNameHandler handler = new RowHandler.VernacularNameHandler(writer)) {
          vernacularMapper.processDataset(dataset.getKey(), handler);
          LOG.info("Written {} vernacular name records", handler.getCounter());
          extCounter += handler.getCounter();
        }

        // finish dwca
        writer.close();

        // zip it up to final location
        FileUtils.forceMkdir(dwca.getParentFile());
        CompressionUtil.zipDir(tmp, dwca, true);
        LOG.info("Done exporting checklist {} with {} usages and {} extensions into DwC-A at {}",
            dataset.getKey(), counter, extCounter, dwca.getAbsolutePath());

      } catch (Exception e) {
        LOG.error("Failed to create dwca for dataset {} at {}", dataset.getKey(), tmp.getAbsolutePath(), e);

      } finally {
        try {
          FileUtils.deleteDirectory(tmp);
        } catch (IOException e) {
          LOG.error("Failed to remove tmp dwca dir {}", tmp.getAbsolutePath(), e);
        }
      }
    }
  }
}
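// Usage sketch (illustrative only, not part of this class): wiring up an Exporter and running a
// single export. The repository path, ClbConfiguration setup and dataset key below are placeholder
// assumptions; the registry URL is the example given in the create() javadoc above.
//
//   ClbConfiguration clb = new ClbConfiguration();      // fill in postgres connection settings
//   File repo = new File("/tmp/dwca-exports");          // hypothetical export repository
//   Exporter exporter = Exporter.create(repo, clb, "http://api.gbif.org/v1");
//
//   UUID datasetKey = UUID.fromString("...");           // key of the checklist to export
//   File dwca = exporter.export(datasetKey);            // blocks until the zipped DwC-A is written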