package org.gbif.checklistbank.nub.source;
import org.gbif.api.model.checklistbank.NameUsage;
import org.gbif.api.vocabulary.TaxonomicStatus;
import org.gbif.checklistbank.cli.normalizer.Normalizer;
import org.gbif.checklistbank.cli.normalizer.NormalizerConfiguration;
import org.gbif.checklistbank.neo.UsageDao;
import org.gbif.dwca.io.ArchiveFactory;
import org.gbif.io.DownloadUtil;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.UUID;
import javax.annotation.Nullable;
import com.google.common.io.Files;
import org.apache.commons.io.FileUtils;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Transaction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A nub source which is backed by a dwca checklist file which gets normalized into neo4j first
* and then drives the nub source
*/
public class DwcaSource extends NubSource {
private static final Logger LOG = LoggerFactory.getLogger(DwcaSource.class);
private NormalizerConfiguration cfg = new NormalizerConfiguration();
public DwcaSource(String name, File dwca) throws IOException {
super(UUID.randomUUID(), name.replaceAll("\\s", " "), false);
initRepos();
File archiveDir = cfg.archiveDir(key);
LOG.info("Open dwc archive {}", dwca);
ArchiveFactory.openArchive(dwca, archiveDir);
}
public DwcaSource(String name, URL dwca) throws IOException {
this(name, download(dwca));
}
private void initRepos() {
cfg.archiveRepository = Files.createTempDir();
cfg.neo.neoRepository = Files.createTempDir();
}
private static File download(URL dwca) throws IOException {
final File tmp = File.createTempFile("dwca-download", "dwca");
LOG.info("Download dwca from {} into {}", dwca, tmp);
DownloadUtil.download(dwca, tmp);
return tmp;
}
private static String nullsafeString(@Nullable Object obj) {
return obj == null ? null : obj.toString();
}
@Override
void initNeo(NeoUsageWriter writer) throws Exception {
UsageDao dao = normalize();
LOG.info("Import source usages");
try (Transaction tx = dao.beginTx()) {
for (Node n : dao.allNodes()) {
NameUsage u = dao.readUsage(n, true);
TaxonomicStatus status = u.getTaxonomicStatus();
if (status == null) {
status = u.isSynonym() ? TaxonomicStatus.SYNONYM : TaxonomicStatus.ACCEPTED;
}
String[] row = new String[8];
row[0] = String.valueOf(n.getId());
row[1] = nullsafeString(status.isSynonym() ? u.getAcceptedKey() : u.getParentKey());
row[2] = nullsafeString(u.getBasionymKey());
row[3] = nullsafeString(u.getRank());
row[4] = status.name();
//TODO: nom status
row[5] = null;
row[6] = u.getScientificName();
row[7] = u.getPublishedIn();
writer.addRow(row);
}
}
dao.closeAndDelete();
}
/**
* read dwca stream and normalize it
*/
private UsageDao normalize() {
LOG.info("Normalize dwca");
Normalizer normalizer = Normalizer.create(cfg, key);
normalizer.run();
return UsageDao.open(cfg.neo, key);
}
@Override
public void close() {
super.close();
FileUtils.deleteQuietly(cfg.archiveRepository);
FileUtils.deleteQuietly(cfg.neo.neoRepository);
}
}