package org.gbif.checklistbank.cli.nubchanged;
import org.gbif.api.model.Constants;
import org.gbif.api.model.checklistbank.DatasetMetrics;
import org.gbif.api.model.common.InterpretedEnum;
import org.gbif.api.model.registry.Dataset;
import org.gbif.api.model.registry.Organization;
import org.gbif.api.model.registry.eml.TaxonomicCoverage;
import org.gbif.api.model.registry.eml.TaxonomicCoverages;
import org.gbif.api.service.registry.DatasetService;
import org.gbif.api.service.registry.NetworkService;
import org.gbif.api.service.registry.OrganizationService;
import org.gbif.api.util.iterables.Iterables;
import org.gbif.api.vocabulary.Kingdom;
import org.gbif.api.vocabulary.Rank;
import org.gbif.registry.metadata.EMLWriter;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.regex.Pattern;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Charsets;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Lists;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
*
*/
public class BackboneDatasetUpdater {
private static final Logger LOG = LoggerFactory.getLogger(BackboneDatasetUpdater.class);
private final DatasetService datasetService;
private final OrganizationService organizationService;
private final NetworkService networkService;
@VisibleForTesting
protected static final Pattern SOURCE_LIST_PATTERN = Pattern.compile("\\s*The following +\\d* *sources from the.+$", Pattern.DOTALL);
public BackboneDatasetUpdater(DatasetService datasetService, OrganizationService organizationService, NetworkService networkService) {
this.datasetService = datasetService;
this.organizationService = organizationService;
this.networkService = networkService;
}
static class NubConstituent implements Comparable<NubConstituent> {
public NubConstituent(UUID key, String title, int count) {
this.key = key;
this.title = title;
this.count = count;
}
public final UUID key;
public final String title;
public final int count;
@Override
public int compareTo(NubConstituent o) {
return ComparisonChain.start()
.compare(count, o.count, Ordering.natural().reverse())
.compare(title, o.title)
.result();
}
}
public Dataset updateBackboneDataset(DatasetMetrics metrics) {
LOG.info("Updating backbone dataset metadata");
// update existing metadata (all fixed metadata is curated manually in the registry
// lets load it first
Dataset nub = datasetService.get(Constants.NUB_DATASET_KEY);
nub.setPubDate(metrics.getCreated());
List<TaxonomicCoverage> taxa = Lists.<TaxonomicCoverage>newArrayList();
for (Kingdom k : Kingdom.values()) {
new TaxonomicCoverage(k.scientificName(), null, new InterpretedEnum<String, Rank>("Kingdom", Rank.KINGDOM));
}
nub.setTaxonomicCoverages(Lists.newArrayList(new TaxonomicCoverages("All life", taxa)));
nub.setRights("CC0 1.0");
int plaziCounts = 0;
List<NubConstituent> constituents = Lists.newArrayList();
for (Map.Entry<UUID, Integer> src : metrics.getCountByConstituent().entrySet()) {
Dataset d = datasetService.get(src.getKey());
if (d.getPublishingOrganizationKey().equals(Constants.PLAZI_ORG_KEY)) {
plaziCounts += src.getValue();
} else {
constituents.add(new NubConstituent(src.getKey(), d.getTitle(), src.getValue()));
}
}
// add plazi
if (plaziCounts > 0) {
Organization plazi = organizationService.get(Constants.PLAZI_ORG_KEY);
constituents.add(new NubConstituent(Constants.PLAZI_ORG_KEY, plazi.getTitle(), plaziCounts));
}
// sort constituents by number of names
Collections.sort(constituents);
// build new description reusing the existing intro and then list the current sources
StringBuilder description = new StringBuilder();
// remove existing source list
description.append(SOURCE_LIST_PATTERN.matcher(nub.getDescription()).replaceAll(""));
// append new source list
description.append("\n\nThe following " + constituents.size() + " sources from the " +
"<a href='http://www.gbif.org/network/" + Constants.NUB_NETWORK_KEY + "'>GBIF Backbone network</a> " +
"have been used to assemble the GBIF backbone:\n");
description.append("<ul>");
for (NubConstituent nc : constituents) {
description.append("<li>" + nc.title+ " - " + nc.count+ " names</li>");
}
description.append("</ul>");
nub.setDescription(description.toString());
// convert to EML and send to registry
try {
StringWriter writer = new StringWriter();
EMLWriter.write(nub, writer);
writer.close();
InputStream stream = new ByteArrayInputStream(writer.getBuffer().toString().getBytes(Charsets.UTF_8));
datasetService.insertMetadata(Constants.NUB_DATASET_KEY, stream);
} catch (RuntimeException | IOException e) {
LOG.error("Failed to update backbone dataset metadata", e);
}
// update backbone sources network
Set<UUID> constituentKeys = Sets.newHashSet(metrics.getCountByConstituent().keySet());
LOG.info("Updating backbone source network with {} constituents", constituentKeys.size());
for (Dataset d : Iterables.networkDatasets(Constants.NUB_NETWORK_KEY, null, networkService)) {
if (!constituentKeys.remove(d.getKey())) {
LOG.debug("Remove backbone source network constituent {} {}", d.getKey(), d.getTitle());
networkService.removeConstituent(Constants.NUB_NETWORK_KEY, d.getKey());
}
}
// now add the remaining ones
for (UUID datasetKey : constituentKeys) {
LOG.debug("Add new backbone source network constituent {}", datasetKey);
networkService.addConstituent(Constants.NUB_NETWORK_KEY, datasetKey);
}
return nub;
}
}