package org.gbif.ipt.service.registry.impl;
import org.gbif.api.model.checklistbank.DatasetMetrics;
import org.gbif.api.model.common.paging.PagingRequest;
import org.gbif.api.model.common.paging.PagingResponse;
import org.gbif.api.model.metrics.cube.OccurrenceCube;
import org.gbif.api.model.metrics.cube.ReadBuilder;
import org.gbif.api.model.registry.Dataset;
import org.gbif.api.model.registry.Installation;
import org.gbif.api.model.registry.Organization;
import org.gbif.api.service.checklistbank.DatasetMetricsService;
import org.gbif.api.service.metrics.CubeService;
import org.gbif.api.service.registry.DatasetService;
import org.gbif.api.service.registry.InstallationService;
import org.gbif.api.service.registry.OrganizationService;
import org.gbif.api.vocabulary.Country;
import org.gbif.api.vocabulary.DatasetType;
import org.gbif.api.vocabulary.InstallationType;
import java.util.Set;
import java.util.UUID;
import com.google.common.collect.Sets;
import org.apache.log4j.Logger;
import org.junit.Ignore;
import org.junit.Test;
import static org.gbif.ipt.config.RegistryTestModule.webserviceClient;
import static org.gbif.ipt.config.RegistryTestModule.webserviceClientReadOnly;
import static org.junit.Assert.assertEquals;
/**
 * Manually-run checks that talk to the GBIF registry web services through the clients provided by
 * {@code RegistryTestModule}. The service URLs are taken from registry.properties.
 */
public class RegistryWsClientTest {

  private static final Logger LOG = Logger.getLogger(RegistryWsClientTest.class);

  // page size used when paging through installations and their hosted datasets
  private static final int PAGING_LIMIT = 100;

  // key of the dataset read by testGetDataset and modified by testUpdateDataset
  private static final UUID TEST_DATASET_KEY = UUID.fromString("8575f23e-f762-11e1-a439-00145eb45e9a");

  // checklists hosted by this installation are left out of the checklist occurrence total
  // NOTE(review): the reason for excluding this installation is not recorded here - confirm before changing
  private static final UUID EXCLUDED_INSTALLATION_KEY = UUID.fromString("9afa1395-6e93-4848-a42d-bce896f5195e");

  /**
   * Fetches the test dataset with the read-only client and verifies its title.
   */
  @Test
  public void testGetDataset() {
    DatasetService ds = webserviceClientReadOnly().getInstance(DatasetService.class);
    Dataset dataset = ds.get(TEST_DATASET_KEY);
    assertEquals("PonTaurus collection", dataset.getTitle());
  }

  /**
   * Fetches the test dataset, sets its rights to "CC0" and sends the update back through the writable client.
   * Ignored by default; remove {@code @Ignore} to run it by hand.
   */
  @Test
  @Ignore
  public void testUpdateDataset() {
    DatasetService ds = webserviceClient().getInstance(DatasetService.class);
    Dataset dataset = ds.get(TEST_DATASET_KEY);
    dataset.setRights("CC0");
    ds.update(dataset);
  }

  /**
   * Gather statistics needed to update IPT statistics on http://www.gbif.org/ipt/stats.
   * Iterates through all installations, looking for IPT installations. For each IPT installation, it counts the
   * number of occurrence, sampling-event, checklist, and metadata-only datasets hosted by that installation.
   * For each occurrence dataset and sampling-event dataset it counts the number of records.
   * For each checklist, it counts the number of usages and the number of occurrence records.
   * <p>
   * Installations without a type are not counted as IPTs, organizations that cannot be resolved or have no country
   * do not contribute to the country count, and datasets whose type is not checklist, occurrence or sampling-event
   * (including datasets without a type) are counted as metadata-only.
   * <p>
   * Remember to configure registry.properties to connect to the desired service URLs.
   * Ignored by default; remove {@code @Ignore} to run it by hand. Results are written to the log at INFO level.
   */
  @Test
  @Ignore
  public void gatherStatistics() {
    InstallationService installationService = webserviceClientReadOnly().getInstance(InstallationService.class);
    CubeService occurrenceCubeService = webserviceClientReadOnly().getInstance(CubeService.class);
    OrganizationService organizationService = webserviceClientReadOnly().getInstance(OrganizationService.class);
    DatasetMetricsService datasetMetricsService = webserviceClientReadOnly().getInstance(DatasetMetricsService.class);

    int installationCount = 0;
    int iptInstallationCount = 0;
    int iptDatasetCount = 0;
    int iptChecklistDatasetCount = 0;
    int iptOccurrenceDatasetCount = 0;
    int iptSamplingEventDatasetCount = 0;
    int iptMetadataDatasetCount = 0;

    long totalOccurrenceRecords = 0;
    long totalOccurrenceRecordsFromSamplingEventDatasets = 0;
    long totalNameUsages = 0;
    long totalOccurrenceRecordsFromChecklists = 0;

    Set<Country> countriesRepresented = Sets.newHashSet();
    Set<UUID> checklistDatasetPublisherKeys = Sets.newHashSet();
    Set<UUID> occurrenceDatasetPublisherKeys = Sets.newHashSet();
    Set<UUID> samplingEventDatasetPublisherKeys = Sets.newHashSet();
    Set<UUID> metadataDatasetPublisherKeys = Sets.newHashSet();

    PagingRequest installationPage = new PagingRequest(0, PAGING_LIMIT);
    PagingResponse<Installation> installationsResults;
    do {
      installationsResults = installationService.list(installationPage);
      for (Installation installation : installationsResults.getResults()) {
        installationCount++;

        // enum identity comparison: an installation without a type is simply not an IPT
        if (installation.getType() != InstallationType.IPT_INSTALLATION) {
          continue;
        }
        iptInstallationCount++;

        // count number of countries where IPTs are installed; skip unresolved organizations and missing
        // countries so that a null entry does not inflate the country count
        UUID organizationKey = installation.getOrganizationKey();
        Organization organization = organizationKey == null ? null : organizationService.get(organizationKey);
        if (organization != null && organization.getCountry() != null) {
          countriesRepresented.add(organization.getCountry());
        }

        // count # of datasets hosted by this IPT installation
        PagingRequest datasetPage = new PagingRequest(0, PAGING_LIMIT);
        PagingResponse<Dataset> datasetsResults;
        do {
          datasetsResults = installationService.getHostedDatasets(installation.getKey(), datasetPage);
          for (Dataset dataset : datasetsResults.getResults()) {
            iptDatasetCount++;
            DatasetType datasetType = dataset.getType();

            if (datasetType == DatasetType.CHECKLIST) {
              // checklist datasets: number of datasets, distinct publishers, name usages, occurrence records
              iptChecklistDatasetCount++;
              checklistDatasetPublisherKeys.add(dataset.getPublishingOrganizationKey());

              // metrics may be unavailable for a checklist, in which case it adds no usages
              DatasetMetrics metrics = datasetMetricsService.get(dataset.getKey());
              if (metrics != null) {
                totalNameUsages += metrics.getUsagesCount();
              }

              long numOccurrencesForChecklist = countOccurrences(occurrenceCubeService, dataset.getKey());
              // equals is called on the constant so a dataset without an installation key cannot throw
              if (numOccurrencesForChecklist > 0
                  && !EXCLUDED_INSTALLATION_KEY.equals(dataset.getInstallationKey())) {
                totalOccurrenceRecordsFromChecklists += numOccurrencesForChecklist;
              }
            } else if (datasetType == DatasetType.OCCURRENCE) {
              // occurrence datasets: number of datasets, distinct publishers, occurrence records
              iptOccurrenceDatasetCount++;
              occurrenceDatasetPublisherKeys.add(dataset.getPublishingOrganizationKey());
              totalOccurrenceRecords += countOccurrences(occurrenceCubeService, dataset.getKey());
            } else if (datasetType == DatasetType.SAMPLING_EVENT) {
              // sampling-event datasets: number of datasets, distinct publishers, occurrence records
              iptSamplingEventDatasetCount++;
              samplingEventDatasetPublisherKeys.add(dataset.getPublishingOrganizationKey());
              totalOccurrenceRecordsFromSamplingEventDatasets +=
                countOccurrences(occurrenceCubeService, dataset.getKey());
            } else {
              // everything else is treated as a metadata-only dataset
              iptMetadataDatasetCount++;
              metadataDatasetPublisherKeys.add(dataset.getPublishingOrganizationKey());
            }
          }
          datasetPage.nextPage();
        } while (!datasetsResults.isEndOfRecords());
      }
      installationPage.nextPage();
    } while (!installationsResults.isEndOfRecords());

    LOG.info(iptInstallationCount + " out of " + installationCount + " installations are IPTs");
    LOG.info(
      iptInstallationCount + " IPTs hosted in " + countriesRepresented.size() + " countries serve " + iptDatasetCount
      + " datasets");
    LOG.info(iptChecklistDatasetCount + " checklist datasets published by " + checklistDatasetPublisherKeys.size()
             + " publishers totalling " + totalNameUsages + " usages and " + totalOccurrenceRecordsFromChecklists
             + " occurrence records");
    LOG.info(iptOccurrenceDatasetCount + " occurrence datasets published by " + occurrenceDatasetPublisherKeys.size()
             + " publishers totalling " + totalOccurrenceRecords + " occurrence records");
    LOG.info(
      iptSamplingEventDatasetCount + " sampling event datasets published by "
      + samplingEventDatasetPublisherKeys.size() + " publishers totalling "
      + totalOccurrenceRecordsFromSamplingEventDatasets + " occurrence records");
    LOG.info(iptMetadataDatasetCount + " metadata-only datasets published by " + metadataDatasetPublisherKeys.size()
             + " publishers");
  }

  /**
   * Looks up the number of occurrence records for a single dataset in the occurrence cube.
   *
   * @param cubeService occurrence cube client to query
   * @param datasetKey  key of the dataset whose occurrence records are counted
   * @return the count reported by the cube for the dataset key dimension
   */
  private static long countOccurrences(CubeService cubeService, UUID datasetKey) {
    return cubeService.get(new ReadBuilder().at(OccurrenceCube.DATASET_KEY, datasetKey));
  }
}