package org.gbif.checklistbank.nub.lookup;
import org.gbif.api.model.Constants;
import org.gbif.api.model.registry.Dataset;
import org.gbif.api.vocabulary.Kingdom;
import org.gbif.checklistbank.config.ClbConfiguration;
import org.gbif.checklistbank.nub.ParentStack;
import org.gbif.checklistbank.nub.model.NubUsage;
import org.gbif.checklistbank.nub.model.SrcUsage;
import org.gbif.checklistbank.nub.source.ClbSource;
import org.gbif.checklistbank.service.DatasetImportService;
import org.gbif.common.messaging.api.MessagePublisher;
import org.gbif.common.messaging.api.messages.ChecklistSyncedMessage;
import org.gbif.nub.lookup.straight.DatasetMatchFailed;
import org.gbif.nub.lookup.straight.IdLookup;
import org.gbif.nub.lookup.straight.LookupUsage;
import java.util.Date;
import java.util.Map;
import java.util.UUID;
import com.google.common.collect.Maps;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Rematches the usages of a checklist dataset against the backbone ("nub").
 * <p>
 * For every usage read from a {@link ClbSource} the configured {@link IdLookup} is asked for a
 * backbone match. The resulting usage key to nub key relations are handed to the two
 * {@link DatasetImportService} instances given at construction time (one named sql, one named solr)
 * and a {@link ChecklistSyncedMessage} is published afterwards.
 * <p>
 * NOTE(review): {@link #getCounter()} is backed by a plain {@code int} that is incremented without
 * any synchronization; confirm callers use an instance from a single thread only.
 */
public class NubMatchService {
  private static final Logger LOG = LoggerFactory.getLogger(NubMatchService.class);

  protected final ClbConfiguration cfg;
  // deliberately left non final and protected, exactly as before, so subclasses keep their access
  protected IdLookup nubLookup;
  private final DatasetImportService sqlService;
  private final DatasetImportService solrService;
  private final MessagePublisher publisher;
  // number of datasets whose relations have been written through both import services
  private int counter = 0;

  /**
   * @param cfg         configuration passed on to every {@link ClbSource} opened for matching
   * @param nubLookup   lookup used to find the backbone usage for a source usage
   * @param sqlService  first service that receives the nub relations
   * @param solrService second service that receives the same nub relations
   * @param publisher   publisher used to send the {@link ChecklistSyncedMessage} after a rematch
   */
  public NubMatchService(ClbConfiguration cfg, IdLookup nubLookup, DatasetImportService sqlService,
                         DatasetImportService solrService, MessagePublisher publisher) {
    this.cfg = cfg;
    this.nubLookup = nubLookup;
    this.sqlService = sqlService;
    this.solrService = solrService;
    this.publisher = publisher;
  }

  /**
   * @return the number of datasets for which this instance has written nub relations to both
   *         import services. The counter is incremented before the sync message is sent, so a
   *         dataset whose message publication failed is still counted.
   */
  public int getCounter() {
    return counter;
  }

  /**
   * Updates a datasets nub matches.
   * <p>
   * Reads all usages of the dataset through a {@link ClbSource}, matches each one with the internal
   * lookup and collects a complete map of usage key to nub usage key. Usages without a match are
   * kept in the map with a {@code null} value. The map is then passed to both import services within
   * this method call and finally a {@link ChecklistSyncedMessage} is published.
   * <p>
   * The backbone dataset itself ({@link Constants#NUB_DATASET_KEY}) is never matched; the call just
   * logs a warning and returns.
   *
   * @param d the dataset to rematch; its key and title are read
   * @throws DatasetMatchFailed wrapping any exception raised while reading, matching, writing the
   *                            relations or publishing the message
   */
  public void matchDataset(Dataset d) throws DatasetMatchFailed {
    if (Constants.NUB_DATASET_KEY.equals(d.getKey())) {
      LOG.warn("Cannot match backbone to itself. Ignore");
      return;
    }

    LOG.info("Rematch checklist {} to Backbone", d.getKey());

    Map<Integer, Integer> relations = Maps.newHashMap();

    try (ClbSource src = new ClbSource(cfg, d)) {
      // read in postgres usages
      LOG.info("Copy usages for {} from pg into neo", d.getKey());
      // NOTE(review): the meaning of the four flags is defined by ClbSource.init and not visible here
      src.init(false, false, true, false);

      // root of the parent stack: an incertae sedis placeholder used as long as no kingdom is known
      NubUsage unknown = new NubUsage();
      unknown.usageKey = Kingdom.INCERTAE_SEDIS.nubUsageID();
      unknown.kingdom = Kingdom.INCERTAE_SEDIS;

      // this is a taxonomically sorted iteration. We remember the parent kingdom using the ParentStack
      ParentStack parents = new ParentStack(unknown);
      for (SrcUsage u : src) {
        parents.add(u);
        LookupUsage match = nubLookup.match(
            u.parsedName.canonicalName(),
            u.parsedName.getAuthorship(),
            u.parsedName.getYear(),
            u.rank,
            parents.nubKingdom()
        );
        if (match != null) {
          // add to relations
          relations.put(u.key, match.getKey());
          // store current kingdom in parent stack for further nub lookups of children
          NubUsage nub = new NubUsage();
          nub.kingdom = match.getKingdom();
          parents.put(nub);
        } else {
          // also store no matches as nulls so we can flag an issue
          relations.put(u.key, null);
        }
      }

      LOG.info("Updating {} nub relations for dataset {}", relations.size(), d.getKey());
      sqlService.insertNubRelations(d.getKey(), relations);
      solrService.insertNubRelations(d.getKey(), relations);
      counter++;

      //ChecklistSyncedMessage triggers a new dataset analysis
      LOG.info("Sending {} for dataset {} {}", ChecklistSyncedMessage.class.getSimpleName(), d.getKey(), d.getTitle());
      // NOTE(review): the trailing 1, 0 arguments are passed as before; their meaning is defined by ChecklistSyncedMessage
      publisher.send(new ChecklistSyncedMessage(d.getKey(), new Date(), 1, 0));

    } catch (Exception e) {
      // pass the exception as the trailing argument so SLF4J logs the cause and its stack trace
      LOG.error("Failed to match checklist {} {}", d.getKey(), d.getTitle(), e);
      throw new DatasetMatchFailed(d.getKey(), e);
    }
  }

  /**
   * Convenience variant of {@link #matchDataset(Dataset)} for callers that only know the dataset key.
   * Builds a bare {@link Dataset} carrying the key and a title of the form {@code "Dataset <key>"}.
   *
   * @param key key of the dataset to rematch
   * @throws DatasetMatchFailed if the rematch of the dataset fails
   */
  public void matchDataset(UUID key) throws DatasetMatchFailed {
    Dataset d = new Dataset();
    d.setKey(key);
    d.setTitle("Dataset " + key);
    matchDataset(d);
  }
}