package org.gbif.checklistbank.neo.traverse;
import org.gbif.api.model.checklistbank.NameUsage;
import org.gbif.api.model.checklistbank.ParsedName;
import org.gbif.api.vocabulary.Kingdom;
import org.gbif.api.vocabulary.NameType;
import org.gbif.api.vocabulary.NameUsageIssue;
import org.gbif.checklistbank.cli.model.NameUsageNode;
import org.gbif.checklistbank.neo.UsageDao;
import org.gbif.common.parsers.KingdomParser;
import org.gbif.common.parsers.core.ParseResult;
import org.gbif.nub.lookup.straight.IdLookup;
import org.gbif.nub.lookup.straight.LookupUsage;
import java.util.Set;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import org.neo4j.graphdb.Node;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Assigns existing usageKeys through a backbone (nub) match.
 *
 * <p>{@link #start(Node)} tries to derive the current kingdom from the canonical name of the node,
 * {@link #end(Node)} matches the node and all of its synonyms against the backbone lookup and stores
 * the resulting nub key (or a {@link NameUsageIssue#BACKBONE_MATCH_NONE} issue) via the {@link UsageDao}.
 *
 * <p>The handler keeps mutable traversal state (counter, current kingdom) in plain fields and does no
 * synchronization of its own.
 */
public class NubMatchHandler implements StartEndHandler {
  private static final Logger LOG = LoggerFactory.getLogger(NubMatchHandler.class);

  /** Name types that are matched by their full scientific name instead of the parsed name parts. */
  private static final Set<NameType> UNPARSABLE_MATCH_TYPES =
      Sets.newHashSet(NameType.VIRUS, NameType.HYBRID, NameType.CULTIVAR, NameType.CANDIDATUS);

  private final IdLookup lookup;
  private final UsageDao dao;
  private final KingdomParser kParser = KingdomParser.getInstance();

  /** Number of nodes seen by {@link #start(Node)}, used for progress logging only. */
  private int counter;
  /** Kingdom passed to the lookup as a hint, or null if none is known at the moment. */
  private Kingdom currKingdom;
  /** Id of the node the current kingdom is anchored to; -1 if no kingdom is set. */
  private long currKingdomNodeId = -1;

  /**
   * @param lookup backbone lookup used for matching, must not be null
   * @param dao dao used to read usages and parsed names and to store the match result, must not be null
   * @throws NullPointerException if lookup or dao is null
   */
  public NubMatchHandler(IdLookup lookup, UsageDao dao) {
    this.dao = Preconditions.checkNotNull(dao, "Usage dao required");
    this.lookup = Preconditions.checkNotNull(lookup, "Backbone matching client required");
  }

  /**
   * Counts the node and, if no kingdom is known yet, tries to parse one from the canonical name of
   * the usage. A successfully parsed kingdom is anchored to the given node.
   */
  @Override
  public void start(Node n) {
    // increase counters
    counter++;
    if (counter % 1000 == 0) {
      LOG.debug("Nub matching done for: {}", counter);
    }

    // the usage is only needed for kingdom detection, so skip the read if a kingdom is already known
    if (currKingdom == null) {
      NameUsage u = dao.readUsage(n, false);
      ParseResult<Kingdom> k = kParser.parse(u.getCanonicalName());
      if (k.isSuccessful()) {
        setCurrKingdom(k.getPayload(), n);
      }
    }
  }

  /**
   * Matches the given node and all its synonyms to the backbone and afterwards drops the current
   * kingdom if it was anchored to this node.
   */
  @Override
  public void end(Node n) {
    NameUsage u = dao.readUsage(n, false);
    // nub lookup
    NameUsageNode nn = new NameUsageNode(n, u, false);
    matchToNub(nn, n);
    processSynonyms(nn);
    // remove kingdom?
    if (currKingdomNodeId == n.getId()) {
      currKingdom = null;
      currKingdomNodeId = -1;
    }
  }

  /**
   * Use an in memory backbone matching so there is no need for retries.
   * The resulting usageKey of the match and potential issues will be stored in the neo node.
   *
   * @param nn the usage to match
   * @param kingdomAnchor node a kingdom derived from the match gets anchored to. This must be the
   *        node whose {@link #end(Node)} call is running, so that the kingdom is reset once that
   *        node is done. Anchoring to a synonym node would leave the kingdom set after
   *        {@link #end(Node)} returns, because the reset only compares against the ended node.
   */
  private void matchToNub(NameUsageNode nn, Node kingdomAnchor) {
    ParsedName pn = dao.readName(nn.node.getId());
    if (pn == null) {
      LOG.warn("No parsed name found for {} {}", nn.node, nn.usage.getScientificName());
    }

    LookupUsage match;
    if (pn == null || UNPARSABLE_MATCH_TYPES.contains(pn.getType())) {
      // try with full scientific name for certain name types (we dont want to match informal or no names)
      match = lookup.match(nn.usage.getScientificName(), null, null, nn.usage.getRank(), currKingdom);
    } else {
      match = lookup.match(pn.canonicalName(), pn.getAuthorship(), pn.getYear(), nn.usage.getRank(), currKingdom);
    }

    // store nub key
    if (match != null) {
      nn.usage.setNubKey(match.getKey());
      if (currKingdom == null) {
        // no kingdom known so far, use the one of the matched backbone usage as a hint for further matches
        setCurrKingdom(match.getKingdom(), kingdomAnchor);
        LOG.debug("Nub match {} complementing kingdom: {}", nn.usage.getScientificName(), currKingdom);
      }
    } else {
      LOG.debug("Failed nub match: {} {}", nn.usage.getRank(), nn.usage.getScientificName());
      nn.usage.setNubKey(null);
      nn.addIssue(NameUsageIssue.BACKBONE_MATCH_NONE);
    }
    // always flag as modified and persist, also for failed matches which carry a new issue
    nn.modified = true;
    dao.store(nn, false);
  }

  /**
   * Sets the current kingdom and remembers the node it is anchored to.
   */
  private void setCurrKingdom(Kingdom k, Node n) {
    currKingdom = k;
    currKingdomNodeId = n.getId();
  }

  /**
   * Processes all synonyms of the given usage, doing a nub lookup for each of them.
   * A kingdom derived from a synonym match is anchored to the accepted node, not the synonym.
   */
  private void processSynonyms(NameUsageNode nn) {
    for (Node syn : Traversals.SYNONYMS.traverse(nn.node).nodes()) {
      NameUsage s = dao.readUsage(syn, false);
      matchToNub(new NameUsageNode(syn, s, false), nn.node);
    }
  }
}