package org.gbif.occurrence.processor.interpreting;
import org.gbif.api.model.occurrence.Occurrence;
import org.gbif.api.model.occurrence.VerbatimOccurrence;
import org.gbif.api.vocabulary.BasisOfRecord;
import org.gbif.api.vocabulary.EstablishmentMeans;
import org.gbif.api.vocabulary.LifeStage;
import org.gbif.api.vocabulary.OccurrenceIssue;
import org.gbif.api.vocabulary.Sex;
import org.gbif.api.vocabulary.TypeStatus;
import org.gbif.common.parsers.BasisOfRecordParser;
import org.gbif.common.parsers.EstablishmentMeansParser;
import org.gbif.common.parsers.LifeStageParser;
import org.gbif.common.parsers.NumberParser;
import org.gbif.common.parsers.SexParser;
import org.gbif.common.parsers.TypeStatusParser;
import org.gbif.common.parsers.TypifiedNameParser;
import org.gbif.common.parsers.UrlParser;
import org.gbif.common.parsers.core.Parsable;
import org.gbif.common.parsers.core.ParseResult;
import org.gbif.dwc.terms.DcTerm;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwc.terms.GbifTerm;
import org.gbif.occurrence.processor.interpreting.result.OccurrenceInterpretationResult;
import java.io.Serializable;
import java.util.Date;
import java.util.List;
import javax.annotation.Nullable;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Interprets/validates verbatim occurrence records.
 * This class doesn't persist any information; it only collects possible issues and generates an interpreted version
 * of the verbatim record.
 */
@Singleton
public class OccurrenceInterpreter implements Serializable {

  /**
   * A single interpretation step: reads values from the verbatim record and writes the interpreted
   * values (and any issues) into the occurrence.
   * <p>
   * Extends {@link Serializable} so that the lambdas/method references stored in {@link #interpreters} can be
   * serialized together with the enclosing {@link OccurrenceInterpreter}, which itself declares
   * {@code Serializable}. NOTE(review): bound method references (location/taxonomy interpreters) capture their
   * receiver, so those collaborator classes must be serializable as well for serialization to succeed - confirm.
   */
  @FunctionalInterface
  private interface Interpreter extends Serializable {

    void interpret(VerbatimOccurrence verbatim, Occurrence occurrence);
  }

  private static final Logger LOG = LoggerFactory.getLogger(OccurrenceInterpreter.class);

  private static final TypeStatusParser TYPE_PARSER = TypeStatusParser.getInstance();
  private static final Parsable<String> TYPE_NAME_PARSER = TypifiedNameParser.getInstance();
  private static final BasisOfRecordParser BOR_PARSER = BasisOfRecordParser.getInstance();
  private static final SexParser SEX_PARSER = SexParser.getInstance();
  private static final EstablishmentMeansParser EST_PARSER = EstablishmentMeansParser.getInstance();
  private static final LifeStageParser LST_PARSER = LifeStageParser.getInstance();

  // May be null; in that case no dataset-info step is registered (see constructor).
  private final DatasetInfoInterpreter datasetInfoInterpreter;

  // Holds the list of Interpreters that will be applied, in this order, by interpret().
  private final List<Interpreter> interpreters;

  /**
   * Builds the interpreter and registers every interpretation step that will be applied.
   *
   * @param datasetInfoInterpreter optional; when null the dataset-info step is not registered
   * @param taxonomyInterpreter provides the taxonomy interpretation step
   * @param locationInterpreter provides the location interpretation step
   */
  @Inject
  public OccurrenceInterpreter(@Nullable DatasetInfoInterpreter datasetInfoInterpreter,
    TaxonomyInterpreter taxonomyInterpreter, LocationInterpreter locationInterpreter) {
    this.datasetInfoInterpreter = datasetInfoInterpreter;

    // the list of interpreters is initialized with all the interpretation methods used
    ImmutableList.Builder<Interpreter> steps = new ImmutableList.Builder<Interpreter>().add(
      locationInterpreter::interpretLocation,
      taxonomyInterpreter::interpretTaxonomy,
      MultiMediaInterpreter::interpretMedia,
      OccurrenceInterpreter::interpretBor,
      OccurrenceInterpreter::interpretSex,
      OccurrenceInterpreter::interpretEstablishmentMeans,
      OccurrenceInterpreter::interpretLifeStage,
      OccurrenceInterpreter::interpretTypification,
      TemporalInterpreter::interpretTemporal,
      OccurrenceInterpreter::interpretReferences,
      OccurrenceInterpreter::interpretIndividualCount);

    // the dataset-info step is appended last, and only when a DatasetInfoInterpreter was supplied
    if (datasetInfoInterpreter != null) {
      steps.add(this::interpretDatasetInfo);
    }
    interpreters = steps.build();
  }

  /**
   * Constructor of OccurrenceInterpreter that will not fill the information from the dataset.
   *
   * @param taxonomyInterpreter provides the taxonomy interpretation step
   * @param locationInterpreter provides the location interpretation step
   */
  public OccurrenceInterpreter(TaxonomyInterpreter taxonomyInterpreter, LocationInterpreter locationInterpreter) {
    this(null, taxonomyInterpreter, locationInterpreter);
  }

  /**
   * Interpret all the verbatim fields into our standard Occurrence fields.
   * Every registered step is run independently: an exception thrown by one step is logged, flagged on the
   * occurrence as {@link OccurrenceIssue#INTERPRETATION_ERROR}, and does not prevent the remaining steps from running.
   * TODO: send messages/write logs for interpretation errors
   *
   * @param verbatim the verbatim occurrence to interpret
   * @param original the previously existing occurrence; it is passed through untouched to the result
   *
   * @return an OccurrenceInterpretationResult that contains an "updated" Occurrence with interpreted fields and an
   *         "original" occurrence iff this was an update to an existing record (will be null otherwise)
   */
  public OccurrenceInterpretationResult interpret(VerbatimOccurrence verbatim, Occurrence original) {
    Occurrence occ = new Occurrence(verbatim);

    for (Interpreter step : interpreters) {
      try {
        step.interpret(verbatim, occ);
      } catch (Exception e) {
        // deliberate catch-all boundary: one failing step must not abort the whole interpretation
        LOG.warn("Caught a runtime exception during interpretation", e);
        occ.addIssue(OccurrenceIssue.INTERPRETATION_ERROR);
      }
    }

    occ.setLastInterpreted(new Date());
    return new OccurrenceInterpretationResult(original, occ);
  }

  /**
   * This method was created to follow the Interpreter functional interface contract.
   * Note that datasetInfoInterpreter is nullable, but this method is not added to the list when it is null.
   */
  private void interpretDatasetInfo(VerbatimOccurrence verbatim, Occurrence occ) {
    datasetInfoInterpreter.interpretDatasetInfo(occ);
  }

  /**
   * Parses dcterms:references into the occurrence; a non-empty value for which the occurrence ends up with
   * no references is flagged as {@link OccurrenceIssue#REFERENCES_URI_INVALID}.
   */
  private static void interpretReferences(VerbatimOccurrence verbatim, Occurrence occ) {
    if (!verbatim.hasVerbatimField(DcTerm.references)) {
      return;
    }
    String rawReferences = verbatim.getVerbatimField(DcTerm.references);
    if (Strings.isNullOrEmpty(rawReferences)) {
      return;
    }
    occ.setReferences(UrlParser.parse(rawReferences));
    if (occ.getReferences() == null) {
      occ.addIssue(OccurrenceIssue.REFERENCES_URI_INVALID);
    }
  }

  /**
   * Sets the type status and typified name from dwc:typeStatus; an explicit gbif:typifiedName, when present,
   * overrides the name derived from the type status.
   */
  private static void interpretTypification(VerbatimOccurrence verbatim, Occurrence occ) {
    if (verbatim.hasVerbatimField(DwcTerm.typeStatus)) {
      String rawTypeStatus = verbatim.getVerbatimField(DwcTerm.typeStatus);

      ParseResult<TypeStatus> parsedStatus = TYPE_PARSER.parse(rawTypeStatus);
      occ.setTypeStatus(parsedStatus.getPayload());

      ParseResult<String> parsedName = TYPE_NAME_PARSER.parse(rawTypeStatus);
      occ.setTypifiedName(parsedName.getPayload());
    }

    if (verbatim.hasVerbatimField(GbifTerm.typifiedName)) {
      occ.setTypifiedName(verbatim.getVerbatimField(GbifTerm.typifiedName));
    }
  }

  /**
   * Sets the basis of record; when parsing is not successful the occurrence is flagged with
   * {@link OccurrenceIssue#BASIS_OF_RECORD_INVALID} and the basis of record falls back to
   * {@link BasisOfRecord#UNKNOWN}.
   */
  private static void interpretBor(VerbatimOccurrence verbatim, Occurrence occ) {
    String rawBor = verbatim.getVerbatimField(DwcTerm.basisOfRecord);
    ParseResult<BasisOfRecord> parsed = BOR_PARSER.parse(rawBor);
    if (parsed.isSuccessful()) {
      occ.setBasisOfRecord(parsed.getPayload());
    } else {
      LOG.debug("Unknown basisOfRecord [{}]", rawBor);
      occ.addIssue(OccurrenceIssue.BASIS_OF_RECORD_INVALID);
      occ.setBasisOfRecord(BasisOfRecord.UNKNOWN);
    }
  }

  /**
   * Sets the sex when dwc:sex parses successfully; otherwise only logs at debug level.
   */
  private static void interpretSex(VerbatimOccurrence verbatim, Occurrence occ) {
    String rawSex = verbatim.getVerbatimField(DwcTerm.sex);
    ParseResult<Sex> parsed = SEX_PARSER.parse(rawSex);
    if (parsed.isSuccessful()) {
      occ.setSex(parsed.getPayload());
    } else {
      //TODO: flag value invalid issue (new API enum value to be created)
      LOG.debug("Unknown sex [{}]", rawSex);
    }
  }

  /**
   * Sets the establishment means when dwc:establishmentMeans parses successfully; otherwise only logs at
   * debug level.
   */
  private static void interpretEstablishmentMeans(VerbatimOccurrence verbatim, Occurrence occ) {
    String rawMeans = verbatim.getVerbatimField(DwcTerm.establishmentMeans);
    ParseResult<EstablishmentMeans> parsed = EST_PARSER.parse(rawMeans);
    if (parsed.isSuccessful()) {
      occ.setEstablishmentMeans(parsed.getPayload());
    } else {
      //TODO: flag value invalid issue (new API enum value to be created)
      LOG.debug("Unknown establishmentMeans [{}]", rawMeans);
    }
  }

  /**
   * Sets the life stage when dwc:lifeStage parses successfully; otherwise only logs at debug level.
   */
  private static void interpretLifeStage(VerbatimOccurrence verbatim, Occurrence occ) {
    String rawLifeStage = verbatim.getVerbatimField(DwcTerm.lifeStage);
    ParseResult<LifeStage> parsed = LST_PARSER.parse(rawLifeStage);
    if (parsed.isSuccessful()) {
      occ.setLifeStage(parsed.getPayload());
    } else {
      LOG.debug("Unknown lifeStage [{}]", rawLifeStage);
    }
  }

  /**
   * Parses dwc:individualCount into the occurrence; a non-empty value for which the occurrence ends up with
   * no count is flagged as {@link OccurrenceIssue#INDIVIDUAL_COUNT_INVALID}.
   */
  private static void interpretIndividualCount(VerbatimOccurrence verbatim, Occurrence occ) {
    if (!verbatim.hasVerbatimField(DwcTerm.individualCount)) {
      return;
    }
    String rawCount = verbatim.getVerbatimField(DwcTerm.individualCount);
    occ.setIndividualCount(NumberParser.parseInteger(rawCount));

    // null-safe emptiness check: a present-but-null value must not trigger a NullPointerException here
    if (occ.getIndividualCount() == null && !Strings.isNullOrEmpty(rawCount)) {
      occ.addIssue(OccurrenceIssue.INDIVIDUAL_COUNT_INVALID);
      LOG.debug("Invalid individualCount {}", rawCount);
    }
  }
}