package org.gbif.occurrence.processor.interpreting;
import org.gbif.api.model.occurrence.Occurrence;
import org.gbif.api.model.occurrence.VerbatimOccurrence;
import org.gbif.api.vocabulary.OccurrenceIssue;
import org.gbif.common.parsers.core.OccurrenceParseResult;
import org.gbif.common.parsers.core.ParseResult;
import org.gbif.common.parsers.date.AtomizedLocalDate;
import org.gbif.common.parsers.date.DateParsers;
import org.gbif.common.parsers.date.TemporalAccessorUtils;
import org.gbif.common.parsers.date.TemporalParser;
import org.gbif.dwc.terms.DcTerm;
import org.gbif.dwc.terms.DwcTerm;
import java.time.LocalDate;
import java.time.temporal.ChronoField;
import java.time.temporal.TemporalAccessor;
import java.time.temporal.TemporalQueries;
import java.util.Date;
import java.util.EnumSet;
import java.util.Set;
import com.google.common.base.Strings;
import com.google.common.collect.Range;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Interprets date representations into a Date.
*/
public class TemporalInterpreter {
private static final Logger LOG = LoggerFactory.getLogger(TemporalInterpreter.class);
// Earliest date accepted for recorded (event) and identified dates; used as the lower bound of the
// validity ranges built in this class.
static final LocalDate MIN_LOCAL_DATE = LocalDate.of(1600, 1, 1);
// Epoch day 0 (1970-01-01); used as the lower bound of the validity range for dc:modified.
static final LocalDate MIN_EPOCH_LOCAL_DATE = LocalDate.ofEpochDay(0);
// Shared parser used for both atomized (year, month, day) input and free-text date strings.
private static final TemporalParser TEXTDATE_PARSER = DateParsers.defaultTemporalParser();
// Static utility class: the private constructor prevents instantiation.
private TemporalInterpreter() {
}
/**
 * Interprets the temporal fields of a verbatim record (event date, dc:modified and dateIdentified) and
 * copies the interpreted values and any resulting issues onto the given occurrence.
 *
 * @param verbatim the verbatim record to read the raw values from
 * @param occ the occurrence that receives the interpreted dates and issues
 */
public static void interpretTemporal(VerbatimOccurrence verbatim, Occurrence occ) {
  // --- event date (eventDate and/or year, month, day) ---
  OccurrenceParseResult<TemporalAccessor> recorded = interpretRecordedDate(verbatim);
  if (recorded.isSuccessful()) {
    TemporalAccessor recordedValue = recorded.getPayload();
    // Convert to java.util.Date; per the original author's note the 'true' flag makes toDate ignore
    // the offset (timezone) if one was provided.
    // Debugging hint: when inspected, the resulting Date is rendered in the machine's timezone.
    Date asDate = TemporalAccessorUtils.toDate(recordedValue, true);
    AtomizedLocalDate ymd = AtomizedLocalDate.fromTemporalAccessor(recordedValue);

    occ.setEventDate(asDate);
    occ.setYear(ymd.getYear());
    occ.setMonth(ymd.getMonth());
    occ.setDay(ymd.getDay());
  }
  // issues are recorded whether or not the interpretation succeeded
  occ.getIssues().addAll(recorded.getIssues());

  // Both remaining dates may be at most one day in the future.
  LocalDate tomorrow = LocalDate.now().plusDays(1);

  // --- dc:modified ---
  if (verbatim.hasVerbatimField(DcTerm.modified)) {
    OccurrenceParseResult<TemporalAccessor> modified = interpretLocalDate(
        verbatim.getVerbatimField(DcTerm.modified),
        Range.closed(MIN_EPOCH_LOCAL_DATE, tomorrow),
        OccurrenceIssue.MODIFIED_DATE_UNLIKELY);
    // NOTE(review): unlike dateIdentified below, the value is set without checking isSuccessful(),
    // so a failed parse hands its payload (presumably null) to toDate - confirm this is intended.
    occ.setModified(TemporalAccessorUtils.toDate(modified.getPayload()));
    occ.getIssues().addAll(modified.getIssues());
  }

  // --- dwc:dateIdentified ---
  if (verbatim.hasVerbatimField(DwcTerm.dateIdentified)) {
    OccurrenceParseResult<TemporalAccessor> identified = interpretLocalDate(
        verbatim.getVerbatimField(DwcTerm.dateIdentified),
        Range.closed(MIN_LOCAL_DATE, tomorrow),
        OccurrenceIssue.IDENTIFIED_DATE_UNLIKELY);
    if (identified.isSuccessful()) {
      occ.setDateIdentified(TemporalAccessorUtils.toDate(identified.getPayload()));
    }
    occ.getIssues().addAll(identified.getIssues());
  }
}
/**
 * Convenience overload: reads the verbatim year, month, day and eventDate terms from the record and
 * delegates to {@link #interpretRecordedDate(String, String, String, String)}.
 *
 * @param verbatim the VerbatimOccurrence holding the raw date fields
 * @return the interpretation result, never null
 */
public static OccurrenceParseResult<TemporalAccessor> interpretRecordedDate(VerbatimOccurrence verbatim) {
  return interpretRecordedDate(
      verbatim.getVerbatimField(DwcTerm.year),
      verbatim.getVerbatimField(DwcTerm.month),
      verbatim.getVerbatimField(DwcTerm.day),
      verbatim.getVerbatimField(DwcTerm.eventDate));
}
/**
 * Same interpretation as {@link #interpretRecordedDate(String, String, String, String)}, but with the
 * payload converted to an {@link AtomizedLocalDate}.
 * Status, confidence, error and the issues raised during interpretation are carried over to the
 * returned result.
 *
 * @param year verbatim year, may be null or blank
 * @param month verbatim month, may be null or blank
 * @param day verbatim day, may be null or blank
 * @param dateString verbatim date string, may be null or blank
 * @return the interpretation result with an atomized payload
 */
public static OccurrenceParseResult<AtomizedLocalDate> interpretEventDate(String year, String month, String day,
    String dateString) {
  OccurrenceParseResult<TemporalAccessor> interpreted = interpretRecordedDate(year, month, day, dateString);
  // NOTE(review): on failure the payload passed to fromTemporalAccessor is presumably null, as before.
  OccurrenceParseResult<AtomizedLocalDate> atomized = new OccurrenceParseResult<>(interpreted.getStatus(),
      interpreted.getConfidence(), AtomizedLocalDate.fromTemporalAccessor(interpreted.getPayload()),
      interpreted.getError());
  // The constructor above takes no issues; copy them so flags such as RECORDED_DATE_MISMATCH or
  // RECORDED_DATE_UNLIKELY are not silently dropped by the conversion.
  for (OccurrenceIssue issue : interpreted.getIssues()) {
    atomized.addIssue(issue);
  }
  return atomized;
}
/**
 * Produces a single date from an atomized date (year, month, day) and/or a date string.
 * <ul>
 *   <li>If neither input is provided (all blank), a failed result without issues is returned.</li>
 *   <li>If both are provided and their parsed values disagree, RECORDED_DATE_MISMATCH is flagged and the
 *   value chosen by {@code TemporalAccessorUtils.getBestResolutionTemporalAccessor} is used; if there is
 *   none the result is a failure.</li>
 *   <li>Otherwise the parsed date string is preferred over the parsed year, month, day.</li>
 * </ul>
 * The selected value is finally checked with {@link #isValidDate(TemporalAccessor, boolean)} accepting
 * partial dates: a missing value is flagged RECORDED_DATE_INVALID, any other value failing the check is
 * flagged RECORDED_DATE_UNLIKELY, and in both cases a failed result is returned.
 *
 * @param year verbatim year, may be null or blank
 * @param month verbatim month, may be null or blank
 * @param day verbatim day, may be null or blank
 * @param dateString verbatim date string, may be null or blank
 * @return interpretation result, never null
 */
public static OccurrenceParseResult<TemporalAccessor> interpretRecordedDate(String year, String month, String day,
    String dateString) {
  boolean atomizedDateProvided = StringUtils.isNotBlank(year) || StringUtils.isNotBlank(month)
      || StringUtils.isNotBlank(day);
  boolean dateStringProvided = StringUtils.isNotBlank(dateString);

  if (!atomizedDateProvided && !dateStringProvided) {
    return OccurrenceParseResult.fail();
  }

  Set<OccurrenceIssue> issues = EnumSet.noneOf(OccurrenceIssue.class);

  // Parse each representation that was supplied; an absent one is modelled as a failed parse.
  ParseResult<TemporalAccessor> ymdParse;
  if (atomizedDateProvided) {
    ymdParse = TEXTDATE_PARSER.parse(year, month, day);
  } else {
    ymdParse = ParseResult.<TemporalAccessor>fail();
  }
  ParseResult<TemporalAccessor> textParse;
  if (dateStringProvided) {
    textParse = TEXTDATE_PARSER.parse(dateString);
  } else {
    textParse = ParseResult.<TemporalAccessor>fail();
  }
  TemporalAccessor ymdValue = ymdParse.getPayload();
  TemporalAccessor textValue = textParse.getPayload();

  // The two inputs only need to be reconciled when both were supplied.
  boolean inputsAgree = !(atomizedDateProvided && dateStringProvided)
      || TemporalAccessorUtils.representsSameYMD(ymdValue, textValue)
      || ObjectUtils.equals(ymdValue, textValue);

  TemporalAccessor chosen;
  ParseResult.CONFIDENCE confidence;
  if (inputsAgree) {
    // the date string wins because it can hold a higher resolution date
    if (textValue != null) {
      chosen = textValue;
      confidence = textParse.getConfidence();
    } else {
      chosen = ymdValue;
      confidence = ymdParse.getConfidence();
    }
  } else {
    issues.add(OccurrenceIssue.RECORDED_DATE_MISMATCH);
    LOG.debug("Date mismatch: [{} vs {}].", ymdValue, textValue);

    chosen = TemporalAccessorUtils.getBestResolutionTemporalAccessor(ymdValue, textValue);
    if (chosen == null) {
      return OccurrenceParseResult.fail(issues);
    }
    // with one of the two values missing the confidence can not be DEFINITE
    if (ymdValue == null || textValue == null) {
      confidence = ParseResult.CONFIDENCE.PROBABLE;
    } else {
      confidence = ParseResult.CONFIDENCE.DEFINITE;
    }
  }

  if (isValidDate(chosen, true)) {
    return OccurrenceParseResult.success(confidence, chosen, issues);
  }

  // nothing could be parsed -> invalid; parsed but rejected by the validity check -> unlikely
  issues.add(chosen == null ? OccurrenceIssue.RECORDED_DATE_INVALID : OccurrenceIssue.RECORDED_DATE_UNLIKELY);
  LOG.debug("Invalid date: [{}]].", chosen);
  return OccurrenceParseResult.fail(issues);
}
/**
 * Checks whether a date expressed as a TemporalAccessor falls within the default range: from
 * {@link #MIN_LOCAL_DATE} up to and including the current date plus one day.
 *
 * @param temporalAccessor the date to check, may be null
 * @param acceptPartialDate whether a date lacking month and/or day may still be considered valid
 * @return true if the date lies within the default range
 */
public static boolean isValidDate(TemporalAccessor temporalAccessor, boolean acceptPartialDate){
  Range<LocalDate> defaultRange = Range.closed(MIN_LOCAL_DATE, LocalDate.now().plusDays(1));
  return isValidDate(temporalAccessor, acceptPartialDate, defaultRange);
}
/**
 * Checks whether a date expressed as a TemporalAccessor falls within the provided range.
 * <p>
 * When partial dates are not accepted, the accessor must yield a complete LocalDate. When they are
 * accepted, only the year is mandatory; a missing month or day is replaced by 1 before the range check.
 *
 * @param temporalAccessor the date to check, may be null (always invalid)
 * @param acceptPartialDate whether a date lacking month and/or day may still be considered valid
 * @param likelyRange the range the date has to fall into
 * @return true if the date can be resolved and lies within likelyRange
 */
public static boolean isValidDate(TemporalAccessor temporalAccessor, boolean acceptPartialDate,
    Range<LocalDate> likelyRange){
  if (temporalAccessor == null) {
    return false;
  }

  if (acceptPartialDate) {
    // a year is the minimum information needed
    if (!temporalAccessor.isSupported(ChronoField.YEAR)) {
      return false;
    }
    int y = temporalAccessor.get(ChronoField.YEAR);
    // missing month/day default to the first of the period
    int m = temporalAccessor.isSupported(ChronoField.MONTH_OF_YEAR)
        ? temporalAccessor.get(ChronoField.MONTH_OF_YEAR) : 1;
    int d = temporalAccessor.isSupported(ChronoField.DAY_OF_MONTH)
        ? temporalAccessor.get(ChronoField.DAY_OF_MONTH) : 1;
    return likelyRange.contains(LocalDate.of(y, m, d));
  }

  // complete dates only: the query yields null when no full LocalDate can be derived
  LocalDate completeDate = temporalAccessor.query(TemporalQueries.localDate());
  return completeDate != null && likelyRange.contains(completeDate);
}
/**
 * Parses a date string and flags the result when the date falls outside the given range.
 * A successfully parsed date outside the range (or not resolvable to a complete date) is still
 * returned; it only gets {@code unlikelyIssue} attached.
 *
 * @param dateString the verbatim date; null or empty yields a failed result
 * @param likelyRange the range a plausible date has to fall into
 * @param unlikelyIssue the issue to attach when the parsed date is outside likelyRange
 * @return the parse result, never null; according to the original documentation the payload
 *         represents a LocalDate or LocalDateTime
 */
public static OccurrenceParseResult<TemporalAccessor> interpretLocalDate(String dateString, Range<LocalDate> likelyRange,
    OccurrenceIssue unlikelyIssue) {
  if (Strings.isNullOrEmpty(dateString)) {
    return OccurrenceParseResult.fail();
  }

  // diamond instead of the former raw type, which produced an unchecked assignment
  OccurrenceParseResult<TemporalAccessor> result = new OccurrenceParseResult<>(TEXTDATE_PARSER.parse(dateString));

  // plausibility check; partial dates are not accepted here
  if (result.isSuccessful() && !isValidDate(result.getPayload(), false, likelyRange)) {
    LOG.debug("Unlikely date parsed, ignore [{}].", dateString);
    result.addIssue(unlikelyIssue);
  }
  return result;
}
}