package org.gbif.occurrence.processor.interpreting;
import org.gbif.api.model.occurrence.Occurrence;
import org.gbif.api.model.occurrence.VerbatimOccurrence;
import org.gbif.api.vocabulary.Continent;
import org.gbif.api.vocabulary.Country;
import org.gbif.api.vocabulary.OccurrenceIssue;
import org.gbif.common.parsers.ContinentParser;
import org.gbif.common.parsers.CountryParser;
import org.gbif.common.parsers.NumberParser;
import org.gbif.common.parsers.core.OccurrenceParseResult;
import org.gbif.common.parsers.core.ParseResult;
import org.gbif.common.parsers.geospatial.DoubleAccuracy;
import org.gbif.common.parsers.geospatial.MeterRangeParser;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.occurrence.processor.interpreting.result.CoordinateResult;
import org.gbif.occurrence.processor.interpreting.util.CountryMaps;
import java.io.Serializable;
import java.util.Locale;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;
import com.google.inject.Inject;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A wrapper for the interpreting steps required to parse and validate the location of an occurrence, including
 * coordinates given as latitude and longitude.
 */
public class LocationInterpreter implements Serializable {

  private static final Logger LOG = LoggerFactory.getLogger(LocationInterpreter.class);
  private static final CountryParser PARSER = CountryParser.getInstance();

  private final CoordinateInterpreter coordinateInterpreter;

  // COORDINATE_UNCERTAINTY_METERS bounds are exclusive bounds
  private static final double COORDINATE_UNCERTAINTY_METERS_LOWER_BOUND = 0;
  // 5000 km seems safe
  private static final double COORDINATE_UNCERTAINTY_METERS_UPPER_BOUND = 5000000;

  // COORDINATE_PRECISION bounds are inclusive bounds
  private static final double COORDINATE_PRECISION_LOWER_BOUND = 0;
  // 45 close to 5000 km
  private static final double COORDINATE_PRECISION_UPPER_BOUND = 45;

  /**
   * @param coordinateInterpreter the interpreter used to parse and validate latitude/longitude values
   */
  @Inject
  public LocationInterpreter(CoordinateInterpreter coordinateInterpreter) {
    this.coordinateInterpreter = coordinateInterpreter;
  }

  /**
   * Runs all location related interpretation steps and writes the results and any issues to the occurrence.
   * The country is interpreted first because it is handed to the coordinate interpretation.
   *
   * @param verbatim the verbatim record to read the raw values from
   * @param occ the occurrence that receives the interpreted values and issues
   */
  public void interpretLocation(VerbatimOccurrence verbatim, Occurrence occ) {
    Country country = interpretCountry(verbatim, occ);
    interpretCoordinates(verbatim, occ, country);
    interpretContinent(verbatim, occ);
    interpretWaterBody(verbatim, occ);
    interpretState(verbatim, occ);
    interpretElevation(verbatim, occ);
    interpretDepth(verbatim, occ);
  }

  //TODO: improve this method and put it into parsers!
  /**
   * Normalizes whitespace of a name and, if the name consists solely of upper case letters (ignoring whitespace),
   * lower cases it and capitalizes its first character.
   *
   * @param x the raw name, may be null
   * @return the cleaned name, or null if the input was null
   */
  private static String cleanName(String x) {
    // normalizeSpace already trims and is null safe, so no further trim() (which would fail on null) is needed
    String cleaned = StringUtils.normalizeSpace(x);
    // if we get all upper names, Capitalize them
    if (StringUtils.isAllUpperCase(StringUtils.deleteWhitespace(cleaned))) {
      // fixed locale so the result does not depend on the JVM default locale (e.g. Turkish dotless i)
      cleaned = StringUtils.capitalize(cleaned.toLowerCase(Locale.ROOT));
    }
    return cleaned;
  }

  /**
   * Attempts to convert the given country strings to a single country, verifying that all interpreted countries
   * do not contradict each other.
   * <p>
   * Null or empty strings are skipped. The first successfully parsed value wins; later values that parse to a
   * different country add {@link OccurrenceIssue#COUNTRY_MISMATCH}. If an earlier value could not be parsed, or
   * the final result is unsuccessful, {@link OccurrenceIssue#COUNTRY_INVALID} is added.
   *
   * @param country verbatim country strings, e.g. dwc:country or dwc:countryCode
   * @return the parse result with its issues; a failed result if no non-empty string was given
   */
  public OccurrenceParseResult<Country> interpretCountry(String ... country) {
    if (country == null) {
      return OccurrenceParseResult.fail();
    }

    OccurrenceParseResult<Country> result = null;
    for (String verbatim : country) {
      if (Strings.isNullOrEmpty(verbatim)) {
        continue;
      }
      if (result == null) {
        result = new OccurrenceParseResult<Country>(PARSER.parse(verbatim));

      } else if (result.isSuccessful()) {
        ParseResult<Country> result2 = PARSER.parse(verbatim);
        // only inspect secondary parsing if its also successful
        if (result2.isSuccessful() && !result2.getPayload().equals(result.getPayload())) {
          result.getIssues().add(OccurrenceIssue.COUNTRY_MISMATCH);
        }

      } else {
        // failed before. Use new parsing and add issue
        result = new OccurrenceParseResult<Country>(PARSER.parse(verbatim));
        result.getIssues().add(OccurrenceIssue.COUNTRY_INVALID);
      }
    }

    if (result == null) {
      // we got an array of null or empty countries passed in
      return OccurrenceParseResult.fail();
    }

    if (!result.isSuccessful()) {
      result.getIssues().add(OccurrenceIssue.COUNTRY_INVALID);
    }
    return result;
  }

  /**
   * Copies a cleaned version of dwc:stateProvince to the occurrence if the verbatim field is present.
   */
  private void interpretState(VerbatimOccurrence verbatim, Occurrence occ) {
    if (verbatim.hasVerbatimField(DwcTerm.stateProvince)) {
      occ.setStateProvince(cleanName(verbatim.getVerbatimField(DwcTerm.stateProvince)));
    }
    // TODO: verify against country?
  }

  /**
   * Parses dwc:continent, if present, and sets the parser payload on the occurrence.
   */
  private void interpretContinent(VerbatimOccurrence verbatim, Occurrence occ) {
    if (verbatim.hasVerbatimField(DwcTerm.continent)) {
      ParseResult<Continent> inter = ContinentParser.getInstance().parse(verbatim.getVerbatimField(DwcTerm.continent));
      occ.setContinent(inter.getPayload());
    }
    // TODO: if null, try to derive from country
  }

  /**
   * Copies a cleaned version of dwc:waterBody to the occurrence if the verbatim field is present.
   */
  private void interpretWaterBody(VerbatimOccurrence verbatim, Occurrence occ) {
    if (verbatim.hasVerbatimField(DwcTerm.waterBody)) {
      occ.setWaterBody(cleanName(verbatim.getVerbatimField(DwcTerm.waterBody)));
    }
  }

  /**
   * Interprets dwc:countryCode and dwc:country, sets the preferred country on the occurrence and adds all
   * issues found during interpretation.
   *
   * @return the country that was set on the occurrence, as returned by {@code occ.getCountry()}
   */
  private Country interpretCountry(VerbatimOccurrence verbatim, Occurrence occ) {
    OccurrenceParseResult<Country> inter = interpretCountry(
      verbatim.getVerbatimField(DwcTerm.countryCode),
      verbatim.getVerbatimField(DwcTerm.country));
    occ.setCountry(CountryMaps.preferred(inter.getPayload()));
    occ.getIssues().addAll(inter.getIssues());
    return occ.getCountry();
  }

  /**
   * Interprets the coordinates of the record. Decimal latitude/longitude are tried first, then verbatim
   * latitude/longitude and finally the single verbatimCoordinates field. The issues of the last attempt are
   * added to the occurrence regardless of its success.
   *
   * @param verbatim the verbatim record to read the raw values from
   * @param occ the occurrence that receives the interpreted values and issues
   * @param country the previously interpreted country, may be null
   */
  private void interpretCoordinates(VerbatimOccurrence verbatim, Occurrence occ, Country country) {
    final String datum = verbatim.getVerbatimField(DwcTerm.geodeticDatum);

    OccurrenceParseResult<CoordinateResult> parsedCoord = coordinateInterpreter.interpretCoordinate(
      verbatim.getVerbatimField(DwcTerm.decimalLatitude), verbatim.getVerbatimField(DwcTerm.decimalLongitude),
      datum, country);

    if (!parsedCoord.isSuccessful() && verbatim.hasVerbatimField(DwcTerm.verbatimLatitude)
        && verbatim.hasVerbatimField(DwcTerm.verbatimLongitude)) {
      LOG.debug("Decimal coord interpretation failed, trying verbatim lat/lon");
      // try again with verbatim lat/lon
      parsedCoord = coordinateInterpreter.interpretCoordinate(
        verbatim.getVerbatimField(DwcTerm.verbatimLatitude), verbatim.getVerbatimField(DwcTerm.verbatimLongitude),
        datum, country);
    }

    if (!parsedCoord.isSuccessful() && verbatim.hasVerbatimField(DwcTerm.verbatimCoordinates)) {
      LOG.debug("Verbatim lat/lon interpretation failed, trying single verbatimCoordinates");
      // try again with verbatim coordinates
      parsedCoord = coordinateInterpreter.interpretCoordinate(
        verbatim.getVerbatimField(DwcTerm.verbatimCoordinates), datum, country);
    }

    CoordinateResult coord = parsedCoord.getPayload();
    if (parsedCoord.isSuccessful() && coord != null) {
      occ.setDecimalLatitude(coord.getLatitude());
      occ.setDecimalLongitude(coord.getLongitude());

      // If the country returned by the co-ordinate interpreter is different, then it's an acceptable
      // swap (e.g. Réunion→France).
      // NOTE(review): a null country in the coordinate result would overwrite a parsed country here - confirm
      // that the coordinate interpreter never returns null when a country was passed in.
      if (country == null || country != coord.getCountry()) {
        occ.setCountry(coord.getCountry());
      }

      // interpret coordinateUncertaintyInMeters and coordinatePrecision
      interpretCoordinateUncertaintyAndPrecision(occ, verbatim);
    }

    LOG.debug("Adding coord issues to occ [{}]", parsedCoord.getIssues());
    occ.getIssues().addAll(parsedCoord.getIssues());
  }

  /**
   * Interprets dwc:coordinatePrecision and dwc:coordinateUncertaintyInMeters. Values that cannot be parsed or
   * that fall outside the accepted bounds are ignored and flagged with the matching issue.
   * http://dev.gbif.org/issues/browse/POR-1804
   *
   * @param occ the occurrence that receives the interpreted values and issues
   * @param verbatim the verbatim record to read the raw values from
   */
  @VisibleForTesting
  protected void interpretCoordinateUncertaintyAndPrecision(Occurrence occ, VerbatimOccurrence verbatim) {
    if (verbatim.hasVerbatimField(DwcTerm.coordinatePrecision)) {
      Double coordinatePrecision =
        NumberParser.parseDouble(verbatim.getVerbatimField(DwcTerm.coordinatePrecision).trim());
      // precision bounds are inclusive
      if (coordinatePrecision != null
          && coordinatePrecision >= COORDINATE_PRECISION_LOWER_BOUND
          && coordinatePrecision <= COORDINATE_PRECISION_UPPER_BOUND) {
        occ.setCoordinatePrecision(coordinatePrecision);
      } else {
        occ.getIssues().add(OccurrenceIssue.COORDINATE_PRECISION_INVALID);
        LOG.debug("Ignoring coordinatePrecision, value invalid or highly unlikely");
      }
    }

    if (verbatim.hasVerbatimField(DwcTerm.coordinateUncertaintyInMeters)) {
      ParseResult<Double> meters =
        MeterRangeParser.parseMeters(verbatim.getVerbatimField(DwcTerm.coordinateUncertaintyInMeters).trim());
      // guard against a null payload before unboxing; the sign of the parsed value is ignored
      Double coordinateUncertaintyInMeters = null;
      if (meters.isSuccessful() && meters.getPayload() != null) {
        coordinateUncertaintyInMeters = Math.abs(meters.getPayload());
      }
      // uncertainty bounds are exclusive
      if (coordinateUncertaintyInMeters != null
          && coordinateUncertaintyInMeters > COORDINATE_UNCERTAINTY_METERS_LOWER_BOUND
          && coordinateUncertaintyInMeters < COORDINATE_UNCERTAINTY_METERS_UPPER_BOUND) {
        occ.setCoordinateUncertaintyInMeters(coordinateUncertaintyInMeters);
      } else {
        occ.getIssues().add(OccurrenceIssue.COORDINATE_UNCERTAINTY_METERS_INVALID);
      }
    }
  }

  /**
   * Interprets the minimum and maximum depth in meters and sets depth and depth accuracy on the occurrence
   * if a value could be parsed. Issues reported by the parser are added to the occurrence even if no depth
   * could be interpreted, consistent with the country and coordinate interpretation.
   *
   * @param verbatim the verbatim record to read the raw values from
   * @param occ the occurrence that receives the interpreted values and issues
   */
  public void interpretDepth(VerbatimOccurrence verbatim, Occurrence occ) {
    OccurrenceParseResult<DoubleAccuracy> result = MeterRangeParser.parseDepth(
      verbatim.getVerbatimField(DwcTerm.minimumDepthInMeters),
      verbatim.getVerbatimField(DwcTerm.maximumDepthInMeters), null);

    DoubleAccuracy depth = result.isSuccessful() ? result.getPayload() : null;
    if (depth != null && depth.getValue() != null) {
      occ.setDepth(depth.getValue());
      occ.setDepthAccuracy(depth.getAccuracy());
    }
    // always keep the parser issues so that failed interpretations are not silently dropped
    occ.getIssues().addAll(result.getIssues());
  }

  /**
   * Interprets the minimum and maximum elevation in meters and sets elevation and elevation accuracy on the
   * occurrence if a value could be parsed. Issues reported by the parser are added to the occurrence even if
   * no elevation could be interpreted, consistent with the country and coordinate interpretation.
   *
   * @param verbatim the verbatim record to read the raw values from
   * @param occ the occurrence that receives the interpreted values and issues
   */
  public void interpretElevation(VerbatimOccurrence verbatim, Occurrence occ) {
    OccurrenceParseResult<DoubleAccuracy> result = MeterRangeParser.parseElevation(
      verbatim.getVerbatimField(DwcTerm.minimumElevationInMeters),
      verbatim.getVerbatimField(DwcTerm.maximumElevationInMeters), null);

    DoubleAccuracy elevation = result.isSuccessful() ? result.getPayload() : null;
    if (elevation != null && elevation.getValue() != null) {
      occ.setElevation(elevation.getValue());
      occ.setElevationAccuracy(elevation.getAccuracy());
    }
    // always keep the parser issues so that failed interpretations are not silently dropped
    occ.getIssues().addAll(result.getIssues());

    //TODO: use continent information to get finer unlikely values:
    // http://en.wikipedia.org/wiki/Extremes_on_Earth#Extreme_elevations_and_temperatures_per_continent
  }
}