/*
* This is eMonocot, a global online biodiversity information resource.
*
* Copyright © 2011–2015 The Board of Trustees of the Royal Botanic Gardens, Kew and The University of Oxford
*
* eMonocot is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* eMonocot is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
* the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* The complete text of the GNU Affero General Public License is in the source repository as the file
* ‘COPYING’. It is also available from <http://www.gnu.org/licenses/>.
*/
package org.emonocot.job.iucn;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.StringEscapeUtils;
import org.emonocot.api.match.Match;
import org.emonocot.api.match.taxon.TaxonMatcher;
import org.emonocot.harvest.common.AbstractRecordAnnotator;
import org.emonocot.model.Annotation;
import org.emonocot.model.MeasurementOrFact;
import org.emonocot.model.Taxon;
import org.emonocot.model.constants.AnnotationCode;
import org.emonocot.model.constants.AnnotationType;
import org.emonocot.model.constants.RecordType;
import org.gbif.dwc.terms.IucnTerm;
import org.gbif.ecat.model.ParsedName;
import org.gbif.ecat.parser.NameParser;
import org.gbif.ecat.parser.UnparsableException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.item.ItemProcessor;
/**
* @author ben
*/
public class Processor extends AbstractRecordAnnotator implements ItemProcessor<Map<String,Object>, MeasurementOrFact> {
private static final Object SCIENTIFIC_NAME_FIELD = "scientific_name";
private static final Object MODIFIED_YEAR_FIELD = "modified_year";
private static final Object CATEGORY_FIELD = "category";
private static final Object SPECIES_ID_FIELD = "species_id";
public static String GENUS_FIELD = "genus";
public static String SPECIFIC_EPITHET_FIELD = "species";
public static String AUTHORITY_FIELD = "authority";
public static String INFRASPECIFIC_EPITHET_FIELD = "infra_name";
public static String INFRASPECIFIC_AUTHORITY_FIELD = "infra_authority";
public static String INFRASPECIFIC_RANK_FIELD = "infra_rank";
public static String CRITERIA_FIELD = "criteria";
private String accessRights;
private String license;
private String rights;
private String rightsHolder;
private String bibliographicCitation;
private String iucnWebsiteUri = "http://www.iucnredlist.org/details/${identifier}/0";
private NameParser nameParser;
public void setIucnWebsiteUri(String iucnWebsiteUri) {
if(iucnWebsiteUri != null) {
this.iucnWebsiteUri = iucnWebsiteUri;
}
}
/**
* @param accessRights the accessRights to set
*/
public void setAccessRights(String accessRights) {
this.accessRights = accessRights;
}
/**
* @param license the license to set
*/
public void setLicense(String license) {
this.license = license;
}
/**
* @param rights the rights to set
*/
public void setRights(String rights) {
this.rights = rights;
}
/**
* @param bibliographicCitation the bibliographicCitation to set
*/
public void setBibliographicCitation(String bibliographicCitation) {
this.bibliographicCitation = bibliographicCitation;
}
/**
* @param rightsHolder the rightsHolder to set
*/
public void setRightsHolder(String rightsHolder) {
this.rightsHolder = rightsHolder;
}
private Logger logger = LoggerFactory.getLogger(Processor.class);
private TaxonMatcher taxonMatcher;
public void setTaxonMatcher(TaxonMatcher taxonMatcher) {
this.taxonMatcher = taxonMatcher;
}
public void setNameParser(NameParser nameParser) {
this.nameParser = nameParser;
}
public final MeasurementOrFact process(final Map<String,Object> map) throws Exception {
Taxon taxon = doMatchTaxon(map);
if(taxon != null) {
MeasurementOrFact measurementOrFact = new MeasurementOrFact();
StringBuffer remarks = new StringBuffer();
if(map.get(Processor.CRITERIA_FIELD) != null) {
remarks.append("Criteria: " + map.get(Processor.CRITERIA_FIELD) + ". ");
}
if(map.get(Processor.MODIFIED_YEAR_FIELD) != null) {
remarks.append("Modified Year: " + map.get(Processor.MODIFIED_YEAR_FIELD) + ". ");
}
measurementOrFact.setMeasurementRemarks(remarks.toString().trim());
measurementOrFact.setMeasurementValue((String)map.get(Processor.CATEGORY_FIELD));
measurementOrFact.setMeasurementType(IucnTerm.threatStatus);
measurementOrFact.setAccessRights(accessRights);
measurementOrFact.setRights(rights);
measurementOrFact.setRightsHolder(rightsHolder);
measurementOrFact.setLicense(license);
measurementOrFact.setBibliographicCitation(bibliographicCitation);
if(map.get(Processor.SPECIES_ID_FIELD) != null) {
Integer speciesId = (Integer)map.get(Processor.SPECIES_ID_FIELD);
measurementOrFact.setSource(iucnWebsiteUri.replace("${identifier}",speciesId.toString()));
}
measurementOrFact.setTaxon(taxon);
return measurementOrFact;
}
return null;
}
private boolean nullSafeContains(Map<String,Object> map, String key) {
return map.containsKey(key) && map.get(key) != null && !(((String) map.get(key)).isEmpty());
}
private Taxon doMatchTaxon(Map<String, Object> map) {
ParsedName<String> parsedName = null;
if(map.get(Processor.SCIENTIFIC_NAME_FIELD) != null) {
try {
parsedName = nameParser.parse(StringEscapeUtils.unescapeXml((String)map.get(Processor.SCIENTIFIC_NAME_FIELD)));
} catch (UnparsableException e) {
logger.error("Unable to parse scientific_name");
}
}
StringBuffer nameBuffer = new StringBuffer();
if(nullSafeContains(map,Processor.GENUS_FIELD)) {
String genus = ((String)map.get(Processor.GENUS_FIELD)).trim();
nameBuffer.append(genus);
}
if(nullSafeContains(map,Processor.SPECIFIC_EPITHET_FIELD)) {
String species = ((String)map.get(Processor.SPECIFIC_EPITHET_FIELD)).trim();
nameBuffer.append(" ").append(species);
}
if(!nullSafeContains(map,Processor.INFRASPECIFIC_EPITHET_FIELD)) {
if(parsedName == null || parsedName.getInfraSpecificEpithet() == null) {
// Assume species, and use the "authority" field
if(nullSafeContains(map,Processor.AUTHORITY_FIELD)) {
String authority = StringEscapeUtils.unescapeXml(((String)map.get("authority")).trim());
nameBuffer.append(" ").append(authority);
}
} else {
// The parsed json fields do not contain information about the
// infraspecies, but the scientific_name field does contain this
// information
if(parsedName.getRankMarker() != null) {
String infraspecificRank = parsedName.getRankMarker().trim();
nameBuffer.append(" ").append(infraspecificRank);
}
String infraspecificEpithet = parsedName.getInfraSpecificEpithet().trim();
nameBuffer.append(" ").append(infraspecificEpithet);
if(parsedName.getAuthorship() != null) {
String infraspecificAuthority = parsedName.getAuthorship().trim();
nameBuffer.append(" ").append(infraspecificAuthority);
}
}
} else {
// Assume infraspecies, try to use the infra_rank, infra_name and "infra_authority" fields
if(nullSafeContains(map,Processor.INFRASPECIFIC_RANK_FIELD)) {
String infraspecificRank = ((String)map.get(Processor.INFRASPECIFIC_RANK_FIELD)).trim();
nameBuffer.append(" ").append(infraspecificRank);
}
if(nullSafeContains(map,Processor.INFRASPECIFIC_EPITHET_FIELD)) {
String infraspecificEpithet = ((String)map.get(Processor.INFRASPECIFIC_EPITHET_FIELD)).trim();
nameBuffer.append(" ").append(infraspecificEpithet);
}
if(nullSafeContains(map,Processor.INFRASPECIFIC_AUTHORITY_FIELD)) {
String infraspecificAuthority = StringEscapeUtils.unescapeXml(((String)map.get(Processor.INFRASPECIFIC_AUTHORITY_FIELD)).trim());
nameBuffer.append(" ").append(infraspecificAuthority);
}
}
String name = nameBuffer.toString();
List<Match<Taxon>> results;
try {
results = taxonMatcher.match(name);
if(results.size() == 1) {
return results.get(0).getInternal();
} else if(results.size() > 1) {
logger.info(name + " multiple matches");
Annotation annotation = new Annotation();
annotation.setJobId(stepExecution.getJobExecutionId());
annotation.setAnnotatedObj(null);
annotation.setRecordType(RecordType.MeasurementOrFact);
annotation.setCode(AnnotationCode.BadRecord);
annotation.setType(AnnotationType.Error);
annotation.setValue("Species Id: " + (Integer)map.get("species_id"));
annotation.setText(results.size() + " matches found for taxonomic name " + name);
super.annotate(annotation);
return null;
} else {
logger.info(name + " no matches");
Annotation annotation = new Annotation();
annotation.setJobId(stepExecution.getJobExecutionId());
annotation.setAnnotatedObj(null);
annotation.setRecordType(RecordType.MeasurementOrFact);
annotation.setCode(AnnotationCode.Absent);
annotation.setType(AnnotationType.Error);
annotation.setValue("Species Id: " + (Integer)map.get("species_id"));
annotation.setText("No matches found for taxonomic name " + name);
super.annotate(annotation);
return null;
}
} catch (UnparsableException e) {
logger.info(name + " is unparseable");
Annotation annotation = new Annotation();
annotation.setJobId(stepExecution.getJobExecutionId());
annotation.setAnnotatedObj(null);
annotation.setRecordType(RecordType.MeasurementOrFact);
annotation.setCode(AnnotationCode.Absent);
annotation.setType(AnnotationType.Error);
annotation.setValue("Species Id: " + (Integer)map.get("species_id"));
annotation.setText("Taxonomic name " + name + " cannot be parsed");
return null;
}
}
}