/*
* This is eMonocot, a global online biodiversity information resource.
*
* Copyright © 2011–2015 The Board of Trustees of the Royal Botanic Gardens, Kew and The University of Oxford
*
* eMonocot is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* eMonocot is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
* the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* The complete text of the GNU Affero General Public License is in the source repository as the file
* ‘COPYING’. It is also available from <http://www.gnu.org/licenses/>.
*/
package org.emonocot.job.taxonmatch;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.solr.client.solrj.SolrServerException;
import org.emonocot.api.SearchableObjectService;
import org.emonocot.api.match.Match;
import org.emonocot.api.match.MatchStatus;
import org.emonocot.api.match.Matcher;
import org.emonocot.api.match.taxon.TaxonMatcher;
import org.emonocot.model.SearchableObject;
import org.emonocot.model.Taxon;
import org.emonocot.pager.Page;
import org.gbif.ecat.model.ParsedName;
import org.gbif.ecat.parser.NameParser;
import org.gbif.ecat.parser.UnparsableException;
import org.gbif.ecat.voc.Rank;
import org.gbif.ecat.voc.TaxonomicStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
/**
 * Matches scientific names against the {@link Taxon} records held in the search index.
 * <p>
 * A parsed name is turned into a fielded query, executed through the
 * {@link SearchableObjectService}, and every returned record is classified as an
 * {@link MatchStatus#EXACT} or {@link MatchStatus#PARTIAL} match by comparing canonical names.
 * When nothing is found the authorship constraints are relaxed step by step and the search
 * is repeated.
 *
 * @author jk00kg
 */
public class DefaultTaxonMatcher implements TaxonMatcher, Matcher<String, Taxon> {

    private static final Logger logger = LoggerFactory.getLogger(DefaultTaxonMatcher.class);

    /**
     * Characters with a special meaning in the Lucene/Solr query syntax. They (and any
     * whitespace) are backslash-escaped before a value is placed into the query string.
     */
    private static final String QUERY_SPECIAL_CHARS = "\\+-!():^[]\"{}~*?|&;/";

    @Autowired
    private SearchableObjectService searchableObjectService;

    @Autowired
    private NameParser nameParser;

    /** When set, several exact matches may be narrowed down to the single accepted taxon. */
    private boolean assumeAcceptedMatches = false;

    /**
     * @param nameParser
     *            the parser used by {@link #match(String)} to parse raw name strings
     */
    public void setNameParser(NameParser nameParser) {
        this.nameParser = nameParser;
    }

    /**
     * @param assumeAcceptedMatches
     *            whether several exact matches should be reduced to the one accepted taxon
     *            (if there is exactly one); a {@code null} value is ignored and leaves the
     *            current setting untouched
     */
    public void setAssumeAcceptedMatches(Boolean assumeAcceptedMatches) {
        if (assumeAcceptedMatches != null) {
            this.assumeAcceptedMatches = assumeAcceptedMatches;
        }
    }

    /**
     * @param searchableObjectService
     *            the service used to run the name queries
     */
    public void setSearchableObjectService(SearchableObjectService searchableObjectService) {
        this.searchableObjectService = searchableObjectService;
    }

    /**
     * Finds the taxa matching a parsed name.
     * <ul>
     * <li>No record found: the bracket authorship, then the authorship, is removed from
     * {@code parsed} and the search is repeated. <strong>Note:</strong> this modifies the
     * {@code parsed} argument in place; the narrowing step below relies on that.</li>
     * <li>One record found: it is returned as an EXACT or PARTIAL match.</li>
     * <li>Several records found: if exactly one is an exact match only that one is returned.
     * If several are exact, {@code assumeAcceptedMatches} is enabled and the name carries no
     * authorship, the result is reduced to the accepted taxon when exactly one exact match is
     * accepted. Otherwise all records are returned.</li>
     * </ul>
     *
     * @param parsed
     *            the parsed name to look up; may be modified as described above
     * @return the matches, possibly empty, never {@code null}
     * @throws RuntimeException
     *             wrapping the {@link SolrServerException} if the search fails
     */
    public List<Match<Taxon>> match(ParsedName<String> parsed) {
        String searchTerm = buildQuery(parsed);
        logger.debug("Attempting to match {}", searchTerm);

        Map<String, String> selectedFacets = new HashMap<String, String>();
        selectedFacets.put("base.class_s", "org.emonocot.model.Taxon");
        Page<SearchableObject> page;
        try {
            page = searchableObjectService.search(searchTerm, null, null, null, null, null, selectedFacets, null, null);
        } catch (SolrServerException sse) {
            throw new RuntimeException("SolrServerException", sse);
        }

        List<SearchableObject> records = page.getRecords();
        if (records.isEmpty()) {
            return retryWithRelaxedAuthorship(parsed);
        }

        List<Match<Taxon>> matches = new ArrayList<Match<Taxon>>();
        // One parser per call is enough; it is only used to derive canonical names.
        NameParser canonicalParser = new NameParser();
        String wantedName = parsed.canonicalName();

        if (records.size() == 1) {
            Taxon taxon = (Taxon) records.get(0);
            String internalName = canonicalParser.parseToCanonical(taxon.getScientificName());
            matches.add(newMatch(taxon, wantedName.equals(internalName)));
            return matches;
        }

        logger.debug("{} records found", page.getSize());
        Set<Match<Taxon>> exactMatches = new HashSet<Match<Taxon>>();
        for (SearchableObject record : records) {
            Taxon taxon = (Taxon) record;
            logger.debug("{} {}", taxon.getScientificName(), record.getIdentifier());
            String name = canonicalParser.parseToCanonical(taxon.getScientificName());
            logger.debug("Name is {}", name);
            Match<Taxon> candidate = newMatch(taxon, wantedName.equals(name));
            matches.add(candidate);
            if (candidate.getStatus() == MatchStatus.EXACT) {
                exactMatches.add(candidate);
            }
        }
        logger.debug("{} exact matches", exactMatches.size());
        narrowToBestMatches(matches, exactMatches, parsed);
        return matches;
    }

    /**
     * Parses a raw name string and matches it.
     *
     * @param name
     *            the scientific name to parse and match
     * @return the matches, see {@link #match(ParsedName)}
     * @throws UnparsableException
     *             if the name parser cannot parse {@code name}
     */
    @Override
    public List<Match<Taxon>> match(String name) throws UnparsableException {
        ParsedName<String> parsed = nameParser.parse(name);
        return match(parsed);
    }

    /**
     * Matches a raw name string without propagating parse failures.
     *
     * @param input
     *            the scientific name to parse and match
     * @return the matches, or {@code null} if {@code input} could not be parsed (the failure
     *         is logged); callers must be prepared for the {@code null} result
     */
    @Override
    public List<Match<Taxon>> getMatches(String input) {
        try {
            return match(input);
        } catch (UnparsableException e) {
            // Keep the cause and the offending input so the failure can be diagnosed.
            logger.error("Couldn't parse the string '" + input + "'", e);
            return null;
        }
    }

    /**
     * Builds the fielded query for a parsed name. Names without a specific epithet are looked
     * up by label; all others by genus, epithet(s) and, for species, rank. Authorship is added
     * as a further constraint when present.
     */
    private String buildQuery(ParsedName<String> parsed) {
        StringBuilder query = new StringBuilder();
        if (parsed.getSpecificEpithet() == null) {
            query.append("searchable.label_sort:").append(escapeQueryValue(parsed.getGenusOrAbove()));
        } else {
            query.append("taxon.genus_ns:").append(escapeQueryValue(parsed.getGenusOrAbove()));
            query.append(" AND taxon.specific_epithet_s:").append(escapeQueryValue(parsed.getSpecificEpithet()));
            if (parsed.getInfraGeneric() != null) {
                query.append(" AND taxon.subgenus_s:").append(escapeQueryValue(parsed.getInfraGeneric()));
            }
            if (parsed.getInfraSpecificEpithet() != null) {
                query.append(" AND taxon.infraspecific_epithet_s:")
                        .append(escapeQueryValue(parsed.getInfraSpecificEpithet()));
            } else {
                // No infraspecific epithet in the name: exclude records that have one.
                query.append(" AND -taxon.infraspecific_epithet_s:[* TO *]");
            }
            if (Rank.SPECIES.equals(parsed.getRank())) {
                query.append(" AND taxon.taxon_rank_s:SPECIES");
            }
        }
        if (parsed.getAuthorship() != null) {
            query.append(" AND taxon.scientific_name_authorship_s:")
                    .append(escapeQueryValue(parsed.getAuthorship()));
        }
        return query.toString();
    }

    /**
     * Backslash-escapes whitespace and query-syntax characters in a single field value. The
     * query as a whole contains real operators, so each value has to be escaped individually
     * before it is concatenated; otherwise e.g. an authorship containing spaces or brackets
     * would be interpreted as query syntax.
     */
    private static String escapeQueryValue(String value) {
        String raw = String.valueOf(value);
        StringBuilder escaped = new StringBuilder(raw.length() + 8);
        for (int i = 0; i < raw.length(); i++) {
            char c = raw.charAt(i);
            if (Character.isWhitespace(c) || QUERY_SPECIAL_CHARS.indexOf(c) >= 0) {
                escaped.append('\\');
            }
            escaped.append(c);
        }
        return escaped.toString();
    }

    /**
     * Called when a search found nothing: drops the bracket authorship or, failing that, the
     * authorship from {@code parsed} and searches again. Returns an empty list when there is
     * nothing left to relax.
     */
    private List<Match<Taxon>> retryWithRelaxedAuthorship(ParsedName<String> parsed) {
        if (parsed.getBracketAuthorship() != null) {
            logger.info("removing bracket authorship " + parsed.getAuthorship());
            parsed.setBracketAuthorship(null);
            logger.info("'null' bracket authorship is " + parsed.getAuthorship());
            return match(parsed);
        }
        if (parsed.getAuthorship() != null) {
            logger.info("removing authorship " + parsed.getAuthorship());
            parsed.setAuthorship(null);
            logger.info("'null' authorship is " + parsed.getAuthorship());
            return match(parsed);
        }
        return new ArrayList<Match<Taxon>>();
    }

    /** Creates a match for {@code taxon} flagged EXACT or PARTIAL. */
    private static Match<Taxon> newMatch(Taxon taxon, boolean exact) {
        Match<Taxon> match = new Match<Taxon>();
        match.setInternal(taxon);
        match.setStatus(exact ? MatchStatus.EXACT : MatchStatus.PARTIAL);
        return match;
    }

    /**
     * Reduces {@code matches} in place when the exact matches single out one taxon: either
     * there is exactly one exact match, or (with {@code assumeAcceptedMatches} and no
     * authorship on the name) exactly one of several exact matches is an accepted taxon.
     * In every other situation {@code matches} is left untouched.
     */
    private void narrowToBestMatches(List<Match<Taxon>> matches, Set<Match<Taxon>> exactMatches,
            ParsedName<String> parsed) {
        if (exactMatches.isEmpty()) {
            return;
        }
        if (exactMatches.size() == 1) {
            matches.retainAll(exactMatches);
            return;
        }
        if (assumeAcceptedMatches && parsed.getAuthorship() == null) {
            Set<Match<Taxon>> acceptedMatches = new HashSet<Match<Taxon>>();
            for (Match<Taxon> match : exactMatches) {
                logger.debug("Taxonomic status {}", match.getInternal().getTaxonomicStatus());
                if (TaxonomicStatus.Accepted.equals(match.getInternal().getTaxonomicStatus())) {
                    acceptedMatches.add(match);
                }
            }
            if (acceptedMatches.size() == 1) {
                matches.retainAll(acceptedMatches);
            } else {
                logger.debug("{} accepted taxa exactly match", acceptedMatches.size());
            }
        } else {
            logger.debug("{} exact matches:", exactMatches.size());
        }
    }
}