package org.gbif.checklistbank.authorship;

import org.gbif.api.model.checklistbank.ParsedName;

import java.util.Collection;
import java.util.Iterator;
import java.util.List;

import com.google.common.base.Function;
import com.google.common.base.Functions;
import com.google.common.collect.Lists;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A utility to sort a list of parsed names into sets sharing the same basionym judging only the authorship not epithets.
 * A name without any authorship at all will be ignored and not returned in any group.
 */
public class BasionymSorter {
  private static final Logger LOG = LoggerFactory.getLogger(BasionymSorter.class);

  /** Comparator used for every authorship/year comparison done by this sorter. */
  private final AuthorComparator authorComp;

  /**
   * Creates a sorter backed by the comparator returned from {@link AuthorComparator#createWithAuthormap()}.
   */
  public BasionymSorter() {
    this.authorComp = AuthorComparator.createWithAuthormap();
  }

  /**
   * Creates a sorter that uses the given comparator.
   *
   * @param authorComp comparator used to decide whether two authorships match
   */
  public BasionymSorter(AuthorComparator authorComp) {
    this.authorComp = authorComp;
  }

  /**
   * Thrown by the basionym lookup when more than one original name matches a group's authorship.
   */
  public static class MultipleBasionymException extends Exception {

  }

  /**
   * Groups plain {@link ParsedName} instances; delegates to
   * {@link #groupBasionyms(Iterable, Function)} with the identity function.
   *
   * @param names the names to group
   * @return the groups found, each with no or exactly one basionym
   */
  public Collection<BasionymGroup<ParsedName>> groupBasionyms(Iterable<ParsedName> names) {
    return groupBasionyms(names, Functions.<ParsedName>identity());
  }

  /**
   * Looks for an already created group whose first recombination has a bracket authorship and
   * bracket year that strictly compare as equal to those of the given parsed name.
   *
   * @param pn     parsed name of the recombination to place, must not be null
   * @param groups groups created so far; each is expected to hold at least one recombination
   * @param func   function resolving the parsed name of a group member
   * @return the first matching group in list order, or null if none matches
   */
  private <T> BasionymGroup<T> findExistingGroup(ParsedName pn, List<BasionymGroup<T>> groups, Function<T, ParsedName> func) {
    for (BasionymGroup<T> g : groups) {
      // the first recombination added to a group acts as its representative
      ParsedName representative = func.apply(g.getRecombinations().get(0));
      if (authorComp.compareStrict(pn.getBracketAuthorship(), pn.getBracketYear(),
          representative.getBracketAuthorship(), representative.getBracketYear())) {
        return g;
      }
    }
    return null;
  }

  /**
   * Searches the original names for the one whose authorship and year strictly compare as equal to
   * the given values. If nothing matches and an authorship is given, a second pass compares the
   * authorship only, passing null for both years.
   *
   * @param authorship basionym authorship to look for, may be null
   * @param year       basionym year to look for, may be null
   * @param originals  candidate original names
   * @param func       function resolving the parsed name of a candidate
   * @return the single matching candidate, or null if there is none
   * @throws MultipleBasionymException if more than one candidate matches
   */
  private <T> T findBasionym(String authorship, String year, List<T> originals, Function<T, ParsedName> func)
      throws MultipleBasionymException {
    List<T> basionyms = Lists.newArrayList();
    for (T obj : originals) {
      ParsedName b = func.apply(obj);
      if (authorComp.compareStrict(authorship, year, b.getAuthorship(), b.getYear())) {
        basionyms.add(obj);
      }
    }
    // try again without year in case we didn't find any, but make sure we only match once!
    if (basionyms.isEmpty() && authorship != null) {
      for (T obj : originals) {
        ParsedName b = func.apply(obj);
        if (authorComp.compareStrict(authorship, null, b.getAuthorship(), null)) {
          basionyms.add(obj);
        }
      }
    }
    if (basionyms.isEmpty()) {
      return null;
    }
    if (basionyms.size() == 1) {
      return basionyms.get(0);
    }
    // we have more than one match, don't use it!
    throw new MultipleBasionymException();
  }

  /**
   * Grouping that allows to use any custom class as long as there is a function that returns a ParsedName instance.
   * The list of groups returned only contains groups with no or one known basionym. Any uncertain cases like groups
   * with multiple basionyms are excluded!
   * <p>
   * Objects for which the function returns null are logged and skipped. Names that are not
   * recombinations and have neither authorship nor year are dropped silently.
   *
   * @param names the objects to group
   * @param func  function returning the parsed name for an object
   * @return the groups found, each with no or exactly one basionym
   */
  public <T> Collection<BasionymGroup<T>> groupBasionyms(Iterable<T> names, Function<T, ParsedName> func) {
    List<BasionymGroup<T>> groups = Lists.newArrayList();

    // first split names into recombinations and original names not having a basionym authorship
    // note that we drop any name without authorship here!
    List<T> recombinations = Lists.newArrayList();
    List<T> originals = Lists.newArrayList();
    for (T obj : names) {
      ParsedName p = func.apply(obj);
      if (p == null) {
        LOG.warn("No parsed name returned for name object {}", obj);
      } else if (p.isRecombination()) {
        recombinations.add(obj);
      } else if (p.getAuthorship() != null || p.getYear() != null) {
        originals.add(obj);
      }
    }

    // now group the recombinations
    for (T recomb : recombinations) {
      // resolve the parsed name once and guard it before any lookup dereferences it
      ParsedName pn = func.apply(recomb);
      if (pn == null) {
        LOG.warn("No parsed name returned for name recombination {}", recomb);
        continue;
      }
      BasionymGroup<T> group = findExistingGroup(pn, groups, func);
      // create new group if needed
      if (group == null) {
        group = new BasionymGroup<T>();
        group.setName(pn.getTerminalEpithet(), pn.getBracketAuthorship(), pn.getBracketYear());
        groups.add(group);
      }
      group.getRecombinations().add(recomb);
    }

    // finally try to find the basionym for each group in the list of original names
    Iterator<BasionymGroup<T>> iter = groups.iterator();
    while (iter.hasNext()) {
      BasionymGroup<T> group = iter.next();
      try {
        group.setBasionym(findBasionym(group.getAuthorship(), group.getYear(), originals, func));
      } catch (MultipleBasionymException e) {
        LOG.info("Ignore group with multiple basionyms found for {} {} {} in {} original names",
            group.getEpithet(), group.getAuthorship(), group.getYear(), originals.size());
        // ambiguous group: remove through the iterator so the ongoing iteration stays valid
        iter.remove();
      }
    }
    return groups;
  }
}