package org.gbif.checklistbank.cli.normalizer;
import org.gbif.api.model.checklistbank.NameUsage;
import org.gbif.api.model.checklistbank.NameUsageMetrics;
import org.gbif.api.model.common.LinneanClassification;
import org.gbif.api.vocabulary.Country;
import org.gbif.api.vocabulary.Language;
import org.gbif.api.vocabulary.MediaType;
import org.gbif.api.vocabulary.Origin;
import org.gbif.api.vocabulary.Rank;
import org.gbif.api.vocabulary.TaxonomicStatus;
import org.gbif.checklistbank.cli.BaseTest;
import org.gbif.checklistbank.model.UsageExtensions;
import org.gbif.checklistbank.neo.Labels;
import org.gbif.checklistbank.neo.NeoProperties;
import java.net.URI;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import com.beust.jcommander.internal.Sets;
import com.google.common.collect.Maps;
import org.junit.Ignore;
import org.junit.Test;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.schema.IndexDefinition;
import org.neo4j.graphdb.schema.Schema;
import org.neo4j.helpers.collection.Iterables;
import org.neo4j.helpers.collection.Iterators;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
/**
* Main integration tests for the normalizer testing imports of entire small checklists.
*/
public class NormalizerTest extends BaseTest {
// Scientific name the tests below expect on the placeholder ("incertae sedis") usages
// that stand in as accepted usage for synonyms.
private static final String INCERTAE_SEDIS = "Incertae sedis";
/**
 * Checks that Normalizer.getLowestExistingRank reports the lowest populated rank of a classification.
 */
@Test
public void testgetLowestExistingRank() throws Exception {
  // nothing populated yet: no rank can be reported
  LinneanClassification classification = new NameUsage();
  assertNull(Normalizer.getLowestExistingRank(classification));

  // a single populated rank is the lowest one
  classification.setOrder("oda");
  assertEquals(Rank.ORDER, Normalizer.getLowestExistingRank(classification));

  // populating a higher rank does not change the result
  classification.setPhylum("fülum");
  assertEquals(Rank.ORDER, Normalizer.getLowestExistingRank(classification));

  // populating a lower rank does
  classification.setSubgenus("sappjenuss");
  assertEquals(Rank.SUBGENUS, Normalizer.getLowestExistingRank(classification));
}
/**
 * Checks which characters Normalizer.splitByCommonDelimiters splits on.
 */
@Test
public void testSplitByCommonDelimiters() throws Exception {
  // a colon does not split the value
  assertThat(Normalizer.splitByCommonDelimiters("gx:1234"))
      .containsExactly("gx:1234");

  // pipe, whitespace and semicolon separated lists are split into their parts
  assertThat(Normalizer.splitByCommonDelimiters("1234|135286|678231612"))
      .containsExactly("1234", "135286", "678231612");
  assertThat(Normalizer.splitByCommonDelimiters("1234 135286 678231612"))
      .containsExactly("1234", "135286", "678231612");
  assertThat(Normalizer.splitByCommonDelimiters("1234; 135286; 678231612"))
      .containsExactly("1234", "135286", "678231612");

  // here only the pipe splits, the comma and the dot stay part of the values
  assertThat(Normalizer.splitByCommonDelimiters("1234,135286 | 67.8231612"))
      .containsExactly("1234,135286", "67.8231612");
}
/**
 * Runs the normalizer for datasetKey(1), then inspects the neo4j schema indices on the TAXON label
 * and verifies that nodes can be found by taxonID, scientific name and canonical name.
 */
@Test
public void testNeoIndices() throws Exception {
  final UUID datasetKey = datasetKey(1);
  Normalizer norm = Normalizer.create(cfg, datasetKey);
  norm.run();
  openDb(datasetKey);
  compareStats(norm.getStats());

  // the only properties a TAXON index is allowed to cover
  Set<String> taxonIndices = Sets.newHashSet();
  taxonIndices.add(NeoProperties.TAXON_ID);
  taxonIndices.add(NeoProperties.SCIENTIFIC_NAME);
  taxonIndices.add(NeoProperties.CANONICAL_NAME);

  try (Transaction tx = beginTx()) {
    Schema schema = dao.getNeo().schema();
    for (IndexDefinition idf : schema.getIndexes(Labels.TAXON)) {
      List<String> idxProps = Iterables.asList(idf.getPropertyKeys());
      // assertEquals reports the actual size on failure, which assertTrue(size == 1) does not
      assertEquals("Taxon index must cover exactly one property", 1, idxProps.size());
      // removing makes sure each expected property is indexed at most once
      assertTrue("Unexpected or duplicate taxon index on property " + idxProps.get(0),
          taxonIndices.remove(idxProps.get(0)));
    }
    // NOTE(review): properties still left in taxonIndices are not asserted here, so a missing
    // index would go unnoticed by the loop above - confirm whether that is intended.

    // existing values must resolve to exactly one node
    assertNotNull(Iterators.singleOrNull(dao.getNeo().findNodes(Labels.TAXON, NeoProperties.TAXON_ID, "1001")));
    assertNotNull(Iterators.singleOrNull(dao.getNeo().findNodes(Labels.TAXON, NeoProperties.SCIENTIFIC_NAME, "Crepis bakeri Greene")));
    assertNotNull(Iterators.singleOrNull(dao.getNeo().findNodes(Labels.TAXON, NeoProperties.CANONICAL_NAME, "Crepis bakeri")));

    // unknown values must not match anything
    assertNull(Iterators.singleOrNull(dao.getNeo().findNodes(Labels.TAXON, NeoProperties.TAXON_ID, "x1001")));
    assertNull(Iterators.singleOrNull(dao.getNeo().findNodes(Labels.TAXON, NeoProperties.SCIENTIFIC_NAME, "xCrepis bakeri Greene")));
    assertNull(Iterators.singleOrNull(dao.getNeo().findNodes(Labels.TAXON, NeoProperties.CANONICAL_NAME, "xCrepis bakeri")));
  }
}
/**
 * Runs the normalizer for datasetKey(1) and verifies the overall stats, the lookup of usages
 * by taxonID, name and key, a synonym link and the metrics of two usages.
 */
@Test
public void testIdList() throws Exception {
  final UUID datasetKey = datasetKey(1);
  Normalizer normalizer = Normalizer.create(cfg, datasetKey);
  normalizer.run();

  NormalizerStats stats = normalizer.getStats();
  System.out.println(stats);
  assertEquals(20, stats.getCount());
  assertEquals(6, stats.getDepth());
  assertEquals(20, stats.getCountByOrigin(Origin.SOURCE));
  assertEquals(1, stats.getRoots());
  assertEquals(4, stats.getSynonyms());

  openDb(datasetKey);
  compareStats(stats);

  try (Transaction tx = beginTx()) {
    // the same usage must be returned no matter how it is looked up
    NameUsage byTaxonId = getUsageByTaxonId("1006");
    NameUsage byName = getUsageByName("Leontodon taraxacoides (Vill.) Mérat");
    NameUsage byKey = getUsageByKey(byTaxonId.getKey());
    assertEquals(byTaxonId, byName);
    assertEquals(byTaxonId, byKey);

    // the synonym points to the accepted usage
    NameUsage synonym = getUsageByName("Leontodon leysseri");
    NameUsage accepted = getUsageByTaxonId("1006");
    assertEquals(accepted.getKey(), synonym.getAcceptedKey());

    // metrics
    assertMetrics(getMetricsByTaxonId("101"), 2, 2, 0, 0, 0, 0, 0, 0, 0, 2);
    assertMetrics(getMetricsByTaxonId("1"), 1, 15, 1, 0, 0, 0, 1, 4, 0, 7);
  }
}
/**
 * Prints the given metrics and asserts every count against its expected value.
 * Each assertion carries the metric name so a failure tells which count was wrong.
 *
 * @param m           metrics under test
 * @param children    expected number of children
 * @param descendants expected number of descendants
 * @param synonyms    expected number of synonyms
 * @param p           expected number of phyla
 * @param c           expected number of classes
 * @param o           expected number of orders
 * @param f           expected number of families
 * @param g           expected number of genera
 * @param sg          expected number of subgenera
 * @param s           expected number of species
 */
private void assertMetrics(NameUsageMetrics m, int children, int descendants, int synonyms,
    int p, int c, int o, int f, int g, int sg, int s) {
  System.out.println(m);
  assertEquals("numChildren", children, m.getNumChildren());
  assertEquals("numDescendants", descendants, m.getNumDescendants());
  assertEquals("numSynonyms", synonyms, m.getNumSynonyms());
  assertEquals("numPhylum", p, m.getNumPhylum());
  assertEquals("numClass", c, m.getNumClass());
  assertEquals("numOrder", o, m.getNumOrder());
  assertEquals("numFamily", f, m.getNumFamily());
  assertEquals("numGenus", g, m.getNumGenus());
  assertEquals("numSubgenus", sg, m.getNumSubgenus());
  assertEquals("numSpecies", s, m.getNumSpecies());
}
/**
 * Imports should not insert implicit genus or species and use the exact, original taxonomy.
 * Verified on the result of normalize(2) by checking parent links and the overall stats.
 */
@Test
public void testImplicitSpecies() throws Exception {
  NormalizerStats stats = normalize(2);

  try (Transaction tx = beginTx()) {
    // Agaricaceae
    NameUsage family = getUsageByTaxonId("5");
    // Tulostoma
    NameUsage tulostoma = getUsageByTaxonId("6");

    // usages 100, 101 and 102 all hang directly below Tulostoma
    NameUsage tulostomaEx = getUsageByTaxonId("100");
    assertEquals(tulostoma.getKey(), tulostomaEx.getParentKey());
    NameUsage tulostomaExEx = getUsageByTaxonId("101");
    assertEquals(tulostoma.getKey(), tulostomaExEx.getParentKey());
    NameUsage tulostomaExRid = getUsageByTaxonId("102");
    assertEquals(tulostoma.getKey(), tulostomaExRid.getParentKey());

    // Tulostomafake: usages 301 and 302 hang directly below the family
    NameUsage fakeOne = getUsageByTaxonId("301");
    assertEquals(family.getKey(), fakeOne.getParentKey());
    NameUsage fakeTwo = getUsageByTaxonId("302");
    assertEquals(family.getKey(), fakeTwo.getParentKey());
  }

  System.out.println(stats);
  assertEquals(14, stats.getCount());
  assertEquals(7, stats.getDepth());
  assertEquals(14, stats.getCountByOrigin(Origin.SOURCE));
  assertEquals(1, stats.getRoots());
  assertEquals(0, stats.getSynonyms());
}
/**
 * Verifies on the result of normalize(3) that synonyms lacking an accepted usage get linked to
 * an inserted incertae sedis usage which keeps the classification of the synonym.
 */
@Test
public void testSynonymsWIthMissingAccepted() throws Exception {
  NormalizerStats stats = normalize(3);
  System.out.println(stats);
  assertEquals(9, stats.getCount());
  assertEquals(8, stats.getCountByOrigin(Origin.SOURCE));
  assertEquals(1, stats.getCountByOrigin(Origin.MISSING_ACCEPTED));
  assertEquals(5, stats.getDepth());
  assertEquals(1, stats.getRoots());
  assertEquals(4, stats.getSynonyms());

  try (Transaction tx = beginTx()) {
    // Coleoptera
    NameUsage coleoptera = getUsageByTaxonId("4");
    assertEquals("Coleoptera", coleoptera.getScientificName());

    // Pygoleptura nigrella
    NameUsage synonym = getUsageByTaxonId("200");
    assertTrue(synonym.isSynonym());

    // test for inserted incertae sedis usage
    NameUsage incertae = getUsageByKey(synonym.getAcceptedKey());
    assertEquals(INCERTAE_SEDIS, incertae.getScientificName());
    assertFalse(incertae.isSynonym());
    // make sure synonym classification is preserved in incertae sedis
    assertEquals(coleoptera.getKey(), incertae.getParentKey());

    // make sure synonyms of synonyms are relinked to incertae accepted
    // Leptura nigrella Adams, 1909
    NameUsage lepturaAdams = getUsageByTaxonId("100");
    assertTrue(lepturaAdams.isSynonym());
    assertEquals(incertae.getKey(), lepturaAdams.getAcceptedKey());

    // Leptura nigrella Chagnon, 1917
    NameUsage lepturaChagnon = getUsageByTaxonId("101");
    assertTrue(lepturaChagnon.isSynonym());
    assertEquals(incertae.getKey(), lepturaChagnon.getAcceptedKey());
  }
}
/**
 * Verifies synonym and parent links of selected usages in the result of normalize(4).
 */
@Test
public void testColSynonyms() throws Exception {
  normalize(4);

  try (Transaction tx = beginTx()) {
    // Phoenicus sanguinipennis
    NameUsage accepted = getUsageByTaxonId("248320");
    assertEquals("Phoenicus sanguinipennis Lacordaire, 1869", accepted.getScientificName());
    assertFalse(accepted.isSynonym());

    NameUsage synonym = getUsageByTaxonId("282664");
    assertEquals("Phoenicus sanguinipennis Aurivillius, 1912", synonym.getScientificName());
    assertTrue(synonym.isSynonym());
    assertEquals(accepted.getKey(), synonym.getAcceptedKey());

    // Anniella pulchra pulchra
    NameUsage species = getUsageByTaxonId("1938166");
    assertFalse(species.isSynonym());

    NameUsage speciesSynonym = getUsageByTaxonId("1954175");
    assertTrue(speciesSynonym.isSynonym());

    // the synonym is accepted as the subspecies which in turn sits below the species
    NameUsage sspPulchra = getUsageByTaxonId("1943002");
    assertFalse(sspPulchra.isSynonym());
    assertEquals(sspPulchra.getKey(), speciesSynonym.getAcceptedKey());
    assertEquals(species.getKey(), sspPulchra.getParentKey());

    NameUsage sspNigra = getUsageByTaxonId("1943001");
    assertFalse(sspNigra.isSynonym());
  }
}
/**
 * Testing the insertion of incertae sedis records for synonyms without an accepted usage.
 * Using real IRMNG Homonym Data:
 * "mol101988","Tubiferidae Cossmann, 1895","Cossmann, 1895",,,"Tubiferidae","Heterostropha","Gastropoda","Mollusca",
 * "Animalia","family","synonym"
 * Also testing the materialisation of verbatim acceptedNameUsage in case they don't exist as records in their own
 * right.
 * Tested using 2 IRMNG homonyms:
 * "hex1088048","Acanthophora Hulst, 1896","Hulst, 1896","Acanthophora",,"Geometridae","Lepidoptera","Insecta",
 * "Arthropoda","Animalia","genus","synonym"
 * "hex1090241","Acanthophora Borgmeier, 1922","Borgmeier, 1922","Acanthophora",,"Phoridae","Diptera","Insecta",
 * "Arthropoda","Animalia","genus","synonym"
 */
@Test
public void testIncertaeSedisSynonyms() throws Exception {
  NormalizerStats stats = normalize(5);
  assertEquals(5, stats.getSynonyms());
  assertEquals(2, stats.getCountByOrigin(Origin.MISSING_ACCEPTED));
  assertEquals(3, stats.getCountByOrigin(Origin.VERBATIM_ACCEPTED));

  try (Transaction tx = beginTx()) {
    // Megalithistida: accepted usage is an incertae sedis placed in the class
    NameUsage megalithistida = getUsageByTaxonId("por10083");
    assertEquals("Megalithistida", megalithistida.getScientificName());
    assertTrue(megalithistida.isSynonym());

    NameUsage megalithistidaSedis = getUsageByKey(megalithistida.getAcceptedKey());
    assertFalse(megalithistidaSedis.isSynonym());
    assertEquals(INCERTAE_SEDIS, megalithistidaSedis.getScientificName());

    NameUsage demospongea = getUsageByKey(megalithistidaSedis.getParentKey());
    assertFalse(demospongea.isSynonym());
    assertEquals("Demospongea", demospongea.getScientificName());
    assertEquals(Rank.CLASS, demospongea.getRank());

    // Tubiferidae Cossmann, 1895
    // "mol101988","Tubiferidae Cossmann, 1895","Cossmann, 1895",,,"Tubiferidae","Heterostropha","Gastropoda","Mollusca","Animalia","family","synonym"
    NameUsage tubiferidae = getUsageByTaxonId("mol101988");
    assertEquals("Tubiferidae Cossmann, 1895", tubiferidae.getScientificName());
    assertTrue(tubiferidae.isSynonym());

    NameUsage tubiferidaeSedis = getUsageByKey(tubiferidae.getAcceptedKey());
    assertFalse(tubiferidaeSedis.isSynonym());
    assertEquals(INCERTAE_SEDIS, tubiferidaeSedis.getScientificName());

    NameUsage heterostropha = getUsageByKey(tubiferidaeSedis.getParentKey());
    assertFalse(heterostropha.isSynonym());
    assertEquals("Heterostropha", heterostropha.getScientificName());
    assertEquals(Rank.ORDER, heterostropha.getRank());

    // Acanthophora Hulst, 1896
    NameUsage hulst = getUsageByTaxonId("hex1088048");
    assertEquals("Acanthophora Hulst, 1896", hulst.getScientificName());
    assertTrue(hulst.isSynonym());

    NameUsage acanthotoca = getUsageByKey(hulst.getAcceptedKey());
    assertEquals("Acanthotoca", acanthotoca.getScientificName());
    assertFalse(acanthotoca.isSynonym());
    // synonym and accepted usage share the same parent
    assertEquals(hulst.getParentKey(), acanthotoca.getParentKey());

    NameUsage geometridae = getUsageByKey(acanthotoca.getParentKey());
    assertFalse(geometridae.isSynonym());
    assertEquals("Geometridae", geometridae.getScientificName());
    assertEquals(Rank.FAMILY, geometridae.getRank());

    // Acanthophora Borgmeier, 1922
    NameUsage borgmeier = getUsageByTaxonId("hex1090241");
    assertEquals("Acanthophora Borgmeier, 1922", borgmeier.getScientificName());
    assertTrue(borgmeier.isSynonym());

    NameUsage acanthophorides = getUsageByKey(borgmeier.getAcceptedKey());
    assertFalse(acanthophorides.isSynonym());
    assertEquals("Acanthophorides", acanthophorides.getScientificName());

    NameUsage phoridae = getUsageByKey(acanthophorides.getParentKey());
    assertFalse(phoridae.isSynonym());
    assertEquals("Phoridae", phoridae.getScientificName());
    assertEquals(Rank.FAMILY, phoridae.getRank());
  }
}
/**
 * Debugging method, please leave even if not used.
 * Prints the given usage key together with the scientific name of the usage it resolves to,
 * or a NULL marker if no key was given.
 */
private void printKey(Integer key) {
  if (key == null) {
    System.out.println("Key: NULL");
    return;
  }
  String scientificName = getUsageByKey(key).getScientificName();
  System.out.println("Key: " + key + " = " + scientificName);
}
/**
 * Verifies on the result of normalize(20) that the denormalized classification is turned into
 * exactly the expected set of higher taxon usages, each with the expected rank.
 */
@Test
public void testDenormedIndexFungorumSmall() throws Exception {
  NormalizerStats stats = normalize(20);
  assertEquals(1, stats.getRoots());
  assertEquals(9, stats.getCountByOrigin(Origin.SOURCE));
  // all distinct higher taxa but the synonym genera and the explicit genus Chaetosphaeria
  assertEquals(24, stats.getCountByOrigin(Origin.DENORMED_CLASSIFICATION));
  assertEquals(33, stats.getCount());

  try (Transaction tx = beginTx()) {
    // make sure we only have one such genus
    // NOTE(review): presumably getUsageByCanonical fails on multiple matches - confirm in BaseTest
    NameUsage u = getUsageByCanonical("Chaetosphaeria");
    assertUsage("970",
        false,
        "Chaetosphaeria Tul. & C. Tul.",
        null,
        null,
        Rank.GENUS,
        "Chaetosphaeriaceae",
        "Chaetosphaeriales",
        "Sordariomycetes",
        "Ascomycota",
        "Fungi");

    // verify all denormed usages
    Map<String, Rank> expected = Maps.newHashMap();
    expected.put("Fungi", Rank.KINGDOM);
    expected.put("Ascomycota", Rank.PHYLUM);
    expected.put("Arthoniomycetes", Rank.CLASS);
    expected.put("Dothideomycetes", Rank.CLASS);
    expected.put("Incertae sedis", Rank.CLASS);
    expected.put("Lecanoromycetes", Rank.CLASS);
    expected.put("Sordariomycetes", Rank.CLASS);
    expected.put("Arthoniales", Rank.ORDER);
    expected.put("Capnodiales", Rank.ORDER);
    expected.put("Chaetosphaeriales", Rank.ORDER);
    expected.put("Lecanorales", Rank.ORDER);
    expected.put("Pleosporales", Rank.ORDER);
    expected.put("Triblidiales", Rank.ORDER);
    expected.put("Arthoniaceae", Rank.FAMILY);
    expected.put("Arthopyreniaceae", Rank.FAMILY);
    expected.put("Chaetosphaeriaceae", Rank.FAMILY);
    expected.put("Mycosphaerellaceae", Rank.FAMILY);
    expected.put("Parmeliaceae", Rank.FAMILY);
    expected.put("Triblidiaceae", Rank.FAMILY);
    expected.put("Arthopyrenia", Rank.GENUS);
    expected.put("Blitridium", Rank.GENUS);
    expected.put("Pseudocercospora", Rank.GENUS);
    expected.put("Septoria", Rank.GENUS);
    expected.put("Sphaerella", Rank.GENUS);

    for (Node n : Iterators.loop(dao.allTaxa())) {
      u = dao.readUsage(n, true);
      if (u.getOrigin() == Origin.DENORMED_CLASSIFICATION) {
        // every expected name may be consumed only once
        Rank r = expected.remove(u.getScientificName());
        if (r == null) {
          // the usage exists but is not (or no longer) in the expected map, so it is surplus, not missing
          fail("Unexpected denormed usage " + u.getScientificName());
        } else if (!r.equals(u.getRank())) {
          fail("Wrong rank for denormed usage " + u.getScientificName()
              + ": expected " + r + " but was " + u.getRank());
        }
      }
    }
    // whatever is left over was expected but never found
    assertTrue("Missing denormed usages " + expected.keySet(), expected.isEmpty());
  }
}
/**
 * Tests the index fungorum format using a denormed classification.
 * All records of the genus Zignoëlla have been included in the test resources,
 * as the original issue of missing higher taxa only shows up when all of them are present.
 * The genus field is left out in the meta.xml, as it causes confusion when the genus is regarded as a synonym,
 * but there are species within that genus still being accepted. An oddity of the nomenclatural index fungorum
 * database.
 * Discovered with this test.
 */
@Test
public void testDenormedIndexFungorum() throws Exception {
  NormalizerStats stats = normalize(6);
  assertEquals(1, stats.getRoots());
  assertEquals(226, stats.getCountByOrigin(Origin.SOURCE));
  assertEquals(43, stats.getCountByOrigin(Origin.DENORMED_CLASSIFICATION));

  try (Transaction tx = beginTx()) {
    assertUsage("426221", false, "Lepiota seminuda var. seminuda", null, null, Rank.VARIETY,
        "Lepiota", "Agaricaceae", "Agaricales", "Agaricomycetes", "Basidiomycota", "Fungi");

    // test a synonym which should use the classification of the accepted name, not the (wrong) denormed of the synonym
    assertUsage("140283", true, "Polystictus substipitatus (Murrill) Sacc. & Trotter",
        "Coriolus substipitatus Murrill", "Trametes modesta (Kunze ex Fr.) Ryvarden", Rank.SPECIES,
        "Trametes", "Polyporaceae", "Polyporales", "Agaricomycetes", "Basidiomycota", "Fungi");

    assertUsage("233484", false, "Zignoëlla culmicola Delacr.", null, null, Rank.SPECIES,
        "Zignoëlla", "Chaetosphaeriaceae", "Chaetosphaeriales", "Sordariomycetes", "Ascomycota", "Fungi");

    assertUsage("970", false, "Chaetosphaeria Tul. & C. Tul.", null, null, Rank.GENUS,
        "Chaetosphaeriaceae", "Chaetosphaeriales", "Sordariomycetes", "Ascomycota", "Fungi");
  }
}
/**
 * Tests the creation of parent and accepted usages given as verbatim names via acceptedNameUsage or parentNameUsage.
 */
@Test
public void testMaterializeVerbatimParents() throws Exception {
  NormalizerStats stats = normalize(7);
  assertEquals(3, stats.getRoots()); // Animalia, Pygoleptura & Pygoleptura synomica
  assertEquals(11, stats.getCount());
  assertEquals(2, stats.getSynonyms()); // Leptura nigrella & Pygoleptura tinktura subsp. synomica
  assertEquals(9, stats.getCountByOrigin(Origin.SOURCE));
  assertEquals(1, stats.getCountByOrigin(Origin.VERBATIM_ACCEPTED));
  assertEquals(1, stats.getCountByOrigin(Origin.VERBATIM_PARENT));
  assertEquals(1, stats.getCountByRank(Rank.KINGDOM));
  assertEquals(1, stats.getCountByRank(Rank.PHYLUM));
  assertEquals(1, stats.getCountByRank(Rank.ORDER));
  assertEquals(4, stats.getCountByRank(Rank.SPECIES));

  try (Transaction tx = beginTx()) {
    // walk up the classification of usage 100 until the root
    NameUsage lepturaTinktura = getUsageByTaxonId("100");
    assertFalse(lepturaTinktura.isSynonym());
    assertEquals(Rank.SPECIES, lepturaTinktura.getRank());
    assertEquals("Leptura tinktura Döring, 2011", lepturaTinktura.getScientificName());

    NameUsage coleoptera = getUsageByKey(lepturaTinktura.getParentKey());
    Integer coleopteraID = coleoptera.getKey();
    assertFalse(coleoptera.isSynonym());
    assertEquals(Rank.ORDER, coleoptera.getRank());
    assertEquals("Coleoptera", coleoptera.getScientificName());

    NameUsage insecta = getUsageByKey(coleoptera.getParentKey());
    assertFalse(insecta.isSynonym());
    assertEquals(Rank.CLASS, insecta.getRank());
    assertEquals("Insecta", insecta.getScientificName());

    NameUsage arthropoda = getUsageByKey(insecta.getParentKey());
    assertFalse(arthropoda.isSynonym());
    assertEquals(Rank.PHYLUM, arthropoda.getRank());
    assertEquals("Arthropoda", arthropoda.getScientificName());

    NameUsage animalia = getUsageByKey(arthropoda.getParentKey());
    assertFalse(animalia.isSynonym());
    assertEquals(Rank.KINGDOM, animalia.getRank());
    assertEquals("Animalia", animalia.getScientificName());
    assertNull(animalia.getParentKey());

    // synonym 101 and its accepted usage which sits below Coleoptera
    NameUsage lepturaNigrella = getUsageByTaxonId("101");
    assertTrue(lepturaNigrella.isSynonym());
    assertEquals(Rank.SPECIES, lepturaNigrella.getRank());
    assertEquals("Leptura nigrella Adams, 1909", lepturaNigrella.getScientificName());

    NameUsage pygolepturaNigrella = getUsageByKey(lepturaNigrella.getAcceptedKey());
    assertFalse(pygolepturaNigrella.isSynonym());
    assertEquals(Rank.SPECIES, pygolepturaNigrella.getRank());
    assertEquals("Pygoleptura nigrella", pygolepturaNigrella.getScientificName());
    assertEquals(coleopteraID, pygolepturaNigrella.getParentKey());
    Integer pNigrellaID = pygolepturaNigrella.getKey();

    // usage 102 is the very same usage
    NameUsage usage102 = getUsageByTaxonId("102");
    assertEquals(pNigrellaID, usage102.getKey());

    // usage 103 has a rankless root parent
    NameUsage pygolepturaTinktura = getUsageByTaxonId("103");
    assertFalse(pygolepturaTinktura.isSynonym());
    assertEquals(Rank.SPECIES, pygolepturaTinktura.getRank());
    assertEquals("Pygoleptura tinktura Döring, 2011", pygolepturaTinktura.getScientificName());

    NameUsage pygoleptura = getUsageByKey(pygolepturaTinktura.getParentKey());
    assertFalse(pygoleptura.isSynonym());
    assertNull(pygoleptura.getRank());
    assertEquals("Pygoleptura", pygoleptura.getScientificName());
    assertNull(pygoleptura.getParentKey());

    // synonym 104 has a rankless root as its accepted usage
    NameUsage synomica = getUsageByTaxonId("104");
    assertEquals(Rank.SUBSPECIES, synomica.getRank());
    assertEquals("Pygoleptura tinktura subsp. synomica Döring, 2011", synomica.getScientificName());
    assertTrue(synomica.isSynonym());

    NameUsage pygolepturaSynomica = getUsageByKey(synomica.getAcceptedKey());
    assertFalse(pygolepturaSynomica.isSynonym());
    assertNull(pygolepturaSynomica.getRank());
    assertEquals("Pygoleptura synomica", pygolepturaSynomica.getScientificName());
    assertNull(pygolepturaSynomica.getParentKey());
  }
}
/**
 * Tests the handling of pro parte synonyms in the result of normalize(8).
 * The earlier note on this test said that pro parte synonyms get exploded into several usages/nodes
 * that each have just one accepted taxon.
 * NOTE(review): the assertions below expect exactly one usage for the pro parte name - confirm which
 * behavior is the intended one.
 */
@Test
public void testProParteSynonyms() throws Exception {
  NormalizerStats stats = normalize(8);

  try (Transaction tx = beginTx()) {
    assertEquals(1, stats.getRoots());
    assertEquals(17, stats.getCount());
    assertEquals(6, stats.getSynonyms());
    assertEquals(17, stats.getCountByOrigin(Origin.SOURCE));
    assertEquals(2, stats.getCountByRank(Rank.GENUS));
    assertEquals(7, stats.getCountByRank(Rank.SPECIES));
    assertEquals(5, stats.getCountByRank(Rank.SUBSPECIES));

    // genus synonym
    NameUsage genusSynonym = getUsageByTaxonId("101");
    assertEquals("Cladendula Döring", genusSynonym.getScientificName());
    assertEquals(Rank.GENUS, genusSynonym.getRank());
    assertEquals(TaxonomicStatus.SYNONYM, genusSynonym.getTaxonomicStatus());
    assertTrue(genusSynonym.isSynonym());

    NameUsage accepted = getUsageByKey(genusSynonym.getAcceptedKey());
    assertEquals("Calendula L.", accepted.getScientificName());
    assertEquals(Rank.GENUS, accepted.getRank());
    assertEquals(TaxonomicStatus.ACCEPTED, accepted.getTaxonomicStatus());
    assertFalse(accepted.isSynonym());

    // pro parte synonym
    Set<Integer> acceptedKeys = Sets.newHashSet();
    List<NameUsage> proParteUsages = getUsagesByName("Calendula eckerleinii Ohle");
    assertEquals(1, proParteUsages.size());
    for (NameUsage proParte : proParteUsages) {
      assertEquals("Calendula eckerleinii Ohle", proParte.getScientificName());
      assertEquals(Rank.SPECIES, proParte.getRank());
      assertEquals(TaxonomicStatus.PROPARTE_SYNONYM, proParte.getTaxonomicStatus());
      assertTrue(proParte.isSynonym());
      // no two usages may share the same accepted usage
      assertFalse(acceptedKeys.contains(proParte.getAcceptedKey()));
      acceptedKeys.add(proParte.getAcceptedKey());
    }

    // each accepted usage must be one of the three known ones
    for (Integer acceptedKey : acceptedKeys) {
      accepted = getUsageByKey(acceptedKey);
      assertFalse(accepted.isSynonym());
      assertEquals(TaxonomicStatus.ACCEPTED, accepted.getTaxonomicStatus());
      switch (accepted.getTaxonID()) {
        case "1000":
          assertEquals("Calendula arvensis (Vaill.) L.", accepted.getScientificName());
          assertEquals(Rank.SPECIES, accepted.getRank());
          break;
        case "10000":
          assertEquals("Calendula incana Willd. subsp. incana", accepted.getScientificName());
          assertEquals(Rank.SUBSPECIES, accepted.getRank());
          break;
        case "10002":
          assertEquals("Calendula incana subsp. maderensis (DC.) Ohle", accepted.getScientificName());
          assertEquals(Rank.SUBSPECIES, accepted.getRank());
          break;
        default:
          fail("Unknown pro parte synonym");
      }
    }
  }
}
/**
 * Tests if the same verbatim parent gets reused and only one usage is created for it.
 */
@Test
public void testVerbatimParent() throws Exception {
  NormalizerStats stats = normalize(11);
  assertEquals(1, stats.getRoots());

  try (Transaction tx = beginTx()) {
    // the subspecies, its basionym and the accepted usage of that basionym
    final NameUsage sspAlgarbiensis = getUsageByTaxonId("10001");
    assertUsage(sspAlgarbiensis, Rank.SUBSPECIES, "Calendula incana subsp. algarbiensis (Boiss.) Ohle", false);

    final NameUsage algarbiensis = getUsageByKey(sspAlgarbiensis.getBasionymKey());
    assertUsage(algarbiensis, Rank.SPECIES, "Calendula algarbiensis Boss.", true);

    final NameUsage eckerleinii = getUsageByKey(algarbiensis.getAcceptedKey());
    assertUsage(eckerleinii, Rank.SPECIES, "Calendula eckerleinii Ohle", false);

    // walk up the parents of the subspecies
    final NameUsage incana = getUsageByKey(sspAlgarbiensis.getParentKey());
    assertUsage(incana, Rank.SPECIES, "Calendula incana Willd.", false);

    final NameUsage calendula = getUsageByKey(incana.getParentKey());
    assertUsage(calendula, Rank.GENUS, "Calendula L.", false);
    // both species share the one genus usage
    assertEquals(calendula.getKey(), eckerleinii.getParentKey());

    final NameUsage compositae = getUsageByKey(calendula.getParentKey());
    assertUsage(compositae, Rank.FAMILY, "Compositae Giseke", false);

    // Asteraceae is a synonym of the family
    final NameUsage asteraceae = getUsageByTaxonId("11");
    assertUsage(asteraceae, Rank.FAMILY, "Asteraceae", true);
    assertEquals(compositae.getKey(), asteraceae.getAcceptedKey());

    final NameUsage plantae = getUsageByKey(compositae.getParentKey());
    assertUsage(plantae, Rank.KINGDOM, "Plantae", false);
  }
}
/**
 * Verifies the classification of four usages in the result of normalize(12) and how many
 * nodes exist for each of the higher taxon names.
 */
@Test
public void testDenormedClassification() throws Exception {
  normalize(12);

  try (Transaction tx = beginTx()) {
    assertUsage("1", false, "Lepiota seminuda", null, null, Rank.SPECIES,
        "Lepiota", "Agaricaceae", "Agaricales", "Agaricomycetes", "Basidiomycota");

    assertUsage("2", false, "Lepiota seminuda", null, null, Rank.SPECIES,
        "Lepiota", "Agaricaceae", "Agaricales", "Agaricomycetes", "Basidiomycota");

    // this one has neither genus nor family
    assertUsage("3", false, "Lepiota seminuda", null, null, Rank.SPECIES,
        "Agaricales", "Agaricomycetes", "Basidiomycota");

    // this one additionally has the kingdom
    assertUsage("4", false, "Lepiota seminuda", null, null, Rank.SPECIES,
        "Lepiota", "Agaricaceae", "Agaricales", "Agaricomycetes", "Basidiomycota", "Fungi");

    // verify identities of higher taxa
    assertQuantity(2, "Lepiota");
    assertQuantity(2, "Agaricaceae");
    assertQuantity(2, "Agaricales");
    assertQuantity(2, "Agaricomycetes");
  }
}
/**
 * Verifies stats, classifications and the number of higher taxon nodes in the result of normalize(13).
 */
@Test
public void testMixedDenormedClassification() throws Exception {
  NormalizerStats stats = normalize(13);
  assertEquals(1, stats.getRoots());
  assertEquals(4, stats.getCountByOrigin(Origin.DENORMED_CLASSIFICATION));
  assertEquals(8, stats.getCount());

  try (Transaction tx = beginTx()) {
    assertUsage("1", false, "Agaricaceae", null, null, Rank.FAMILY,
        "Agaricales", "Agaricomycetes", "Basidiomycota", "Fungi");

    assertUsage("2", false, "Lepiota", null, null, Rank.GENUS,
        "Agaricaceae", "Agaricales", "Agaricomycetes", "Basidiomycota", "Fungi");

    assertUsage("3", false, "Lepiota seminuda", null, null, Rank.SPECIES,
        "Lepiota", "Agaricaceae", "Agaricales", "Agaricomycetes", "Basidiomycota", "Fungi");

    // verify identities of higher taxa
    assertQuantity(1, "Lepiota");
    assertQuantity(1, "Agaricaceae");
    assertQuantity(1, "Agaricales");
    assertQuantity(1, "Agaricomycetes");
  }
}
/**
 * Verifies on the result of normalize(22) that a synonym is given a classification under Lepiota
 * and that no node exists for the name Kamasutra.
 * See http://dev.gbif.org/issues/browse/POR-2992
 */
@Test
public void testMixedDenormedClassification2() throws Exception {
  NormalizerStats stats = normalize(22);
  assertEquals(1, stats.getRoots());
  assertEquals(4, stats.getCountByOrigin(Origin.DENORMED_CLASSIFICATION));
  assertEquals(7, stats.getCount());

  try (Transaction tx = beginTx()) {
    assertUsage("3", true, "Kamasutra nuda", null, "Lepiota nuda", Rank.SPECIES,
        "Lepiota", "Agaricomycetes", "Basidiomycota", "Fungi");

    // verify identities of higher taxa
    assertQuantity(0, "Kamasutra");
    assertQuantity(1, "Lepiota");
  }
}
/**
 * Verifies the root count and the usage counts by origin in the result of normalize(17).
 */
@Test
public void testDenormedClassificationBDJ() throws Exception {
  NormalizerStats stats = normalize(17);

  assertEquals(1, stats.getRoots());
  assertEquals(555, stats.getCountByOrigin(Origin.SOURCE));
  assertEquals(232, stats.getCountByOrigin(Origin.DENORMED_CLASSIFICATION));
}
/**
 * Asserts synonym flag, rank and scientific name of a usage, in that order.
 *
 * @param u       usage under test
 * @param rank    expected rank
 * @param sciName expected scientific name
 * @param synonym expected value of the synonym flag
 */
private void assertUsage(NameUsage u, Rank rank, String sciName, boolean synonym) {
  assertEquals(synonym, u.isSynonym());
  assertEquals(rank, u.getRank());
  assertEquals(sciName, u.getScientificName());
}
/**
 * Asserts the number of nodes returned by getNodesByName for the given name.
 *
 * @param expected  expected number of nodes
 * @param canonical name handed to getNodesByName
 */
private void assertQuantity(Integer expected, String canonical) {
  // box the size so both sides are compared as Integer objects
  Integer actual = getNodesByName(canonical).size();
  assertEquals(expected, actual);
}
/**
 * Tests the relinking of synonyms that point to other synonyms.
 */
@Test
public void testSynonymsOfSynonyms() throws Exception {
  NormalizerStats stats = normalize(9);
  assertEquals(1, stats.getCycles().size());
  assertEquals(1, stats.getRoots());

  try (Transaction tx = beginTx()) {
    final NameUsage incana = getUsageByTaxonId("1000");
    assertFalse(incana.isSynonym());
    assertEquals(Rank.SPECIES, incana.getRank());
    assertEquals("Calendula incana Willd.", incana.getScientificName());

    // synonym chain resolved
    assertEquals(incana.getKey(), getUsageByTaxonId("1001").getAcceptedKey());
    assertEquals(incana.getKey(), getUsageByTaxonId("1002").getAcceptedKey());
    assertEquals(incana.getKey(), getUsageByTaxonId("1003").getAcceptedKey());
    assertEquals(incana.getKey(), getUsageByTaxonId("1004").getAcceptedKey());

    // usage 10000 is a plain child of the species
    NameUsage child = getUsageByTaxonId("10000");
    assertNull(child.getAcceptedKey());
    assertNull(child.getBasionymKey());
    assertEquals(incana.getKey(), child.getParentKey());

    //the synonym cycle should be cut and all relinked to a new incertae sedis taxon
    final NameUsage incertae = getUsageByKey(getUsageByTaxonId("10002").getAcceptedKey());
    assertEquals(incana.getKey(), incertae.getParentKey());
    for (String taxonId : new String[]{"10003", "10004"}) {
      assertEquals("Synonym cycle for taxonID 10002 not cut",
          incertae.getKey(), getUsageByTaxonId(taxonId).getAcceptedKey());
    }
  }
}
/**
 * Testing CLIMBER dataset from ZooKeys:
 * http://www.gbif.org/dataset/e2bcea8c-dfea-475e-a4ae-af282b4ea1c5
 * Especially the behavior of acceptedNameUsage (canonical form without authorship)
 * pointing to itself (scientificName WITH authorship) indicating this is NOT a synonym.
 */
@Test
public void testVerbatimAccepted() throws Exception {
  final UUID datasetKey = datasetKey(14);
  Normalizer normalizer = Normalizer.create(cfg, datasetKey);
  normalizer.run();

  NormalizerStats stats = normalizer.getStats();
  System.out.println(stats);
  assertEquals(16, stats.getCount());
  assertEquals(1, stats.getRoots());
  assertEquals(6, stats.getDepth());
  assertEquals(0, stats.getSynonyms());
  assertEquals(10, stats.getCountByOrigin(Origin.SOURCE));
  assertEquals(6, stats.getCountByOrigin(Origin.DENORMED_CLASSIFICATION));
  assertEquals(1, stats.getCountByRank(Rank.KINGDOM));
  assertEquals(1, stats.getCountByRank(Rank.PHYLUM));
  assertEquals(1, stats.getCountByRank(Rank.CLASS));
  assertEquals(1, stats.getCountByRank(Rank.ORDER));
  assertEquals(2, stats.getCountByRank(Rank.FAMILY));
  assertEquals(0, stats.getCountByRank(Rank.GENUS));
  assertEquals(10, stats.getCountByRank(Rank.SPECIES));

  openDb(datasetKey);
  compareStats(stats);

  // a single usage: found by taxonID and by full name, and not a synonym
  try (Transaction tx = beginTx()) {
    NameUsage byTaxonId = getUsageByTaxonId("Aglais io");
    NameUsage byName = getUsageByName("Aglais io (Linnaeus, 1758)");
    assertEquals(byTaxonId, byName);
    assertNull(byTaxonId.getAcceptedKey());
    assertNull(byTaxonId.getAccepted());
    assertFalse(byTaxonId.isSynonym());
    assertFalse(byTaxonId.getTaxonomicStatus().isSynonym());
    assertEquals(Origin.SOURCE, byTaxonId.getOrigin());
  }

  // all usages: none is a synonym or has a basionym, and only source usages carry a taxonID
  try (Transaction tx = beginTx()) {
    int sourceUsages = 0;
    for (NameUsage usage : getAllUsages()) {
      assertNull(usage.getAcceptedKey());
      assertNull(usage.getAccepted());
      assertNull(usage.getBasionymKey());
      assertNull(usage.getBasionym());
      assertFalse(usage.isSynonym());
      assertFalse(usage.getTaxonomicStatus().isSynonym());
      if (usage.getTaxonID() == null) {
        assertEquals(Origin.DENORMED_CLASSIFICATION, usage.getOrigin());
      } else {
        assertEquals(Origin.SOURCE, usage.getOrigin());
        sourceUsages++;
      }
    }
    assertEquals(10, sourceUsages);
  }
}
/**
 * Tests the Achillea genus from a VASCAN download
 * with vernacular name, species profile, distribution, description, reference, multimedia and identifier extension.
 * Only the media and vernacular name extensions are asserted here.
 */
@Test
public void testExtensions() throws Exception {
  final UUID datasetKey = datasetKey(15);
  Normalizer normalizer = Normalizer.create(cfg, datasetKey);
  normalizer.run();

  NormalizerStats stats = normalizer.getStats();
  System.out.println(stats);
  assertEquals(57, stats.getCount());
  assertEquals(7, stats.getDepth());
  assertEquals(53, stats.getCountByOrigin(Origin.SOURCE));
  assertEquals(1, stats.getRoots());
  assertEquals(48, stats.getSynonyms());

  openDb(datasetKey);
  compareStats(stats);

  try (Transaction tx = beginTx()) {
    // Achillea
    NameUsage achillea = getUsageByTaxonId("770");
    // Achillea millefolium
    NameUsage millefolium = getUsageByTaxonId("2768");
    assertEquals(achillea.getKey(), millefolium.getParentKey());

    UsageExtensions genusExt = dao.readExtensions(achillea.getKey().longValue());
    UsageExtensions speciesExt = dao.readExtensions(millefolium.getKey().longValue());

    //media
    assertThat(genusExt.media).hasSize(0);
    assertThat(speciesExt.media).hasSize(2);
    assertThat(speciesExt.media).extracting("creator")
        .containsOnly("Gary A. Monroe", "J.S. Peterson");
    assertThat(speciesExt.media).extracting("title")
        .containsOnly("Achillea millefolium L. - common yarrow");
    assertThat(speciesExt.media).extracting("identifier").containsOnly(
        URI.create("http://plants.usda.gov/gallery/large/acmi2_002_lvp.jpg"),
        URI.create("http://plants.usda.gov/gallery/pubs/acmi2_006_php.jpg"));
    assertThat(speciesExt.media).extracting("references").containsOnly(
        URI.create("http://plants.usda.gov/java/largeImage?imageID=acmi2_002_avp.jpg"),
        URI.create("http://plants.usda.gov/java/largeImage?imageID=acmi2_006_ahp.tif"));
    assertThat(speciesExt.media).extracting("type")
        .containsOnly(MediaType.StillImage);
    assertThat(speciesExt.media).extracting("format")
        .containsOnly("image/jpg", "image/jpeg");

    //vernaculars
    assertThat(genusExt.vernacularNames).hasSize(0);
    assertThat(speciesExt.vernacularNames).hasSize(6);
    assertThat(speciesExt.vernacularNames).extracting("language")
        .containsOnly(Language.ENGLISH, Language.FRENCH);
    assertThat(speciesExt.vernacularNames).extracting("country")
        .containsOnly(Country.CANADA);
    assertThat(speciesExt.vernacularNames).extracting("vernacularName").containsOnly(
        "achillée millefeuille", "herbe à dindes", "herbe à dindons", "common yarrow", "yarrow", "milfoil");
  }
}
/**
 * Runs the normalizer for datasetKey(18) with a neo batch size of 5 and verifies the resulting stats
 * and the existence of one usage.
 * See http://dev.gbif.org/issues/browse/POR-2755
 */
@Test
public void testFloraBrazilIncertaeSedis() throws Exception {
  final UUID datasetKey = datasetKey(18);
  cfg.neo.batchSize = 5;
  Normalizer normalizer = Normalizer.create(cfg, datasetKey);
  normalizer.run();

  NormalizerStats stats = normalizer.getStats();
  System.out.println(stats);

  openDb(datasetKey);
  compareStats(stats);

  try (Transaction tx = beginTx()) {
    assertEquals(76, stats.getCount());
    assertEquals(1, stats.getRoots());
    assertEquals(4, stats.getDepth());
    assertEquals(25, stats.getSynonyms());

    // usage counts by origin
    assertEquals(50, stats.getCountByOrigin(Origin.SOURCE));
    assertEquals(25, stats.getCountByOrigin(Origin.MISSING_ACCEPTED));
    assertEquals(1, stats.getCountByOrigin(Origin.VERBATIM_PARENT));

    // usage counts by rank
    assertEquals(1, stats.getCountByRank(Rank.GENUS));
    assertEquals(62, stats.getCountByRank(Rank.SPECIES));
    assertEquals(3, stats.getCountByRank(Rank.SUBSPECIES));
    assertEquals(9, stats.getCountByRank(Rank.VARIETY));

    // Ceramium rubrum C.Agardh
    NameUsage ceramiumRubrum = getUsageByTaxonId("99937");
    assertNotNull(ceramiumRubrum);
  }
}
/**
 * Placeholder for a test of the simple images media extension.
 * The body is empty, so the test is ignored with an explicit reason until it gets implemented.
 */
@Test
@Ignore("not implemented yet")
public void testSimpleImages() throws Exception {
  // TODO: implement a test for the simple images media extension
}
/**
 * Placeholder for a test of the Audubon media extension.
 * The body is empty, so the test is ignored with an explicit reason until it gets implemented.
 */
@Test
@Ignore("not implemented yet")
public void testAudubon() throws Exception {
  // TODO: implement a test for the Audubon media extension
}
/**
 * Placeholder for a test of the EOL media extension.
 * The body is empty, so the test is ignored with an explicit reason until it gets implemented.
 */
@Test
@Ignore("not implemented yet")
public void testEolMedia() throws Exception {
  // TODO: implement a test for the EOL media extension
}
/**
 * Normalizes test dataset 21 and verifies that all 40 usages have origin SOURCE, and that the usage
 * with taxon id 171415 is read back as a subgenus and the usage with taxon id 171426 as an accepted
 * species, each with the expected canonical and scientific name.
 *
 * http://dev.gbif.org/issues/browse/POR-2819
 */
@Test
public void testWormsSubgenus() throws Exception {
  // shared helper: creates and runs the normalizer, calls openDb and compareStats, returns the stats
  NormalizerStats stats = normalize(21);
  System.out.println(stats);

  // every usage comes from the source, none was generated with one of the other origins checked here
  assertEquals(40, stats.getCountByOrigin(Origin.SOURCE));
  assertEquals(0, stats.getCountByOrigin(Origin.DENORMED_CLASSIFICATION));
  assertEquals(0, stats.getCountByOrigin(Origin.VERBATIM_ACCEPTED));
  assertEquals(0, stats.getCountByOrigin(Origin.MISSING_ACCEPTED));
  assertEquals(1, stats.getRoots());
  assertEquals(17, stats.getSynonyms());
  assertEquals(40, stats.getCount());

  try (Transaction tx = beginTx()) {
    // the subgenus itself
    NameUsage subgen = getUsageByTaxonId("171415");
    assertEquals(Rank.SUBGENUS, subgen.getRank());
    assertEquals("Corynonema", subgen.getCanonicalName());
    assertEquals("Hyalonema subgen. Corynonema Ijima, 1927", subgen.getScientificName());

    // an accepted species whose names must not contain the subgenus
    NameUsage u = getUsageByTaxonId("171426");
    assertEquals(TaxonomicStatus.ACCEPTED, u.getTaxonomicStatus());
    assertEquals(Rank.SPECIES, u.getRank());
    assertEquals("Hyalonema rotundum", u.getCanonicalName());
    assertEquals("Hyalonema rotundum Ijima, 1927", u.getScientificName());
  }
}
/**
 * Normalizes test dataset 23 and verifies that the usage named Animalia is read back as a kingdom with
 * matching canonical and scientific name, followed by checks of the root count and the usage counts
 * by rank and by origin.
 *
 * http://dev.gbif.org/issues/browse/POR-3193
 */
@Test
public void testWormsDuplicates() throws Exception {
  // shared helper: creates and runs the normalizer, calls openDb and compareStats, returns the stats
  NormalizerStats stats = normalize(23);
  System.out.println(stats);

  try (Transaction tx = beginTx()) {
    // NOTE(review): print and showOrigin are defined outside this chunk; presumably diagnostic output only
    print(Rank.CLASS);
    showOrigin(Origin.DENORMED_CLASSIFICATION);

    NameUsage k = getUsageByName("Animalia");
    assertEquals(Rank.KINGDOM, k.getRank());
    assertEquals("Animalia", k.getCanonicalName());
    assertEquals("Animalia", k.getScientificName());
  }

  // counts by rank
  assertEquals(1, stats.getRoots());
  assertEquals(9, stats.getCountByRank(Rank.KINGDOM));
  assertEquals(5, stats.getCountByRank(Rank.PHYLUM));

  // counts by origin; the three non zero origins add up to the total of 122
  assertEquals(99, stats.getCountByOrigin(Origin.SOURCE));
  assertEquals(12, stats.getCountByOrigin(Origin.DENORMED_CLASSIFICATION));
  assertEquals(0, stats.getCountByOrigin(Origin.VERBATIM_ACCEPTED));
  assertEquals(11, stats.getCountByOrigin(Origin.MISSING_ACCEPTED));
  assertEquals(122, stats.getCount());
}
/**
 * Builds a dataset key from a number: the number is zero padded to 8 digits and used as the first
 * group of a UUID whose remaining groups are fixed.
 *
 * @param x number to place in the first UUID group
 * @return the resulting UUID, e.g. 00000018-c6af-11e2-9b88-00145eb45e9a for 18
 * @throws NormalizationFailedException declared in the signature; nothing in this method body throws it
 */
public static UUID datasetKey(Integer x) throws NormalizationFailedException {
  final String key = String.format("%08d-c6af-11e2-9b88-00145eb45e9a", x);
  return UUID.fromString(key);
}
/**
 * Creates and runs a normalizer for the dataset key derived from the given number, then calls openDb
 * for that key and hands the resulting stats to compareStats.
 *
 * @param dKey number passed to {@link #datasetKey(Integer)} to build the dataset key
 * @return the stats reported by the normalizer after its run
 */
private NormalizerStats normalize(Integer dKey) throws NormalizationFailedException {
  final UUID key = datasetKey(dKey);

  final Normalizer normalizer = Normalizer.create(cfg, key);
  normalizer.run();
  final NormalizerStats result = normalizer.getStats();

  // open the db only after the normalizer has finished its run
  openDb(key);
  compareStats(result);
  return result;
}
}