package org.gbif.nub.lookup.fuzzy; import org.gbif.api.model.checklistbank.NameUsage; import org.gbif.api.model.checklistbank.NameUsageMatch; import org.gbif.api.model.common.LinneanClassification; import org.gbif.nameparser.GBIFNameParser; import java.io.IOException; import java.util.List; import org.junit.BeforeClass; import org.junit.Test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; public class NubMatchingServiceImplLegacyIT { private static NubMatchingServiceImpl matcher; private static List<NameUsage> names; @BeforeClass public static void buildMatcher() throws IOException { HigherTaxaComparator syn = new HigherTaxaComparator(); syn.loadClasspathDicts("dicts"); NubIndex index = NubIndex.newMemoryIndex(NubIndexTest.readTestNames()); matcher = new NubMatchingServiceImpl(index, syn, new GBIFNameParser()); } /** * trying all we can to produce a NPE ... */ @Test public void testRatingNPE() throws IOException { LinneanClassification cl = new NameUsageMatch(); cl.setKingdom(null); cl.setPhylum("."); matcher.match(null, "---", null, cl, NubMatchingServiceImpl.MatchingMode.FUZZY, true); // blacklisted names turn into nulls via the synonym lookup! cl = new NameUsageMatch(); cl.setKingdom(null); cl.setPhylum("Unknown"); matcher.match(null, "FAMILY", null, cl, NubMatchingServiceImpl.MatchingMode.FUZZY, true); } @Test public void testNoMatch() throws IOException { LinneanClassification cl = new NameUsageMatch(); NameUsageMatch m = matcher.match(null, "", null, cl, NubMatchingServiceImpl.MatchingMode.FUZZY, true); assertEquals(NameUsageMatch.MatchType.NONE, m.getMatchType()); } @Test public void testClassificationSimilarity() throws IOException { LinneanClassification cn1 = new NameUsageMatch(); LinneanClassification cn2 = new NameUsageMatch(); int score = matcher.classificationSimilarity(cn1, cn2); assertTrue(score > -10); assertTrue(score < 0); cn1.setFamily("Asteraceae"); cn2.setFamily("Asteraceae"); score = matcher.classificationSimilarity(cn1, cn2); assertTrue(score > 10); cn2.setFamily("Asteraceaee"); score = matcher.classificationSimilarity(cn1, cn2); assertTrue(score <= -5); cn1.setKingdom("A"); cn2.setKingdom("B"); score = matcher.classificationSimilarity(cn1, cn2); assertTrue(score < -12); } /** * compare * Plantae;Dinophyta;Dinophyceae;Gonyaulacales;;; * with * Protozoa;Dinophyta;;;;; * * @throws java.io.IOException */ @Test public void testRatingWihtKingdomSynonym() throws IOException { LinneanClassification cn1 = new NameUsageMatch(); cn1.setKingdom("Plantae"); cn1.setPhylum("Dinophyta"); cn1.setClazz("Dinophyceae"); cn1.setOrder("Gonyaulacales"); LinneanClassification cn2 = new NameUsageMatch(); cn2.setKingdom("Plantae"); cn2.setPhylum("Dinophyta"); cn2.setClazz("Dinophyceae"); cn2.setOrder("Gonyaulacales"); // test identical int score = matcher.classificationSimilarity(cn1, cn2); assertTrue(score > 35); // test kingdom synonym cn2.setKingdom("Plants"); cn2.setPhylum("Dinophyta"); cn2.setClazz(""); cn2.setOrder(""); // test identical score = matcher.classificationSimilarity(cn1, cn2); assertTrue(score >= 15); // test vague kingdom cn2.setKingdom("Protozoa"); score = matcher.classificationSimilarity(cn1, cn2); assertTrue(score > -10); assertTrue(score <= 0); // test very different kingdom cn2.setKingdom("Animalia"); score = matcher.classificationSimilarity(cn1, cn2); assertTrue(score <= -15); } /** * Plantae Rhodophyta Amphibia rodophyta */ @Test public void testAmphibiaRating() throws IOException { LinneanClassification cn1 = new NameUsageMatch(); cn1.setKingdom("Plantae"); cn1.setPhylum("Rhodophyta"); LinneanClassification cn2 = new NameUsageMatch(); cn2.setKingdom("Plantae"); cn2.setPhylum("Rhodophyta"); // test identical int score = matcher.classificationSimilarity(cn1, cn2); assertTrue(score > 10); } private void assertMatchedUsageKey(String name, LinneanClassification query, Integer expectedKey) { NameUsageMatch best = matcher.match(name, null, query, false, true); assertEquals( expectedKey, best.getUsageKey()); } @Test public void testRating() throws IOException { final String queryName = "Acanthophora"; LinneanClassification query = new NameUsageMatch(); query.setKingdom("Animalia"); query.setPhylum("Porifera"); assertMatchedUsageKey(queryName, query, 3); query.setPhylum("Arthropoda"); query.setClazz("Insecta"); assertMatchedUsageKey(queryName, query, 2); query.setOrder("Wrongly"); assertMatchedUsageKey(queryName, query, 2); query.setFamily("Geometridae"); assertMatchedUsageKey(queryName, query, 1); query = new NameUsageMatch(); query.setFamily("Rhodomelaceae"); assertMatchedUsageKey(queryName, query, 5); query.setFamily("Araliaceae"); assertMatchedUsageKey(queryName, query, 4); query.setFamily("PHORIdae"); assertMatchedUsageKey(queryName, query, 2); // homonym matching without a classification should return a NONE match query = new NameUsageMatch(); query.setKingdom(""); query.setPhylum(""); query.setClazz(""); query.setOrder(""); query.setFamily(""); query.setGenus(""); NameUsageMatch best = matcher.match(queryName, null, query, false, true); assertEquals(NameUsageMatch.MatchType.NONE, best.getMatchType()); // test that without a clear classification the best match is empty query = new NameUsageMatch(); query.setKingdom("Animalia"); query.setGenus("Acanthophora"); best = matcher.match(queryName, null, query, false, true); assertEquals(NameUsageMatch.MatchType.NONE, best.getMatchType()); } @Test public void testFuzzyMatching() throws IOException { LinneanClassification query = new NameUsageMatch(); query.setKingdom("Animalia"); query.setPhylum("Porifera"); assertMatchedUsageKey("Acanthophora", query, 3); assertNoMatch("Accanthophora", query); assertNoMatch("Akantophora", query); assertNoMatch("Acanthofora", query); assertNoMatch("Acanthoofoora", query); assertNoMatch("Acantoofora", query); } private void assertNoMatch(String name, LinneanClassification query) { NameUsageMatch best = matcher.match(name, null, query, false, true); assertEquals(NameUsageMatch.MatchType.NONE, best.getMatchType()); assertNull(best.getUsageKey()); } }