package org.gbif.checklistbank.authorship; import org.gbif.api.model.checklistbank.ParsedName; import org.gbif.api.vocabulary.NamePart; import org.gbif.checklistbank.model.Equality; import java.util.List; import com.google.common.collect.Lists; import org.junit.Test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; public class AuthorComparatorTest { AuthorComparator comp = AuthorComparator.createWithAuthormap(); @Test public void testNormalize() throws Exception { assertNull(comp.normalize(null)); assertNull(comp.normalize(" ")); assertNull(comp.normalize(".")); assertNull(comp.normalize(" (-) ")); assertEquals("doring", comp.normalize("Döring")); assertEquals("desireno", comp.normalize("Désírèñø")); assertEquals("a j white", comp.normalize("A.J. White")); assertEquals("j a white", comp.normalize("J A WHITE")); assertEquals("a j white", comp.normalize("A-J-White")); assertEquals("a j white", comp.normalize("(A.J. White)")); assertEquals("a j white,herbert,p j harvey", comp.normalize("A. J. White, Herbert & P. J. Harvey")); assertEquals("a j white,herbert,p j harvey", comp.normalize("A.J. White, Herbert et P.J. Harvey")); assertEquals("a j white,herbert,p j harvey", comp.normalize("A.J. White, Herbert and P.J. Harvey")); assertEquals("colla", comp.normalize("Bertero ex Colla")); assertEquals("schult", comp.normalize("Sieber ex Schult.")); assertEquals("nevski", comp.normalize("Desv. ex Nevski")); assertEquals("w q yin", comp.normalize("K. M. Feng ex W. Q. Yin")); assertEquals("g kirchn", comp.normalize("G.Kirchn. in Petzold & G.Kirchn.")); assertEquals("torr,a gray", comp.normalize("Torr. & A.Gray")); assertEquals("c chr", comp.normalize("C. Chr.")); assertEquals("h christ", comp.normalize("H. Christ")); assertEquals("l", comp.normalize("L.")); assertEquals("rchb", comp.normalize("Rchb.")); assertEquals("rchb", comp.normalize("Abasicarpon Andrz. ex Rchb.")); assertEquals("muller", comp.normalize("Müller")); assertEquals("muller", comp.normalize("Mueller")); assertEquals("moller", comp.normalize("Moeller")); assertEquals("don filius", comp.normalize("Don f.")); assertEquals("don filius", comp.normalize("Don fil.")); assertEquals("don filius", comp.normalize("Don fil")); assertEquals("f merck", comp.normalize("f. Merck")); assertEquals("f merck", comp.normalize("f Merck")); assertEquals("la don filius,dc", comp.normalize("la Don f. et DC")); assertEquals("la don,f rich,dc", comp.normalize("la Don, f. Rich. et DC")); assertEquals("la don,rich filius,dc", comp.normalize("la Don, Rich. f. et DC")); assertEquals("l filius", comp.normalize("L.f.")); assertEquals("l filius", comp.normalize("L. f.")); assertEquals("l filius", comp.normalize("L f")); assertEquals("lf", comp.normalize("Lf")); } @Test public void testLookup() throws Exception { assertNull(comp.lookup(null)); assertEquals(" ", comp.lookup(" ")); assertEquals(".", comp.lookup(".")); assertEquals("-", comp.lookup("-")); assertEquals("Döring", comp.lookup("Döring")); assertEquals("desireno", comp.lookup("desireno")); assertEquals("a j white", comp.lookup("a j white")); assertEquals("l a colla", comp.lookup("colla")); assertEquals("j a schultes", comp.lookup("schult")); assertEquals("s a nevski", comp.lookup("nevski")); assertEquals("w q yin", comp.lookup("w q yin")); assertEquals("g kirchner", comp.lookup("g kirchn")); assertEquals("c f a christensen", comp.lookup("c chr")); assertEquals("h christ", comp.lookup("h christ")); assertEquals("c linnaus", comp.lookup("l")); assertEquals("h g l reichenbach", comp.lookup("rchb")); assertEquals("a p de candolle", comp.lookup("dc")); assertEquals("j lamarck", comp.lookup("lam")); // the input is a single author. so expect nothing assertEquals("lam,dc", comp.lookup("lam,dc")); assertEquals("c linnaus filius", comp.lookup("l filius")); assertEquals("c h bipontinus schultz", comp.lookup("sch bip")); assertEquals("c h bipontinus schultz", comp.lookup("schultz bip")); } @Test public void extractSurname() throws Exception { assertEquals("doring", comp.extractSurname("doring")); assertEquals("white", comp.extractSurname("a j white")); assertEquals("harvey", comp.extractSurname("white herbert harvey")); assertEquals("colla", comp.extractSurname("l a colla")); assertEquals("yin", comp.extractSurname("w q yin")); assertEquals("kirchner", comp.extractSurname("g kirchner")); assertEquals("reichenbach", comp.extractSurname("h g l reichenbach")); assertEquals("linnaeus", comp.extractSurname("c linnaeus filius")); } @Test public void firstInitialsDiffer() throws Exception { assertTrue(comp.firstInitialsDiffer("a a mark", "a b mark")); assertFalse(comp.firstInitialsDiffer("k f mark", "k f mark")); assertFalse(comp.firstInitialsDiffer("k f mark", "f k mark")); assertFalse(comp.firstInitialsDiffer("k f mark", "k mark")); assertFalse(comp.firstInitialsDiffer("k mark", "k f mark")); assertFalse(comp.firstInitialsDiffer("f mark", "k f mark")); assertFalse(comp.firstInitialsDiffer("f mark", "f k mark")); assertFalse(comp.firstInitialsDiffer("f mark", "f k c g s mark")); assertTrue(comp.firstInitialsDiffer("k mark", "f mark")); assertTrue(comp.firstInitialsDiffer("k f mark", "a f mark")); assertTrue(comp.firstInitialsDiffer("a a mark", "a b mark")); } @Test public void testCompareParsedName() throws Exception { ParsedName p1 = new ParsedName(); ParsedName p2 = new ParsedName(); assertEquals(Equality.UNKNOWN, comp.compare(p1, p2)); p1.setAuthorship("L."); assertEquals(Equality.UNKNOWN, comp.compare(p1, p2)); p2.setAuthorship("Linne"); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p1.setAuthorship("Linné"); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p1.setYear("1847"); p2.setYear("1877"); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p2.setAuthorship("Carl von Linne"); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p2.setYear("1847"); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p1 = new ParsedName(); p1.setAuthorship("Reich."); p2 = new ParsedName(); p2.setAuthorship(""); assertEquals(Equality.UNKNOWN, comp.compare(p1, p2)); p2.setAuthorship("Reichen."); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p2.setAuthorship("Reichenbrg."); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p2.setAuthorship("Reichenberger"); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p2.setAuthorship("Müller"); assertEquals(Equality.DIFFERENT, comp.compare(p1, p2)); p2.setAuthorship("Jenkins, Marx & Kluse"); assertEquals(Equality.DIFFERENT, comp.compare(p1, p2)); p1.setAuthorship("Mill."); p2.setAuthorship("L."); assertEquals(Equality.DIFFERENT, comp.compare(p1, p2)); } @Test public void testCompareUnparsedAuthor() throws Exception { ParsedName p1 = new ParsedName(); p1.setScientificName("Platanus x hispanica Mill. ex Münch., 1770"); p1.setGenusOrAbove("Platanus"); p1.setSpecificEpithet("hispanica"); p1.setNotho(NamePart.SPECIFIC); p1.setAuthorsParsed(false); ParsedName p2 = new ParsedName(); p2.setScientificName("Platanus hispanica Mill. ex Münch."); p2.setGenusOrAbove("Platanus"); p2.setSpecificEpithet("hispanica"); p2.setNotho(NamePart.SPECIFIC); p2.setAuthorship("Mill. ex Münch."); p2.setAuthorsParsed(true); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); } @Test public void testCompare() throws Exception { assertAuth("Debreczy & I. Rácz", null, Equality.EQUAL, "Rácz", null); assertAuth("DC. ex Lam. et DC.", null, Equality.EQUAL, "DC.", null); assertAuth(null, null, Equality.UNKNOWN, null, null); assertAuth("", " ", Equality.UNKNOWN, " ", " "); assertAuth("L.", null, Equality.UNKNOWN, null, null); assertAuth("Bluff & Fingerh.", null, Equality.DIFFERENT, "Lindl.", null); assertAuth("Lindl.", null, Equality.EQUAL, "Lindl.", null); assertAuth(null, "1978", Equality.DIFFERENT, null, "1934"); assertAuth(null, "1978", Equality.EQUAL, null, "1978"); assertAuth("H. Christ", null, Equality.DIFFERENT, "C. Chr.", null); assertAuth("Reichenbach", "1837", Equality.EQUAL, "Abasicarpon Andrz. ex Rchb.", null); assertAuth("Torr et Gray", null, Equality.EQUAL, "Torr. & A.Gray", null); assertAuth("A.Murr", "1863", Equality.EQUAL, "A.Murray bis.", null); assertAuth("Maxim.", null, Equality.EQUAL, "Max.", null); assertAuth("A.Murr", "1863", Equality.EQUAL, "A. Murray", null); assertAuth("A.Murr", "1863", Equality.EQUAL, "A.Murray bis.", null); assertAuth("A.Murr", "1863", Equality.EQUAL, "A. Murr.", null); assertAuth("A.Murr", "1863", Equality.DIFFERENT, "B. Murr.", null); assertAuth("Debreczy & I. Rácz", null, Equality.EQUAL, "Rácz", null); assertAuth("Debreczy & I. Rácz", null, Equality.EQUAL, "Debreczy", null); assertAuth("White, Herbert & Harvey", null, Equality.EQUAL, "A.J. White, Herbert et P.J. Harvey", null); assertAuth("A.J.White", null, Equality.EQUAL, "A.J. White, Herbert et P.J. Harvey", null); assertAuth("Harvey", null, Equality.EQUAL, "A.J. White, Herbert et P.J. Harvey", null); assertAuth("R.H.Roberts", null, Equality.DIFFERENT, "R.J.Roberts", null); assertAuth("V.J.Chapm.", null, Equality.DIFFERENT, "F.R.Chapm.", null); assertAuth("V.J.Chapm.", null, Equality.DIFFERENT, "F.Chapm.", null); assertAuth("Chapm.", null, Equality.EQUAL, "F.R.Chapm.", null); assertAuth("Chapm.", null, Equality.EQUAL, "A.W.Chapm.", null); assertAuth("Brot. ex Willk. & Lange", null, Equality.DIFFERENT, "L.", null); assertAuth("Brugg.", null, Equality.EQUAL, "Brug.", null); assertAuth("A.Bruggen.", null, Equality.EQUAL, "Brug.", null); assertAuth("Brug.", null, Equality.EQUAL, "Pascal Bruggeman", null); assertAuth("Presl ex DC.", null, Equality.EQUAL, "C. Presl ex de Candolle", null); // https://github.com/gbif/checklistbank/issues/7 assertAuth("G. Don f.", null, Equality.EQUAL, "G. Don fil.", null); assertAuth("Don f.", null, Equality.EQUAL, "Don fil.", null); assertAuth("F.K. Schimp. et Spenn.", null, Equality.EQUAL, "K.F. Schimp. et Spenn.", null); assertAuth("J.A. Weinm.", null, Equality.EQUAL, "Weinm.", null); assertAuth("DC. ex Lam. et DC.", null, Equality.EQUAL, "DC.", null); assertAuth("Koch", null, Equality.EQUAL, "Johann Friedrich Wilhelm Koch", null); assertAuth("Koch", null, Equality.EQUAL, "J F W Koch", null); assertAuth("Koch", null, Equality.EQUAL, "H Koch", null); assertAuth("L.f", null, Equality.EQUAL, "Linnaeus filius", null); assertAuth("L. f", null, Equality.EQUAL, "Linnaeus filius", null); assertAuth("L.fil.", null, Equality.EQUAL, "Linnaeus filius", null); assertAuth("Schultz-Bip", null, Equality.EQUAL, "Sch.Bip.", null); assertAuth("Bruand", "1850", Equality.EQUAL, "Bruand", "1851"); assertAuth("Bruand", "1850", Equality.DIFFERENT, null, "1998"); assertAuth("Bruand", "1850", Equality.EQUAL, null, "1850"); assertAuth("Bruand", "1850", Equality.EQUAL, null, "1851"); } @Test public void testCompareStrict() throws Exception { assertFalse(comp.compareStrict(null, null, null, null)); assertFalse(comp.compareStrict("", "", "", "")); assertFalse(comp.compareStrict("", null, "", null)); assertAuthStrict("", " ", false, " ", " "); assertAuthStrict("L.", null, false, null, null); assertAuthStrict("Bluff & Fingerh.", null, false, "Lindl.", null); assertAuthStrict("Lindl.", null, true, "Lindl.", null); assertAuthStrict(null, "1978", false, null, "1934"); assertAuthStrict(null, "1978", false, null, "1978"); assertAuthStrict("H. Christ", null, false, "C. Chr.", null); assertAuthStrict("Reichenbach", "1837", false, "Abasicarpon Andrz. ex Rchb.", null); assertAuthStrict("Reichenbach", null, true, "Abasicarpon Andrz. ex Rchb.", null); assertAuthStrict("Reichenbach", "1837", true, "Abasicarpon Andrz. ex Rchb.", "1837"); assertAuthStrict("Torr et Gray", null, true, "Torr. & A.Gray", null); assertAuthStrict("Boed.", null, true, "Boed.", null); assertAuthStrict("Boed.", null, false, "F.Boos", null); assertAuthStrict("Boed.", null, false, "Boott", null); assertAuthStrict("Boed.", null, false, "F.Bolus", null); assertAuthStrict("Boed.", null, false, "Borchs.", null); assertAuthStrict("Hett.", null, false, "Scheffers", null); assertAuthStrict("Hett.", null, false, "Schew.", null); assertAuthStrict("Hett.", null, false, "Schemmann", null); assertAuthStrict("Hett.", null, false, "W.Mast.", null); assertAuthStrict("Hett.", null, false, "Kirschst.", null); /** * http://dev.gbif.org/issues/browse/POR-398 */ assertTrue(comp.compareStrict("Ridgway", "1893", "Ridgway", "1893")); assertTrue(comp.compareStrict("K. Koch", null, "K. Koch", null)); assertTrue(comp.compareStrict("K.Koch", null, "K. Koch", null)); assertTrue(comp.compareStrict("A. Nelson", null, "A Nélson", null)); assertTrue(comp.compareStrict("Colla", null, "Bertero ex Colla", null)); assertTrue(comp.compareStrict("Taczanowski & Berlepsch", "1885", "Berlepsch & Taczanowski", "1884")); assertFalse(comp.compareStrict("Taczanowski & Berlepsch", "1885", "Berlepsch & Taczanowski", "1883")); assertFalse(comp.compareStrict("Oberholser", "1917", "Oberholser", "1919")); assertFalse(comp.compareStrict("Koch", "1897", "K. Koch", null)); assertFalse(comp.compareStrict("Gould", "1860", "Gould", "1862")); assertFalse(comp.compareStrict("Gould", "1860", "Gould", "1863")); assertFalse(comp.compareStrict("A. Nelson", null, "E.E. Nelson", null)); assertFalse(comp.compareStrict("Koch", null, "K. Koch", null)); assertTrue(comp.compareStrict("J Koch", null, "Koch", null)); assertTrue(comp.compareStrict("Taczanowski & Berlepsch", "1885", "Berlepsch & Taczanowski", "1885")); assertTrue(comp.compareStrict("J Koch", null, "Koch", null)); assertFalse(comp.compareStrict("Chapm.", null, "F.R.Chapm.", null)); assertTrue(comp.compareStrict("Chapm.", null, "A.W.Chapm.", null)); assertTrue(comp.compareStrict("Brugg.", null, "Brug.", null)); assertFalse(comp.compareStrict("A.Bruggen.", null, "Brug.", null)); assertTrue(comp.compareStrict("Brug.", null, "Pascal Bruggeman", null)); assertTrue(comp.compareStrict("Koch", null, "Johann Friedrich Wilhelm Koch", null)); assertTrue(comp.compareStrict("Koch", null, "J F W Koch", null)); assertFalse(comp.compareStrict("Koch.", null, "H Koch", null)); } @Test public void testEqualsWithBasionym() throws Exception { assertAuth("Maxim.", null, "Trautv. ex Maxim.", null, Equality.EQUAL, "Maxim.", null, null, null); assertAuth("Maxim.", null, "Trautv. ex Karl Johann Maximowicz", null, Equality.EQUAL, "Max.", null, null, null); assertAuth("Maxim.", null, null, null, Equality.EQUAL, "Karl Johann Maximowicz", null, null, null); assertAuth("Bluff & Fingerh.", null, "L.", null, Equality.DIFFERENT, "Mill.", "1768", null, null); assertAuth("Mill.", null, "L.", null, Equality.EQUAL, "Mill.", "1768", null, null); assertAuth("Debreczy & I. Rácz", null, null, null, Equality.EQUAL, "Debreczy & Rácz", null, null, null); assertAuth("Debreczy & I.Rácz", null, null, null, Equality.DIFFERENT, "Silba", null, "Debreczy & I.Rácz", null); assertAuth(null, null, "Pauly", "1986", Equality.EQUAL, null, null, "Pauly", "1986"); assertAuth(null, null, "Moure", "1956", Equality.DIFFERENT, null, null, "Pauly", "1986"); // missing brackets is a common error so make this a positive comparison! assertAuth("Pauly", "1986", null, null, Equality.EQUAL, null, null, "Pauly", "1986"); assertAuth("Erichson", "1847", null, null, Equality.UNKNOWN, null, null, "Linnaeus", "1758"); } @Test public void testEqualsSubstring() throws Exception { ParsedName p1 = new ParsedName(); ParsedName p2 = new ParsedName(); p1.setAuthorship("L."); p2.setAuthorship("Linne"); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p2.setAuthorship("Lin."); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p1.setAuthorship("DC."); p2.setAuthorship("De Candolle"); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p1.setAuthorship("Miller"); p2.setAuthorship("Mill."); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p1.setAuthorship("Hern."); p2.setAuthorship("Hernandez"); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p1.setAuthorship("Robertson, T., Miller, P. et Jameson, R. J."); p2.setAuthorship("Miller"); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p1.setAuthorship("T. Robertson, P. Miller & R.J. Jameson"); p2.setAuthorship("Miller"); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p2.setAuthorship("Mülles"); assertEquals(Equality.DIFFERENT, comp.compare(p1, p2)); } @Test public void testBlattariaAuthors() throws Exception { ParsedName p1 = new ParsedName(); p1.setAuthorship("P. Miller"); p1.setYear("1754"); ParsedName p2 = new ParsedName(); p2.setAuthorship("O. Kuntze"); p2.setYear("1891"); ParsedName p3 = new ParsedName(); p3.setAuthorship("Voet, ?"); p3.setYear("1806"); ParsedName p4 = new ParsedName(); p4.setAuthorship("Weyenbergh"); p4.setYear("1874"); List<ParsedName> names = Lists.newArrayList(p1, p2, p3, p4); for (ParsedName p : names) { if (!p1.equals(p)) { assertEquals(Equality.DIFFERENT, comp.compare(p1, p)); } if (!p2.equals(p)) { assertEquals(Equality.DIFFERENT, comp.compare(p2, p)); } if (!p3.equals(p)) { assertEquals(Equality.DIFFERENT, comp.compare(p3, p)); } if (!p4.equals(p)) { assertEquals(Equality.DIFFERENT, comp.compare(p4, p)); } } } @Test public void testUnparsedAuthors() throws Exception { ParsedName p3 = new ParsedName(); p3.setAuthorsParsed(false); p3.setScientificName("Blattaria Voet, ?, 1806"); p3.setGenusOrAbove("Blattaria"); ParsedName p4 = new ParsedName(); p4.setAuthorsParsed(true); p4.setScientificName("Blattaria Weyenbergh, 1874"); p4.setAuthorship("Weyenbergh"); p4.setYear("1874"); assertEquals(Equality.DIFFERENT, comp.compare(p3, p4)); p4.setYear("1806"); assertEquals(Equality.EQUAL, comp.compare(p3, p4)); } @Test public void testAlikeAuthors() throws Exception { ParsedName p1 = new ParsedName(); p1.setAuthorsParsed(true); p1.setAuthorship("Voet"); ParsedName p2 = new ParsedName(); p2.setAuthorsParsed(true); p2.setAuthorship("Weyenbergh"); assertEquals(Equality.DIFFERENT, comp.compare(p1, p2)); p2 = new ParsedName(); p2.setAuthorship("Voetington"); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); p2 = new ParsedName(); p2.setAuthorship("Vellington"); assertEquals(Equality.DIFFERENT, comp.compare(p1, p2)); } /** * see http://dev.gbif.org/issues/browse/PF-2445 */ @Test public void testTransliterations() throws Exception { ParsedName p1 = new ParsedName(); p1.setAuthorsParsed(true); p1.setAuthorship("Müller"); ParsedName p2 = new ParsedName(); p2.setAuthorsParsed(true); p2.setAuthorship("Muller"); ParsedName p3 = new ParsedName(); p3.setAuthorsParsed(true); p3.setAuthorship("Mueller"); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); assertEquals(Equality.EQUAL, comp.compare(p1, p3)); assertEquals(Equality.EQUAL, comp.compare(p2, p3)); p1.setAuthorship("Müll."); p2.setAuthorship("Mull"); p3.setAuthorship("Muell"); assertEquals(Equality.EQUAL, comp.compare(p1, p2)); assertEquals(Equality.EQUAL, comp.compare(p1, p3)); assertEquals(Equality.EQUAL, comp.compare(p2, p3)); } private void assertAuth(String a1, String y1, Equality eq, String a2, String y2) { assertEquals(eq, comp.compare(a1, y1, a2, y2)); } private void assertAuthStrict(String a1, String y1, boolean eq, String a2, String y2) { assertEquals(eq, comp.compareStrict(a1, y1, a2, y2)); } private void assertAuth(String a1, String y1, String a1b, String y1b, Equality eq, String a2, String y2, String a2b, String y2b) { ParsedName p1 = new ParsedName(); p1.setAuthorsParsed(true); p1.setAuthorship(a1); p1.setYear(y1); p1.setBracketAuthorship(a1b); p1.setBracketYear(y1b); ParsedName p2 = new ParsedName(); p2.setAuthorsParsed(true); p2.setAuthorship(a2); p2.setYear(y2); p2.setBracketAuthorship(a2b); p2.setBracketYear(y2b); assertEquals(eq, comp.compare(p1, p2)); } }