package org.jabref.logic.bibtex; import org.jabref.model.database.BibDatabaseMode; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.BibtexEntryTypes; import org.jabref.model.entry.FieldName; import org.junit.Before; import org.junit.Test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; public class DuplicateCheckTest { private BibEntry simpleArticle; private BibEntry unrelatedArticle; @Before public void setUp() { simpleArticle = new BibEntry(BibtexEntryTypes.ARTICLE.getName()) .withField(FieldName.AUTHOR, "Single Author") .withField(FieldName.TITLE, "A serious paper about something") .withField(FieldName.YEAR, "2017"); unrelatedArticle = new BibEntry(BibtexEntryTypes.ARTICLE.getName()) .withField(FieldName.AUTHOR, "Completely Different") .withField(FieldName.TITLE, "Holy Moly Uffdada und Trallalla") .withField(FieldName.YEAR, "1992"); } @Test public void testDuplicateDetection() { BibEntry one = new BibEntry(BibtexEntryTypes.ARTICLE.getName()); BibEntry two = new BibEntry(BibtexEntryTypes.ARTICLE.getName()); one.setField("author", "Billy Bob"); two.setField("author", "Billy Bob"); assertTrue(DuplicateCheck.isDuplicate(one, two, BibDatabaseMode.BIBTEX)); two.setField("author", "James Joyce"); assertFalse(DuplicateCheck.isDuplicate(one, two, BibDatabaseMode.BIBTEX)); two.setField("author", "Billy Bob"); two.setType(BibtexEntryTypes.BOOK); assertFalse(DuplicateCheck.isDuplicate(one, two, BibDatabaseMode.BIBTEX)); two.setType(BibtexEntryTypes.ARTICLE); one.setField("year", "2005"); two.setField("year", "2005"); one.setField("title", "A title"); two.setField("title", "A title"); one.setField("journal", "A"); two.setField("journal", "A"); assertTrue(DuplicateCheck.isDuplicate(one, two, BibDatabaseMode.BIBTEX)); assertEquals(1.01, DuplicateCheck.compareEntriesStrictly(one, two), 0.01); two.setField("journal", "B"); assertTrue(DuplicateCheck.isDuplicate(one, two, BibDatabaseMode.BIBTEX)); assertEquals(0.75, DuplicateCheck.compareEntriesStrictly(one, two), 0.01); two.setField("journal", "A"); one.setField("number", "1"); two.setField("volume", "21"); one.setField("pages", "334--337"); two.setField("pages", "334--337"); assertTrue(DuplicateCheck.isDuplicate(one, two, BibDatabaseMode.BIBTEX)); two.setField("number", "1"); one.setField("volume", "21"); assertTrue(DuplicateCheck.isDuplicate(one, two, BibDatabaseMode.BIBTEX)); two.setField("volume", "22"); assertTrue(DuplicateCheck.isDuplicate(one, two, BibDatabaseMode.BIBTEX)); two.setField("journal", "B"); assertTrue(DuplicateCheck.isDuplicate(one, two, BibDatabaseMode.BIBTEX)); one.setField("journal", ""); two.setField("journal", ""); assertTrue(DuplicateCheck.isDuplicate(one, two, BibDatabaseMode.BIBTEX)); two.setField("title", "Another title"); assertFalse(DuplicateCheck.isDuplicate(one, two, BibDatabaseMode.BIBTEX)); } @Test public void testWordCorrelation() { String d1 = "Characterization of Calanus finmarchicus habitat in the North Sea"; String d2 = "Characterization of Calunus finmarchicus habitat in the North Sea"; String d3 = "Characterization of Calanus glacialissss habitat in the South Sea"; assertEquals(1.0, (DuplicateCheck.correlateByWords(d1, d2)), 0.01); assertEquals(0.78, (DuplicateCheck.correlateByWords(d1, d3)), 0.01); assertEquals(0.78, (DuplicateCheck.correlateByWords(d2, d3)), 0.01); } @Test public void twoUnrelatedEntriesAreNoDuplicates() { assertFalse(DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX)); } @Test public void twoUnrelatedEntriesWithDifferentDoisAreNoDuplicates() { simpleArticle.setField(FieldName.DOI, "10.1016/j.is.2004.02.002"); unrelatedArticle.setField(FieldName.DOI, "10.1016/j.is.2004.02.00X"); assertFalse(DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX)); } @Test public void twoUnrelatedEntriesWithEqualDoisAreDuplicates() { simpleArticle.setField(FieldName.DOI, "10.1016/j.is.2004.02.002"); unrelatedArticle.setField(FieldName.DOI, "10.1016/j.is.2004.02.002"); assertTrue(DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX)); } @Test public void twoUnrelatedEntriesWithEqualPmidAreDuplicates() { simpleArticle.setField(FieldName.PMID, "12345678"); unrelatedArticle.setField(FieldName.PMID, "12345678"); assertTrue(DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX)); } @Test public void twoUnrelatedEntriesWithEqualEprintAreDuplicates() { simpleArticle.setField(FieldName.EPRINT, "12345678"); unrelatedArticle.setField(FieldName.EPRINT, "12345678"); assertTrue(DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX)); } @Test public void twoEntriesWithSameDoiButDifferentTypesAreDuplicates() { simpleArticle.setField(FieldName.DOI, "10.1016/j.is.2004.02.002"); BibEntry duplicateWithDifferentType = (BibEntry) simpleArticle.clone(); duplicateWithDifferentType.setType(BibtexEntryTypes.INCOLLECTION); assertTrue(DuplicateCheck.isDuplicate(simpleArticle, duplicateWithDifferentType, BibDatabaseMode.BIBTEX)); } }