package net.sf.jabref; import java.util.HashSet; import java.util.Iterator; /** * This class contains utility method for duplicate checking of entries. */ public class DuplicateCheck { /** * Checks if the two entries represent the same publication. * * @param one BibtexEntry * @param two BibtexEntry * @return boolean */ public static boolean isDuplicate(BibtexEntry one, BibtexEntry two) { // First check if they are of the same type - a necessary condition: if (one.getType() != two.getType()) return false; // The check if they have the same required fields: String[] fields = one.getType().getRequiredFields(); float req, reqWeight = 2; if (fields == null) { req = 0; reqWeight = 0; } else req = compareFieldSet(fields, one, two); fields = one.getType().getOptionalFields(); if (fields != null) { float opt = compareFieldSet(fields, one, two); return (reqWeight * req + opt) / (1 + reqWeight) >= Globals.duplicateThreshold; } else { return (req >= Globals.duplicateThreshold); } } private static float compareFieldSet(String[] fields, BibtexEntry one, BibtexEntry two) { int res = 0, empty = 0; for (int i = 0; i < fields.length; i++) { // Util.pr(":"+compareSingleField(fields[i], one, two)); int result = compareSingleField(fields[i], one, two); if (result == Util.EQUAL) { res++; // Util.pr(fields[i]); } else if (result == Util.EMPTY_IN_BOTH) empty++; } if (fields.length > empty) return ((float) res) / ((float) (fields.length - empty)); else // no fields present. This points to a possible duplicate? return 0.5f; } private static int compareSingleField(String field, BibtexEntry one, BibtexEntry two) { String s1 = one.getField(field), s2 = two.getField(field); if (s1 == null) { if (s2 == null) return Util.EMPTY_IN_BOTH; else return Util.EMPTY_IN_ONE; } else if (s2 == null) return Util.EMPTY_IN_TWO; s1 = s1.toLowerCase(); s2 = s2.toLowerCase(); // Util.pr(field+": '"+s1+"' vs '"+s2+"'"); if (field.equals("author") || field.equals("editor")) { // Specific for name fields. // Harmonise case: String[] aus1 = AuthorList.fixAuthor_lastNameFirst(s1).split(" and "), aus2 = AuthorList .fixAuthor_lastNameFirst(s2).split(" and "), au1 = aus1[0].split(","), au2 = aus2[0] .split(","); // Can check number of authors, all authors or only the first. if ((aus1.length > 0) && (aus1.length == aus2.length) && au1[0].trim().equals(au2[0].trim())) return Util.EQUAL; else return Util.NOT_EQUAL; } else { if (s1.trim().equals(s2.trim())) return Util.EQUAL; else return Util.NOT_EQUAL; } } public static double compareEntriesStrictly(BibtexEntry one, BibtexEntry two) { HashSet<String> allFields = new HashSet<String>();// one.getAllFields()); allFields.addAll(one.getAllFields()); allFields.addAll(two.getAllFields()); int score = 0; for (Iterator<String> fld = allFields.iterator(); fld.hasNext();) { String field = fld.next(); Object en = one.getField(field), to = two.getField(field); if ((en != null) && (to != null) && (en.equals(to))) score++; else if ((en == null) && (to == null)) score++; } if (score == allFields.size()) return 1.01; // Just to make sure we can // use score>1 without // trouble. else return ((double) score) / allFields.size(); } /** * Goes through all entries in the given database, and if at least one of * them is a duplicate of the given entry, as per * Util.isDuplicate(BibtexEntry, BibtexEntry), the duplicate is returned. * The search is terminated when the first duplicate is found. * * @param database The database to search. * @param entry The entry of which we are looking for duplicates. * @return The first duplicate entry found. null if no duplicates are found. */ public static BibtexEntry containsDuplicate(BibtexDatabase database, BibtexEntry entry) { for (BibtexEntry other : database.getEntries()) { if (isDuplicate(entry, other)) return other; // Duplicate found. } return null; // No duplicate found. } }