package org.jabref.logic.importer.fileformat; import java.io.BufferedReader; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.regex.Pattern; import org.jabref.logic.bibtexkeypattern.BibtexKeyPatternUtil; import org.jabref.logic.importer.ImportFormatPreferences; import org.jabref.logic.importer.Importer; import org.jabref.logic.importer.ParserResult; import org.jabref.logic.util.FileExtensions; import org.jabref.model.entry.AuthorList; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.FieldName; /** * Importer for the Refer/Endnote format. * modified to use article number for pages if pages are missing (some * journals, e.g., Physical Review Letters, don't use pages anymore) * * check here for details on the format * http://libguides.csuchico.edu/c.php?g=414245&p=2822898 */ public class EndnoteImporter extends Importer { private static final String ENDOFRECORD = "__EOREOR__"; private static final Pattern A_PATTERN = Pattern.compile("%A .*"); private static final Pattern E_PATTERN = Pattern.compile("%E .*"); private final ImportFormatPreferences preferences; public EndnoteImporter(ImportFormatPreferences preferences) { this.preferences = preferences; } @Override public String getName() { return "Refer/Endnote"; } @Override public FileExtensions getExtensions() { return FileExtensions.ENDNOTE; } @Override public String getId() { return "refer"; } @Override public String getDescription() { return "Importer for the Refer/Endnote format. Modified to use article number for pages if pages are missing."; } @Override public boolean isRecognizedFormat(BufferedReader reader) throws IOException { // Our strategy is to look for the "%A *" line. String str; while ((str = reader.readLine()) != null) { if (A_PATTERN.matcher(str).matches() || E_PATTERN.matcher(str).matches()) { return true; } } return false; } @Override public ParserResult importDatabase(BufferedReader reader) throws IOException { List<BibEntry> bibitems = new ArrayList<>(); StringBuilder sb = new StringBuilder(); String str; boolean first = true; while ((str = reader.readLine()) != null) { str = str.trim(); if (str.indexOf("%0") == 0) { if (first) { first = false; } else { sb.append(ENDOFRECORD); } sb.append(str); } else { sb.append(str); } sb.append('\n'); } String[] entries = sb.toString().split(ENDOFRECORD); Map<String, String> hm = new HashMap<>(); String author; String type; String editor; String artnum; for (String entry : entries) { hm.clear(); author = ""; type = BibEntry.DEFAULT_TYPE; editor = ""; artnum = ""; boolean isEditedBook = false; String[] fields = entry.trim().substring(1).split("\n%"); for (String field : fields) { if (field.length() < 3) { continue; } /* * Details of Refer format for Journal Article and Book: * * Generic Ref Journal Article Book Code Author %A Author Author Year %D * Year Year Title %T Title Title Secondary Author %E Series Editor * Secondary Title %B Journal Series Title Place Published %C City * Publisher %I Publisher Volume %V Volume Volume Number of Volumes %6 * Number of Volumes Number %N Issue Pages %P Pages Number of Pages * Edition %7 Edition Subsidiary Author %? Translator Alternate Title %J * Alternate Journal Label %F Label Label Keywords %K Keywords Keywords * Abstract %X Abstract Abstract Notes %O Notes Notes */ String prefix = field.substring(0, 1); String val = field.substring(2); if ("A".equals(prefix)) { if ("".equals(author)) { author = val; } else { author += " and " + val; } } else if ("E".equals(prefix)) { if ("".equals(editor)) { editor = val; } else { editor += " and " + val; } } else if ("T".equals(prefix)) { hm.put(FieldName.TITLE, val); } else if ("0".equals(prefix)) { if (val.indexOf("Journal") == 0) { type = "article"; } else if (val.indexOf("Book Section") == 0) { type = "incollection"; } else if (val.indexOf("Book") == 0) { type = "book"; } else if (val.indexOf("Edited Book") == 0) { type = "book"; isEditedBook = true; } else if (val.indexOf("Conference") == 0) { type = "inproceedings"; } else if (val.indexOf("Report") == 0) { type = "techreport"; } else if (val.indexOf("Review") == 0) { type = "article"; } else if (val.indexOf("Thesis") == 0) { type = "phdthesis"; } else { type = BibEntry.DEFAULT_TYPE; // } } else if ("7".equals(prefix)) { hm.put(FieldName.EDITION, val); } else if ("C".equals(prefix)) { hm.put(FieldName.ADDRESS, val); } else if ("D".equals(prefix)) { hm.put(FieldName.YEAR, val); } else if ("8".equals(prefix)) { hm.put(FieldName.DATE, val); } else if ("J".equals(prefix)) { // "Alternate journal. Let's set it only if no journal // has been set with %B. hm.putIfAbsent(FieldName.JOURNAL, val); } else if ("B".equals(prefix)) { // This prefix stands for "journal" in a journal entry, and // "series" in a book entry. if ("article".equals(type)) { hm.put(FieldName.JOURNAL, val); } else if ("book".equals(type) || "inbook".equals(type)) { hm.put(FieldName.SERIES, val); } else { /* type = inproceedings */ hm.put(FieldName.BOOKTITLE, val); } } else if ("I".equals(prefix)) { if ("phdthesis".equals(type)) { hm.put(FieldName.SCHOOL, val); } else { hm.put(FieldName.PUBLISHER, val); } } // replace single dash page ranges (23-45) with double dashes (23--45): else if ("P".equals(prefix)) { hm.put(FieldName.PAGES, val.replaceAll("([0-9]) *- *([0-9])", "$1--$2")); } else if ("V".equals(prefix)) { hm.put(FieldName.VOLUME, val); } else if ("N".equals(prefix)) { hm.put(FieldName.NUMBER, val); } else if ("U".equals(prefix)) { hm.put(FieldName.URL, val); } else if ("R".equals(prefix)) { String doi = val; if (doi.startsWith("doi:")) { doi = doi.substring(4); } hm.put(FieldName.DOI, doi); } else if ("O".equals(prefix)) { // Notes may contain Article number if (val.startsWith("Artn")) { String[] tokens = val.split("\\s"); artnum = tokens[1]; } else { hm.put(FieldName.NOTE, val); } } else if ("K".equals(prefix)) { hm.put(FieldName.KEYWORDS, val); } else if ("X".equals(prefix)) { hm.put(FieldName.ABSTRACT, val); } else if ("9".equals(prefix)) { if (val.indexOf("Ph.D.") == 0) { type = "phdthesis"; } if (val.indexOf("Masters") == 0) { type = "mastersthesis"; } } else if ("F".equals(prefix)) { hm.put(BibEntry.KEY_FIELD, BibtexKeyPatternUtil.checkLegalKey(val, preferences.getBibtexKeyPatternPreferences().isEnforceLegalKey())); } } // For Edited Book, EndNote puts the editors in the author field. // We want them in the editor field so that bibtex knows it's an edited book if (isEditedBook && "".equals(editor)) { editor = author; author = ""; } //fixauthorscomma if (!"".equals(author)) { hm.put(FieldName.AUTHOR, fixAuthor(author)); } if (!"".equals(editor)) { hm.put(FieldName.EDITOR, fixAuthor(editor)); } //if pages missing and article number given, use the article number if (((hm.get(FieldName.PAGES) == null) || "-".equals(hm.get(FieldName.PAGES))) && !"".equals(artnum)) { hm.put(FieldName.PAGES, artnum); } BibEntry b = new BibEntry(type); b.setField(hm); if (!b.getFieldNames().isEmpty()) { bibitems.add(b); } } return new ParserResult(bibitems); } /** * We must be careful about the author names, since they can be presented differently * by different sources. Normally each %A tag brings one name, and we get the authors * separated by " and ". This is the correct behaviour. * One source lists the names separated by comma, with a comma at the end. We can detect * this format and fix it. * @param s The author string * @return The fixed author string */ private static String fixAuthor(String s) { int index = s.indexOf(" and "); if (index >= 0) { return AuthorList.fixAuthorLastNameFirst(s); } // Look for the comma at the end: index = s.lastIndexOf(','); if (index == (s.length() - 1)) { String mod = s.substring(0, s.length() - 1).replace(", ", " and "); return AuthorList.fixAuthorLastNameFirst(mod); } else { return AuthorList.fixAuthorLastNameFirst(s); } } }