package org.jabref.logic.importer.fileformat;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.jabref.logic.bibtexkeypattern.BibtexKeyPatternUtil;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.Importer;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.util.FileExtensions;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;
/**
 * Importer for the Refer/Endnote format.
 * <p>
 * Modified to use the article number for pages if pages are missing (some
 * journals, e.g., Physical Review Letters, don't use pages anymore).
 * <p>
 * See the following page for details on the format:
 * http://libguides.csuchico.edu/c.php?g=414245&p=2822898
 */
public class EndnoteImporter extends Importer {

    /** Internal marker inserted between records before the input is split into entries. */
    private static final String ENDOFRECORD = "__EOREOR__";
    private static final Pattern A_PATTERN = Pattern.compile("%A .*");
    private static final Pattern E_PATTERN = Pattern.compile("%E .*");
    /** Matches a single-dash page range such as "23-45" or "23 - 45". */
    private static final Pattern PAGE_RANGE_PATTERN = Pattern.compile("([0-9]) *- *([0-9])");

    private final ImportFormatPreferences preferences;

    public EndnoteImporter(ImportFormatPreferences preferences) {
        this.preferences = preferences;
    }

    @Override
    public String getName() {
        return "Refer/Endnote";
    }

    @Override
    public FileExtensions getExtensions() {
        return FileExtensions.ENDNOTE;
    }

    @Override
    public String getId() {
        return "refer";
    }

    @Override
    public String getDescription() {
        return "Importer for the Refer/Endnote format. Modified to use article number for pages if pages are missing.";
    }

    /**
     * Reports whether the input looks like Refer/Endnote data.
     * The strategy is to look for a line that consists of an "%A ..." (author)
     * or "%E ..." (editor) tag.
     *
     * @param reader the input to inspect
     * @return true as soon as one matching line is found, false if the input ends without one
     * @throws IOException if reading from the reader fails
     */
    @Override
    public boolean isRecognizedFormat(BufferedReader reader) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            if (A_PATTERN.matcher(line).matches() || E_PATTERN.matcher(line).matches()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Parses all records of the input into entries.
     * Blank records (for example, from an empty input) are skipped, and so are
     * records that do not yield any field.
     *
     * @param reader the input to parse
     * @return a parser result holding the entries that have at least one field
     * @throws IOException if reading from the reader fails
     */
    @Override
    public ParserResult importDatabase(BufferedReader reader) throws IOException {
        List<BibEntry> bibitems = new ArrayList<>();
        for (String rawRecord : readRecords(reader)) {
            String entry = rawRecord.trim();
            if (entry.isEmpty()) {
                // Nothing to parse; without this guard substring(1) in parseEntry would throw.
                continue;
            }
            BibEntry b = parseEntry(entry);
            if (!b.getFieldNames().isEmpty()) {
                bibitems.add(b);
            }
        }
        return new ParserResult(bibitems);
    }

    /**
     * Reads the whole input, trims every line and splits the text into one string per record.
     * A new record starts at every line beginning with "%0", except for the first such line:
     * anything that precedes it stays part of the first record.
     */
    private static String[] readRecords(BufferedReader reader) throws IOException {
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        String line;
        while ((line = reader.readLine()) != null) {
            line = line.trim();
            if (line.startsWith("%0")) {
                if (first) {
                    first = false;
                } else {
                    sb.append(ENDOFRECORD);
                }
            }
            sb.append(line).append('\n');
        }
        return sb.toString().split(ENDOFRECORD);
    }

    /**
     * Converts one trimmed, non-empty record into an entry.
     * <p>
     * Tags handled (a field is the text between two "\n%" separators, its first
     * character is the tag and the value starts at the third character):
     * <ul>
     * <li>%A author, %E editor (repeated tags are joined with " and ")</li>
     * <li>%T title, %0 reference type, %9 thesis type</li>
     * <li>%7 edition, %C address, %D year, %8 date</li>
     * <li>%B journal (article), series (book) or booktitle (otherwise)</li>
     * <li>%J alternate journal, only used if no journal has been set</li>
     * <li>%I school (phdthesis) or publisher (otherwise)</li>
     * <li>%P pages, %V volume, %N number, %U url, %R doi</li>
     * <li>%O notes (may carry an article number), %K keywords, %X abstract</li>
     * <li>%F label, used as the BibTeX key</li>
     * </ul>
     * The meaning of %B and %I depends on the type known at the moment the tag is
     * read, so the result depends on the order of the tags in the record.
     */
    private BibEntry parseEntry(String entry) {
        Map<String, String> hm = new HashMap<>();
        String author = "";
        String editor = "";
        String artnum = "";
        String type = BibEntry.DEFAULT_TYPE;
        boolean isEditedBook = false;

        // Drop the first character (the leading '%') so that every field starts with its tag.
        String[] fields = entry.substring(1).split("\n%");
        for (String field : fields) {
            if (field.length() < 3) {
                continue;
            }
            char prefix = field.charAt(0);
            String val = field.substring(2);
            switch (prefix) {
                case 'A':
                    author = author.isEmpty() ? val : author + " and " + val;
                    break;
                case 'E':
                    editor = editor.isEmpty() ? val : editor + " and " + val;
                    break;
                case 'T':
                    hm.put(FieldName.TITLE, val);
                    break;
                case '0':
                    type = entryTypeFor(val);
                    if (val.startsWith("Edited Book")) {
                        isEditedBook = true;
                    }
                    break;
                case '7':
                    hm.put(FieldName.EDITION, val);
                    break;
                case 'C':
                    hm.put(FieldName.ADDRESS, val);
                    break;
                case 'D':
                    hm.put(FieldName.YEAR, val);
                    break;
                case '8':
                    hm.put(FieldName.DATE, val);
                    break;
                case 'J':
                    // Alternate journal. Set it only if no journal has been set with %B.
                    hm.putIfAbsent(FieldName.JOURNAL, val);
                    break;
                case 'B':
                    // This prefix stands for "journal" in a journal entry, and
                    // "series" in a book entry.
                    if ("article".equals(type)) {
                        hm.put(FieldName.JOURNAL, val);
                    } else if ("book".equals(type) || "inbook".equals(type)) {
                        hm.put(FieldName.SERIES, val);
                    } else {
                        /* type = inproceedings */
                        hm.put(FieldName.BOOKTITLE, val);
                    }
                    break;
                case 'I':
                    // NOTE(review): a "mastersthesis" type set by %9 still ends up in
                    // publisher here - confirm whether school is intended for it as well.
                    if ("phdthesis".equals(type)) {
                        hm.put(FieldName.SCHOOL, val);
                    } else {
                        hm.put(FieldName.PUBLISHER, val);
                    }
                    break;
                case 'P':
                    // replace single dash page ranges (23-45) with double dashes (23--45):
                    hm.put(FieldName.PAGES, PAGE_RANGE_PATTERN.matcher(val).replaceAll("$1--$2"));
                    break;
                case 'V':
                    hm.put(FieldName.VOLUME, val);
                    break;
                case 'N':
                    hm.put(FieldName.NUMBER, val);
                    break;
                case 'U':
                    hm.put(FieldName.URL, val);
                    break;
                case 'R': {
                    String doi = val;
                    if (doi.startsWith("doi:")) {
                        doi = doi.substring(4);
                    }
                    hm.put(FieldName.DOI, doi);
                    break;
                }
                case 'O': {
                    // Notes may contain the article number ("Artn <number>").
                    String[] tokens = val.startsWith("Artn") ? val.split("\\s+") : new String[0];
                    if (tokens.length > 1) {
                        artnum = tokens[1];
                    } else {
                        // Regular note, or an "Artn" note without a number: keep the text.
                        hm.put(FieldName.NOTE, val);
                    }
                    break;
                }
                case 'K':
                    hm.put(FieldName.KEYWORDS, val);
                    break;
                case 'X':
                    hm.put(FieldName.ABSTRACT, val);
                    break;
                case '9':
                    if (val.startsWith("Ph.D.")) {
                        type = "phdthesis";
                    }
                    if (val.startsWith("Masters")) {
                        type = "mastersthesis";
                    }
                    break;
                case 'F':
                    hm.put(BibEntry.KEY_FIELD, BibtexKeyPatternUtil.checkLegalKey(val,
                            preferences.getBibtexKeyPatternPreferences().isEnforceLegalKey()));
                    break;
                default:
                    // Unknown tags are ignored.
                    break;
            }
        }

        // For Edited Book, EndNote puts the editors in the author field.
        // We want them in the editor field so that bibtex knows it's an edited book
        if (isEditedBook && editor.isEmpty()) {
            editor = author;
            author = "";
        }

        if (!author.isEmpty()) {
            hm.put(FieldName.AUTHOR, fixAuthor(author));
        }
        if (!editor.isEmpty()) {
            hm.put(FieldName.EDITOR, fixAuthor(editor));
        }

        // if pages are missing (or just "-") and an article number is given, use the article number
        String pages = hm.get(FieldName.PAGES);
        if (((pages == null) || "-".equals(pages)) && !artnum.isEmpty()) {
            hm.put(FieldName.PAGES, artnum);
        }

        BibEntry b = new BibEntry(type);
        b.setField(hm);
        return b;
    }

    /**
     * Maps the value of a %0 tag to an entry type name.
     * The checks are prefix matches, so "Book Section" has to be tested before "Book".
     *
     * @param referType the value of the %0 tag
     * @return the entry type name, or {@link BibEntry#DEFAULT_TYPE} if the value is not recognized
     */
    private static String entryTypeFor(String referType) {
        if (referType.startsWith("Journal")) {
            return "article";
        } else if (referType.startsWith("Book Section")) {
            return "incollection";
        } else if (referType.startsWith("Book")) {
            return "book";
        } else if (referType.startsWith("Edited Book")) {
            return "book";
        } else if (referType.startsWith("Conference")) {
            return "inproceedings";
        } else if (referType.startsWith("Report")) {
            return "techreport";
        } else if (referType.startsWith("Review")) {
            return "article";
        } else if (referType.startsWith("Thesis")) {
            return "phdthesis";
        }
        return BibEntry.DEFAULT_TYPE;
    }

    /**
     * We must be careful about the author names, since they can be presented differently
     * by different sources. Normally each %A tag brings one name, and we get the authors
     * separated by " and ". This is the correct behaviour.
     * One source lists the names separated by comma, with a comma at the end. We can detect
     * this format and fix it.
     *
     * @param s The author string
     * @return The fixed author string
     */
    private static String fixAuthor(String s) {
        // Only a string without " and " that ends with a comma is treated as a comma-separated list.
        if (!s.contains(" and ") && s.endsWith(",")) {
            String mod = s.substring(0, s.length() - 1).replace(", ", " and ");
            return AuthorList.fixAuthorLastNameFirst(mod);
        }
        return AuthorList.fixAuthorLastNameFirst(s);
    }
}