package org.jabref.logic.importer.fileformat;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.jabref.logic.importer.Importer;
import org.jabref.logic.importer.ParserResult;
import org.jabref.logic.util.FileExtensions;
import org.jabref.logic.util.OS;
import org.jabref.model.entry.AuthorList;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;
import org.jabref.model.entry.Month;
/**
 * Imports bibliographic records from a file in the RIS tagged format.
 * <p>
 * Every tagged line consists of a two-character tag, a four-character separator (two spaces,
 * a hyphen and one space) and the value. A record ends with a line carrying the {@code ER} tag.
 * A line of six or more characters that does not have the separator at columns 2-5 is treated
 * as a continuation of the preceding line.
 * <p>
 * Tags that are not handled explicitly are ignored. The tags {@code U1}, {@code U2} and
 * {@code N1} are only collected into the BibTeX field "comment".
 */
public class RisImporter extends Importer {

    /** Separator between tag and value: two spaces, a hyphen, one space (4 characters). */
    private static final String TAG_SEPARATOR = "  - ";

    /** Number of characters in a tag. */
    private static final int TAG_LENGTH = 2;

    /** Offset of the value within a tagged line (tag length plus separator length). */
    private static final int VALUE_OFFSET = 6;

    /** A line announcing the reference type; its presence identifies the format. */
    private static final Pattern RECOGNIZED_FORMAT_PATTERN = Pattern.compile("TY" + TAG_SEPARATOR + ".*");

    /** End-of-record line including its line break; used to split the input into records. */
    private static final Pattern END_OF_RECORD_PATTERN = Pattern.compile("ER  -.*\\n");

    /** One or more whitespace characters; used to normalize titles. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    @Override
    public String getName() {
        return "RIS";
    }

    @Override
    public FileExtensions getExtensions() {
        return FileExtensions.RIS;
    }

    @Override
    public String getDescription() {
        // NOTE(review): the text mentions Biblioscape although this class parses RIS.
        // It is left unchanged because callers may depend on the exact string.
        return "Imports a Biblioscape Tag File.";
    }

    /**
     * Checks whether the input looks like RIS by searching for a reference type line.
     *
     * @param reader source of the lines to inspect
     * @return {@code true} if any line contains a {@code TY} tag followed by the separator
     * @throws IOException declared by the overridden method
     */
    @Override
    public boolean isRecognizedFormat(BufferedReader reader) throws IOException {
        return reader.lines().anyMatch(line -> RECOGNIZED_FORMAT_PATTERN.matcher(line).find());
    }

    /**
     * Reads all lines, splits them into records at the {@code ER} lines and converts every
     * record into a {@link BibEntry}.
     *
     * @param reader source of the RIS data
     * @return a parser result holding one entry per record chunk
     * @throws IOException declared by the overridden method
     */
    @Override
    public ParserResult importDatabase(BufferedReader reader) throws IOException {
        // Collectors.joining yields "" for an empty input, so no Optional handling is required.
        String content = reader.lines().collect(Collectors.joining("\n"));

        // Map typographic dashes to their ASCII counterparts before parsing.
        String normalized = content.replace("\u2013", "-").replace("\u2014", "--").replace("\u2015", "--");

        List<BibEntry> bibitems = new ArrayList<>();
        for (String rawEntry : END_OF_RECORD_PATTERN.split(normalized)) {
            bibitems.add(parseEntry(rawEntry));
        }
        return new ParserResult(bibitems);
    }

    /**
     * Converts the text of a single record into a {@link BibEntry}.
     *
     * @param rawEntry the lines of one record, separated by {@code \n}
     * @return the entry built from the recognized tags; fields with blank values are left out
     */
    private BibEntry parseEntry(String rawEntry) {
        String type = "";
        String author = "";
        String editor = "";
        String startPage = "";
        String endPage = "";
        String comment = "";
        Optional<Month> month = Optional.empty();
        Map<String, String> fields = new HashMap<>();

        for (String line : mergeContinuationLines(rawEntry.split("\n"))) {
            if (line.length() < VALUE_OFFSET) {
                // Too short to hold a tag and a separator.
                continue;
            }
            String tag = line.substring(0, TAG_LENGTH);
            String value = line.substring(VALUE_OFFSET).trim();

            switch (tag) {
                case "TY":
                    type = mapEntryType(value);
                    break;
                case "T1":
                case "TI":
                    fields.put(FieldName.TITLE, mergeTitle(fields.get(FieldName.TITLE), value));
                    break;
                case "BT":
                    fields.put(FieldName.BOOKTITLE, value);
                    break;
                case "T2":
                    // The secondary title only serves as journal while no journal is known.
                    String knownJournal = fields.get(FieldName.JOURNAL);
                    if ((knownJournal == null) || knownJournal.isEmpty()) {
                        fields.put(FieldName.JOURNAL, value);
                    }
                    break;
                case "JO":
                    // An explicit journal tag always wins.
                    fields.put(FieldName.JOURNAL, value);
                    break;
                case "T3":
                    fields.put(FieldName.SERIES, value);
                    break;
                case "AU":
                case "A1":
                    author = appendWithSeparator(author, " and ", value);
                    break;
                case "A2":
                case "A3":
                case "A4":
                    editor = appendWithSeparator(editor, " and ", value);
                    break;
                case "JA":
                case "JF":
                    if ("inproceedings".equals(type)) {
                        fields.put(FieldName.BOOKTITLE, value);
                    } else {
                        fields.put(FieldName.JOURNAL, value);
                    }
                    break;
                case "LA":
                    fields.put(FieldName.LANGUAGE, value);
                    break;
                case "CA":
                    fields.put("caption", value);
                    break;
                case "DB":
                    fields.put("database", value);
                    break;
                case "IS":
                    fields.put(FieldName.NUMBER, value);
                    break;
                case "SP":
                    startPage = value;
                    break;
                case "PB":
                    if ("phdthesis".equals(type)) {
                        fields.put(FieldName.SCHOOL, value);
                    } else {
                        fields.put(FieldName.PUBLISHER, value);
                    }
                    break;
                case "AD":
                case "CY":
                    fields.put(FieldName.ADDRESS, value);
                    break;
                case "EP":
                    endPage = value.isEmpty() ? value : "--" + value;
                    break;
                case "ET":
                    fields.put(FieldName.EDITION, value);
                    break;
                case "SN":
                    fields.put(FieldName.ISSN, value);
                    break;
                case "VL":
                    fields.put(FieldName.VOLUME, value);
                    break;
                case "N2":
                case "AB":
                    fields.merge(FieldName.ABSTRACT, value, (old, addition) -> old + OS.NEWLINE + addition);
                    break;
                case "UR":
                    fields.put(FieldName.URL, value);
                    break;
                case "Y1":
                case "PY":
                case "DA":
                    if (value.length() >= 4) {
                        fields.put(FieldName.YEAR, value.substring(0, 4));
                        month = parseMonth(value, month);
                    }
                    break;
                case "KW":
                    fields.merge(FieldName.KEYWORDS, value, (old, addition) -> old + ", " + addition);
                    break;
                case "U1":
                case "U2":
                case "N1":
                    comment = appendWithSeparator(comment, " ", value);
                    break;
                case "ID":
                    fields.put("refid", value);
                    break;
                case "M3":
                case "DO":
                    addDoi(fields, value);
                    break;
                default:
                    // Unsupported tag (including the trailing ER of the last record): ignore.
                    break;
            }
        }

        // Finalize the accumulated values once, after all lines of the record were consumed.
        if (!author.isEmpty()) {
            fields.put(FieldName.AUTHOR, AuthorList.fixAuthorLastNameFirst(author));
        }
        if (!editor.isEmpty()) {
            fields.put(FieldName.EDITOR, AuthorList.fixAuthorLastNameFirst(editor));
        }
        if (!comment.isEmpty()) {
            fields.put(FieldName.COMMENT, comment);
        }
        fields.put(FieldName.PAGES, startPage + endPage);

        // Remove empty fields:
        fields.values().removeIf(fieldValue -> (fieldValue == null) || fieldValue.trim().isEmpty());

        BibEntry b = new BibEntry(type);
        b.setField(fields);
        // The month is set through the dedicated setter instead of the generic field map.
        month.ifPresent(parsedMonth -> b.setMonth(parsedMonth));
        return b;
    }

    /**
     * Folds continuation lines into the line they belong to.
     * <p>
     * A single space is inserted at the join unless either side already has whitespace there.
     *
     * @param lines the physical lines of one record
     * @return the logical lines, in input order
     */
    private static List<String> mergeContinuationLines(String[] lines) {
        List<String> merged = new ArrayList<>();
        int index = 0;
        while (index < lines.length) {
            StringBuilder current = new StringBuilder(lines[index++]);
            while ((index < lines.length) && isContinuationLine(lines[index])) {
                String next = lines[index++];
                boolean needsSpace = (current.length() > 0)
                        && !Character.isWhitespace(current.charAt(current.length() - 1))
                        && !Character.isWhitespace(next.charAt(0));
                if (needsSpace) {
                    current.append(' ');
                }
                current.append(next);
            }
            merged.add(current.toString());
        }
        return merged;
    }

    /**
     * Tells whether a line continues the previous one: it is at least as long as a tag plus
     * separator but does not carry the separator right after the tag position.
     */
    private static boolean isContinuationLine(String line) {
        return (line.length() >= VALUE_OFFSET) && !line.startsWith(TAG_SEPARATOR, TAG_LENGTH);
    }

    /**
     * Maps the value of the {@code TY} tag to the entry type name used for the {@link BibEntry}.
     *
     * @param risType value of the {@code TY} tag
     * @return the matching entry type, or "other" for any value without a mapping
     */
    private static String mapEntryType(String risType) {
        switch (risType) {
            case "BOOK":
                return "book";
            case "JOUR":
            case "MGZN":
                return "article";
            case "THES":
                return "phdthesis";
            case "UNPB":
                return "unpublished";
            case "RPRT":
                return "techreport";
            case "CONF":
                return "inproceedings";
            case "CHAP":
                return "incollection";
            case "PAT":
                return "patent";
            default:
                return "other";
        }
    }

    /**
     * Combines a further title line with the title collected so far and collapses whitespace runs.
     *
     * @param existing the title so far, or {@code null} if there is none yet
     * @param addition the new title part
     * @return the combined title; parts are joined with ": " unless the existing title already
     *         ends with ':', '.' or '?', in which case a single space is used
     */
    private static String mergeTitle(String existing, String addition) {
        String combined;
        if (existing == null) {
            combined = addition;
        } else if (existing.endsWith(":") || existing.endsWith(".") || existing.endsWith("?")) {
            combined = existing + " " + addition;
        } else {
            combined = existing + ": " + addition;
        }
        return WHITESPACE_RUN.matcher(combined).replaceAll(" ");
    }

    /**
     * Appends {@code addition} to {@code existing}, inserting {@code separator} only when
     * {@code existing} is not empty.
     */
    private static String appendWithSeparator(String existing, String separator, String addition) {
        return existing.isEmpty() ? addition : existing + separator + addition;
    }

    /**
     * Extracts the month from a date value whose parts are separated by '/'.
     *
     * @param value   the date value; the second '/'-separated part is read as the month number
     * @param current the month known so far
     * @return the result of the month lookup if the second part is a number; otherwise
     *         {@code current}
     */
    private static Optional<Month> parseMonth(String value, Optional<Month> current) {
        String[] parts = value.split("/");
        if ((parts.length > 1) && !parts[1].isEmpty()) {
            try {
                return Month.getMonthByNumber(Integer.parseInt(parts[1]));
            } catch (NumberFormatException ignored) {
                // The month part is unparseable, so we ignore it.
            }
        }
        return current;
    }

    /**
     * Stores a DOI in the field map if the value starts with "doi:" (case-insensitive).
     * <p>
     * The value is lower-cased, every occurrence of "doi:" is removed and the rest is trimmed.
     * Values without the prefix are ignored.
     *
     * @param hm  the field map to update
     * @param val the raw tag value
     */
    private void addDoi(Map<String, String> hm, String val) {
        String doi = val.toLowerCase(Locale.ENGLISH);
        if (doi.startsWith("doi:")) {
            hm.put(FieldName.DOI, doi.replace("doi:", "").trim());
        }
    }
}