package org.jabref.logic.importer.fetcher; import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.Optional; import org.jabref.logic.formatter.bibtexfields.ClearFormatter; import org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter; import org.jabref.logic.importer.EntryBasedParserFetcher; import org.jabref.logic.importer.FetcherException; import org.jabref.logic.importer.IdBasedParserFetcher; import org.jabref.logic.importer.IdParserFetcher; import org.jabref.logic.importer.ParseException; import org.jabref.logic.importer.Parser; import org.jabref.logic.importer.SearchBasedParserFetcher; import org.jabref.logic.importer.util.JsonReader; import org.jabref.logic.util.strings.StringSimilarity; import org.jabref.model.cleanup.FieldFormatterCleanup; import org.jabref.model.entry.AuthorList; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.BiblatexEntryTypes; import org.jabref.model.entry.EntryType; import org.jabref.model.entry.FieldName; import org.jabref.model.entry.identifier.DOI; import org.jabref.model.util.OptionalUtil; import org.apache.http.client.utils.URIBuilder; import org.json.JSONArray; import org.json.JSONException; import org.json.JSONObject; /** * A class for fetching DOIs from CrossRef * * See https://github.com/CrossRef/rest-api-doc */ public class CrossRef implements IdParserFetcher<DOI>, EntryBasedParserFetcher, SearchBasedParserFetcher, IdBasedParserFetcher { private static final String API_URL = "http://api.crossref.org/works"; private static final RemoveBracesFormatter REMOVE_BRACES_FORMATTER = new RemoveBracesFormatter(); @Override public String getName() { return "Crossref"; } @Override public URL getURLForEntry(BibEntry entry) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder(API_URL); entry.getLatexFreeField(FieldName.TITLE).ifPresent(title -> uriBuilder.addParameter("query.title", title)); entry.getLatexFreeField(FieldName.AUTHOR).ifPresent(author -> uriBuilder.addParameter("query.author", author)); entry.getLatexFreeField(FieldName.YEAR).ifPresent(year -> uriBuilder.addParameter("filter", "from-pub-date:" + year) ); uriBuilder.addParameter("rows", "20"); // = API default uriBuilder.addParameter("offset", "0"); // start at beginning return uriBuilder.build().toURL(); } @Override public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder(API_URL); uriBuilder.addParameter("query", query); return uriBuilder.build().toURL(); } @Override public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException { URIBuilder uriBuilder = new URIBuilder(API_URL + "/" + identifier); return uriBuilder.build().toURL(); } @Override public Parser getParser() { return inputStream -> { JSONObject response = JsonReader.toJsonObject(inputStream).getJSONObject("message"); List<BibEntry> entries = new ArrayList<>(); if (response.has("items")) { // Response contains a list JSONArray items = response.getJSONArray("items"); for (int i = 0; i < items.length(); i++) { JSONObject item = items.getJSONObject(i); BibEntry entry = jsonItemToBibEntry(item); entries.add(entry); } } else { // Singleton response BibEntry entry = jsonItemToBibEntry(response); entries.add(entry); } return entries; }; } @Override public void doPostCleanup(BibEntry entry) { // Sometimes the fetched entry returns the title also in the subtitle field; in this case only keep the title field if (entry.getField(FieldName.TITLE).equals(entry.getField(FieldName.SUBTITLE))) { new FieldFormatterCleanup(FieldName.SUBTITLE, new ClearFormatter()).cleanup(entry); } } private BibEntry jsonItemToBibEntry(JSONObject item) throws ParseException { try { BibEntry entry = new BibEntry(); entry.setType(convertType(item.getString("type"))); entry.setField(FieldName.TITLE, item.getJSONArray("title").optString(0)); entry.setField(FieldName.SUBTITLE, Optional.ofNullable(item.optJSONArray("subtitle")) .map(array -> array.optString(0)).orElse("")); entry.setField(FieldName.AUTHOR, toAuthors(item.optJSONArray("author"))); entry.setField(FieldName.YEAR, Optional.ofNullable(item.optJSONObject("published-print")) .map(array -> array.optJSONArray("date-parts")) .map(array -> array.optJSONArray(0)) .map(array -> array.optInt(0)) .map(year -> Integer.toString(year)).orElse("") ); entry.setField(FieldName.DOI, item.getString("DOI")); entry.setField(FieldName.PAGES, item.optString("page")); entry.setField(FieldName.VOLUME, item.optString("volume")); entry.setField(FieldName.ISSN, Optional.ofNullable(item.optJSONArray("ISSN")).map(array -> array.getString(0)).orElse("")); return entry; } catch (JSONException exception) { throw new ParseException("CrossRef API JSON format has changed", exception); } } private String toAuthors(JSONArray authors) { if (authors == null) { return ""; } // input: list of {"given":"A.","family":"Riel","affiliation":[]} AuthorList authorsParsed = new AuthorList(); for (int i = 0; i < authors.length(); i++) { JSONObject author = authors.getJSONObject(i); authorsParsed.addAuthor( author.optString("given", ""), "", "", author.optString("family", ""), ""); } return authorsParsed.getAsFirstLastNamesWithAnd(); } private EntryType convertType(String type) { switch (type) { case "journal-article": return BiblatexEntryTypes.ARTICLE; default: return BiblatexEntryTypes.MISC; } } @Override public Optional<DOI> extractIdentifier(BibEntry inputEntry, List<BibEntry> fetchedEntries) throws FetcherException { final String entryTitle = REMOVE_BRACES_FORMATTER.format(inputEntry.getLatexFreeField(FieldName.TITLE).orElse("")); final StringSimilarity stringSimilarity = new StringSimilarity(); for (BibEntry fetchedEntry : fetchedEntries) { // currently only title-based comparison // title Optional<String> dataTitle = fetchedEntry.getField(FieldName.TITLE); if (OptionalUtil.isPresentAnd(dataTitle, title -> stringSimilarity.isSimilar(entryTitle, title))) { return fetchedEntry.getDOI(); } // subtitle // additional check, as sometimes subtitle is needed but sometimes only duplicates the title Optional<String> dataSubtitle = fetchedEntry.getField(FieldName.SUBTITLE); Optional<String> dataWithSubTitle = OptionalUtil.combine(dataTitle, dataSubtitle, (title, subtitle) -> title + " " + subtitle); if (OptionalUtil.isPresentAnd(dataWithSubTitle, titleWithSubtitle -> stringSimilarity.isSimilar(entryTitle, titleWithSubtitle))) { return fetchedEntry.getDOI(); } } return Optional.empty(); } @Override public String getIdentifierName() { return "DOI"; } }