package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.regex.Pattern;

import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
import org.jabref.logic.formatter.bibtexfields.NormalizeNamesFormatter;
import org.jabref.logic.formatter.bibtexfields.RemoveBracesFormatter;
import org.jabref.logic.help.HelpFile;
import org.jabref.logic.importer.EntryBasedParserFetcher;
import org.jabref.logic.importer.FetcherException;
import org.jabref.logic.importer.IdBasedParserFetcher;
import org.jabref.logic.importer.ImportFormatPreferences;
import org.jabref.logic.importer.ParseException;
import org.jabref.logic.importer.Parser;
import org.jabref.logic.importer.SearchBasedParserFetcher;
import org.jabref.logic.importer.fileformat.BibtexParser;
import org.jabref.logic.l10n.Localization;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.cleanup.FieldFormatterCleanup;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;

import org.apache.http.client.utils.URIBuilder;
import org.jsoup.helper.StringUtil;

/**
 * Fetches data from the SAO/NASA Astrophysics Data System (http://www.adsabs.harvard.edu/)
 *
 * Search query-based: http://adsabs.harvard.edu/basic_search.html
 * Entry-based: http://adsabs.harvard.edu/abstract_service.html
 *
 * There is also a new API (https://github.com/adsabs/adsabs-dev-api) but it returns JSON
 * (or at least needs multiple calls to get BibTeX, status: September 2016)
 */
public class AstrophysicsDataSystem implements IdBasedParserFetcher, SearchBasedParserFetcher, EntryBasedParserFetcher {

    /** Endpoint used for free-text queries (see {@link #getURLForQuery(String)}). */
    private static final String API_QUERY_URL = "http://adsabs.harvard.edu/cgi-bin/nph-basic_connect";
    /** Endpoint used for title/author based lookups (see {@link #getURLForEntry(BibEntry)}). */
    private static final String API_ENTRY_URL = "http://adsabs.harvard.edu/cgi-bin/nph-abs_connect";
    /** Prefix to which the identifier is appended for identifier lookups (see {@link #getURLForID(String)}). */
    private static final String API_DOI_URL = "http://adsabs.harvard.edu/doi/";

    /** Matches a leading {@code doi:} or {@code DOI:} prefix; compiled once instead of on every lookup. */
    private static final Pattern DOI_PREFIX_PATTERN = Pattern.compile("^(doi:|DOI:)");

    /** Value sent as {@code data_type} on every request. */
    private static final String DATA_TYPE = "BIBTEXPLUS";
    /** Value sent as {@code start_nr} by {@link #getBaseUrl(String)}. */
    private static final int START_NUMBER = 1;
    /** Value sent as {@code nr_to_return} by {@link #getBaseUrl(String)}. */
    private static final int NUMBER_TO_RETURN = 200;

    private final ImportFormatPreferences preferences;

    /**
     * Creates a new fetcher.
     *
     * @param preferences preferences handed to the {@link BibtexParser} created by {@link #getParser()};
     *                    must not be {@code null}
     * @throws NullPointerException if {@code preferences} is {@code null}
     */
    public AstrophysicsDataSystem(ImportFormatPreferences preferences) {
        this.preferences = Objects.requireNonNull(preferences);
    }

    @Override
    public String getName() {
        return "SAO/NASA Astrophysics Data System";
    }

    /**
     * Creates a URI builder for the given endpoint, pre-populated with the parameters shared by the
     * query and the entry search ({@code data_type}, {@code start_nr} and {@code nr_to_return}).
     *
     * @param apiUrl the endpoint to build upon
     * @return a builder to which the caller adds the search-specific parameters
     * @throws URISyntaxException if {@code apiUrl} is not a valid URI
     */
    private URIBuilder getBaseUrl(String apiUrl) throws URISyntaxException {
        URIBuilder uriBuilder = new URIBuilder(apiUrl);
        uriBuilder.addParameter("data_type", DATA_TYPE);
        uriBuilder.addParameter("start_nr", String.valueOf(START_NUMBER));
        uriBuilder.addParameter("nr_to_return", String.valueOf(NUMBER_TO_RETURN));
        return uriBuilder;
    }

    /**
     * Builds the URL for a free-text search; the query is passed as the {@code qsearch} parameter.
     *
     * @param query the search query
     * @return the URL to request
     */
    @Override
    public URL getURLForQuery(String query) throws URISyntaxException, MalformedURLException, FetcherException {
        URIBuilder uriBuilder = getBaseUrl(API_QUERY_URL);
        uriBuilder.addParameter("qsearch", query);
        return uriBuilder.build().toURL();
    }

    /**
     * Builds the URL for a search based on the title and author of the given entry.
     * Title and author parameters are only added when the respective field (or its alias) is present.
     *
     * @param entry the entry whose title and author are used as search terms
     * @return the URL to request
     */
    @Override
    public URL getURLForEntry(BibEntry entry) throws URISyntaxException, MalformedURLException, FetcherException {
        URIBuilder uriBuilder = getBaseUrl(API_ENTRY_URL);

        // Search astronomy + physics + arXiv db
        uriBuilder.addParameter("db_key", "AST");
        uriBuilder.addParameter("db_key", "PHY");
        uriBuilder.addParameter("db_key", "PRE");

        // Add title search
        entry.getFieldOrAlias(FieldName.TITLE).ifPresent(title -> {
            uriBuilder.addParameter("ttl_logic", "OR");
            uriBuilder.addParameter("title", title);
            uriBuilder.addParameter("ttl_syn", "YES"); // Synonym replacement
            uriBuilder.addParameter("ttl_wt", "0.3"); // Weight
            uriBuilder.addParameter("ttl_wgt", "YES"); // Consider weight
        });

        // Add author search
        entry.getFieldOrAlias(FieldName.AUTHOR).ifPresent(author -> {
            uriBuilder.addParameter("aut_logic", "OR");
            uriBuilder.addParameter("author", author);
            uriBuilder.addParameter("aut_syn", "YES"); // Synonym replacement
            uriBuilder.addParameter("aut_wt", "1.0"); // Weight
            uriBuilder.addParameter("aut_wgt", "YES"); // Consider weight
        });

        return uriBuilder.build().toURL();
    }

    /**
     * Builds the URL for an identifier lookup. A leading {@code doi:} or {@code DOI:} prefix is
     * stripped before the identifier is appended to the lookup endpoint.
     *
     * @param identifier the identifier, optionally prefixed with {@code doi:} / {@code DOI:}
     * @return the URL to request
     * @throws URISyntaxException if the endpoint plus identifier does not form a valid URI
     */
    @Override
    public URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException {
        String key = DOI_PREFIX_PATTERN.matcher(identifier).replaceAll("");
        URIBuilder uriBuilder = new URIBuilder(API_DOI_URL + key);
        uriBuilder.addParameter("data_type", DATA_TYPE);
        return uriBuilder.build().toURL();
    }

    @Override
    public HelpFile getHelpPage() {
        return HelpFile.FETCHER_ADS;
    }

    /**
     * @return a new {@link BibtexParser} configured with the preferences given at construction time
     */
    @Override
    public Parser getParser() {
        return new BibtexParser(preferences);
    }

    /**
     * Runs a free-text search and returns the parsed, post-processed entries.
     *
     * @param query the search query; a blank query yields an empty list without any request being made
     * @return the entries parsed from the response, after {@link #doPostCleanup(BibEntry)} has been applied to each
     * @throws FetcherException if the search URL cannot be built, an I/O error occurs, or the response
     *                          cannot be parsed; the original exception is kept as cause
     */
    @Override
    public List<BibEntry> performSearch(String query) throws FetcherException {
        if (StringUtil.isBlank(query)) {
            return Collections.emptyList();
        }

        try {
            URLConnection connection = getURLForQuery(query).openConnection();
            connection.setRequestProperty("User-Agent", URLDownload.USER_AGENT);
            try (InputStream stream = connection.getInputStream()) {
                List<BibEntry> fetchedEntries = getParser().parseEntries(stream);

                // Post-cleanup
                fetchedEntries.forEach(this::doPostCleanup);

                return fetchedEntries;
            }
        } catch (URISyntaxException | MalformedURLException e) {
            throw new FetcherException("Search URI is malformed", e);
        } catch (IOException e) {
            // Covers opening the connection as well as reading from / closing the stream
            throw new FetcherException("An I/O exception occurred", e);
        } catch (ParseException e) {
            throw new FetcherException("Error occurred when parsing entry", Localization.lang("Error occurred when parsing entry"), e);
        }
    }

    /**
     * Post-processes a fetched entry: runs {@link RemoveBracesFormatter} on abstract and title,
     * {@link NormalizeNamesFormatter} on the author field, and {@link ClearFormatter} on the
     * {@code adsnote} and {@code adsurl} fields.
     *
     * @param entry the entry to clean up in place
     */
    @Override
    public void doPostCleanup(BibEntry entry) {
        new FieldFormatterCleanup(FieldName.ABSTRACT, new RemoveBracesFormatter()).cleanup(entry);
        new FieldFormatterCleanup(FieldName.TITLE, new RemoveBracesFormatter()).cleanup(entry);
        new FieldFormatterCleanup(FieldName.AUTHOR, new NormalizeNamesFormatter()).cleanup(entry);

        // Remove url to ADS page
        new FieldFormatterCleanup("adsnote", new ClearFormatter()).cleanup(entry);
        new FieldFormatterCleanup("adsurl", new ClearFormatter()).cleanup(entry);
    }
}