package org.jabref.logic.importer.fetcher; import java.io.IOException; import java.net.URL; import java.util.Objects; import java.util.Optional; import org.jabref.logic.importer.FulltextFetcher; import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.FieldName; import org.jabref.model.entry.identifier.DOI; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; /** * FulltextFetcher implementation that attempts to find a PDF URL at ACS. */ public class ACS implements FulltextFetcher { private static final Log LOGGER = LogFactory.getLog(ACS.class); private static final String SOURCE = "http://pubs.acs.org/doi/abs/%s"; /** * Tries to find a fulltext URL for a given BibTex entry. * * Currently only uses the DOI if found. * * @param entry The Bibtex entry * @return The fulltext PDF URL Optional, if found, or an empty Optional if not found. * @throws NullPointerException if no BibTex entry is given * @throws java.io.IOException */ @Override public Optional<URL> findFullText(BibEntry entry) throws IOException { Objects.requireNonNull(entry); Optional<URL> pdfLink = Optional.empty(); // DOI search Optional<DOI> doi = entry.getField(FieldName.DOI).flatMap(DOI::parse); if (doi.isPresent()) { String source = String.format(SOURCE, doi.get().getDOI()); // Retrieve PDF link Document html = Jsoup.connect(source).ignoreHttpErrors(true).get(); Element link = html.select(".pdf-high-res a").first(); if (link != null) { LOGGER.info("Fulltext PDF found @ ACS."); pdfLink = Optional.of(new URL(source.replaceFirst("/abs/", "/pdf/"))); } } return pdfLink; } }