ScienceDirect.java example

Explorer
jabref-master
- src
package org.jabref.logic.importer.fetcher;

import java.io.IOException;
import java.net.URL;
import java.util.Objects;
import java.util.Optional;

import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;
import org.jabref.model.entry.identifier.DOI;

import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.JsonNode;
import com.mashape.unirest.http.Unirest;
import com.mashape.unirest.http.exceptions.UnirestException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/**
 * FulltextFetcher implementation that attempts to find a PDF URL at ScienceDirect.
 *
 * @see http://dev.elsevier.com/
 */
public class ScienceDirect implements FulltextFetcher {
    private static final Log LOGGER = LogFactory.getLog(ScienceDirect.class);

    private static final String API_URL = "http://api.elsevier.com/content/article/doi/";
    private static final String API_KEY = "fb82f2e692b3c72dafe5f4f1fa0ac00b";
    @Override
    public Optional<URL> findFullText(BibEntry entry) throws IOException {
        Objects.requireNonNull(entry);

        // Try unique DOI first
        Optional<DOI> doi = entry.getField(FieldName.DOI).flatMap(DOI::parse);

        if (doi.isPresent()) {
            // Available in catalog?
            try {
                String sciLink = getUrlByDoi(doi.get().getDOI());

                // scrape the web page not as mobile client!
                if (!sciLink.isEmpty()) {
                    Document html = Jsoup.connect(sciLink)
                            .userAgent(URLDownload.USER_AGENT)
                            .referrer("http://www.google.com")
                            .ignoreHttpErrors(true).get();

                    // Retrieve PDF link (old page)
                    Element link = html.getElementById("pdfLink");

                    if (link != null) {
                        LOGGER.info("Fulltext PDF found @ ScienceDirect (old page).");
                        Optional<URL> pdfLink = Optional.of(new URL(link.attr("pdfurl")));
                        return pdfLink;
                    }
                    // Retrieve PDF link (new page)
                    String url = html.getElementsByClass("pdf-download-btn-link").attr("href");

                    if (url != null) {
                        LOGGER.info("Fulltext PDF found @ ScienceDirect (new page).");
                        Optional<URL> pdfLink = Optional.of(new URL("http://www.sciencedirect.com" + url));
                        return pdfLink;
                    }
                }
            } catch (UnirestException e) {
                LOGGER.warn("ScienceDirect API request failed", e);
            }
        }
        return Optional.empty();
    }

    private String getUrlByDoi(String doi) throws UnirestException {
        String sciLink = "";
        try {
            String request = API_URL + doi;
            HttpResponse<JsonNode> jsonResponse = Unirest.get(request)
                    .header("X-ELS-APIKey", API_KEY)
                    .queryString("httpAccept", "application/json")
                    .asJson();

            JSONObject json = jsonResponse.getBody().getObject();
            JSONArray links = json.getJSONObject("full-text-retrieval-response").getJSONObject("coredata").getJSONArray("link");

            for (int i = 0; i < links.length(); i++) {
                JSONObject link = links.getJSONObject(i);
                if (link.getString("@rel").equals("scidir")) {
                    sciLink = link.getString("@href");
                }
            }
            return sciLink;
        } catch (JSONException e) {
            LOGGER.debug("No ScienceDirect link found in API request", e);
            return sciLink;
        }
    }
}