package org.jabref.logic.importer.fetcher;
import java.io.IOException;
import java.net.URL;
import java.util.Objects;
import java.util.Optional;
import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;
import org.jabref.model.entry.identifier.DOI;
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.JsonNode;
import com.mashape.unirest.http.Unirest;
import com.mashape.unirest.http.exceptions.UnirestException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
/**
 * FulltextFetcher implementation that attempts to find a PDF URL at ScienceDirect.
 *
 * @see <a href="http://dev.elsevier.com/">dev.elsevier.com</a>
 */
public class ScienceDirect implements FulltextFetcher {
    private static final Log LOGGER = LogFactory.getLog(ScienceDirect.class);

    private static final String API_URL = "http://api.elsevier.com/content/article/doi/";
    // NOTE(review): the API key is hard-coded in source; consider loading it from configuration.
    private static final String API_KEY = "fb82f2e692b3c72dafe5f4f1fa0ac00b";

    /**
     * Tries to locate the fulltext PDF of the given entry at ScienceDirect.
     * <p>
     * The entry's DOI is resolved to a ScienceDirect article page through the Elsevier API; that page is then
     * scraped for a PDF link, first using the old page layout ({@code #pdfLink} element with a {@code pdfurl}
     * attribute) and then the new one ({@code .pdf-download-btn-link} element with a {@code href} attribute).
     *
     * @param entry the entry to look up, must not be {@code null}
     * @return the URL of the PDF, or an empty Optional if the entry has no parsable DOI, the API does not return
     *         a ScienceDirect link, the page contains no PDF link, or the API request fails
     * @throws IOException          if the article page cannot be fetched or the scraped link is not a valid URL
     * @throws NullPointerException if {@code entry} is {@code null}
     */
    @Override
    public Optional<URL> findFullText(BibEntry entry) throws IOException {
        Objects.requireNonNull(entry);

        // The lookup is only possible via the DOI
        Optional<DOI> doi = entry.getField(FieldName.DOI).flatMap(DOI::parse);
        if (!doi.isPresent()) {
            return Optional.empty();
        }

        try {
            // Available in catalog?
            String sciLink = getUrlByDoi(doi.get().getDOI());
            if (sciLink.isEmpty()) {
                return Optional.empty();
            }

            // scrape the web page not as mobile client!
            Document html = Jsoup.connect(sciLink)
                    .userAgent(URLDownload.USER_AGENT)
                    .referrer("http://www.google.com")
                    .ignoreHttpErrors(true)
                    .get();

            // Retrieve PDF link (old page)
            Element link = html.getElementById("pdfLink");
            if (link != null) {
                // attr() yields "" for a missing attribute; skip it instead of failing on new URL("")
                String pdfUrl = link.attr("pdfurl");
                if (!pdfUrl.isEmpty()) {
                    LOGGER.info("Fulltext PDF found @ ScienceDirect (old page).");
                    return Optional.of(new URL(pdfUrl));
                }
            }

            // Retrieve PDF link (new page)
            // attr() never returns null: it is "" when no matching element carries the attribute
            String url = html.getElementsByClass("pdf-download-btn-link").attr("href");
            if (!url.isEmpty()) {
                LOGGER.info("Fulltext PDF found @ ScienceDirect (new page).");
                return Optional.of(new URL("http://www.sciencedirect.com" + url));
            }
        } catch (UnirestException e) {
            LOGGER.warn("ScienceDirect API request failed", e);
        }

        return Optional.empty();
    }

    /**
     * Queries the Elsevier article API for the given DOI and extracts the link whose {@code @rel} is
     * {@code scidir}.
     *
     * @param doi the DOI to look up
     * @return the {@code @href} of the last {@code scidir} link in the response, or an empty string if the
     *         response has no such link or does not have the expected JSON structure
     * @throws UnirestException if the HTTP request fails
     */
    private String getUrlByDoi(String doi) throws UnirestException {
        String sciLink = "";
        try {
            String request = API_URL + doi;
            HttpResponse<JsonNode> jsonResponse = Unirest.get(request)
                    .header("X-ELS-APIKey", API_KEY)
                    .queryString("httpAccept", "application/json")
                    .asJson();

            JSONObject json = jsonResponse.getBody().getObject();
            JSONArray links = json.getJSONObject("full-text-retrieval-response")
                    .getJSONObject("coredata")
                    .getJSONArray("link");

            for (int i = 0; i < links.length(); i++) {
                JSONObject link = links.getJSONObject(i);
                if (link.getString("@rel").equals("scidir")) {
                    sciLink = link.getString("@href");
                }
            }
            return sciLink;
        } catch (JSONException e) {
            // A missing key means the API answered without the expected link structure
            LOGGER.debug("No ScienceDirect link found in API request", e);
            return sciLink;
        }
    }
}