package org.jabref.logic.importer.fetcher;
import java.io.IOException;
import java.net.URL;
import java.util.Objects;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jabref.logic.importer.FulltextFetcher;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;
import org.jabref.model.entry.identifier.DOI;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Fulltext fetcher that looks up PDF URLs for entries on IEEE Xplore.
 * <p>
 * The lookup proceeds in two stages:
 * <ol>
 * <li>Find a "stamp" path of the form {@code /stamp/stamp.jsp?[tp=&]arnumber=...}. The entry's URL field is
 * inspected first; if it contains no such path and the entry's DOI starts with {@code 10.1109}, the DOI is
 * resolved and the downloaded HTML page is searched for the same kind of path.</li>
 * <li>Download the stamp page from {@code http://ieeexplore.ieee.org} and search it for a quoted direct link to
 * the PDF.</li>
 * </ol>
 */
public class IEEE implements FulltextFetcher {

    private static final Log LOGGER = LogFactory.getLog(IEEE.class);

    // Dots in the literal parts are escaped so that they only match a real '.' and not an arbitrary character.
    private static final Pattern STAMP_PATTERN = Pattern.compile("(/stamp/stamp\\.jsp\\?t?p?=?&?arnumber=[0-9]+)");
    private static final Pattern PDF_PATTERN = Pattern
            .compile("\"(http://ieeexplore\\.ieee\\.org/ielx[0-9/]+\\.pdf[^\"]+)\"");
    private static final String IEEE_DOI = "10.1109";
    private static final String BASE_URL = "http://ieeexplore.ieee.org";

    /**
     * Tries to find a direct link to the PDF of the given entry on IEEE Xplore.
     *
     * @param entry the entry to find a fulltext link for; must not be {@code null}
     * @return the URL of the PDF, or an empty Optional if no stamp path or no PDF link could be found
     * @throws IOException if downloading one of the intermediate pages fails or a found link is not a valid URL
     * @throws NullPointerException if {@code entry} is {@code null}
     */
    @Override
    public Optional<URL> findFullText(BibEntry entry) throws IOException {
        Objects.requireNonNull(entry);

        // Try URL first -- will primarily work for entries from the old IEEE search
        Optional<String> stampPath = entry.getField(FieldName.URL).flatMap(IEEE::extractStampPath);

        // If the URL field did not help, fall back to resolving the DOI
        if (!stampPath.isPresent()) {
            stampPath = findStampPathViaDoi(entry);
        }

        // Any success?
        if (!stampPath.isPresent()) {
            return Optional.empty();
        }

        // Download the HTML page containing a frame with the PDF
        String framePage = new URLDownload(BASE_URL + stampPath.get()).asString();

        // Try to find the direct PDF link
        Matcher pdfMatcher = PDF_PATTERN.matcher(framePage);
        if (pdfMatcher.find()) {
            LOGGER.debug("Full text document found on IEEE Xplore");
            return Optional.of(new URL(pdfMatcher.group(1)));
        }
        return Optional.empty();
    }

    /**
     * Returns the first stamp path (see {@link #STAMP_PATTERN}) contained in the given text, if any.
     */
    private static Optional<String> extractStampPath(String text) {
        Matcher stampMatcher = STAMP_PATTERN.matcher(text);
        if (stampMatcher.find()) {
            return Optional.of(stampMatcher.group(1));
        }
        return Optional.empty();
    }

    /**
     * Resolves the entry's DOI, provided it starts with the IEEE prefix and has an external URI, and searches the
     * downloaded page for a stamp path.
     *
     * @throws IOException if the resolved DOI page cannot be downloaded
     */
    private static Optional<String> findStampPathViaDoi(BibEntry entry) throws IOException {
        Optional<DOI> ieeeDoi = entry.getField(FieldName.DOI)
                .flatMap(DOI::parse)
                .filter(doi -> doi.getDOI().startsWith(IEEE_DOI))
                .filter(doi -> doi.getExternalURI().isPresent());
        if (!ieeeDoi.isPresent()) {
            return Optional.empty();
        }

        // Download the HTML page from IEEE and try to find the link in it
        String resolvedDoiPage = new URLDownload(ieeeDoi.get().getExternalURI().get().toURL()).asString();
        return extractStampPath(resolvedDoiPage);
    }
}