package org.jabref.logic.importer;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import org.jabref.logic.importer.fetcher.ACS;
import org.jabref.logic.importer.fetcher.ArXiv;
import org.jabref.logic.importer.fetcher.DoiResolution;
import org.jabref.logic.importer.fetcher.GoogleScholar;
import org.jabref.logic.importer.fetcher.IEEE;
import org.jabref.logic.importer.fetcher.ScienceDirect;
import org.jabref.logic.importer.fetcher.SpringerLink;
import org.jabref.logic.net.URLDownload;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.FieldName;
import org.jabref.model.entry.identifier.DOI;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Tries a list of {@link FulltextFetcher}s, one after another, to resolve a URL pointing to the
 * full-text PDF of a bibliographic entry.
 */
public class FulltextFetchers {

    private static final Log LOGGER = LogFactory.getLog(FulltextFetchers.class);

    /** Fetchers in the order in which they are queried. */
    private final List<FulltextFetcher> finders = new ArrayList<>();

    /**
     * Creates an instance pre-populated with the built-in fetchers.
     *
     * @param importFormatPreferences preferences handed to the fetchers that take them
     *                                ({@link ArXiv} and {@link GoogleScholar})
     */
    public FulltextFetchers(ImportFormatPreferences importFormatPreferences) {
        // The registration order is the query order: authoritative sources go first.

        // Publishers
        finders.add(new DoiResolution());
        finders.add(new ScienceDirect());
        finders.add(new SpringerLink());
        finders.add(new ACS());
        finders.add(new ArXiv(importFormatPreferences));
        finders.add(new IEEE());

        // Meta search comes last
        finders.add(new GoogleScholar(importFormatPreferences));
    }

    /**
     * Creates an instance that queries exactly the given fetchers, in list order.
     *
     * @param fetcher the fetchers to use; its elements are copied into an internal list
     */
    public FulltextFetchers(List<FulltextFetcher> fetcher) {
        finders.addAll(fetcher);
    }

    /**
     * Looks for a full-text PDF belonging to the given entry.
     * <p>
     * The lookup works on a clone, so the passed entry itself is not modified. If the clone has no
     * parseable DOI, a DOI lookup is attempted first and its result is stored on the clone. Then each
     * fetcher is asked in order; the first URL that {@link URLDownload#isPdf()} accepts is returned.
     * A fetcher that throws an {@link IOException} or {@link FetcherException} is logged at debug
     * level and skipped.
     *
     * @param entry the entry to find a full text for
     * @return the URL of the first PDF found, or an empty Optional if no fetcher produced one
     */
    public Optional<URL> findFullTextPDF(BibEntry entry) {
        // Work on a copy: the DOI lookup below may set a field, and the caller's entry must stay as is.
        BibEntry workingCopy = (BibEntry) entry.clone();

        boolean hasParseableDoi = workingCopy.getField(FieldName.DOI).flatMap(DOI::parse).isPresent();
        if (!hasParseableDoi) {
            lookUpDoi(workingCopy);
        }

        for (FulltextFetcher candidateFetcher : finders) {
            try {
                Optional<URL> candidate = candidateFetcher.findFullText(workingCopy);
                if (!candidate.isPresent()) {
                    continue;
                }

                URLDownload download = new URLDownload(candidate.get().toString());
                if (download.isPdf()) {
                    return candidate;
                }
            } catch (IOException | FetcherException e) {
                // Best effort: a failing fetcher must not prevent the remaining ones from being tried.
                LOGGER.debug("Failed to find fulltext PDF at given URL", e);
            }
        }

        return Optional.empty();
    }

    /**
     * Asks the DOI identifier fetcher for a DOI and, if one is found, writes it into the DOI field
     * of the given entry. A {@link FetcherException} is logged at debug level and otherwise ignored.
     *
     * @param target the entry to look up and to update; callers pass a clone here
     */
    private static void lookUpDoi(BibEntry target) {
        try {
            WebFetchers.getIdFetcherForIdentifier(DOI.class)
                       .findIdentifier(target)
                       .ifPresent(found -> target.setField(FieldName.DOI, found.getDOI()));
        } catch (FetcherException e) {
            LOGGER.debug("Failed to find DOI", e);
        }
    }
}