package org.jabref.logic.importer;
import java.io.BufferedInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;
import java.util.Optional;
import org.jabref.model.cleanup.Formatter;
import org.jabref.model.entry.BibEntry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jsoup.helper.StringUtil;
/**
 * Provides a convenient interface for ID-based fetchers, which follow the usual three-step procedure:
 * 1. Open a URL based on the search query
 * 2. Parse the response to get a list of {@link BibEntry}
 * 3. Post-process fetched entries
 */
public interface IdBasedParserFetcher extends IdBasedFetcher {

    Log LOGGER = LogFactory.getLog(IdBasedParserFetcher.class);

    /**
     * Constructs a URL based on the query.
     *
     * @param identifier the ID
     */
    URL getURLForID(String identifier) throws URISyntaxException, MalformedURLException, FetcherException;

    /**
     * Returns the parser used to convert the response to a list of {@link BibEntry}.
     */
    Parser getParser();

    /**
     * Performs a cleanup of the fetched entry.
     *
     * Only systematic errors of the fetcher should be corrected here
     * (i.e. if information is consistently contained in the wrong field or the wrong format)
     * but not cosmetic issues which may depend on the user's taste (for example, LaTeX code vs HTML in the abstract).
     *
     * Try to reuse existing {@link Formatter} for the cleanup. For example,
     * {@code new FieldFormatterCleanup(FieldName.TITLE, new RemoveBracesFormatter()).cleanup(entry);}
     *
     * By default, no cleanup is done.
     *
     * @param entry the entry to be cleaned-up
     */
    default void doPostCleanup(BibEntry entry) {
        // Do nothing by default
    }

    /**
     * Fetches the entry for the given identifier by opening the URL returned by
     * {@link #getURLForID(String)}, parsing the response with {@link #getParser()} and
     * applying {@link #doPostCleanup(BibEntry)} to the first parsed entry.
     *
     * @param identifier the ID to look up
     * @return the first parsed entry after post-cleanup; an empty Optional if the identifier is blank,
     *         the parser yields no entries, or opening/reading the URL raises a {@link FileNotFoundException}
     * @throws FetcherException if the search URL is malformed, another I/O error occurs, or the parser fails
     */
    @Override
    default Optional<BibEntry> performSearchById(String identifier) throws FetcherException {
        if (StringUtil.isBlank(identifier)) {
            return Optional.empty();
        }

        try (InputStream stream = new BufferedInputStream(getURLForID(identifier).openStream())) {
            List<BibEntry> fetchedEntries = getParser().parseEntries(stream);

            if (fetchedEntries.isEmpty()) {
                return Optional.empty();
            }

            if (fetchedEntries.size() > 1) {
                LOGGER.info("Fetcher " + getName() + " found more than one result for identifier " + identifier
                        + ". We will use the first entry.");
            }

            BibEntry entry = fetchedEntries.get(0);

            // Post-cleanup
            doPostCleanup(entry);

            return Optional.of(entry);
        } catch (URISyntaxException | MalformedURLException e) {
            // MalformedURLException is an IOException subclass, so it has to be handled before the
            // generic IOException branch to be reported as a URL problem rather than an I/O failure.
            throw new FetcherException("Search URI is malformed", e);
        } catch (FileNotFoundException e) {
            // Deliberately not an error: treated as "no entry exists for this identifier".
            LOGGER.debug("Id not found");
            return Optional.empty();
        } catch (IOException e) {
            // TODO: Catch HTTP Response 401 errors and report that user has no rights to access resource
            throw new FetcherException("An I/O exception occurred", e);
        } catch (ParseException e) {
            throw new FetcherException("An internal parser error occurred", e);
        }
    }
}