package org.jabref.model.entry.identifier;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.Objects;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jabref.model.entry.FieldName;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Class for working with Digital object identifiers (DOIs).
 *
 * <p>An instance wraps the plain DOI (for example {@code 10.1000/182}) that was extracted from a
 * plain DOI string, a {@code doi:}/{@code urn:doi:} prefixed string, or an HTTP(S) resolver URL.
 *
 * @see <a href="https://en.wikipedia.org/wiki/Digital_object_identifier">Digital object identifier (Wikipedia)</a>
 */
public class DOI implements Identifier {

    private static final Log LOGGER = LogFactory.getLog(DOI.class);

    // DOI resolver
    private static final URI RESOLVER = URI.create("http://doi.org");

    // Regex
    // (see http://www.doi.org/doi_handbook/2_Numbering.html)
    private static final String DOI_EXP = ""
            + "(?:urn:)?"                       // optional urn
            + "(?:doi:)?"                       // optional doi
            + "("                               // begin group \1
            + "10"                              // directory indicator
            + "(?:\\.[0-9]+)+"                  // registrant codes
            + "[/:]"                            // divider
            + "(?:.+)"                          // suffix alphanumeric string
            + ")";                              // end group \1

    // Same as DOI_EXP, but the suffix stops at the first whitespace so that a DOI can be
    // picked out of running text.
    private static final String FIND_DOI_EXP = ""
            + "(?:urn:)?"                       // optional urn
            + "(?:doi:)?"                       // optional doi
            + "("                               // begin group \1
            + "10"                              // directory indicator
            + "(?:\\.[0-9]+)+"                  // registrant codes
            + "[/:]"                            // divider
            + "(?:[^\\s]+)"                     // suffix alphanumeric without space
            + ")";                              // end group \1

    private static final String HTTP_EXP = "https?://[^\\s]+?" + DOI_EXP;

    // Pattern
    // Compiled once; case-sensitive, exactly like the String.matches(HTTP_EXP) call it replaces.
    private static final Pattern HTTP_PATT = Pattern.compile(HTTP_EXP);
    private static final Pattern EXACT_DOI_PATT = Pattern.compile("^(?:https?://[^\\s]+?)?" + DOI_EXP + "$", Pattern.CASE_INSENSITIVE);
    private static final Pattern DOI_PATT = Pattern.compile("(?:https?://[^\\s]+?)?" + FIND_DOI_EXP, Pattern.CASE_INSENSITIVE);

    // DOI
    private final String doi;

    /**
     * Creates a DOI from various schemes including URL, URN, and plain DOIs.
     *
     * <p>Surrounding whitespace is ignored. If the trimmed input is an HTTP(S) URL, its path is
     * percent-decoded before the DOI is extracted.
     *
     * @param doi the DOI string
     * @throws NullPointerException if DOI is null
     * @throws IllegalArgumentException if doi does not include a valid DOI
     */
    public DOI(String doi) {
        Objects.requireNonNull(doi);

        // Remove whitespace
        String trimmedDoi = doi.trim();

        // HTTP URL decoding. The check must run on the trimmed value: checking the raw input
        // would skip decoding for URLs that merely carry leading whitespace or a trailing newline.
        if (HTTP_PATT.matcher(trimmedDoi).matches()) {
            try {
                // URI#getPath() returns the decoded path segment
                URI url = new URI(trimmedDoi);
                trimmedDoi = url.getScheme() + "://" + url.getHost() + url.getPath();
            } catch (URISyntaxException e) {
                // keep the cause so the offending position in the URL is not lost
                throw new IllegalArgumentException(doi + " is not a valid HTTP DOI.", e);
            }
        }

        // Extract DOI
        Matcher matcher = EXACT_DOI_PATT.matcher(trimmedDoi);
        if (matcher.find()) {
            // match only group \1
            this.doi = matcher.group(1);
        } else {
            throw new IllegalArgumentException(trimmedDoi + " is not a valid DOI.");
        }
    }

    /**
     * Creates an {@code Optional<DOI>} from various schemes including URL, URN, and plain DOIs.
     *
     * <p>Useful for suppressing the {@code IllegalArgumentException} and
     * {@code NullPointerException} of the constructor and checking for
     * {@code Optional.isPresent()} instead.
     *
     * @param doi the DOI string
     * @return an Optional containing the DOI or an empty Optional
     */
    public static Optional<DOI> parse(String doi) {
        try {
            // a constructor never yields null, so Optional.of is sufficient
            return Optional.of(new DOI(doi));
        } catch (IllegalArgumentException | NullPointerException e) {
            return Optional.empty();
        }
    }

    /**
     * Determines whether a DOI is valid or not
     *
     * @param doi the DOI string
     * @return true if DOI is valid, false otherwise
     */
    public static boolean isValid(String doi) {
        return parse(doi).isPresent();
    }

    /**
     * Tries to find a DOI inside the given text.
     *
     * <p>Only the first match is considered; the match ends at the first whitespace character.
     *
     * @param text the Text which might contain a DOI
     * @return an Optional containing the DOI or an empty Optional
     * @throws NullPointerException if text is null
     */
    public static Optional<DOI> findInText(String text) {
        Objects.requireNonNull(text);

        Matcher matcher = DOI_PATT.matcher(text);
        if (matcher.find()) {
            // match only group \1
            return Optional.of(new DOI(matcher.group(1)));
        }
        return Optional.empty();
    }

    @Override
    public String toString() {
        return "DOI{" +
                "doi='" + doi + '\'' +
                '}';
    }

    /**
     * Return the plain DOI
     *
     * @return the plain DOI value.
     */
    public String getDOI() {
        return doi;
    }

    /**
     * Return a URI presentation for the DOI
     *
     * @return an encoded URI representation of the DOI, or an empty Optional if the DOI
     *         could not be encoded
     */
    @Override
    public Optional<URI> getExternalURI() {
        try {
            // the multi-argument URI constructor quotes illegal characters in the path
            URI uri = new URI(RESOLVER.getScheme(), RESOLVER.getHost(), "/" + doi, null);
            return Optional.of(uri);
        } catch (URISyntaxException e) {
            // should never happen
            LOGGER.error(doi + " could not be encoded as URI.", e);
            return Optional.empty();
        }
    }

    /**
     * Return an ASCII URL presentation for the DOI
     *
     * @return an encoded URL representation of the DOI, or the empty string if no URI
     *         could be built
     */
    public String getURIAsASCIIString() {
        return getExternalURI().map(URI::toASCIIString).orElse("");
    }

    @Override
    public String getDefaultField() {
        return FieldName.DOI;
    }

    @Override
    public String getNormalized() {
        return doi;
    }
}