package org.jabref.model.entry.identifier;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Locale;
import java.util.Objects;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jabref.model.entry.FieldName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Class for working with Digital object identifiers (DOIs).
 *
 * <p>An instance wraps the plain DOI name (for example {@code 10.1000/182}) extracted from a
 * plain DOI, a {@code doi:}/{@code urn:doi:} prefixed DOI, or an HTTP(S) URL ending in a DOI.
 * Instances are immutable. Two instances are equal when their DOI names are equal ignoring case.
 *
 * @see <a href="https://en.wikipedia.org/wiki/Digital_object_identifier">Digital object identifier (Wikipedia)</a>
 */
public class DOI implements Identifier {

    private static final Log LOGGER = LogFactory.getLog(DOI.class);

    // DOI resolver
    private static final URI RESOLVER = URI.create("http://doi.org");

    // Regex
    // (see http://www.doi.org/doi_handbook/2_Numbering.html)
    private static final String DOI_EXP = ""
            + "(?:urn:)?"                       // optional urn
            + "(?:doi:)?"                       // optional doi
            + "("                               // begin group \1
            + "10"                              // directory indicator
            + "(?:\\.[0-9]+)+"                  // registrant codes
            + "[/:]"                            // divider
            + "(?:.+)"                          // suffix: any characters except line terminators
            + ")";                              // end group \1
    private static final String FIND_DOI_EXP = ""
            + "(?:urn:)?"                       // optional urn
            + "(?:doi:)?"                       // optional doi
            + "("                               // begin group \1
            + "10"                              // directory indicator
            + "(?:\\.[0-9]+)+"                  // registrant codes
            + "[/:]"                            // divider
            + "(?:[^\\s]+)"                     // suffix: any characters except whitespace
            + ")";                              // end group \1
    private static final String HTTP_EXP = "https?://[^\\s]+?" + DOI_EXP;

    // Pattern
    // Compiled once; used with Matcher.matches(), i.e. the whole input has to be an HTTP(S) DOI URL.
    private static final Pattern HTTP_PATT = Pattern.compile(HTTP_EXP);
    private static final Pattern EXACT_DOI_PATT = Pattern.compile("^(?:https?://[^\\s]+?)?" + DOI_EXP + "$", Pattern.CASE_INSENSITIVE);
    private static final Pattern DOI_PATT = Pattern.compile("(?:https?://[^\\s]+?)?" + FIND_DOI_EXP, Pattern.CASE_INSENSITIVE);

    // DOI
    private final String doi;

    /**
     * Creates a DOI from various schemes including URL, URN, and plain DOIs.
     *
     * <p>Surrounding whitespace is ignored. If the trimmed input is an HTTP(S) URL, its path is
     * percent-decoded before the DOI is extracted.
     *
     * @param doi the DOI string
     * @throws NullPointerException if DOI is null
     * @throws IllegalArgumentException if doi does not include a valid DOI
     */
    public DOI(String doi) {
        Objects.requireNonNull(doi);

        // Remove whitespace
        String trimmedDoi = doi.trim();

        // HTTP URL decoding; test the trimmed value so that padded URLs are decoded as well
        if (HTTP_PATT.matcher(trimmedDoi).matches()) {
            try {
                // URI.getPath() returns the decoded path segment
                URI url = new URI(trimmedDoi);
                trimmedDoi = url.getScheme() + "://" + url.getHost() + url.getPath();
            } catch (URISyntaxException e) {
                // keep the parser's diagnosis available to callers
                throw new IllegalArgumentException(doi + " is not a valid HTTP DOI.", e);
            }
        }

        // Extract DOI
        Matcher matcher = EXACT_DOI_PATT.matcher(trimmedDoi);
        if (!matcher.find()) {
            throw new IllegalArgumentException(trimmedDoi + " is not a valid DOI.");
        }
        // match only group \1
        this.doi = matcher.group(1);
    }

    /**
     * Creates an {@code Optional<DOI>} from various schemes including URL, URN, and plain DOIs.
     *
     * <p>Useful for suppressing the {@code IllegalArgumentException} and
     * {@code NullPointerException} of the constructor and checking for
     * {@code Optional.isPresent()} instead.
     *
     * @param doi the DOI string, may be {@code null}
     * @return an Optional containing the DOI or an empty Optional
     */
    public static Optional<DOI> parse(String doi) {
        if (doi == null) {
            return Optional.empty();
        }
        try {
            return Optional.of(new DOI(doi));
        } catch (IllegalArgumentException ignored) {
            // an invalid DOI is an expected outcome here and is reported as "absent"
            return Optional.empty();
        }
    }

    /**
     * Determines whether a DOI is valid or not
     *
     * @param doi the DOI string, may be {@code null}
     * @return true if DOI is valid, false otherwise
     */
    public static boolean isValid(String doi) {
        return parse(doi).isPresent();
    }

    /**
     * Tries to find a DOI inside the given text.
     *
     * <p>Only the first match is considered; the DOI ends at the first whitespace character.
     *
     * @param text the Text which might contain a DOI
     * @return an Optional containing the DOI or an empty Optional
     * @throws NullPointerException if text is null
     */
    public static Optional<DOI> findInText(String text) {
        Optional<DOI> result = Optional.empty();

        Matcher matcher = DOI_PATT.matcher(text);
        if (matcher.find()) {
            // match only group \1
            result = Optional.of(new DOI(matcher.group(1)));
        }
        return result;
    }

    @Override
    public String toString() {
        return "DOI{" +
                "doi='" + doi + '\'' +
                '}';
    }

    /**
     * Compares two DOIs by their plain DOI name, ignoring case.
     *
     * @param o the object to compare with
     * @return true if {@code o} is a DOI with the same DOI name (ignoring case)
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if ((o == null) || (getClass() != o.getClass())) {
            return false;
        }
        DOI other = (DOI) o;
        // same normalisation as hashCode() so that equal objects always share a hash code
        return doi.toLowerCase(Locale.ROOT).equals(other.doi.toLowerCase(Locale.ROOT));
    }

    /**
     * Hash code consistent with {@link #equals(Object)}, i.e. independent of the DOI's case.
     *
     * @return the hash code of the lower-cased DOI name
     */
    @Override
    public int hashCode() {
        return doi.toLowerCase(Locale.ROOT).hashCode();
    }

    /**
     * Return the plain DOI
     *
     * @return the plain DOI value.
     */
    public String getDOI() {
        return doi;
    }

    /**
     * Return a URI presentation for the DOI
     *
     * @return an encoded URI representation of the DOI, or an empty Optional if the DOI
     *         could not be encoded as URI
     */
    @Override
    public Optional<URI> getExternalURI() {
        try {
            // the multi-argument constructor quotes characters that are illegal in a path
            URI uri = new URI(RESOLVER.getScheme(), RESOLVER.getHost(), "/" + doi, null);
            return Optional.of(uri);
        } catch (URISyntaxException e) {
            // should never happen
            LOGGER.error(doi + " could not be encoded as URI.", e);
            return Optional.empty();
        }
    }

    /**
     * Return an ASCII URL presentation for the DOI
     *
     * @return an encoded URL representation of the DOI, or the empty string if no URI
     *         could be built
     */
    public String getURIAsASCIIString() {
        return getExternalURI().map(URI::toASCIIString).orElse("");
    }

    @Override
    public String getDefaultField() {
        return FieldName.DOI;
    }

    @Override
    public String getNormalized() {
        return doi;
    }
}