package org.jabref.model.entry.identifier;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Objects;
import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jabref.model.entry.FieldName;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Class for working with Eprint (arXiv) identifiers.
 * <p>
 * Two identifier schemes are recognised:
 * <ul>
 *   <li>the current scheme {@code YYMM.NNNN[N][vV]}, e.g. {@code 0706.0001v1}</li>
 *   <li>the legacy scheme {@code archive[.SC]/YYMMNNN[vV]}, e.g. {@code math.GT/0309136}</li>
 * </ul>
 * Both may carry an {@code arXiv:} prefix and/or be embedded at the end of an HTTP(S) URL.
 *
 * @see <a href="https://arxiv.org/help/arxiv_identifier">arXiv identifier scheme</a>
 * @see <a href="https://arxiv.org/hypertex/bibstyles/">arXiv BibTeX styles</a>
 */
public class Eprint implements Identifier {

    public static final URI RESOLVER = URI.create("http://arxiv.org");

    private static final Log LOGGER = LogFactory.getLog(Eprint.class);

    // Regex
    // (see https://arxiv.org/help/arxiv_identifier)
    private static final String EPRINT_EXP = ""
            + "(?:arXiv:)?"            // optional prefix
            + "("                      // begin group \1
            + "\\d{4}"                 // YYMM
            + "\\."                    // divider
            + "\\d{4,5}"               // number
            + "(?:v\\d+)?"             // optional version
            + "|"                      // old id
            // The archive is restricted to hyphen-separated letter groups ("hep-th", "math").
            // A catch-all such as ".+" would swallow the host/path of an enclosing URL or any
            // other leading text into the captured identifier.
            + "[a-z]+(?:-[a-z]+)*"     // archive
            + "(?:\\.\\w{2})?"         // optional subject class
            + "/"                      // divider
            + "\\d{7}"                 // number
            + "(?:v\\d+)?"             // optional version
            + ")";                     // end group \1

    // Shortest possible run of URL characters in front of the identifier
    private static final String URL_PREFIX_EXP = "https?://\\S+?";

    // Pattern
    // Both patterns are applied with Matcher#matches(), i.e. they must cover the whole input.
    private static final Pattern HTTP_PATT = Pattern.compile(
            URL_PREFIX_EXP + EPRINT_EXP, Pattern.CASE_INSENSITIVE);
    private static final Pattern EXACT_EPRINT_PATT = Pattern.compile(
            "(?:" + URL_PREFIX_EXP + ")?" + EPRINT_EXP, Pattern.CASE_INSENSITIVE);

    // Eprint identifier without "arXiv:" prefix or URL parts
    private final String eprint;

    /**
     * Creates an Eprint from various schemes including URL.
     * <p>
     * Surrounding whitespace is ignored. The input has to consist of the identifier only
     * (optionally prefixed by {@code arXiv:} or an HTTP(S) URL); identifiers embedded in
     * other text are rejected.
     *
     * @param eprint the Eprint identifier string
     * @throws NullPointerException     if eprint is null
     * @throws IllegalArgumentException if eprint does not include a valid Eprint identifier
     */
    public Eprint(String eprint) {
        Objects.requireNonNull(eprint);

        // Remove whitespace
        String trimmedId = eprint.trim();

        // HTTP URL decoding; the check runs on the trimmed value so that padded input is
        // treated exactly like unpadded input
        if (HTTP_PATT.matcher(trimmedId).matches()) {
            try {
                // decodes path segment
                URI url = new URI(trimmedId);
                trimmedId = url.getScheme() + "://" + url.getHost() + url.getPath();
            } catch (URISyntaxException e) {
                // keep the parser's diagnosis as the cause
                throw new IllegalArgumentException(eprint + " is not a valid HTTP Eprint identifier.", e);
            }
        }

        // Extract Eprint
        Matcher matcher = EXACT_EPRINT_PATT.matcher(trimmedId);
        if (!matcher.matches()) {
            throw new IllegalArgumentException(trimmedId + " is not a valid Eprint identifier.");
        }
        // match only group \1
        this.eprint = matcher.group(1);
    }

    /**
     * Creates an Optional&lt;Eprint&gt; from various schemes including URL.
     * <p>
     * Useful for suppressing the {@code IllegalArgumentException} and
     * {@code NullPointerException} of the constructor and checking for
     * {@code Optional.isPresent()} instead.
     *
     * @param eprint the Eprint string, may be null
     * @return an Optional containing the Eprint, or an empty Optional if {@code eprint} is
     *         null or not a valid Eprint identifier
     */
    public static Optional<Eprint> build(String eprint) {
        if (eprint == null) {
            return Optional.empty();
        }
        try {
            return Optional.of(new Eprint(eprint));
        } catch (IllegalArgumentException e) {
            return Optional.empty();
        }
    }

    /**
     * Return a URI presentation for the Eprint identifier
     *
     * @return an encoded URI of the form {@code <RESOLVER>/abs/<eprint>}, or an empty
     *         Optional if the identifier could not be encoded
     */
    @Override
    public Optional<URI> getExternalURI() {
        try {
            URI uri = new URI(RESOLVER.getScheme(), RESOLVER.getHost(), "/abs/" + eprint, null);
            return Optional.of(uri);
        } catch (URISyntaxException e) {
            // should never happen
            LOGGER.error(eprint + " could not be encoded as URI.", e);
            return Optional.empty();
        }
    }

    /**
     * Return an ASCII URL presentation for the Eprint identifier
     *
     * @return an encoded URL representation of the Eprint identifier, or the empty string
     *         if no URI could be built
     */
    public String getURIAsASCIIString() {
        return getExternalURI().map(URI::toASCIIString).orElse("");
    }

    /**
     * Return the plain Eprint identifier
     *
     * @return the plain Eprint value.
     */
    public String getEprint() {
        return eprint;
    }

    /**
     * @return the name of the field this identifier is stored in ({@code FieldName.EPRINT})
     */
    @Override
    public String getDefaultField() {
        return FieldName.EPRINT;
    }

    /**
     * @return the plain Eprint value, same as {@link #getEprint()}
     */
    @Override
    public String getNormalized() {
        return eprint;
    }
}