package edu.harvard.iq.dataverse.api.datadeposit;

import edu.harvard.iq.dataverse.util.SystemConfig;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.List;
import java.util.logging.Logger;
import org.apache.commons.lang.StringUtils;
import org.swordapp.server.SwordError;
import org.swordapp.server.UriRegistry;

/**
 * Parses a Data Deposit API (SWORD) URL into its components: the servlet
 * name, the target type ("dataverse", "study" or "file") and the target
 * identifier. The parsed values are stored in fields of this object and are
 * exposed through the getters below.
 *
 * Instances hold mutable state that is overwritten by
 * {@link #processUrl(String)}.
 */
public class UrlManager {

    private static final Logger logger = Logger.getLogger(UrlManager.class.getCanonicalName());

    /**
     * Number of leading path segments that make up the API base path. The
     * count includes the empty segment produced by the leading slash, so for
     * "/dvn/api/data-deposit/v1/swordv2/..." the base path is segments 0-5.
     */
    private static final int BASE_PATH_SEGMENT_COUNT = 6;

    /** Index of the path segment naming the servlet, e.g. "collection". */
    private static final int SERVLET_SEGMENT_INDEX = BASE_PATH_SEGMENT_COUNT;

    /** Index of the first path segment after the servlet (the target type). */
    private static final int TARGET_SEGMENT_START = SERVLET_SEGMENT_INDEX + 1;

    String originalUrl;
    SwordConfigurationImpl swordConfiguration = new SwordConfigurationImpl();
    String servlet;
    String targetType;
    String targetIdentifier;
    int port;

    /**
     * Parses the given URL and populates {@code originalUrl}, {@code port},
     * {@code servlet} and, unless the servlet is "service-document",
     * {@code targetType} and {@code targetIdentifier}.
     *
     * @param url the full URL of the request
     * @return a deprecation warning if the URL uses a deprecated base path,
     * otherwise {@code null}
     * @throws SwordError if the URL is null or malformed, does not start with
     * a valid base path, lacks a servlet or target component, or names an
     * unsupported target type
     */
    String processUrl(String url) throws SwordError {
        logger.fine("URL was: " + url);
        String warning = null;
        this.originalUrl = url;
        URI javaNetUri = parseUri(url, "Invalid URL syntax: " + url);
        /**
         * @todo: figure out another way to check for http. We used to use
         * javaNetUri.getScheme() but now that we are using "ProxyPass /
         * ajp://localhost:8009/" in Apache it's always http rather than https.
         *
         * http://serverfault.com/questions/6128/how-do-i-force-apache-to-use-https-in-conjunction-with-ajp
         * http://stackoverflow.com/questions/1685563/apache-webserver-jboss-ajp-connectivity-with-https
         * http://stackoverflow.com/questions/12460422/how-do-ensure-that-apache-ajp-to-tomcat-connection-is-secure-encrypted
         */
        if (!"https".equals(javaNetUri.getScheme())) {
            /**
             * @todo figure out how to prevent this stack trace from showing up
             * in Glassfish logs:
             *
             * Unable to populate SSL attributes
             * java.lang.IllegalStateException: SSLEngine is null at
             * org.glassfish.grizzly.ssl.SSLSupportImpl
             *
             * https://github.com/IQSS/dataverse/issues/643
             *
             * SSLOptions +StdEnvVars +ExportCertData ?
             *
             * [#GLASSFISH-20694] Glassfish 4.0 and jk Unable to populate SSL
             * attributes - Java.net JIRA -
             * https://java.net/jira/browse/GLASSFISH-20694
             */
            // Only logged, not rejected: see the @todo above about the
            // scheme being unreliable behind the Apache proxy.
            logger.fine("https is required but protocol was " + javaNetUri.getScheme());
        }
        this.port = javaNetUri.getPort();

        // getPath() is null for opaque URIs such as "mailto:someone".
        String path = javaNetUri.getPath();
        if (path == null) {
            throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Error processing URL: " + url);
        }
        List<String> urlParts = Arrays.asList(path.split("/"));

        // index:        0 1   2   3            4  5       6          7         8
        // for example:   /dvn/api/data-deposit/v1/swordv2/collection/dataverse/sword
        // (segment 0 is the empty string before the leading slash)
        if (urlParts.size() < BASE_PATH_SEGMENT_COUNT) {
            throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Error processing URL: " + url);
        }
        String dataDepositApiBasePath = StringUtils.join(urlParts.subList(0, BASE_PATH_SEGMENT_COUNT), "/");

        if (!swordConfiguration.getBaseUrlPathsValid().contains(dataDepositApiBasePath)) {
            // Pass the error URI explicitly, as every other error in this
            // class does, so the text below is used as the message.
            throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, dataDepositApiBasePath + " found but one of these required: " + swordConfiguration.getBaseUrlPathsValid() + ". Current version is " + swordConfiguration.getBaseUrlPathCurrent());
        }
        if (swordConfiguration.getBaseUrlPathsDeprecated().contains(dataDepositApiBasePath)) {
            warning = "Deprecated version used for Data Deposit API. The current version expects '" + swordConfiguration.getBaseUrlPathCurrent() + "'. URL passed in: " + url;
        }

        if (urlParts.size() <= SERVLET_SEGMENT_INDEX) {
            throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "Unable to determine servlet path from URL: " + url);
        }
        this.servlet = urlParts.get(SERVLET_SEGMENT_INDEX);

        if (!servlet.equals("service-document")) {
            // index:        6          7         8
            // for example:  /collection/dataverse/sword
            List<String> targetTypeAndIdentifier = urlParts.subList(TARGET_SEGMENT_START, urlParts.size());
            parseTarget(targetTypeAndIdentifier, url);
            logger.fine("target type: " + targetType);
            logger.fine("target identifier: " + targetIdentifier);
        }
        if (warning != null) {
            logger.info(warning);
        }
        return warning;
    }

    /**
     * Sets {@code targetType} and {@code targetIdentifier} from the path
     * segments that follow the servlet name.
     *
     * @param targetTypeAndIdentifier path segments after the servlet; the
     * first is the target type and the rest form the identifier
     * @param url the original URL, used only in error messages
     * @throws SwordError if there are no segments, the identifier is missing
     * for a "dataverse" or "file" target, or the target type is unsupported
     */
    private void parseTarget(List<String> targetTypeAndIdentifier, String url) throws SwordError {
        if (targetTypeAndIdentifier.isEmpty()) {
            // The URL ended right after the servlet segment.
            throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "No target components specified in URL: " + url);
        }
        this.targetType = targetTypeAndIdentifier.get(0);
        boolean hasIdentifier = targetTypeAndIdentifier.size() > 1;
        if ("dataverse".equals(targetType)) {
            if (!hasIdentifier) {
                throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "No dataverse alias provided in URL: " + url);
            }
            this.targetIdentifier = targetTypeAndIdentifier.get(1);
        } else if ("study".equals(targetType)) {
            /**
             * @todo it would be nice to support "dataset" as an alias for
             * "study" since that's what we call them now in Dataverse 4.0.
             * We should continue to support "study" in the URL however
             * because some API users have these URLs stored in databases:
             * http://irclog.iq.harvard.edu/dvn/2014-05-14#i_9404
             *
             * Also, to support "dataset" in URLs properly, we'd need to
             * examine all the places where we return the string "study"
             * such as in the Deposit Receipt.
             */
            // A global id may itself contain slashes, so every remaining
            // segment is rejoined. With no remaining segments this yields
            // an empty string.
            List<String> globalIdParts = targetTypeAndIdentifier.subList(1, targetTypeAndIdentifier.size());
            this.targetIdentifier = StringUtils.join(globalIdParts, "/");
        } else if ("file".equals(targetType)) {
            if (!hasIdentifier) {
                throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "No file id provided in URL: " + url);
            }
            // a user might reasonably pass in a filename as well [.get(2)] since
            // we expose it in the statement of a study but we ignore it here
            this.targetIdentifier = targetTypeAndIdentifier.get(1);
        } else {
            throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, "unsupported target type: " + targetType);
        }
    }

    /**
     * Parses a URL string, converting a null or syntactically invalid value
     * into a bad-request {@link SwordError}.
     *
     * @param url the URL to parse, may be null
     * @param errorMessage the message to use for the thrown error
     * @return the parsed URI
     * @throws SwordError if {@code url} is null or not a valid URI
     */
    private static URI parseUri(String url, String errorMessage) throws SwordError {
        if (url == null) {
            throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, errorMessage);
        }
        try {
            return new URI(url);
        } catch (URISyntaxException ex) {
            throw new SwordError(UriRegistry.ERROR_BAD_REQUEST, errorMessage);
        }
    }

    /**
     * Builds "https://" + host + optional ":port" + the current base URL
     * path. The port is taken from the given URL. The host is the value of
     * the {@code SystemConfig.FQDN} system property, falling back to
     * "localhost" when that property is unset or when the given URL's host
     * is itself "localhost".
     *
     * @param url the URL of the request
     * @return the https URL prefix for the current API version
     * @throws SwordError if the URL is null or malformed
     */
    String getHostnamePlusBaseUrlPath(String url) throws SwordError {
        URI u = parseUri(url, "unable to part URL");
        String optionalPort = "";
        int requestedPort = u.getPort();
        if (requestedPort != -1) {
            // https often runs on port 8181 in dev
            optionalPort = ":" + requestedPort;
        }
        String requestedHostname = u.getHost();
        String hostName = System.getProperty(SystemConfig.FQDN);
        if (hostName == null) {
            hostName = "localhost";
        }
        /**
         * @todo should this be configurable? In dev it's convenient to override
         * the JVM option and force traffic to localhost.
         */
        // Constant-first comparison: getHost() returns null when the URL has
        // no host component.
        if ("localhost".equals(requestedHostname)) {
            hostName = "localhost";
        }
        /**
         * @todo Any problem with returning the current API version rather than
         * the version that was operated on? Both should work. If SWORD API
         * users are operating on the URLs returned (as they should) returning
         * the current version will avoid deprecation warnings on the Dataverse
         * side.
         *
         * @todo Prevent "https://localhost:8080" from being returned. It should
         * either be "http://localhost:8080" or "https://localhost:8181". Use
         * SystemConfig.getDataverseSiteUrl instead of SystemConfig.FQDN above.
         * It's worse for security to not have https hard coded here but if
         * users have configured dataverse.siteUrl to be http rather than https
         * we assume they are doing this on purpose (despite our warnings in the
         * Installation Guide), perhaps because they are only kicking the tires
         * on Dataverse.
         */
        return "https://" + hostName + optionalPort + swordConfiguration.getBaseUrlPathCurrent();
    }

    /** @return the URL most recently passed to {@link #processUrl(String)}, or set explicitly */
    public String getOriginalUrl() {
        return originalUrl;
    }

    /** @param originalUrl the URL to record as the original request URL */
    public void setOriginalUrl(String originalUrl) {
        this.originalUrl = originalUrl;
    }

    /** @return the servlet path segment, e.g. "service-document" */
    public String getServlet() {
        return servlet;
    }

    /** @param servlet the servlet path segment */
    public void setServlet(String servlet) {
        this.servlet = servlet;
    }

    /** @return the identifier following the target type in the URL */
    public String getTargetIdentifier() {
        return targetIdentifier;
    }

    /** @param targetIdentifier the identifier of the target */
    public void setTargetIdentifier(String targetIdentifier) {
        this.targetIdentifier = targetIdentifier;
    }

    /** @return the target type segment: "dataverse", "study" or "file" when parsing succeeded */
    public String getTargetType() {
        return targetType;
    }

    /** @param targetType the target type */
    public void setTargetType(String targetType) {
        this.targetType = targetType;
    }

    /** @return the port from the processed URL, or -1 if the URL did not specify one */
    public int getPort() {
        return port;
    }

    /** @param port the port number */
    public void setPort(int port) {
        this.port = port;
    }
}