/*
 * The contents of this file are subject to the license and copyright terms
 * detailed in the license directory at the root of the source tree (also
 * available online at http://fedora-commons.org/license/).
 */
package fedora.utilities;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A representation of a normalized URI, per RFC 3986.
 *
 * <p>Instances are mutable: every {@code normalize*} method replaces the
 * wrapped {@link URI} with its normalized form. Use {@link #toURI()} or
 * {@link #toString()} to read the current state.
 *
 * @author Edwin Shin
 * @since 3.0
 * @see <a href="https://www.rfc-editor.org/rfc/rfc3986">RFC 3986</a>
 * @version $Id$
 */
public class NormalizedURI {

    private URI uri;

    /**
     * Default port per (lower-case) scheme name.
     *
     * see http://www.iana.org/assignments/uri-schemes
     * see http://www.iana.org/assignments/port-numbers
     */
    private static final Map<String, Integer> defaultPorts =
            new HashMap<String, Integer>();

    static {
        defaultPorts.put("acap", Integer.valueOf(674));
        defaultPorts.put("afs", Integer.valueOf(1483));
        defaultPorts.put("dict", Integer.valueOf(2628));
        defaultPorts.put("ftp", Integer.valueOf(21));
        defaultPorts.put("go", Integer.valueOf(1096));
        defaultPorts.put("gopher", Integer.valueOf(70));
        defaultPorts.put("http", Integer.valueOf(80));
        defaultPorts.put("https", Integer.valueOf(443));
        defaultPorts.put("imap", Integer.valueOf(143));
        defaultPorts.put("ipp", Integer.valueOf(631));
        defaultPorts.put("iris.beep", Integer.valueOf(702));
        defaultPorts.put("ldap", Integer.valueOf(389));
        defaultPorts.put("mtqp", Integer.valueOf(1038));
        defaultPorts.put("mupdate", Integer.valueOf(3905));
        defaultPorts.put("nfs", Integer.valueOf(2049));
        defaultPorts.put("nntp", Integer.valueOf(119));
        defaultPorts.put("pop", Integer.valueOf(110));
        defaultPorts.put("prospero", Integer.valueOf(1525));
        defaultPorts.put("rtsp", Integer.valueOf(554));
        defaultPorts.put("smtp", Integer.valueOf(25));
        defaultPorts.put("sip", Integer.valueOf(5060));
        defaultPorts.put("sips", Integer.valueOf(5061));
        defaultPorts.put("snmp", Integer.valueOf(161));
        defaultPorts.put("soap.beep", Integer.valueOf(605));
        defaultPorts.put("soap.beeps", Integer.valueOf(605));
        defaultPorts.put("telnet", Integer.valueOf(23));
        defaultPorts.put("tftp", Integer.valueOf(69));
        defaultPorts.put("vemmi", Integer.valueOf(575));
        defaultPorts.put("wais", Integer.valueOf(210));
        defaultPorts.put("xmlrpc.beep", Integer.valueOf(602));
        defaultPorts.put("xmlrpc.beeps", Integer.valueOf(602));
        defaultPorts.put("z39.50r", Integer.valueOf(210));
        defaultPorts.put("z39.50s", Integer.valueOf(210));
    }

    /** A percent-encoded octet: '%' followed by exactly two hex digits. */
    private static final Pattern PERCENT_ENCODED =
            Pattern.compile("%[0-9a-fA-F]{2}");

    /**
     * Wraps the URI parsed from the given string.
     *
     * @param uri the string to parse
     * @throws URISyntaxException if the string is not a valid URI
     */
    public NormalizedURI(String uri) throws URISyntaxException {
        this(new URI(uri));
    }

    /**
     * Wraps the given URI. No normalization is performed until one of the
     * {@code normalize*} methods is called.
     *
     * @param uri the URI to wrap
     */
    public NormalizedURI(URI uri) {
        this.uri = uri;
    }

    /**
     * Applies syntax-based, scheme-based and protocol-based normalization,
     * in that order.
     */
    public void normalize() {
        normalizeSyntax();
        normalizeByScheme();
        normalizeByProtocol();
    }

    /**
     * Performs the following:
     * Case Normalization
     * Percent-Encoding Normalization
     * Path Segment Normalization
     */
    public void normalizeSyntax() {
        normalizeCase();
        normalizePercentEncoding();
        normalizePathSegment();
    }

    /**
     * Case Normalization: lower-cases the scheme and host and upper-cases
     * the hex digits of percent-encoded octets.
     *
     * <p>The components are located by position in the URI string rather
     * than by pattern replacement, so hosts that contain regex
     * metacharacters (e.g. IPv6 literals) or that also occur in the user
     * info are handled correctly.
     *
     * @see <a href="https://www.rfc-editor.org/rfc/rfc3986#section-6.2.2.1">RFC 3986 6.2.2.1</a>
     */
    public void normalizeCase() {
        String scheme = uri.getScheme();
        String host = uri.getHost();
        String s = toString();

        // The scheme, when present, is always the leading part of the string.
        if (scheme != null && s.startsWith(scheme)) {
            s = scheme.toLowerCase(Locale.ROOT) + s.substring(scheme.length());
        }

        if (host != null) {
            int[] span = authoritySpan(s, scheme);
            if (span != null) {
                // The host follows the user info delimiter, if any.
                int at = s.lastIndexOf('@', span[1] - 1);
                int hostStart = (at >= span[0]) ? at + 1 : span[0];
                if (s.startsWith(host, hostStart)) {
                    s = s.substring(0, hostStart)
                            + host.toLowerCase(Locale.ROOT)
                            + s.substring(hostStart + host.length());
                }
            }
        }

        // Percent-encoded characters should be uppercase
        if (s.indexOf('%') != -1) {
            Matcher m = PERCENT_ENCODED.matcher(s);
            StringBuilder sb = new StringBuilder(s.length());
            int lastEnd = 0;
            while (m.find()) {
                sb.append(s, lastEnd, m.start());
                sb.append(m.group().toUpperCase(Locale.ROOT));
                lastEnd = m.end();
            }
            sb.append(s, lastEnd, s.length());
            s = sb.toString();
        }
        uri = URI.create(s);
    }

    /**
     * Percent-Encoding Normalization: decodes percent-encoded octets that
     * represent unreserved characters (ALPHA, DIGIT, '-', '.', '_', '~').
     *
     * <p>Escapes of any other character (for example {@code %2F} or
     * {@code %3F}) are left untouched, because decoding them would change
     * the meaning of the URI.
     *
     * @see <a href="https://www.rfc-editor.org/rfc/rfc3986#section-6.2.2.2">RFC 3986 6.2.2.2</a>
     */
    public void normalizePercentEncoding() {
        String raw = uri.toString();
        if (raw.indexOf('%') == -1) {
            return;
        }
        int n = raw.length();
        StringBuilder sb = new StringBuilder(n);
        int i = 0;
        while (i < n) {
            char c = raw.charAt(i);
            if (c == '%' && i + 2 < n) {
                int hi = Character.digit(raw.charAt(i + 1), 16);
                int lo = Character.digit(raw.charAt(i + 2), 16);
                if (hi >= 0 && lo >= 0) {
                    char decoded = (char) ((hi << 4) | lo);
                    if (isUnreserved(decoded)) {
                        sb.append(decoded);
                        i += 3;
                        continue;
                    }
                }
            }
            sb.append(c);
            i++;
        }
        uri = URI.create(sb.toString());
    }

    /**
     * Path Segment Normalization: removes "." and ".." segments as done by
     * {@link URI#normalize()}.
     *
     * @see <a href="https://www.rfc-editor.org/rfc/rfc3986#section-6.2.2.3">RFC 3986 6.2.2.3</a>
     */
    public void normalizePathSegment() {
        uri = uri.normalize();
    }

    /**
     * Scheme-Based Normalization. For URIs that have an authority:
     * <ul>
     * <li>an explicit port equal to the scheme's default port is removed;</li>
     * <li>a dangling ':' with no port after it is removed;</li>
     * <li>an empty path is replaced by "/".</li>
     * </ul>
     * URIs without an authority (opaque URIs such as {@code mailto:},
     * relative references, {@code file:///...}) are left unchanged.
     *
     * @see <a href="https://www.rfc-editor.org/rfc/rfc3986#section-6.2.3">RFC 3986 6.2.3</a>
     */
    public void normalizeByScheme() {
        String s = toString();
        String scheme = uri.getScheme();
        int[] span = authoritySpan(s, scheme);
        if (span == null) {
            return;
        }

        String authority = s.substring(span[0], span[1]);
        int port = uri.getPort();
        if (port != -1 && port == defaultPort(scheme)) {
            // A parsed port implies the authority ends in ":<digits>".
            authority = authority.substring(0, authority.lastIndexOf(':'));
        } else if (port == -1 && authority.endsWith(":")) {
            authority = authority.substring(0, authority.length() - 1);
        }

        // Whatever follows the authority is either the path ('/'), the
        // query ('?'), the fragment ('#') or the end of the string.
        boolean emptyPath = span[1] == s.length() || s.charAt(span[1]) != '/';

        uri = URI.create(s.substring(0, span[0])
                + authority
                + (emptyPath ? "/" : "")
                + s.substring(span[1]));
    }

    /**
     * Protocol-Based Normalization. Currently does nothing.
     *
     * @see <a href="https://www.rfc-editor.org/rfc/rfc3986#section-6.2.4">RFC 3986 6.2.4</a>
     */
    public void normalizeByProtocol() {
        //TODO noop
    }

    /**
     * @return the current URI as a US-ASCII string
     */
    @Override
    public String toString() {
        return uri.toASCIIString();
    }

    /**
     * @return the current (possibly normalized) URI
     */
    public URI toURI() {
        return uri;
    }

    /**
     * Locates the non-empty authority component inside a URI string.
     *
     * @param s the URI string
     * @param scheme the scheme the string starts with, or null if none
     * @return {start, end} offsets (end exclusive) of the authority, or
     *         null if the string has no "//" marker or the authority is empty
     */
    private static int[] authoritySpan(String s, String scheme) {
        int start = (scheme == null) ? 0 : scheme.length() + 1;
        if (!s.startsWith("//", start)) {
            return null;
        }
        start += 2;
        int end = start;
        while (end < s.length() && "/?#".indexOf(s.charAt(end)) == -1) {
            end++;
        }
        return (end > start) ? new int[] {start, end} : null;
    }

    /**
     * Tells whether a character is in the RFC 3986 "unreserved" set.
     *
     * @param c the character to test
     * @return true for ALPHA, DIGIT, '-', '.', '_' and '~'
     */
    private static boolean isUnreserved(char c) {
        return (c >= 'a' && c <= 'z')
                || (c >= 'A' && c <= 'Z')
                || (c >= '0' && c <= '9')
                || c == '-' || c == '.' || c == '_' || c == '~';
    }

    /**
     * Return the default port used by a given scheme.
     *
     * @param scheme the scheme, e.g. http
     * @return the port number, or -1 if unknown
     */
    private final static int defaultPort(String scheme) {
        if (scheme == null) {
            return -1;
        }
        Integer port = defaultPorts.get(scheme.trim().toLowerCase(Locale.ROOT));
        return (port != null) ? port.intValue() : -1;
    }
}