package org.limewire.util;

import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.util.Locale;

/**
 * Utilities for <code>URIs</code>.
 */
public class URIUtils {

    /**
     * Reserved characters that are left unescaped when a complete URI is
     * encoded (see {@link #encodeUri(String)}).
     */
    private static final String URI_DECODE_RESERVED = ";/?:@&=+$,#";

    /**
     * Creates a <code>URI</code> from the input string.
     * The preferred way to invoke this method is with an URL-encoded string.
     * <p>
     * However, if the string has not been encoded, this method will encode it.
     * It is ambiguous whether a string has been encoded or not, which is why
     * it is preferred to pass in the string pre-encoded.
     * <p>
     * This method is useful when manipulating a URI and you don't know if it is
     * encoded or not.
     *
     * @param uriString the uri to be created
     * @return the parsed URI, built either from <code>uriString</code> as given
     *         or from its percent-encoded form
     * @throws URISyntaxException the exception raised by the first parse
     *         attempt, if the percent-encoded retry fails as well; the failure
     *         of the retry is attached as its cause
     */
    public static URI toURI(final String uriString) throws URISyntaxException {
        try {
            return new URI(uriString);
        } catch (URISyntaxException original) {
            // The string was perhaps not encoded: percent-encode it and retry.
            try {
                // The encoding step lives inside this try block so that an
                // encoding failure is reported the same way as a failed re-parse.
                return new URI(encodeUri(uriString));
            } catch (URISyntaxException retryFailure) {
                // Encoding did not help, so something is probably structurally
                // wrong with the string. The ORIGINAL exception is thrown; the
                // second one is attached through initCause(). That is not the
                // usual use of initCause(), but it captures both stack traces.
                if (original.getCause() == null) {
                    original.initCause(retryFailure);
                }
                throw original;
            }
        }
    }

    /**
     * Returns the port for the given URI. If no port can be found, it checks the scheme.
     * If the scheme is http port 80 is returned, if https 443.
     * <p>
     * -1 is returned if no port can be found.
     */
    public static int getPort(URI uri) {
        int port = uri.getPort();
        if (port == -1) {
            String scheme = uri.getScheme();
            if ("http".equalsIgnoreCase(scheme)) {
                port = 80;
            } else if ("https".equalsIgnoreCase(scheme)) {
                port = 443;
            }
        }
        return port;
    }

    /**
     * Percent encodes <code>uri</code> leaving slashes and other reserved
     * characters untouched. Modeled on the Javascript implementation of
     * encodeURI; escapes are written with lowercase hex digits.
     *
     * @throws URISyntaxException if <code>uri</code> contains an unpaired
     *         surrogate character
     */
    public static String encodeUri(String uri) throws URISyntaxException {
        return encode(uri, true);
    }

    /**
     * Percent-encodes part of a uri, encoding all reserved characters.
     * Modeled on the Javascript implementation of encodeURIComponent; escapes
     * are written with lowercase hex digits.
     *
     * @throws URISyntaxException if <code>uriComponent</code> contains an
     *         unpaired surrogate character
     */
    public static String encodeUriComponent(String uriComponent) throws URISyntaxException {
        return encode(uriComponent, false);
    }

    /**
     * Decodes <code>uri</code> replacing '+' with ' ' and percent encoded characters
     * with their UTF-8 equivalents.
     *
     * @return the decoded string
     * @throws URISyntaxException if <code>uri</code> cannot be decoded; the
     *         underlying {@link IllegalArgumentException} is attached as the cause
     */
    public static String decodeToUtf8(String uri) throws URISyntaxException {
        try {
            return URLDecoder.decode(uri, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        } catch (IllegalArgumentException iae) {
            // URISyntaxException has no constructor taking a cause, so attach
            // the decoder's exception explicitly to keep its diagnostic detail.
            URISyntaxException invalid = new URISyntaxException(uri, "invalid url");
            invalid.initCause(iae);
            throw invalid;
        }
    }

    /**
     * @return the canonical lower case host or null if <code>uri</code> does
     * not have a host
     */
    public static String getCanonicalHost(URI uri) {
        String host = uri.getHost();
        return host != null ? host.toLowerCase(Locale.US) : null;
    }

    /**
     * Percent-encodes every character of <code>str</code> that
     * {@link #encodeUnescaped(char, boolean)} does not allow through, writing
     * each such character as the escaped bytes of its UTF-8 encoding.
     * <p>
     * Code taken from rhino-1.7R1/src/org/mozilla/javascript/NativeGlobal.java
     * and slightly adapted. It is released under the MPL 1.1 and GPL 2.0.
     *
     * @param str the string to encode
     * @param fullUri true to leave the reserved URI characters unescaped
     * @return <code>str</code> itself if nothing needed escaping, otherwise the
     *         encoded copy
     * @throws URISyntaxException if <code>str</code> contains an unpaired
     *         surrogate character
     */
    private static String encode(String str, boolean fullUri) throws URISyntaxException {
        byte[] utf8buf = null;
        StringBuilder sb = null;

        for (int k = 0, length = str.length(); k != length; ++k) {
            char c = str.charAt(k);
            if (encodeUnescaped(c, fullUri)) {
                if (sb != null) {
                    sb.append(c);
                }
                continue;
            }

            if (sb == null) {
                // First character that needs escaping: start the output
                // lazily, seeded with the prefix that was copied verbatim.
                sb = new StringBuilder(length + 3);
                sb.append(str);
                sb.setLength(k);
                utf8buf = new byte[6];
            }

            // A low surrogate that is not preceded by a high surrogate.
            if (0xDC00 <= c && c <= 0xDFFF) {
                throw new URISyntaxException(str, c + " outside of valid range");
            }

            int value;
            if (c < 0xD800 || 0xDBFF < c) {
                value = c;
            } else {
                // High surrogate: combine it with the following low surrogate
                // into a single code point.
                k++;
                if (k == length) {
                    throw new URISyntaxException(str, "out of chars");
                }
                char c2 = str.charAt(k);
                if (!(0xDC00 <= c2 && c2 <= 0xDFFF)) {
                    throw new URISyntaxException(str, "outside of valid range");
                }
                value = ((c - 0xD800) << 10) + (c2 - 0xDC00) + 0x10000;
            }

            int byteCount = oneUcs4ToUtf8Char(utf8buf, value);
            for (int j = 0; j < byteCount; j++) {
                int d = 0xff & utf8buf[j];
                sb.append('%');
                sb.append(toHexChar(d >>> 4));
                sb.append(toHexChar(d & 0xf));
            }
        }
        return (sb == null) ? str : sb.toString();
    }

    /**
     * Returns the lowercase hexadecimal digit for a value in the range 0-15.
     *
     * @throws IllegalArgumentException if <code>i</code> is outside that range
     */
    private static char toHexChar(int i) {
        if (i >> 4 != 0) {
            throw new IllegalArgumentException("not a 4-bit value: " + i);
        }
        return (char) ((i < 10) ? i + '0' : i - 10 + 'a');
    }

    /**
     * Tells whether <code>c</code> is copied to the output as is. ASCII
     * letters and digits and the marks <code>-_.!~*'()</code> always are; the
     * characters in {@link #URI_DECODE_RESERVED} are only when
     * <code>fullUri</code> is true.
     */
    private static boolean encodeUnescaped(char c, boolean fullUri) {
        if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9')) {
            return true;
        }
        if ("-_.!~*'()".indexOf(c) >= 0) {
            return true;
        }
        if (fullUri) {
            return URI_DECODE_RESERVED.indexOf(c) >= 0;
        }
        return false;
    }

    /* Convert one UCS-4 char and write it into a UTF-8 buffer, which must be
     * at least 6 bytes long. Return the number of UTF-8 bytes of data written.
     */
    private static int oneUcs4ToUtf8Char(byte[] utf8Buffer, int ucs4Char) {
        int utf8Length = 1;

        //JS_ASSERT(ucs4Char <= 0x7FFFFFFF);
        if ((ucs4Char & ~0x7F) == 0) {
            utf8Buffer[0] = (byte) ucs4Char;
        } else {
            // Work out the sequence length: two bytes cover 11 bits and each
            // further byte adds 5 more.
            int a = ucs4Char >>> 11;
            utf8Length = 2;
            while (a != 0) {
                a >>>= 5;
                utf8Length++;
            }
            // Fill the continuation bytes from the end, 6 payload bits each.
            int i = utf8Length;
            while (--i > 0) {
                utf8Buffer[i] = (byte) ((ucs4Char & 0x3F) | 0x80);
                ucs4Char >>>= 6;
            }
            // Lead byte: utf8Length high bits set, followed by the remaining bits.
            utf8Buffer[0] = (byte) (0x100 - (1 << (8 - utf8Length)) + ucs4Char);
        }
        return utf8Length;
    }
}