URIUtils.java example

Explorer
LimeWire-Pirate-Edition-master
package org.limewire.util;

import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.util.Locale;

/**
 * Static helpers for building, percent-encoding, decoding and inspecting
 * {@link URI}s.
 */
public class URIUtils {

    /**
     * Reserved characters that are left unescaped when encoding a full URI
     * (as opposed to a single URI component).
     */
    private static final String URI_DECODE_RESERVED = ";/?:@&=+$,#";

    /** Unreserved punctuation that is never escaped. */
    private static final String URI_UNESCAPED_MARKS = "-_.!~*'()";

    /**
     * Creates a <code>URI</code> from the input string.
     * The preferred way to invoke this method is with an URL-encoded string.
     * <p>
     * However, if the string cannot be parsed as given, it is percent-encoded
     * with {@link #encodeUri(String)} and parsed again. It is ambiguous
     * whether a string has been encoded or not, which is why it is preferred
     * to pass in the string pre-encoded.
     * <p>
     * This method is useful when manipulating a URI and you don't know if it is
     * encoded or not.
     *
     * @param uriString the uri to be created
     * @return the parsed URI
     * @throws URISyntaxException if the string can be parsed neither as given
     *         nor after percent-encoding; when both parses fail the exception
     *         from the first parse is thrown, with the second one attached as
     *         its cause
     */
    public static URI toURI(final String uriString) throws URISyntaxException {
        URI uri;
        try {
            uri = new URI(uriString);
        } catch (URISyntaxException e) {
            // the uriString was perhaps not encoded.
            // try to percent encode it.
            String encodedURIString = encodeUri(uriString);
            try {
                uri = new URI(encodedURIString);
            } catch (URISyntaxException e1) {
                // encoding the uriString didn't help.
                // this probably means there is something structurally
                // wrong with it.

                // NOTE: throwing the original exception.
                // initing with second Exception.  Not the normal
                // use case for initCause(), but this will at least capture both
                // stack traces
                if (e.getCause() == null) {
                    e.initCause(e1);
                }
                throw e;
            }
        }
        return uri;
    }

    /**
     * Returns the port for the given URI. If the URI carries no explicit port,
     * the scheme is consulted: 80 for http, 443 for https (case-insensitive).
     *
     * @param uri the URI to inspect
     * @return the explicit port, the scheme default, or -1 if neither applies
     */
    public static int getPort(URI uri) {
        int port = uri.getPort();
        if (port == -1) {
            String scheme = uri.getScheme();
            if ("http".equalsIgnoreCase(scheme)) {
                port = 80;
            } else if ("https".equalsIgnoreCase(scheme)) {
                port = 443;
            }
        }
        return port;
    }

    /**
     * Percent encodes <code>uri</code> leaving slashes and other reserved
     * characters untouched. This follows the JavaScript <code>encodeURI</code>
     * function, except that hex digits are emitted in lower case.
     *
     * @param uri the string to encode
     * @return the encoded string, or <code>uri</code> itself if nothing had
     *         to be escaped
     * @throws URISyntaxException if <code>uri</code> contains an unpaired
     *         surrogate
     */
    public static String encodeUri(String uri) throws URISyntaxException {
        return encode(uri, true);
    }

    /**
     * Percent-encodes part of a uri, encoding all reserved characters. This
     * follows the JavaScript <code>encodeURIComponent</code> function, except
     * that hex digits are emitted in lower case.
     *
     * @param uriComponent the string to encode
     * @return the encoded string, or <code>uriComponent</code> itself if
     *         nothing had to be escaped
     * @throws URISyntaxException if <code>uriComponent</code> contains an
     *         unpaired surrogate
     */
    public static String encodeUriComponent(String uriComponent) throws URISyntaxException {
        return encode(uriComponent, false);
    }

    /**
     * Decodes <code>uri</code> replacing '+' with ' ' and percent encoded
     * characters with their UTF-8 equivalents.
     *
     * @param uri the string to decode
     * @return the decoded string; the input is returned unchanged if no
     *         characters had to be decoded
     * @throws URISyntaxException if <code>uri</code> contains a malformed
     *         percent escape; the underlying
     *         <code>IllegalArgumentException</code> is attached as the cause
     */
    public static String decodeToUtf8(String uri) throws URISyntaxException {
        try {
            return URLDecoder.decode(uri, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        } catch (IllegalArgumentException iae) {
            // URISyntaxException has no cause-taking constructor, so attach
            // the decoder's exception explicitly to keep its diagnostics.
            URISyntaxException syntaxException = new URISyntaxException(uri, "invalid url");
            syntaxException.initCause(iae);
            throw syntaxException;
        }
    }

    /**
     * @return the canonical lower case host or null if <code>uri</code> does
     * not have a host
     */
    public static String getCanonicalHost(URI uri) {
        String host = uri.getHost();
        return host != null ? host.toLowerCase(Locale.US) : null;
    }

    /**
     * Percent-encodes every character of <code>str</code> that
     * {@link #encodeUnescaped(char, boolean)} does not allow through, using
     * the UTF-8 bytes of the character's code point.
     * <p>
     * Code taken from rhino-1.7R1/src/org/mozilla/javascript/NativeGlobal.java
     * and slightly adapted. It is released under the MPL 1.1 and GPL 2.0.
     *
     * @param str the string to encode
     * @param fullUri true to leave URI reserved characters unescaped
     * @return <code>str</code> itself if nothing needed escaping, otherwise
     *         the encoded copy
     * @throws URISyntaxException if <code>str</code> contains an unpaired
     *         surrogate
     */
    private static String encode(String str, boolean fullUri) throws URISyntaxException {
        byte[] utf8buf = null;
        // Allocated lazily: stays null until the first character that must be escaped.
        StringBuilder sb = null;

        for (int k = 0, length = str.length(); k != length; ++k) {
            char c = str.charAt(k);
            if (encodeUnescaped(c, fullUri)) {
                if (sb != null) {
                    sb.append(c);
                }
            } else {
                if (sb == null) {
                    // Seed the builder with the unescaped prefix seen so far.
                    sb = new StringBuilder(length + 3);
                    sb.append(str);
                    sb.setLength(k);
                    utf8buf = new byte[6];
                }
                if (Character.isLowSurrogate(c)) {
                    // A low surrogate without a preceding high surrogate.
                    throw new URISyntaxException(str, c + " outside of valid range");
                }
                int value;
                if (!Character.isHighSurrogate(c)) {
                    value = c;
                } else {
                    // High surrogate: consume the following low surrogate too.
                    k++;
                    if (k == length) {
                        throw new URISyntaxException(str, "out of chars");
                    }
                    char c2 = str.charAt(k);
                    if (!Character.isLowSurrogate(c2)) {
                        throw new URISyntaxException(str, "outside of valid range");
                    }
                    value = Character.toCodePoint(c, c2);
                }
                assert utf8buf != null;
                int byteCount = oneUcs4ToUtf8Char(utf8buf, value);
                for (int j = 0; j < byteCount; j++) {
                    int d = 0xff & utf8buf[j];
                    sb.append('%');
                    sb.append(toHexChar(d >>> 4));
                    sb.append(toHexChar(d & 0xf));
                }
            }
        }
        return (sb == null) ? str : sb.toString();
    }

    /**
     * Converts a value in the range 0-15 to its lower-case hex digit.
     *
     * @throws IllegalArgumentException if <code>i</code> is outside 0-15
     */
    private static char toHexChar(int i) {
        if (i >> 4 != 0) {
            throw new IllegalArgumentException("not a 4-bit value: " + i);
        }
        return (char) ((i < 10) ? i + '0' : i - 10 + 'a');
    }

    /**
     * Tells whether <code>c</code> may be copied to the output without
     * escaping: ASCII letters and digits, the unreserved marks, and - only
     * when <code>fullUri</code> is true - the URI reserved characters.
     */
    private static boolean encodeUnescaped(char c, boolean fullUri) {
        if (('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z')
            || ('0' <= c && c <= '9')) {
            return true;
        }
        if (URI_UNESCAPED_MARKS.indexOf(c) >= 0) {
            return true;
        }
        if (fullUri) {
            return URI_DECODE_RESERVED.indexOf(c) >= 0;
        }
        return false;
    }

    /**
     * Convert one UCS-4 char and write it into a UTF-8 buffer, which must be
     * at least 6 bytes long.  Return the number of UTF-8 bytes of data written.
     */
    private static int oneUcs4ToUtf8Char(byte[] utf8Buffer, int ucs4Char) {
        int utf8Length = 1;

        //JS_ASSERT(ucs4Char <= 0x7FFFFFFF);
        if ((ucs4Char & ~0x7F) == 0) {
            // 7-bit value: a single byte, stored as is.
            utf8Buffer[0] = (byte) ucs4Char;
        } else {
            int i;
            // Two bytes hold 11 payload bits; each further byte adds 5 more.
            int a = ucs4Char >>> 11;
            utf8Length = 2;
            while (a != 0) {
                a >>>= 5;
                utf8Length++;
            }
            // Fill continuation bytes (10xxxxxx) from last to first.
            i = utf8Length;
            while (--i > 0) {
                utf8Buffer[i] = (byte) ((ucs4Char & 0x3F) | 0x80);
                ucs4Char >>>= 6;
            }
            // Lead byte: utf8Length high bits set, followed by the remaining payload.
            utf8Buffer[0] = (byte) (0x100 - (1 << (8 - utf8Length)) + ucs4Char);
        }
        return utf8Length;
    }

}