// URIPreEncoder.java example (Explorer: fred-master)
package freenet.support;

import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * Replace any invalid characters in a string (to be converted to a URI) with encoded chars using UTF-8.
 * 
 * This does NOT do the same thing as either java.net.URLEncoder or freenet.support.URLEncoder!
 * 
 * Its purpose is simply to allow us to accept "dirty" URIs - URIs which may contain e.g. spaces -
 * by preprocessing them before they reach the URI(String) constructor. Characters listed in
 * {@link #allowedChars} are copied through untouched; everything else is replaced by the
 * percent-encoded bytes of its UTF-8 representation (lowercase hex digits).
 * 
 * I _think_ this may be what URLEncoder is for - but it seems to have become rather confused.
 * Somebody needs to check all the calls to URLEncoder...
 */
public class URIPreEncoder {
	
	// Characters that are passed through unchanged.
	// We deliberately include '%' because we don't want to interfere with stuff which is already encoded.
	// '#' is included too, so that anchors (fragments) survive.
	public final static String allowedChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-!.~'()*,;:$&+=?/@%#";

	/**
	 * Percent-encode every character of <code>s</code> that is not in {@link #allowedChars}.
	 * 
	 * The string is processed one Unicode code point at a time, not one UTF-16 char at a time,
	 * so that a supplementary character (a surrogate pair) is encoded as its single four-byte
	 * UTF-8 sequence rather than as two separately-encoded halves.
	 * 
	 * @param s The possibly "dirty" URI string. Must not be null.
	 * @return The string with every disallowed character replaced by "%xx" escapes of its UTF-8 bytes.
	 * @throws NullPointerException If <code>s</code> is null.
	 */
	public static String encode(String s) {
		StringBuilder output = new StringBuilder(s.length()*2);
		int i = 0;
		while(i < s.length()) {
			int codePoint = s.codePointAt(i);
			// 1 for a BMP char (or an unpaired surrogate), 2 for a valid surrogate pair.
			int width = Character.charCount(codePoint);
			if(allowedChars.indexOf(codePoint) >= 0) {
				output.appendCodePoint(codePoint);
			} else {
				appendPercentEncoded(output, s.substring(i, i + width));
			}
			i += width;
		}
		return output.toString();
	}

	/**
	 * Append the UTF-8 bytes of <code>text</code> to <code>output</code>, each as '%' followed by
	 * exactly two lowercase hex digits.
	 */
	private static void appendPercentEncoded(StringBuilder output, String text) {
		byte[] utf8;
		try {
			utf8 = text.getBytes("UTF-8");
		} catch (UnsupportedEncodingException e) {
			throw new Error("Impossible: JVM doesn't support UTF-8: " + e, e);
		}
		for(byte b : utf8) {
			// Mask to get the unsigned value; bytes >= 0x80 are negative in Java.
			int x = b & 0xff;
			output.append('%');
			// Integer.toHexString() drops the leading zero, so pad to two digits.
			if(x < 16)
				output.append('0');
			output.append(Integer.toHexString(x));
		}
	}
	
	/**
	 * Create a new URI from a string, which may contain characters which should have been encoded.
	 * @param s The possibly "dirty" URI string; it is passed through {@link #encode(String)} first.
	 * @return The URI parsed from the encoded string.
	 * @throws URISyntaxException If the string does not represent a valid URI, even after encoding.
	 */
	public static URI encodeURI(String s) throws URISyntaxException {
		return new URI(encode(s));
	}
}