package freenet.support; import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URISyntaxException; /** * Replace any invalid characters in a string (to be converted to a URI) with encoded chars using UTF-8. * * This does NOT do the same thing as either java.net.URLEncoder or freenet.support.URLEncoder! * * Its purpose is simply to allow us to accept "dirty" URIs - URIs which may contain e.g. spaces - * by preprocessing them before they reach the URI(String) constructor. * * I _think_ this may be what URLEncoder is for - but it seems to have become rather confused. * Somebody needs to check all the calls to URLEncoder... */ public class URIPreEncoder { // We deliberately include '%' because we don't want to interfere with stuff which is already encoded. // add "#" here too, this allow anchors public final static String allowedChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-!.~'()*,;:$&+=?/@%#"; public static String encode(String s) { StringBuilder output = new StringBuilder(s.length()*2); for(int i=0;i<s.length();i++) { char c = s.charAt(i); if(allowedChars.indexOf(c) >= 0) { output.append(c); } else { String tmp = String.valueOf(c); try { for(byte u: tmp.getBytes("UTF-8")) { int x = u & 0xff; output.append('%'); if(x < 16) output.append('0'); output.append(Integer.toHexString(x)); } } catch (UnsupportedEncodingException e) { throw new Error("Impossible: JVM doesn't support UTF-8: " + e, e); } } } return output.toString(); } /** * Create a new URI from a string, which may contain characters which should have been encoded. * @throws URISyntaxException If the string does not represent a valid URI, even after encoding. */ public static URI encodeURI(String s) throws URISyntaxException { return new URI(encode(s)); } }