/* This code is part of Freenet. It is distributed under the GNU General
* Public License, version 2 (or at your option any later version). See
* http://www.gnu.org/ for further details of the GPL. */
package freenet.support;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
/**
* Decode encoded URLs (or parts of URLs). @see URLEncoder.
* This class does NOT decode application/x-www-form-urlencoded
* strings, unlike @see java.net.URLDecoder. What it does is
* decode bits of URIs, in UTF-8. This simply means that it
* converts encoded characters (assuming a charset of UTF-8).
* java.net.URI does similar things internally.
*
* @author <a href="http://www.doc.ic.ac.uk/~twh1/">Theodore Hong</a>
* Originally!
**/
public class URLDecoder
{
// test harness
public static void main(String[] args) throws URLEncodedFormatException {
for (String arg: args) {
System.out.println(arg + " -> " + decode(arg, false));
}
}
/**
* Decodes a URLEncoder format string.
*
* @param s String to be translated.
* @param tolerant If true, be tolerant of bogus escapes; bogus escapes are treated as
* just plain characters. Not recommended; a hack to allow users to paste in URLs
* containing %'s.
* @return the translated String.
*
**/
public static String decode(String s, boolean tolerant) throws URLEncodedFormatException {
if (s.length() == 0)
return "";
int len = s.length();
ByteArrayOutputStream decodedBytes = new ByteArrayOutputStream();
boolean hasDecodedSomething = false;
for (int i = 0; i < len; i++) {
char c = s.charAt(i);
if (c == '%') {
if (i >= len - 2) {
throw new URLEncodedFormatException(s);
}
char[] hexChars = new char[2];
hexChars[0] = s.charAt(++i);
hexChars[1] = s.charAt(++i);
String hexval = new String(hexChars);
try {
long read = Fields.hexToLong(hexval);
if (read == 0)
throw new URLEncodedFormatException("Can't encode" + " 00");
decodedBytes.write((int) read);
hasDecodedSomething = true;
} catch (NumberFormatException nfe) {
// Not encoded?
if(tolerant && !hasDecodedSomething) {
try {
byte[] buf = ('%'+hexval).getBytes("UTF-8");
decodedBytes.write(buf, 0, buf.length);
continue;
} catch (UnsupportedEncodingException e) {
throw new Error("Impossible: JVM doesn't support UTF-8: " + e, e);
}
}
throw new URLEncodedFormatException("Not a two character hex % escape: "+hexval+" in "+s);
}
} else {
try {
byte[] encoded = String.valueOf(c).getBytes("UTF-8");
decodedBytes.write(encoded, 0, encoded.length);
} catch (UnsupportedEncodingException e) {
throw new Error("Impossible: JVM doesn't support UTF-8: " + e, e);
}
}
}
try {
decodedBytes.close();
return new String(decodedBytes.toByteArray(), "utf-8");
} catch (IOException ioe1) {
/* if this throws something's wrong */
}
throw new URLEncodedFormatException(s);
}
}