package org.ocpsoft.urlbuilder.util;
import java.nio.charset.Charset;
/**
 * Decodes percent-encoded URL text. Runs of consecutive {@code %XX} escapes are collected into a byte
 * sequence and decoded as UTF-8, so a character encoded as several bytes is restored correctly.
 */
public class Decoder
{
    private static final Charset UTF8 = Charset.forName("UTF8");

    /**
     * Decodes a URL path. A {@code '+'} is kept as a literal plus sign.
     *
     * @param path the encoded text; must not be {@code null}
     * @return the decoded text
     */
    public static String path(final CharSequence path)
    {
        return decode(path, false);
    }

    /**
     * Decodes a URL query string. A {@code '+'} is decoded to a space.
     *
     * @param query the encoded text; must not be {@code null}
     * @return the decoded text
     */
    public static String query(final CharSequence query)
    {
        return decode(query, true);
    }

    /**
     * Decodes percent-encoded text.
     * <p>
     * A run of consecutive escapes that contains a malformed escape (non-hex digits, or fewer than two
     * characters after the {@code '%'}) is replaced as a whole by a single U+FFFD replacement character.
     * Every escape, malformed or not, consumes three characters of input.
     *
     * @param path the encoded text; must not be {@code null}
     * @param query {@code true} to decode {@code '+'} as a space, {@code false} to keep it literally
     * @return the decoded text
     */
    public static String decode(final CharSequence path, final boolean query)
    {
        final int length = path.length();
        final StringBuilder decoded = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            final char current = path.charAt(pos);

            // '+' -> ' ' for query strings
            if (query && current == '+') {
                decoded.append(' ');
                pos++;
            }
            // percent-encoded values
            else if (current == '%') {
                // a single Unicode char may be represented by multiple percent encoded bytes;
                // each escape takes three characters, so the rest of the input bounds the run
                final byte[] bytes = new byte[(length - pos) / 3];
                int count = 0;
                boolean invalid = false;
                while (pos < length && path.charAt(pos) == '%') {
                    // a truncated escape at the end of the input is malformed, not ignorable
                    final int value = (pos + 2 < length)
                                ? hexByte(path.charAt(pos + 1), path.charAt(pos + 2))
                                : -1;
                    if (value < 0) {
                        invalid = true;
                    }
                    else {
                        bytes[count++] = (byte) value;
                    }
                    pos += 3;
                }

                if (invalid) {
                    // invalid percent encoded values are represented the same way UTF-8 decoding does it
                    decoded.append('\uFFFD');
                }
                else {
                    decoded.append(new String(bytes, 0, count, UTF8));
                }
            }
            // not escaped
            else {
                decoded.append(current);
                pos++;
            }
        }
        return decoded.toString();
    }

    /**
     * Combines two hex digits into a byte value in the range 0-255, or returns -1 if either character is
     * not an ASCII hex digit. Unlike {@code Integer.parseInt}, a leading sign is rejected.
     */
    private static int hexByte(final char high, final char low)
    {
        final int hi = hexDigit(high);
        final int lo = hexDigit(low);
        return (hi < 0 || lo < 0) ? -1 : (hi << 4) | lo;
    }

    /** Returns the value of an ASCII hex digit, or -1 if the character is not one. */
    private static int hexDigit(final char c)
    {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }
}