package co.codewizards.cloudstore.core.util;
import static co.codewizards.cloudstore.core.util.AssertUtil.*;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
/**
* URL-decoder corresponding to {@link UrlEncoder}.
* <p>
* In contrast to the {@link java.net.URLDecoder URLDecoder}, this class therefore does <b>not</b> decode
* '+' (plus) into ' ' (space)!
* <p>
* Additionally, this class does not use the default encoding, but always UTF-8, if not specified
* otherwise.
* <p>
* The reason for this class is that {@link java.io.File#toURI() File.toURI()}
* does not encode a "+" sign. Therefore, our URL-encoding and decoding must
* not handle the "+" specifically.
* <p>
* Another reason is <a href="https://java.net/jira/browse/JERSEY-417">JERSEY-417</a>.
* I originally used {@code org.glassfish.jersey.uri.UriComponent.encode(String, Type)}
* at some code locations, but since not all code locations have a dependency on Jersey,
* I decided to switch consistently everywhere to {@link UrlEncoder} and {@code UrlDecoder}.
* <p>
* This class was copied from {@link java.net.URLDecoder URLDecoder} and changed to fit our needs.
* @see UrlEncoder
* @author Marco หงุ่ยตระกูล-Schulze - marco at codewizards dot co
*/
public final class UrlDecoder {

	private UrlDecoder() {
		// Static utility class - not meant to be instantiated.
	}

	/**
	 * Decodes a percent-encoded string using UTF-8.
	 * <p>
	 * Only escape sequences of the form "<i>{@code %xy}</i>" are decoded; a '+' (plus) is
	 * left untouched and is <b>not</b> converted into a ' ' (space).
	 *
	 * @param s the {@code String} to decode; must not be {@code null}
	 * @return the decoded {@code String}; {@code s} itself, if it contains no escape sequence
	 * @throws IllegalArgumentException if {@code s} contains a malformed or incomplete
	 * escape sequence
	 * @see UrlEncoder#encode(String)
	 */
	public static String decode(String s) {
		return decode(s, StandardCharsets.UTF_8);
	}

	/**
	 * Decodes a percent-encoded string using the character encoding with the given name.
	 * <p>
	 * The supplied encoding is used to determine what characters are represented by any
	 * consecutive sequences of the form "<i>{@code %xy}</i>". A '+' (plus) is left untouched
	 * and is <b>not</b> converted into a ' ' (space).
	 * <p>
	 * <em><strong>Note:</strong> The <a href=
	 * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
	 * World Wide Web Consortium Recommendation</a> states that
	 * UTF-8 should be used. Not doing so may introduce
	 * incompatibilities.</em>
	 *
	 * @param s the {@code String} to decode; must not be {@code null}
	 * @param enc the name of a supported {@linkplain Charset character encoding};
	 * must not be {@code null}
	 * @return the decoded {@code String}; {@code s} itself, if it contains no escape sequence
	 * @throws UnsupportedEncodingException if the named character encoding is illegal
	 * or not supported
	 * @throws IllegalArgumentException if {@code s} contains a malformed or incomplete
	 * escape sequence
	 * @see UrlEncoder#encode(String, String)
	 * @deprecated UTF-8 should be used; it is thus recommended to invoke {@link #decode(String)} instead.
	 */
	@Deprecated
	public static String decode(String s, String enc) throws UnsupportedEncodingException {
		assertNotNull(s, "s");
		assertNotNull(enc, "enc");
		Charset charset;
		try {
			charset = Charset.forName(enc);
		} catch (IllegalCharsetNameException | UnsupportedCharsetException e) {
			// UnsupportedEncodingException has no cause-accepting constructor,
			// hence the original exception is attached via initCause(...).
			UnsupportedEncodingException x = new UnsupportedEncodingException(enc);
			x.initCause(e);
			throw x;
		}
		return decode(s, charset);
	}

	/**
	 * Decodes a percent-encoded string using the given character encoding.
	 * <p>
	 * The supplied encoding is used to determine what characters are represented by any
	 * consecutive sequences of the form "<i>{@code %xy}</i>". A '+' (plus) is left untouched
	 * and is <b>not</b> converted into a ' ' (space). All other characters are copied as they are.
	 * <p>
	 * <em><strong>Note:</strong> The <a href=
	 * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
	 * World Wide Web Consortium Recommendation</a> states that
	 * UTF-8 should be used. Not doing so may introduce
	 * incompatibilities.</em>
	 *
	 * @param s the {@code String} to decode; must not be {@code null}
	 * @param charset the {@linkplain Charset character encoding}; must not be {@code null}
	 * @return the decoded {@code String}; {@code s} itself, if it contains no escape sequence
	 * @throws IllegalArgumentException if a '%' is not followed by exactly two ASCII
	 * hexadecimal digits (this includes an incomplete escape sequence at the end of {@code s})
	 * @see UrlEncoder#encode(String, Charset)
	 * @deprecated UTF-8 should be used; it is thus recommended to invoke {@link #decode(String)} instead.
	 */
	@Deprecated
	public static String decode(String s, Charset charset) {
		assertNotNull(s, "s");
		assertNotNull(charset, "charset");

		final int numChars = s.length();
		boolean needToChange = false;
		// Purely local buffer => no need for the synchronised StringBuffer.
		StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars);
		byte[] bytes = null;
		int i = 0;

		while (i < numChars) {
			char c = s.charAt(i);
			if (c != '%') {
				sb.append(c);
				i++;
				continue;
			}

			/*
			 * Starting with this instance of %, process all consecutive
			 * substrings of the form %xy. Each substring %xy yields one byte.
			 * All consecutive bytes obtained this way are then converted to
			 * whatever character(s) they represent in the provided encoding.
			 */

			// (numChars-i)/3 is an upper bound for the number of remaining bytes.
			// Allocated once at the first '%'; later escape runs start further
			// right and thus always fit into the same array.
			if (bytes == null)
				bytes = new byte[(numChars - i) / 3];

			int pos = 0;
			while (i + 2 < numChars && c == '%') {
				bytes[pos++] = (byte) decodeHexPair(s, i + 1);
				i += 3;
				if (i < numChars)
					c = s.charAt(i);
			}

			// A trailing, incomplete byte encoding such as "%x" is illegal.
			if (i < numChars && c == '%')
				throw new IllegalArgumentException(
						"URLDecoder: Incomplete trailing escape (%) pattern");

			sb.append(new String(bytes, 0, pos, charset));
			needToChange = true;
		}
		return needToChange ? sb.toString() : s;
	}

	/**
	 * Converts the two characters at {@code index} and {@code index + 1} into the byte value
	 * they denote in hexadecimal notation.
	 * <p>
	 * In contrast to {@code Integer.parseInt(..., 16)}, this neither accepts a sign
	 * ('+' / '-') nor non-ASCII digits.
	 *
	 * @param s the string containing the two hex digits
	 * @param index the index of the first hex digit; {@code index + 1} must be inside {@code s}
	 * @return the value in the range 0 to 255
	 * @throws IllegalArgumentException if one of the two characters is not an ASCII hex digit
	 */
	private static int decodeHexPair(String s, int index) {
		int hi = hexDigit(s.charAt(index));
		int lo = hexDigit(s.charAt(index + 1));
		if (hi < 0 || lo < 0)
			throw new IllegalArgumentException(
					"URLDecoder: Illegal hex characters in escape (%) pattern - For input string: \""
					+ s.substring(index, index + 2) + "\"");
		return (hi << 4) | lo;
	}

	/**
	 * Gets the numeric value of a single ASCII hexadecimal digit.
	 *
	 * @param c the character to convert
	 * @return the value in the range 0 to 15, or -1 if {@code c} is not an ASCII hex digit
	 */
	private static int hexDigit(char c) {
		if (c >= '0' && c <= '9')
			return c - '0';
		if (c >= 'a' && c <= 'f')
			return c - 'a' + 10;
		if (c >= 'A' && c <= 'F')
			return c - 'A' + 10;
		return -1;
	}
}