// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.gdata.util.httputil; import com.google.gdata.util.common.base.StringUtil; import com.google.common.collect.Lists; import com.google.gdata.util.parser.Chset; import com.google.gdata.util.parser.Parser; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.nio.charset.Charset; import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.UnsupportedCharsetException; import java.util.LinkedList; import java.util.concurrent.atomic.AtomicReference; /** * This class has been <b>deprecated</b>; use {@link * com.google.gdata.util.common.net.UriParameterMap#parse(String,java.nio.charset.Charset)} * Utility for parsing application/x-www-form-urlencoded content as * sent by user agents in the query string of GET form submissions and * the body of POST form submissions. * * */ public class FormUrlDecoder { private static Parser<Result> parser; private static class Result { LinkedList<Parameter> params; String encoding; public Result(String encoding) { this.params = Lists.newLinkedList(); this.encoding = (encoding == null) ? "ISO-8859-1" : encoding; } } private FormUrlDecoder() { } /** * @deprecated Do not use. (Currently only needed internally by * UriParameterMap.) */ @Deprecated public interface Callback { void handleParameter(String name, String value); } /** * @deprecated Do not use. (Currently only needed internally by * UriParameterMap.) */ @Deprecated public static void parseWithCallback(String str, String encoding, Callback callback) { if (StringUtil.isEmpty(str)) { return; } Result result = new Result(encoding); parser.parse(str, result); for (Parameter param : result.params) { callback.handleParameter(param.name, param.value); } } /** * @deprecated Use {@code UriParameterMap.parse(query, encoding)}. To convert * from the string to the {@link java.nio.charset.Charset} instance, see * {@link com.google.gdata.util.common.base.Charsets}. Note that an encoding of null * will default to ISO-8859-1 in this deprecated implementation, whereas the * classes in common.net default to UTF-8 encoding. */ @Deprecated public static ParamMap parse(String str, ParamMap map, String encoding) { // If the parameters string is empty, we shouldn't add a key/value pair of // empty strings. if ("".equals(str)) { return map == null ? new ParamMap() : map; } final AtomicReference<ParamMap> outMap = new AtomicReference<ParamMap>(map); parseWithCallback(str, encoding, new Callback() { public void handleParameter(String name, String value) { ParamMap map = outMap.get(); if (map == null) { map = new ParamMap(); outMap.set(map); } map.append(name, value); } }); return outMap.get(); } /** * Returns the canonical name for the specified charset. * * @param charset Some known alias for a charset * @return The canonical name for the charset, or the original alias * if no mapping was found */ private static String getCanonicalEncodingName(String charset) { String canonicalName = charset; if (charset != null && charset.length() > 0) { try { canonicalName = Charset.forName(charset).name(); } catch (UnsupportedCharsetException uce) { // just return the alias } catch (IllegalCharsetNameException ice) { // just return the alias } } return canonicalName; } /** * URL decodes the section of {@code buf} from {@code start} (inclusive) * to {@code end} (exclusive) using the given {@code encoding}. It correctly * handles improperly URL encoded strings for character sets in which * ascii bytes do not always indicate ascii characters. */ private static String decodeString(char[] buf, int start, int end, String encoding) { String str = new String(buf, start, end - start); try { if (requiresByteLevelDecoding(encoding)) { // Java's specification of URLEncoding states that non-ascii-alphanum // characters should be represented by a URL encoded sequence of bytes. // Thus, the proper way perform url encoding is to at the string level, // encoding all non-ascii-alphanum characters to a url encoded sequence // of bytes according to the character set. However, most browsers // implement URLEncoding improperly, encoding the entire string to bytes // first, and then URL-escaping all of the non-ascii bytes. The two // behaviors work fine for UTF-8, because ascii bytes in UTF-8 // correspond to ascii characters. However, a problem arises for // encodings in which ascii bytes can be part of a byte representation // of non-ascii characters. It is these encodings for which we have to // url decode directly to the byte level, and then encode the bytes with // the given encoding. To perform this byte level decoding, we pivot // through ISO-8859-1, the encoding which treats all single bytes // as their corresponding character values. byte[] rawBytes = URLDecoder.decode(str, "ISO-8859-1").getBytes("ISO-8859-1"); return new String(rawBytes, encoding); } return URLDecoder.decode(str, encoding); } catch (IllegalArgumentException iae) { // According to the javadoc of URLDecoder, when the input string is // illegal, it could either leave the illegal characters alone or throw // an IllegalArgumentException! To deal with both consistently, we // ignore IllegalArgumentException and just return the original string. return str; } catch (UnsupportedEncodingException e) { return str; } } /** * Charsets for which a byte with an ascii value does not necessarily map * to the corresponding ascii character. */ private static boolean requiresByteLevelDecoding(String encoding) { encoding = getCanonicalEncodingName(encoding).toUpperCase(); // Use endsWith() to include our wrapper character sets, whose names are // of the form "X-Variant-Shift_JIS" or "X-Variant-windows-31j". return (encoding.endsWith("SHIFT_JIS") || encoding.endsWith("WINDOWS-31J")); } private static class NameAction implements com.google.gdata.util.parser.Callback<Result> { public void handle(char[] buf, int start, int end, Result result) { Parameter param = new Parameter(); param.name = decodeString(buf, start, end, result.encoding); result.params.addLast(param); } } private static class ValueAction implements com.google.gdata.util.parser.Callback<Result> { public void handle(char[] buf, int start, int end, Result result) { Parameter param = result.params.getLast(); param.value = decodeString(buf, start, end, result.encoding); } } private static class Parameter { String name = null; String value = ""; } static { Chset nameToken = Chset.difference(Chset.ANYCHAR, new Chset("&=")); Chset valueToken = Chset.difference(Chset.ANYCHAR, new Chset("&")); Parser<Result> name = nameToken.star().action(new NameAction()); Parser<Result> value = valueToken.plus().action(new ValueAction()); value = value.optional(); value = Parser.sequence(new Chset('='), value); Parser<Result> parameter = Parser.sequence(name, value.optional()); parser = parameter.list(new Chset('&')).optional(); } }