FormUrlDecoder.java example

Explorer
gdata-java-client-master
- java

//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


package com.google.gdata.util.httputil;

import com.google.gdata.util.common.base.StringUtil;
import com.google.common.collect.Lists;
import com.google.gdata.util.parser.Chset;
import com.google.gdata.util.parser.Parser;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.LinkedList;
import java.util.concurrent.atomic.AtomicReference;

/**
 * This class has been <b>deprecated</b>; use {@link
 * com.google.gdata.util.common.net.UriParameterMap#parse(String,java.nio.charset.Charset)}
 * Utility for parsing application/x-www-form-urlencoded content as
 * sent by user agents in the query string of GET form submissions and
 * the body of POST form submissions.
 *
 * 
 */
public class FormUrlDecoder {
  private static Parser<Result> parser;

  private static class Result {
    LinkedList<Parameter> params;
    String encoding;
    public Result(String encoding) {
      this.params = Lists.newLinkedList();
      this.encoding = (encoding == null) ? "ISO-8859-1" : encoding;
    }
  }
  
  private FormUrlDecoder() {
  }
  
  /**
   * @deprecated Do not use. (Currently only needed internally by
   * UriParameterMap.)
   */
  @Deprecated
  public interface Callback {
    void handleParameter(String name, String value);
  }

  /**
   * @deprecated Do not use. (Currently only needed internally by
   * UriParameterMap.)
   */
  @Deprecated
  public static void parseWithCallback(String str, String encoding,
      Callback callback) {
    if (StringUtil.isEmpty(str)) {
      return;
    }
    Result result = new Result(encoding);
    parser.parse(str, result);
    for (Parameter param : result.params) {
      callback.handleParameter(param.name, param.value);
    }
  }

  /**
   * @deprecated Use {@code UriParameterMap.parse(query, encoding)}. To convert
   * from the string to the {@link java.nio.charset.Charset} instance, see
   * {@link com.google.gdata.util.common.base.Charsets}. Note that an encoding of null
   * will default to ISO-8859-1 in this deprecated implementation, whereas the
   * classes in common.net default to UTF-8 encoding.
   */
  @Deprecated
  public static ParamMap parse(String str, ParamMap map, String encoding) {
    // If the parameters string is empty, we shouldn't add a key/value pair of 
    // empty strings.
    if ("".equals(str)) {
      return map == null ? new ParamMap() : map;
    }
    
    final AtomicReference<ParamMap> outMap = new AtomicReference<ParamMap>(map);
    parseWithCallback(str, encoding, new Callback() {
        public void handleParameter(String name, String value) {
          ParamMap map = outMap.get();
          if (map == null) {
            map = new ParamMap();
            outMap.set(map);
          }
          map.append(name, value);
        }
      });
    return outMap.get();
  }

  /**
   * Returns the canonical name for the specified charset.
   *
   * @param charset Some known alias for a charset
   * @return The canonical name for the charset, or the original alias
   * if no mapping was found
   */
  private static String getCanonicalEncodingName(String charset) {
    String canonicalName = charset;
    if (charset != null && charset.length() > 0) {
      try {
        canonicalName = Charset.forName(charset).name();
      } catch (UnsupportedCharsetException uce) {
        // just return the alias
      } catch (IllegalCharsetNameException ice) {
        // just return the alias
      }
    }
    return canonicalName;
  }


  /**
   * URL decodes the section of {@code buf} from {@code start} (inclusive)
   * to {@code end} (exclusive) using the given {@code encoding}. It correctly
   * handles improperly URL encoded strings for character sets in which
   * ascii bytes do not always indicate ascii characters.
   */
  private static String decodeString(char[] buf, int start, int end,
                                     String encoding) {
    String str = new String(buf, start, end - start);
    try {
      if (requiresByteLevelDecoding(encoding)) {
        // Java's specification of URLEncoding states that non-ascii-alphanum
        // characters should be represented by a URL encoded sequence of bytes.
        // Thus, the proper way perform url encoding is to at the string level,
        // encoding all non-ascii-alphanum characters to a url encoded sequence
        // of bytes according to the character set. However, most browsers
        // implement URLEncoding improperly, encoding the entire string to bytes
        // first, and then URL-escaping all of the non-ascii bytes. The two
        // behaviors work fine for UTF-8, because ascii bytes in UTF-8
        // correspond to ascii characters. However, a problem arises for
        // encodings in which ascii bytes can be part of a byte representation
        // of non-ascii characters. It is these encodings for which we have to
        // url decode directly to the byte level, and then encode the bytes with
        // the given encoding. To perform this byte level decoding, we pivot
        // through ISO-8859-1, the encoding which treats all single bytes
        // as their corresponding character values.
        byte[] rawBytes =
            URLDecoder.decode(str, "ISO-8859-1").getBytes("ISO-8859-1");
        
        return new String(rawBytes, encoding);
      }
      return URLDecoder.decode(str, encoding);
    } catch (IllegalArgumentException iae) {
      // According to the javadoc of URLDecoder, when the input string is
      // illegal, it could either leave the illegal characters alone or throw
      // an IllegalArgumentException! To deal with both consistently, we
      // ignore IllegalArgumentException and just return the original string.
      return str;
    } catch (UnsupportedEncodingException e) {
      return str;
    }
  }

  /**
   * Charsets for which a byte with an ascii value does not necessarily map
   * to the corresponding ascii character.
   */
  private static boolean requiresByteLevelDecoding(String encoding) {
    encoding = getCanonicalEncodingName(encoding).toUpperCase();
    // Use endsWith() to include our wrapper character sets, whose names are
    // of the form "X-Variant-Shift_JIS" or "X-Variant-windows-31j".
    return (encoding.endsWith("SHIFT_JIS") ||
            encoding.endsWith("WINDOWS-31J"));
  }

  private static class NameAction
      implements com.google.gdata.util.parser.Callback<Result> {
    public void handle(char[] buf, int start, int end, Result result) {
      Parameter param = new Parameter();
      param.name = decodeString(buf, start, end, result.encoding);
      result.params.addLast(param);
    }
  }

  private static class ValueAction
      implements com.google.gdata.util.parser.Callback<Result> {
    public void handle(char[] buf, int start, int end, Result result) {
      Parameter param = result.params.getLast();
      param.value = decodeString(buf, start, end, result.encoding);
    }
  }

  private static class Parameter {
    String name = null;
    String value = "";
  }

  static {
    Chset nameToken = Chset.difference(Chset.ANYCHAR, new Chset("&="));
    Chset valueToken = Chset.difference(Chset.ANYCHAR, new Chset("&"));
    Parser<Result> name = nameToken.star().action(new NameAction());
    Parser<Result> value = valueToken.plus().action(new ValueAction());
    value = value.optional();
    value = Parser.sequence(new Chset('='), value);

    Parser<Result> parameter = Parser.sequence(name, value.optional());
    parser = parameter.list(new Chset('&')).optional();
  }
}