URLEncodedUtils.java example

Explorer

mechanize-master
- src
  - int-tests
    - java
      - com
        gistlabs
        mechanize
        MechanizeAgentIntTest.java
        integration
        test
        AndroidJsonApiIT.java
        ApacheImageCacheIT.java
        CookiesTest.java
        GoogleApiJsonIT.java
        GoogleSearchForMechanizeJavaIT.java
        Issue36Test.java
        MozillaUserAgentTestClass.java
        WikipediaSearchForAngelaMerkelAndDownloadingImagesIT.java
  - main
    - java
      - com
        gistlabs
        mechanize
        AbstractResource.java
        Mechanize.java
        Resource.java
        ResourceFactory.java
        cache
        HttpCacheFilter.java
        api
        CacheEntry.java
        HttpCache.java
        inMemory
        InMemoryCacheEntry.java
        InMemoryHttpCache.java
        nil
        NilHttpCache.java
        cookie
        Cookie.java
        Cookies.java
        document
        AbstractDocument.java
        documentElements
        AbstractDocumentElement.java
        DocumentElements.java
        html
        HtmlDocument.java
        HtmlDocumentFactory.java
        HtmlElement.java
        HtmlElements.java
        HtmlNode.java
        HtmlSpecialAttributes.java
        HtmlTextNode.java
        JsoupDataUtil.java
        JsoupNodeHelper.java
        form
        Checkable.java
        Checkbox.java
        Email.java
        Form.java
        FormElement.java
        Forms.java
        Hidden.java
        Password.java
        RadioButton.java
        Search.java
        Select.java
        SubmitButton.java
        SubmitImage.java
        Text.java
        TextArea.java
        Upload.java
        image
        Image.java
        Images.java
        json
        JsonDocument.java
        JsonDocumentFactory.java
        exceptions
        JsonArrayException.java
        JsonException.java
        hypermedia
        JsonLink.java
        JsonLinkFinder.java
        node
        JsonNode.java
        impl
        AbstractJsonNode.java
        ArrayNodeImpl.java
        AttributeNode.java
        IndexedAttributeNode.java
        ObjectNodeImpl.java
        link
        Link.java
        Links.java
        node
        AbstractNode.java
        CssNodeHelper.java
        Node.java
        NodeVisitor.java
        exceptions
        MechanizeClientProtocolException.java
        MechanizeException.java
        MechanizeExceptionFactory.java
        MechanizeIOException.java
        MechanizeInitializationException.java
        MechanizeURISyntaxException.java
        MechanizeUnsupportedEncodingException.java
        URISyntaxException.java
        filters
        DefaultMechanizeChainFilter.java
        MechanizeChainFilter.java
        MechanizeFilter.java
        headers
        Header.java
        Headers.java
        impl
        DefaultResource.java
        DefaultResourceFactory.java
        MechanizeAgent.java
        MechanizeHttpClientFilter.java
        MechanizeInitializer.java
        interfaces
        Link.java
        Mechanize.java
        Resource.java
        document
        Document.java
        Node.java
        NodeLink.java
        parameters
        Parameter.java
        Parameters.java
        requestor
        PageRequestor.java
        RequestBuilder.java
        RequestBuilderFactory.java
        util
        Assert.java
        CopyInputStream.java
        NullOutputStream.java
        Util.java
        apache
        Consts.java
        ContentType.java
        URIBuilder.java
        URLEncodedUtils.java
        css
        CSSHelper.java
        css_query
        AbstractChecker.java
        AttributeSpecifierChecker.java
        Checker.java
        NodeHelper.java
        NodeSelector.java
        PseudoClassSpecifierChecker.java
        PseudoContainsSpecifierChecker.java
        PseudoNthSpecifierChecker.java
        TagChecker.java
  - test
    - java
      - com
        gistlabs
        mechanize
        MechanizeAgentTest.java
        MechanizeMock.java
        MechanizeTestCase.java
        PageQueryTest.java
        PageRequest.java
        cache
        HttpCacheFilterTest.java
        cookie
        CookiesTest.java
        document
        html
        DebugJsoupNodeHelperTest.java
        HtmlElementTest.java
        HtmlElementsTest.java
        HtmlNodeTest.java
        JsoupNodeHelperTest.java
        JsoupUtilTest.java
        LinksTest.java
        form
        FormTest.java
        image
        ImagesTest.java
        json
        JsonTest.java
        hypermedia
        BulkJsonLinksTest.java
        JsonLinkFinderTest.java
        JsonLinksTest.java
        impl
        JsonCssPerformanceTest.java
        JsonPageTest.java
        node
        JsonNodeTestCase.java
        impl
        ArrayElementsTest.java
        ElementAttributesTest.java
        MixedChildrenElementsTest.java
        NestedElementsTest.java
        TestElementBaseClass.java
        query
        ArrayRootSelectorTest.java
        AttributeNodeSelectorTest.java
        ElementQueryTest.java
        NodeSelectorTest.java
        PseudoNodeSelectorTest.java
        filters
        DefaultMechanizeChainFilterTest.java
        parameters
        ParametersTest.java
        util
        CopyInputStreamTest.java

/*
 * (This file extracted from Apache HttpClient 4.2.1, used to
 * provide support on Android platform - where only 4.0 is present.
 * Modifications are only to package/import locations to reflect location
 * of this code.)
 * 
 * ====================================================================
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */

package com.gistlabs.mechanize.util.apache;

import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.List;
import java.util.Scanner;

import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
import org.apache.http.NameValuePair;
import org.apache.http.annotation.Immutable;
import org.apache.http.message.BasicHeaderValueParser;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.message.ParserCursor;
import org.apache.http.util.CharArrayBuffer;

/**
 * A collection of utilities for encoding URLs.
 * <p>
 * All members are static. The character-class {@link BitSet}s declared here are
 * filled once in the static initializer and are only read by the methods of
 * this class afterwards.
 *
 * @since 4.0
 */
@Immutable
public class URLEncodedUtils {

	/** MIME type of form-encoded content. */
	public static final String CONTENT_TYPE = "application/x-www-form-urlencoded";
	/** Separates one name/value pair from the next. */
	private static final String PARAMETER_SEPARATOR = "&";
	/** Separates a parameter name from its value. */
	private static final String NAME_VALUE_SEPARATOR = "=";

	/**
	 * Returns a list of {@link NameValuePair NameValuePairs} as built from the
	 * URI's query portion. For example, a URI of
	 * {@code http://example.org/path/to/file?a=1&b=2&c=3} would return a list of
	 * three NameValuePairs, one for a=1, one for b=2, and one for c=3.
	 * <p>
	 * This is typically useful while parsing an HTTP PUT.
	 *
	 * @param uri
	 *            uri to parse
	 * @param encoding
	 *            name of the encoding to use while parsing the query;
	 *            {@code null} selects UTF-8
	 * @return the decoded parameters in query order, or an immutable empty
	 *         list if the URI has no (or an empty) query
	 */
	public static List <NameValuePair> parse (final URI uri, final String encoding) {
		final String query = uri.getRawQuery();
		if (query == null || query.length() == 0) {
			return Collections.emptyList();
		}
		final List<NameValuePair> result = new ArrayList<NameValuePair>();
		final Scanner scanner = new Scanner(query);
		try {
			parse(result, scanner, encoding);
		} finally {
			// Always release the scanner, even when decoding throws
			// (e.g. an unsupported charset name).
			scanner.close();
		}
		return result;
	}

	/*
	 * Disabled overload, kept for reference. It would parse the name/value
	 * pairs out of an HttpEntity whose content type is
	 * application/x-www-form-urlencoded, decoding with the charset of the
	 * entity's content type.
	 * NOTE(review): presumably commented out because it needs EntityUtils and
	 * HTTP.DEF_CONTENT_CHARSET, which this extracted copy does not import --
	 * confirm before re-enabling.
	 */
	//	public static List <NameValuePair> parse (
	//			final HttpEntity entity) throws IOException {
	//		ContentType contentType = ContentType.get(entity);
	//		if (contentType != null && contentType.getMimeType().equalsIgnoreCase(CONTENT_TYPE)) {
	//			String content = EntityUtils.toString(entity, Consts.ASCII);
	//			if (content != null && content.length() > 0) {
	//				Charset charset = contentType.getCharset();
	//				if (charset == null)
	//					charset = HTTP.DEF_CONTENT_CHARSET;
	//				return parse(content, charset);
	//			}
	//		}
	//		return Collections.emptyList();
	//	}

	/**
	 * Returns true if the entity's Content-Type header is
	 * <code>application/x-www-form-urlencoded</code>.
	 *
	 * @param entity
	 *            the entity to inspect, must not be {@code null}
	 * @return {@code true} if the name of the first element of the
	 *         Content-Type header equals {@link #CONTENT_TYPE} ignoring case;
	 *         {@code false} if the header is missing or has no elements
	 */
	public static boolean isEncoded (final HttpEntity entity) {
		final Header contentTypeHeader = entity.getContentType();
		if (contentTypeHeader == null) {
			return false;
		}
		final HeaderElement[] elements = contentTypeHeader.getElements();
		if (elements.length == 0) {
			return false;
		}
		return elements[0].getName().equalsIgnoreCase(CONTENT_TYPE);
	}

	/**
	 * Adds all parameters within the Scanner to the list of
	 * <code>parameters</code>, as encoded by <code>charset</code>. For
	 * example, a scanner containing the string {@code a=1&b=2&c=3} would
	 * add the {@link NameValuePair NameValuePairs} a=1, b=2, and c=3 to the
	 * list of parameters.
	 * <p>
	 * The scanner's delimiter is changed to {@code "&"} by this call. A token
	 * without {@code "="} yields a pair whose value is {@code null}.
	 *
	 * @param parameters
	 *            List to add parameters to.
	 * @param scanner
	 *            Input that contains the parameters to parse.
	 * @param charset
	 *            Name of the encoding to use when decoding the parameters;
	 *            {@code null} selects UTF-8.
	 */
	public static void parse (
			final List <NameValuePair> parameters,
			final Scanner scanner,
			final String charset) {
		scanner.useDelimiter(PARAMETER_SEPARATOR);
		while (scanner.hasNext()) {
			final String token = scanner.next();
			final int separatorIndex = token.indexOf(NAME_VALUE_SEPARATOR);
			final String name;
			final String value;
			if (separatorIndex != -1) {
				// Split on the first '=' only; later ones belong to the value.
				name = decodeFormFields(token.substring(0, separatorIndex).trim(), charset);
				value = decodeFormFields(token.substring(separatorIndex + 1).trim(), charset);
			} else {
				name = decodeFormFields(token.trim(), charset);
				value = null;
			}
			parameters.add(new BasicNameValuePair(name, value));
		}
	}

	/** Delimiter set handed to the header value parser: pairs end at '&'. */
	private static final char[] DELIM = new char[] { '&' };

	/**
	 * Returns a list of {@link NameValuePair NameValuePairs} as parsed from the given string
	 * using the given character encoding. Pairs with an empty name are skipped.
	 *
	 * @param s
	 *            text to parse.
	 * @param charset
	 *            Encoding to use when decoding the parameters; {@code null}
	 *            selects UTF-8.
	 * @return the decoded parameters, or an immutable empty list if
	 *         {@code s} is {@code null}
	 *
	 * @since 4.2
	 */
	public static List<NameValuePair> parse (final String s, final Charset charset) {
		if (s == null) {
			return Collections.emptyList();
		}
		final BasicHeaderValueParser parser = BasicHeaderValueParser.DEFAULT;
		final CharArrayBuffer buffer = new CharArrayBuffer(s.length());
		buffer.append(s);
		final ParserCursor cursor = new ParserCursor(0, buffer.length());
		final List<NameValuePair> list = new ArrayList<NameValuePair>();
		while (!cursor.atEnd()) {
			final NameValuePair nvp = parser.parseNameValuePair(buffer, cursor, DELIM);
			if (nvp.getName().length() > 0) {
				list.add(new BasicNameValuePair(
						decodeFormFields(nvp.getName(), charset),
						decodeFormFields(nvp.getValue(), charset)));
			}
		}
		return list;
	}

	/**
	 * Returns a String that is suitable for use as an <code>application/x-www-form-urlencoded</code>
	 * list of parameters in an HTTP PUT or HTTP POST.
	 *
	 * @param parameters  The parameters to include.
	 * @param encoding The name of the encoding to use; {@code null} selects UTF-8.
	 * @return the encoded pairs joined with {@code "&"}; a pair whose value is
	 *         {@code null} is written as the bare name without {@code "="}
	 */
	public static String format (
			final List <? extends NameValuePair> parameters,
			final String encoding) {
		final StringBuilder result = new StringBuilder();
		for (final NameValuePair parameter : parameters) {
			appendParameter(
					result,
					encodeFormFields(parameter.getName(), encoding),
					encodeFormFields(parameter.getValue(), encoding));
		}
		return result.toString();
	}

	/**
	 * Returns a String that is suitable for use as an <code>application/x-www-form-urlencoded</code>
	 * list of parameters in an HTTP PUT or HTTP POST.
	 *
	 * @param parameters  The parameters to include.
	 * @param charset The encoding to use; {@code null} selects UTF-8.
	 * @return the encoded pairs joined with {@code "&"}; a pair whose value is
	 *         {@code null} is written as the bare name without {@code "="}
	 *
	 * @since 4.2
	 */
	public static String format (
			final Iterable<? extends NameValuePair> parameters,
			final Charset charset) {
		final StringBuilder result = new StringBuilder();
		for (final NameValuePair parameter : parameters) {
			appendParameter(
					result,
					encodeFormFields(parameter.getName(), charset),
					encodeFormFields(parameter.getValue(), charset));
		}
		return result.toString();
	}

	/**
	 * Appends one already-encoded name/value pair to {@code result}, preceded by
	 * the parameter separator when {@code result} is not empty.
	 *
	 * @param result the buffer being built
	 * @param encodedName the encoded parameter name
	 * @param encodedValue the encoded value, or {@code null} to write the name only
	 */
	private static void appendParameter(
			final StringBuilder result,
			final String encodedName,
			final String encodedValue) {
		if (result.length() > 0) {
			result.append(PARAMETER_SEPARATOR);
		}
		result.append(encodedName);
		if (encodedValue != null) {
			result.append(NAME_VALUE_SEPARATOR);
			result.append(encodedValue);
		}
	}

	/**
	 * Unreserved characters, i.e. alphanumeric, plus: {@code _ - ! . ~ ' ( ) *}
	 * <p>
	 *  This list is the same as the {@code unreserved} list in
	 *  <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
	 */
	private static final BitSet UNRESERVED   = new BitSet(256);
	/**
	 * Punctuation characters: , ; : $ & + =
	 * <p>
	 * These are the additional characters allowed by userinfo.
	 */
	private static final BitSet PUNCT        = new BitSet(256);
	/** Characters which are safe to use in userinfo, i.e. {@link #UNRESERVED} plus {@link #PUNCT}uation */
	private static final BitSet USERINFO     = new BitSet(256);
	/** Characters which are safe to use in a path, i.e. {@link #UNRESERVED} plus {@link #PUNCT}uation plus / @ */
	private static final BitSet PATHSAFE     = new BitSet(256);
	/** Characters which are safe to use in a fragment, i.e. {@link #RESERVED} plus {@link #UNRESERVED} */
	private static final BitSet FRAGMENT     = new BitSet(256);

	/**
	 * Reserved characters, i.e. {@code ;/?:@&=+$,[]}
	 * <p>
	 *  This list is the same as the {@code reserved} list in
	 *  <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
	 *  as augmented by
	 *  <a href="http://www.ietf.org/rfc/rfc2732.txt">RFC 2732</a>
	 */
	private static final BitSet RESERVED     = new BitSet(256);


	/**
	 * Safe characters for x-www-form-urlencoded data, as per java.net.URLEncoder and browser behaviour,
	 * i.e. alphanumeric plus {@code "-", "_", ".", "*"}
	 */
	private static final BitSet URLENCODER   = new BitSet(256);

	static {
		// unreserved chars
		// alpha characters
		for (int i = 'a'; i <= 'z'; i++) {
			UNRESERVED.set(i);
		}
		for (int i = 'A'; i <= 'Z'; i++) {
			UNRESERVED.set(i);
		}
		// numeric characters
		for (int i = '0'; i <= '9'; i++) {
			UNRESERVED.set(i);
		}
		UNRESERVED.set('_'); // these are the characters of the "mark" list
		UNRESERVED.set('-');
		UNRESERVED.set('.');
		UNRESERVED.set('*');
		// Snapshot taken on purpose at this point: URLENCODER must NOT contain
		// the remaining unreserved characters added below.
		URLENCODER.or(UNRESERVED);
		UNRESERVED.set('!');
		UNRESERVED.set('~');
		UNRESERVED.set('\'');
		UNRESERVED.set('(');
		UNRESERVED.set(')');
		// punct chars
		PUNCT.set(',');
		PUNCT.set(';');
		PUNCT.set(':');
		PUNCT.set('$');
		PUNCT.set('&');
		PUNCT.set('+');
		PUNCT.set('=');
		// Safe for userinfo
		USERINFO.or(UNRESERVED);
		USERINFO.or(PUNCT);

		// URL path safe
		PATHSAFE.or(UNRESERVED);
		PATHSAFE.set('/'); // segment separator
		PATHSAFE.set(';'); // param separator
		PATHSAFE.set(':'); // rest as per list in 2396, i.e. : @ & = + $ ,
		PATHSAFE.set('@');
		PATHSAFE.set('&');
		PATHSAFE.set('=');
		PATHSAFE.set('+');
		PATHSAFE.set('$');
		PATHSAFE.set(',');

		RESERVED.set(';');
		RESERVED.set('/');
		RESERVED.set('?');
		RESERVED.set(':');
		RESERVED.set('@');
		RESERVED.set('&');
		RESERVED.set('=');
		RESERVED.set('+');
		RESERVED.set('$');
		RESERVED.set(',');
		RESERVED.set('['); // added by RFC 2732
		RESERVED.set(']'); // added by RFC 2732

		FRAGMENT.or(RESERVED);
		FRAGMENT.or(UNRESERVED);
	}

	/** Radix used when producing the hexadecimal digits of a %XX escape. */
	private static final int RADIX = 16;

	/**
	 * Encode/escape a portion of a URL; to use with the query part ensure
	 * {@code blankAsPlus} is true.
	 * <p>
	 * The content is converted to bytes with {@code charset}. A byte contained
	 * in {@code safechars} is emitted unchanged; otherwise a space becomes
	 * {@code '+'} when {@code blankAsPlus} is set, and any other byte is written
	 * as {@code %XX} with upper-case hexadecimal digits.
	 *
	 * @param content the portion to encode, may be {@code null}
	 * @param charset the charset used to convert {@code content} to bytes, must not be {@code null}
	 * @param safechars the byte values that are emitted without escaping
	 * @param blankAsPlus if {@code true}, then convert space to '+' (e.g. for www-url-form-encoded content), otherwise escape it like any other unsafe byte.
	 * @return the encoded string, or {@code null} if {@code content} is {@code null}
	 */
	private static String urlencode(
			final String content,
			final Charset charset,
			final BitSet safechars,
			final boolean blankAsPlus) {
		if (content == null) {
			return null;
		}
		final StringBuilder buf = new StringBuilder();
		final ByteBuffer bb = charset.encode(content);
		while (bb.hasRemaining()) {
			// Mask to get the unsigned byte value (0..255) for the BitSet lookup.
			final int b = bb.get() & 0xff;
			if (safechars.get(b)) {
				buf.append((char) b);
			} else if (blankAsPlus && b == ' ') {
				buf.append('+');
			} else {
				buf.append("%");
				final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX));
				final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
				buf.append(hex1);
				buf.append(hex2);
			}
		}
		return buf.toString();
	}

	/**
	 * Decode/unescape a portion of a URL; to use with the query part ensure
	 * {@code plusAsBlank} is true.
	 * <p>
	 * A {@code '%'} followed by two hexadecimal digits is turned into the
	 * corresponding byte. A {@code '%'} followed by two characters that are not
	 * both hexadecimal digits is copied through together with those characters,
	 * and a {@code '%'} with fewer than two characters after it is copied as is.
	 * The collected bytes are finally decoded with {@code charset}.
	 * <p>
	 * Note that every unescaped character is narrowed to a single byte with a
	 * plain cast, so the input is expected to consist of ASCII characters only.
	 *
	 * @param content the portion to decode, may be {@code null}
	 * @param charset the charset used to decode the resulting bytes, must not be {@code null}
	 * @param plusAsBlank if {@code true}, then convert '+' to space (e.g. for www-url-form-encoded content), otherwise leave as is.
	 * @return the decoded string, or {@code null} if {@code content} is {@code null}
	 */
	private static String urldecode(
			final String content,
			final Charset charset,
			final boolean plusAsBlank) {
		if (content == null) {
			return null;
		}
		// Each input char produces at most one byte, so this capacity suffices.
		final ByteBuffer bb = ByteBuffer.allocate(content.length());
		final CharBuffer cb = CharBuffer.wrap(content);
		while (cb.hasRemaining()) {
			final char c = cb.get();
			if (c == '%' && cb.remaining() >= 2) {
				final char uc = cb.get();
				final char lc = cb.get();
				final int u = Character.digit(uc, 16);
				final int l = Character.digit(lc, 16);
				if (u != -1 && l != -1) {
					bb.put((byte) ((u << 4) + l));
				} else {
					// Not a valid escape: keep the three characters literally.
					bb.put((byte) '%');
					bb.put((byte) uc);
					bb.put((byte) lc);
				}
			} else if (plusAsBlank && c == '+') {
				bb.put((byte) ' ');
			} else {
				bb.put((byte) c);
			}
		}
		bb.flip();
		return charset.decode(bb).toString();
	}

	/**
	 * Decode/unescape www-url-form-encoded content.
	 * 
	 * @param content the content to decode, will decode '+' as space
	 * @param charset the name of the charset to use; {@code null} selects UTF-8
	 * @return the decoded content, or {@code null} if {@code content} is {@code null}
	 */
	private static String decodeFormFields (final String content, final String charset) {
		if (content == null) {
			return null;
		}
		final Charset effective = charset != null ? Charset.forName(charset) : Consts.UTF_8;
		return urldecode(content, effective, true);
	}

	/**
	 * Decode/unescape www-url-form-encoded content.
	 * 
	 * @param content the content to decode, will decode '+' as space
	 * @param charset the charset to use; {@code null} selects UTF-8
	 * @return the decoded content, or {@code null} if {@code content} is {@code null}
	 */
	private static String decodeFormFields (final String content, final Charset charset) {
		if (content == null) {
			return null;
		}
		final Charset effective = charset != null ? charset : Consts.UTF_8;
		return urldecode(content, effective, true);
	}

	/**
	 * Encode/escape www-url-form-encoded content.
	 * <p>
	 * Uses the {@link #URLENCODER} set of characters, rather than
	 * the {@link #UNRESERVED} set; this is for compatibility with previous
	 * releases, URLEncoder.encode() and most browsers.
	 * 
	 * @param content the content to encode, will convert space to '+'
	 * @param charset the name of the charset to use; {@code null} selects UTF-8
	 * @return the encoded content, or {@code null} if {@code content} is {@code null}
	 */
	private static String encodeFormFields (final String content, final String charset) {
		if (content == null) {
			return null;
		}
		final Charset effective = charset != null ? Charset.forName(charset) : Consts.UTF_8;
		return urlencode(content, effective, URLENCODER, true);
	}

	/**
	 * Encode/escape www-url-form-encoded content.
	 * <p>
	 * Uses the {@link #URLENCODER} set of characters, rather than
	 * the {@link #UNRESERVED} set; this is for compatibility with previous
	 * releases, URLEncoder.encode() and most browsers.
	 * 
	 * @param content the content to encode, will convert space to '+'
	 * @param charset the charset to use; {@code null} selects UTF-8
	 * @return the encoded content, or {@code null} if {@code content} is {@code null}
	 */
	private static String encodeFormFields (final String content, final Charset charset) {
		if (content == null) {
			return null;
		}
		final Charset effective = charset != null ? charset : Consts.UTF_8;
		return urlencode(content, effective, URLENCODER, true);
	}

	/**
	 * Encode a String using the {@link #USERINFO} set of characters.
	 * <p>
	 * Used by URIBuilder to encode the userinfo segment.
	 * 
	 * @param content the string to encode, does not convert space to '+'
	 * @param charset the charset to use, must not be {@code null}
	 * @return the encoded string, or {@code null} if {@code content} is {@code null}
	 */
	static String encUserInfo(final String content, final Charset charset) {
		return urlencode(content, charset, USERINFO, false);
	}

	/**
	 * Encode a String using the {@link #FRAGMENT} set of characters.
	 * <p>
	 * Meant for encoding the fragment component of a URI, e.g. by URIBuilder.
	 * 
	 * @param content the string to encode, does not convert space to '+'
	 * @param charset the charset to use, must not be {@code null}
	 * @return the encoded string, or {@code null} if {@code content} is {@code null}
	 */
	static String encFragment(final String content, final Charset charset) {
		return urlencode(content, charset, FRAGMENT, false);
	}

	/**
	 * Encode a String using the {@link #PATHSAFE} set of characters.
	 * <p>
	 * Used by URIBuilder to encode path segments.
	 * 
	 * @param content the string to encode, does not convert space to '+'
	 * @param charset the charset to use, must not be {@code null}
	 * @return the encoded string, or {@code null} if {@code content} is {@code null}
	 */
	static String encPath(final String content, final Charset charset) {
		return urlencode(content, charset, PATHSAFE, false);
	}

}