URIHelper.java example

Explorer
CodenameOne-master
/*
 * Copyright (c) 2012, Eric Coolman, Codename One and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Codename One designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *  
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 * 
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 * 
 * Please contact Codename One through http://www.codenameone.com/ if you 
 * need additional information or have any questions.
 */
package net.sourceforge.retroweaver.harmony.runtime.java.net;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import net.sourceforge.retroweaver.harmony.runtime.java.util.ArrayList;
import net.sourceforge.retroweaver.harmony.runtime.java.util.Hashtable;
import net.sourceforge.retroweaver.harmony.runtime.java.util.Iterator;
import net.sourceforge.retroweaver.harmony.runtime.java.util.List;

/**
 * A helper class for working with URI/URL/URN/etc that are not available in the
 * standard URI class. Includes methods for decoding/encoding URI segments,
 * parsing URI queries to maps or lists, and building URI query strings from
 * maps or lists.
 * 
 * @author Eric Coolman
 */
public class URIHelper {
	/**
	 * Marker indicating that the next two characters are a hex-encoded byte.
	 */
	static final char QUOTE_MARKER = '%';
	/**
	 * Output marker indicating a character that couldn't be decoded (not
	 * currently used).
	 */
	static final char ENCODING_ERROR_MARKER = '\uFFFD';
	/**
	 * An entity-encoded ampersand ({@code &amp;}).
	 */
	static final String ENCODED_AMPERSAND = "&amp;";
	/**
	 * HTML 2.0 to 4.0 entity aliases in their complete encoded form, i.e.
	 * including the leading {@code &} and the trailing {@code ;}. Entry
	 * <i>n</i> is the encoded form of entry <i>n</i> of
	 * {@link #HTML_ENTITIES}, so the two arrays must stay the same length and
	 * in the same order.
	 */
	static final String HTML_ENCODED_ENTITIES[] = {"&quot;", "&amp;", "&lt;", "&gt;"};
	/**
	 * HTML 2.0 to 4.0 entities (the raw characters), index-aligned with
	 * {@link #HTML_ENCODED_ENTITIES}.
	 */
	static final char HTML_ENTITIES[] = {'"', '&', '<', '>'};
	/**
	 * Value returned when a string is not a valid HTML entity.
	 */
	static final char HTML_ENTITY_INVALID = 0;
	/**
	 * HTML entity start marker.
	 */
	static final char HTML_ENTITY_START = '&';
	/**
	 * HTML entity terminator.
	 */
	static final char HTML_ENTITY_TERMINATE = ';';
	/**
	 * HTML raw-value (numeric) entity identifier.
	 */
	static final char HTML_ENTITY_RAWVALUE = '#';
	/**
	 * HTML raw-hexvalue entity identifier.
	 */
	static final char HTML_ENTITY_RAWHEXVALUE = 'x';
	// Protocols: URI scheme names, all lower case.
	/**
	 * HyperText Transfer Protocol (HTTP) URI scheme.
	 */
	public static final String HTTP = "http";
	/**
	 * HyperText Transfer Protocol Secure (HTTPS) URI scheme.
	 */
	public static final String HTTPS = "https";
	/**
	 * File Transfer Protocol (FTP) URI scheme.
	 */
	public static final String FTP = "ftp";
	/**
	 * File URI scheme.
	 */
	public static final String FILE = "file";
	/**
	 * Jar file URI scheme.
	 */
	public static final String JAR = "jar";
	/**
	 * Mail URI scheme.
	 */
	public static final String MAILTO = "mailto";
	/**
	 * Short Message Service URI scheme.
	 */
	public static final String SMS = "sms";
	/**
	 * Dial (telephone) URI scheme.
	 */
	public static final String TEL = "tel";
	/**
	 * Session Initiation Protocol URI scheme.
	 */
	public static final String SIP = "sip";
	/**
	 * Socket URI scheme.
	 */
	public static final String SOCKET = "socket";
	/**
	 * Datagram URI scheme.
	 */
	public static final String DATAGRAM = "datagram";
	/**
	 * Multicast URI scheme.
	 */
	public static final String MULTICAST = "multicast";
	/**
	 * Some common protocols, not an exhaustive list. Contains every scheme
	 * constant declared above.
	 */
	public static final String[] SCHEMES = {HTTP, HTTPS, FTP, FILE, JAR, MAILTO, SMS, SIP, TEL, SOCKET, DATAGRAM,
			MULTICAST};
	/**
	 * Protocols that will trigger the URI object to switch the param separator
	 * to a semicolon (;) from ampersand (&). NOTE(review): the switch itself
	 * is not performed in this class; presumably it lives in the URI class.
	 */
	public static final String[] SOCKET_SCHEMES = {SOCKET, DATAGRAM, MULTICAST};

	/**
	 * Decode a single HTML encoded entity, given in its complete form
	 * (leading {@code &} and trailing {@code ;} included).
	 * 
	 * Three forms are understood: decimal values ({@code &#38;}), hexadecimal
	 * values ({@code &#x26;}, the {@code x} may be upper or lower case) and
	 * the aliases listed in {@link #HTML_ENCODED_ENTITIES} (compared case
	 * insensitively).
	 * 
	 * @param source the candidate entity, must not be null.
	 * @return the decoded character, or {@link #HTML_ENTITY_INVALID} (0) if
	 *         the value is not an encoded entity, is not a valid number, or
	 *         does not fit in a single {@code char}.
	 */
	static char decodeEntity(String source) {
		int last = source.length() - 1;
		// Shortest possible entity is 4 characters, e.g. "&#1;" or "&lt;".
		if (source.length() < 4 || source.charAt(0) != HTML_ENTITY_START
				|| source.charAt(last) != HTML_ENTITY_TERMINATE) {
			return HTML_ENTITY_INVALID;
		}
		// Handle entities by value
		if (source.charAt(1) == HTML_ENTITY_RAWVALUE) {
			boolean hex = Character.toLowerCase(source.charAt(2)) == HTML_ENTITY_RAWHEXVALUE;
			// Take every digit between the prefix and the terminator, not
			// just the first one, so multi-digit values decode correctly.
			String digits = source.substring(hex ? 3 : 2, last);
			try {
				int value = Integer.parseInt(digits, hex ? 16 : 10);
				// 0 is the "invalid" marker; negative or > 0xFFFF values
				// cannot be represented by the char return type.
				if (value <= 0 || value > Character.MAX_VALUE) {
					return HTML_ENTITY_INVALID;
				}
				return (char) value;
			} catch (NumberFormatException nfe) {
				return HTML_ENTITY_INVALID;
			}
		}
		// Handle entities by alias
		source = source.toLowerCase();
		for (int i = 0; i < HTML_ENCODED_ENTITIES.length; i++) {
			if (source.equals(HTML_ENCODED_ENTITIES[i])) {
				return HTML_ENTITIES[i];
			}
		}
		return HTML_ENTITY_INVALID;
	}

	/**
	 * Look up the encoded HTML entity for a single character.
	 * 
	 * @param ch the character to encode.
	 * @return the matching entry of {@link #HTML_ENCODED_ENTITIES}, or null
	 *         when the character is not listed in {@link #HTML_ENTITIES}.
	 */
	static String encodeEntity(char ch) {
		int slot = 0;
		// Walk forward until the character is found or the table is exhausted.
		while (slot < HTML_ENTITIES.length && HTML_ENTITIES[slot] != ch) {
			slot++;
		}
		if (slot < HTML_ENTITIES.length) {
			return HTML_ENCODED_ENTITIES[slot];
		}
		return null;
	}

	/**
	 * Utility method for encoding HTML entities within query parameters.
	 * Every character that has an entity form (see
	 * {@link #encodeEntity(char)}) is replaced by it; all other characters are
	 * copied unchanged.
	 * 
	 * @param source the text to encode, must not be null.
	 * @return the text with entity characters replaced.
	 */
	static String encodeEntities(String source) {
		StringBuffer out = new StringBuffer();
		final int length = source.length();
		int pos = 0;
		while (pos < length) {
			char current = source.charAt(pos++);
			String alias = encodeEntity(current);
			if (alias == null) {
				out.append(current);
			} else {
				out.append(alias);
			}
		}
		return out.toString();
	}

	/**
	 * Utility method to encode a string, as per RFC 2396 section 2. Every
	 * character for which {@link #isLegal(int)} is false is replaced by the
	 * escaped form of its UTF-8 bytes, each byte written as {@code %XX} with
	 * exactly two upper case hex digits.
	 * 
	 * @param source the string to encode, may be null.
	 * @return the encoded string; the same instance is returned when the
	 *         source is null or contains no illegal characters.
	 * @see http://www.ietf.org/rfc/rfc2396.txt
	 */
	public static String encodeString(String source) {
		if (source == null) {
			return source;
		}
		int first = firstIllegalCharacter(source);
		// most strings not encoded, so prevent extra objects and work.
		if (first == -1) {
			return source;
		}
		final String hexDigits = "0123456789ABCDEF";
		int len = source.length();
		StringBuffer encoded = new StringBuffer(len + 16);
		encoded.append(source.substring(0, first));
		// Walk characters (not bytes) so the index returned by
		// firstIllegalCharacter() stays meaningful for multi-byte text.
		for (int i = first; i < len; i++) {
			char ch = source.charAt(i);
			if (isLegal(ch)) {
				encoded.append(ch);
				continue;
			}
			// Keep a surrogate pair together so it converts to one valid
			// 4-byte UTF-8 sequence rather than two unmappable halves.
			int end = i + 1;
			if (ch >= 0xD800 && ch <= 0xDBFF && end < len) {
				char low = source.charAt(end);
				if (low >= 0xDC00 && low <= 0xDFFF) {
					end++;
				}
			}
			byte bytes[] = toBytes(source.substring(i, end));
			for (int b = 0; b < bytes.length; b++) {
				int octet = bytes[b] & 0xff;
				// Always two hex digits: "%0A", never "%A".
				encoded.append(QUOTE_MARKER);
				encoded.append(hexDigits.charAt(octet >> 4));
				encoded.append(hexDigits.charAt(octet & 0x0f));
			}
			i = end - 1;
		}
		return encoded.toString();
	}

	/**
	 * Internal use only, for sources known to always be valid. Delegates to
	 * {@link #decodeString(String)} and converts its checked
	 * URISyntaxException into an unchecked IllegalArgumentException carrying
	 * the same message.
	 * 
	 * @param source an encoded string.
	 * @param failSilently currently ignored; a decoding failure is always
	 *        reported as an IllegalArgumentException.
	 * @return a decoded string.
	 */
	static String decodeString(String source, boolean failSilently) {
		String result;
		try {
			result = decodeString(source);
		} catch (URISyntaxException invalid) {
			throw new IllegalArgumentException(invalid.getMessage());
		}
		return result;
	}

	/**
	 * A utility method to decode a string. Each {@code %XX} escape is turned
	 * into the byte it names, all other text is converted to its UTF-8 bytes,
	 * and the resulting byte sequence is read back as UTF-8.
	 * 
	 * @param source an encoded string, may be null.
	 * @return a decoded string; the same instance is returned when the source
	 *         is null or contains no escapes.
	 * @throws URISyntaxException if an escape is truncated (fewer than two
	 *         characters follow the {@code %}) or is not made of two hex
	 *         digits.
	 */
	public static String decodeString(String source) throws URISyntaxException {
		if (source == null) {
			return source;
		}
		int escape = source.indexOf(QUOTE_MARKER);
		// most strings not encoded, so prevent extra objects and work.
		if (escape == -1) {
			return source;
		}
		int len = source.length();
		ByteArrayOutputStream decoded = new ByteArrayOutputStream(len);
		int literalStart = 0;
		while (escape != -1) {
			// Flush the literal run before this escape as UTF-8 so that
			// non-ASCII characters are not truncated to a single byte.
			if (escape > literalStart) {
				byte literal[] = toBytes(source.substring(literalStart, escape));
				decoded.write(literal, 0, literal.length);
			}
			if ((escape + 2) >= len) {
				throw new URISyntaxException(source, "Incomplete escape value");
			}
			int high = hexValue(source.charAt(escape + 1));
			int low = hexValue(source.charAt(escape + 2));
			if (high < 0 || low < 0) {
				throw new URISyntaxException(source, "Invalid escape value");
			}
			decoded.write((high << 4) | low);
			literalStart = escape + 3;
			escape = source.indexOf(QUOTE_MARKER, literalStart);
		}
		if (literalStart < len) {
			byte tail[] = toBytes(source.substring(literalStart));
			decoded.write(tail, 0, tail.length);
		}
		try {
			return new String(decoded.toByteArray(), "UTF8");
		} catch (UnsupportedEncodingException e) {
			// should never get here; report it the same way toBytes() does.
			throw new IllegalArgumentException(e.getMessage());
		}
	}

	/**
	 * Value of a single US-ASCII hex digit, or -1 if the character is not one
	 * of 0-9, A-F or a-f (so signs and non-ASCII digits are rejected).
	 */
	private static int hexValue(char c) {
		if (c >= '0' && c <= '9') {
			return c - '0';
		}
		if (c >= 'A' && c <= 'F') {
			return c - 'A' + 10;
		}
		if (c >= 'a' && c <= 'f') {
			return c - 'a' + 10;
		}
		return -1;
	}

	/**
	 * Parse the query portion of a URI into a map, keeping duplicate keys.
	 * The work is delegated to {@link #parseQuery(String, boolean)} with
	 * duplicates enabled, so a key that appears more than once may map to a
	 * non-String value.
	 * 
	 * @param uri the URI whose query is parsed.
	 * @return a map representation of the query portion of the URI, as
	 *         returned by parseQuery.
	 * @throws IllegalArgumentException if parseQuery reports a syntax error.
	 */
	public static Hashtable<String, Object> getParameters(URI uri) {
		Hashtable<String, Object> parsed;
		try {
			parsed = parseQuery(uri.getQuery(), true);
		} catch (URISyntaxException invalid) {
			// should never get here as URI query will already be validated.
			invalid.printStackTrace();
			throw new IllegalArgumentException(invalid.getMessage());
		}
		return parsed;
	}

	/**
	 * A simple holder class for a name/value pair. Both parts are stored as
	 * given and exposed through getters only.
	 */
	public static class NameValuePair {
		private String name;
		private String value;

		/**
		 * @param name the parameter name.
		 * @param value the parameter value, may be null.
		 */
		public NameValuePair(String name, String value) {
			this.value = value;
			this.name = name;
		}

		/**
		 * @return the name passed to the constructor.
		 */
		public String getName() {
			return this.name;
		}

		/**
		 * @return the value passed to the constructor.
		 */
		public String getValue() {
			return this.value;
		}
	}

	/**
	 * Similar to the StringTokenizer class, this utility class splits the
	 * query portion of a URL on {@code &} and returns one token (normally a
	 * name=value pair) per call. An {@code &} that starts a valid HTML entity
	 * (as judged by {@link URIHelper#decodeEntity(String)}) is not treated as
	 * a separator; the entity is replaced by the character it stands for and
	 * stays inside the current token.
	 * 
	 * @see http://www.htmlhelp.com/reference/html40/entities/special.html
	 */
	static class URIQueryTokenizer {
		private String source;
		/** Position of the next token, or -1 once the source is exhausted. */
		int index;

		public URIQueryTokenizer(String string) {
			this.source = string;
			this.index = 0;
		}

		/**
		 * @return true while unread characters remain in the source.
		 */
		public boolean hasMoreTokens() {
			if (index == -1) {
				return false;
			}
			return index < source.length();
		}

		/**
		 * Read the next token and advance past the separator that ended it.
		 * 
		 * @return the token, with any embedded HTML entities decoded.
		 */
		public String nextToken() {
			StringBuffer token = new StringBuffer();
			int from = index;
			for (;;) {
				int ampersand = source.indexOf(HTML_ENTITY_START, from);
				// No further & means the rest of the source is the token.
				if (ampersand == -1) {
					token.append(source.substring(from));
					index = -1;
					return token.toString();
				}
				// Whatever precedes the & belongs to this token regardless of
				// whether the & turns out to be a separator or an entity.
				token.append(source.substring(from, ampersand));
				int semicolon = source.indexOf(HTML_ENTITY_TERMINATE, ampersand);
				char entity = 0;
				// Only spans of at most 6 characters between & and ; are
				// considered as entity candidates.
				if (semicolon != -1 && (semicolon - ampersand) <= 6) {
					entity = decodeEntity(source.substring(ampersand, semicolon + 1));
				}
				if (entity == 0) {
					// Plain separator: the token ends here, the next one
					// starts right after the &.
					index = ampersand + 1;
					return token.toString();
				}
				// Valid entity: keep its decoded form and keep scanning.
				token.append(entity);
				from = semicolon + 1;
			}
		}
	}

	/**
	 * Parse a URI query string, returning a list of Name/Value pairs in the
	 * order they occur in the query and keeping duplicates. A token without a
	 * parameter separator yields a pair whose value is null.
	 * 
	 * @param query a decoded URI query string, may be null.
	 * @return ordered list of name/value pairs; empty when the query is null.
	 */
	public static List<NameValuePair> parseQueryOrdered(String query) throws URISyntaxException {
		List<NameValuePair> pairs = new ArrayList<NameValuePair>();
		if (query != null) {
			for (URIQueryTokenizer tokens = new URIQueryTokenizer(query); tokens.hasMoreTokens();) {
				String token = tokens.nextToken();
				int split = token.indexOf(URI.PARAMETER_SEPARATOR);
				boolean hasValue = (split != -1);
				String name = hasValue ? token.substring(0, split) : token;
				String value = hasValue ? token.substring(split + 1) : null;
				pairs.add(new NameValuePair(name, value));
			}
		}
		return pairs;
	}

	/**
	 * Build a URI query string, from a list of Name/Value pairs. This version
	 * of the method is useful when the order of the parameters needs to be
	 * preserved.
	 * 
	 * Names and values are entity-encoded (see
	 * {@link #encodeEntities(String)}) but not URI-escaped, so the result is
	 * suitable to be appended to a URI string. If the query will be passed to
	 * a multi-argument URI constructor, it should first be passed to the
	 * {@link #encodeString(String)} method.
	 * 
	 * @param parameters list of name value pairs, must not be null. A pair
	 *        with a null value contributes only its name.
	 * @return a URI query string; the empty string when the list is empty.
	 */
	public static String buildQuery(List<NameValuePair> parameters) {
		StringBuffer query = new StringBuffer();
		Iterator<NameValuePair> i = parameters.iterator();
		// Write the separator before every pair but the first, rather than
		// after each pair, so an empty list never needs a trailing delete.
		boolean first = true;
		while (i.hasNext()) {
			NameValuePair nvp = i.next();
			if (!first) {
				query.append(URI.QUERY_SEPARATOR);
			}
			first = false;
			query.append(encodeEntities(nvp.getName()));
			if (nvp.getValue() != null) {
				query.append(URI.PARAMETER_SEPARATOR);
				query.append(encodeEntities(nvp.getValue()));
			}
		}
		return query.toString();
	}

	/**
	 * Utility method to build a query string from a hashtable of parameters.
	 * For each parameter in the hashtable, the parameter value can be a
	 * string, a list of strings, or a string array - where the latter two
	 * result in the key being repeated once for each value.
	 * 
	 * Keys and values are entity-encoded (see
	 * {@link #encodeEntities(String)}) but not URI-escaped, so the result is
	 * suitable to be appended to a URI string. If the query will be passed to
	 * a multi-argument URI constructor, it should first be passed to the
	 * {@link #encodeString(String)} method.
	 * 
	 * @param parameters the parameters with which to build a query, must not
	 *        be null.
	 * @return a query string; the empty string when there are no parameters.
	 */
	@SuppressWarnings("unchecked")
	public static String buildQuery(Hashtable<String, Object> parameters) {
		StringBuffer query = new StringBuffer();
		Iterator<String> keys = parameters.keySet().iterator();
		while (keys.hasNext()) {
			String key = keys.next();
			Object value = parameters.get(key);
			if (value instanceof List) {
				Iterator<String> values = ((List<String>) value).iterator();
				while (values.hasNext()) {
					appendParameter(query, key, values.next());
				}
			} else if (value instanceof String[]) {
				// parseQuery() stores duplicate keys as String[].
				String array[] = (String[]) value;
				for (int n = 0; n < array.length; n++) {
					appendParameter(query, key, array[n]);
				}
			} else {
				appendParameter(query, key, (String) value);
			}
		}
		return query.toString();
	}

	/**
	 * Append one key=value pair to the query, preceded by the query separator
	 * when the query already holds a pair.
	 */
	private static void appendParameter(StringBuffer query, String key, String value) {
		if (query.length() > 0) {
			query.append(URI.QUERY_SEPARATOR);
		}
		query.append(encodeEntities(key));
		query.append(URI.PARAMETER_SEPARATOR);
		query.append(encodeEntities(value));
	}

	/**
	 * Parse the query string into an unordered map of name/value pairs.
	 * 
	 * If keepDuplicates parameter is true, then the resulting map will contain
	 * a String value when only one occurrence of a key is found in the query,
	 * and a String array if more than one occurrence is found, where the
	 * array is in the order in which the key occurred in the query.
	 * 
	 * If keepDuplicates parameter is false, all values will be strings, and
	 * only the first occurrence of the key will be kept.
	 * 
	 * A key that appears without a value is stored with the value "true".
	 * 
	 * @param query the decoded query portion of a URI
	 * @param keepDuplicates true to preserve duplicate keys.
	 * @return the parsed parameters, or null if the query is null.
	 */
	public static Hashtable<String, Object> parseQuery(String query, boolean keepDuplicates) throws URISyntaxException {
		if (query == null) {
			return null;
		}
		Hashtable<String, Object> parameters = new Hashtable<String, Object>();
		Iterator<NameValuePair> i = parseQueryOrdered(query).iterator();
		while (i.hasNext()) {
			NameValuePair nvp = i.next();
			String name = nvp.getName();
			// Valueless keys are treated as boolean flags on every
			// occurrence, so no null ever ends up in the map or its arrays.
			String value = (nvp.getValue() == null) ? String.valueOf(true) : nvp.getValue();
			if (!parameters.containsKey(name)) {
				parameters.put(name, value);
				continue;
			}
			if (!keepDuplicates) {
				// only the first occurrence is kept.
				continue;
			}
			Object existing = parameters.get(name);
			String merged[];
			if (existing instanceof String) {
				merged = new String[]{(String) existing, value};
			} else {
				String previous[] = (String[]) existing;
				merged = new String[previous.length + 1];
				System.arraycopy(previous, 0, merged, 0, previous.length);
				merged[previous.length] = value;
			}
			// Store the grown array, not just the newest value.
			parameters.put(name, merged);
		}
		return parameters;
	}

	/**
	 * Get the default port that would be used for a connection if the port
	 * was not explicitly specified. For example, an HTTP uri would return 80,
	 * an HTTPS uri would return 443, a server socket URI would return 0
	 * (indicating that the next available port should be selected by the
	 * device). If there is no known default, -1 would be returned.
	 * 
	 * @param scheme the URI scheme, compared case sensitively against the
	 *        scheme constants of this class; may be null.
	 * @return the default port for the scheme, or -1 if no default port is
	 *         known (including when the scheme is null).
	 */
	public static int getDefaultPort(String scheme) {
		// Constant-first comparison so a null scheme falls through to -1
		// instead of raising a NullPointerException.
		if (HTTP.equals(scheme)) {
			return 80;
		}
		if (HTTPS.equals(scheme)) {
			return 443;
		}
		if (SOCKET.equals(scheme)) {
			return 0;
		}
		return -1;
	}

	/**
	 * Utility method to get the UTF-8 bytes of a string without a checked
	 * exception. Should the "UTF8" charset name not be supported, the failure
	 * is rethrown as an IllegalArgumentException with the same message.
	 * 
	 * @param source the string to convert, must not be null.
	 * @return the UTF-8 encoded bytes of the string.
	 */
	static byte[] toBytes(String source) {
		byte[] utf8;
		try {
			utf8 = source.getBytes("UTF8");
		} catch (UnsupportedEncodingException unsupported) {
			throw new IllegalArgumentException(unsupported.getMessage());
		}
		return utf8;
	}
	/**
	 * Utility method to find the first invalid character as per RFC 2396
	 * section 2, as decided by {@link #isLegal(int)}. This helps us prevent
	 * creating excessive objects in the encode() methods since most strings
	 * will not be encoded.
	 * 
	 * @param source the string to scan, must not be null.
	 * @return the index of the first illegal character, or -1 if every
	 *         character is legal.
	 */
	static int firstIllegalCharacter(String source) {
		int position = 0;
		while (position < source.length()) {
			if (!isLegal(source.charAt(position))) {
				return position;
			}
			position++;
		}
		return -1;
	}

	/**
	 * Test whether a character may appear unescaped in a URI. The J2SE
	 * documentation specifies:
	 * 
	 * The set of all legal URI characters consists of the unreserved, reserved,
	 * escaped, and other characters.
	 * 
	 * Here that is checked as: US-ASCII letter or digit, a member of
	 * URI.UNRESERVED_EXTRAS or URI.RESERVED, or accepted by
	 * {@link #isLegalUnicode(int)}.
	 * 
	 * @param ch the character to test.
	 * @return true if the character is legal.
	 * @see http://docs.oracle.com/javase/6/docs/api/java/net/URI.html
	 */
	static boolean isLegal(int ch) {
		if (isAlpha(ch) || isNumeric(ch)) {
			return true;
		}
		if (URI.UNRESERVED_EXTRAS.indexOf(ch) != -1) {
			return true;
		}
		if (URI.RESERVED.indexOf(ch) != -1) {
			return true;
		}
		return isLegalUnicode(ch);
	}

	/**
	 * Test for the "other" category of legal URI characters: anything that is
	 * neither ASCII, nor a space, nor an ISO control character, according to
	 * the corresponding helper methods of this class.
	 */
	static boolean isLegalUnicode(int ch) {
		return !(isASCII(ch) || isSpace(ch) || isISOControl(ch));
	}

	/**
	 * Test if a character is in the US-ASCII range, code points 0 (NUL)
	 * through 127 inclusive.
	 */
	static boolean isASCII(int ch) {
		// NUL (0) is part of ASCII, so the lower bound is inclusive.
		return ch >= 0 && ch < 128;
	}

	/**
	 * In J2SE, this method is usually in the Character class, and uses a
	 * getType() method for determining the Unicode character class. This
	 * implementation tests against the space characters listed at the wikipedia
	 * entry below: the range U+2000 through U+200A, U+205F, and the
	 * characters accepted by {@link #isLineBreak(int)}.
	 * 
	 * @param ch the character to test.
	 * @return true if the character is one of the listed space characters.
	 * @see http://en.wikipedia.org/wiki/Mapping_of_Unicode_characters#Spaces
	 * @see http://docs.oracle.com/javase/6/docs/api/java/lang/Character.html#isSpaceChar%28int%29
	 */
	static boolean isSpace(int ch) {
		// Both bounds must hold; joining them with || would accept every
		// possible value.
		return (ch >= 0x2000 && ch <= 0x200a) || ch == 0x205f || isLineBreak(ch);
	}

	/**
	 * Test a character against a fixed list of break-related code points:
	 * U+00A0, U+00AD, U+0F0B, U+0F0C, U+200B, U+2011, U+2028, U+2029 and
	 * U+202F.
	 */
	static boolean isLineBreak(int ch) {
		switch (ch) {
			case 0x00a0:
			case 0x00ad:
			case 0x0f0b:
			case 0x0f0c:
			case 0x200b:
			case 0x2011:
			case 0x2028:
			case 0x2029:
			case 0x202f:
				return true;
			default:
				return false;
		}
	}

	/**
	 * In J2SE, this method is usually available in the Character class.
	 * Determines if the referenced character (Unicode code point) is an ISO
	 * control character. A character is considered to be an ISO control
	 * character if its code is in the range U+0000 through U+001F or in the
	 * range U+007F through U+009F.
	 */
	static boolean isISOControl(int ch) {
		if (ch < 0x00) {
			return false;
		}
		if (ch <= 0x1f) {
			return true;
		}
		return ch >= 0x7f && ch <= 0x9f;
	}

	/**
	 * Test if a character is a US-ASCII letter (A-Z,a-z) or digit (0-9), by
	 * combining {@link #isAlpha(int)} and {@link #isNumeric(int)}.
	 */
	static boolean isAlphaNum(int ch) {
		if (isAlpha(ch)) {
			return true;
		}
		return isNumeric(ch);
	}

	/**
	 * Test if a character is a US-ASCII letter, upper case (A-Z) or lower
	 * case (a-z).
	 */
	static boolean isAlpha(int ch) {
		boolean upper = ch >= 'A' && ch <= 'Z';
		boolean lower = ch >= 'a' && ch <= 'z';
		return upper || lower;
	}

	/**
	 * Test if a character is a US-ASCII digit (0-9).
	 */
	static boolean isNumeric(int ch) {
		if (ch < '0') {
			return false;
		}
		return ch <= '9';
	}
}