/* Copyright (c) 2012 LinkedIn Corp. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ /* * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. * * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * * The contents of this file are subject to the terms of either the GNU * General Public License Version 2 only ("GPL") or the Common Development * and Distribution License("CDDL") (collectively, the "License"). You * may not use this file except in compliance with the License. You can * obtain a copy of the License at * https://glassfish.dev.java.net/public/CDDL+GPL_1_1.html * or packager/legal/LICENSE.txt. See the License for the specific * language governing permissions and limitations under the License. * * When distributing the software, include this License Header Notice in each * file and include the License file at packager/legal/LICENSE.txt. * * GPL Classpath Exception: * Oracle designates this particular file as subject to the "Classpath" * exception as provided by Oracle in the GPL Version 2 section of the License * file that accompanied this code. * * Modifications: * If applicable, add the following below the License Header, with the fields * enclosed by brackets [] replaced by your own identifying information: * "Portions Copyright [year] [name of copyright owner]" * * Contributor(s): * If you wish your version of this file to be governed by only the CDDL or * only the GPL Version 2, indicate your decision by adding "[Contributor] * elects to include this software in this distribution under the [CDDL or GPL * Version 2] license." If you don't indicate a single choice of license, a * recipient has the option to distribute your version of this file under * either the CDDL, the GPL Version 2 or to extend the choice of license to * its licensees as provided above. However, if you add GPL Version 2 code * and therefore, elected the GPL Version 2 license, then the option applies * only if the new code is made subject to such option by the copyright * holder. */ /** * LinkedIn elects to include this software in this distribution under the CDDL license. * * Modifications: * Repackaged original source under com.linkedin.jersey package. * Removed dependency on javax.ws.rs interfaces * Added JavaDoc documentation to conform to Pegasus style guidelines * Remove special-case encoding of ' ' in query params * Updated _encode() and appendPercentEncodedOctet() methods to handle surrogate pairs */ package com.linkedin.jersey.api.uri; import com.linkedin.jersey.core.util.MultivaluedMap; import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URLDecoder; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; import java.util.LinkedList; import java.util.List; /** * Utility class for validating, encoding and decoding components * of a URI. * * @author Paul.Sandoz@Sun.Com */ public class UriComponent { // TODO rewrite to use masks and not lookup tables /** * The URI component type. */ public enum Type { /** * ALPHA / DIGIT / "-" / "." / "_" / "~" characters */ UNRESERVED, /** * The URI scheme component type. */ SCHEME, /** * The URI authority component type. */ AUTHORITY, /** * The URI user info component type. */ USER_INFO, /** * The URI host component type. */ HOST, /** * The URI port component type. */ PORT, /** * The URI path component type. */ PATH, /** * The URI path component type that is a path segment. */ PATH_SEGMENT, /** * The URI path component type that is a matrix parameter. */ MATRIX_PARAM, /** * The URI query component type. */ QUERY, /** * The URI query component type that is a query parameter. */ QUERY_PARAM, /** * The URI fragment component type. */ FRAGMENT, } private UriComponent() { } /** * Validates the legal characters of a percent-encoded string that * represents a URI component type. * * @param s the encoded string. * @param t the URI compontent type identifying the legal characters. * @throws IllegalArgumentException if the encoded string contains illegal * characters. */ public static void validate(String s, Type t) throws IllegalArgumentException { validate(s, t, false); } /** * Validates the legal characters of a percent-encoded string that * represents a URI component type. * * @param s the encoded string. * @param t the URI compontent type identifying the legal characters. * @param template true if the encoded string contains URI template variables * @throws IllegalArgumentException if the encoded string contains illegal * characters. */ public static void validate(String s, Type t, boolean template) throws IllegalArgumentException { int i = _valid(s, t, template); if (i > -1) // TODO localize { throw new IllegalArgumentException("The string '" + s + "' for the URI component " + t + " contains an invalid character, '" + s.charAt(i) + "', at index " + i); } } /** * Validates the legal characters of a percent-encoded string that * represents a URI component type. * * @param s the encoded string. * @param t the URI compontent type identifying the legal characters. * @return true if the encoded string is valid, otherwise false. */ public static boolean valid(String s, Type t) { return valid(s, t, false); } /** * Validates the legal characters of a percent-encoded string that * represents a URI component type. * * @param s the encoded string. * @param t the URI compontent type identifying the legal characters. * @param template true if the encoded string contains URI template variables * @return true if the encoded string is valid, otherwise false. */ public static boolean valid(String s, Type t, boolean template) { return _valid(s, t, template) == -1; } private static int _valid(String s, Type t, boolean template) { boolean[] table = ENCODING_TABLES[t.ordinal()]; for (int i = 0; i < s.length(); i++) { final char c = s.charAt(i); if ((c < 0x80 && c != '%' && !table[c]) || c >= 0x80) { if (!template || (c != '{' && c != '}')) { return i; } } } return -1; } /** * Contextually encodes the characters of string that are either non-ASCII * characters or are ASCII characters that must be percent-encoded using the * UTF-8 encoding. Percent-encoded characters will be recognized and not * double encoded. * * @param s the string to be encoded. * @param t the URI compontent type identifying the ASCII characters that * must be percent-encoded. * @return the encoded string. */ public static String contextualEncode(String s, Type t) { return _encode(s, t, false, true); } /** * Contextually encodes the characters of string that are either non-ASCII * characters or are ASCII characters that must be percent-encoded using the * UTF-8 encoding. Percent-encoded characters will be recognized and not * double encoded. * * @param s the string to be encoded. * @param t the URI compontent type identifying the ASCII characters that * must be percent-encoded. * @param template true if the encoded string contains URI template variables * @return the encoded string. */ public static String contextualEncode(String s, Type t, boolean template) { return _encode(s, t, template, true); } /** * Encodes the characters of string that are either non-ASCII characters * or are ASCII characters that must be percent-encoded using the * UTF-8 encoding. * * @param s the string to be encoded. * @param t the URI compontent type identifying the ASCII characters that * must be percent-encoded. * @return the encoded string. */ public static String encode(String s, Type t) { return _encode(s, t, false, false); } /** * Encodes the characters of string that are either non-ASCII characters * or are ASCII characters that must be percent-encoded using the * UTF-8 encoding. * * @param s the string to be encoded. * @param t the URI compontent type identifying the ASCII characters that * must be percent-encoded. * @param template true if the encoded string contains URI template variables * @return the encoded string. */ public static String encode(String s, Type t, boolean template) { return _encode(s, t, template, false); } /** * Encodes a string with template parameters names present, specifically the * characters '{' and '}' will be percent-encoded. * * @param s the string with zero or more template parameters names * @return the string with encoded template parameters names. */ public static String encodeTemplateNames(String s) { int i = s.indexOf('{'); if (i != -1) s = s.replace("{", "%7B"); i = s.indexOf('}'); if (i != -1) s = s.replace("}", "%7D"); return s; } private static String _encode(final String s, final Type t, final boolean template, final boolean contextualEncode) { final boolean[] table = ENCODING_TABLES[t.ordinal()]; boolean insideTemplateParam = false; StringBuilder sb = null; for (int offset = 0, codePoint; offset < s.length(); offset += Character.charCount(codePoint)) { codePoint = s.codePointAt(offset); if (codePoint < 0x80 && table[codePoint]) { if (sb != null) { sb.append((char) codePoint); } } else { if (template) { boolean leavingTemplateParam = false; if (codePoint == '{') { insideTemplateParam = true; } else if (codePoint == '}') { insideTemplateParam = false; leavingTemplateParam = true; } if (insideTemplateParam || leavingTemplateParam) { if (sb != null) { sb.append(Character.toChars(codePoint)); } continue; } } if (contextualEncode && codePoint == '%' && offset + 2 < s.length() && isHexCharacter(s.charAt(offset + 1)) && isHexCharacter(s.charAt(offset + 2))) { if (sb != null) { sb.append('%').append(s.charAt(offset + 1)).append(s.charAt(offset + 2)); } offset += 2; continue; } if (sb == null) { sb = new StringBuilder(); sb.append(s.substring(0, offset)); } if (codePoint < 0x80) { appendPercentEncodedOctet(sb, (char) codePoint); } else { appendUTF8EncodedCharacter(sb, codePoint); } } } return (sb == null) ? s : sb.toString(); } private final static char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; private static void appendPercentEncodedOctet(StringBuilder sb, int b) { sb.append('%'); sb.append(HEX_DIGITS[b >> 4]); sb.append(HEX_DIGITS[b & 0x0F]); } private static void appendUTF8EncodedCharacter(final StringBuilder sb, final int codePoint) { final CharBuffer chars = CharBuffer.wrap(Character.toChars(codePoint)); final ByteBuffer bytes = UTF_8_CHARSET.encode(chars); while (bytes.hasRemaining()) { appendPercentEncodedOctet(sb, bytes.get() & 0xFF); } } private static final String[] SCHEME = {"0-9", "A-Z", "a-z", "+", "-", "."}; private static final String[] UNRESERVED = {"0-9", "A-Z", "a-z", "-", ".", "_", "~"}; private static final String[] SUB_DELIMS = {"!", "$", "&", "'", "(", ")", "*", "+", ",", ";", "="}; private static final boolean[][] ENCODING_TABLES = creatingEncodingTables(); private static boolean[][] creatingEncodingTables() { boolean[][] tables = new boolean[Type.values().length][]; List<String> l = new ArrayList<String>(); l.addAll(Arrays.asList(SCHEME)); tables[Type.SCHEME.ordinal()] = creatingEncodingTable(l); l.clear(); l.addAll(Arrays.asList(UNRESERVED)); tables[Type.UNRESERVED.ordinal()] = creatingEncodingTable(l); l.addAll(Arrays.asList(SUB_DELIMS)); tables[Type.HOST.ordinal()] = creatingEncodingTable(l); tables[Type.PORT.ordinal()] = creatingEncodingTable(Arrays.asList("0-9")); l.add(":"); tables[Type.USER_INFO.ordinal()] = creatingEncodingTable(l); l.add("@"); tables[Type.AUTHORITY.ordinal()] = creatingEncodingTable(l); tables[Type.PATH_SEGMENT.ordinal()] = creatingEncodingTable(l); tables[Type.PATH_SEGMENT.ordinal()][';'] = false; tables[Type.MATRIX_PARAM.ordinal()] = tables[Type.PATH_SEGMENT.ordinal()].clone(); tables[Type.MATRIX_PARAM.ordinal()]['='] = false; l.add("/"); tables[Type.PATH.ordinal()] = creatingEncodingTable(l); l.add("?"); tables[Type.QUERY.ordinal()] = creatingEncodingTable(l); tables[Type.FRAGMENT.ordinal()] = tables[Type.QUERY.ordinal()]; tables[Type.QUERY_PARAM.ordinal()] = creatingEncodingTable(l); tables[Type.QUERY_PARAM.ordinal()]['='] = false; tables[Type.QUERY_PARAM.ordinal()]['+'] = false; tables[Type.QUERY_PARAM.ordinal()]['&'] = false; return tables; } private static boolean[] creatingEncodingTable(List<String> allowed) { boolean[] table = new boolean[0x80]; for (String range : allowed) { if (range.length() == 1) { table[range.charAt(0)] = true; } else if (range.length() == 3 && range.charAt(1) == '-') { for (int i = range.charAt(0); i <= range.charAt(2); i++) { table[i] = true; } } } return table; } private static final Charset UTF_8_CHARSET = Charset.forName("UTF-8"); /** * Decodes characters of a string that are percent-encoded octets using * UTF-8 decoding (if needed). * <p> * It is assumed that the string is valid according to an (unspecified) URI * component type. If a sequence of contiguous percent-encoded octets is * not a valid UTF-8 character then the octets are replaced with '\uFFFD'. * <p> * If the URI component is of type HOST then any "%" found between "[]" is * left alone. It is an IPv6 literal with a scope_id. * <p> * If the URI component is of type QUERY_PARAM then any "+" is decoded as * as ' '. * <p> * @param s the string to be decoded. * @param t the URI component type, may be null. * @return the decoded string. * @throws IllegalArgumentException if a malformed percent-encoded octet is * detected */ public static String decode(String s, Type t) throws IllegalArgumentException { if (s == null) { throw new IllegalArgumentException(); } final int n = s.length(); if (n == 0) { return s; } // If there are no percent-escaped octets if (s.indexOf('%') < 0) { // If there are no '+' characters for query param if (t == Type.QUERY_PARAM) { if (s.indexOf('+') < 0) { return s; } } else { return s; } } else { // Malformed percent-escaped octet at the end if (n < 2) // TODO localize { throw new IllegalArgumentException("Malformed percent-encoded octet at index 1"); } // Malformed percent-escaped octet at the end if (s.charAt(n - 2) == '%') // TODO localize { throw new IllegalArgumentException("Malformed percent-encoded octet at index " + (n - 2)); } } if (t == null) return decode(s, n); switch (t) { case HOST : return decodeHost(s, n); case QUERY_PARAM : return decodeQueryParam(s, n); default : return decode(s, n); } } /** * Decode the query component of a URI. * * @param u the URI. * @param decode true if the query parameters of the query component * should be in decoded form. * @return the multivalued map of query parameters. */ public static MultivaluedMap decodeQuery(URI u, boolean decode) { return decodeQuery(u.getRawQuery(), decode); } /** * Decode the query component of a URI. * * @param q the query component in encoded form. * @param decode true of the query parameters of the query component * should be in decoded form. * @return the multivalued map of query parameters. */ public static MultivaluedMap decodeQuery(String q, boolean decode) { MultivaluedMap queryParameters = new MultivaluedMap(); if (q == null || q.length() == 0) { return queryParameters; } int s = 0, e = 0; do { e = q.indexOf('&', s); if (e == -1) { decodeQueryParam(queryParameters, q.substring(s), decode); } else if (e > s) { decodeQueryParam(queryParameters, q.substring(s, e), decode); } s = e + 1; } while (s > 0 && s < q.length()); return queryParameters; } private static void decodeQueryParam(MultivaluedMap params, String param, boolean decode) { try { int equals = param.indexOf('='); if (equals > 0) { params.add( URLDecoder.decode(param.substring(0, equals), "UTF-8"), (decode) ? URLDecoder.decode(param.substring(equals + 1), "UTF-8") : param.substring(equals + 1)); } else if (equals == 0) { // no key declared, ignore } else if (param.length() > 0) { params.add( URLDecoder.decode(param, "UTF-8"), ""); } } catch (UnsupportedEncodingException ex) { // This should never occur throw new IllegalArgumentException(ex); } } public static final class PathSegment { private static final PathSegment EMPTY_PATH_SEGMENT = new PathSegment("", false); private final String path; private final MultivaluedMap matrixParameters; PathSegment(String path, boolean decode) { this(path, decode, new MultivaluedMap()); } PathSegment(String path, boolean decode, MultivaluedMap matrixParameters) { this.path = (decode) ? UriComponent.decode(path, UriComponent.Type.PATH_SEGMENT) : path; this.matrixParameters = matrixParameters; } /** * @return path */ public String getPath() { return path; } /** * @return matrixParameters */ public MultivaluedMap getMatrixParameters() { return matrixParameters; } } /** * Decode the path component of a URI as path segments. * * @param u the URI. If the path component is an absolute path component * then the leading '/' is ignored and is not considered a delimiator * of a path segment. * @param decode true if the path segments of the path component * should be in decoded form. * @return the list of path segments. */ public static List<PathSegment> decodePath(URI u, boolean decode) { String rawPath = u.getRawPath(); if (rawPath != null && rawPath.length() > 0 && rawPath.charAt(0) == '/') { rawPath = rawPath.substring(1); } return decodePath(rawPath, decode); } /** * Decode the path component of a URI as path segments. * <p> * Any '/' character in the path is considered to be a deliminator * between two path segments. Thus if the path is '/' then the path segment * list will contain two empty path segments. If the path is "//" then * the path segment list will contain three empty path segments. If the path * is "/a/" the path segment list will consist of the following path * segments in order: "", "a" and "". * * @param path the path component in encoded form. * @param decode true if the path segments of the path component * should be in decoded form. * @return the list of path segments. */ public static List<PathSegment> decodePath(String path, boolean decode) { List<PathSegment> segments = new LinkedList<PathSegment>(); if (path == null) { return segments; } int s = 0; int e = -1; do { s = e + 1; e = path.indexOf('/', s); if (e > s) { decodePathSegment(segments, path.substring(s, e), decode); } else if (e == s) { segments.add(PathSegment.EMPTY_PATH_SEGMENT); } } while (e != -1); if (s < path.length()) { decodePathSegment(segments, path.substring(s), decode); } else { segments.add(PathSegment.EMPTY_PATH_SEGMENT); } return segments; } /** * Add the path segment to the list of segments * @param segments the list of segments that should be added to * @param segment the path segment to add * @param decode true if the path segment to be added should be in decoded form */ public static void decodePathSegment(List<PathSegment> segments, String segment, boolean decode) { int colon = segment.indexOf(';'); if (colon != -1) { segments.add(new PathSegment( (colon == 0) ? "" : segment.substring(0, colon), decode, decodeMatrix(segment, decode))); } else { segments.add(new PathSegment( segment, decode)); } } /** * Decode the matrix component of a URI path segment. * * @param pathSegment the path segment component in encoded form. * @param decode true if the matrix parameters of the path segment component * should be in decoded form. * @return the multivalued map of matrix parameters. */ public static MultivaluedMap decodeMatrix(String pathSegment, boolean decode) { MultivaluedMap matrixMap = new MultivaluedMap(); // Skip over path segment int s = pathSegment.indexOf(';') + 1; if (s == 0 || s == pathSegment.length()) { return matrixMap; } int e = 0; do { e = pathSegment.indexOf(';', s); if (e == -1) { decodeMatrixParam(matrixMap, pathSegment.substring(s), decode); } else if (e > s) { decodeMatrixParam(matrixMap, pathSegment.substring(s, e), decode); } s = e + 1; } while (s > 0 && s < pathSegment.length()); return matrixMap; } private static void decodeMatrixParam(MultivaluedMap params, String param, boolean decode) { int equals = param.indexOf('='); if (equals > 0) { params.add( UriComponent.decode(param.substring(0, equals), UriComponent.Type.MATRIX_PARAM), (decode) ? UriComponent.decode(param.substring(equals + 1), UriComponent.Type.MATRIX_PARAM) : param.substring(equals + 1)); } else if (equals == 0) { // no key declared, ignore } else if (param.length() > 0) { params.add( UriComponent.decode(param, UriComponent.Type.MATRIX_PARAM), ""); } } private static String decode(String s, int n) { final StringBuilder sb = new StringBuilder(n); ByteBuffer bb = null; for (int i = 0; i < n;) { final char c = s.charAt(i++); if (c != '%') { sb.append(c); } else { bb = decodePercentEncodedOctets(s, i, bb); i = decodeOctets(i, bb, sb); } } return sb.toString(); } private static String decodeQueryParam(String s, int n) { final StringBuilder sb = new StringBuilder(n); ByteBuffer bb = null; for (int i = 0; i < n;) { final char c = s.charAt(i++); if (c != '%') { if (c != '+') sb.append(c); else sb.append(' '); } else { bb = decodePercentEncodedOctets(s, i, bb); i = decodeOctets(i, bb, sb); } } return sb.toString(); } private static String decodeHost(String s, int n) { final StringBuilder sb = new StringBuilder(n); ByteBuffer bb = null; boolean betweenBrackets = false; for (int i = 0; i < n;) { final char c = s.charAt(i++); if (c == '[') { betweenBrackets = true; } else if (betweenBrackets && c == ']') { betweenBrackets = false; } if (c != '%' || betweenBrackets) { sb.append(c); } else { bb = decodePercentEncodedOctets(s, i, bb); i = decodeOctets(i, bb, sb); } } return sb.toString(); } /** * Decode a contigious sequence of percent encoded octets. * <p> * Assumes the index, i, starts that the first hex digit of the first * percent-encoded octet. */ private static ByteBuffer decodePercentEncodedOctets(String s, int i, ByteBuffer bb) { if (bb == null) bb = ByteBuffer.allocate(1); else bb.clear(); while (true) { // Decode the hex digits bb.put((byte) (decodeHex(s, i++) << 4 | decodeHex(s, i++))); // Finish if at the end of the string if (i == s.length()) { break; } // Finish if no more percent-encoded octets follow if (s.charAt(i++) != '%') { break; } // Check if the byte buffer needs to be increased in size if (bb.position() == bb.capacity()) { bb.flip(); // Create a new byte buffer with the maximum number of possible // octets, hence resize should only occur once ByteBuffer bb_new = ByteBuffer.allocate(s.length() / 3); bb_new.put(bb); bb = bb_new; } } bb.flip(); return bb; } /** * Decodes octets to characters using the UTF-8 decoding and appends * the characters to a StringBuffer. * @return the index to the next unchecked character in the string to decode */ private static int decodeOctets(int i, ByteBuffer bb, StringBuilder sb) { // If there is only one octet and is an ASCII character if (bb.limit() == 1 && (bb.get(0) & 0xFF) < 0x80) { // Octet can be appended directly sb.append((char) bb.get(0)); return i + 2; } else { // CharBuffer cb = UTF_8_CHARSET.decode(bb); sb.append(cb.toString()); return i + bb.limit() * 3 - 1; } } private static int decodeHex(String s, int i) { final int v = decodeHex(s.charAt(i)); if (v == -1) // TODO localize { throw new IllegalArgumentException("Malformed percent-encoded octet at index " + i + ", invalid hexadecimal digit '" + s.charAt(i) + "'"); } return v; } private static final int[] HEX_TABLE = createHexTable(); private static int[] createHexTable() { int[] table = new int[0x80]; Arrays.fill(table, -1); for (char c = '0'; c <= '9'; c++) { table[c] = c - '0'; } for (char c = 'A'; c <= 'F'; c++) { table[c] = c - 'A' + 10; } for (char c = 'a'; c <= 'f'; c++) { table[c] = c - 'a' + 10; } return table; } private static int decodeHex(char c) { return (c < 128) ? HEX_TABLE[c] : -1; } private static boolean isHexCharacter(char c) { return c < 128 && HEX_TABLE[c] != -1; } }