/*******************************************************************************
* Copyright (c) 2012-2016 Codenvy, S.A.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Codenvy, S.A. - initial API and implementation
*******************************************************************************/
package org.everrest.core.impl.uri;
import com.google.common.base.Strings;
import org.everrest.core.impl.MultivaluedMapImpl;
import org.everrest.core.util.NoSyncByteArrayOutputStream;
import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.PathSegment;
import javax.ws.rs.core.UriBuilder;
import java.io.ByteArrayOutputStream;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.List;
import static com.google.common.base.Preconditions.checkArgument;
public final class UriComponent {
// Identifiers of URI components, see RFC 3986: http://gbiv.com/protocols/uri/rfc/rfc3986.htm
// Each identifier is also the row index into the LEGAL and ENCODED lookup tables below.
/** Scheme URI component. */
public static final int SCHEME = 0;
/** UserInfo URI component. */
public static final int USER_INFO = 1;
/** Host URI component. */
public static final int HOST = 2;
/** Port URI component. */
public static final int PORT = 3;
/** Path segment URI sub-component; it cannot contain '/'. */
public static final int PATH_SEGMENT = 4;
/** Path URI component, which consists of path segments. */
public static final int PATH = 5;
/** Query string. */
public static final int QUERY = 6;
/** Fragment. */
public static final int FRAGMENT = 7;
/** Scheme-specific part. */
public static final int SSP = 8;
/** Matrix parameter; its legal characters are those of {@link #PATH_SEGMENT} except '='. */
public static final int MATRIX_PARAM = 9;
/** Whole query string; its legal characters are those of {@link #QUERY} plus '=' and '&amp;'. */
public static final int QUERY_STRING = 10;
/** Encoded '%' character. */
public static final String PERCENT = "%25";
// --------------------
/** Hexadecimal digits (upper case) used to build percent-encoded triplets. */
private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
/**
 * Percent-encoded form of ASCII characters, indexed as {@code [component][character]}. The static
 * initializer stores a {@code %XY} triplet only for characters that are not legal in the component;
 * entries for legal characters keep their default (zero-filled) value.
 */
private static final char[][][] ENCODED = new char[11][128][3];
/**
 * Legal ASCII characters for each URI component, indexed as {@code [component][character]}:
 * 1 means the character may appear unencoded, 0 means it must be percent-encoded.
 */
private static final int[][] LEGAL = new int[11][128];
// Fill the LEGAL table (which ASCII characters may stay unencoded in each component) and then
// derive the ENCODED table from it. Statement order matters: several rows are built by copying
// an earlier row and then adjusting single characters.
static {
// Replace every row with a fresh zero-filled table (the field initializer has already allocated them).
for (int i = SCHEME; i <= QUERY_STRING; i++) {
LEGAL[i] = new int[128];
}
/* The letters of the basic Latin alphabet */
int[] alphabet = new int[128];
fillTable(alphabet, 'a', 'z');
fillTable(alphabet, 'A', 'Z');
/* Digits */
int[] digit = new int[128];
fillTable(digit, '0', '9');
/* Characters that are allowed in a URI but do not have a reserved purpose are called unreserved. These include
 * uppercase and lowercase letters, decimal digits, hyphen, period, underscore, and tilde.
 * Unreserved = ALPHA | DIGIT | '-' | '.' | '_' | '~' */
int[] unreserved = new int[128];
set(alphabet, unreserved);
set(digit, unreserved);
unreserved['-'] = 1;
unreserved['.'] = 1;
unreserved['_'] = 1;
unreserved['~'] = 1;
/* The subset of the reserved characters (gen-delims) that is used as delimiters of the generic URI components. */
int[] gendelim = new int[128];
gendelim[':'] = 1;
gendelim['/'] = 1;
gendelim['?'] = 1;
gendelim['#'] = 1;
gendelim['['] = 1;
gendelim[']'] = 1;
gendelim['@'] = 1;
/* Sub-delims characters. */
int[] subdelim = new int[128];
subdelim['*'] = 1;
subdelim['+'] = 1;
subdelim['!'] = 1;
subdelim['$'] = 1;
subdelim['&'] = 1;
subdelim['\''] = 1;
subdelim['('] = 1;
subdelim[')'] = 1;
subdelim[','] = 1;
subdelim[';'] = 1;
subdelim['='] = 1;
// SCHEME: letters, digits, '-', '+' and '.'.
set(alphabet, LEGAL[SCHEME]);
set(digit, LEGAL[SCHEME]);
LEGAL[SCHEME]['-'] = 1;
LEGAL[SCHEME]['+'] = 1;
LEGAL[SCHEME]['.'] = 1;
// USER_INFO: unreserved, sub-delims and ':'.
set(unreserved, LEGAL[USER_INFO]);
set(subdelim, LEGAL[USER_INFO]);
LEGAL[USER_INFO][':'] = 1;
// HOST: unreserved and sub-delims.
set(unreserved, LEGAL[HOST]);
set(subdelim, LEGAL[HOST]);
// PORT: digits only.
set(digit, LEGAL[PORT]);
// PATH_SEGMENT: unreserved, sub-delims, ':' and '@', but not ';' (cleared again after copying sub-delims).
set(unreserved, LEGAL[PATH_SEGMENT]);
set(subdelim, LEGAL[PATH_SEGMENT]);
LEGAL[PATH_SEGMENT][':'] = 1;
LEGAL[PATH_SEGMENT][';'] = 0;
LEGAL[PATH_SEGMENT]['@'] = 1;
// MATRIX_PARAM: same as PATH_SEGMENT (copied after ';' was cleared) but additionally without '='.
set(LEGAL[PATH_SEGMENT], LEGAL[MATRIX_PARAM]);
LEGAL[MATRIX_PARAM]['='] = 0;
// PATH: unreserved, sub-delims (including ';'), ':', '@' and '/'.
set(unreserved, LEGAL[PATH]);
set(subdelim, LEGAL[PATH]);
LEGAL[PATH][':'] = 1;
LEGAL[PATH]['@'] = 1;
LEGAL[PATH]['/'] = 1;
// QUERY: unreserved plus '*', ':', '@' and '/'.
// The '-', '.' and '_' assignments repeat what the unreserved set already provides.
set(unreserved, LEGAL[QUERY]);
LEGAL[QUERY]['-'] = 1;
LEGAL[QUERY]['.'] = 1;
LEGAL[QUERY]['_'] = 1;
LEGAL[QUERY]['*'] = 1;
// The characters below are currently NOT legal in QUERY; their assignments are disabled.
// LEGAL[QUERY]['!'] = 1;
// LEGAL[QUERY]['$'] = 1;
// LEGAL[QUERY]['\''] = 1;
// LEGAL[QUERY]['('] = 1;
// LEGAL[QUERY][')'] = 1;
// LEGAL[QUERY][','] = 1;
// LEGAL[QUERY][';'] = 1;
LEGAL[QUERY][':'] = 1;
LEGAL[QUERY]['@'] = 1;
// LEGAL[QUERY]['?'] = 1;
LEGAL[QUERY]['/'] = 1;
// QUERY_STRING: everything legal in QUERY plus the pair delimiters '=' and '&'.
set(LEGAL[QUERY], LEGAL[QUERY_STRING]);
LEGAL[QUERY_STRING]['='] = 1;
LEGAL[QUERY_STRING]['&'] = 1;
// FRAGMENT: exact copy of the QUERY row.
System.arraycopy(LEGAL[QUERY], 0, LEGAL[FRAGMENT], 0, LEGAL[QUERY].length);
// SSP: unreserved, sub-delims and gen-delims.
set(unreserved, LEGAL[SSP]);
set(subdelim, LEGAL[SSP]);
set(gendelim, LEGAL[SSP]);
// Pre-compute the "%XY" triplet for every ASCII character that is not legal in a component,
// so encoding such a character is a single table lookup.
for (int i = SCHEME; i <= QUERY_STRING; i++) {
for (int j = 0; j < 128; j++) {
if (LEGAL[i][j] == 0) {
ENCODED[i][j] = new char[]{'%', HEX_DIGITS[j >> 4], HEX_DIGITS[j & 0x0F]};
}
}
}
}
/** UTF-8 charset, used to turn non-ASCII characters into bytes and decoded bytes back into characters. */
private static final Charset UTF8 = Charset.forName("UTF-8");
/**
 * Marks every character of the inclusive range {@code begin..end} in the given table with 1.
 *
 * @param array
 *         table indexed by character code, expected to hold at least 128 entries
 * @param begin
 *         first character of the range, must be an ASCII character
 * @param end
 *         last character of the range, must be an ASCII character not less than {@code begin}
 * @throws IllegalArgumentException
 *         if either bound is outside of ASCII or the range is reversed
 */
private static void fillTable(int[] array, char begin, char end) {
    // A char is never negative, so only the upper bound and the ordering have to be verified.
    boolean outsideAscii = begin > 127 || end > 127;
    if (outsideAscii || begin > end) {
        throw new IllegalArgumentException("Invalid range '" + begin + "' - '" + end + '\'');
    }
    for (int code = begin; code <= end; code++) {
        array[code] = 1;
    }
}
/**
 * Copies every entry that equals 1 from {@code src} to the same index of {@code dest}; all other
 * entries of {@code dest} are left untouched.
 *
 * @param src
 *         table to read the flags from
 * @param dest
 *         table that receives the flags, must be at least as long as the highest set index of {@code src}
 */
private static void set(int[] src, int[] dest) {
    int index = 0;
    for (int flag : src) {
        if (flag == 1) {
            dest[index] = 1;
        }
        index++;
    }
}
// -------------------------------------------
/**
 * Normalizes the path of the given URI as described in RFC 3986, section 6.2.2 (dot segments are
 * removed). All other components of the URI are kept.
 *
 * @param uri
 *         source URI
 * @return the same instance if its path is absent, empty or already normalized, otherwise a new URI
 *         with the normalized path
 */
public static URI normalize(URI uri) {
    final String rawPath = uri.getRawPath();
    if (!Strings.isNullOrEmpty(rawPath)) {
        final String cleanedPath = normalize(rawPath);
        if (!cleanedPath.equals(rawPath)) {
            return UriBuilder.fromUri(uri).replacePath(cleanedPath).build();
        }
    }
    // either there is no path at all or it does not need any change
    return uri;
}
/**
 * Removes dot segments from a path following the algorithm of RFC 3986, section 5.2.4. In addition,
 * every run of consecutive slashes is collapsed into a single slash before the algorithm starts.
 *
 * @param path
 *         raw (still percent-encoded) path, must not be null
 * @return path without "." and ".." segments
 */
private static String normalize(String path) {
    // "/{2,}" collapses runs of any length; replacing "//" pairwise would turn "///" into "//".
    String inputBuffer = path.contains("//") ? path.replaceAll("/{2,}", "/") : path;
    StringBuilder outputBuffer = new StringBuilder(inputBuffer.length());
    while (!inputBuffer.isEmpty()) {
        // Step A: if the input buffer begins with a prefix of "../" or "./", then remove that prefix
        // from the input buffer.
        if (inputBuffer.startsWith("../") || inputBuffer.startsWith("./")) {
            inputBuffer = inputBuffer.substring(inputBuffer.indexOf('/') + 1);
            continue;
        }
        // Step B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete
        // path segment, then replace that prefix with "/" in the input buffer.
        if (inputBuffer.startsWith("/./") || (inputBuffer.startsWith("/.") && isCompletePathSeg(".", inputBuffer))) {
            if (inputBuffer.equals("/.")) {
                inputBuffer = "";
                outputBuffer.append('/');
            } else {
                inputBuffer = inputBuffer.substring(inputBuffer.indexOf('/', 1));
            }
            continue;
        }
        // Step C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete
        // path segment, then replace that prefix with "/" in the input buffer and remove the last
        // segment and its preceding "/" (if any) from the output buffer.
        if (inputBuffer.startsWith("/../") || (inputBuffer.startsWith("/..") && isCompletePathSeg("..", inputBuffer))) {
            int lastSlash = outputBuffer.lastIndexOf("/");
            if (inputBuffer.equals("/..")) {
                // Trailing "..": drop the last segment but keep the slash in front of it.
                inputBuffer = "";
                outputBuffer.delete(lastSlash + 1, outputBuffer.length());
            } else {
                inputBuffer = inputBuffer.substring(inputBuffer.indexOf('/', 1));
                if (lastSlash >= 0) {
                    outputBuffer.delete(lastSlash, outputBuffer.length());
                }
            }
            continue;
        }
        // Step D: if the input buffer consists only of "." or "..", then remove that from the input buffer.
        if (inputBuffer.equals(".") || inputBuffer.equals("..")) {
            inputBuffer = "";
            continue;
        }
        // Step E: move the first path segment in the input buffer to the end of the output buffer,
        // including the initial "/" character (if any) and any subsequent characters up to, but not
        // including, the next "/" character or the end of the input buffer.
        // Searching from index 1 also finds the only slash of a relative path such as "x/..", so the
        // remainder ("/..") is still processed by the steps above instead of being copied verbatim.
        int nextSlash = inputBuffer.indexOf('/', 1);
        if (nextSlash > 0) {
            outputBuffer.append(inputBuffer, 0, nextSlash);
            inputBuffer = inputBuffer.substring(nextSlash);
        } else {
            outputBuffer.append(inputBuffer);
            inputBuffer = "";
        }
    }
    return outputBuffer.toString();
}
/**
 * Tells whether {@code segment} is a complete path segment of {@code path}: either the path is
 * exactly {@code "/" + segment}, or the character right after the first occurrence of the segment
 * is a slash.
 * <p>
 * Callers are expected to have verified that the path starts with {@code "/" + segment}; the method
 * itself does not guard the character lookup against short paths.
 *
 * @param segment
 *         path segment
 * @param path
 *         whole path
 * @return true if segment is a complete path segment, false otherwise
 */
private static boolean isCompletePathSeg(String segment, String path) {
    if (path.equals('/' + segment)) {
        return true;
    }
    int afterSegment = path.indexOf(segment) + segment.length();
    return path.charAt(afterSegment) == '/';
}
/**
 * Percent-encodes the given string for the given URI component. Existing '%' characters are always
 * encoded as {@code %25}; use {@link #recognizeEncode(String, int, boolean)} to keep already encoded
 * sequences.
 *
 * @param str
 *         the URI string
 * @param component
 *         component of URI, scheme, host, port, etc
 * @param containsUriParams
 *         true if the source string contains URI template parameters
 * @return encoded string
 * @throws IllegalArgumentException
 *         if {@code str} is null
 */
public static String encode(String str, int component, boolean containsUriParams) {
    if (str != null) {
        return _encode(str, component, containsUriParams, false);
    }
    throw new IllegalArgumentException();
}
/**
 * Checks that a string which is expected to be percent-encoded already contains only characters
 * that are legal for the given URI component. The '%' character itself is always accepted (the two
 * characters following it are not inspected), and so are '{' and '}' when URI template parameters
 * are allowed. Any character outside of ASCII is rejected.
 *
 * @param str
 *         the string which must be validated
 * @param component
 *         component of URI, scheme, host, port, etc
 * @param containsUriParams
 *         true if the source string contains URI template parameters
 * @return the source string
 * @throws IllegalArgumentException
 *         if the string contains an illegal character
 */
public static String validateUriComponent(String str, int component, boolean containsUriParams) {
    for (int index = 0, total = str.length(); index < total; index++) {
        final char current = str.charAt(index);
        final boolean percentSign = current == '%';
        final boolean templateBrace = containsUriParams && (current == '{' || current == '}');
        if (percentSign || templateBrace) {
            continue;
        }
        if (current >= 128 || needEncode(current, component)) {
            throw new IllegalArgumentException("Illegal character, index " + index + ": " + str);
        }
    }
    return str;
}
/**
 * Tells whether every character of the string may appear unencoded in the given URI component.
 *
 * @param component
 *         component of URI, scheme, host, port, etc
 * @param str
 *         string to check
 * @return true if the string consists of legal characters only (an empty string is valid),
 *         false as soon as a non-ASCII or otherwise illegal character is found
 */
public static boolean isUriComponentContainsValidCharacters(int component, String str) {
    final int[] legalChars = LEGAL[component];
    for (char ch : str.toCharArray()) {
        boolean legal = ch < legalChars.length && legalChars[ch] != 0;
        if (!legal) {
            return false;
        }
    }
    return true;
}
/**
 * Percent-encodes the given string while avoiding double encoding: a '%' followed by two valid
 * hexadecimal digits is kept as is, any other '%' is encoded as {@code %25}. All remaining
 * characters are encoded according to the rules of the given URI component.
 *
 * @param str
 *         source string
 * @param component
 *         part of URI, e. g. scheme, host, path
 * @param containsUriParams
 *         whether the string may contain URI templates
 * @return valid string
 * @throws IllegalArgumentException
 *         if {@code str} is null
 */
public static String recognizeEncode(String str, int component, boolean containsUriParams) {
    if (str != null) {
        return _encode(str, component, containsUriParams, true);
    }
    throw new IllegalArgumentException();
}
/**
 * Percent-encodes a string for the given URI component.
 * <ul>
 * <li>ASCII characters that are legal in the component are copied, illegal ones are replaced with
 * their {@code %XY} form.</li>
 * <li>Non-ASCII characters are encoded as the percent-encoded bytes of their UTF-8 form. A valid
 * surrogate pair is encoded as one code point (four bytes); encoding each half separately would
 * produce two replacement bytes ({@code %3F%3F}) instead.</li>
 * <li>If {@code recognizeEncoded} is set, a '%' followed by two hexadecimal digits is kept as is.</li>
 * <li>If {@code containsUriParams} is set, everything from '{' up to the next '}' is copied as is.</li>
 * </ul>
 *
 * @param str
 *         source string
 * @param component
 *         part of URI, e. g. scheme, host, path
 * @param containsUriParams
 *         whether the string may contain URI templates
 * @param recognizeEncoded
 *         whether to check the string to avoid double encoding
 * @return the source string itself if none of its characters needed encoding, otherwise the encoded string
 * @throws IllegalArgumentException
 *         if URI templates are allowed and a '{' has no closing '}'
 */
private static String _encode(String str, int component, boolean containsUriParams, boolean recognizeEncoded) {
    final int length = str.length();
    StringBuilder sb = new StringBuilder(length);
    boolean encode = false;
    for (int i = 0; i < length; i++) {
        char ch = str.charAt(i);
        encode |= needEncode(ch, component);
        if (ch == '%' && recognizeEncoded) {
            if (checkHexCharacters(str, i)) {
                // already a percent-encoded triplet, copy "%XY" untouched
                sb.append(str, i, i + 3);
                i += 2;
            } else {
                sb.append(PERCENT);
            }
        } else if (containsUriParams && ch == '{') {
            int close = find(str, i + 1, length, '}');
            if (close == -1) {
                throw new IllegalArgumentException("Unclosed '{' at index " + i + ": " + str);
            }
            // copy the whole template variable, braces included
            sb.append(str, i, close + 1);
            i = close;
        } else if (ch < 128) {
            if (needEncode(ch, component)) {
                sb.append(ENCODED[component][ch]);
            } else {
                sb.append(ch);
            }
        } else if (Character.isHighSurrogate(ch) && i + 1 < length && Character.isLowSurrogate(str.charAt(i + 1))) {
            // supplementary character: both UTF-16 units must be encoded together
            addUTF8EncodedCodePoint(Character.toCodePoint(ch, str.charAt(++i)), sb);
        } else {
            addUTF8Encoded(ch, sb);
        }
    }
    return encode ? sb.toString() : str;
}

/** Appends the percent-encoded UTF-8 bytes of a single Unicode code point to the builder. */
private static void addUTF8EncodedCodePoint(int codePoint, StringBuilder sb) {
    ByteBuffer buf = UTF8.encode(CharBuffer.wrap(Character.toChars(codePoint)));
    while (buf.hasRemaining()) {
        int b = buf.get() & 0xFF;
        sb.append('%').append(HEX_DIGITS[b >> 4]).append(HEX_DIGITS[b & 0x0F]);
    }
}
/**
 * Looks for the first occurrence of a character within {@code [begin, end)}.
 *
 * @param chars
 *         string to search in
 * @param begin
 *         index to start from, inclusive
 * @param end
 *         index to stop at, exclusive
 * @param stopChar
 *         character to look for
 * @return index of the character or -1 if it does not occur in the range
 */
private static int find(String chars, int begin, int end, char stopChar) {
    int position = begin;
    while (position < end) {
        if (chars.charAt(position) == stopChar) {
            return position;
        }
        position++;
    }
    return -1;
}
/**
 * Decodes a percent-encoded URI string. Every run of consecutive {@code %XY} triplets is turned
 * into bytes and interpreted as UTF-8; a '+' is decoded as a space regardless of the component.
 *
 * @param str
 *         the source percent-encoded string
 * @param component
 *         component of URI, scheme, host, port, etc. NOTE: the component is not used currently
 * @return decoded string
 * @throws IllegalArgumentException
 *         if the string is null, has an incomplete triplet or a non-hexadecimal digit after '%'
 */
public static String decode(String str, int component) {
    if (str == null) {
        throw new IllegalArgumentException("Decoded string is null");
    }
    final int length = str.length();
    if (length < 3 && str.indexOf('%') >= 0) {
        throw new IllegalArgumentException("Malformed string: " + str);
    }
    // the last '%' must still be followed by two characters
    final int lastPercent = str.lastIndexOf('%');
    if (lastPercent > 0 && lastPercent > (length - 3)) {
        throw new IllegalArgumentException("Malformed string '" + str + "' at index " + lastPercent);
    }
    StringBuilder decoded = new StringBuilder();
    NoSyncByteArrayOutputStream octets = null; // created lazily, reused for every run of triplets
    int pos = 0;
    while (pos < length) {
        final char current = str.charAt(pos);
        if (current == '%') {
            if (pos + 2 > length) {
                throw new IllegalArgumentException("Malformed string '" + str + "' at index " + pos);
            }
            if (octets != null) {
                octets.reset();
            } else {
                octets = new NoSyncByteArrayOutputStream(4);
            }
            pos = percentDecode(str, pos, octets);
            byte[] raw = octets.toByteArray();
            boolean singleAscii = raw.length == 1 && (raw[0] & 0xFF) < 128;
            if (singleAscii) {
                decoded.append((char)raw[0]);
            } else {
                decoded.append(UTF8.decode(ByteBuffer.wrap(raw)));
            }
        } else {
            decoded.append(current == '+' ? ' ' : current);
            pos++;
        }
    }
    return decoded.toString();
}
/**
 * Tells whether a character has to be percent-encoded in the given URI component.
 *
 * @param ch
 *         character
 * @param component
 *         the URI component
 * @return true if the character is outside of the lookup table (non-ASCII) or is not marked as legal
 *         for the component, false otherwise
 */
private static boolean needEncode(char ch, int component) {
    final int[] legalChars = LEGAL[component];
    if (ch >= legalChars.length) {
        return true;
    }
    return legalChars[ch] == 0;
}
/**
 * Appends the percent-encoded UTF-8 bytes of a single character to the builder.
 *
 * @param c
 *         character which must be encoded
 * @param sb
 *         StringBuilder that receives the {@code %XY} triplets
 */
private static void addUTF8Encoded(char c, StringBuilder sb) {
    ByteBuffer encoded = UTF8.encode(CharBuffer.wrap(new char[]{c}));
    for (int left = encoded.remaining(); left > 0; left--) {
        int octet = encoded.get() & 0xFF;
        sb.append('%').append(HEX_DIGITS[octet >> 4]).append(HEX_DIGITS[octet & 0x0F]);
    }
}
/**
 * Decodes a run of consecutive {@code %XY} triplets into bytes.
 *
 * @param str
 *         the source string
 * @param p
 *         position of the first '%' of the run
 * @param out
 *         output buffer for the decoded bytes
 * @return position of the first character after the run (may be equal to the length of the string)
 * @throws IllegalArgumentException
 *         if a character following '%' is not a hexadecimal digit
 */
private static int percentDecode(String str, int p, ByteArrayOutputStream out) {
    final int length = str.length();
    int pos = p;
    do {
        // both digits come back validated and upper-cased
        char high = getHexCharacter(str, pos + 1);
        char low = getHexCharacter(str, pos + 2);
        out.write((Character.digit(high, 16) << 4) | Character.digit(low, 16));
        pos += 3;
    } while (pos != length && str.charAt(pos) == '%');
    return pos;
}
/**
 * Checks whether the two characters after position {@code p} (normally the position of a '%')
 * are hexadecimal digits, i.e. whether they form a percent-encoded character together with it.
 *
 * @param s
 *         source string
 * @param p
 *         position of the '%' character in the string
 * @return true if both following characters exist and are hexadecimal digits, false otherwise
 */
public static boolean checkHexCharacters(String s, int p) {
    if (p > (s.length() - 3)) {
        // fewer than two characters are left after p
        return false;
    }
    for (int offset = 1; offset <= 2; offset++) {
        char c = s.charAt(p + offset);
        boolean hex = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
        if (!hex) {
            return false;
        }
    }
    return true;
}
/**
 * Reads the character at the given position and verifies that it is a hexadecimal digit.
 *
 * @param str
 *         source string
 * @param p
 *         position of character in string
 * @return the digit; lower case letters 'a'-'f' are returned in upper case
 * @throws IllegalArgumentException
 *         if the character is not a hexadecimal digit
 */
private static char getHexCharacter(String str, int p) {
    final char c = str.charAt(p);
    final boolean decimalDigit = c >= '0' && c <= '9';
    final boolean upperHexLetter = c >= 'A' && c <= 'F';
    if (decimalDigit || upperHexLetter) {
        return c;
    }
    if (c >= 'a' && c <= 'f') {
        return (char)(c - ('a' - 'A'));
    }
    throw new IllegalArgumentException("Malformed string '" + str + "' at index " + p);
}
/**
 * Splits a path into its segments. A single leading slash is ignored; a trailing slash produces
 * one additional empty segment.
 *
 * @param path
 *         the relative path, may be null or empty
 * @param decode
 *         true if segments must be decoded, false otherwise
 * @return List of {@link PathSegment}, empty if the path is null or empty
 */
public static List<PathSegment> parsePathSegments(String path, boolean decode) {
    List<PathSegment> segments = new ArrayList<>();
    if (path == null || path.isEmpty()) {
        return segments;
    }
    // remove leading slash
    final String relative = path.charAt(0) == '/' ? path.substring(1) : path;
    final int length = relative.length();
    int start = 0;
    int end = 0;
    while (end < length) {
        int slash = relative.indexOf('/', start);
        end = slash < 0 ? length : slash;
        segments.add(PathSegmentImpl.fromString(relative.substring(start, end), decode));
        start = end + 1;
    }
    return segments;
}
/**
 * Parses an encoded query string into name/value pairs. Pairs are separated by '&amp;', the name is
 * separated from the value by the first '='. Empty pairs are skipped and a pair without '=' gets
 * an empty value.
 *
 * @param rawQuery
 *         source query string, may be null or empty
 * @param decode
 *         if true then names and values of query parameters will be decoded
 * @return {@link MultivaluedMap} with query parameters, empty if there is no query
 */
public static MultivaluedMap<String, String> parseQueryString(String rawQuery, boolean decode) {
    MultivaluedMap<String, String> parameters = new MultivaluedMapImpl();
    if (rawQuery == null || rawQuery.isEmpty()) {
        return parameters;
    }
    final int length = rawQuery.length();
    int start = 0;
    int end = 0;
    while (end < length) {
        int ampersand = rawQuery.indexOf('&', start);
        end = ampersand < 0 ? length : ampersand;
        String pair = rawQuery.substring(start, end);
        start = end + 1;
        if (pair.isEmpty()) {
            continue;
        }
        int eq = pair.indexOf('=');
        // no '=' means a parameter without value
        String name = eq < 0 ? pair : pair.substring(0, eq);
        String value = eq < 0 ? "" : pair.substring(eq + 1);
        if (decode) {
            name = decode(name, QUERY);
            value = decode(value, QUERY);
        }
        parameters.add(name, value);
    }
    return parameters;
}
/**
 * Resolves {@code resolvingUri} against {@code baseUri} and normalizes the result.
 * <p>
 * A reference that consists of a query only ({@code "?name=value"}) is handled here instead of
 * being passed to {@link URI#resolve(URI)}: the query and the fragment of the base URI (if any)
 * are dropped and the reference is appended to what is left, so the path of the base URI is kept.
 *
 * @param baseUri
 *         base URI, must not be null
 * @param resolvingUri
 *         URI to resolve, must not be null
 * @return {@code baseUri} itself if the reference is empty, otherwise the resolved and normalized URI
 * @throws IllegalArgumentException
 *         if any of the arguments is null
 */
public static URI resolve(URI baseUri, URI resolvingUri) {
    checkArgument(baseUri != null, "Null base uri isn't allowed");
    checkArgument(resolvingUri != null, "Null resolving uri isn't allowed");
    String resolvingUriStr = resolvingUri.toString();
    if (resolvingUriStr.isEmpty()) {
        return baseUri;
    }
    if (resolvingUriStr.startsWith("?")) {
        String baseUriStr = baseUri.toString();
        // Cut the base at its query or fragment, whichever comes first. Cutting at '?' only would
        // leave a fragment in place and the new query would end up inside of it.
        int cut = baseUriStr.length();
        for (int i = 0; i < cut; i++) {
            char c = baseUriStr.charAt(i);
            if (c == '?' || c == '#') {
                cut = i;
            }
        }
        return normalize(URI.create(baseUriStr.substring(0, cut) + resolvingUriStr));
    }
    return normalize(baseUri.resolve(resolvingUri));
}
/** Private constructor: the class is final and offers static members only, so it is never instantiated. */
private UriComponent() {
}
}