UriComponent.java example

Explorer
everrest-master
/*******************************************************************************
 * Copyright (c) 2012-2016 Codenvy, S.A.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *   Codenvy, S.A. - initial API and implementation
 *******************************************************************************/
package org.everrest.core.impl.uri;

import com.google.common.base.Strings;

import org.everrest.core.impl.MultivaluedMapImpl;
import org.everrest.core.util.NoSyncByteArrayOutputStream;

import javax.ws.rs.core.MultivaluedMap;
import javax.ws.rs.core.PathSegment;
import javax.ws.rs.core.UriBuilder;
import java.io.ByteArrayOutputStream;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import static com.google.common.base.Preconditions.checkArgument;

public final class UriComponent {
    // Components of URI, see RFC 3986: http://gbiv.com/protocols/uri/rfc/rfc3986.htm
    // The values double as row indexes of the per-component lookup tables of this class.
    /** Scheme URI component. */
    public static final int SCHEME = 0;

    /** UserInfo URI component. */
    public static final int USER_INFO = 1;

    /** Host URI component. */
    public static final int HOST = 2;

    /** Port URI component. */
    public static final int PORT = 3;

    /** Path segment URI sub-component; it cannot contain '/'. */
    public static final int PATH_SEGMENT = 4;

    /** Path URI component; it consists of path segments. */
    public static final int PATH = 5;

    /** Query component. Neither '=' nor '&amp;' is legal here, so both get percent-encoded. */
    public static final int QUERY = 6;

    /** Fragment. */
    public static final int FRAGMENT = 7;

    /** Scheme-specific part. */
    public static final int SSP = 8;

    /** Matrix parameter. Same legal characters as {@link #PATH_SEGMENT} except that '=' is not legal. */
    public static final int MATRIX_PARAM = 9;

    /** Whole query string. Same legal characters as {@link #QUERY} plus the '=' and '&amp;' delimiters. */
    public static final int QUERY_STRING = 10;

    /** Encoded '%' character. */
    public static final String PERCENT = "%25";

    // --------------------

    /** Upper-case hexadecimal digits used to build percent-encoded triplets. */
    private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    /**
     * Percent-encoded triplets indexed by URI component and then by ASCII code. The static initializer fills in
     * the entries of the characters that are not legal in the component.
     */
    private static final char[][][] ENCODED = new char[11][128][3];

    /**
     * Legal characters of each URI component, indexed by URI component and then by ASCII code: 1 marks a legal
     * character, 0 a character that must be percent-encoded.
     */
    private static final int[][] LEGAL = new int[11][128];

    // Fill the LEGAL table of every URI component and derive the ENCODED triplets from it.
    // Statement order matters: some rows are copied from other rows and then adjusted.
    static {
        // Start every component row from an all-zero (nothing is legal) table.
        for (int i = SCHEME; i <= QUERY_STRING; i++) {
            LEGAL[i] = new int[128];
        }

        /* The letters of the basic Latin alphabet */
        int[] alphabet = new int[128];
        fillTable(alphabet, 'a', 'z');
        fillTable(alphabet, 'A', 'Z');
        /* Digits */
        int[] digit = new int[128];
        fillTable(digit, '0', '9');
        /* Characters that are allowed in a URI but do not have a reserved purpose are called unreserved. These
         * include uppercase and lowercase letters, decimal digits, hyphen, period, underscore, and tilde.
         * Unreserved = ALPHA | DIGIT | '-' | '.' | '_' | '~' */
        int[] unreserved = new int[128];
        set(alphabet, unreserved);
        set(digit, unreserved);
        unreserved['-'] = 1;
        unreserved['.'] = 1;
        unreserved['_'] = 1;
        unreserved['~'] = 1;
        /* The subset of the reserved characters (gen-delims) is used as delimiters of the generic URI components. */
        int[] gendelim = new int[128];
        gendelim[':'] = 1;
        gendelim['/'] = 1;
        gendelim['?'] = 1;
        gendelim['#'] = 1;
        gendelim['['] = 1;
        gendelim[']'] = 1;
        gendelim['@'] = 1;
        /* Sub-delims characters. */
        int[] subdelim = new int[128];
        subdelim['*'] = 1;
        subdelim['+'] = 1;
        subdelim['!'] = 1;
        subdelim['$'] = 1;
        subdelim['&'] = 1;
        subdelim['\''] = 1;
        subdelim['('] = 1;
        subdelim[')'] = 1;
        subdelim[','] = 1;
        subdelim[';'] = 1;
        subdelim['='] = 1;

        // Scheme: letters, digits, '-', '+' and '.'.
        set(alphabet, LEGAL[SCHEME]);
        set(digit, LEGAL[SCHEME]);
        LEGAL[SCHEME]['-'] = 1;
        LEGAL[SCHEME]['+'] = 1;
        LEGAL[SCHEME]['.'] = 1;

        // User info: unreserved, sub-delims and ':'.
        set(unreserved, LEGAL[USER_INFO]);
        set(subdelim, LEGAL[USER_INFO]);
        LEGAL[USER_INFO][':'] = 1;

        // Host: unreserved and sub-delims.
        set(unreserved, LEGAL[HOST]);
        set(subdelim, LEGAL[HOST]);

        // Port: digits only.
        set(digit, LEGAL[PORT]);

        // Path segment: unreserved, sub-delims, ':' and '@'. The ';' that came in with the sub-delims is
        // cleared again, so it is percent-encoded (presumably because it separates matrix parameters - confirm).
        set(unreserved, LEGAL[PATH_SEGMENT]);
        set(subdelim, LEGAL[PATH_SEGMENT]);
        LEGAL[PATH_SEGMENT][':'] = 1;
        LEGAL[PATH_SEGMENT][';'] = 0;
        LEGAL[PATH_SEGMENT]['@'] = 1;

        // Matrix parameter: same as path segment, but '=' must be encoded as well.
        set(LEGAL[PATH_SEGMENT], LEGAL[MATRIX_PARAM]);
        LEGAL[MATRIX_PARAM]['='] = 0;

        // Path: unreserved, sub-delims (including ';'), ':', '@' and the segment separator '/'.
        set(unreserved, LEGAL[PATH]);
        set(subdelim, LEGAL[PATH]);
        LEGAL[PATH][':'] = 1;
        LEGAL[PATH]['@'] = 1;
        LEGAL[PATH]['/'] = 1;

        // Query: unreserved plus '*', ':', '@' and '/'. The '-', '.' and '_' assignments repeat what the
        // unreserved set already provides.
        // NOTE(review): the sub-delims and '?' listed in the disabled lines below are not legal here and are
        // therefore percent-encoded; this looks deliberate - confirm before re-enabling any of them.
        set(unreserved, LEGAL[QUERY]);
        LEGAL[QUERY]['-'] = 1;
        LEGAL[QUERY]['.'] = 1;
        LEGAL[QUERY]['_'] = 1;
        LEGAL[QUERY]['*'] = 1;
//        LEGAL[QUERY]['!'] = 1;
//        LEGAL[QUERY]['$'] = 1;
//        LEGAL[QUERY]['\''] = 1;
//        LEGAL[QUERY]['('] = 1;
//        LEGAL[QUERY][')'] = 1;
//        LEGAL[QUERY][','] = 1;
//        LEGAL[QUERY][';'] = 1;
        LEGAL[QUERY][':'] = 1;
        LEGAL[QUERY]['@'] = 1;
//        LEGAL[QUERY]['?'] = 1;
        LEGAL[QUERY]['/'] = 1;

        // Query string: same as query, plus the '=' and '&' delimiters.
        set(LEGAL[QUERY], LEGAL[QUERY_STRING]);
        LEGAL[QUERY_STRING]['='] = 1;
        LEGAL[QUERY_STRING]['&'] = 1;

        // Fragment: exactly the same table as query.
        System.arraycopy(LEGAL[QUERY], 0, LEGAL[FRAGMENT], 0, LEGAL[QUERY].length);

        // Scheme-specific part: unreserved, sub-delims and gen-delims.
        set(unreserved, LEGAL[SSP]);
        set(subdelim, LEGAL[SSP]);
        set(gendelim, LEGAL[SSP]);

        // Pre-compute the "%XY" triplet of every ASCII character that is not legal in a component.
        for (int i = SCHEME; i <= QUERY_STRING; i++) {
            for (int j = 0; j < 128; j++) {
                if (LEGAL[i][j] == 0) {
                    ENCODED[i][j] = new char[]{'%', HEX_DIGITS[j >> 4], HEX_DIGITS[j & 0x0F]};
                }
            }
        }
    }

    /** UTF-8 charset, taken from the JDK constant so that no lookup by name is needed at class initialization. */
    private static final Charset UTF8 = StandardCharsets.UTF_8;

    /**
     * Marks every character of the inclusive range {@code begin}..{@code end} with 1 in the given table.
     *
     * @param array
     *         table indexed by character code
     * @param begin
     *         first character of the range
     * @param end
     *         last character of the range
     * @throws IllegalArgumentException
     *         if the range is reversed or reaches beyond code 127
     */
    private static void fillTable(int[] array, char begin, char end) {
        // A char is never negative, so a valid range only has to be ordered and to end within ASCII.
        boolean validRange = begin <= end && end <= 127;
        if (!validRange) {
            throw new IllegalArgumentException("Invalid range '" + begin + "' - '" + end + '\'');
        }
        for (int code = begin; code <= end; code++) {
            array[code] = 1;
        }
    }

    /**
     * Copies the set flags of one table into another: every index holding 1 in {@code src} is set to 1 in
     * {@code dest}; all other entries of {@code dest} are left as they are.
     *
     * @param src
     *         table to read flags from
     * @param dest
     *         table to update
     */
    private static void set(int[] src, int[] dest) {
        int index = 0;
        for (int flag : src) {
            if (flag == 1) {
                dest[index] = 1;
            }
            index++;
        }
    }

    // -------------------------------------------

    /**
     * Normalizes the path of a URI according to rfc3986. For details see
     * http://www.unix.com.ua/rfc/rfc3986.html#s6.2.2 .
     * <p>
     * The given instance is returned as is when it has no path or when its raw path is already normalized;
     * otherwise a new URI is built with the path replaced by the normalized one.
     *
     * @param uri
     *         source URI
     * @return normalized URI
     */
    public static URI normalize(URI uri) {
        final String rawPath = uri.getRawPath();
        if (!Strings.isNullOrEmpty(rawPath)) {
            final String cleanPath = normalize(rawPath);
            if (!cleanPath.equals(rawPath)) {
                return UriBuilder.fromUri(uri).replacePath(cleanPath).build();
            }
        }
        // either there is no path at all or it was already normalized
        return uri;
    }

    /**
     * Removes dot-segments from a path following the "remove_dot_segments" routine of rfc3986 and collapses
     * every run of consecutive slashes into a single slash.
     * <p>
     * Examples: {@code "/a/./b/../c"} gives {@code "/a/c"}, {@code "/a///b"} gives {@code "/a/b"} and the
     * relative path {@code "a/.."} gives an empty string.
     *
     * @param path
     *         raw path to normalize, must not be null
     * @return normalized path
     */
    private static String normalize(String path) {
        // Collapse whole runs of slashes. Replacing the literal "//" is not enough: "///" would still leave
        // "//" behind.
        String inputBuffer = path.contains("//") ? path.replaceAll("/{2,}", "/") : path;
        StringBuilder outputBuffer = new StringBuilder(inputBuffer.length());

        while (inputBuffer.length() != 0) {
            // If the input buffer begins with a prefix of "../" or "./", then remove
            // that prefix from the input buffer.
            // http://www.unix.com.ua/rfc/rfc3986.html#sA.
            if (inputBuffer.startsWith("../") || inputBuffer.startsWith("./")) {
                inputBuffer = inputBuffer.substring(inputBuffer.indexOf('/') + 1);
                continue;
            }
            // if the input buffer begins with a prefix of "/./" or "/.", where "." is
            // a complete path segment, then replace that prefix with "/" in the input buffer.
            // http://www.unix.com.ua/rfc/rfc3986.html#sB.
            if (inputBuffer.startsWith("/./") || (inputBuffer.startsWith("/.") && isCompletePathSeg(".", inputBuffer))) {
                if (inputBuffer.equals("/.")) {
                    inputBuffer = "";
                    outputBuffer.append('/');
                    continue;
                }
                inputBuffer = inputBuffer.substring(inputBuffer.indexOf('/', 1));
                continue;
            }
            // if the input buffer begins with a prefix of "/../" or "/..", where ".."
            // is a complete path segment, then replace that prefix with "/" in the input buffer and
            // remove the last segment and its preceding "/" (if any) from the output buffer.
            // http://www.unix.com.ua/rfc/rfc3986.html#sC.
            if (inputBuffer.startsWith("/../") || (inputBuffer.startsWith("/..") && isCompletePathSeg("..", inputBuffer))) {
                if (inputBuffer.equals("/..")) {
                    // Trailing "/..": drop the last segment but keep the slash in front of it.
                    inputBuffer = "";
                    outputBuffer.delete(outputBuffer.lastIndexOf("/") + 1, outputBuffer.length());
                    continue;
                }
                inputBuffer = inputBuffer.substring(inputBuffer.indexOf('/', 1));
                int lastSlash = outputBuffer.lastIndexOf("/");
                if (lastSlash >= 0) {
                    outputBuffer.delete(lastSlash, outputBuffer.length());
                }
                continue;
            }
            // if the input buffer consists only of "." or "..", then remove that from
            // the input buffer.
            // http://www.unix.com.ua/rfc/rfc3986.html#sD.
            if (inputBuffer.equals(".") || inputBuffer.equals("..")) {
                inputBuffer = "";
                continue;
            }
            // move the first path segment in the input buffer to the end of the
            // output buffer, including the initial "/" character (if any) and any subsequent
            // characters up to, but not including, the next "/" character or the end of the
            // input buffer.
            // http://www.unix.com.ua/rfc/rfc3986.html#sE.
            // The search starts at index 1 so that a leading "/" stays with its segment. Only ONE segment is
            // moved per iteration; moving a whole relative tail such as "a/.." at once would skip the
            // dot-segment handling above.
            int nextSlash = inputBuffer.indexOf('/', 1);
            if (nextSlash > 0) {
                outputBuffer.append(inputBuffer, 0, nextSlash);
                inputBuffer = inputBuffer.substring(nextSlash);
            } else {
                outputBuffer.append(inputBuffer);
                inputBuffer = "";
            }
        }
        return outputBuffer.toString();
    }

    /**
     * Checks if the first occurrence of the segment in the path is a complete path segment, i.e. the path is
     * exactly {@code "/" + segment} or the segment is directly followed by '/'.
     * http://www.unix.com.ua/rfc/rfc3986.html#sB.
     *
     * @param segment
     *         path segment
     * @param path
     *         whole path
     * @return true if segment is complete path segment false otherwise (also when the path does not contain the
     * segment or ends right after it)
     */
    private static boolean isCompletePathSeg(String segment, String path) {
        if (path.equals('/' + segment)) {
            return true;
        }
        int start = path.indexOf(segment);
        if (start < 0) {
            // segment is absent: there is nothing to inspect
            return false;
        }
        int end = start + segment.length();
        // Guard the look-ahead: the segment may be the very last thing in the path.
        return end < path.length() && path.charAt(end) == '/';
    }

    /**
     * Encode given URI string. Every character that is not legal in the given component is percent-encoded,
     * including any '%' that is already there (existing percent-encoded triplets are NOT recognized).
     *
     * @param str
     *         the URI string
     * @param component
     *         component of URI, scheme, host, port, etc
     * @param containsUriParams
     *         true if the source string contains URI parameters
     * @return encoded string
     * @throws IllegalArgumentException
     *         if {@code str} is null
     */
    public static String encode(String str, int component, boolean containsUriParams) {
        if (str == null) {
            throw new IllegalArgumentException("String to encode is null");
        }
        return _encode(str, component, containsUriParams, false);
    }

    /**
     * Validates the content of a percent-encoded string: every character must be '%', a legal character of the
     * given component or, when URI parameters are allowed, one of '{' and '}'.
     *
     * @param str
     *         the string which must be validated
     * @param component
     *         component of URI, scheme, host, port, etc
     * @param containsUriParams
     *         true if the source string contains URI parameters
     * @return the source string
     * @throws IllegalArgumentException
     *         if the string contains a character that is not allowed
     */
    public static String validateUriComponent(String str, int component, boolean containsUriParams) {
        for (int i = 0; i < str.length(); i++) {
            final char ch = str.charAt(i);
            if (ch == '%') {
                continue;
            }
            if (containsUriParams && (ch == '{' || ch == '}')) {
                continue;
            }
            if (ch >= 128 || needEncode(ch, component)) {
                throw new IllegalArgumentException("Illegal character, index " + i + ": " + str);
            }
        }
        return str;
    }

    /**
     * Checks that the string consists only of characters that are legal in the given URI component.
     *
     * @param component
     *         component of URI, scheme, host, port, etc
     * @param str
     *         the string to check
     * @return true if every character is legal for the component (also for an empty string) false otherwise
     */
    public static boolean isUriComponentContainsValidCharacters(int component, String str) {
        final int[] legalChars = LEGAL[component];
        for (char ch : str.toCharArray()) {
            boolean legal = ch < legalChars.length && legalChars[ch] != 0;
            if (!legal) {
                return false;
            }
        }
        return true;
    }

    /**
     * Encode given string without double encoding. A '%' followed by two hex digits is kept as it is; a '%'
     * that is not followed by two hex digits is encoded to '%25'. All other characters that are not legal in
     * the given component are percent-encoded.
     *
     * @param str
     *         source string
     * @param component
     *         part of URI, e. g. schema, host, path
     * @param containsUriParams
     *         true if the string may contain URI templates
     * @return valid string
     * @throws IllegalArgumentException
     *         if {@code str} is null
     */
    public static String recognizeEncode(String str, int component, boolean containsUriParams) {
        if (str == null) {
            throw new IllegalArgumentException("String to encode is null");
        }
        return _encode(str, component, containsUriParams, true);
    }

    /**
     * Percent-encodes the characters of the string that are not legal in the given URI component.
     * <ul>
     * <li>With {@code recognizeEncoded}, a '%' followed by two hex digits is copied unchanged, any other '%'
     * becomes '%25'.</li>
     * <li>With {@code containsUriParams}, a template parameter <code>{...}</code> is copied unchanged up to and
     * including its closing brace.</li>
     * <li>Non-ASCII characters are encoded as the percent-encoded bytes of their UTF-8 form. A surrogate pair is
     * encoded as ONE code point; a surrogate without its counterpart still goes through the single-character
     * path.</li>
     * </ul>
     * The source string itself is returned when none of its characters needed encoding.
     *
     * @param str
     *         source string
     * @param component
     *         part of URI, e. g. schema, host, path
     * @param containsUriParams
     *         true if the string may contain URI templates
     * @param recognizeEncoded
     *         must check string to avoid double encoding
     * @return valid string
     * @throws IllegalArgumentException
     *         if a URI template parameter is opened with '{' but never closed
     */
    private static String _encode(String str, int component, boolean containsUriParams, boolean recognizeEncoded) {
        final int length = str.length();
        final StringBuilder sb = new StringBuilder(length);
        boolean encode = false;
        for (int i = 0; i < length; i++) {
            final char ch = str.charAt(i);
            final boolean illegal = needEncode(ch, component);
            encode |= illegal;
            if (ch == '%' && recognizeEncoded) {
                if (checkHexCharacters(str, i)) {
                    // already encoded triplet, copy all three characters
                    sb.append(ch);
                    sb.append(str.charAt(++i));
                    sb.append(str.charAt(++i));
                } else {
                    sb.append(PERCENT);
                }
            } else if (containsUriParams && ch == '{') {
                int x = find(str, i + 1, length, '}');
                if (x == -1) {
                    throw new IllegalArgumentException("Unclosed URI template parameter at index " + i + ": " + str);
                }
                // copy the template parameter verbatim, braces included
                sb.append(str, i, x + 1);
                i = x;
            } else if (ch < 128) {
                if (illegal) {
                    sb.append(ENCODED[component][ch]);
                } else {
                    sb.append(ch);
                }
            } else if (Character.isHighSurrogate(ch) && i + 1 < length && Character.isLowSurrogate(str.charAt(i + 1))) {
                // Supplementary character: both halves must be encoded together, a single surrogate has no
                // UTF-8 form of its own.
                appendUtf8EncodedCodePoint(Character.toCodePoint(ch, str.charAt(i + 1)), sb);
                i++;
            } else {
                addUTF8Encoded(ch, sb);
            }
        }
        return encode ? sb.toString() : str;
    }

    /** Appends the percent-encoded UTF-8 bytes of one code point, using upper-case hex digits. */
    private static void appendUtf8EncodedCodePoint(int codePoint, StringBuilder sb) {
        byte[] utf8 = new String(Character.toChars(codePoint)).getBytes(StandardCharsets.UTF_8);
        for (byte b : utf8) {
            sb.append('%');
            sb.append(Character.toUpperCase(Character.forDigit((b >> 4) & 0x0F, 16)));
            sb.append(Character.toUpperCase(Character.forDigit(b & 0x0F, 16)));
        }
    }

    /**
     * Looks for a character in the part of the string between {@code begin} (inclusive) and {@code end}
     * (exclusive).
     *
     * @param chars
     *         string to search in
     * @param begin
     *         index to start the search from
     * @param end
     *         index to stop the search at
     * @param stopChar
     *         character to look for
     * @return index of the first occurrence or -1 if there is none in the range
     */
    private static int find(String chars, int begin, int end, char stopChar) {
        int pos = begin;
        while (pos < end && chars.charAt(pos) != stopChar) {
            pos++;
        }
        return pos < end ? pos : -1;
    }


    /**
     * Decode percent encoded URI string. A run of consecutive percent-encoded triplets is decoded as one UTF-8
     * byte sequence. Every '+' is turned into a space, whatever the component is.
     *
     * @param str
     *         the source percent encoded string
     * @param component
     *         component of URI, scheme, host, port, etc. NOTE type of
     *         component is not used currently but will be used for decoding IPv6
     *         addresses
     * @return decoded string
     * @throws IllegalArgumentException
     *         if the string is null or contains a '%' that is not followed by two hex digits
     */
    public static String decode(String str, int component) {
        if (str == null) {
            throw new IllegalArgumentException("Decoded string is null");
        }

        final int length = str.length();

        // a string shorter than one triplet cannot contain any valid '%'
        if (length < 3 && str.indexOf('%') >= 0) {
            throw new IllegalArgumentException("Malformed string: " + str);
        }

        // the last '%' must leave room for its two hex digits
        final int lastPercent = str.lastIndexOf('%');
        if (lastPercent > 0 && lastPercent > (length - 3)) {
            throw new IllegalArgumentException("Malformed string '" + str + "' at index " + lastPercent);
        }

        final StringBuilder decoded = new StringBuilder();
        NoSyncByteArrayOutputStream buff = null;
        int pos = 0;
        while (pos < length) {
            final char c = str.charAt(pos);
            if (c == '%') {
                if (pos + 2 > length) {
                    throw new IllegalArgumentException("Malformed string '" + str + "' at index " + pos);
                }
                // the byte buffer is created on first use and recycled afterwards
                if (buff == null) {
                    buff = new NoSyncByteArrayOutputStream(4);
                } else {
                    buff.reset();
                }
                pos = percentDecode(str, pos, buff);
                final byte[] bytes = buff.toByteArray();
                if (bytes.length == 1 && (bytes[0] & 0xFF) < 128) {
                    // single ASCII byte, no charset decoding needed
                    decoded.append((char)bytes[0]);
                } else {
                    decoded.append(UTF8.decode(ByteBuffer.wrap(bytes)));
                }
            } else {
                decoded.append(c == '+' ? ' ' : c);
                pos++;
            }
        }

        return decoded.toString();
    }

    /**
     * Checks whether a character must be encoded in the given URI component. That is the case for every
     * character outside of the lookup table and for every character that is not marked as legal in it.
     *
     * @param ch
     *         character
     * @param component
     *         the URI component
     * @return true if character must be encoded false otherwise
     */
    private static boolean needEncode(char ch, int component) {
        final int[] legalChars = LEGAL[component];
        if (ch >= legalChars.length) {
            return true;
        }
        return legalChars[ch] == 0;
    }

    /**
     * Appends the percent-encoded UTF-8 bytes of a single character to the StringBuilder.
     *
     * @param c
     *         character which must be encoded
     * @param sb
     *         StringBuilder to add the encoded character to
     */
    private static void addUTF8Encoded(char c, StringBuilder sb) {
        final ByteBuffer utf8Bytes = UTF8.encode(CharBuffer.wrap(new char[]{c}));
        for (int remaining = utf8Bytes.remaining(); remaining > 0; remaining--) {
            final int octet = utf8Bytes.get() & 0xFF;
            sb.append('%').append(HEX_DIGITS[octet >> 4]).append(HEX_DIGITS[octet & 0x0F]);
        }
    }

    /**
     * Decodes a run of consecutive percent-encoded triplets into bytes.
     *
     * @param str
     *         the source string
     * @param p
     *         start position in string, the index of the first '%' of the run
     * @param out
     *         output buffer for decoded bytes
     * @return position in source string right after the last decoded triplet
     */
    private static int percentDecode(String str, int p, ByteArrayOutputStream out) {
        final int length = str.length();
        int pos = p;
        do {
            final char high = getHexCharacter(str, pos + 1);
            final char low = getHexCharacter(str, pos + 2);

            // both characters are digits or upper-case 'A'..'F' at this point
            final int highNibble = Character.isDigit(high) ? high - '0' : high - 'A' + 10;
            final int lowNibble = Character.isDigit(low) ? low - '0' : low - 'A' + 10;

            out.write((byte)((highNibble << 4) | lowNibble));
            pos += 3;
        } while (pos != length && str.charAt(pos) == '%');

        return pos;
    }

    /**
     * Checks whether the two characters that follow position {@code p} are hex digits, i.e. whether the
     * character at {@code p} (expected to be '%') starts a percent-encoded triplet.
     *
     * @param s
     *         source string
     * @param p
     *         position of character in string
     * @return true if the two characters after position {@code p} are hex digits, false if they are not or if
     * the string is too short to hold them
     */
    public static boolean checkHexCharacters(String s, int p) {
        if (p > (s.length() - 3)) {
            return false;
        }
        // Plain range checks: this runs for every '%' of an encoded string, so no exception is thrown and
        // caught just to report "not hex".
        return isHexDigit(s.charAt(p + 1)) && isHexDigit(s.charAt(p + 2));
    }

    /** Tells whether the character is one of '0'-'9', 'A'-'F' or 'a'-'f'. */
    private static boolean isHexDigit(char c) {
        return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
    }

    /**
     * Extracts a character from the given string and checks that it is a valid hex digit.
     *
     * @param str
     *         source string
     * @param p
     *         position of character in string
     * @return the character itself for '0'-'9' and 'A'-'F', the upper-case counterpart for 'a'-'f'
     * @throws IllegalArgumentException
     *         if the character is not a hex digit
     */
    private static char getHexCharacter(String str, int p) {
        final char c = str.charAt(p);
        final boolean digitOrUpperHex = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F');
        if (digitOrUpperHex) {
            return c;
        }
        if (c < 'a' || c > 'f') {
            throw new IllegalArgumentException("Malformed string '" + str + "' at index " + p);
        }
        return Character.toUpperCase(c);
    }

    /**
     * Parse path segments. One leading slash is ignored, the rest of the path is split at every '/'. A path
     * that ends with a slash gets an additional empty last segment.
     *
     * @param path
     *         the relative path
     * @param decode
     *         true if character must be decoded false otherwise
     * @return List of {@link PathSegment}, empty if the path is null or empty
     */
    public static List<PathSegment> parsePathSegments(String path, boolean decode) {
        final List<PathSegment> segments = new ArrayList<>();
        if (path == null || path.isEmpty()) {
            return segments;
        }

        // remove leading slash
        final String relative = path.charAt(0) == '/' ? path.substring(1) : path;

        int start = 0;
        int end = 0;
        while (end < relative.length()) {
            final int slash = relative.indexOf('/', start);
            end = slash < 0 ? relative.length() : slash;
            segments.add(PathSegmentImpl.fromString(relative.substring(start, end), decode));
            start = end + 1;
        }
        return segments;
    }

    /**
     * Parse encoded query string. The string is split at every '&amp;' into pairs; each pair is split at its
     * first '=' into name and value. A pair without '=' gets an empty value, empty pairs are skipped.
     *
     * @param rawQuery
     *         source query string
     * @param decode
     *         if true then query parameters will be decoded
     * @return {@link MultivaluedMap} with query parameters, empty if the query string is null or empty
     */
    public static MultivaluedMap<String, String> parseQueryString(String rawQuery, boolean decode) {
        final MultivaluedMap<String, String> params = new MultivaluedMapImpl();
        if (rawQuery == null || rawQuery.isEmpty()) {
            return params;
        }

        final int length = rawQuery.length();
        int start = 0;
        int end = 0;
        while (end < length) {
            final int amp = rawQuery.indexOf('&', start);
            end = amp < 0 ? length : amp;

            final String pair = rawQuery.substring(start, end);
            start = end + 1;
            if (pair.isEmpty()) {
                continue;
            }

            final int eq = pair.indexOf('=');
            // no '=' means no value
            String name = eq < 0 ? pair : pair.substring(0, eq);
            String value = eq < 0 ? "" : pair.substring(eq + 1);
            if (decode) {
                name = decode(name, QUERY);
                value = decode(value, QUERY);
            }
            params.add(name, value);
        }
        return params;
    }

    /**
     * Resolves a URI against a base URI and normalizes the result.
     * <ul>
     * <li>An empty reference gives the base URI itself.</li>
     * <li>A query-only reference ("?...") replaces the query of the base URI; a fragment of the base URI is
     * dropped.</li>
     * <li>Anything else is resolved with {@link URI#resolve(URI)}.</li>
     * </ul>
     *
     * @param baseUri
     *         base URI
     * @param resolvingUri
     *         URI to resolve against the base URI
     * @return resolved URI
     * @throws IllegalArgumentException
     *         if any of the arguments is null
     */
    public static URI resolve(URI baseUri, URI resolvingUri) {
        checkArgument(baseUri != null, "Null base uri isn't allowed");
        checkArgument(resolvingUri != null, "Null resolving uri isn't allowed");
        String resolvingUriStr = resolvingUri.toString();
        if (resolvingUriStr.isEmpty()) {
            return baseUri;
        }
        if (resolvingUriStr.startsWith("?")) {
            String baseUriStr = baseUri.toString();
            // Keep the base up to its query or fragment, whichever starts first. The fragment is looked up
            // first because a '?' may legally appear inside a fragment and must not be taken for the query.
            int end = baseUriStr.indexOf('#');
            if (end < 0) {
                end = baseUriStr.length();
            }
            int q = baseUriStr.indexOf('?');
            if (q >= 0 && q < end) {
                end = q;
            }
            return normalize(URI.create(baseUriStr.substring(0, end) + resolvingUriStr));
        }
        return normalize(baseUri.resolve(resolvingUri));
    }

    /** Private constructor: the class cannot be instantiated from outside. */
    private UriComponent() {
    }
}